Merge remote-tracking branch 'upstream/dev' into dev

This commit is contained in:
Carl Woffenden 2019-08-23 23:03:52 +02:00
commit 0fcaa675e0
17 changed files with 185 additions and 49 deletions

View File

@ -392,6 +392,11 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX; bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX;
return bounds; return bounds;
case ZSTD_c_srcSizeHint:
bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN;
bounds.upperBound = ZSTD_SRCSIZEHINT_MAX;
return bounds;
default: default:
{ ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 }; { ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 };
return boundError; return boundError;
@ -448,6 +453,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
case ZSTD_c_forceAttachDict: case ZSTD_c_forceAttachDict:
case ZSTD_c_literalCompressionMode: case ZSTD_c_literalCompressionMode:
case ZSTD_c_targetCBlockSize: case ZSTD_c_targetCBlockSize:
case ZSTD_c_srcSizeHint:
default: default:
return 0; return 0;
} }
@ -494,6 +500,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
case ZSTD_c_ldmMinMatch: case ZSTD_c_ldmMinMatch:
case ZSTD_c_ldmBucketSizeLog: case ZSTD_c_ldmBucketSizeLog:
case ZSTD_c_targetCBlockSize: case ZSTD_c_targetCBlockSize:
case ZSTD_c_srcSizeHint:
break; break;
default: RETURN_ERROR(parameter_unsupported); default: RETURN_ERROR(parameter_unsupported);
@ -674,6 +681,12 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
CCtxParams->targetCBlockSize = value; CCtxParams->targetCBlockSize = value;
return CCtxParams->targetCBlockSize; return CCtxParams->targetCBlockSize;
case ZSTD_c_srcSizeHint :
if (value!=0) /* 0 ==> default */
BOUNDCHECK(ZSTD_c_srcSizeHint, value);
CCtxParams->srcSizeHint = value;
return CCtxParams->srcSizeHint;
default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
} }
} }
@ -779,6 +792,9 @@ size_t ZSTD_CCtxParams_getParameter(
case ZSTD_c_targetCBlockSize : case ZSTD_c_targetCBlockSize :
*value = (int)CCtxParams->targetCBlockSize; *value = (int)CCtxParams->targetCBlockSize;
break; break;
case ZSTD_c_srcSizeHint :
*value = (int)CCtxParams->srcSizeHint;
break;
default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
} }
return 0; return 0;
@ -1029,7 +1045,11 @@ ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize) const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize)
{ {
ZSTD_compressionParameters cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize); ZSTD_compressionParameters cParams;
if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) {
srcSizeHint = CCtxParams->srcSizeHint;
}
cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize);
if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog; if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog;
if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog; if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog;

View File

@ -203,6 +203,9 @@ struct ZSTD_CCtx_params_s {
size_t targetCBlockSize; /* Tries to fit compressed block size to be around targetCBlockSize. size_t targetCBlockSize; /* Tries to fit compressed block size to be around targetCBlockSize.
* No target when targetCBlockSize == 0. * No target when targetCBlockSize == 0.
* There is no guarantee on compressed block size */ * There is no guarantee on compressed block size */
int srcSizeHint; /* User's best guess of source size.
* Hint is not valid when srcSizeHint == 0.
* There is no guarantee that hint is close to actual source size */
ZSTD_dictAttachPref_e attachDictPref; ZSTD_dictAttachPref_e attachDictPref;
ZSTD_literalCompressionMode_e literalCompressionMode; ZSTD_literalCompressionMode_e literalCompressionMode;

View File

@ -2889,6 +2889,7 @@ static size_t ZSTD_decodeLiteralsBlock(void* ctx,
const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */ const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
if (litSize > srcSize-11) /* risk of reading too far with wildcopy */ if (litSize > srcSize-11) /* risk of reading too far with wildcopy */
{ {
if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
if (litSize > srcSize-3) return ERROR(corruption_detected); if (litSize > srcSize-3) return ERROR(corruption_detected);
memcpy(dctx->litBuffer, istart, litSize); memcpy(dctx->litBuffer, istart, litSize);
dctx->litPtr = dctx->litBuffer; dctx->litPtr = dctx->litBuffer;

View File

@ -2655,6 +2655,7 @@ static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */ const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
if (litSize > srcSize-11) /* risk of reading too far with wildcopy */ if (litSize > srcSize-11) /* risk of reading too far with wildcopy */
{ {
if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
if (litSize > srcSize-3) return ERROR(corruption_detected); if (litSize > srcSize-3) return ERROR(corruption_detected);
memcpy(dctx->litBuffer, istart, litSize); memcpy(dctx->litBuffer, istart, litSize);
dctx->litPtr = dctx->litBuffer; dctx->litPtr = dctx->litBuffer;
@ -3034,9 +3035,12 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
{ {
/* blockType == blockCompressed */ /* blockType == blockCompressed */
const BYTE* ip = (const BYTE*)src; const BYTE* ip = (const BYTE*)src;
size_t litCSize;
if (srcSize > BLOCKSIZE) return ERROR(corruption_detected);
/* Decode literals sub-block */ /* Decode literals sub-block */
size_t litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
if (ZSTD_isError(litCSize)) return litCSize; if (ZSTD_isError(litCSize)) return litCSize;
ip += litCSize; ip += litCSize;
srcSize -= litCSize; srcSize -= litCSize;

View File

@ -15,6 +15,7 @@ extern "C" {
#define ZSTD_H_235446 #define ZSTD_H_235446
/* ====== Dependency ======*/ /* ====== Dependency ======*/
#include <limits.h> /* INT_MAX */
#include <stddef.h> /* size_t */ #include <stddef.h> /* size_t */
@ -386,6 +387,7 @@ typedef enum {
* ZSTD_c_forceAttachDict * ZSTD_c_forceAttachDict
* ZSTD_c_literalCompressionMode * ZSTD_c_literalCompressionMode
* ZSTD_c_targetCBlockSize * ZSTD_c_targetCBlockSize
* ZSTD_c_srcSizeHint
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly; * note : never ever use experimentalParam? names directly;
* also, the enums values themselves are unstable and can still change. * also, the enums values themselves are unstable and can still change.
@ -396,6 +398,7 @@ typedef enum {
ZSTD_c_experimentalParam4=1001, ZSTD_c_experimentalParam4=1001,
ZSTD_c_experimentalParam5=1002, ZSTD_c_experimentalParam5=1002,
ZSTD_c_experimentalParam6=1003, ZSTD_c_experimentalParam6=1003,
ZSTD_c_experimentalParam7=1004,
} ZSTD_cParameter; } ZSTD_cParameter;
typedef struct { typedef struct {
@ -1063,6 +1066,8 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
/* Advanced parameter bounds */ /* Advanced parameter bounds */
#define ZSTD_TARGETCBLOCKSIZE_MIN 64 #define ZSTD_TARGETCBLOCKSIZE_MIN 64
#define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX #define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX
#define ZSTD_SRCSIZEHINT_MIN 0
#define ZSTD_SRCSIZEHINT_MAX INT_MAX
/* internal */ /* internal */
#define ZSTD_HASHLOG3_MAX 17 #define ZSTD_HASHLOG3_MAX 17
@ -1441,6 +1446,12 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre
* There is no guarantee on compressed block size (default:0) */ * There is no guarantee on compressed block size (default:0) */
#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6 #define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6
/* User's best guess of source size.
* Hint is not valid when srcSizeHint == 0.
* There is no guarantee that hint is close to actual source size,
* but compression ratio may regress significantly if guess considerably underestimates */
#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7
/*! ZSTD_CCtx_getParameter() : /*! ZSTD_CCtx_getParameter() :
* Get the requested compression parameter value, selected by enum ZSTD_cParameter, * Get the requested compression parameter value, selected by enum ZSTD_cParameter,
* and store it into int* value. * and store it into int* value.

View File

@ -30,6 +30,7 @@
#include <string.h> /* strcmp, strlen */ #include <string.h> /* strcmp, strlen */
#include <assert.h> #include <assert.h>
#include <errno.h> /* errno */ #include <errno.h> /* errno */
#include <limits.h> /* INT_MAX */
#include <signal.h> #include <signal.h>
#include "timefn.h" /* UTIL_getTime, UTIL_clockSpanMicro */ #include "timefn.h" /* UTIL_getTime, UTIL_clockSpanMicro */
@ -304,7 +305,9 @@ struct FIO_prefs_s {
int ldmMinMatch; int ldmMinMatch;
int ldmBucketSizeLog; int ldmBucketSizeLog;
int ldmHashRateLog; int ldmHashRateLog;
size_t streamSrcSize;
size_t targetCBlockSize; size_t targetCBlockSize;
int srcSizeHint;
ZSTD_literalCompressionMode_e literalCompressionMode; ZSTD_literalCompressionMode_e literalCompressionMode;
/* IO preferences */ /* IO preferences */
@ -349,7 +352,9 @@ FIO_prefs_t* FIO_createPreferences(void)
ret->ldmMinMatch = 0; ret->ldmMinMatch = 0;
ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET; ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET;
ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET; ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET;
ret->streamSrcSize = 0;
ret->targetCBlockSize = 0; ret->targetCBlockSize = 0;
ret->srcSizeHint = 0;
ret->literalCompressionMode = ZSTD_lcm_auto; ret->literalCompressionMode = ZSTD_lcm_auto;
return ret; return ret;
} }
@ -418,10 +423,18 @@ void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable) {
prefs->rsyncable = rsyncable; prefs->rsyncable = rsyncable;
} }
void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize) {
prefs->streamSrcSize = streamSrcSize;
}
void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize) { void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize) {
prefs->targetCBlockSize = targetCBlockSize; prefs->targetCBlockSize = targetCBlockSize;
} }
void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint) {
prefs->srcSizeHint = (int)MIN((size_t)INT_MAX, srcSizeHint);
}
void FIO_setLiteralCompressionMode( void FIO_setLiteralCompressionMode(
FIO_prefs_t* const prefs, FIO_prefs_t* const prefs,
ZSTD_literalCompressionMode_e mode) { ZSTD_literalCompressionMode_e mode) {
@ -633,7 +646,6 @@ typedef struct {
static cRess_t FIO_createCResources(FIO_prefs_t* const prefs, static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
const char* dictFileName, int cLevel, const char* dictFileName, int cLevel,
U64 srcSize,
ZSTD_compressionParameters comprParams) { ZSTD_compressionParameters comprParams) {
cRess_t ress; cRess_t ress;
memset(&ress, 0, sizeof(ress)); memset(&ress, 0, sizeof(ress));
@ -667,6 +679,8 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, cLevel) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, cLevel) );
/* max compressed block size */ /* max compressed block size */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetCBlockSize, (int)prefs->targetCBlockSize) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetCBlockSize, (int)prefs->targetCBlockSize) );
/* source size hint */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_srcSizeHint, (int)prefs->srcSizeHint) );
/* long distance matching */ /* long distance matching */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableLongDistanceMatching, prefs->ldmFlag) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableLongDistanceMatching, prefs->ldmFlag) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashLog, prefs->ldmHashLog) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashLog, prefs->ldmHashLog) );
@ -698,10 +712,7 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) );
#endif #endif
/* dictionary */ /* dictionary */
CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, srcSize) ); /* set the value temporarily for dictionary loading, to adapt compression parameters */
CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, dictBuffer, dictBuffSize) ); CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, dictBuffer, dictBuffSize) );
CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, ZSTD_CONTENTSIZE_UNKNOWN) ); /* reset */
free(dictBuffer); free(dictBuffer);
} }
@ -1003,6 +1014,9 @@ FIO_compressZstdFrame(FIO_prefs_t* const prefs,
/* init */ /* init */
if (fileSize != UTIL_FILESIZE_UNKNOWN) { if (fileSize != UTIL_FILESIZE_UNKNOWN) {
CHECK(ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize)); CHECK(ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize));
} else if (prefs->streamSrcSize > 0) {
/* unknown source size; use the declared stream size */
CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, prefs->streamSrcSize) );
} }
(void)srcFileName; (void)srcFileName;
@ -1361,10 +1375,7 @@ int FIO_compressFilename(FIO_prefs_t* const prefs,
const char* dictFileName, int compressionLevel, const char* dictFileName, int compressionLevel,
ZSTD_compressionParameters comprParams) ZSTD_compressionParameters comprParams)
{ {
U64 const fileSize = UTIL_getFileSize(srcFileName); cRess_t const ress = FIO_createCResources(prefs, dictFileName, compressionLevel, comprParams);
U64 const srcSize = (fileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : fileSize;
cRess_t const ress = FIO_createCResources(prefs, dictFileName, compressionLevel, srcSize, comprParams);
int const result = FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel); int const result = FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel);
@ -1415,10 +1426,7 @@ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs,
ZSTD_compressionParameters comprParams) ZSTD_compressionParameters comprParams)
{ {
int error = 0; int error = 0;
U64 const firstFileSize = UTIL_getFileSize(inFileNamesTable[0]); cRess_t ress = FIO_createCResources(prefs, dictFileName, compressionLevel, comprParams);
U64 const firstSrcSize = (firstFileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : firstFileSize;
U64 const srcSize = (nbFiles != 1) ? ZSTD_CONTENTSIZE_UNKNOWN : firstSrcSize ;
cRess_t ress = FIO_createCResources(prefs, dictFileName, compressionLevel, srcSize, comprParams);
/* init */ /* init */
assert(outFileName != NULL || suffix != NULL); assert(outFileName != NULL || suffix != NULL);

View File

@ -71,7 +71,9 @@ void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog);
void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag); void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag);
void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse); /**< 0: no sparse; 1: disable on stdout; 2: always enabled */ void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse); /**< 0: no sparse; 1: disable on stdout; 2: always enabled */
void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable); void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable);
void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize);
void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize); void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize);
void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint);
void FIO_setLiteralCompressionMode( void FIO_setLiteralCompressionMode(
FIO_prefs_t* const prefs, FIO_prefs_t* const prefs,
ZSTD_literalCompressionMode_e mode); ZSTD_literalCompressionMode_e mode);

View File

@ -144,6 +144,18 @@ the last one takes effect.
Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible. Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible.
_note_ : at the time of this writing, `--adapt` can remain stuck at low speed _note_ : at the time of this writing, `--adapt` can remain stuck at low speed
when combined with multiple worker threads (>=2). when combined with multiple worker threads (>=2).
* `--stream-size=#` :
Sets the pledged source size of input coming from a stream. This value must be exact, as it
will be included in the produced frame header. Incorrect stream sizes will cause an error.
This information will be used to better optimize compression parameters, resulting in
better and potentially faster compression, especially for smaller source sizes.
* `--size-hint=#`:
When handling input from a stream, `zstd` must guess how large the source size
will be when optimizing compression parameters. If the stream size is relatively
small, this guess may be a poor one, resulting in a higher compression ratio than
expected. This feature allows for controlling the guess when needed.
Exact guesses result in better compression ratios. Overestimates result in slightly
degraded compression ratios, while underestimates may result in significant degradation.
* `--rsyncable` : * `--rsyncable` :
`zstd` will periodically synchronize the compression state to make the `zstd` will periodically synchronize the compression state to make the
compressed file more rsync-friendly. There is a negligible impact to compressed file more rsync-friendly. There is a negligible impact to

View File

@ -141,6 +141,8 @@ static int usage_advanced(const char* programName)
DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog); DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog);
DISPLAY( "--fast[=#]: switch to ultra fast compression level (default: %u)\n", 1); DISPLAY( "--fast[=#]: switch to ultra fast compression level (default: %u)\n", 1);
DISPLAY( "--adapt : dynamically adapt compression level to I/O conditions \n"); DISPLAY( "--adapt : dynamically adapt compression level to I/O conditions \n");
DISPLAY( "--stream-size=# : optimize compression parameters for streaming input of given number of bytes \n");
DISPLAY( "--size-hint=# optimize compression parameters for streaming input of approximately this size\n");
DISPLAY( "--target-compressed-block-size=# : make compressed block near targeted size \n"); DISPLAY( "--target-compressed-block-size=# : make compressed block near targeted size \n");
#ifdef ZSTD_MULTITHREAD #ifdef ZSTD_MULTITHREAD
DISPLAY( " -T# : spawns # compression threads (default: 1, 0==# cores) \n"); DISPLAY( " -T# : spawns # compression threads (default: 1, 0==# cores) \n");
@ -588,7 +590,9 @@ int main(int argCount, const char* argv[])
const char* suffix = ZSTD_EXTENSION; const char* suffix = ZSTD_EXTENSION;
unsigned maxDictSize = g_defaultMaxDictSize; unsigned maxDictSize = g_defaultMaxDictSize;
unsigned dictID = 0; unsigned dictID = 0;
size_t streamSrcSize = 0;
size_t targetCBlockSize = 0; size_t targetCBlockSize = 0;
size_t srcSizeHint = 0;
int dictCLevel = g_defaultDictCLevel; int dictCLevel = g_defaultDictCLevel;
unsigned dictSelect = g_defaultSelectivityLevel; unsigned dictSelect = g_defaultSelectivityLevel;
#ifdef UTIL_HAS_CREATEFILELIST #ifdef UTIL_HAS_CREATEFILELIST
@ -745,7 +749,9 @@ int main(int argCount, const char* argv[])
if (longCommandWArg(&argument, "--maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--dictID=")) { dictID = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--dictID=")) { dictID = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) CLEAN_RETURN(badusage(programName)); continue; } if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) CLEAN_RETURN(badusage(programName)); continue; }
if (longCommandWArg(&argument, "--stream-size=")) { streamSrcSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--size-hint=")) { srcSizeHint = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--long")) { if (longCommandWArg(&argument, "--long")) {
unsigned ldmWindowLog = 0; unsigned ldmWindowLog = 0;
ldmFlag = 1; ldmFlag = 1;
@ -1150,7 +1156,9 @@ int main(int argCount, const char* argv[])
FIO_setAdaptMin(prefs, adaptMin); FIO_setAdaptMin(prefs, adaptMin);
FIO_setAdaptMax(prefs, adaptMax); FIO_setAdaptMax(prefs, adaptMax);
FIO_setRsyncable(prefs, rsyncable); FIO_setRsyncable(prefs, rsyncable);
FIO_setStreamSrcSize(prefs, streamSrcSize);
FIO_setTargetCBlockSize(prefs, targetCBlockSize); FIO_setTargetCBlockSize(prefs, targetCBlockSize);
FIO_setSrcSizeHint(prefs, srcSizeHint);
FIO_setLiteralCompressionMode(prefs, literalCompressionMode); FIO_setLiteralCompressionMode(prefs, literalCompressionMode);
if (adaptMin > cLevel) cLevel = adaptMin; if (adaptMin > cLevel) cLevel = adaptMin;
if (adaptMax < cLevel) cLevel = adaptMax; if (adaptMax < cLevel) cLevel = adaptMax;
@ -1160,7 +1168,7 @@ int main(int argCount, const char* argv[])
else else
operationResult = FIO_compressMultipleFilenames(prefs, filenameTable, filenameIdx, outFileName, suffix, dictFileName, cLevel, compressionParams); operationResult = FIO_compressMultipleFilenames(prefs, filenameTable, filenameIdx, outFileName, suffix, dictFileName, cLevel, compressionParams);
#else #else
(void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; /* not used when ZSTD_NOCOMPRESS set */ (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint; /* not used when ZSTD_NOCOMPRESS set */
DISPLAY("Compression not supported \n"); DISPLAY("Compression not supported \n");
#endif #endif
} else { /* decompression or test */ } else { /* decompression or test */

View File

@ -113,15 +113,6 @@ zstd_frame_info: $(FUZZ_HEADERS) $(FUZZ_OBJ) zstd_frame_info.o
libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c regression_driver.o libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c regression_driver.o
$(AR) $(FUZZ_ARFLAGS) $@ regression_driver.o $(AR) $(FUZZ_ARFLAGS) $@ regression_driver.o
# Install libfuzzer (not usable for MSAN testing)
# Provided for convenience. To use this library run make libFuzzer and
# set LDFLAGS=-L.
.PHONY: libFuzzer
libFuzzer:
@$(RM) -rf Fuzzer
@git clone https://chromium.googlesource.com/chromium/llvm-project/compiler-rt/lib/fuzzer Fuzzer
@cd Fuzzer && ./build.sh
corpora/%_seed_corpus.zip: corpora/%_seed_corpus.zip:
@mkdir -p corpora @mkdir -p corpora
$(DOWNLOAD) $@ $(CORPORA_URL_PREFIX)$*_seed_corpus.zip $(DOWNLOAD) $@ $(CORPORA_URL_PREFIX)$*_seed_corpus.zip

View File

@ -35,6 +35,8 @@ The environment variables can be overridden with the corresponding flags
`--cc`, `--cflags`, etc. `--cc`, `--cflags`, etc.
The specific fuzzing engine is selected with `LIB_FUZZING_ENGINE` or The specific fuzzing engine is selected with `LIB_FUZZING_ENGINE` or
`--lib-fuzzing-engine`, the default is `libregression.a`. `--lib-fuzzing-engine`, the default is `libregression.a`.
Alternatively, you can use Clang's built in fuzzing engine with
`--enable-fuzzer`.
It has flags that can easily set up sanitizers `--enable-{a,ub,m}san`, and It has flags that can easily set up sanitizers `--enable-{a,ub,m}san`, and
coverage instrumentation `--enable-coverage`. coverage instrumentation `--enable-coverage`.
It sets sane defaults which can be overridden with flags `--debug`, It sets sane defaults which can be overridden with flags `--debug`,
@ -51,22 +53,25 @@ The command used to run the fuzzer is printed for debugging.
## LibFuzzer ## LibFuzzer
``` ```
# Build libfuzzer if necessary
make libFuzzer
# Build the fuzz targets # Build the fuzz targets
./fuzz.py build all --enable-coverage --enable-asan --enable-ubsan --lib-fuzzing-engine Fuzzer/libFuzzer.a --cc clang --cxx clang++ ./fuzz.py build all --enable-fuzzer --enable-asan --enable-ubsan --cc clang --cxx clang++
# OR equivalently # OR equivalently
CC=clang CXX=clang++ LIB_FUZZING_ENGINE=Fuzzer/libFuzzer.a ./fuzz.py build all --enable-coverage --enable-asan --enable-ubsan CC=clang CXX=clang++ ./fuzz.py build all --enable-fuzzer --enable-asan --enable-ubsan
# Run the fuzzer # Run the fuzzer
./fuzz.py libfuzzer TARGET -max_len=8192 -jobs=4 ./fuzz.py libfuzzer TARGET <libfuzzer args like -jobs=4>
``` ```
where `TARGET` could be `simple_decompress`, `stream_round_trip`, etc. where `TARGET` could be `simple_decompress`, `stream_round_trip`, etc.
### MSAN ### MSAN
Fuzzing with `libFuzzer` and `MSAN` will require building a C++ standard library Fuzzing with `libFuzzer` and `MSAN` is as easy as:
and libFuzzer with MSAN.
```
CC=clang CXX=clang++ ./fuzz.py build all --enable-fuzzer --enable-msan
./fuzz.py libfuzzer TARGET <libfuzzer args>
```
`fuzz.py` respects the environment variables / flags `MSAN_EXTRA_CPPFLAGS`, `fuzz.py` respects the environment variables / flags `MSAN_EXTRA_CPPFLAGS`,
`MSAN_EXTRA_CFLAGS`, `MSAN_EXTRA_CXXFLAGS`, `MSAN_EXTRA_LDFLAGS` to easily pass `MSAN_EXTRA_CFLAGS`, `MSAN_EXTRA_CXXFLAGS`, `MSAN_EXTRA_LDFLAGS` to easily pass
the extra parameters only for MSAN. the extra parameters only for MSAN.

View File

@ -24,21 +24,38 @@ def abs_join(a, *p):
return os.path.abspath(os.path.join(a, *p)) return os.path.abspath(os.path.join(a, *p))
class InputType(object):
RAW_DATA = 1
COMPRESSED_DATA = 2
class FrameType(object):
ZSTD = 1
BLOCK = 2
class TargetInfo(object):
def __init__(self, input_type, frame_type=FrameType.ZSTD):
self.input_type = input_type
self.frame_type = frame_type
# Constants # Constants
FUZZ_DIR = os.path.abspath(os.path.dirname(__file__)) FUZZ_DIR = os.path.abspath(os.path.dirname(__file__))
CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora') CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora')
TARGETS = [ TARGET_INFO = {
'simple_round_trip', 'simple_round_trip': TargetInfo(InputType.RAW_DATA),
'stream_round_trip', 'stream_round_trip': TargetInfo(InputType.RAW_DATA),
'block_round_trip', 'block_round_trip': TargetInfo(InputType.RAW_DATA, FrameType.BLOCK),
'simple_decompress', 'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA),
'stream_decompress', 'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA),
'block_decompress', 'block_decompress': TargetInfo(InputType.COMPRESSED_DATA, FrameType.BLOCK),
'dictionary_round_trip', 'dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
'dictionary_decompress', 'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA),
'zstd_frame_info', 'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA),
'simple_compress', 'simple_compress': TargetInfo(InputType.RAW_DATA),
] }
TARGETS = list(TARGET_INFO.keys())
ALL_TARGETS = TARGETS + ['all'] ALL_TARGETS = TARGETS + ['all']
FUZZ_RNG_SEED_SIZE = 4 FUZZ_RNG_SEED_SIZE = 4
@ -67,7 +84,7 @@ MSAN_EXTRA_LDFLAGS = os.environ.get('MSAN_EXTRA_LDFLAGS', '')
def create(r): def create(r):
d = os.path.abspath(r) d = os.path.abspath(r)
if not os.path.isdir(d): if not os.path.isdir(d):
os.mkdir(d) os.makedirs(d)
return d return d
@ -158,7 +175,7 @@ def compiler_version(cc, cxx):
assert(b'clang' in cxx_version_bytes) assert(b'clang' in cxx_version_bytes)
compiler = 'clang' compiler = 'clang'
elif b'gcc' in cc_version_bytes: elif b'gcc' in cc_version_bytes:
assert(b'gcc' in cxx_version_bytes) assert(b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes)
compiler = 'gcc' compiler = 'gcc'
if compiler is not None: if compiler is not None:
version_regex = b'([0-9])+\.([0-9])+\.([0-9])+' version_regex = b'([0-9])+\.([0-9])+\.([0-9])+'
@ -699,7 +716,8 @@ def gen(args):
'-o{}'.format(decompressed), '-o{}'.format(decompressed),
] ]
if 'block_' in args.TARGET: info = TARGET_INFO[args.TARGET]
if info.frame_type == FrameType.BLOCK:
cmd += [ cmd += [
'--gen-blocks', '--gen-blocks',
'--max-block-size-log={}'.format(args.max_size_log) '--max-block-size-log={}'.format(args.max_size_log)
@ -710,10 +728,11 @@ def gen(args):
print(' '.join(cmd)) print(' '.join(cmd))
subprocess.check_call(cmd) subprocess.check_call(cmd)
if '_round_trip' in args.TARGET: if info.input_type == InputType.RAW_DATA:
print('using decompressed data in {}'.format(decompressed)) print('using decompressed data in {}'.format(decompressed))
samples = decompressed samples = decompressed
elif '_decompress' in args.TARGET: else:
assert info.input_type == InputType.COMPRESSED_DATA
print('using compressed data in {}'.format(compressed)) print('using compressed data in {}'.format(compressed))
samples = compressed samples = compressed

View File

@ -14,6 +14,7 @@
#ifndef FUZZ_HELPERS_H #ifndef FUZZ_HELPERS_H
#define FUZZ_HELPERS_H #define FUZZ_HELPERS_H
#include "debug.h"
#include "fuzz.h" #include "fuzz.h"
#include "xxhash.h" #include "xxhash.h"
#include "zstd.h" #include "zstd.h"

View File

@ -36,6 +36,7 @@ int main(int argc, char const **argv) {
fprintf(stderr, "WARNING: No files passed to %s\n", argv[0]); fprintf(stderr, "WARNING: No files passed to %s\n", argv[0]);
for (i = 0; i < numFiles; ++i) { for (i = 0; i < numFiles; ++i) {
char const *fileName = files[i]; char const *fileName = files[i];
DEBUGLOG(3, "Running %s", fileName);
size_t const fileSize = UTIL_getFileSize(fileName); size_t const fileSize = UTIL_getFileSize(fileName);
size_t readSize; size_t readSize;
FILE *file; FILE *file;

View File

@ -90,6 +90,9 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, uint32_t *state)
setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, state); setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, state);
setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, state); setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, state);
setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, state); setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, state);
if (FUZZ_rand32(state, 0, 1) == 0) {
setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, state);
}
} }
FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, uint32_t *state) FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, uint32_t *state)

View File

@ -108,7 +108,6 @@ else
fi fi
println "\n===> simple tests " println "\n===> simple tests "
./datagen > tmp ./datagen > tmp
@ -409,6 +408,53 @@ println "compress multiple files including a missing one (notHere) : "
$ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!" $ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!"
println "\n===> stream-size mode"
./datagen -g11000 > tmp
println "test : basic file compression vs sized streaming compression"
file_size=$($ZSTD -14 -f tmp -o tmp.zst && wc -c < tmp.zst)
stream_size=$(cat tmp | $ZSTD -14 --stream-size=11000 | wc -c)
if [ "$stream_size" -gt "$file_size" ]; then
die "hinted compression larger than expected"
fi
println "test : sized streaming compression and decompression"
cat tmp | $ZSTD -14 -f tmp -o --stream-size=11000 tmp.zst
$ZSTD -df tmp.zst -o tmp_decompress
cmp tmp tmp_decompress || die "difference between original and decompressed file"
println "test : incorrect stream size"
cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11001 && die "should fail with incorrect stream size"
println "\n===> size-hint mode"
./datagen -g11000 > tmp
./datagen -g11000 > tmp2
./datagen > tmpDict
println "test : basic file compression vs hinted streaming compression"
file_size=$($ZSTD -14 -f tmp -o tmp.zst && wc -c < tmp.zst)
stream_size=$(cat tmp | $ZSTD -14 --size-hint=11000 | wc -c)
if [ "$stream_size" -ge "$file_size" ]; then
die "hinted compression larger than expected"
fi
println "test : hinted streaming compression and decompression"
cat tmp | $ZSTD -14 -f -o tmp.zst --size-hint=11000
$ZSTD -df tmp.zst -o tmp_decompress
cmp tmp tmp_decompress || die "difference between original and decompressed file"
println "test : hinted streaming compression with dictionary"
cat tmp | $ZSTD -14 -f -D tmpDict --size-hint=11000 | $ZSTD -t -D tmpDict
println "test : multiple file compression with hints and dictionary"
$ZSTD -14 -f -D tmpDict --size-hint=11000 tmp tmp2
$ZSTD -14 -f -o tmp1_.zst -D tmpDict --size-hint=11000 tmp
$ZSTD -14 -f -o tmp2_.zst -D tmpDict --size-hint=11000 tmp2
cmp tmp.zst tmp1_.zst || die "first file's output differs"
cmp tmp2.zst tmp2_.zst || die "second file's output differs"
println "test : incorrect hinted stream sizes"
cat tmp | $ZSTD -14 -f --size-hint=11050 | $ZSTD -t # slightly too high
cat tmp | $ZSTD -14 -f --size-hint=10950 | $ZSTD -t # slightly too low
cat tmp | $ZSTD -14 -f --size-hint=22000 | $ZSTD -t # considerably too high
cat tmp | $ZSTD -14 -f --size-hint=5500 | $ZSTD -t # considerably too low
println "\n===> dictionary tests " println "\n===> dictionary tests "
println "- test with raw dict (content only) " println "- test with raw dict (content only) "

View File

@ -2106,6 +2106,7 @@ static int fuzzerTests_newAPI(U32 seed, int nbTests, int startTest,
if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_ldmMinMatch, FUZ_randomClampedLength(&lseed, ZSTD_LDM_MINMATCH_MIN, ZSTD_LDM_MINMATCH_MAX), opaqueAPI) ); if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_ldmMinMatch, FUZ_randomClampedLength(&lseed, ZSTD_LDM_MINMATCH_MIN, ZSTD_LDM_MINMATCH_MAX), opaqueAPI) );
if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_ldmBucketSizeLog, FUZ_randomClampedLength(&lseed, ZSTD_LDM_BUCKETSIZELOG_MIN, ZSTD_LDM_BUCKETSIZELOG_MAX), opaqueAPI) ); if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_ldmBucketSizeLog, FUZ_randomClampedLength(&lseed, ZSTD_LDM_BUCKETSIZELOG_MIN, ZSTD_LDM_BUCKETSIZELOG_MAX), opaqueAPI) );
if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_ldmHashRateLog, FUZ_randomClampedLength(&lseed, ZSTD_LDM_HASHRATELOG_MIN, ZSTD_LDM_HASHRATELOG_MAX), opaqueAPI) ); if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_ldmHashRateLog, FUZ_randomClampedLength(&lseed, ZSTD_LDM_HASHRATELOG_MIN, ZSTD_LDM_HASHRATELOG_MAX), opaqueAPI) );
if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_srcSizeHint, FUZ_randomClampedLength(&lseed, ZSTD_SRCSIZEHINT_MIN, ZSTD_SRCSIZEHINT_MAX), opaqueAPI) );
} }
/* mess with frame parameters */ /* mess with frame parameters */