From af0c9501d120ff83a7e0871a0b18ad7237813a8c Mon Sep 17 00:00:00 2001 From: Nick Magerko Date: Thu, 15 Aug 2019 23:57:55 -0700 Subject: [PATCH 01/26] Add --stream-size=# command --- programs/fileio.c | 21 +++++++++++++++++++-- programs/fileio.h | 1 + programs/zstdcli.c | 6 +++++- tests/playTests.sh | 22 +++++++++++++++++++++- 4 files changed, 46 insertions(+), 4 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index 569a410c..82d70075 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -304,6 +304,7 @@ struct FIO_prefs_s { int ldmMinMatch; int ldmBucketSizeLog; int ldmHashRateLog; + size_t streamSrcSize; size_t targetCBlockSize; ZSTD_literalCompressionMode_e literalCompressionMode; @@ -349,6 +350,7 @@ FIO_prefs_t* FIO_createPreferences(void) ret->ldmMinMatch = 0; ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET; ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET; + ret->streamSrcSize = 0; ret->targetCBlockSize = 0; ret->literalCompressionMode = ZSTD_lcm_auto; return ret; @@ -418,6 +420,10 @@ void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable) { prefs->rsyncable = rsyncable; } +void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize) { + prefs->streamSrcSize = streamSrcSize; +} + void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize) { prefs->targetCBlockSize = targetCBlockSize; } @@ -698,9 +704,20 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs, CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) ); #endif /* dictionary */ - CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, srcSize) ); /* set the value temporarily for dictionary loading, to adapt compression parameters */ + /* set the pledged size for dictionary loading, to adapt compression parameters */ + if (srcSize == ZSTD_CONTENTSIZE_UNKNOWN && prefs->streamSrcSize > 0) { + /* unknown source size; use the declared stream size and disable writing this size to frame during compression */ + CHECK( 
ZSTD_CCtx_setPledgedSrcSize(ress.cctx, prefs->streamSrcSize) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_contentSizeFlag, 0) ); + } else { + /* use the known source size for adaption */ + CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, srcSize) ); + } CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, dictBuffer, dictBuffSize) ); - CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, ZSTD_CONTENTSIZE_UNKNOWN) ); /* reset */ + if (srcSize != ZSTD_CONTENTSIZE_UNKNOWN || prefs->streamSrcSize == 0) { + /* reset pledge when src size is known or stream size is declared */ + CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, ZSTD_CONTENTSIZE_UNKNOWN) ); + } free(dictBuffer); } diff --git a/programs/fileio.h b/programs/fileio.h index 311f8c0e..13f6f1d0 100644 --- a/programs/fileio.h +++ b/programs/fileio.h @@ -71,6 +71,7 @@ void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog); void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag); void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse); /**< 0: no sparse; 1: disable on stdout; 2: always enabled */ void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable); +void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize); void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize); void FIO_setLiteralCompressionMode( FIO_prefs_t* const prefs, diff --git a/programs/zstdcli.c b/programs/zstdcli.c index de286cdf..401e1ee2 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -141,6 +141,7 @@ static int usage_advanced(const char* programName) DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog); DISPLAY( "--fast[=#]: switch to ultra fast compression level (default: %u)\n", 1); DISPLAY( "--adapt : dynamically adapt compression level to I/O conditions \n"); + DISPLAY( "--stream-size=# : optimize compression parameters for streaming input of given number of bytes \n"); DISPLAY( 
"--target-compressed-block-size=# : make compressed block near targeted size \n"); #ifdef ZSTD_MULTITHREAD DISPLAY( " -T# : spawns # compression threads (default: 1, 0==# cores) \n"); @@ -588,6 +589,7 @@ int main(int argCount, const char* argv[]) const char* suffix = ZSTD_EXTENSION; unsigned maxDictSize = g_defaultMaxDictSize; unsigned dictID = 0; + size_t streamSrcSize = 0; size_t targetCBlockSize = 0; int dictCLevel = g_defaultDictCLevel; unsigned dictSelect = g_defaultSelectivityLevel; @@ -745,6 +747,7 @@ int main(int argCount, const char* argv[]) if (longCommandWArg(&argument, "--maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--dictID=")) { dictID = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) CLEAN_RETURN(badusage(programName)); continue; } + if (longCommandWArg(&argument, "--stream-size=")) { streamSrcSize = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--long")) { unsigned ldmWindowLog = 0; @@ -1150,6 +1153,7 @@ int main(int argCount, const char* argv[]) FIO_setAdaptMin(prefs, adaptMin); FIO_setAdaptMax(prefs, adaptMax); FIO_setRsyncable(prefs, rsyncable); + FIO_setStreamSrcSize(prefs, streamSrcSize); FIO_setTargetCBlockSize(prefs, targetCBlockSize); FIO_setLiteralCompressionMode(prefs, literalCompressionMode); if (adaptMin > cLevel) cLevel = adaptMin; @@ -1160,7 +1164,7 @@ int main(int argCount, const char* argv[]) else operationResult = FIO_compressMultipleFilenames(prefs, filenameTable, filenameIdx, outFileName, suffix, dictFileName, cLevel, compressionParams); #else - (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; /* not used when ZSTD_NOCOMPRESS set */ + 
(void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)streamSrcSize; (void)targetCBlockSize; /* not used when ZSTD_NOCOMPRESS set */ DISPLAY("Compression not supported \n"); #endif } else { /* decompression or test */ diff --git a/tests/playTests.sh b/tests/playTests.sh index 69387321..431a53a1 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -108,7 +108,6 @@ else fi - println "\n===> simple tests " ./datagen > tmp @@ -1020,4 +1019,25 @@ test -f dictionary rm -f tmp* dictionary +println "\n===> stream-size mode" + +./datagen -g11000 > tmp +println "test : basic file compression vs sized streaming compression" +$ZSTD -14 -f tmp -o tmp.zst |& tee file.out +cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11000 |& tee stream_sized.out + +file_ratio=$(cat file.out | awk '{print $4}' | sed 's/%//g') +stream_sized_ratio=$(cat stream_sized.out | awk '{print $4}' | sed 's/%//g') +rm file.out stream_sized.out + +ratio_diff=$(echo $file_ratio - $stream_sized_ratio | bc) +if [ $(echo "(100 * $ratio_diff) > 5" | bc -l) == 1 ] +then + die "greater than 0.05% difference between file and sized-streaming compression" +fi + +println "test : incorrect stream size" +cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11001 && die "should fail with incorrect stream size" + + rm -f tmp* From 85d07c6c474abc8f16b069332fbaba3054819d14 Mon Sep 17 00:00:00 2001 From: Nick Magerko Date: Fri, 16 Aug 2019 12:49:21 -0700 Subject: [PATCH 02/26] Tweak stdout, stderr redirection in new playTests --- tests/playTests.sh | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/playTests.sh b/tests/playTests.sh index 431a53a1..c516aa71 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -1023,8 +1023,8 @@ println "\n===> stream-size mode" ./datagen -g11000 > tmp println "test : basic file compression vs sized streaming compression" -$ZSTD -14 -f tmp -o tmp.zst |& tee file.out -cat tmp | $ZSTD -14 
-f -o tmp.zst --stream-size=11000 |& tee stream_sized.out +$ZSTD -14 -f tmp -o tmp.zst 2>&1 | tee file.out +cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11000 2>&1 | tee stream_sized.out file_ratio=$(cat file.out | awk '{print $4}' | sed 's/%//g') stream_sized_ratio=$(cat stream_sized.out | awk '{print $4}' | sed 's/%//g') @@ -1035,7 +1035,10 @@ if [ $(echo "(100 * $ratio_diff) > 5" | bc -l) == 1 ] then die "greater than 0.05% difference between file and sized-streaming compression" fi - +println "test : sized streaming compression and decompression" +cat tmp | $ZSTD -14 -f tmp -o --stream-size=11000 tmp.zst +$ZSTD -df tmp.zst -o tmp_decompress +cmp tmp tmp_decompress || die "difference between original and decompressed file" println "test : incorrect stream size" cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11001 && die "should fail with incorrect stream size" From c403b12f9dbab9ba3ab30fca6e1ce3c31d2f8923 Mon Sep 17 00:00:00 2001 From: Nick Magerko Date: Mon, 19 Aug 2019 09:01:31 -0700 Subject: [PATCH 03/26] Set pledged size just before compression --- programs/fileio.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index 82d70075..75b271a8 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -704,21 +704,7 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs, CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) ); #endif /* dictionary */ - /* set the pledged size for dictionary loading, to adapt compression parameters */ - if (srcSize == ZSTD_CONTENTSIZE_UNKNOWN && prefs->streamSrcSize > 0) { - /* unknown source size; use the declared stream size and disable writing this size to frame during compression */ - CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, prefs->streamSrcSize) ); - CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_contentSizeFlag, 0) ); - } else { - /* use the known source size for adaption */ - CHECK( 
ZSTD_CCtx_setPledgedSrcSize(ress.cctx, srcSize) ); - } CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, dictBuffer, dictBuffSize) ); - if (srcSize != ZSTD_CONTENTSIZE_UNKNOWN || prefs->streamSrcSize == 0) { - /* reset pledge when src size is known or stream size is declared */ - CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, ZSTD_CONTENTSIZE_UNKNOWN) ); - } - free(dictBuffer); } @@ -1020,6 +1006,10 @@ FIO_compressZstdFrame(FIO_prefs_t* const prefs, /* init */ if (fileSize != UTIL_FILESIZE_UNKNOWN) { CHECK(ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize)); + } else if (prefs->streamSrcSize > 0) { + /* unknown source size; use the declared stream size and disable writing this size to frame during compression */ + CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, prefs->streamSrcSize) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_contentSizeFlag, 0) ); } (void)srcFileName; From bbd83c2ab34dc498a63ff10e869e26fbe1e221a4 Mon Sep 17 00:00:00 2001 From: Nick Magerko Date: Mon, 19 Aug 2019 09:11:22 -0700 Subject: [PATCH 04/26] Update man page --- programs/zstd.1.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/programs/zstd.1.md b/programs/zstd.1.md index 3ab2667a..e3f72928 100644 --- a/programs/zstd.1.md +++ b/programs/zstd.1.md @@ -152,6 +152,12 @@ the last one takes effect. This feature does not work with `--single-thread`. You probably don't want to use it with long range mode, since it will decrease the effectiveness of the synchronization points, but your milage may vary. +* `--stream-size=#` : + When handling input from a stream, `zstd` must guess how large the source size + will be when optimizing compression parameters. If the stream size is relatively + small, this guess may be a poor one, resulting in a worse compression ratio than + expected. This feature will set the source size of a stream. Note that it must + be exact; incorrect stream sizes will cause an error.
* `-D file`: use `file` as Dictionary to compress or decompress FILE(s) * `--no-dictID`: From f781cf672bfd23dd467a6dcf08ba094180032c76 Mon Sep 17 00:00:00 2001 From: Nick Magerko Date: Mon, 19 Aug 2019 11:07:43 -0700 Subject: [PATCH 05/26] Remove extraneous parameter --- programs/fileio.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index 75b271a8..f5ecf729 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -639,7 +639,6 @@ typedef struct { static cRess_t FIO_createCResources(FIO_prefs_t* const prefs, const char* dictFileName, int cLevel, - U64 srcSize, ZSTD_compressionParameters comprParams) { cRess_t ress; memset(&ress, 0, sizeof(ress)); @@ -1371,7 +1370,7 @@ int FIO_compressFilename(FIO_prefs_t* const prefs, U64 const fileSize = UTIL_getFileSize(srcFileName); U64 const srcSize = (fileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : fileSize; - cRess_t const ress = FIO_createCResources(prefs, dictFileName, compressionLevel, srcSize, comprParams); + cRess_t const ress = FIO_createCResources(prefs, dictFileName, compressionLevel, comprParams); int const result = FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel); @@ -1424,8 +1423,7 @@ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs, int error = 0; U64 const firstFileSize = UTIL_getFileSize(inFileNamesTable[0]); U64 const firstSrcSize = (firstFileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : firstFileSize; - U64 const srcSize = (nbFiles != 1) ? 
ZSTD_CONTENTSIZE_UNKNOWN : firstSrcSize ; - cRess_t ress = FIO_createCResources(prefs, dictFileName, compressionLevel, srcSize, comprParams); + cRess_t ress = FIO_createCResources(prefs, dictFileName, compressionLevel, comprParams); /* init */ assert(outFileName != NULL || suffix != NULL); From a24dc3a935a43ef8088711afadece9b94ae59a27 Mon Sep 17 00:00:00 2001 From: Nick Magerko Date: Mon, 19 Aug 2019 11:14:56 -0700 Subject: [PATCH 06/26] Remove extraneous variables --- programs/fileio.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index f5ecf729..5492f944 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -1367,9 +1367,6 @@ int FIO_compressFilename(FIO_prefs_t* const prefs, const char* dictFileName, int compressionLevel, ZSTD_compressionParameters comprParams) { - U64 const fileSize = UTIL_getFileSize(srcFileName); - U64 const srcSize = (fileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : fileSize; - cRess_t const ress = FIO_createCResources(prefs, dictFileName, compressionLevel, comprParams); int const result = FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel); @@ -1421,8 +1418,6 @@ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs, ZSTD_compressionParameters comprParams) { int error = 0; - U64 const firstFileSize = UTIL_getFileSize(inFileNamesTable[0]); - U64 const firstSrcSize = (firstFileSize == UTIL_FILESIZE_UNKNOWN) ? 
ZSTD_CONTENTSIZE_UNKNOWN : firstFileSize; cRess_t ress = FIO_createCResources(prefs, dictFileName, compressionLevel, comprParams); /* init */ From 30bfa228e84500855e80e792f7de796257d99ab3 Mon Sep 17 00:00:00 2001 From: Nick Magerko Date: Mon, 19 Aug 2019 11:20:28 -0700 Subject: [PATCH 07/26] Keep content size flag set in stream size mode --- programs/fileio.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index 5492f944..873013a5 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -1006,9 +1006,8 @@ FIO_compressZstdFrame(FIO_prefs_t* const prefs, if (fileSize != UTIL_FILESIZE_UNKNOWN) { CHECK(ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize)); } else if (prefs->streamSrcSize > 0) { - /* unknown source size; use the declared stream size and disable writing this size to frame during compression */ + /* unknown source size; use the declared stream size */ CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, prefs->streamSrcSize) ); - CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_contentSizeFlag, 0) ); } (void)srcFileName; From dffbac5f89dc83106ad5c8a18202a83ff0f37f3b Mon Sep 17 00:00:00 2001 From: Nick Magerko Date: Mon, 19 Aug 2019 08:52:08 -0700 Subject: [PATCH 08/26] Add --size-hint=# option --- doc/zstd_manual.html | 162 +++++++++++++------------- lib/compress/zstd_compress.c | 21 +++- lib/compress/zstd_compress_internal.h | 3 + lib/zstd.h | 8 ++ programs/fileio.c | 8 ++ programs/fileio.h | 1 + programs/zstdcli.c | 6 +- tests/playTests.sh | 28 +++++ 8 files changed, 155 insertions(+), 82 deletions(-) diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html index 26b204e1..806920a5 100644 --- a/doc/zstd_manual.html +++ b/doc/zstd_manual.html @@ -76,7 +76,7 @@

Compresses `src` content as a single zstd compressed frame into already allocated `dst`. Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. @return : compressed size written into `dst` (<= `dstCapacity), - or an error code if it fails (which can be tested using ZSTD_isError()). + or an error code if it fails (which can be tested using ZSTD_isError()).


size_t ZSTD_decompress( void* dst, size_t dstCapacity,
@@ -85,7 +85,7 @@
   `dstCapacity` is an upper bound of originalSize to regenerate.
   If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data.
   @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
-            or an errorCode if it fails (which can be tested using ZSTD_isError()). 
+            or an errorCode if it fails (which can be tested using ZSTD_isError()).
 


#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
@@ -112,7 +112,7 @@ unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
    note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified.
             Always ensure return value fits within application's authorized limits.
             Each application can set its own limits.
-   note 6 : This function replaces ZSTD_getDecompressedSize() 
+   note 6 : This function replaces ZSTD_getDecompressedSize()
 


unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
@@ -120,7 +120,7 @@ unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
   Both functions work the same way, but ZSTD_getDecompressedSize() blends
   "empty", "unknown" and "error" results to the same return value (0),
   while ZSTD_getFrameContentSize() gives them separate return values.
- @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. 
+ @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise.
 


size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize);
@@ -128,7 +128,7 @@ unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
  `srcSize` must be >= first frame size
  @return : the compressed size of the first frame starting at `src`,
            suitable to pass as `srcSize` to `ZSTD_decompress` or similar,
-        or an error code if input is invalid 
+        or an error code if input is invalid
 


Helper functions

#define ZSTD_COMPRESSBOUND(srcSize)   ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0))  /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */
@@ -148,7 +148,7 @@ int         ZSTD_maxCLevel(void);               /*!< maximum compression lev
          It doesn't change the compression ratio, which remains identical.
   Note 2 : In multi-threaded environments,
          use one different context per thread for parallel execution.
- 
+
 
typedef struct ZSTD_CCtx_s ZSTD_CCtx;
 ZSTD_CCtx* ZSTD_createCCtx(void);
 size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);
@@ -159,14 +159,14 @@ size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);
                          int compressionLevel);
 

Same as ZSTD_compress(), using an explicit ZSTD_CCtx The function will compress at requested compression level, - ignoring any other parameter + ignoring any other parameter


Decompression context

  When decompressing many times,
   it is recommended to allocate a context only once,
   and re-use it for each successive compression operation.
   This will make workload friendlier for system's memory.
-  Use one context per thread for parallel execution. 
+  Use one context per thread for parallel execution.
 
typedef struct ZSTD_DCtx_s ZSTD_DCtx;
 ZSTD_DCtx* ZSTD_createDCtx(void);
 size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
@@ -177,7 +177,7 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
 

Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx. Compatible with sticky parameters. - +


Advanced compression API


@@ -324,6 +324,7 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
      * ZSTD_c_forceAttachDict
      * ZSTD_c_literalCompressionMode
      * ZSTD_c_targetCBlockSize
+     * ZSTD_c_srcSizeHint
      * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
      * note : never ever use experimentalParam? names directly;
      *        also, the enums values themselves are unstable and can still change.
@@ -334,6 +335,7 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
      ZSTD_c_experimentalParam4=1001,
      ZSTD_c_experimentalParam5=1002,
      ZSTD_c_experimentalParam6=1003,
+     ZSTD_c_experimentalParam7=1004,
 } ZSTD_cParameter;
 

typedef struct {
@@ -348,7 +350,7 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
  @return : a structure, ZSTD_bounds, which contains
          - an error status field, which must be tested using ZSTD_isError()
          - lower and upper bounds, both inclusive
- 
+
 


size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value);
@@ -361,7 +363,7 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
               => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy.
               new parameters will be active for next job only (after a flush()).
  @return : an error code (which can be tested using ZSTD_isError()).
- 
+
 


size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize);
@@ -378,7 +380,7 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
            for example with ZSTD_compress2(),
            or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end),
            this value is automatically overridden by srcSize instead.
- 
+
 


typedef enum {
@@ -400,7 +402,7 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
                   Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing)
                   otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError())
   - Both : similar to resetting the session, followed by resetting parameters.
- 
+
 


size_t ZSTD_compress2( ZSTD_CCtx* cctx,
@@ -414,7 +416,7 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
   Hint : compression runs faster if `dstCapacity` >=  `ZSTD_compressBound(srcSize)`.
  @return : compressed size written into `dst` (<= `dstCapacity),
            or an error code if it fails (which can be tested using ZSTD_isError()).
- 
+
 


Advanced decompression API


@@ -445,7 +447,7 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
  @return : a structure, ZSTD_bounds, which contains
          - an error status field, which must be tested using ZSTD_isError()
          - both lower and upper bounds, inclusive
- 
+
 


size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value);
@@ -454,7 +456,7 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
   Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter).
   Setting a parameter is only possible during frame initialization (before starting decompression).
  @return : 0, or an error code (which can be tested using ZSTD_isError()).
- 
+
 


size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset);
@@ -462,7 +464,7 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
   Session and parameters can be reset jointly or separately.
   Parameters can only be reset when no active frame is being decompressed.
  @return : 0, or an error code, which can be tested with ZSTD_isError()
- 
+
 


Streaming


@@ -536,7 +538,7 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
             >0 if some data still present within internal buffer (the value is minimal estimation of remaining size),
             or an error code, which can be tested using ZSTD_isError().
 
- 
+
 
typedef ZSTD_CCtx ZSTD_CStream;  /**< CCtx and CStream are now effectively same object (>= v1.3.0) */
@@ -580,7 +582,7 @@ size_t ZSTD_freeCStream(ZSTD_CStream* zcs);
             only ZSTD_e_end or ZSTD_e_flush operations are allowed.
             Before starting a new compression job, or changing compression parameters,
             it is required to fully flush internal buffers.
- 
+
 


size_t ZSTD_CStreamInSize(void);    /**< recommended size for input buffer */
@@ -603,7 +605,7 @@ size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
      ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
      ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
      ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
- 
+
 


Streaming decompression - HowTo

@@ -629,7 +631,7 @@ size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
         or any other value > 0, which means there is still some decoding or flushing to do to complete current frame :
                                 the return value is a suggested next input size (just a hint for better latency)
                                 that will never request more than the remaining frame size.
- 
+
 
typedef ZSTD_DCtx ZSTD_DStream;  /**< DCtx and DStream are now effectively same object (>= v1.3.0) */
@@ -654,7 +656,7 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
   or a buffer with specified information (see dictBuilder/zdict.h).
   Note : This function loads the dictionary, resulting in significant startup delay.
          It's intended for a dictionary used only once.
-  Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. 
+  Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used.
 


size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
@@ -665,7 +667,7 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
   Dictionary must be identical to the one used during compression.
   Note : This function loads the dictionary, resulting in significant startup delay.
          It's intended for a dictionary used only once.
-  Note : When `dict == NULL || dictSize < 8` no dictionary is used. 
+  Note : When `dict == NULL || dictSize < 8` no dictionary is used.
 


Bulk processing dictionary API


@@ -677,11 +679,11 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
   ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
  `dictBuffer` can be released after ZSTD_CDict creation, because its content is copied within CDict.
   Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate `dictBuffer` content.
-  Note : A ZSTD_CDict can be created from an empty dictBuffer, but it is inefficient when used to compress small data. 
+  Note : A ZSTD_CDict can be created from an empty dictBuffer, but it is inefficient when used to compress small data.
 


size_t      ZSTD_freeCDict(ZSTD_CDict* CDict);
-

Function frees memory allocated by ZSTD_createCDict(). +

Function frees memory allocated by ZSTD_createCDict().


size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
@@ -691,16 +693,16 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
 

Compression using a digested Dictionary. Recommended when same dictionary is used multiple times. Note : compression level is _decided at dictionary creation time_, - and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) + and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no)


ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
 

Create a digested dictionary, ready to start decompression operation without startup delay. - dictBuffer can be released after DDict creation, as its content is copied inside DDict. + dictBuffer can be released after DDict creation, as its content is copied inside DDict.


size_t      ZSTD_freeDDict(ZSTD_DDict* ddict);
-

Function frees memory allocated with ZSTD_createDDict() +

Function frees memory allocated with ZSTD_createDDict()


size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
@@ -708,7 +710,7 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
                             const void* src, size_t srcSize,
                             const ZSTD_DDict* ddict);
 

Decompression using a digested Dictionary. - Recommended when same dictionary is used multiple times. + Recommended when same dictionary is used multiple times.


Dictionary helper functions


@@ -716,13 +718,13 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
 
unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
 

Provides the dictID stored within dictionary. if @return == 0, the dictionary is not conformant with Zstandard specification. - It can still be loaded, but as a content-only dictionary. + It can still be loaded, but as a content-only dictionary.


unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
 

Provides the dictID of the dictionary loaded into `ddict`. If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. - Non-conformant dictionaries can still be loaded, but as content-only dictionaries. + Non-conformant dictionaries can still be loaded, but as content-only dictionaries.


unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
@@ -734,7 +736,7 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
     Note : this use case also happens when using a non-conformant dictionary.
   - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
   - This is not a Zstandard frame.
-  When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. 
+  When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code.
 


Advanced dictionary and prefix API

@@ -760,7 +762,7 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
            Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead.
            In such a case, dictionary buffer must outlive its users.
   Note 4 : Use ZSTD_CCtx_loadDictionary_advanced()
-           to precisely select how dictionary content must be interpreted. 
+           to precisely select how dictionary content must be interpreted.
 


size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
@@ -774,7 +776,7 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
   Special : Referencing a NULL CDict means "return to no-dictionary mode".
   Note 1 : Currently, only one dictionary can be managed.
            Referencing a new dictionary effectively "discards" any previous one.
-  Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. 
+  Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx.
 


size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx,
@@ -795,7 +797,7 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
            It's a CPU consuming operation, with non-negligible impact on latency.
            If there is a need to use the same prefix multiple times, consider loadDictionary instead.
   Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dm_rawContent).
-           Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. 
+           Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation.
 


size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
@@ -812,7 +814,7 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
            Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead.
   Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of
            how dictionary content is loaded and interpreted.
- 
+
 


size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
@@ -823,7 +825,7 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
            Referencing a new dictionary effectively "discards" any previous one.
   Special: referencing a NULL DDict means "return to no-dictionary mode".
   Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx.
- 
+
 


size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx,
@@ -842,7 +844,7 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
            Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section)
   Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost.
            A full dictionary is more costly, as it requires building tables.
- 
+
 


size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
@@ -852,7 +854,7 @@ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
 size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict);
 size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
 

These functions give the _current_ memory usage of selected object.
- Note that object memory usage can evolve (increase or decrease) over time.
+ Note that object memory usage can evolve (increase or decrease) over time.


experimental API (static linking only)

@@ -861,7 +863,7 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
  They can still change in future versions.
  Some of them are planned to remain in the static_only section indefinitely.
  Some of them might be removed in the future (especially when redundant with existing stable functions)
- 
+
 
typedef struct {
@@ -975,7 +977,7 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
             Each application can set its own limits.
    note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to
             read each contained frame header.  This is fast as most of the data is skipped,
-            however it does mean that all frame data must be present and valid. 
+            however it does mean that all frame data must be present and valid.
 


unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize);
@@ -990,13 +992,13 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
             in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value.
   note 3  : when the decompressed size field isn't available, the upper-bound for that frame is calculated by:
               upper-bound = # blocks * min(128 KB, Window_Size)
- 
+
 


size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
 

srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX. @return : size of the Frame Header,
- or an error code (if srcSize is too small)
+ or an error code (if srcSize is too small)


Memory management


@@ -1012,7 +1014,7 @@ size_t ZSTD_estimateDCtxSize(void);
   If srcSize is known to always be small, ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation.
   ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
   ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1.
-  Note : CCtx size estimation is only correct for single-threaded compression. 
+  Note : CCtx size estimation is only correct for single-threaded compression.
 


size_t ZSTD_estimateCStreamSize(int compressionLevel);
@@ -1031,7 +1033,7 @@ size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize);
   or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame();
   Note : if streaming is init with function ZSTD_init?Stream_usingDict(),
          an internal ?Dict will be created, which additional size is not estimated here.
-         In this case, get total size by adding ZSTD_estimate?DictSize 
+         In this case, get total size by adding ZSTD_estimate?DictSize
 


size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel);
@@ -1040,7 +1042,7 @@ size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMet
 

ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict(). ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced(). Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller.
- 
+


ZSTD_CCtx*    ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize);
@@ -1064,7 +1066,7 @@ ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize);
                  ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict().
   Limitation 2 : static cctx currently not compatible with multi-threading.
   Limitation 3 : static dctx is incompatible with legacy support.
- 
+
 


ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize);    /**< same as ZSTD_initStaticDCtx() */
@@ -1076,7 +1078,7 @@ static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL };  /**< t
 

These prototypes make it possible to pass your own allocation/free functions. ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below. All allocation/free operations will be completed using these custom variants instead of regular ones.
- 
+


Advanced compression functions


@@ -1085,22 +1087,22 @@ static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL };  /**< t

Create a digested dictionary for compression Dictionary content is just referenced, not duplicated. As a consequence, `dictBuffer` **must** outlive CDict,
- and its content must remain unmodified throughout the lifetime of CDict.
+ and its content must remain unmodified throughout the lifetime of CDict.


ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
 

@return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize.
- `estimatedSrcSize` value is optional, select 0 if not known
+ `estimatedSrcSize` value is optional, select 0 if not known


ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
 

same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`.
- All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0
+ All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0


size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
 

Ensure param values remain within authorized range.
- @return 0 on success, or an error code (can be checked with ZSTD_isError())
+ @return 0 on success, or an error code (can be checked with ZSTD_isError())


ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
@@ -1108,7 +1110,7 @@ static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL };  /**< t
  `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN.
  `dictSize` must be `0` when there is no dictionary.
   cPar can be invalid : all parameters will be clamped within valid range in the @return struct.
-  This function never fails (wide contract) 
+  This function never fails (wide contract)
 


size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
@@ -1116,7 +1118,7 @@ static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL };  /**< t
                         const void* src, size_t srcSize,
                         const void* dict,size_t dictSize,
                               ZSTD_parameters params);
- Same as ZSTD_compress_usingDict(), with fine-tune control over compression parameters (by structure)
+ Same as ZSTD_compress_usingDict(), with fine-tune control over compression parameters (by structure)


size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
@@ -1124,30 +1126,30 @@ static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL };  /**< t
                             const void* src, size_t srcSize,
                             const ZSTD_CDict* cdict,
                                   ZSTD_frameParameters fParams);
- Same as ZSTD_compress_usingCDict(), with fine-tune control over frame parameters
+ Same as ZSTD_compress_usingCDict(), with fine-tune control over frame parameters


size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
 

Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx.
- It saves some memory, but also requires that `dict` outlives its usage within `cctx`
+ It saves some memory, but also requires that `dict` outlives its usage within `cctx`


size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
 

Same as ZSTD_CCtx_loadDictionary(), but gives finer control over how to load the dictionary (by copy ? by reference ?)
- and how to interpret it (automatic ? force raw mode ? full mode only ?)
+ and how to interpret it (automatic ? force raw mode ? full mode only ?)


size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
 

Same as ZSTD_CCtx_refPrefix(), but gives finer control over
- how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?)
+ how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?)


size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value);
 

Get the requested compression parameter value, selected by enum ZSTD_cParameter, and store it into int* value. @return : 0, or an error code (which can be tested with ZSTD_isError()).
- 
+


ZSTD_CCtx_params* ZSTD_createCCtxParams(void);
@@ -1167,24 +1169,24 @@ size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
 
   This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams()
   for static allocation of CCtx for single-threaded compression.
- 
+
 


size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params);
 

Reset params to default values.
- 
+


size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel);
 

Initializes the compression parameters of cctxParams according to compression level. All other parameters are reset to their default values.
- 
+


size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params);
 

Initializes the compression and frame parameters of cctxParams according to params. All other parameters are reset to their default values.
- 
+


size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value);
@@ -1192,14 +1194,14 @@ size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
   Set one compression parameter, selected by enum ZSTD_cParameter.
   Parameters must be applied to a ZSTD_CCtx using ZSTD_CCtx_setParametersUsingCCtxParams().
  @result : 0, or an error code (which can be tested with ZSTD_isError()).
- 
+
 


size_t ZSTD_CCtxParams_getParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value);
 

Similar to ZSTD_CCtx_getParameter. Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. @result : 0, or an error code (which can be tested with ZSTD_isError()).
- 
+


size_t ZSTD_CCtx_setParametersUsingCCtxParams(
@@ -1209,7 +1211,7 @@ size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
     if nbWorkers==0, this will have no impact until a new compression is started.
     if nbWorkers>=1, new parameters will be picked up at next job,
        with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated).
- 
+
 


size_t ZSTD_compressStream2_simpleArgs (
@@ -1221,7 +1223,7 @@ size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
   but using only integral types as arguments.
   This variant might be helpful for binders from dynamic languages
   which have troubles handling structures containing memory pointers.
- 
+
 


Advanced decompression functions


@@ -1230,33 +1232,33 @@ size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
 

Tells if the content of `buffer` starts with a valid Frame Identifier. Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
- Note 3 : Skippable Frame Identifiers are considered valid.
+ Note 3 : Skippable Frame Identifiers are considered valid.


ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize);
 

Create a digested dictionary, ready to start decompression operation without startup delay. Dictionary content is referenced, and therefore stays in dictBuffer. It is important that dictBuffer outlives DDict,
- it must remain read accessible throughout the lifetime of DDict
+ it must remain read accessible throughout the lifetime of DDict


size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
 

Same as ZSTD_DCtx_loadDictionary(), but references `dict` content instead of copying it into `dctx`. This saves memory if `dict` remains around.,
- However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression.
+ However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression.


size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
 

Same as ZSTD_DCtx_loadDictionary(), but gives direct control over how to load the dictionary (by copy ? by reference ?)
- and how to interpret it (automatic ? force raw mode ? full mode only ?).
+ and how to interpret it (automatic ? force raw mode ? full mode only ?).


size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
 

Same as ZSTD_DCtx_refPrefix(), but gives finer control over
- how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?)
+ how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?)


size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize);
@@ -1265,14 +1267,14 @@ size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
   This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode.
   By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT)
  @return : 0, or an error code (which can be tested using ZSTD_isError()).
- 
+
 


size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
 

Instruct the decoder context about what kind of data to decode next. This instruction is mandatory to decode data without a fully-formed header, such ZSTD_f_zstd1_magicless for example.
- @return : 0, or an error code (which can be tested using ZSTD_isError()).
+ @return : 0, or an error code (which can be tested using ZSTD_isError()).


size_t ZSTD_decompressStream_simpleArgs (
@@ -1283,7 +1285,7 @@ size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
   but using only integral types as arguments.
   This can be helpful for binders from dynamic languages
   which have troubles handling structures containing memory pointers.
- 
+
 


Advanced streaming functions

  Warning : most of these functions are now redundant with the Advanced API.
@@ -1361,7 +1363,7 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict*
   For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs,
   but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead.
  @return : 0, or an error code (which can be tested using ZSTD_isError())
- 
+
 


typedef struct {
@@ -1385,7 +1387,7 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict*
     but everything it has produced has also been flushed so far,
     therefore flush speed is limited by production speed of oldest job
     irrespective of the speed of concurrent (and newer) jobs.
- 
+
 


Advanced Streaming decompression functions

/**
@@ -1419,7 +1421,7 @@ size_t ZSTD_resetDStream(ZSTD_DStream* zds);
   This is an advanced API, giving full control over buffer management, for users which need direct control over memory.
   But it's also a complex one, with several restrictions, documented below.
   Prefer normal streaming API for an easier experience.
- 
+
 

Buffer-less streaming compression (synchronous mode)

@@ -1517,7 +1519,7 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned lo
   Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType().
   This information is not required to properly decode a frame.
 
-  == Special case : skippable frames 
+  == Special case : skippable frames
 
   Skippable frames allow integration of user-defined data into a flow of concatenated frames.
   Skippable frames will be ignored (skipped) by decompressor.
@@ -1549,7 +1551,7 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
 

decode Frame Header, or requires larger `srcSize`. @return : 0, `zfhPtr` is correctly filled, >0, `srcSize` is too small, value is wanted `srcSize` amount,
- or an error code, which can be tested using ZSTD_isError()
+ or an error code, which can be tested using ZSTD_isError()


typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index cd73db13..5589c323 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -392,6 +392,11 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
         bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX;
         return bounds;
 
+    case ZSTD_c_srcSizeHint:
+        bounds.lowerBound = 0;
+        bounds.upperBound = ZSTD_SRCSIZEHINT_MAX;
+        return bounds;
+
     default:
         {   ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 };
             return boundError;
@@ -448,6 +453,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
     case ZSTD_c_forceAttachDict:
     case ZSTD_c_literalCompressionMode:
     case ZSTD_c_targetCBlockSize:
+    case ZSTD_c_srcSizeHint:
     default:
         return 0;
     }
@@ -494,6 +500,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
     case ZSTD_c_ldmMinMatch:
     case ZSTD_c_ldmBucketSizeLog:
     case ZSTD_c_targetCBlockSize:
+    case ZSTD_c_srcSizeHint:
         break;
 
     default: RETURN_ERROR(parameter_unsupported);
@@ -674,6 +681,12 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
         CCtxParams->targetCBlockSize = value;
         return CCtxParams->targetCBlockSize;
 
+    case ZSTD_c_srcSizeHint :
+        if (value!=0)    /* 0 ==> default */
+            BOUNDCHECK(ZSTD_c_srcSizeHint, value);
+        CCtxParams->srcSizeHint = value;
+        return CCtxParams->srcSizeHint;
+
     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
     }
 }
@@ -779,6 +792,8 @@ size_t ZSTD_CCtxParams_getParameter(
     case ZSTD_c_targetCBlockSize :
         *value = (int)CCtxParams->targetCBlockSize;
         break;
+    case ZSTD_c_srcSizeHint :
+        *value = (int)CCtxParams->srcSizeHint;
     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
     }
     return 0;
@@ -1029,7 +1044,11 @@ ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
 ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
         const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize)
 {
-    ZSTD_compressionParameters cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize);
+    ZSTD_compressionParameters cParams;
+    if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) {
+      srcSizeHint = CCtxParams->srcSizeHint;
+    }
+    cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize);
     if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
     if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog;
     if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog;
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index 6d623cc6..0e4ec6b7 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -203,6 +203,9 @@ struct ZSTD_CCtx_params_s {
     size_t targetCBlockSize;   /* Tries to fit compressed block size to be around targetCBlockSize.
                                 * No target when targetCBlockSize == 0.
                                 * There is no guarantee on compressed block size */
+    size_t srcSizeHint;        /* User's best guess of source size.
+                                * Hint is not valid when srcSizeHint == 0.
+                                * There is no guarantee that hint is close to actual source size */
 
     ZSTD_dictAttachPref_e attachDictPref;
     ZSTD_literalCompressionMode_e literalCompressionMode;
diff --git a/lib/zstd.h b/lib/zstd.h
index f8e95f22..4078f9c6 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -386,6 +386,7 @@ typedef enum {
      * ZSTD_c_forceAttachDict
      * ZSTD_c_literalCompressionMode
      * ZSTD_c_targetCBlockSize
+     * ZSTD_c_srcSizeHint
      * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
      * note : never ever use experimentalParam? names directly;
      *        also, the enums values themselves are unstable and can still change.
@@ -396,6 +397,7 @@ typedef enum {
      ZSTD_c_experimentalParam4=1001,
      ZSTD_c_experimentalParam5=1002,
      ZSTD_c_experimentalParam6=1003,
+     ZSTD_c_experimentalParam7=1004,
 } ZSTD_cParameter;
 
 typedef struct {
@@ -1063,6 +1065,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
 /* Advanced parameter bounds */
 #define ZSTD_TARGETCBLOCKSIZE_MIN   64
 #define ZSTD_TARGETCBLOCKSIZE_MAX   ZSTD_BLOCKSIZE_MAX
+#define ZSTD_SRCSIZEHINT_MAX        1e9  /* 1 GB */
 
 /* internal */
 #define ZSTD_HASHLOG3_MAX           17
@@ -1441,6 +1444,11 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre
  * There is no guarantee on compressed block size (default:0) */
 #define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6
 
+/* User's best guess of source size.
+ * Hint is not valid when srcSizeHint == 0.
+ * There is no guarantee that hint is close to actual source size */
+#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7
+
 /*! ZSTD_CCtx_getParameter() :
  *  Get the requested compression parameter value, selected by enum ZSTD_cParameter,
  *  and store it into int* value.
diff --git a/programs/fileio.c b/programs/fileio.c
index 569a410c..20543cd5 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -305,6 +305,7 @@ struct FIO_prefs_s {
     int ldmBucketSizeLog;
     int ldmHashRateLog;
     size_t targetCBlockSize;
+    size_t srcSizeHint;
     ZSTD_literalCompressionMode_e literalCompressionMode;
 
     /* IO preferences */
@@ -350,6 +351,7 @@ FIO_prefs_t* FIO_createPreferences(void)
     ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET;
     ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET;
     ret->targetCBlockSize = 0;
+    ret->srcSizeHint = 0;
     ret->literalCompressionMode = ZSTD_lcm_auto;
     return ret;
 }
@@ -422,6 +424,10 @@ void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize)
     prefs->targetCBlockSize = targetCBlockSize;
 }
 
+void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint) {
+    prefs->srcSizeHint = srcSizeHint;
+}
+
 void FIO_setLiteralCompressionMode(
         FIO_prefs_t* const prefs,
         ZSTD_literalCompressionMode_e mode) {
@@ -667,6 +673,8 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
         CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, cLevel) );
         /* max compressed block size */
         CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetCBlockSize, (int)prefs->targetCBlockSize) );
+        /* source size hint */
+        CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_srcSizeHint, (int)prefs->srcSizeHint) );
         /* long distance matching */
         CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableLongDistanceMatching, prefs->ldmFlag) );
         CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashLog, prefs->ldmHashLog) );
diff --git a/programs/fileio.h b/programs/fileio.h
index 311f8c0e..fd49a749 100644
--- a/programs/fileio.h
+++ b/programs/fileio.h
@@ -72,6 +72,7 @@ void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag);
 void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse);  /**< 0: no sparse; 1: disable on stdout; 2: always enabled */
 void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable);
 void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize);
+void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint);
 void FIO_setLiteralCompressionMode(
         FIO_prefs_t* const prefs,
         ZSTD_literalCompressionMode_e mode);
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index de286cdf..98b9ffb9 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -141,6 +141,7 @@ static int usage_advanced(const char* programName)
     DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog);
     DISPLAY( "--fast[=#]: switch to ultra fast compression level (default: %u)\n", 1);
     DISPLAY( "--adapt : dynamically adapt compression level to I/O conditions \n");
+    DISPLAY( "--size-hint=# optimize compression parameters for streaming input of approximately this size\n");
     DISPLAY( "--target-compressed-block-size=# : make compressed block near targeted size \n");
 #ifdef ZSTD_MULTITHREAD
     DISPLAY( " -T#    : spawns # compression threads (default: 1, 0==# cores) \n");
@@ -589,6 +590,7 @@ int main(int argCount, const char* argv[])
     unsigned maxDictSize = g_defaultMaxDictSize;
     unsigned dictID = 0;
     size_t targetCBlockSize = 0;
+    size_t srcSizeHint = 0;
     int dictCLevel = g_defaultDictCLevel;
     unsigned dictSelect = g_defaultSelectivityLevel;
 #ifdef UTIL_HAS_CREATEFILELIST
@@ -746,6 +748,7 @@ int main(int argCount, const char* argv[])
                     if (longCommandWArg(&argument, "--dictID=")) { dictID = readU32FromChar(&argument); continue; }
                     if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) CLEAN_RETURN(badusage(programName)); continue; }
                     if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readU32FromChar(&argument); continue; }
+                    if (longCommandWArg(&argument, "--size-hint=")) { srcSizeHint = readU32FromChar(&argument); continue; }
                     if (longCommandWArg(&argument, "--long")) {
                         unsigned ldmWindowLog = 0;
                         ldmFlag = 1;
@@ -1151,6 +1154,7 @@ int main(int argCount, const char* argv[])
         FIO_setAdaptMax(prefs, adaptMax);
         FIO_setRsyncable(prefs, rsyncable);
         FIO_setTargetCBlockSize(prefs, targetCBlockSize);
+        FIO_setSrcSizeHint(prefs, srcSizeHint);
         FIO_setLiteralCompressionMode(prefs, literalCompressionMode);
         if (adaptMin > cLevel) cLevel = adaptMin;
         if (adaptMax < cLevel) cLevel = adaptMax;
@@ -1160,7 +1164,7 @@ int main(int argCount, const char* argv[])
         else
           operationResult = FIO_compressMultipleFilenames(prefs, filenameTable, filenameIdx, outFileName, suffix, dictFileName, cLevel, compressionParams);
 #else
-        (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; /* not used when ZSTD_NOCOMPRESS set */
+        (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)srcSizeHint; /* not used when ZSTD_NOCOMPRESS set */
         DISPLAY("Compression not supported \n");
 #endif
     } else {  /* decompression or test */
diff --git a/tests/playTests.sh b/tests/playTests.sh
index 69387321..e3f4cac1 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -409,6 +409,34 @@ println "compress multiple files including a missing one (notHere) : "
 $ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!"
 
 
+println "\n===>  size-hint mode"
+
+./datagen -g11000 > tmp
+println "test : basic file compression vs streaming compression vs hinted streaming compression"
+$ZSTD -14 -f tmp -o tmp.zst 2>&1 | tee file.out
+cat tmp | $ZSTD -14 -f -o tmp.zst  # only run for convenience of comparison
+cat tmp | $ZSTD -14 -f -o tmp.zst --size-hint=11000 2>&1 | tee stream_sized.out
+
+file_ratio=$(cat file.out | awk '{print $4}' | sed 's/%//g')
+stream_sized_ratio=$(cat stream_sized.out | awk '{print $4}' | sed 's/%//g')
+rm file.out stream_sized.out
+
+ratio_diff=$(echo $stream_sized_ratio - $file_ratio | bc)
+if [ $(echo "(100 * $ratio_diff) > 1" | bc -l) -eq 1 ]
+then
+  die "hinted compression greater than 0.01% larger than file compression"
+fi
+println "test : hinted streaming compression and decompression"
+cat tmp | $ZSTD -14 -f -o tmp.zst --size-hint=11000
+$ZSTD -df tmp.zst -o tmp_decompress
+cmp tmp tmp_decompress || die "difference between original and decompressed file"
+println "test : incorrect hinted stream sizes"
+cat tmp | $ZSTD -14 -f -o tmp.zst --size-hint=11050  # slightly too high
+cat tmp | $ZSTD -14 -f -o tmp.zst --size-hint=10950  # slightly too low
+cat tmp | $ZSTD -14 -f -o tmp.zst --size-hint=22000  # considerably too high
+cat tmp | $ZSTD -14 -f -o tmp.zst --size-hint=5500   # considerably too low
+
+
 println "\n===>  dictionary tests "
 
 println "- test with raw dict (content only) "

From edf2abf1069325f24199cdcc9cf405ab46b4bff9 Mon Sep 17 00:00:00 2001
From: Nick Magerko 
Date: Mon, 19 Aug 2019 12:32:43 -0700
Subject: [PATCH 09/26] Fix fall-through case

---
 lib/compress/zstd_compress.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 5589c323..d4471c2b 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -794,6 +794,7 @@ size_t ZSTD_CCtxParams_getParameter(
         break;
     case ZSTD_c_srcSizeHint :
         *value = (int)CCtxParams->srcSizeHint;
+        break;
     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
     }
     return 0;

From fee8fbcddffddc2d18b6d27a01449a4d1dc9d355 Mon Sep 17 00:00:00 2001
From: Nick Magerko 
Date: Mon, 19 Aug 2019 12:58:54 -0700
Subject: [PATCH 10/26] Make upper bound INT_MAX

---
 lib/zstd.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/zstd.h b/lib/zstd.h
index 4078f9c6..ee7871f1 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -15,6 +15,7 @@ extern "C" {
 #define ZSTD_H_235446
 
 /* ======   Dependency   ======*/
+#include <limits.h>   /* INT_MAX */
 #include <stddef.h>   /* size_t */
 
 
@@ -1065,7 +1066,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
 /* Advanced parameter bounds */
 #define ZSTD_TARGETCBLOCKSIZE_MIN   64
 #define ZSTD_TARGETCBLOCKSIZE_MAX   ZSTD_BLOCKSIZE_MAX
-#define ZSTD_SRCSIZEHINT_MAX        1e9  /* 1 GB */
+#define ZSTD_SRCSIZEHINT_MAX        INT_MAX
 
 /* internal */
 #define ZSTD_HASHLOG3_MAX           17

From 09894dc2ebd9c33ac87bf20f658fb82da09a2479 Mon Sep 17 00:00:00 2001
From: Nick Magerko 
Date: Mon, 19 Aug 2019 13:08:41 -0700
Subject: [PATCH 11/26] Add mention of regression with poor size hints

---
 lib/zstd.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/zstd.h b/lib/zstd.h
index ee7871f1..0fbe71ab 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -1447,7 +1447,8 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre
 
 /* User's best guess of source size.
  * Hint is not valid when srcSizeHint == 0.
- * There is no guarantee that hint is close to actual source size */
+ * There is no guarantee that hint is close to actual source size,
+ * but compression ratio may regress significantly if guess considerably underestimates */
 #define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7
 
 /*! ZSTD_CCtx_getParameter() :

From ea9d35922cc48260f8603757a98f22938abaa52c Mon Sep 17 00:00:00 2001
From: Nick Magerko 
Date: Mon, 19 Aug 2019 15:12:24 -0700
Subject: [PATCH 12/26] Add size-hint to fuzz tests

---
 tests/fuzz/zstd_helpers.c |  3 +++
 tests/zstreamtest.c       | 16 ++++++++++++++++
 2 files changed, 19 insertions(+)

diff --git a/tests/fuzz/zstd_helpers.c b/tests/fuzz/zstd_helpers.c
index 9dff2895..5d24a48c 100644
--- a/tests/fuzz/zstd_helpers.c
+++ b/tests/fuzz/zstd_helpers.c
@@ -90,6 +90,9 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, uint32_t *state)
     setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, state);
     setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, state);
     setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, state);
+    if (FUZZ_rand32(state, 0, 1) == 0) {
+      setRand(cctx, ZSTD_c_srcSizeHint, 0, 2 * srcSize, state);
+    }
 }
 
 FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, uint32_t *state)
diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c
index 97d4e33e..70326677 100644
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c
@@ -1151,6 +1151,21 @@ static int basicUnitTests(U32 seed, double compressibility)
     }
     DISPLAYLEVEL(3, "OK \n");
 
+    DISPLAYLEVEL(3, "test%3i : ZSTD_c_srcSizeHint provides hint about size of stream : ", testNb++);
+    {
+      CHECK_Z( ZSTD_initCStream(zc, 1 /* cLevel */) );
+      outBuff.dst = (char*)(compressedBuffer);
+      outBuff.size = compressedBufferSize;
+      outBuff.pos = 0;
+      inBuff.src = CNBuffer;
+      inBuff.size = CNBufferSize;
+      inBuff.pos = 0;
+      CHECK_Z( ZSTD_compressStream(zc, &outBuff, &inBuff) );
+      if (inBuff.pos != inBuff.size) goto _output_error;   /* entire input should be consumed */
+      { size_t const r = ZSTD_endStream(zc, &outBuff);
+        if (r != 0) goto _output_error; }  /* error, or some data not flushed */
+    }
+
     /* Overlen overwriting window data bug */
     DISPLAYLEVEL(3, "test%3i : wildcopy doesn't overwrite potential match data : ", testNb++);
     {   /* This test has a window size of 1024 bytes and consists of 3 blocks:
@@ -2106,6 +2121,7 @@ static int fuzzerTests_newAPI(U32 seed, int nbTests, int startTest,
                     if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_ldmMinMatch, FUZ_randomClampedLength(&lseed, ZSTD_LDM_MINMATCH_MIN, ZSTD_LDM_MINMATCH_MAX), opaqueAPI) );
                     if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_ldmBucketSizeLog, FUZ_randomClampedLength(&lseed, ZSTD_LDM_BUCKETSIZELOG_MIN, ZSTD_LDM_BUCKETSIZELOG_MAX), opaqueAPI) );
                     if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_ldmHashRateLog, FUZ_randomClampedLength(&lseed, ZSTD_LDM_HASHRATELOG_MIN, ZSTD_LDM_HASHRATELOG_MAX), opaqueAPI) );
+                    if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_srcSizeHint, FUZ_randomClampedLength(&lseed, 0, ZSTD_SRCSIZEHINT_MAX), opaqueAPI) );
                 }
 
                 /* mess with frame parameters */

From f9af70ca8a060bb05a476562513cd488a942a3c5 Mon Sep 17 00:00:00 2001
From: Nick Magerko 
Date: Mon, 19 Aug 2019 16:48:35 -0700
Subject: [PATCH 13/26] Fix playTests and add additional cases

---
 tests/playTests.sh | 36 +++++++++++++++++++-----------------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/tests/playTests.sh b/tests/playTests.sh
index e3f4cac1..2404ffb6 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -412,29 +412,31 @@ $ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!"
 println "\n===>  size-hint mode"
 
 ./datagen -g11000 > tmp
-println "test : basic file compression vs streaming compression vs hinted streaming compression"
-$ZSTD -14 -f tmp -o tmp.zst 2>&1 | tee file.out
-cat tmp | $ZSTD -14 -f -o tmp.zst  # only run for convenience of comparison
-cat tmp | $ZSTD -14 -f -o tmp.zst --size-hint=11000 2>&1 | tee stream_sized.out
-
-file_ratio=$(cat file.out | awk '{print $4}' | sed 's/%//g')
-stream_sized_ratio=$(cat stream_sized.out | awk '{print $4}' | sed 's/%//g')
-rm file.out stream_sized.out
-
-ratio_diff=$(echo $stream_sized_ratio - $file_ratio | bc)
-if [ $(echo "(100 * $ratio_diff) > 1" | bc -l) -eq 1 ]
-then
-  die "hinted compression greater than 0.01% larger than file compression"
+./datagen -g11000 > tmp2
+./datagen > tmpDict
+println "test : basic file compression vs hinted streaming compression"
+file_size=$($ZSTD -14 -f tmp -o tmp.zst && wc -c < tmp.zst)
+stream_size=$(cat tmp | $ZSTD -14 --size-hint=11000 | wc -c)
+if [ "$stream_size" -ge "$file_size" ]; then
+  die "hinted compression larger than expected"
 fi
 println "test : hinted streaming compression and decompression"
 cat tmp | $ZSTD -14 -f -o tmp.zst --size-hint=11000
 $ZSTD -df tmp.zst -o tmp_decompress
 cmp tmp tmp_decompress || die "difference between original and decompressed file"
+println "test : hinted streaming compression with dictionary"
+cat tmp | $ZSTD -14 -f -D tmpDict --size-hint=11000 | $ZSTD -t -D tmpDict
+println "test : multiple file compression with hints and dictionary"
+$ZSTD -14 -f -D tmpDict --size-hint=11000 tmp tmp2
+$ZSTD -14 -f -o tmp1_.zst -D tmpDict --size-hint=11000 tmp
+$ZSTD -14 -f -o tmp2_.zst -D tmpDict --size-hint=11000 tmp2
+cmp tmp.zst tmp1_.zst || die "first file's output differs"
+cmp tmp2.zst tmp2_.zst || die "second file's output differs"
 println "test : incorrect hinted stream sizes"
-cat tmp | $ZSTD -14 -f -o tmp.zst --size-hint=11050  # slightly too high
-cat tmp | $ZSTD -14 -f -o tmp.zst --size-hint=10950  # slightly too low
-cat tmp | $ZSTD -14 -f -o tmp.zst --size-hint=22000  # considerably too high
-cat tmp | $ZSTD -14 -f -o tmp.zst --size-hint=5500   # considerably too low
+cat tmp | $ZSTD -14 -f --size-hint=11050 | $ZSTD -t  # slightly too high
+cat tmp | $ZSTD -14 -f --size-hint=10950 | $ZSTD -t  # slightly too low
+cat tmp | $ZSTD -14 -f --size-hint=22000 | $ZSTD -t  # considerably too high
+cat tmp | $ZSTD -14 -f --size-hint=5500  | $ZSTD -t  # considerably too low
 
 
 println "\n===>  dictionary tests "

From 2d39b43906343cbe20d47d340a8518e5d6fdf6c8 Mon Sep 17 00:00:00 2001
From: Nick Magerko 
Date: Mon, 19 Aug 2019 16:49:25 -0700
Subject: [PATCH 14/26] Use int for srcSizeHint when sensible

---
 lib/compress/zstd_compress_internal.h | 2 +-
 programs/fileio.c                     | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index 0e4ec6b7..3e590ec3 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -203,7 +203,7 @@ struct ZSTD_CCtx_params_s {
     size_t targetCBlockSize;   /* Tries to fit compressed block size to be around targetCBlockSize.
                                 * No target when targetCBlockSize == 0.
                                 * There is no guarantee on compressed block size */
-    size_t srcSizeHint;        /* User's best guess of source size.
+    int srcSizeHint;           /* User's best guess of source size.
                                 * Hint is not valid when srcSizeHint == 0.
                                 * There is no guarantee that hint is close to actual source size */
 
diff --git a/programs/fileio.c b/programs/fileio.c
index 20543cd5..0eda1264 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -30,6 +30,7 @@
 #include <string.h>     /* strcmp, strlen */
 #include <assert.h>
 #include <errno.h>      /* errno */
+#include <limits.h>     /* INT_MAX */
 #include <signal.h>
 #include "timefn.h"     /* UTIL_getTime, UTIL_clockSpanMicro */
 
@@ -305,7 +306,7 @@ struct FIO_prefs_s {
     int ldmBucketSizeLog;
     int ldmHashRateLog;
     size_t targetCBlockSize;
-    size_t srcSizeHint;
+    int srcSizeHint;
     ZSTD_literalCompressionMode_e literalCompressionMode;
 
     /* IO preferences */
@@ -425,7 +426,7 @@ void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize)
 }
 
 void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint) {
-    prefs->srcSizeHint = srcSizeHint;
+    prefs->srcSizeHint = (int)MIN((size_t)INT_MAX, srcSizeHint);
 }
 
 void FIO_setLiteralCompressionMode(

From 83076ab277884702428d9ebad933d119928eaf3e Mon Sep 17 00:00:00 2001
From: Nick Magerko 
Date: Mon, 19 Aug 2019 16:50:26 -0700
Subject: [PATCH 15/26] Revert change to zstd manual

---
 doc/zstd_manual.html | 162 +++++++++++++++++++++----------------------
 1 file changed, 80 insertions(+), 82 deletions(-)

diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html
index 806920a5..26b204e1 100644
--- a/doc/zstd_manual.html
+++ b/doc/zstd_manual.html
@@ -76,7 +76,7 @@
 

Compresses `src` content as a single zstd compressed frame into already allocated `dst`. Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. @return : compressed size written into `dst` (<= `dstCapacity), - or an error code if it fails (which can be tested using ZSTD_isError()). + or an error code if it fails (which can be tested using ZSTD_isError()).


size_t ZSTD_decompress( void* dst, size_t dstCapacity,
@@ -85,7 +85,7 @@
   `dstCapacity` is an upper bound of originalSize to regenerate.
   If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data.
   @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
-            or an errorCode if it fails (which can be tested using ZSTD_isError()).
+            or an errorCode if it fails (which can be tested using ZSTD_isError()). 
 


#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
@@ -112,7 +112,7 @@ unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
    note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified.
             Always ensure return value fits within application's authorized limits.
             Each application can set its own limits.
-   note 6 : This function replaces ZSTD_getDecompressedSize()
+   note 6 : This function replaces ZSTD_getDecompressedSize() 
 


unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
@@ -120,7 +120,7 @@ unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
   Both functions work the same way, but ZSTD_getDecompressedSize() blends
   "empty", "unknown" and "error" results to the same return value (0),
   while ZSTD_getFrameContentSize() gives them separate return values.
- @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise.
+ @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. 
 


size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize);
@@ -128,7 +128,7 @@ unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
  `srcSize` must be >= first frame size
  @return : the compressed size of the first frame starting at `src`,
            suitable to pass as `srcSize` to `ZSTD_decompress` or similar,
-        or an error code if input is invalid
+        or an error code if input is invalid 
 


Helper functions

#define ZSTD_COMPRESSBOUND(srcSize)   ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0))  /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */
@@ -148,7 +148,7 @@ int         ZSTD_maxCLevel(void);               /*!< maximum compression lev
          It doesn't change the compression ratio, which remains identical.
   Note 2 : In multi-threaded environments,
          use one different context per thread for parallel execution.
-
+ 
 
typedef struct ZSTD_CCtx_s ZSTD_CCtx;
 ZSTD_CCtx* ZSTD_createCCtx(void);
 size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);
@@ -159,14 +159,14 @@ size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);
                          int compressionLevel);
 

Same as ZSTD_compress(), using an explicit ZSTD_CCtx The function will compress at requested compression level, - ignoring any other parameter + ignoring any other parameter


Decompression context

  When decompressing many times,
   it is recommended to allocate a context only once,
   and re-use it for each successive compression operation.
   This will make workload friendlier for system's memory.
-  Use one context per thread for parallel execution.
+  Use one context per thread for parallel execution. 
 
typedef struct ZSTD_DCtx_s ZSTD_DCtx;
 ZSTD_DCtx* ZSTD_createDCtx(void);
 size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
@@ -177,7 +177,7 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
 

Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx. Compatible with sticky parameters. - +


Advanced compression API


@@ -324,7 +324,6 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
      * ZSTD_c_forceAttachDict
      * ZSTD_c_literalCompressionMode
      * ZSTD_c_targetCBlockSize
-     * ZSTD_c_srcSizeHint
      * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
      * note : never ever use experimentalParam? names directly;
      *        also, the enums values themselves are unstable and can still change.
@@ -335,7 +334,6 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
      ZSTD_c_experimentalParam4=1001,
      ZSTD_c_experimentalParam5=1002,
      ZSTD_c_experimentalParam6=1003,
-     ZSTD_c_experimentalParam7=1004,
 } ZSTD_cParameter;
 

typedef struct {
@@ -350,7 +348,7 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
  @return : a structure, ZSTD_bounds, which contains
          - an error status field, which must be tested using ZSTD_isError()
          - lower and upper bounds, both inclusive
-
+ 
 


size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value);
@@ -363,7 +361,7 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
               => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy.
               new parameters will be active for next job only (after a flush()).
  @return : an error code (which can be tested using ZSTD_isError()).
-
+ 
 


size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize);
@@ -380,7 +378,7 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
            for example with ZSTD_compress2(),
            or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end),
            this value is automatically overridden by srcSize instead.
-
+ 
 


typedef enum {
@@ -402,7 +400,7 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
                   Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing)
                   otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError())
   - Both : similar to resetting the session, followed by resetting parameters.
-
+ 
 


size_t ZSTD_compress2( ZSTD_CCtx* cctx,
@@ -416,7 +414,7 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
   Hint : compression runs faster if `dstCapacity` >=  `ZSTD_compressBound(srcSize)`.
  @return : compressed size written into `dst` (<= `dstCapacity),
            or an error code if it fails (which can be tested using ZSTD_isError()).
-
+ 
 


Advanced decompression API


@@ -447,7 +445,7 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
  @return : a structure, ZSTD_bounds, which contains
          - an error status field, which must be tested using ZSTD_isError()
          - both lower and upper bounds, inclusive
-
+ 
 


size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value);
@@ -456,7 +454,7 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
   Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter).
   Setting a parameter is only possible during frame initialization (before starting decompression).
  @return : 0, or an error code (which can be tested using ZSTD_isError()).
-
+ 
 


size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset);
@@ -464,7 +462,7 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
   Session and parameters can be reset jointly or separately.
   Parameters can only be reset when no active frame is being decompressed.
  @return : 0, or an error code, which can be tested with ZSTD_isError()
-
+ 
 


Streaming


@@ -538,7 +536,7 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
             >0 if some data still present within internal buffer (the value is minimal estimation of remaining size),
             or an error code, which can be tested using ZSTD_isError().
 
-
+ 
 
typedef ZSTD_CCtx ZSTD_CStream;  /**< CCtx and CStream are now effectively same object (>= v1.3.0) */
@@ -582,7 +580,7 @@ size_t ZSTD_freeCStream(ZSTD_CStream* zcs);
             only ZSTD_e_end or ZSTD_e_flush operations are allowed.
             Before starting a new compression job, or changing compression parameters,
             it is required to fully flush internal buffers.
-
+ 
 


size_t ZSTD_CStreamInSize(void);    /**< recommended size for input buffer */
@@ -605,7 +603,7 @@ size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
      ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
      ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
      ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
-
+ 
 


Streaming decompression - HowTo

@@ -631,7 +629,7 @@ size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
         or any other value > 0, which means there is still some decoding or flushing to do to complete current frame :
                                 the return value is a suggested next input size (just a hint for better latency)
                                 that will never request more than the remaining frame size.
-
+ 
 
typedef ZSTD_DCtx ZSTD_DStream;  /**< DCtx and DStream are now effectively same object (>= v1.3.0) */
@@ -656,7 +654,7 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
   or a buffer with specified information (see dictBuilder/zdict.h).
   Note : This function loads the dictionary, resulting in significant startup delay.
          It's intended for a dictionary used only once.
-  Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used.
+  Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. 
 


size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
@@ -667,7 +665,7 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
   Dictionary must be identical to the one used during compression.
   Note : This function loads the dictionary, resulting in significant startup delay.
          It's intended for a dictionary used only once.
-  Note : When `dict == NULL || dictSize < 8` no dictionary is used.
+  Note : When `dict == NULL || dictSize < 8` no dictionary is used. 
 


Bulk processing dictionary API


@@ -679,11 +677,11 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
   ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
  `dictBuffer` can be released after ZSTD_CDict creation, because its content is copied within CDict.
   Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate `dictBuffer` content.
-  Note : A ZSTD_CDict can be created from an empty dictBuffer, but it is inefficient when used to compress small data.
+  Note : A ZSTD_CDict can be created from an empty dictBuffer, but it is inefficient when used to compress small data. 
 


size_t      ZSTD_freeCDict(ZSTD_CDict* CDict);
-

Function frees memory allocated by ZSTD_createCDict(). +

Function frees memory allocated by ZSTD_createCDict().


size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
@@ -693,16 +691,16 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
 

Compression using a digested Dictionary. Recommended when same dictionary is used multiple times. Note : compression level is _decided at dictionary creation time_, - and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) + and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no)


ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
 

Create a digested dictionary, ready to start decompression operation without startup delay. - dictBuffer can be released after DDict creation, as its content is copied inside DDict. + dictBuffer can be released after DDict creation, as its content is copied inside DDict.


size_t      ZSTD_freeDDict(ZSTD_DDict* ddict);
-

Function frees memory allocated with ZSTD_createDDict() +

Function frees memory allocated with ZSTD_createDDict()


size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
@@ -710,7 +708,7 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
                             const void* src, size_t srcSize,
                             const ZSTD_DDict* ddict);
 

Decompression using a digested Dictionary. - Recommended when same dictionary is used multiple times. + Recommended when same dictionary is used multiple times.


Dictionary helper functions


@@ -718,13 +716,13 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
 
unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
 

Provides the dictID stored within dictionary. if @return == 0, the dictionary is not conformant with Zstandard specification. - It can still be loaded, but as a content-only dictionary. + It can still be loaded, but as a content-only dictionary.


unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
 

Provides the dictID of the dictionary loaded into `ddict`. If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. - Non-conformant dictionaries can still be loaded, but as content-only dictionaries. + Non-conformant dictionaries can still be loaded, but as content-only dictionaries.


unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
@@ -736,7 +734,7 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
     Note : this use case also happens when using a non-conformant dictionary.
   - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
   - This is not a Zstandard frame.
-  When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code.
+  When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. 
 


Advanced dictionary and prefix API

@@ -762,7 +760,7 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
            Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead.
            In such a case, dictionary buffer must outlive its users.
   Note 4 : Use ZSTD_CCtx_loadDictionary_advanced()
-           to precisely select how dictionary content must be interpreted.
+           to precisely select how dictionary content must be interpreted. 
 


size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
@@ -776,7 +774,7 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
   Special : Referencing a NULL CDict means "return to no-dictionary mode".
   Note 1 : Currently, only one dictionary can be managed.
            Referencing a new dictionary effectively "discards" any previous one.
-  Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx.
+  Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. 
 


size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx,
@@ -797,7 +795,7 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
            It's a CPU consuming operation, with non-negligible impact on latency.
            If there is a need to use the same prefix multiple times, consider loadDictionary instead.
   Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dm_rawContent).
-           Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation.
+           Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. 
 


size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
@@ -814,7 +812,7 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
            Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead.
   Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of
            how dictionary content is loaded and interpreted.
-
+ 
 


size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
@@ -825,7 +823,7 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
            Referencing a new dictionary effectively "discards" any previous one.
   Special: referencing a NULL DDict means "return to no-dictionary mode".
   Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx.
-
+ 
 


size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx,
@@ -844,7 +842,7 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
            Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section)
   Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost.
            A full dictionary is more costly, as it requires building tables.
-
+ 
 


size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
@@ -854,7 +852,7 @@ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
 size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict);
 size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
 

These functions give the _current_ memory usage of selected object. - Note that object memory usage can evolve (increase or decrease) over time. + Note that object memory usage can evolve (increase or decrease) over time.


experimental API (static linking only)

@@ -863,7 +861,7 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
  They can still change in future versions.
  Some of them are planned to remain in the static_only section indefinitely.
  Some of them might be removed in the future (especially when redundant with existing stable functions)
-
+ 
 
typedef struct {
@@ -977,7 +975,7 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
             Each application can set its own limits.
    note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to
             read each contained frame header.  This is fast as most of the data is skipped,
-            however it does mean that all frame data must be present and valid.
+            however it does mean that all frame data must be present and valid. 
 


unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize);
@@ -992,13 +990,13 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
             in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value.
   note 3  : when the decompressed size field isn't available, the upper-bound for that frame is calculated by:
               upper-bound = # blocks * min(128 KB, Window_Size)
-
+ 
 


size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
 

srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX. @return : size of the Frame Header, - or an error code (if srcSize is too small) + or an error code (if srcSize is too small)


Memory management


@@ -1014,7 +1012,7 @@ size_t ZSTD_estimateDCtxSize(void);
   If srcSize is known to always be small, ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation.
   ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
   ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1.
-  Note : CCtx size estimation is only correct for single-threaded compression.
+  Note : CCtx size estimation is only correct for single-threaded compression. 
 


size_t ZSTD_estimateCStreamSize(int compressionLevel);
@@ -1033,7 +1031,7 @@ size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize);
   or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame();
   Note : if streaming is init with function ZSTD_init?Stream_usingDict(),
          an internal ?Dict will be created, which additional size is not estimated here.
-         In this case, get total size by adding ZSTD_estimate?DictSize
+         In this case, get total size by adding ZSTD_estimate?DictSize 
 


size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel);
@@ -1042,7 +1040,7 @@ size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMet
 

ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict(). ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced(). Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller. - +


ZSTD_CCtx*    ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize);
@@ -1066,7 +1064,7 @@ ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize);
                  ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict().
   Limitation 2 : static cctx currently not compatible with multi-threading.
   Limitation 3 : static dctx is incompatible with legacy support.
-
+ 
 


ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize);    /**< same as ZSTD_initStaticDCtx() */
@@ -1078,7 +1076,7 @@ static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL };  /**< t
 

These prototypes make it possible to pass your own allocation/free functions. ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below. All allocation/free operations will be completed using these custom variants instead of regular ones. - +


Advanced compression functions


@@ -1087,22 +1085,22 @@ static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL };  
/**< t

Create a digested dictionary for compression Dictionary content is just referenced, not duplicated. As a consequence, `dictBuffer` **must** outlive CDict, - and its content must remain unmodified throughout the lifetime of CDict. + and its content must remain unmodified throughout the lifetime of CDict.


ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
 

@return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. - `estimatedSrcSize` value is optional, select 0 if not known + `estimatedSrcSize` value is optional, select 0 if not known


ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
 

same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`. - All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 + All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0


size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
 

Ensure param values remain within authorized range. - @return 0 on success, or an error code (can be checked with ZSTD_isError()) + @return 0 on success, or an error code (can be checked with ZSTD_isError())


ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
@@ -1110,7 +1108,7 @@ static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL };  /**< t
  `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN.
  `dictSize` must be `0` when there is no dictionary.
   cPar can be invalid : all parameters will be clamped within valid range in the @return struct.
-  This function never fails (wide contract)
+  This function never fails (wide contract) 
 


size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
@@ -1118,7 +1116,7 @@ static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL };  /**< t
                         const void* src, size_t srcSize,
                         const void* dict,size_t dictSize,
                               ZSTD_parameters params);
-

Same as ZSTD_compress_usingDict(), with fine-tune control over compression parameters (by structure) +

Same as ZSTD_compress_usingDict(), with fine-tune control over compression parameters (by structure)


size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
@@ -1126,30 +1124,30 @@ static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL };  /**< t
                             const void* src, size_t srcSize,
                             const ZSTD_CDict* cdict,
                                   ZSTD_frameParameters fParams);
-

Same as ZSTD_compress_usingCDict(), with fine-tune control over frame parameters +

Same as ZSTD_compress_usingCDict(), with fine-tune control over frame parameters


size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
 

Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx. - It saves some memory, but also requires that `dict` outlives its usage within `cctx` + It saves some memory, but also requires that `dict` outlives its usage within `cctx`


size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
 

Same as ZSTD_CCtx_loadDictionary(), but gives finer control over how to load the dictionary (by copy ? by reference ?) - and how to interpret it (automatic ? force raw mode ? full mode only ?) + and how to interpret it (automatic ? force raw mode ? full mode only ?)


size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
 

Same as ZSTD_CCtx_refPrefix(), but gives finer control over - how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) + how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?)


size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value);
 

Get the requested compression parameter value, selected by enum ZSTD_cParameter, and store it into int* value. @return : 0, or an error code (which can be tested with ZSTD_isError()). - +


ZSTD_CCtx_params* ZSTD_createCCtxParams(void);
@@ -1169,24 +1167,24 @@ size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
 
   This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams()
   for static allocation of CCtx for single-threaded compression.
-
+ 
 


size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params);
 

Reset params to default values. - +


size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel);
 

Initializes the compression parameters of cctxParams according to compression level. All other parameters are reset to their default values. - +


size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params);
 

Initializes the compression and frame parameters of cctxParams according to params. All other parameters are reset to their default values. - +


size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value);
@@ -1194,14 +1192,14 @@ size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
   Set one compression parameter, selected by enum ZSTD_cParameter.
   Parameters must be applied to a ZSTD_CCtx using ZSTD_CCtx_setParametersUsingCCtxParams().
  @result : 0, or an error code (which can be tested with ZSTD_isError()).
-
+ 
 


size_t ZSTD_CCtxParams_getParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value);
 

Similar to ZSTD_CCtx_getParameter. Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. @result : 0, or an error code (which can be tested with ZSTD_isError()). - +


size_t ZSTD_CCtx_setParametersUsingCCtxParams(
@@ -1211,7 +1209,7 @@ size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
     if nbWorkers==0, this will have no impact until a new compression is started.
     if nbWorkers>=1, new parameters will be picked up at next job,
        with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated).
-
+ 
 


size_t ZSTD_compressStream2_simpleArgs (
@@ -1223,7 +1221,7 @@ size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
   but using only integral types as arguments.
   This variant might be helpful for binders from dynamic languages
   which have troubles handling structures containing memory pointers.
-
+ 
 


Advanced decompression functions


@@ -1232,33 +1230,33 @@ size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
 

Tells if the content of `buffer` starts with a valid Frame Identifier. Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. - Note 3 : Skippable Frame Identifiers are considered valid. + Note 3 : Skippable Frame Identifiers are considered valid.


ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize);
 

Create a digested dictionary, ready to start decompression operation without startup delay. Dictionary content is referenced, and therefore stays in dictBuffer. It is important that dictBuffer outlives DDict, - it must remain read accessible throughout the lifetime of DDict + it must remain read accessible throughout the lifetime of DDict


size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
 

Same as ZSTD_DCtx_loadDictionary(), but references `dict` content instead of copying it into `dctx`. This saves memory if `dict` remains around., - However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. + However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression.


size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
 

Same as ZSTD_DCtx_loadDictionary(), but gives direct control over how to load the dictionary (by copy ? by reference ?) - and how to interpret it (automatic ? force raw mode ? full mode only ?). + and how to interpret it (automatic ? force raw mode ? full mode only ?).


size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
 

Same as ZSTD_DCtx_refPrefix(), but gives finer control over - how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) + how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?)


size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize);
@@ -1267,14 +1265,14 @@ size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
   This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode.
   By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT)
  @return : 0, or an error code (which can be tested using ZSTD_isError()).
-
+ 
 


size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
 

Instruct the decoder context about what kind of data to decode next. This instruction is mandatory to decode data without a fully-formed header, such ZSTD_f_zstd1_magicless for example. - @return : 0, or an error code (which can be tested using ZSTD_isError()). + @return : 0, or an error code (which can be tested using ZSTD_isError()).


size_t ZSTD_decompressStream_simpleArgs (
@@ -1285,7 +1283,7 @@ size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
   but using only integral types as arguments.
   This can be helpful for binders from dynamic languages
   which have troubles handling structures containing memory pointers.
-
+ 
 


Advanced streaming functions

  Warning : most of these functions are now redundant with the Advanced API.
@@ -1363,7 +1361,7 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict*
   For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs,
   but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead.
  @return : 0, or an error code (which can be tested using ZSTD_isError())
-
+ 
 


typedef struct {
@@ -1387,7 +1385,7 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict*
     but everything it has produced has also been flushed so far,
     therefore flush speed is limited by production speed of oldest job
     irrespective of the speed of concurrent (and newer) jobs.
-
+ 
 


Advanced Streaming decompression functions

/**
@@ -1421,7 +1419,7 @@ size_t ZSTD_resetDStream(ZSTD_DStream* zds);
   This is an advanced API, giving full control over buffer management, for users which need direct control over memory.
   But it's also a complex one, with several restrictions, documented below.
   Prefer normal streaming API for an easier experience.
-
+ 
 

Buffer-less streaming compression (synchronous mode)

@@ -1519,7 +1517,7 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned lo
   Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType().
   This information is not required to properly decode a frame.
 
-  == Special case : skippable frames
+  == Special case : skippable frames 
 
   Skippable frames allow integration of user-defined data into a flow of concatenated frames.
   Skippable frames will be ignored (skipped) by decompressor.
@@ -1551,7 +1549,7 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
 

decode Frame Header, or requires larger `srcSize`. @return : 0, `zfhPtr` is correctly filled, >0, `srcSize` is too small, value is wanted `srcSize` amount, - or an error code, which can be tested using ZSTD_isError() + or an error code, which can be tested using ZSTD_isError()


typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;

From 294f1e5cfe2e3f24aed2a36c7a25e1eb401a1636 Mon Sep 17 00:00:00 2001
From: Nick Magerko 
Date: Mon, 19 Aug 2019 16:53:02 -0700
Subject: [PATCH 16/26] Fix typo in test

---
 tests/zstreamtest.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c
index 70326677..b0e5d827 100644
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c
@@ -2121,7 +2121,7 @@ static int fuzzerTests_newAPI(U32 seed, int nbTests, int startTest,
                     if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_ldmMinMatch, FUZ_randomClampedLength(&lseed, ZSTD_LDM_MINMATCH_MIN, ZSTD_LDM_MINMATCH_MAX), opaqueAPI) );
                     if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_ldmBucketSizeLog, FUZ_randomClampedLength(&lseed, ZSTD_LDM_BUCKETSIZELOG_MIN, ZSTD_LDM_BUCKETSIZELOG_MAX), opaqueAPI) );
                     if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_ldmHashRateLog, FUZ_randomClampedLength(&lseed, ZSTD_LDM_HASHRATELOG_MIN, ZSTD_LDM_HASHRATELOG_MAX), opaqueAPI) );
-                    if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_srcSizeHint, FUZ_randomClampedLength(&lseed, 0, ZSTD_ZSTD_SRCSIZEHINT_MAX), opaqueAPI) );
+                    if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_srcSizeHint, FUZ_randomClampedLength(&lseed, 0, ZSTD_SRCSIZEHINT_MAX), opaqueAPI) );
                 }
 
                 /* mess with frame parameters */

From f23402f1f5f6a5b3adb103f06dd22c9d669a9311 Mon Sep 17 00:00:00 2001
From: Nick Magerko 
Date: Mon, 19 Aug 2019 17:20:46 -0700
Subject: [PATCH 17/26] Remove unnecessary test case

---
 tests/zstreamtest.c | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c
index b0e5d827..6fe177cb 100644
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c
@@ -1151,21 +1151,6 @@ static int basicUnitTests(U32 seed, double compressibility)
     }
     DISPLAYLEVEL(3, "OK \n");
 
-    DISPLAYLEVEL(3, "test%3i : ZSTD_c_srcSizeHint provides hint about size of stream : ", testNb++);
-    {
-      CHECK_Z( ZSTD_initCStream(zc, 1 /* cLevel */) );
-      outBuff.dst = (char*)(compressedBuffer);
-      outBuff.size = compressedBufferSize;
-      outBuff.pos = 0;
-      inBuff.src = CNBuffer;
-      inBuff.size = CNBufferSize;
-      inBuff.pos = 0;
-      CHECK_Z( ZSTD_compressStream(zc, &outBuff, &inBuff) );
-      if (inBuff.pos != inBuff.size) goto _output_error;   /* entire input should be consumed */
-      { size_t const r = ZSTD_endStream(zc, &outBuff);
-        if (r != 0) goto _output_error; }  /* error, or some data not flushed */
-    }
-
     /* Overlen overwriting window data bug */
     DISPLAYLEVEL(3, "test%3i : wildcopy doesn't overwrite potential match data : ", testNb++);
     {   /* This test has a window size of 1024 bytes and consists of 3 blocks:

From c7a24d7a14d32dff18b9a98265c9a5ee6578dd25 Mon Sep 17 00:00:00 2001
From: Nick Magerko 
Date: Tue, 20 Aug 2019 13:06:15 -0700
Subject: [PATCH 18/26] Define ZSTD_SRCSIZEHINT_MIN as 0

---
 lib/compress/zstd_compress.c | 2 +-
 lib/zstd.h                   | 1 +
 tests/fuzz/zstd_helpers.c    | 2 +-
 tests/zstreamtest.c          | 2 +-
 4 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index d4471c2b..3660e9d1 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -393,7 +393,7 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
         return bounds;
 
     case ZSTD_c_srcSizeHint:
-        bounds.lowerBound = 0;
+        bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN;
         bounds.upperBound = ZSTD_SRCSIZEHINT_MAX;
         return bounds;
 
diff --git a/lib/zstd.h b/lib/zstd.h
index 0fbe71ab..5396b719 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -1066,6 +1066,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
 /* Advanced parameter bounds */
 #define ZSTD_TARGETCBLOCKSIZE_MIN   64
 #define ZSTD_TARGETCBLOCKSIZE_MAX   ZSTD_BLOCKSIZE_MAX
+#define ZSTD_SRCSIZEHINT_MAX        0
 #define ZSTD_SRCSIZEHINT_MAX        INT_MAX
 
 /* internal */
diff --git a/tests/fuzz/zstd_helpers.c b/tests/fuzz/zstd_helpers.c
index 5d24a48c..5ff057b8 100644
--- a/tests/fuzz/zstd_helpers.c
+++ b/tests/fuzz/zstd_helpers.c
@@ -91,7 +91,7 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, uint32_t *state)
     setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, state);
     setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, state);
     if (FUZZ_rand32(state, 0, 1) == 0) {
-      setRand(cctx, ZSTD_c_srcSizeHint, 0, 2 * srcSize, state);
+      setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, state);
     }
 }
 
diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c
index 6fe177cb..d2c4036a 100644
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c
@@ -2106,7 +2106,7 @@ static int fuzzerTests_newAPI(U32 seed, int nbTests, int startTest,
                     if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_ldmMinMatch, FUZ_randomClampedLength(&lseed, ZSTD_LDM_MINMATCH_MIN, ZSTD_LDM_MINMATCH_MAX), opaqueAPI) );
                     if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_ldmBucketSizeLog, FUZ_randomClampedLength(&lseed, ZSTD_LDM_BUCKETSIZELOG_MIN, ZSTD_LDM_BUCKETSIZELOG_MAX), opaqueAPI) );
                     if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_ldmHashRateLog, FUZ_randomClampedLength(&lseed, ZSTD_LDM_HASHRATELOG_MIN, ZSTD_LDM_HASHRATELOG_MAX), opaqueAPI) );
-                    if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_srcSizeHint, FUZ_randomClampedLength(&lseed, 0, ZSTD_SRCSIZEHINT_MAX), opaqueAPI) );
+                    if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_srcSizeHint, FUZ_randomClampedLength(&lseed, ZSTD_SRCSIZEHINT_MIN, ZSTD_SRCSIZEHINT_MAX), opaqueAPI) );
                 }
 
                 /* mess with frame parameters */

From de6a6c73645092595250f5e9347baf60a7e91a12 Mon Sep 17 00:00:00 2001
From: Nick Magerko 
Date: Tue, 20 Aug 2019 13:07:51 -0700
Subject: [PATCH 19/26] Fix ZSTD_SRCSIZEHINT_MIN typo

---
 lib/zstd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/zstd.h b/lib/zstd.h
index 5396b719..38c99e01 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -1066,7 +1066,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
 /* Advanced parameter bounds */
 #define ZSTD_TARGETCBLOCKSIZE_MIN   64
 #define ZSTD_TARGETCBLOCKSIZE_MAX   ZSTD_BLOCKSIZE_MAX
-#define ZSTD_SRCSIZEHINT_MAX        0
+#define ZSTD_SRCSIZEHINT_MIN        0
 #define ZSTD_SRCSIZEHINT_MAX        INT_MAX
 
 /* internal */

From 05d7479a505584563c521822ae353d3f256c3bb8 Mon Sep 17 00:00:00 2001
From: Nick Magerko 
Date: Tue, 20 Aug 2019 14:08:26 -0700
Subject: [PATCH 20/26] Document --size-hint

---
 programs/zstd.1.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/programs/zstd.1.md b/programs/zstd.1.md
index 3ab2667a..f8349fa8 100644
--- a/programs/zstd.1.md
+++ b/programs/zstd.1.md
@@ -144,6 +144,13 @@ the last one takes effect.
     Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible.
     _note_ : at the time of this writing, `--adapt` can remain stuck at low speed
     when combined with multiple worker threads (>=2).
+* `--size-hint=#`:
+    When handling input from a stream, `zstd` must guess how large the source size
+    will be when optimizing compression parameters. If the stream size is relatively
+    small, this guess may be a poor one, resulting in a worse compression ratio than
+    expected. This feature allows for controlling the guess when needed.
+    Exact guesses result in better compression ratios. Overestimates result in slightly
+    degraded compression ratios, while underestimates may result in significant degradation.
 * `--rsyncable` :
     `zstd` will periodically synchronize the compression state to make the
     compressed file more rsync-friendly. There is a negligible impact to

From 3982935aefbd6e8c0a8ceb7d54afc5b97f188fc7 Mon Sep 17 00:00:00 2001
From: Nick Terrell 
Date: Tue, 20 Aug 2019 11:33:33 -0700
Subject: [PATCH 21/26] [fuzz] Improve fuzzer build script and docs

* Remove the `make libFuzzer` target since it is broken and obsoleted
  by `CC=clang CXX=clang++ ./fuzz.py build all --enable-fuzzer`. The
  new `-fsanitize=fuzzer` is much better because it works with MSAN
  by default.
* Improve the `./fuzz.py gen` command by making the input type explicit
  when creating a new target.
* Update the `README` for `--enable-fuzzer`.

Fixes #1727.
---
 tests/fuzz/Makefile  |  9 --------
 tests/fuzz/README.md | 19 ++++++++++------
 tests/fuzz/fuzz.py   | 53 ++++++++++++++++++++++++++++++--------------
 3 files changed, 48 insertions(+), 33 deletions(-)

diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile
index 8bf16b1f..08dedd66 100644
--- a/tests/fuzz/Makefile
+++ b/tests/fuzz/Makefile
@@ -113,15 +113,6 @@ zstd_frame_info: $(FUZZ_HEADERS) $(FUZZ_OBJ) zstd_frame_info.o
 libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c regression_driver.o
 	$(AR) $(FUZZ_ARFLAGS) $@ regression_driver.o
 
-# Install libfuzzer (not usable for MSAN testing)
-# Provided for convenience. To use this library run make libFuzzer and
-# set LDFLAGS=-L.
-.PHONY: libFuzzer
-libFuzzer:
-	@$(RM) -rf Fuzzer
-	@git clone https://chromium.googlesource.com/chromium/llvm-project/compiler-rt/lib/fuzzer Fuzzer
-	@cd Fuzzer && ./build.sh
-
 corpora/%_seed_corpus.zip:
 	@mkdir -p corpora
 	$(DOWNLOAD) $@ $(CORPORA_URL_PREFIX)$*_seed_corpus.zip
diff --git a/tests/fuzz/README.md b/tests/fuzz/README.md
index 9e0bb259..856a57f8 100644
--- a/tests/fuzz/README.md
+++ b/tests/fuzz/README.md
@@ -35,6 +35,8 @@ The environment variables can be overridden with the corresponding flags
 `--cc`, `--cflags`, etc.
 The specific fuzzing engine is selected with `LIB_FUZZING_ENGINE` or
 `--lib-fuzzing-engine`, the default is `libregression.a`.
+Alternatively, you can use Clang's built-in fuzzing engine with
+`--enable-fuzzer`.
 It has flags that can easily set up sanitizers `--enable-{a,ub,m}san`, and
 coverage instrumentation `--enable-coverage`.
 It sets sane defaults which can be overridden with flags `--debug`,
@@ -51,22 +53,25 @@ The command used to run the fuzzer is printed for debugging.
 ## LibFuzzer
 
 ```
-# Build libfuzzer if necessary
-make libFuzzer
 # Build the fuzz targets
-./fuzz.py build all --enable-coverage --enable-asan --enable-ubsan --lib-fuzzing-engine Fuzzer/libFuzzer.a --cc clang --cxx clang++
+./fuzz.py build all --enable-fuzzer --enable-asan --enable-ubsan --cc clang --cxx clang++
 # OR equivalently
-CC=clang CXX=clang++ LIB_FUZZING_ENGINE=Fuzzer/libFuzzer.a ./fuzz.py build all --enable-coverage --enable-asan --enable-ubsan
+CC=clang CXX=clang++ ./fuzz.py build all --enable-fuzzer --enable-asan --enable-ubsan
 # Run the fuzzer
-./fuzz.py libfuzzer TARGET -max_len=8192 -jobs=4
+./fuzz.py libfuzzer TARGET 
 ```
 
 where `TARGET` could be `simple_decompress`, `stream_round_trip`, etc.
 
 ### MSAN
 
-Fuzzing with `libFuzzer` and `MSAN` will require building a C++ standard library
-and libFuzzer with MSAN.
+Fuzzing with `libFuzzer` and `MSAN` is as easy as:
+
+```
+CC=clang CXX=clang++ ./fuzz.py build all --enable-fuzzer --enable-msan
+./fuzz.py libfuzzer TARGET 
+```
+
 `fuzz.py` respects the environment variables / flags `MSAN_EXTRA_CPPFLAGS`,
 `MSAN_EXTRA_CFLAGS`, `MSAN_EXTRA_CXXFLAGS`, `MSAN_EXTRA_LDFLAGS` to easily pass
 the extra parameters only for MSAN.
diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py
index d993209a..faf8ce8a 100755
--- a/tests/fuzz/fuzz.py
+++ b/tests/fuzz/fuzz.py
@@ -24,21 +24,38 @@ def abs_join(a, *p):
     return os.path.abspath(os.path.join(a, *p))
 
 
+class InputType(object):
+    RAW_DATA = 1
+    COMPRESSED_DATA = 2
+
+
+class FrameType(object):
+    ZSTD = 1
+    BLOCK = 2
+
+
+class TargetInfo(object):
+    def __init__(self, input_type, frame_type=FrameType.ZSTD):
+        self.input_type = input_type
+        self.frame_type = frame_type
+
+
 # Constants
 FUZZ_DIR = os.path.abspath(os.path.dirname(__file__))
 CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora')
-TARGETS = [
-    'simple_round_trip',
-    'stream_round_trip',
-    'block_round_trip',
-    'simple_decompress',
-    'stream_decompress',
-    'block_decompress',
-    'dictionary_round_trip',
-    'dictionary_decompress',
-    'zstd_frame_info',
-    'simple_compress',
-]
+TARGET_INFO = {
+    'simple_round_trip': TargetInfo(InputType.RAW_DATA),
+    'stream_round_trip': TargetInfo(InputType.RAW_DATA),
+    'block_round_trip': TargetInfo(InputType.RAW_DATA, FrameType.BLOCK),
+    'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA),
+    'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA),
+    'block_decompress': TargetInfo(InputType.COMPRESSED_DATA, FrameType.BLOCK),
+    'dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
+    'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA),
+    'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA),
+    'simple_compress': TargetInfo(InputType.RAW_DATA),
+}
+TARGETS = list(TARGET_INFO.keys())
 ALL_TARGETS = TARGETS + ['all']
 FUZZ_RNG_SEED_SIZE = 4
 
@@ -67,7 +84,7 @@ MSAN_EXTRA_LDFLAGS = os.environ.get('MSAN_EXTRA_LDFLAGS', '')
 def create(r):
     d = os.path.abspath(r)
     if not os.path.isdir(d):
-        os.mkdir(d)
+        os.makedirs(d)
     return d
 
 
@@ -158,7 +175,7 @@ def compiler_version(cc, cxx):
         assert(b'clang' in cxx_version_bytes)
         compiler = 'clang'
     elif b'gcc' in cc_version_bytes:
-        assert(b'gcc' in cxx_version_bytes)
+        assert(b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes)
         compiler = 'gcc'
     if compiler is not None:
         version_regex = b'([0-9])+\.([0-9])+\.([0-9])+'
@@ -699,7 +716,8 @@ def gen(args):
                 '-o{}'.format(decompressed),
             ]
 
-            if 'block_' in args.TARGET:
+            info = TARGET_INFO[args.TARGET]
+            if info.frame_type == FrameType.BLOCK:
                 cmd += [
                     '--gen-blocks',
                     '--max-block-size-log={}'.format(args.max_size_log)
@@ -710,10 +728,11 @@ def gen(args):
             print(' '.join(cmd))
             subprocess.check_call(cmd)
 
-            if '_round_trip' in args.TARGET:
+            if info.input_type == InputType.RAW_DATA:
                 print('using decompressed data in {}'.format(decompressed))
                 samples = decompressed
-            elif '_decompress' in args.TARGET:
+            else:
+                assert info.input_type == InputType.COMPRESSED_DATA
                 print('using compressed data in {}'.format(compressed))
                 samples = compressed
 

From 07f22d465d0f85aa00f20fc2f0b59a50ddfe494f Mon Sep 17 00:00:00 2001
From: Nick Terrell 
Date: Tue, 20 Aug 2019 17:13:04 -0700
Subject: [PATCH 22/26] [legacy] Fix buffer overflow in v0.2 and v0.4 raw
 literals decompression

Extends the fix in PR#1722 to v0.2 and v0.4. These aren't built into
zstd by default, and v0.5 onward are not affected.

I only add the `srcSize > BLOCKSIZE` check to v0.4 because the comments
say that it must hold, but the equivalent comment isn't present in v0.2.

Credit to OSS-Fuzz.
---
 lib/legacy/zstd_v02.c | 1 +
 lib/legacy/zstd_v04.c | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/lib/legacy/zstd_v02.c b/lib/legacy/zstd_v02.c
index 793df602..de0a4bd6 100644
--- a/lib/legacy/zstd_v02.c
+++ b/lib/legacy/zstd_v02.c
@@ -2889,6 +2889,7 @@ static size_t ZSTD_decodeLiteralsBlock(void* ctx,
             const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
             if (litSize > srcSize-11)   /* risk of reading too far with wildcopy */
             {
+                if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
                 if (litSize > srcSize-3) return ERROR(corruption_detected);
                 memcpy(dctx->litBuffer, istart, litSize);
                 dctx->litPtr = dctx->litBuffer;
diff --git a/lib/legacy/zstd_v04.c b/lib/legacy/zstd_v04.c
index 645a6e31..201ce2b6 100644
--- a/lib/legacy/zstd_v04.c
+++ b/lib/legacy/zstd_v04.c
@@ -2655,6 +2655,7 @@ static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
             const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
             if (litSize > srcSize-11)   /* risk of reading too far with wildcopy */
             {
+                if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
                 if (litSize > srcSize-3) return ERROR(corruption_detected);
                 memcpy(dctx->litBuffer, istart, litSize);
                 dctx->litPtr = dctx->litBuffer;
@@ -3034,9 +3035,12 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
 {
     /* blockType == blockCompressed */
     const BYTE* ip = (const BYTE*)src;
+    size_t litCSize;
+
+    if (srcSize > BLOCKSIZE) return ERROR(corruption_detected);
 
     /* Decode literals sub-block */
-    size_t litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
+    litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
     if (ZSTD_isError(litCSize)) return litCSize;
     ip += litCSize;
     srcSize -= litCSize;

From b3540507f5239f9d0810c3b9e2b920601687738d Mon Sep 17 00:00:00 2001
From: Nick Magerko 
Date: Wed, 21 Aug 2019 10:27:54 -0700
Subject: [PATCH 23/26] Remove bc from play tests

---
 tests/playTests.sh | 41 +++++++++++++++++------------------------
 1 file changed, 17 insertions(+), 24 deletions(-)

diff --git a/tests/playTests.sh b/tests/playTests.sh
index c516aa71..b7407676 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -408,6 +408,23 @@ println "compress multiple files including a missing one (notHere) : "
 $ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!"
 
 
+println "\n===>  stream-size mode"
+
+./datagen -g11000 > tmp
+println "test : basic file compression vs sized streaming compression"
+file_size=$($ZSTD -14 -f tmp -o tmp.zst && wc -c < tmp.zst)
+stream_size=$(cat tmp | $ZSTD -14 --stream-size=11000 | wc -c)
+if [ "$stream_size" -gt "$file_size" ]; then
+  die "hinted compression larger than expected"
+fi
+println "test : sized streaming compression and decompression"
+cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11000   # read stdin only; -o must be directly followed by the output name
+$ZSTD -df tmp.zst -o tmp_decompress
+cmp tmp tmp_decompress || die "difference between original and decompressed file"
+println "test : incorrect stream size"
+cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11001 && die "should fail with incorrect stream size"
+
+
 println "\n===>  dictionary tests "
 
 println "- test with raw dict (content only) "
@@ -1019,28 +1036,4 @@ test -f dictionary
 rm -f tmp* dictionary
 
 
-println "\n===>  stream-size mode"
-
-./datagen -g11000 > tmp
-println "test : basic file compression vs sized streaming compression"
-$ZSTD -14 -f tmp -o tmp.zst 2>&1 | tee file.out
-cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11000 2>&1 | tee stream_sized.out
-
-file_ratio=$(cat file.out | awk '{print $4}' | sed 's/%//g')
-stream_sized_ratio=$(cat stream_sized.out | awk '{print $4}' | sed 's/%//g')
-rm file.out stream_sized.out
-
-ratio_diff=$(echo $file_ratio - $stream_sized_ratio | bc)
-if [ $(echo "(100 * $ratio_diff) > 5" | bc -l) == 1 ]
-then
-  die "greater than 0.05% difference between file and sized-streaming compression"
-fi
-println "test : sized streaming compression and decompression"
-cat tmp | $ZSTD -14 -f tmp -o --stream-size=11000 tmp.zst
-$ZSTD -df tmp.zst -o tmp_decompress
-cmp tmp tmp_decompress || die "difference between original and decompressed file"
-println "test : incorrect stream size"
-cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11001 && die "should fail with incorrect stream size"
-
-
 rm -f tmp*

From 2cdda8b3c43c23981941cdd97c1a517981d2339e Mon Sep 17 00:00:00 2001
From: Nick Magerko 
Date: Thu, 22 Aug 2019 09:13:28 -0700
Subject: [PATCH 24/26] Minor documentation update

---
 programs/zstd.1.md | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/programs/zstd.1.md b/programs/zstd.1.md
index e3f72928..ee5db9d6 100644
--- a/programs/zstd.1.md
+++ b/programs/zstd.1.md
@@ -144,6 +144,11 @@ the last one takes effect.
     Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible.
     _note_ : at the time of this writing, `--adapt` can remain stuck at low speed
     when combined with multiple worker threads (>=2).
+* `--stream-size=#` :
+    When handling input from a stream, `zstd` must guess how large the source size
+    will be when optimizing compression parameters. This option sets the pledged source
+    size of a stream to eliminate that guesswork. Note that the pledged size must be exact;
+    incorrect stream sizes will cause an error.
 * `--rsyncable` :
     `zstd` will periodically synchronize the compression state to make the
     compressed file more rsync-friendly. There is a negligible impact to
@@ -152,12 +157,6 @@ the last one takes effect.
     This feature does not work with `--single-thread`. You probably don't want
     to use it with long range mode, since it will decrease the effectiveness of
     the synchronization points, but your milage may vary.
-* `--stream-size` :
-    When handling input from a stream, `zstd` must guess how large the source size
-    will be when optimizing compression parameters. If the stream size is relatively
-    small, this guess may be a poor one, resulting in a higher compression ratio than
-    expected. This feature will set the source size of a stream. Note that it must
-    be exact; incorrect stream sizes will cause an error.
 * `-D file`:
     use `file` as Dictionary to compress or decompress FILE(s)
 * `--no-dictID`:

From fd486a846abcbf6c92496dd4915d6a46bd4790c2 Mon Sep 17 00:00:00 2001
From: Nick Magerko 
Date: Thu, 22 Aug 2019 09:37:47 -0700
Subject: [PATCH 25/26] Differentiate --stream-size from --size-hint

---
 programs/zstd.1.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/programs/zstd.1.md b/programs/zstd.1.md
index ee5db9d6..1bdc4265 100644
--- a/programs/zstd.1.md
+++ b/programs/zstd.1.md
@@ -145,10 +145,10 @@ the last one takes effect.
     _note_ : at the time of this writing, `--adapt` can remain stuck at low speed
     when combined with multiple worker threads (>=2).
 * `--stream-size=#` :
-    When handling input from a stream, `zstd` must guess how large the source size
-    will be when optimizing compression parameters. This option sets the pledged source
-    size of a stream to eliminate that guesswork. Note that the pledged size must be exact;
-    incorrect stream sizes will cause an error.
+    Sets the pledged source size of input coming from a stream. This value must be exact, as it
+    will be included in the produced frame header. Incorrect stream sizes will cause an error.
+    This information will be used to better optimize compression parameters, resulting in
+    better and potentially faster compression, especially for smaller source sizes.
 * `--rsyncable` :
     `zstd` will periodically synchronize the compression state to make the
     compressed file more rsync-friendly. There is a negligible impact to

From e2030a2c40de6e5b452ce4530896773f08cbd9b7 Mon Sep 17 00:00:00 2001
From: Nick Terrell 
Date: Thu, 22 Aug 2019 17:27:15 -0700
Subject: [PATCH 26/26] [fuzz] Add a DEBUGLOG(3) statement to print file

Enable it by building with this command:

```
./fuzz.py build all --debug 3
```
---
 tests/fuzz/fuzz_helpers.h      | 1 +
 tests/fuzz/regression_driver.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/tests/fuzz/fuzz_helpers.h b/tests/fuzz/fuzz_helpers.h
index 0cf79d0d..0ee85fc7 100644
--- a/tests/fuzz/fuzz_helpers.h
+++ b/tests/fuzz/fuzz_helpers.h
@@ -14,6 +14,7 @@
 #ifndef FUZZ_HELPERS_H
 #define FUZZ_HELPERS_H
 
+#include "debug.h"
 #include "fuzz.h"
 #include "xxhash.h"
 #include "zstd.h"
diff --git a/tests/fuzz/regression_driver.c b/tests/fuzz/regression_driver.c
index 658c685f..e3ebcd5c 100644
--- a/tests/fuzz/regression_driver.c
+++ b/tests/fuzz/regression_driver.c
@@ -36,6 +36,7 @@ int main(int argc, char const **argv) {
     fprintf(stderr, "WARNING: No files passed to %s\n", argv[0]);
   for (i = 0; i < numFiles; ++i) {
     char const *fileName = files[i];
+    DEBUGLOG(3, "Running %s", fileName);
     size_t const fileSize = UTIL_getFileSize(fileName);
     size_t readSize;
     FILE *file;