Merge pull request #236 from Cyan4973/dev

v0.7.2
2016-07-03 21:06:42 +02:00 · 2016-07-03 21:06:42 +02:00 · 2b61f74b1e
commit 2b61f74b1e
parent 77054b452d 5cc1882d45
21 changed files with 838 additions and 225 deletions
--- a/5
+++ b/5
@ -168,6 +168,9 @@ bmix32test: clean

 bmi32test: clean
 	CFLAGS="-O3 -mbmi -m32 -Werror" $(MAKE) -C $(PRGDIR) test
+
+staticAnalyze: clean
+	CPPFLAGS=-g scan-build --status-bugs -v $(MAKE) all	
 endif


@ -187,7 +190,7 @@ gcc5install:

 gcc6install:
 	sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
-	sudo apt-get update -y -qq 
+	sudo apt-get update -y -qq
 	sudo apt-get install -y -qq gcc-6-multilib

 arminstall: clean
--- a/5
+++ b/5
@ -1,3 +1,8 @@
+v0.7.2
+fixed : ZSTD_decompressBlock() using multiple consecutive blocks. Reported by Greg Slazinski.
+fixed : potential segfault on very large files (many gigabytes). Reported by Chip Turner.
+fixed : CLI displays system error message when destination file cannot be created (#231). Reported by Chip Turner.
+
 v0.7.1
 fixed : ZBUFF_compressEnd() called multiple times with too small `dst` buffer, reported by Christophe Chevalier
 fixed : dictBuilder fails if first sample is too small, reported by Руслан Ковалёв
--- a/lib/common/zbuff.h
+++ b/lib/common/zbuff.h
@ -44,10 +44,8 @@ extern "C" {
 /* ***************************************************************
 *  Compiler specifics
 *****************************************************************/
-/*!
-*  ZSTD_DLL_EXPORT :
-*  Enable exporting of functions when building a Windows DLL
-*/
+/* ZSTD_DLL_EXPORT :
+*  Enable exporting of functions when building a Windows DLL */
 #if defined(_WIN32) && defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
 #  define ZSTDLIB_API __declspec(dllexport)
 #else
@ -103,8 +101,8 @@ ZSTDLIB_API size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* dstCap
 *  @return : nb of bytes still present into internal buffer (0 if it's empty)
 *            or an error code, which can be tested using ZBUFF_isError().
 *
-*  Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedCInSize / ZBUFF_recommendedCOutSize
-*  input : ZBUFF_recommendedCInSize==128 KB block size is the internal unit, it improves latency to use this value (skipped buffering).
+*  Hint : _recommended buffer_ sizes (not compulsory) : ZBUFF_recommendedCInSize() / ZBUFF_recommendedCOutSize()
+*  input : ZBUFF_recommendedCInSize==128 KB block size is the internal unit, use this value to reduce intermediate stages (better latency)
 *  output : ZBUFF_recommendedCOutSize==ZSTD_compressBound(128 KB) + 3 + 3 : ensures it's always possible to write/flush/end a full block. Skip some buffering.
 *  By using both, it ensures that input will be entirely consumed, and output will always contain the result, reducing intermediate buffering.
 * **************************************************/
--- a/lib/common/zstd.h
+++ b/lib/common/zstd.h
@ -61,7 +61,7 @@ extern "C" {
 ***************************************/
 #define ZSTD_VERSION_MAJOR    0
 #define ZSTD_VERSION_MINOR    7
-#define ZSTD_VERSION_RELEASE  1
+#define ZSTD_VERSION_RELEASE  2

 #define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
 #define ZSTD_QUOTE(str) #str
@ -200,7 +200,6 @@ ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
 /*--- Dependency ---*/
 #include "mem.h"   /* U32 */

-
 /*--- Constants ---*/
 #define ZSTD_MAGICNUMBER            0xFD2FB527   /* v0.7 */
 #define ZSTD_MAGIC_SKIPPABLE_START  0x184D2A50U
@ -230,19 +229,19 @@ static const size_t ZSTD_skippableHeaderSize = 8;  /* magic number + skippable f
 typedef enum { ZSTD_fast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_btopt } ZSTD_strategy;   /*< from faster to stronger */

 typedef struct {
-    U32 windowLog;     /*< largest match distance : larger == more compression, more memory needed during decompression */
-    U32 chainLog;      /*< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
-    U32 hashLog;       /*< dispatch table : larger == faster, more memory */
-    U32 searchLog;     /*< nb of searches : larger == more compression, slower */
-    U32 searchLength;  /*< match length searched : larger == faster decompression, sometimes less compression */
-    U32 targetLength;  /*< acceptable match size for optimal parser (only) : larger == more compression, slower */
+    U32 windowLog;      /*< largest match distance : larger == more compression, more memory needed during decompression */
+    U32 chainLog;       /*< fully searched segment : larger == more compression, slower, more memory (useless for ZSTD_fast strategy) */
+    U32 hashLog;        /*< dispatch table : larger == faster, more memory */
+    U32 searchLog;      /*< number of searches : larger == more compression, slower */
+    U32 searchLength;   /*< match length searched : larger == faster decompression, sometimes less compression */
+    U32 targetLength;   /*< acceptable match size, for optimal parser only : larger == more compression, slower */
    ZSTD_strategy strategy;
 } ZSTD_compressionParameters;

 typedef struct {
-    U32 contentSizeFlag;  /*< 1: content size will be in frame header (if known). */
-    U32 checksumFlag;     /*< 1: will generate a 22-bits checksum at end of frame, to be used for error detection by decompressor */
-    U32 noDictIDFlag;     /*< 1: no dict ID will be saved into frame header (if dictionary compression) */
+    U32 contentSizeFlag; /*< 1: content size will be in frame header (if known). */
+    U32 checksumFlag;    /*< 1: will generate a 22-bit checksum at end of frame, to be used for error detection by decompressor */
+    U32 noDictIDFlag;    /*< 1: no dict ID will be saved into frame header (if dictionary compression) */
 } ZSTD_frameParameters;

 typedef struct {
@ -270,16 +269,21 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictS

 ZSTDLIB_API unsigned ZSTD_maxCLevel (void);

+/*! ZSTD_getParams() :
+*   same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of a `ZSTD_compressionParameters`.
+*   All fields of `ZSTD_frameParameters` are set to default (0). `srcSize` and `dictSize` are optional, select 0 if not known */
+ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, U64 srcSize, size_t dictSize);
+
 /*! ZSTD_getCParams() :
 *   @return ZSTD_compressionParameters structure for a selected compression level and srcSize.
 *   `srcSize` value is optional, select 0 if not known */
 ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, U64 srcSize, size_t dictSize);

-/*! ZSTD_checkParams() :
+/*! ZSTD_checkCParams() :
 *   Ensure param values remain within authorized range */
 ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params);

-/*! ZSTD_adjustParams() :
+/*! ZSTD_adjustCParams() :
 *   optimize params for a given `srcSize` and `dictSize`.
 *   both values are optional, select `0` if unknown. */
 ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, U64 srcSize, size_t dictSize);
@ -408,6 +412,7 @@ ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t ds

    A few rules to respect :
    - Uncompressed block size must be <= ZSTD_BLOCKSIZE_MAX (128 KB)
+      + If you need to compress more, it's recommended to use ZSTD_compress() instead, since frame metadata costs become negligible.
    - Compressing or decompressing requires a context structure
      + Use ZSTD_createCCtx() and ZSTD_createDCtx()
    - It is necessary to init context before starting
--- a/lib/common/zstd_internal.h
+++ b/lib/common/zstd_internal.h
@ -51,7 +51,7 @@
 /*-*************************************
 *  Common constants
 ***************************************/
-#define ZSTD_OPT_DEBUG 0     // 3 = compression stats;  5 = check encoded sequences;  9 = full logs
+#define ZSTD_OPT_DEBUG 0     /* 3 = compression stats;  5 = check encoded sequences;  9 = full logs */
 #include <stdio.h>
 #if defined(ZSTD_OPT_DEBUG) && ZSTD_OPT_DEBUG>=9
    #define ZSTD_LOG_PARSER(...) printf(__VA_ARGS__)
@ -233,6 +233,6 @@ int ZSTD_isSkipFrame(ZSTD_DCtx* dctx);
 /* custom memory allocation functions */
 void* ZSTD_defaultAllocFunction(void* opaque, size_t size);
 void ZSTD_defaultFreeFunction(void* opaque, void* address);
-static ZSTD_customMem const defaultCustomMem = { ZSTD_defaultAllocFunction, ZSTD_defaultFreeFunction, NULL };
+static const ZSTD_customMem defaultCustomMem = { ZSTD_defaultAllocFunction, ZSTD_defaultFreeFunction, NULL };

 #endif   /* ZSTD_CCOMMON_H_MODULE */
--- a/lib/compress/zbuff_compress.c
+++ b/lib/compress/zbuff_compress.c
@ -170,9 +170,7 @@ size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,

 size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* zbc, const void* dict, size_t dictSize, int compressionLevel)
 {
-    ZSTD_parameters params;
-    memset(&params, 0, sizeof(params));
-    params.cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
+    ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);   /* srcSize passed as 0 (not known) ; frame parameters are left at default (0) */
    return ZBUFF_compressInit_advanced(zbc, dict, dictSize, params, 0);
 }

--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@ -427,21 +427,8 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
 */


-/* Frame descriptor
+/* Frame header :

-    // old
-   1 byte - Alloc :
-   bit 0-3 : windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN   (see zstd_internal.h)
-   bit 4   : reserved for windowLog (must be zero)
-   bit 5   : reserved (must be zero)
-   bit 6-7 : Frame content size : unknown, 1 byte, 2 bytes, 8 bytes
-
-   1 byte - checker :
-   bit 0-1 : dictID (0, 1, 2 or 4 bytes)
-   bit 2-7 : reserved (must be zero)
-
-
-    // new
   1 byte - FrameHeaderDescription :
   bit 0-1 : dictID (0, 1, 2 or 4 bytes)
   bit 2-4 : reserved (must be zero)
@ -453,24 +440,24 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
   bit 0-2 : octal Fractional (1/8th)
   bit 3-7 : Power of 2, with 0 = 1 KB (up to 2 TB)

+   Optional : content size (0, 1, 2, 4 or 8 bytes)
+   0 : unknown
+   1 : 0-255 bytes
+   2 : 256 - 65535+256
+   8 : up to 16 exa
+
   Optional : dictID (0, 1, 2 or 4 bytes)
   Automatic adaptation
   0 : no dictID
   1 : 1 - 255
   2 : 256 - 65535
   4 : all other values
-
-   Optional : content size (0, 1, 2, 4 or 8 bytes)
-   0 : unknown
-   1 : 0-255 bytes
-   2 : 256 - 65535+256
-   8 : up to 16 exa
 */


 /* Block format description

-   Block = Literal Section - Sequences Section
+   Block = Literals Section - Sequences Section
   Prerequisite : size of (compressed) block, maximum size of regenerated data

   1) Literal Section
@ -478,7 +465,7 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
   1.1) Header : 1-5 bytes
        flags: 2 bits
            00 compressed by Huff0
-            01 unused
+            01 repeat
            10 is Raw (uncompressed)
            11 is Rle
            Note : using 01 => Huff0 with precomputed table ?
@ -514,7 +501,7 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
            else           => 5 bytes (2-2-18-18)
            big endian convention

-        1- CTable available (stored into workspace ?)
+        1- CTable available (stored into workspace)
        2- Small input (fast heuristic ? Full comparison ? depend on clevel ?)


@ -936,7 +923,7 @@ _check_compressibility:
    `offsetCode` : distance to match, or 0 == repCode.
    `matchCode` : matchLength - MINMATCH
 */
-MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, size_t offsetCode, size_t matchCode)
+MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t matchCode)
 {
 #if 0  /* for debug */
    static const BYTE* g_start = NULL;
@ -957,7 +944,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v
    *seqStorePtr->litLength++ = (U16)litLength;

    /* match offset */
-    *(seqStorePtr->offset++) = (U32)offsetCode + 1;
+    *(seqStorePtr->offset++) = offsetCode + 1;

    /* match Length */
    if (matchCode>0xFFFF) { seqStorePtr->longLengthID = 2; seqStorePtr->longLengthPos = (U32)(seqStorePtr->matchLength - seqStorePtr->matchLengthStart); }
@ -1063,7 +1050,7 @@ static size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE
 ***************************************/
 static const U32 prime3bytes = 506832829U;
 static U32    ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes)  >> (32-h) ; }
-static size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); }
+MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); }   /* only used in zstd_opt.h */

 static const U32 prime4bytes = 2654435761U;
 static U32    ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
@ -1129,13 +1116,14 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
    const BYTE* const lowest = base + lowestIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
-    size_t offset_1=cctx->rep[0], offset_2=cctx->rep[1];
+    U32 offset_1=cctx->rep[0], offset_2=cctx->rep[1];
+    U32 offsetSaved = 0;

    /* init */
    ip += (ip==lowest);
    {   U32 const maxRep = (U32)(ip-lowest);
-        if (offset_1 > maxRep) offset_1 = 0;
-        if (offset_2 > maxRep) offset_2 = 0;
+        if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
+        if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
    }

    /* Main Search Loop */
@ -1152,13 +1140,13 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
            ip++;
            ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
        } else {
-            size_t offset;
+            U32 offset;
            if ( (matchIndex <= lowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) {
                ip += ((ip-anchor) >> g_searchStrength) + 1;
                continue;
            }
            mLength = ZSTD_count(ip+EQUAL_READ32, match+EQUAL_READ32, iend) + EQUAL_READ32;
-            offset = ip-match;
+            offset = (U32)(ip-match);
            while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
            offset_2 = offset_1;
            offset_1 = offset;
@ -1180,7 +1168,7 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
                 & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
                /* store sequence */
                size_t const rLength = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_2, iend) + EQUAL_READ32;
-                { size_t const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
+                { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; }  /* swap offset_2 <=> offset_1 */
                hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base);
                ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
                ip += rLength;
@ -1189,8 +1177,8 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
    }   }   }

    /* save reps for next block */
-    cctx->savedRep[0] = offset_1 ? (U32)offset_1 : (U32)(iend - base) + 1;
-    cctx->savedRep[1] = offset_2 ? (U32)offset_2 : (U32)(iend - base) + 1;
+    cctx->savedRep[0] = offset_1 ? offset_1 : offsetSaved;
+    cctx->savedRep[1] = offset_2 ? offset_2 : offsetSaved;

    /* Last Literals */
    {   size_t const lastLLSize = iend - anchor;
@ -1364,17 +1352,19 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
    const U32 windowLow = zc->lowLimit;
    U32 matchEndIdx = current+8;
    size_t bestLength = 8;
+#ifdef ZSTD_C_PREDICT
    U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
    U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
    predictedSmall += (predictedSmall>0);
    predictedLarge += (predictedLarge>0);
+#endif /* ZSTD_C_PREDICT */

    hashTable[h] = current;   /* Update Hash Table */

    while (nbCompares-- && (matchIndex > windowLow)) {
        U32* nextPtr = bt + 2*(matchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
-#if 0   /* note : can create issues when hlog small <= 11 */
+#ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
        const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
        if (matchIndex == predictedSmall) {
            /* no need to check length, result known */
@ -1731,17 +1721,15 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
                        size_t* offsetPtr,
                        U32 maxNbAttempts, U32 matchLengthSearch);
    searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
-    U32 rep[ZSTD_REP_INIT];
+    U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1], savedOffset=0;

    /* init */
    ip += (ip==base);
    ctx->nextToUpdate3 = ctx->nextToUpdate;
-    {   U32 i;
-        U32 const maxRep = (U32)(ip-base);
-        for (i=0; i<ZSTD_REP_INIT; i++) {
-            rep[i]=ctx->rep[i];
-            if (rep[i]>maxRep) rep[i]=0;
-    }   }
+    {   U32 const maxRep = (U32)(ip-base);
+        if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
+        if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
+    }

    /* Match Loop */
    while (ip < ilimit) {
@ -1750,9 +1738,9 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
        const BYTE* start=ip+1;

        /* check repCode */
-        if ((rep[0]>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - rep[0]))) {
+        if ((offset_1>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))) {
            /* repcode : we take it */
-            matchLength = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-rep[0], iend) + EQUAL_READ32;
+            matchLength = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
            if (depth==0) goto _storeSequence;
        }

@ -1772,8 +1760,8 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
        if (depth>=1)
        while (ip<ilimit) {
            ip ++;
-            if ((offset) && ((rep[0]>0) & (MEM_read32(ip) == MEM_read32(ip - rep[0])))) {
-                size_t const mlRep = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-rep[0], iend) + EQUAL_READ32;
+            if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
+                size_t const mlRep = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
                int const gain2 = (int)(mlRep * 3);
                int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
                if ((mlRep >= EQUAL_READ32) && (gain2 > gain1))
@ -1791,8 +1779,8 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
            /* let's find an even better one */
            if ((depth==2) && (ip<ilimit)) {
                ip ++;
-                if ((offset) && ((rep[0]>0) & (MEM_read32(ip) == MEM_read32(ip - rep[0])))) {
-                    size_t const ml2 = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-rep[0], iend) + EQUAL_READ32;
+                if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
+                    size_t const ml2 = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
                    int const gain2 = (int)(ml2 * 4);
                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
                    if ((ml2 >= EQUAL_READ32) && (gain2 > gain1))
@ -1813,23 +1801,23 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
        if (offset) {
            while ((start>anchor) && (start>base+offset-ZSTD_REP_MOVE) && (start[-1] == start[-1-offset+ZSTD_REP_MOVE]))   /* only search for offset within prefix */
                { start--; matchLength++; }
-            rep[1] = rep[0]; rep[0] = (U32)(offset - ZSTD_REP_MOVE);
+            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
        }

        /* store sequence */
 _storeSequence:
        {   size_t const litLength = start - anchor;
-            ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH);
+            ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH);
            anchor = ip = start + matchLength;
        }

        /* check immediate repcode */
        while ( (ip <= ilimit)
-             && ((rep[1]>0)
-             & (MEM_read32(ip) == MEM_read32(ip - rep[1])) )) {
+             && ((offset_2>0)
+             & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
            /* store sequence */
-            matchLength = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-rep[1], iend) + EQUAL_READ32;
-            offset = rep[1]; rep[1] = rep[0]; rep[0] = (U32)offset; /* swap repcodes */
+            matchLength = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_2, iend) + EQUAL_READ32;
+            offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
            ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
            ip += matchLength;
            anchor = ip;
@ -1837,11 +1825,8 @@ _storeSequence:
    }   }

    /* Save reps for next block */
-    {   int i;
-        for (i=0; i<ZSTD_REP_NUM; i++) {
-            if (!rep[i]) rep[i] = (U32)(iend - ctx->base) + 1;   /* in case some zero are left */
-            ctx->savedRep[i] = rep[i];
-    }   }
+    ctx->savedRep[0] = offset_1 ? offset_1 : savedOffset;
+    ctx->savedRep[1] = offset_2 ? offset_2 : savedOffset;

    /* Last Literals */
    {   size_t const lastLLSize = iend - anchor;
@ -1900,10 +1885,9 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
                        U32 maxNbAttempts, U32 matchLengthSearch);
    searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS;

-    /* init */
-    U32 rep[ZSTD_REP_INIT];
-    { U32 i; for (i=0; i<ZSTD_REP_INIT; i++) rep[i]=ctx->rep[i]; }
+    U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1];

+    /* init */
    ctx->nextToUpdate3 = ctx->nextToUpdate;
    ip += (ip == prefixStart);

@ -1915,7 +1899,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
        U32 current = (U32)(ip-base);

        /* check repCode */
-        {   const U32 repIndex = (U32)(current+1 - rep[0]);
+        {   const U32 repIndex = (U32)(current+1 - offset_1);
            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
            const BYTE* const repMatch = repBase + repIndex;
            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex))   /* intentional overflow */
@ -1945,7 +1929,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
            current++;
            /* check repCode */
            if (offset) {
-                const U32 repIndex = (U32)(current - rep[0]);
+                const U32 repIndex = (U32)(current - offset_1);
                const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                const BYTE* const repMatch = repBase + repIndex;
                if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex))  /* intentional overflow */
@ -1975,7 +1959,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
                current++;
                /* check repCode */
                if (offset) {
-                    const U32 repIndex = (U32)(current - rep[0]);
+                    const U32 repIndex = (U32)(current - offset_1);
                    const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                    const BYTE* const repMatch = repBase + repIndex;
                    if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex))  /* intentional overflow */
@ -2007,19 +1991,19 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
            const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
            const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
            while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
-            rep[1] = rep[0]; rep[0] = (U32)(offset - ZSTD_REP_MOVE);
+            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
        }

        /* store sequence */
 _storeSequence:
        {   size_t const litLength = start - anchor;
-            ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH);
+            ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH);
            anchor = ip = start + matchLength;
        }

        /* check immediate repcode */
        while (ip <= ilimit) {
-            const U32 repIndex = (U32)((ip-base) - rep[1]);
+            const U32 repIndex = (U32)((ip-base) - offset_2);
            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
            const BYTE* const repMatch = repBase + repIndex;
            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex))  /* intentional overflow */
@ -2027,7 +2011,7 @@ _storeSequence:
                /* repcode detected we should take it */
                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                matchLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
-                offset = rep[1]; rep[1] = rep[0]; rep[0] = (U32)offset;   /* swap offset history */
+                offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset history */
                ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
                ip += matchLength;
                anchor = ip;
@ -2037,7 +2021,7 @@ _storeSequence:
    }   }

    /* Save reps for next block */
-    ctx->savedRep[0] = rep[0]; ctx->savedRep[1] = rep[1]; ctx->savedRep[2] = rep[2];
+    ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;

    /* Last Literals */
    {   size_t const lastLLSize = iend - anchor;
@ -2068,18 +2052,27 @@ static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src,
 }


-
 /* The optimal parser */
 #include "zstd_opt.h"

 static void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
+#ifdef ZSTD_OPT_H_91842398743   /* include guard of zstd_opt.h : set when the optimal parser has been included */
    ZSTD_compressBlock_opt_generic(ctx, src, srcSize);
+#else
+    (void)ctx; (void)src; (void)srcSize;   /* optimal parser not included : mark parameters as used, nothing is compressed */
+    return;
+#endif
 }

 static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
+#ifdef ZSTD_OPT_H_91842398743   /* include guard of zstd_opt.h : set when the optimal parser has been included */
    ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize);
+#else
+    (void)ctx; (void)src; (void)srcSize;   /* optimal parser not included : mark parameters as used, nothing is compressed */
+    return;
+#endif
 }


@ -2426,9 +2419,7 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,

 size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
 {
-    ZSTD_parameters params;
-    memset(&params, 0, sizeof(params));
-    params.cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
+    ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);   /* srcSize passed as 0 (not known) ; frame parameters are left at default (0) */
    ZSTD_LOG_BLOCK("%p: ZSTD_compressBegin_usingDict compressionLevel=%d\n", cctx->base, compressionLevel);
    return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, 0);
 }
@ -2538,11 +2529,9 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,

 size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel)
 {
-    ZSTD_parameters params;
-    memset(&params, 0, sizeof(params));
-    ZSTD_LOG_BLOCK("%p: ZSTD_compress_usingDict srcSize=%d dictSize=%d compressionLevel=%d\n", ctx->base, (int)srcSize, (int)dictSize, compressionLevel);
-    params.cParams =  ZSTD_getCParams(compressionLevel, srcSize, dictSize);
+    ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, dictSize);   /* not const : contentSizeFlag is enabled right after ; other frame parameters stay at 0 */
    params.fParams.contentSizeFlag = 1;
+    ZSTD_LOG_BLOCK("%p: ZSTD_compress_usingDict srcSize=%d dictSize=%d compressionLevel=%d\n", ctx->base, (int)srcSize, (int)dictSize, compressionLevel);
    return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
 }

@ -2577,7 +2566,7 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, ZSTD_pa
    if (!customMem.customAlloc && !customMem.customFree)
        customMem = defaultCustomMem;

-    if (!customMem.customAlloc || !customMem.customFree)
+    if (!customMem.customAlloc || !customMem.customFree)  /* can't have 1/2 custom alloc/free as NULL */
        return NULL;

    {   ZSTD_CDict* const cdict = (ZSTD_CDict*) customMem.customAlloc(customMem.opaque, sizeof(*cdict));
@ -2772,3 +2761,14 @@ ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, U64 srcSize, si
    cp = ZSTD_adjustCParams(cp, srcSize, dictSize);
    return cp;
 }
+
+/*! ZSTD_getParams() :
+*   same as ZSTD_getCParams(), but @return a `ZSTD_parameters` object instead of a `ZSTD_compressionParameters`.
+*   All fields of `ZSTD_frameParameters` are set to default (0) ; arguments are forwarded unchanged to ZSTD_getCParams() */
+ZSTD_parameters ZSTD_getParams(int compressionLevel, U64 srcSize, size_t dictSize) {
+    ZSTD_parameters params;
+    ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSize, dictSize);
+    memset(&params, 0, sizeof(params));   /* zeroes the whole object, frame parameters included */
+    params.cParams = cParams;             /* then install the selected compression parameters */
+    return params;
+}
--- a/lib/compress/zstd_opt.h
+++ b/lib/compress/zstd_opt.h
@ -34,6 +34,10 @@
 /* Note : this file is intended to be included within zstd_compress.c */


+#ifndef ZSTD_OPT_H_91842398743
+#define ZSTD_OPT_H_91842398743
+
+
 #define ZSTD_FREQ_DIV   5

 /*-*************************************
@ -110,7 +114,7 @@ FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* ssPtr, U32 litLength, const BY

    /* literals */
    if (ssPtr->cachedLiterals == literals) {
-        U32 additional = litLength - ssPtr->cachedLitLength;
+        U32 const additional = litLength - ssPtr->cachedLitLength;
        const BYTE* literals2 = ssPtr->cachedLiterals + ssPtr->cachedLitLength;
        price = ssPtr->cachedPrice + additional * ssPtr->log2litSum;
        for (u=0; u < additional; u++)
@ -150,7 +154,7 @@ FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* ssPtr, U32 litLength, const BY
 FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength)
 {
    /* offset */
-    BYTE offCode = (BYTE)ZSTD_highbit32(offset+1);
+    BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
    U32 price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode]+1);

    /* match Length */
@ -196,7 +200,7 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B
    }

    /* match offset */
-	{   BYTE offCode = (BYTE)ZSTD_highbit32(offset+1);
+	{   BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
 		seqStorePtr->offCodeSum++;
 		seqStorePtr->offCodeFreq[offCode]++;
 	}
@ -232,7 +236,6 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B



-
 /* Update hashTable3 up to ip (excluded)
   Assumption : always within prefix (ie. not within extDict) */
 FORCE_INLINE
@ -1039,3 +1042,5 @@ _storeSequence:   /* cur, last_pos, best_mlen, best_off have to be set */
        seqStorePtr->lit += lastLLSize;
    }
 }
+
+#endif  /* ZSTD_OPT_H_91842398743 */
--- a/lib/decompress/zbuff_decompress.c
+++ b/lib/decompress/zbuff_decompress.c
@ -173,7 +173,7 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbd,
                    if (ZSTD_isError(hSize)) return hSize;
                    if (toLoad > (size_t)(iend-ip)) {   /* not enough input to load full header */
                        memcpy(zbd->headerBuffer + zbd->lhSize, ip, iend-ip);
-                        zbd->lhSize += iend-ip; ip = iend; notDone = 0;
+                        zbd->lhSize += iend-ip;
                        *dstCapacityPtr = 0;
                        return (hSize - zbd->lhSize) + ZSTD_blockHeaderSize;   /* remaining header bytes + next block header */
                    }
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@ -207,20 +207,8 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
 */


-/* Frame descriptor
+/* Frame Header :

-    // old
-   1 byte - Alloc :
-   bit 0-3 : windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN   (see zstd_internal.h)
-   bit 4   : reserved for windowLog (must be zero)
-   bit 5   : reserved (must be zero)
-   bit 6-7 : Frame content size : unknown, 1 byte, 2 bytes, 8 bytes
-
-   1 byte - checker :
-   bit 0-1 : dictID (0, 1, 2 or 4 bytes)
-   bit 2-7 : reserved (must be zero)
-
-    // new
   1 byte - FrameHeaderDescription :
   bit 0-1 : dictID (0, 1, 2 or 4 bytes)
   bit 2   : checksumFlag
@ -454,16 +442,14 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                          const void* src, size_t srcSize)   /* note : srcSize < BLOCKSIZE */
 {
    const BYTE* const istart = (const BYTE*) src;
-    litBlockType_t lbt;

    if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
-    lbt = (litBlockType_t)(istart[0]>> 6);

-    switch(lbt)
+    switch((litBlockType_t)(istart[0]>> 6))
    {
    case lbt_huffman:
        {   size_t litSize, litCSize, singleStream=0;
-            U32 lhSize = ((istart[0]) >> 4) & 3;
+            U32 lhSize = (istart[0] >> 4) & 3;
            if (srcSize < 5) return ERROR(corruption_detected);   /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for lhSize, + cSize (+nbSeq) */
            switch(lhSize)
            {
@ -930,8 +916,11 @@ size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
                            void* dst, size_t dstCapacity,
                      const void* src, size_t srcSize)
 {
+    size_t dSize;
    ZSTD_checkContinuity(dctx, dst);
-    return ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize);
+    dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize);
+    dctx->previousDstEnd = (char*)dst + dSize;
+    return dSize;
 }


--- a/lib/dictBuilder/zdict.c
+++ b/lib/dictBuilder/zdict.c
@ -826,7 +826,6 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
    MEM_writeLE32(dstPtr+4, repStartValue[1]);
    MEM_writeLE32(dstPtr+8, repStartValue[2]);
 #endif
-    dstPtr += 12;
    eSize += 12;

 _cleanup:
@ -906,6 +905,7 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
 }

 #define DIB_MINSAMPLESSIZE (DIB_FASTSEGMENTSIZE*3)
+#define EXIT(e) { dictSize = ERROR(e); goto _cleanup; }
 /*! ZDICT_trainFromBuffer_unsafe() :
 *   `samplesBuffer` must be followed by noisy guard band.
 *   @return : size of dictionary.
@ -923,12 +923,12 @@ size_t ZDICT_trainFromBuffer_unsafe(
    size_t dictSize = 0;

    /* checks */
-    if (maxDictSize <= g_provision_entropySize + g_min_fast_dictContent) return ERROR(dstSize_tooSmall);
+    if (maxDictSize <= g_provision_entropySize + g_min_fast_dictContent) EXIT(dstSize_tooSmall);
    if (!dictList) return ERROR(memory_allocation);

    /* init */
    { unsigned u; for (u=0, sBuffSize=0; u<nbSamples; u++) sBuffSize += samplesSizes[u]; }
-    if (sBuffSize < DIB_MINSAMPLESSIZE) return 0;   /* not enough source to create dictionary */
+    if (sBuffSize < DIB_MINSAMPLESSIZE) EXIT(no_error);   /* not enough source to create dictionary */
    ZDICT_initDictItem(dictList);
    g_displayLevel = params.notificationLevel;
    if (selectivity==0) selectivity = g_selectivity_default;
@ -948,9 +948,9 @@ size_t ZDICT_trainFromBuffer_unsafe(
            DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize);
            DISPLAYLEVEL(3, "list %u best segments \n", nb);
            for (u=1; u<=nb; u++) {
-                U32 p = dictList[u].pos;
-                U32 l = dictList[u].length;
-                U32 d = MIN(40, l);
+                U32 const p = dictList[u].pos;
+                U32 const l = dictList[u].length;
+                U32 const d = MIN(40, l);
                DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
                             u, l, p, dictList[u].savings);
                ZDICT_printHex(3, (const char*)samplesBuffer+p, d);
@ -966,7 +966,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
            for (u=1; u<dictList->pos; u++) {
                U32 l = dictList[u].length;
                ptr -= l;
-                if (ptr<(BYTE*)dictBuffer) return ERROR(GENERIC);   /* should not happen */
+                if (ptr<(BYTE*)dictBuffer) EXIT(GENERIC);   /* should not happen */
                memcpy(ptr, (const char*)samplesBuffer+dictList[u].pos, l);
        }   }

@ -983,7 +983,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
                                                             params);
    }

-    /* clean up */
+_cleanup :
    free(dictList);
    return dictSize;
 }
--- a/programs/.gitignore
+++ b/programs/.gitignore
@ -43,6 +43,7 @@ _*
 tmp*
 *.zst
 result
+out

 # fuzzer
 afl
--- a/programs/bench.c
+++ b/programs/bench.c
@ -148,16 +148,14 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
    size_t const maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024);   /* add some room for safety */
    void* const compressedBuffer = malloc(maxCompressedSize);
    void* const resultBuffer = malloc(srcSize);
-    ZSTD_CCtx* refCtx = ZSTD_createCCtx();
    ZSTD_CCtx* ctx = ZSTD_createCCtx();
-    ZSTD_DCtx* refDCtx = ZSTD_createDCtx();
    ZSTD_DCtx* dctx = ZSTD_createDCtx();
    U32 nbBlocks;
    UTIL_time_t ticksPerSecond;

    /* checks */
-    if (!compressedBuffer || !resultBuffer || !blockTable || !refCtx || !ctx || !refDCtx || !dctx)
-        EXM_THROW(31, "not enough memory");
+    if (!compressedBuffer || !resultBuffer || !blockTable || !ctx || !dctx)
+        EXM_THROW(31, "allocation error : not enough memory");

    /* init */
    if (strlen(displayName)>17) displayName += strlen(displayName)-17;   /* can only display 17 characters */
@ -213,12 +211,15 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
            DISPLAYLEVEL(2, "%2i-%-17.17s :%10u ->\r", testNb, displayName, (U32)srcSize);
            memset(compressedBuffer, 0xE5, maxCompressedSize);  /* warm up and erase result buffer */

-            UTIL_sleepMilli(1); /* give processor time to other processes */
+            UTIL_sleepMilli(1);  /* give processor time to other processes */
            UTIL_waitForNextTick(ticksPerSecond);
            UTIL_getTime(&clockStart);

-            {   U32 nbLoops = 0;
-                ZSTD_CDict* cdict = ZSTD_createCDict(dictBuffer, dictBufferSize, cLevel);
+            {   size_t const refSrcSize = (nbBlocks == 1) ? srcSize : 0;
+                ZSTD_parameters const zparams = ZSTD_getParams(cLevel, refSrcSize, dictBufferSize);
+                ZSTD_customMem const cmem = { NULL, NULL, NULL };
+                U32 nbLoops = 0;
+                ZSTD_CDict* cdict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, zparams, cmem);
                if (cdict==NULL) EXM_THROW(1, "ZSTD_createCDict() allocation failure");
                do {
                    U32 blockNb;
@ -227,7 +228,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
                                            blockTable[blockNb].cPtr,  blockTable[blockNb].cRoom,
                                            blockTable[blockNb].srcPtr,blockTable[blockNb].srcSize,
                                            cdict);
-                        if (ZSTD_isError(rSize)) EXM_THROW(1, "ZSTD_compress_usingPreparedCCtx() failed : %s", ZSTD_getErrorName(rSize));
+                        if (ZSTD_isError(rSize)) EXM_THROW(1, "ZSTD_compress_usingCDict() failed : %s", ZSTD_getErrorName(rSize));
                        blockTable[blockNb].cSize = rSize;
                    }
                    nbLoops++;
@ -264,7 +265,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
                            blockTable[blockNb].cPtr, blockTable[blockNb].cSize,
                            ddict);
                        if (ZSTD_isError(regenSize)) {
-                            DISPLAY("ZSTD_decompress_usingPreparedDCtx() failed on block %u : %s  \n",
+                            DISPLAY("ZSTD_decompress_usingDDict() failed on block %u : %s  \n",
                                      blockNb, ZSTD_getErrorName(regenSize));
                            clockLoop = 0;   /* force immediate test end */
                            break;
@ -321,9 +322,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
    free(blockTable);
    free(compressedBuffer);
    free(resultBuffer);
-    ZSTD_freeCCtx(refCtx);
    ZSTD_freeCCtx(ctx);
-    ZSTD_freeDCtx(refDCtx);
    ZSTD_freeDCtx(dctx);
    return 0;
 }
--- a/programs/datagen.c
+++ b/programs/datagen.c
@ -23,12 +23,19 @@
    - source repository : https://github.com/Cyan4973/zstd
 */

+/* *************************************
+*  Compiler Options
+***************************************/
+#define _CRT_SECURE_NO_WARNINGS  /* removes Visual warning on strerror() */
+
+
 /*-************************************
 *  Includes
 **************************************/
 #include <stdlib.h>    /* malloc */
 #include <stdio.h>     /* FILE, fwrite, fprintf */
 #include <string.h>    /* memcpy */
+#include <errno.h>     /* errno */
 #include "mem.h"       /* U32 */


@ -104,7 +111,7 @@ static BYTE RDG_genChar(U32* seed, const BYTE* ldt)
    U32 const id = RDG_rand(seed) & LTMASK;
    //TRACE(" %u : \n", id);
    //TRACE(" %4u [%4u] ; val : %4u \n", id, id&255, ldt[id]);
-    return (ldt[id]);  /* memory-sanitizer fails here, stating "uninitialized value" when table initialized with 0.0. Checked : table is fully initialized */
+    return ldt[id];  /* memory-sanitizer fails here, stating "uninitialized value" when table initialized with P==0.0. Checked : table is fully initialized */
 }


@ -115,8 +122,7 @@ static U32 RDG_rand15Bits (unsigned* seedPtr)

 static U32 RDG_randLength(unsigned* seedPtr)
 {
-    if (RDG_rand(seedPtr) & 7)
-        return (RDG_rand(seedPtr) & 0xF);
+    if (RDG_rand(seedPtr) & 7) return (RDG_rand(seedPtr) & 0xF);   /* small length */
    return (RDG_rand(seedPtr) & 0x1FF) + 0xF;
 }

@ -185,10 +191,10 @@ void RDG_genStdout(unsigned long long size, double matchProba, double litProba,
    size_t const stdDictSize = 32 KB;
    BYTE* const buff = (BYTE*)malloc(stdDictSize + stdBlockSize);
    U64 total = 0;
-    BYTE ldt[LTSIZE];
+    BYTE ldt[LTSIZE];   /* literals distribution table */

    /* init */
-    if (buff==NULL) { fprintf(stdout, "not enough memory\n"); exit(1); }
+    if (buff==NULL) { fprintf(stderr, "datagen: error: %s \n", strerror(errno)); exit(1); }
    if (litProba<=0.0) litProba = matchProba / 4.5;
    memset(ldt, '0', sizeof(ldt));
    RDG_fillLiteralDistrib(ldt, litProba);
--- a/programs/fileio.c
+++ b/programs/fileio.c
@ -41,13 +41,14 @@
 /* *************************************
 *  Compiler Options
 ***************************************/
-#define _POSIX_SOURCE 1        /* enable %llu on Windows */
+#define _POSIX_SOURCE 1          /* enable %llu on Windows */
+#define _CRT_SECURE_NO_WARNINGS  /* removes Visual warning on strerror() */


 /*-*************************************
 *  Includes
 ***************************************/
-#include "util.h"       /* Compiler options, UTIL_GetFileSize */
+#include "util.h"       /* Compiler options, UTIL_GetFileSize, _LARGEFILE64_SOURCE */
 #include <stdio.h>      /* fprintf, fopen, fread, _fileno, stdin, stdout */
 #include <stdlib.h>     /* malloc, free */
 #include <string.h>     /* strcmp, strlen */
@ -58,7 +59,6 @@
 #include "fileio.h"
 #define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_magicNumber, ZSTD_frameHeaderSize_max */
 #include "zstd.h"
-#include "zstd_internal.h" /* MIN, KB, MB */
 #define ZBUFF_STATIC_LINKING_ONLY
 #include "zbuff.h"

@ -84,6 +84,10 @@
 /*-*************************************
 *  Constants
 ***************************************/
+#define KB *(1<<10)
+#define MB *(1<<20)
+#define GB *(1U<<30)
+
 #define _1BIT  0x01
 #define _2BITS 0x03
 #define _3BITS 0x07
@ -113,21 +117,17 @@ static U32 g_displayLevel = 2;   /* 0 : no display;   1: errors;   2 : + result
 void FIO_setNotificationLevel(unsigned level) { g_displayLevel=level; }

 #define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
-            if ((FIO_GetMilliSpan(g_time) > refreshRate) || (g_displayLevel>=4)) \
+            if ((clock() - g_time > refreshRate) || (g_displayLevel>=4)) \
            { g_time = clock(); DISPLAY(__VA_ARGS__); \
            if (g_displayLevel>=4) fflush(stdout); } }
-static const unsigned refreshRate = 150;
+static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
 static clock_t g_time = 0;

-static unsigned FIO_GetMilliSpan(clock_t nPrevious)
-{
-    clock_t const nCurrent = clock();
-    return (unsigned)(((nCurrent - nPrevious) * 1000) / CLOCKS_PER_SEC);
-}
+#define MIN(a,b)    ((a) < (b) ? (a) : (b))


 /*-*************************************
-*  Local Parameters
+*  Local Parameters - Not thread safe
 ***************************************/
 static U32 g_overwrite = 0;
 void FIO_overwriteMode(void) { g_overwrite=1; }
@ -175,7 +175,7 @@ static FILE* FIO_openSrcFile(const char* srcFileName)
        f = fopen(srcFileName, "rb");
    }

-    if ( f==NULL ) DISPLAYLEVEL(1, "zstd: %s: No such file\n", srcFileName);
+    if ( f==NULL ) DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno));

    return f;
 }
@ -201,18 +201,20 @@ static FILE* FIO_openDstFile(const char* dstFileName)
                if (g_displayLevel <= 1) {
                    /* No interaction possible */
                    DISPLAY("zstd: %s already exists; not overwritten  \n", dstFileName);
-                    return 0;
+                    return NULL;
                }
                DISPLAY("zstd: %s already exists; do you wish to overwrite (y/N) ? ", dstFileName);
                {   int ch = getchar();
                    if ((ch!='Y') && (ch!='y')) {
                        DISPLAY("    not overwritten  \n");
-                        return 0;
+                        return NULL;
                    }
                    while ((ch!=EOF) && (ch!='\n')) ch = getchar();  /* flush rest of input line */
        }   }   }
        f = fopen( dstFileName, "wb" );
    }
+
+    if (f==NULL) DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno));
    return f;
 }

@ -233,18 +235,18 @@ static size_t FIO_loadFile(void** bufferPtr, const char* fileName)

    DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName);
    fileHandle = fopen(fileName, "rb");
-    if (fileHandle==0) EXM_THROW(31, "Error opening file %s", fileName);
+    if (fileHandle==0) EXM_THROW(31, "zstd: %s: %s", fileName, strerror(errno));
    fileSize = UTIL_getFileSize(fileName);
    if (fileSize > MAX_DICT_SIZE) {
        int seekResult;
        if (fileSize > 1 GB) EXM_THROW(32, "Dictionary file %s is too large", fileName);   /* avoid extreme cases */
        DISPLAYLEVEL(2,"Dictionary %s is too large : using last %u bytes only \n", fileName, MAX_DICT_SIZE);
        seekResult = fseek(fileHandle, (long int)(fileSize-MAX_DICT_SIZE), SEEK_SET);   /* use end of file */
-        if (seekResult != 0) EXM_THROW(33, "Error seeking into file %s", fileName);
+        if (seekResult != 0) EXM_THROW(33, "zstd: %s: %s", fileName, strerror(errno));
        fileSize = MAX_DICT_SIZE;
    }
-    *bufferPtr = (BYTE*)malloc((size_t)fileSize);
-    if (*bufferPtr==NULL) EXM_THROW(34, "Allocation error : not enough memory for dictBuffer");
+    *bufferPtr = malloc((size_t)fileSize);
+    if (*bufferPtr==NULL) EXM_THROW(34, "zstd: %s", strerror(errno));
    { size_t const readSize = fread(*bufferPtr, 1, (size_t)fileSize, fileHandle);
      if (readSize!=fileSize) EXM_THROW(35, "Error reading dictionary file %s", fileName); }
    fclose(fileHandle);
@ -273,14 +275,14 @@ static cRess_t FIO_createCResources(const char* dictFileName)
    cRess_t ress;

    ress.ctx = ZBUFF_createCCtx();
-    if (ress.ctx == NULL) EXM_THROW(30, "Allocation error : can't create ZBUFF context");
+    if (ress.ctx == NULL) EXM_THROW(30, "zstd: allocation error : can't create ZBUFF context");

    /* Allocate Memory */
    ress.srcBufferSize = ZBUFF_recommendedCInSize();
    ress.srcBuffer = malloc(ress.srcBufferSize);
    ress.dstBufferSize = ZBUFF_recommendedCOutSize();
    ress.dstBuffer = malloc(ress.dstBufferSize);
-    if (!ress.srcBuffer || !ress.dstBuffer) EXM_THROW(31, "Allocation error : not enough memory");
+    if (!ress.srcBuffer || !ress.dstBuffer) EXM_THROW(31, "zstd: allocation error : not enough memory");

    /* dictionary */
    ress.dictBufferSize = FIO_loadFile(&(ress.dictBuffer), dictFileName);
@ -295,7 +297,7 @@ static void FIO_freeCResources(cRess_t ress)
    free(ress.dstBuffer);
    free(ress.dictBuffer);
    errorCode = ZBUFF_freeCCtx(ress.ctx);
-    if (ZBUFF_isError(errorCode)) EXM_THROW(38, "Error : can't release ZBUFF context resource : %s", ZBUFF_getErrorName(errorCode));
+    if (ZBUFF_isError(errorCode)) EXM_THROW(38, "zstd: error : can't release ZBUFF context resource : %s", ZBUFF_getErrorName(errorCode));
 }


@ -315,9 +317,7 @@ static int FIO_compressFilename_internal(cRess_t ress,
    U64 const fileSize = UTIL_getFileSize(srcFileName);

    /* init */
-    {   ZSTD_parameters params;
-        memset(&params, 0, sizeof(params));
-        params.cParams = ZSTD_getCParams(cLevel, fileSize, ress.dictBufferSize);
+    {   ZSTD_parameters params = ZSTD_getParams(cLevel, fileSize, ress.dictBufferSize);
        params.fParams.contentSizeFlag = 1;
        params.fParams.checksumFlag = g_checksumFlag;
        params.fParams.noDictIDFlag = !g_dictIDFlag;
@ -375,8 +375,8 @@ static int FIO_compressFilename_internal(cRess_t ress,
 }


-/*! FIO_compressFilename_internal() :
- *  same as FIO_compressFilename_extRess(), with ress.destFile already opened (typically stdout)
+/*! FIO_compressFilename_srcFile() :
+ *  note : ress.destFile already opened
 *  @return : 0 : compression completed correctly,
 *            1 : missing or pb opening srcFileName
 */
@ -417,7 +417,7 @@ static int FIO_compressFilename_dstFile(cRess_t ress,

    result = FIO_compressFilename_srcFile(ress, dstFileName, srcFileName, cLevel);

-    if (fclose(ress.dstFile)) EXM_THROW(28, "Write error : cannot properly close %s", dstFileName);
+    if (fclose(ress.dstFile)) { DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno)); result=1; }
    if (result!=0) remove(dstFileName);   /* remove operation artefact */
    return result;
 }
@ -429,13 +429,13 @@ int FIO_compressFilename(const char* dstFileName, const char* srcFileName,
    clock_t const start = clock();

    cRess_t const ress = FIO_createCResources(dictFileName);
-    int const issueWithSrcFile = FIO_compressFilename_dstFile(ress, dstFileName, srcFileName, compressionLevel);
-    FIO_freeCResources(ress);
+    int const result = FIO_compressFilename_dstFile(ress, dstFileName, srcFileName, compressionLevel);

-    {   double const seconds = (double)(clock() - start) / CLOCKS_PER_SEC;
-        DISPLAYLEVEL(4, "Completed in %.2f sec \n", seconds);
-    }
-    return issueWithSrcFile;
+    double const seconds = (double)(clock() - start) / CLOCKS_PER_SEC;
+    DISPLAYLEVEL(4, "Completed in %.2f sec \n", seconds);
+
+    FIO_freeCResources(ress);
+    return result;
 }


--- a/programs/fuzzer.c
+++ b/programs/fuzzer.c
@ -40,7 +40,7 @@
 #include <sys/timeb.h>   /* timeb */
 #include <string.h>      /* strcmp */
 #include <time.h>        /* clock_t */
-#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_compressContinue */
+#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_compressContinue, ZSTD_compressBlock */
 #include "zstd.h"        /* ZSTD_VERSION_STRING, ZSTD_getErrorCode */
 #include "zdict.h"       /* ZDICT_trainFromBuffer */
 #include "datagen.h"     /* RDG_genBuffer */
@ -109,9 +109,9 @@ static unsigned FUZ_highbit32(U32 v32)
 }


-#define CHECKTEST(var, fn)  size_t const var = fn; if (ZSTD_isError(var)) goto _output_error
-#define CHECK(fn)  { CHECKTEST(err, fn); }
-#define CHECKPLUS(var, fn, more)  { CHECKTEST(var, fn); more; }
+#define CHECK_V(var, fn)  size_t const var = fn; if (ZSTD_isError(var)) goto _output_error
+#define CHECK(fn)  { CHECK_V(err, fn); }
+#define CHECKPLUS(var, fn, more)  { CHECK_V(var, fn); more; }
 static int basicUnitTests(U32 seed, double compressibility)
 {
    size_t const CNBuffSize = 5 MB;
@ -216,10 +216,8 @@ static int basicUnitTests(U32 seed, double compressibility)

        DISPLAYLEVEL(4, "test%3i : check content size on duplicated context : ", testNb++);
        {   size_t const testSize = CNBuffSize / 3;
-            {   ZSTD_compressionParameters const cPar = ZSTD_getCParams(2, testSize, dictSize);
-                ZSTD_frameParameters const fPar = { 1 , 0 , 0 };
-                ZSTD_parameters p;
-                p.cParams = cPar; p.fParams = fPar;
+            {   ZSTD_parameters p = ZSTD_getParams(2, testSize, dictSize);
+                p.fParams.contentSizeFlag = 1;
                CHECK( ZSTD_compressBegin_advanced(ctxOrig, CNBuffer, dictSize, p, testSize-1) );
            }
            CHECK( ZSTD_copyCCtx(ctxDuplicated, ctxOrig) );
@ -277,10 +275,8 @@ static int basicUnitTests(U32 seed, double compressibility)
        DISPLAYLEVEL(4, "OK \n");

        DISPLAYLEVEL(4, "test%3i : compress without dictID : ", testNb++);
-        {   ZSTD_frameParameters const fParams = { 0 /*contentSize*/, 0 /*checksum*/, 1 /*NoDictID*/ };
-            ZSTD_compressionParameters const cParams = ZSTD_getCParams(3, CNBuffSize, dictSize);
-            ZSTD_parameters p;
-            p.cParams = cParams; p.fParams = fParams;
+        {   ZSTD_parameters p = ZSTD_getParams(3, CNBuffSize, dictSize);
+            p.fParams.noDictIDFlag = 1;
            cSize = ZSTD_compress_advanced(cctx, compressedBuffer, ZSTD_compressBound(CNBuffSize),
                                           CNBuffer, CNBuffSize,
                                           dictBuffer, dictSize, p);
@ -320,6 +316,7 @@ static int basicUnitTests(U32 seed, double compressibility)
        ZSTD_DCtx* const dctx = ZSTD_createDCtx();
        static const size_t blockSize = 100 KB;
        static const size_t dictSize = 16 KB;
+        size_t cSize2;

        /* basic block compression */
        DISPLAYLEVEL(4, "test%3i : Block compression test : ", testNb++);
@ -330,7 +327,7 @@ static int basicUnitTests(U32 seed, double compressibility)

        DISPLAYLEVEL(4, "test%3i : Block decompression test : ", testNb++);
        CHECK( ZSTD_decompressBegin(dctx) );
-        { CHECKTEST(r, ZSTD_decompressBlock(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) );
+        { CHECK_V(r, ZSTD_decompressBlock(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) );
          if (r != blockSize) goto _output_error; }
        DISPLAYLEVEL(4, "OK \n");

@ -339,11 +336,15 @@ static int basicUnitTests(U32 seed, double compressibility)
        CHECK( ZSTD_compressBegin_usingDict(cctx, CNBuffer, dictSize, 5) );
        cSize = ZSTD_compressBlock(cctx, compressedBuffer, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize, blockSize);
        if (ZSTD_isError(cSize)) goto _output_error;
+        cSize2 = ZSTD_compressBlock(cctx, (char*)compressedBuffer+cSize, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize+blockSize, blockSize);
+        if (ZSTD_isError(cSize2)) goto _output_error;
        DISPLAYLEVEL(4, "OK \n");

        DISPLAYLEVEL(4, "test%3i : Dictionary Block decompression test : ", testNb++);
        CHECK( ZSTD_decompressBegin_usingDict(dctx, CNBuffer, dictSize) );
-        { CHECKTEST( r, ZSTD_decompressBlock(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) );
+        { CHECK_V( r, ZSTD_decompressBlock(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) );
+          if (r != blockSize) goto _output_error; }
+        { CHECK_V( r, ZSTD_decompressBlock(dctx, (char*)decodedBuffer+blockSize, CNBuffSize, (char*)compressedBuffer+cSize, cSize2) );
          if (r != blockSize) goto _output_error; }
        DISPLAYLEVEL(4, "OK \n");

@ -361,7 +362,7 @@ static int basicUnitTests(U32 seed, double compressibility)
        sampleSize += 96 KB;
        cSize = ZSTD_compress(compressedBuffer, ZSTD_compressBound(sampleSize), CNBuffer, sampleSize, 1);
        if (ZSTD_isError(cSize)) goto _output_error;
-        { CHECKTEST(regenSize, ZSTD_decompress(decodedBuffer, sampleSize, compressedBuffer, cSize));
+        { CHECK_V(regenSize, ZSTD_decompress(decodedBuffer, sampleSize, compressedBuffer, cSize));
          if (regenSize!=sampleSize) goto _output_error; }
        DISPLAYLEVEL(4, "OK \n");
    }
@ -370,12 +371,12 @@ static int basicUnitTests(U32 seed, double compressibility)
    #define ZEROESLENGTH 100
    DISPLAYLEVEL(4, "test%3i : compress %u zeroes : ", testNb++, ZEROESLENGTH);
    memset(CNBuffer, 0, ZEROESLENGTH);
-    { CHECKTEST(r, ZSTD_compress(compressedBuffer, ZSTD_compressBound(ZEROESLENGTH), CNBuffer, ZEROESLENGTH, 1) );
+    { CHECK_V(r, ZSTD_compress(compressedBuffer, ZSTD_compressBound(ZEROESLENGTH), CNBuffer, ZEROESLENGTH, 1) );
      cSize = r; }
    DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/ZEROESLENGTH*100);

    DISPLAYLEVEL(4, "test%3i : decompress %u zeroes : ", testNb++, ZEROESLENGTH);
-    { CHECKTEST(r, ZSTD_decompress(decodedBuffer, ZEROESLENGTH, compressedBuffer, cSize) );
+    { CHECK_V(r, ZSTD_decompress(decodedBuffer, ZEROESLENGTH, compressedBuffer, cSize) );
      if (r != ZEROESLENGTH) goto _output_error; }
    DISPLAYLEVEL(4, "OK \n");

@ -403,13 +404,13 @@ static int basicUnitTests(U32 seed, double compressibility)
            ((BYTE*)CNBuffer)[i+2] = _3BytesSeqs[id][2];
    }   }}
    DISPLAYLEVEL(4, "test%3i : compress lots 3-bytes sequences : ", testNb++);
-    { CHECKTEST(r, ZSTD_compress(compressedBuffer, ZSTD_compressBound(_3BYTESTESTLENGTH),
+    { CHECK_V(r, ZSTD_compress(compressedBuffer, ZSTD_compressBound(_3BYTESTESTLENGTH),
                                 CNBuffer, _3BYTESTESTLENGTH, 19) );
      cSize = r; }
    DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/_3BYTESTESTLENGTH*100);

    DISPLAYLEVEL(4, "test%3i : decompress lots 3-bytes sequence : ", testNb++);
-    { CHECKTEST(r, ZSTD_decompress(decodedBuffer, _3BYTESTESTLENGTH, compressedBuffer, cSize) );
+    { CHECK_V(r, ZSTD_decompress(decodedBuffer, _3BYTESTESTLENGTH, compressedBuffer, cSize) );
      if (r != _3BYTESTESTLENGTH) goto _output_error; }
    DISPLAYLEVEL(4, "OK \n");

--- a/programs/zbufftest.c
+++ b/programs/zbufftest.c
@ -381,13 +381,9 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compres
            {   size_t const dictStart = FUZ_rand(&lseed) % (srcBufferSize - dictSize);
                dict = srcBuffer + dictStart;
            }
-            {   ZSTD_compressionParameters const cPar = ZSTD_getCParams(cLevel, 0, dictSize);
-                U32 const checksum = FUZ_rand(&lseed) & 1;
-                U32 const noDictIDFlag = FUZ_rand(&lseed) & 1;
-                ZSTD_frameParameters const fPar = { 0, checksum, noDictIDFlag };
-                ZSTD_parameters params;
-                params.cParams = cPar;
-                params.fParams = fPar;
+            {   ZSTD_parameters params = ZSTD_getParams(cLevel, 0, dictSize);
+                params.fParams.checksumFlag = FUZ_rand(&lseed) & 1;
+                params.fParams.noDictIDFlag = FUZ_rand(&lseed) & 1;
                {   size_t const initError = ZBUFF_compressInit_advanced(zc, dict, dictSize, params, 0);
                    CHECK (ZBUFF_isError(initError),"init error : %s", ZBUFF_getErrorName(initError));
        }   }   }
--- a/programs/zstd.1
+++ b/programs/zstd.1
@ -33,7 +33,8 @@ It is based on the \fBLZ77\fR family, with further FSE & huff0 entropy stages.
 It also features a very fast decoder, with speed > 500 MB/s per core.

 \fBzstd\fR command line is generally similar to gzip, but features the following differences :
- - Original files are preserved
+ - Source files are preserved by default
+   It's possible to remove them automatically by using the \fB--rm\fR option
 - By default, when compressing a single file, \fBzstd\fR displays progress notifications and result summary.
     Use \fB-q\fR to turn them off

@ -57,6 +58,19 @@ It also features a very fast decoder, with speed > 500 MB/s per core.
 .BR \-f ", " --force
 overwrite output without prompting
 .TP
+.BR \-c ", " --stdout
+ force write to standard output, even if it is the console
+.TP
+.BR \--rm
+ remove source file(s) after successful compression or decompression
+.TP
+.BR \-k ", " --keep
+ keep source file(s) after successful compression or decompression.
+ This is the default behavior.
+.TP
+.BR \-r
+ operate recursively on directories
+.TP
 .BR \-h/\-H ", " --help
 display help/long help and exit
 .TP
@ -69,14 +83,11 @@ It also features a very fast decoder, with speed > 500 MB/s per core.
 .BR \-q ", " --quiet
 suppress warnings and notifications; specify twice to suppress errors too
 .TP
-.BR \-c ", " --stdout
- force write to standard output, even if it is the console
-.TP
 .BR \-C ", " --check
 add integrity check computed from uncompressed data
 .TP
 .BR \-t ", " --test
- Test the integrity of compressed files.  This option is equivalent to \fB--decompress --stdout > /dev/null\fR.
+ Test the integrity of compressed files. This option is equivalent to \fB--decompress --stdout > /dev/null\fR.
 No files are created or removed.

 .SH DICTIONARY
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@ -115,6 +115,7 @@ static int usage(const char* programName)
    DISPLAY( " -D file: use `file` as Dictionary \n");
    DISPLAY( " -o file: result stored into `file` (only if 1 input file) \n");
    DISPLAY( " -f     : overwrite output without prompting \n");
+    DISPLAY( "--rm    : remove source file(s) after successful de/compression \n");
    DISPLAY( " -h/-H  : display help/long help and exit\n");
    return 0;
 }
@ -132,7 +133,6 @@ static int usage_advanced(const char* programName)
 #ifdef UTIL_HAS_CREATEFILELIST
    DISPLAY( " -r     : operate recursively on directories\n");
 #endif
-    DISPLAY( "--rm    : remove source files after successful de/compression \n");
 #ifndef ZSTD_NOCOMPRESS
    DISPLAY( "--ultra : enable ultra modes (requires more memory to decompress)\n");
    DISPLAY( "--no-dictID : don't write dictID into header (dictionary compression)\n");
@ -181,7 +181,7 @@ static void waitEnter(void)
 /*! readU32FromChar() :
    @return : unsigned integer value reach from input in `char` format
    Will also modify `*stringPtr`, advancing it to position where it stopped reading.
-    Note : this function can overflow if result > MAX_UNIT */
+    Note : this function can overflow if result > UINT_MAX */
 static unsigned readU32FromChar(const char** stringPtr)
 {
    unsigned result = 0;
@ -254,7 +254,7 @@ int main(int argCount, const char** argv)
        if (!strcmp(argument, "--help")) { displayOut=stdout; CLEAN_RETURN(usage_advanced(programName)); }
        if (!strcmp(argument, "--verbose")) { displayLevel=4; continue; }
        if (!strcmp(argument, "--quiet")) { displayLevel--; continue; }
-        if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; displayLevel=1; continue; }
+        if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; displayLevel-=(displayLevel==2); continue; }
        if (!strcmp(argument, "--ultra")) { FIO_setMaxWLog(0); continue; }
        if (!strcmp(argument, "--check")) { FIO_setChecksumFlag(2); continue; }
        if (!strcmp(argument, "--no-check")) { FIO_setChecksumFlag(0); continue; }
@ -265,13 +265,17 @@ int main(int argCount, const char** argv)
        if (!strcmp(argument, "--train")) { dictBuild=1; outFileName=g_defaultDictName; continue; }
        if (!strcmp(argument, "--maxdict")) { nextArgumentIsMaxDict=1; continue; }
        if (!strcmp(argument, "--dictID")) { nextArgumentIsDictID=1; continue; }
-        if (!strcmp(argument, "--keep")) { continue; }   /* does nothing, since preserving input is default; for gzip/xz compatibility */
+        if (!strcmp(argument, "--keep")) { FIO_setRemoveSrcFile(0); continue; }
        if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(1); continue; }

        /* '-' means stdin/stdout */
        if (!strcmp(argument, "-")){
-            if (!filenameIdx) { filenameIdx=1, filenameTable[0]=stdinmark; outFileName=stdoutmark; continue; }
-        }
+            if (!filenameIdx) {
+                filenameIdx=1, filenameTable[0]=stdinmark;
+                outFileName=stdoutmark;
+                displayLevel-=(displayLevel==2);
+                continue;
+        }   }

        /* Decode commands (note : aggregated commands are allowed) */
        if (argument[0]=='-') {
@ -300,7 +304,7 @@ int main(int argCount, const char** argv)
                case 'd': decode=1; argument++; break;

                    /* Force stdout, even if stdout==console */
-                case 'c': forceStdout=1; outFileName=stdoutmark; displayLevel=1; argument++; break;
+                case 'c': forceStdout=1; outFileName=stdoutmark; displayLevel-=(displayLevel==2); argument++; break;

                    /* Use file content as dictionary */
                case 'D': nextEntryIsDictionary = 1; argument++; break;
@ -314,8 +318,8 @@ int main(int argCount, const char** argv)
                    /* Quiet mode */
                case 'q': displayLevel--; argument++; break;

-                    /* keep source file (default anyway, so useless; for gzip/xz compatibility) */
-                case 'k': argument++; break;
+                    /* keep source file (default); for gzip/xz compatibility */
+                case 'k': FIO_setRemoveSrcFile(0); argument++; break;

                    /* Checksum */
                case 'C': argument++; FIO_setChecksumFlag(2); break;
--- a/projects/README.md
+++ b/projects/README.md
@ -1,4 +1,4 @@
-projects for various integrated development environments (IDE) 
+projects for various integrated development environments (IDE)
 ================================

 #### Included projects
@ -7,3 +7,4 @@ The following projects are included with the zstd distribution:
 - cmake - CMake project contributed by Artyom Dymchenko
 - VS2008 - Visual Studio 2008 project
 - VS2010 - Visual Studio 2010 project (which also works well with Visual Studio 2012, 2013, 2015)
+- build - command line scripts prepared for Visual Studio compilation without IDE
--- a/zstd_compression_format.md
+++ b/zstd_compression_format.md
@ -0,0 +1,591 @@
+Zstandard Compression Format
+============================
+
+### Notices
+
+Copyright (c) 2016 Yann Collet
+
+Permission is granted to copy and distribute this document
+for any  purpose and without charge,
+including translations into other  languages
+and incorporation into compilations,
+provided that the copyright notice and this notice are preserved,
+and that any substantive changes or deletions from the original
+are clearly marked.
+Distribution of this document is unlimited.
+
+### Version
+
+0.0.1 (30/06/2016 - Work in progress - unfinished)
+
+
+Introduction
+------------
+
+The purpose of this document is to define a lossless compressed data format,
+that is independent of CPU type, operating system,
+file system and character set, suitable for
+File compression, Pipe and streaming compression
+using the [Zstandard algorithm](http://www.zstandard.org).
+
+The data can be produced or consumed,
+even for an arbitrarily long sequentially presented input data stream,
+using only an a priori bounded amount of intermediate storage,
+and hence can be used in data communications.
+The format uses the Zstandard compression method,
+and optional [xxHash-64 checksum method](http://www.xxhash.org),
+for detection of data corruption.
+
+The data format defined by this specification
+does not attempt to allow random access to compressed data.
+
+This specification is intended for use by implementers of software
+to compress data into Zstandard format and/or decompress data from Zstandard format.
+The text of the specification assumes a basic background in programming
+at the level of bits and other primitive data representations.
+
+Unless otherwise indicated below,
+a compliant compressor must produce data sets
+that conform to the specifications presented here.
+It doesn’t need to support all options though.
+
+A compliant decompressor must be able to decompress
+at least one working set of parameters
+that conforms to the specifications presented here.
+It may also ignore informative fields, such as checksum.
+Whenever it does not support a parameter defined in the compressed stream,
+it must produce a non-ambiguous error code and associated error message
+explaining which parameter is unsupported.
+
+
+Definitions
+-----------
+A content compressed by Zstandard is transformed into a Zstandard __frame__.
+Multiple frames can be appended into a single file or stream.
+A frame is totally independent, has a defined beginning and end,
+and a set of parameters which tells the decoder how to decompress it.
+
+A frame encapsulates one or multiple __blocks__.
+Each block can be compressed or not,
+and has a guaranteed maximum content size, which depends on frame parameters.
+Unlike frames, each block depends on previous blocks for proper decoding.
+However, each block can be decompressed without waiting for its successor,
+allowing streaming operations.
+
+
+General Structure of Zstandard Frame format
+-------------------------------------------
+
+| MagicNb |  F. Header | Block | (More blocks) | EndMark |
+|:-------:|:----------:| ----- | ------------- | ------- |
+| 4 bytes | 2-14 bytes |       |               | 3 bytes |
+
+__Magic Number__
+
+4 Bytes, Little endian format.
+Value : 0xFD2FB527
+
+__Frame Header__
+
+2 to 14 Bytes, to be detailed in the next part.
+
+__Data Blocks__
+
+To be detailed later on.
+That’s where compressed data is stored.
+
+__EndMark__
+
+The flow of blocks ends when the last block header brings an _end signal_ .
+This last block header may optionally host a __Content Checksum__ .
+
+__Content Checksum__
+
+Content Checksum verifies that frame content has been regenerated correctly.
+The content checksum is the result
+of [xxh64() hash function](https://www.xxHash.com)
+digesting the original (decoded) data as input, and a seed of zero.
+Bits from 11 to 32 (included) are extracted to form a 22 bits checksum
+stored into the last block header.
+```
+contentChecksum = (XXH64(content, size, 0) >> 11) & ((1<<22)-1);
+```
+Content checksum is only present when its associated flag
+is set in the frame descriptor.
+Its usage is optional.
+
+__Frame Concatenation__
+
+In some circumstances, it may be required to append multiple frames,
+for example in order to add new data to an existing compressed file
+without re-framing it.
+
+In such case, each frame brings its own set of descriptor flags.
+Each frame is considered independent.
+The only relation between frames is their sequential order.
+
+The ability to decode multiple concatenated frames
+within a single stream or file is left outside of this specification.
+As an example, the reference `zstd` command line utility is able
+to decode all concatenated frames in their sequential order,
+delivering the final decompressed result as if it was a single content.
+
+
+Frame Header
+-------------
+
+| FHD     | (WD)      | (Content Size) | (dictID)  |
+| ------- | --------- |:--------------:| --------- |
+| 1 byte  | 0-1 byte  |  0 - 8 bytes   | 0-4 bytes |
+
+Frame header has a variable size, which uses a minimum of 2 bytes,
+and up to 14 bytes depending on optional parameters.
+
+__FHD byte__ (Frame Header Descriptor)
+
+The first Header's byte is called the Frame Header Descriptor.
+It tells which other fields are present.
+Decoding this byte is enough to get the full size of the Frame Header.
+
+|  BitNb  |   7-6  |    5    |   4    |    3     |    2     |    1-0   |
+| ------- | ------ | ------- | ------ | -------- | -------- | -------- |
+|FieldName| FCSize | Segment | Unused | Reserved | Checksum |  dictID  |
+
+In this table, bit 7 is highest bit, while bit 0 is lowest.
+
+__Frame Content Size flag__
+
+This is a 2-bits flag (`= FHD >> 6`),
+specifying if decompressed data size is provided within the header.
+
+|  Value  |  0  |  1  |  2  |  3  |
+| ------- | --- | --- | --- | --- |
+|FieldSize| 0-1 |  2  |  4  |  8  |
+
+Value 0 has a double meaning :
+it either means `0` (size not provided) _if_ the `WD` byte is present,
+or it means `1` byte (size <= 255 bytes).
+
+__Single Segment__
+
+If this flag is set,
+data shall be regenerated within a single continuous memory segment.
+In which case, `WD` byte __is not present__,
+but `Frame Content Size` field necessarily is.
+
+As a consequence, the decoder must allocate a memory segment
+of size `>= Frame Content Size`.
+
+In order to preserve the decoder from unreasonable memory requirement,
+a decoder can refuse a compressed frame
+which requests a memory size beyond decoder's authorized range.
+
+For broader compatibility, decoders are recommended to support
+memory sizes of 8 MB at least.
+However, this is merely a recommendation,
+and each decoder is free to support higher or lower limits,
+depending on local limitations.
+
+__Unused bit__
+
+The value of this bit is unimportant
+and not interpreted by a decoder compliant with this specification version.
+It may be used in a future revision,
+to signal a property which is not required to properly decode the frame.
+
+__Reserved bit__
+
+This bit is reserved for some future feature.
+Its value _must be zero_.
+A decoder compliant with this specification version must ensure it is not set.
+This bit may be used in a future revision,
+to signal a feature that must be interpreted in order to decode the frame.
+
+__Content checksum flag__
+
+If this flag is set, a content checksum will be present in the EndMark.
+The checksum is a 22 bits value extracted from the XXH64() of data.
+See __Content Checksum__ .
+
+__Dictionary ID flag__
+
+This is a 2-bits flag (`= FHD & 3`),
+telling if a dictionary ID is provided within the header
+
+|  Value  |  0  |  1  |  2  |  3  |
+| ------- | --- | --- | --- | --- |
+|FieldSize|  0  |  1  |  2  |  4  |
+
+__WD byte__ (Window Descriptor)
+
+Provides guarantees on maximum back-reference distance
+that will be present within compressed data.
+This information is useful for decoders to allocate enough memory.
+
+|   BitNb   |    7-3   |    2-0   |
+| --------- | -------- | -------- |
+| FieldName | Exponent | Mantissa |
+
+Maximum distance is given by the following formulae :
+```
+windowLog = 10 + Exponent;
+windowBase = 1 << windowLog;
+windowAdd = (windowBase / 8) * Mantissa;
+windowSize = windowBase + windowAdd;
+```
+The minimum window size is 1 KB.
+The maximum size is (15*(2^38))-1 bytes, which is almost 3.75 TB.
+
+To properly decode compressed data,
+a decoder will need to allocate a buffer of at least `windowSize` bytes.
+
+Note that `WD` byte is optional. It's not present in `single segment` mode.
+In which case, the maximum back-reference distance is the content size itself,
+which can be any value from 1 to 2^64-1 bytes (16 EB).
+
+In order to preserve decoder from unreasonable memory requirements,
+a decoder can refuse a compressed frame
+which requests a memory size beyond decoder's authorized range.
+
+For better interoperability,
+decoders are recommended to be compatible with window sizes of 8 MB.
+Encoders are recommended to not request more than 8 MB.
+It's merely a recommendation though,
+decoders are free to support larger or lower limits,
+depending on local limitations.
+
+__Frame Content Size__
+
+This is the original (uncompressed) size.
+This information is optional, and only present if associated flag is set.
+Content size is provided using 1, 2, 4 or 8 Bytes.
+Format is Little endian.
+
+| Field Size |    Range   |
+| ---------- | ---------- |
+|     0      |      0     |
+|     1      |   0 - 255  |
+|     2      | 256 - 65791|
+|     4      | 0 - 2^32-1 |
+|     8      | 0 - 2^64-1 |
+
+When field size is 1, 4 or 8 bytes, the value is read directly.
+When field size is 2, _an offset of 256 is added_.
+It's allowed to represent a small size (ex: `18`) using the 8-bytes variant.
+A size of `0` means `content size is unknown`.
+In which case, the `WD` byte will necessarily be present,
+and becomes the only hint to determine memory allocation.
+
+In order to preserve decoder from unreasonable memory requirement,
+a decoder can refuse a compressed frame
+which requests a memory size beyond decoder's authorized range.
+
+__Dictionary ID__
+
+This is a variable size field, which contains a single ID.
+It checks if the correct dictionary is used for decoding.
+Note that this field is optional. If it's not present,
+it's up to the caller to make sure it uses the correct dictionary.
+
+Field size depends on __Dictionary ID flag__.
+1 byte can represent an ID 0-255.
+2 bytes can represent an ID 0-65535.
+4 bytes can represent an ID 0-(2^32-1).
+
+It's allowed to represent a small ID (for example `13`)
+with a large 4-bytes dictionary ID, losing some efficiency in the process.
+
+
+Data Blocks
+-----------
+
+| B. Header |  data  |
+|:---------:| ------ |
+|  3 bytes  |        |
+
+
+__Block Header__
+
+This field uses 3-bytes, format is __big-endian__.
+
+The 2 highest bits represent the `block type`,
+while the remaining 22 bits represent the (compressed) block size.
+
+There are 4 block types :
+
+|    Value   |      0     |  1  |  2  |    3    |
+| ---------- | ---------- | --- | --- | ------- |
+| Block Type | Compressed | Raw | RLE | EndMark |
+
+- Compressed : this is a Zstandard compressed block,
+  detailed in a later part of this specification.
+  "block size" is the compressed size.
+  Decompressed size is unknown,
+  but its maximum possible value is guaranteed (see below)
+- Raw : this is an uncompressed block.
+  "block size" is the number of bytes to read and copy.
+- RLE : this is a single byte, repeated N times.
+  In which case, "block size" is the size to regenerate,
+  while the "compressed" block is just 1 byte (the byte to repeat).
+- EndMark : this is not a block. It signals the end of the frame.
+  The rest of the field may be optionally filled by a checksum
+  (see __Content Checksum__).
+
+Block sizes must respect a few rules :
+- In compressed mode, compressed size is always strictly `< contentSize`.
+- Block decompressed size is necessarily <= maximum back-reference distance .
+- Block decompressed size is necessarily <= 128 KB
+
+
+__Data__
+
+Where the actual data to decode stands.
+It might be compressed or not, depending on previous field indications.
+A data block is not necessarily "full" :
+since an arbitrary “flush” may happen anytime,
+block content can be any size, up to Block Maximum Size.
+Block Maximum Size is the smallest of :
+- Max back-reference distance
+- 128 KB
+
+
+Skippable Frames
+----------------
+
+| Magic Number | Frame Size | User Data |
+|:------------:|:----------:| --------- |
+|   4 bytes    |  4 bytes   |           |
+
+Skippable frames allow the insertion of user-defined data
+into a flow of concatenated frames.
+Its design is pretty straightforward,
+with the sole objective to allow the decoder to quickly skip
+over user-defined data and continue decoding.
+
+Skippable frames defined in this specification are compatible with LZ4 ones.
+
+
+__Magic Number__ :
+
+4 Bytes, Little endian format.
+Value : 0x184D2A5X, which means any value from 0x184D2A50 to 0x184D2A5F.
+All 16 values are valid to identify a skippable frame.
+
+__Frame Size__ :
+
+This is the size, in bytes, of the following User Data
+(without including the magic number nor the size field itself).
+4 Bytes, Little endian format, unsigned 32-bits.
+This means User Data can’t be bigger than (2^32-1) Bytes.
+
+__User Data__ :
+
+User Data can be anything. Data will just be skipped by the decoder.
+
+
+Compressed block format
+-----------------------
+This specification details the content of a _compressed block_.
+A compressed block has a size, which must be known in order to decode it.
+It also has a guaranteed maximum regenerated size,
+in order to properly allocate destination buffer.
+See "Frame format" for more details.
+
+A compressed block consists of 2 sections :
+- Literals section
+- Sequences section
+
+### Prerequisite
+For proper decoding, a compressed block requires access to following elements :
+- Previous decoded blocks, up to a distance of `windowSize`,
+  or all previous blocks in the same frame in "single segment" mode.
+- List of "recent offsets" from previous compressed block.
+
+
+### Compressed Literals
+
+Literals are compressed using order-0 huffman compression.
+During sequence phase, literals will be entangled with match copy operations.
+All literals are regrouped in the first part of the block.
+They can be decoded first, and then copied during sequence operations,
+or they can be decoded on the fly, as needed by sequences.
+
+| Header | (Tree Description) | Stream1 | (Stream2) | (Stream3) | (Stream4) |
+| ------ | ------------------ | ------- | --------- | --------- | --------- |
+
+Literals can be compressed, or uncompressed.
+When compressed, an optional tree description can be present,
+followed by 1 or 4 streams.
+
+#### Block Literal Header
+
+Header is in charge of describing precisely how literals are packed.
+It's a byte-aligned variable-size bitfield, ranging from 1 to 5 bytes,
+using big-endian convention.
+
+| BlockType | sizes format | (compressed size) | regenerated size |
+| --------- | ------------ | ----------------- | ---------------- |
+|   2 bits  |  1 - 2 bits  |    0 - 18 bits    |    5 - 20 bits   |
+
+__Block Type__ :
+
+This is a 2-bits field, describing 4 different block types :
+
+|    Value   |      0     |    1   |  2  |    3    |
+| ---------- | ---------- | ------ | --- | ------- |
+| Block Type | Compressed | Repeat | Raw |   RLE   |
+
+- Compressed : This is a standard huffman-compressed block,
+               starting with a huffman tree description.
+               See details below.
+- Repeat Stats : This is a huffman-compressed block,
+               using huffman tree from previous huffman-compressed block.
+               Huffman tree description will be skipped.
+               Compressed stream is equivalent to "compressed" block type.
+- Raw : Literals are stored uncompressed.
+- RLE : Literals consist of a single byte value repeated N times.
+
+__Sizes format__ :
+
+Sizes formats are divided into 2 families :
+
+- For compressed blocks, it is necessary to decode both the compressed size
+  and the decompressed size. The number of streams is decoded too.
+- For Raw or RLE blocks, it's enough to decode the size to regenerate.
+
+For values spanning several bytes, convention is Big-endian.
+
+__Sizes format for Raw or RLE block__ :
+
+- Value : 0x : Regenerated size uses 5 bits (0-31).
+               Total literal header size is 1 byte.
+               `size = h[0] & 31;`
+- Value : 10 : Regenerated size uses 12 bits (0-4095).
+               Total literal header size is 2 bytes.
+               `size = ((h[0] & 15) << 8) + h[1];`
+- Value : 11 : Regenerated size uses 20 bits (0-1048575).
+               Total literal header size is 3 bytes.
+               `size = ((h[0] & 15) << 16) + (h[1]<<8) + h[2];`
+
+Note : it's allowed to represent a short value (ex : `13`)
+using a long format, accepting the reduced compactness.
+
+__Sizes format for Compressed Block__ :
+
+Note : also applicable to "repeat-stats" blocks.
+- Value : 00 : 4 streams
+               Compressed and regenerated sizes use 10 bits (0-1023)
+               Total literal header size is 3 bytes
+- Value : 01 : _Single stream_
+               Compressed and regenerated sizes use 10 bits (0-1023)
+               Total literal header size is 3 bytes
+- Value : 10 : 4 streams
+               Compressed and regenerated sizes use 14 bits (0-16383)
+               Total literal header size is 4 bytes
+- Value : 11 : 4 streams
+               Compressed and regenerated sizes use 18 bits (0-262143)
+               Total literal header size is 5 bytes
+
+Compressed and regenerated size fields follow big endian convention.
+
+#### Huffman Tree description
+
+This section is only present when block type is _compressed_ (`0`).
+It describes the different leaf nodes of the huffman tree,
+and their relative weights.
+
+##### Representation
+
+All byte values from zero (included) to last present one (excluded)
+are represented by `weight` values, from 0 to `maxBits`.
+Transformation from `weight` to `nbBits` follows this formula :
+`nbBits = weight ? maxBits + 1 - weight : 0;` .
+The last symbol's weight is deduced from previously decoded ones,
+by completing to the nearest power of 2.
+This power of 2 gives `maxBits`, the depth of the current tree.
+
+__Example__ :
+Let's presume the following huffman tree must be described :
+
+|  Value | 0 | 1 | 2 | 3 | 4 | 5 |
+| ------ | - | - | - | - | - | - |
+| nbBits | 1 | 2 | 3 | 0 | 4 | 4 |
+
+The tree depth is 4, since its longest code uses 4 bits.
+Value `5` will not be listed, nor will values above `5`.
+Values from `0` to `4` will be listed using `weight` instead of `nbBits`.
+Weight formula is : `weight = nbBits ? maxBits + 1 - nbBits : 0;`
+It gives the following series of weights :
+
+| weight | 4 | 3 | 2 | 0 | 1 |
+| ------ | - | - | - | - | - |
+|  Value | 0 | 1 | 2 | 3 | 4 |
+
+The decoder will do the inverse operation :
+having collected weights of symbols from `0` to `4`,
+it knows the last symbol, `5`, is present with a non-zero weight.
+The weight of `5` can be deduced by joining to the nearest power of 2.
+Sum of 2^(weight-1) (excluding 0) is :
+8 + 4 + 2 + 0 + 1 = 15
+Nearest power of 2 is 16.
+Therefore, `maxBits = 4` and `weight[5] = 1`.
+It can then proceed to transform weights back into nbBits :
+`nbBits = weight ? maxBits + 1 - weight : 0;` .
+
+##### Huffman Tree header
+
+This is a single byte value (0-255), which tells how to decode the tree.
+
+- if headerByte >= 242 : this is one of 14 pre-defined weight distributions :
+  + 242 :  1x1 (+ 1x1)
+  + 243 :  2x1 (+ 1x2)
+  + 244 :  3x1 (+ 1x1)
+  + 245 :  4x1 (+ 1x4)
+  + 246 :  7x1 (+ 1x1)
+  + 247 :  8x1 (+ 1x8)
+  + 248 : 15x1 (+ 1x1)
+  + 249 : 16x1 (+ 1x16)
+  + 250 : 31x1 (+ 1x1)
+  + 251 : 32x1 (+ 1x32)
+  + 252 : 63x1 (+ 1x1)
+  + 253 : 64x1 (+ 1x64)
+  + 254 :127x1 (+ 1x1)
+  + 255 :128x1 (+ 1x128)
+
+- if headerByte >= 128 : this is a direct representation,
+  where each weight is written directly as a 4 bits field (0-15).
+  The full representation occupies ((nbSymbols+1)/2) bytes,
+  meaning it uses a last full byte even if nbSymbols is odd.
+  `nbSymbols = headerByte - 127;`
+
+- if headerByte < 128 :
+  the series of weights is compressed by FSE.
+  The length of the compressed series is `headerByte` (0-127).
+
+##### FSE (Finite State Entropy) compression of huffman weights
+
+The series of weights is compressed using standard FSE compression.
+It's a single bitstream with 2 interleaved states,
+using a single distribution table.
+
+To decode an FSE bitstream, it is necessary to know its compressed size.
+Compressed size is provided by `headerByte`.
+It's also necessary to know its maximum decompressed size.
+In this case, it's `255`, since literal values range from `0` to `255`,
+and the last symbol value is not represented.
+
+An FSE bitstream starts with a header, describing the probability distribution.
+The result is used to build a Decoding Table.
+It is necessary to know the maximum accuracy of distribution
+to properly allocate space for the Table.
+For a list of huffman weights, this maximum is 8 bits.
+
+FSE header and bitstreams are described in a separate chapter.
+
+##### Conversion from weights to huffman prefix codes
+
+
+
+
+Version changes
+---------------