diff --git a/.gitignore b/.gitignore
index a31ffdc7..6ca7f8a7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,6 +20,7 @@
 # Visual solution files
 *.suo
 *.user
+*.VC.db
 
 # Build results
 [Dd]ebug/
@@ -46,7 +47,3 @@ ipch/
 .directory
 _codelite
 _zstdbench
-
-lib/zstd_opt_LZ5.c
-lib/zstd_opt_llen.c
-lib/zstd_opt_nollen.c
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 93d5e052..494f59dc 100644
--- a/Makefile
+++ b/Makefile
@@ -89,9 +89,8 @@ gpptest: clean
 	$(MAKE) all CC=g++ CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
 
 armtest: clean
-#	$(MAKE) -C $(ZSTDDIR) all CC=arm-linux-gnueabi-gcc MOREFLAGS="-Werror"
 	$(MAKE) -C $(PRGDIR) datagen   # use native, faster
-	$(MAKE) -C $(PRGDIR) test CC=arm-linux-gnueabi-gcc ZSTDRTTEST= MOREFLAGS=-static # MOREFLAGS="-Werror -static"
+	$(MAKE) -C $(PRGDIR) test CC=arm-linux-gnueabi-gcc ZSTDRTTEST= MOREFLAGS="-Werror -static"
 
 # for Travis CI
 arminstall: clean   
@@ -105,7 +104,7 @@ armtest-w-install: clean arminstall armtest
 
 ppctest: clean
 	$(MAKE) -C $(PRGDIR) datagen   # use native, faster
-	$(MAKE) -C $(PRGDIR) test CC=powerpc-linux-gnu-gcc ZSTDRTTEST= MOREFLAGS=-static # MOREFLAGS="-Werror -static" 
+	$(MAKE) -C $(PRGDIR) test CC=powerpc-linux-gnu-gcc ZSTDRTTEST= MOREFLAGS="-Werror -static" 
 
 # for Travis CI
 ppcinstall: clean   
diff --git a/NEWS b/NEWS
index ccbf15eb..d4abd36f 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,9 @@
+v0.6.0
+Stronger high compression modes, thanks to Przemyslaw Skibinski
+API : ZSTD_getFrameParams() provides size of decompressed content
+New : highest compression modes require `--ultra` command to fully unleash their capacity
+Fixed : zstd cli return error code > 0 and removes dst file artifact when decompression fails, thanks to Chip Turner
+
 v0.5.1
 New : Optimal parsing => Very high compression modes, thanks to Przemyslaw Skibinski
 Changed : Dictionary builder integrated into libzstd and zstd cli
diff --git a/README.md b/README.md
index b84d8a8f..5173c9f9 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ As a reference, several fast compression algorithms were tested and compared to
 |Name             | Ratio | C.speed | D.speed |
 |-----------------|-------|--------:|--------:|
 |                 |       |   MB/s  |  MB/s   |
-|**zstd 0.5.1 -1**|**2.876**|**330**| **890** |
+|**zstd 0.6.0 -1**|**2.877**|**330**| **915** |
 | [zlib] 1.2.8 -1 | 2.730 |    95   |   360   |
 | brotli -0       | 2.708 |   220   |   430   |
 | QuickLZ 1.5     | 2.237 |   510   |   605   |
@@ -43,33 +43,18 @@ For a larger picture including very slow modes, [click on this link](images/DCsp
 
 ### The case for Small Data compression
 
-Above chart provides results applicable to large files or large streams scenarios (200 MB for this case).
-Small data (< 64 KB) come with different perspectives.
-The smaller the amount of data to compress, the more difficult it is to achieve any significant compression.
-On reaching the 1 KB region, it becomes almost impossible to compress anything.
-This problem is common to any compression algorithms, and throwing CPU power at it achieves little gains.
+Previous charts provide results applicable to typical files and streams scenarios (several MB). Small data come with different perspectives. The smaller the amount of data to compress, the more difficult it is to achieve any significant compression.
 
-The reason is, compression algorithms learn from past data how to compress future data.
-But at the beginning of a new file, there is no "past" to build upon.
+This problem is common to any compression algorithm. The reason is, compression algorithms learn from past data how to compress future data. But at the beginning of a new file, there is no "past" to build upon.
 
-To solve this situation, Zstd now offers a __training mode__,
-which can be used to make the algorithm fit a selected type of data, by providing it with some samples.
-The result of the training is a file called "dictionary", which can be loaded before compression and decompression.
-Using this dictionary, the compression ratio achievable on small data improves dramatically :
+To solve this situation, Zstd offers a __training mode__, which can be used to tune the algorithm for a selected type of data, by providing it with a few samples. The result of the training is stored in a file called "dictionary", which can be loaded before compression and decompression. Using this dictionary, the compression ratio achievable on small data improves dramatically :
 
-| Collection Name    | Direct compression | Dictionary Compression | Gains      | Average unit | Range       |
-| ---------------    | ------------------ | ---------------------- | ---------  | ------------:| -----       |
-| Small JSON records | x1.331 - x1.366	  | x5.860 - x6.830        | ~ __x4.7__ | 300          | 200 - 400   |
-| Mercurial events   | x2.322 - x2.538    | x3.377 - x4.462        | ~ __x1.5__ | 1.5 KB       | 20 - 200 KB |	
-| Large JSON docs    | x3.813 - x4.043    | x8.935 - x13.366       | ~ __x2.8__ | 6 KB         | 800 - 20 KB |	
+![Compressing Small Data](images/smallData.png "Compressing Small Data")
 
-These compression gains are achieved without any speed loss, and prove in general a bit faster to compress and decompress.
+These compression gains are achieved while simultaneously providing faster compression and decompression speeds.
 
 Dictionary work if there is some correlation in a family of small data (there is no _universal dictionary_).
-Hence, deploying one dictionary per type of data will provide the greater benefits.
-
-Large documents will benefit proportionally less, since dictionary gains are mostly effective in the first few KB.
-Then, the compression algorithm will rely more and more on already decoded content to compress the rest of the file.
+Hence, deploying one dictionary per type of data will provide the greater benefits. Dictionary gains are mostly effective in the first few KB. Then, the compression algorithm will rely more and more on previously decoded content to compress the rest of the file.
 
 #### Dictionary compression How To :
 
@@ -79,7 +64,7 @@ Then, the compression algorithm will rely more and more on already decoded conte
 
 `zstd --train FullPathToTrainingSet/* -o dictionaryName`
 
-2) Compression with dictionary
+2) Compress with dictionary
 
 `zstd FILE -D dictionaryName`
 
@@ -87,53 +72,15 @@ Then, the compression algorithm will rely more and more on already decoded conte
 
 `zstd --decompress FILE.zst -D dictionaryName`
 
-##### _Using API_ :
-
-1) Create dictionary
-
-```
-#include "zdict.h"
-(...)
-/* Train a dictionary from a memory buffer `samplesBuffer`, 
-   where `nbSamples` samples have been stored concatenated. */
-size_t dictSize = ZDICT_trainFromBuffer(dictBuffer, dictBufferCapacity,
-                                        samplesBuffer, samplesSizes, nbSamples);
-```
-
-2) Compression with dictionary
-
-```
-#include "zstd.h"
-(...)
-ZSTD_CCtx* context = ZSTD_createCCtx();
-size_t compressedSize = ZSTD_compress_usingDict(context, dst, dstCapacity, src, srcSize, dict, dictSize, compressionLevel);
-```
-
-3) Decompress with dictionary
-
-```
-#include "zstd.h"
-(...)
-ZSTD_DCtx* context = ZSTD_createDCtx();
-size_t regeneratedSize = ZSTD_decompress_usingDict(context, dst, dstCapacity, cSrc, cSrcSize, dict, dictSize);
-```
-
-
 ### Status
 
-Zstd has not yet reached "stable format" status. It doesn't guarantee yet that its current compression format will remain stable in future versions. During this period, it can still change to adapt new optimizations still being investigated. "Stable Format" is projected H1 2016, and will be tagged `v1.0`.
-
-That being said, the library is now fairly robust, able to withstand hazards situations, including invalid inputs. It also features legacy support, so that documents compressed with current and previous version of zstd can still be decoded in the future. 
-Library reliability has been tested using [Fuzz Testing](https://en.wikipedia.org/wiki/Fuzz_testing), with both [internal tools](programs/fuzzer.c) and [external ones](http://lcamtuf.coredump.cx/afl). Therefore, Zstandard is considered safe for testings, even within production environments.
+Zstd is in development. The internal format evolves to reach better performance. "Final Format" is projected H1 2016, and will be tagged `v1.0`. Zstd offers legacy support, meaning any data compressed by any version >= 0.1 (therefore including current one) remain decodable in the future.
+The library is also quite robust, able to withstand hazards situations, including invalid inputs. Library reliability has been tested using [Fuzz Testing](https://en.wikipedia.org/wiki/Fuzz_testing), with both [internal tools](programs/fuzzer.c) and [external ones](http://lcamtuf.coredump.cx/afl). Therefore, Zstandard is considered safe for production environments.
 
 ### Branch Policy
 
 The "dev" branch is the one where all contributions will be merged before reaching "master". If you plan to propose a patch, please commit into the "dev" branch or its own feature branch. Direct commit to "master" are not permitted.
 
-
-### Trivia
+### Miscellaneous
 
 Zstd entropy stage is provided by [Huff0 and FSE, from Finite State Entropy library](https://github.com/Cyan4973/FiniteStateEntropy).
-
-Its memory requirement can be configured to fit into low-memory hardware configurations, or servers handling multiple connections/contexts in parallel.
-
diff --git a/images/Cspeed4.png b/images/Cspeed4.png
index 843e5eba..d5219d72 100644
Binary files a/images/Cspeed4.png and b/images/Cspeed4.png differ
diff --git a/images/DCspeed5.png b/images/DCspeed5.png
index db5ef3cf..900b0242 100644
Binary files a/images/DCspeed5.png and b/images/DCspeed5.png differ
diff --git a/images/Dspeed4.png b/images/Dspeed4.png
index 107e26c6..e75091dd 100644
Binary files a/images/Dspeed4.png and b/images/Dspeed4.png differ
diff --git a/images/smallData.png b/images/smallData.png
new file mode 100644
index 00000000..69edf58d
Binary files /dev/null and b/images/smallData.png differ
diff --git a/lib/.debug/zstd_stats.h b/lib/.debug/zstd_stats.h
new file mode 100644
index 00000000..d0189f87
--- /dev/null
+++ b/lib/.debug/zstd_stats.h
@@ -0,0 +1,164 @@
+/*
+    zstd - standard compression library
+    Header File for static linking only
+    Copyright (C) 2014-2016, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd homepage : http://www.zstd.net
+*/
+#ifndef ZSTD_STATS_H
+#define ZSTD_STATS_H
+
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*-*************************************
+*  Dependencies
+***************************************/
+//#include "zstd.h"
+//#include "mem.h"
+
+
+/*-*************************************
+*  Constants
+***************************************/
+//#define ZSTD_MAGICNUMBER 0xFD2FB526   /* v0.6 */
+
+
+/*-*************************************
+*  Types
+***************************************/
+typedef struct {
+    U32  priceOffset, priceOffCode, priceMatchLength, priceLiteral, priceLitLength, priceDumpsLength;
+    U32  totalMatchSum, totalLitSum, totalSeqSum, totalRepSum;
+    U32  litSum, matchLengthSum, litLengthSum, offCodeSum;
+    U32  matchLengthFreq[1<<MLbits];
+    U32  litLengthFreq[1<<LLbits];
+    U32  litFreq[1<<Litbits];
+    U32  offCodeFreq[1<<Offbits];
+} ZSTD_stats_t;
+
+
+/*-*************************************
+*  Advanced functions
+***************************************/
+MEM_STATIC void ZSTD_statsPrint(ZSTD_stats_t* stats, U32 searchLength)
+{
+    stats->totalMatchSum += stats->totalSeqSum * ((searchLength == 3) ? 3 : 4);
+    printf("avgMatchL=%.2f avgLitL=%.2f match=%.1f%% lit=%.1f%% reps=%d seq=%d\n", (float)stats->totalMatchSum/stats->totalSeqSum, (float)stats->totalLitSum/stats->totalSeqSum, 100.0*stats->totalMatchSum/(stats->totalMatchSum+stats->totalLitSum), 100.0*stats->totalLitSum/(stats->totalMatchSum+stats->totalLitSum), stats->totalRepSum, stats->totalSeqSum);
+    printf("SumBytes=%d Offset=%d OffCode=%d Match=%d Literal=%d LitLength=%d DumpsLength=%d\n", (stats->priceOffset+stats->priceOffCode+stats->priceMatchLength+stats->priceLiteral+stats->priceLitLength+stats->priceDumpsLength)/8, stats->priceOffset/8, stats->priceOffCode/8, stats->priceMatchLength/8, stats->priceLiteral/8, stats->priceLitLength/8, stats->priceDumpsLength/8);
+}
+
+MEM_STATIC void ZSTD_statsInit(ZSTD_stats_t* stats)
+{
+    stats->totalLitSum = stats->totalMatchSum = stats->totalSeqSum = stats->totalRepSum = 1;
+    stats->priceOffset = stats->priceOffCode = stats->priceMatchLength = stats->priceLiteral = stats->priceLitLength = stats->priceDumpsLength = 0;
+}
+
+MEM_STATIC void ZSTD_statsResetFreqs(ZSTD_stats_t* stats)
+{
+    unsigned u;
+
+    stats->litSum = (1<<Litbits);
+    stats->litLengthSum = (1<<LLbits);
+    stats->matchLengthSum = (1<<MLbits);
+    stats->offCodeSum = (1<<Offbits);
+
+    for (u=0; u<=MaxLit; u++)
+        stats->litFreq[u] = 1;
+    for (u=0; u<=MaxLL; u++)
+        stats->litLengthFreq[u] = 1;
+    for (u=0; u<=MaxML; u++)
+        stats->matchLengthFreq[u] = 1;
+    for (u=0; u<=MaxOff; u++)
+        stats->offCodeFreq[u] = 1;
+}
+
+MEM_STATIC void ZSTD_statsUpdatePrices(ZSTD_stats_t* stats, size_t litLength, const BYTE* literals, size_t offset, size_t matchLength)
+{
+    /* offset */
+    BYTE offCode = offset ? (BYTE)ZSTD_highbit(offset+1) + 1 : 0;
+    stats->priceOffCode += ZSTD_highbit(stats->offCodeSum+1) - ZSTD_highbit(stats->offCodeFreq[offCode]+1);
+    stats->priceOffset += (offCode-1) + (!offCode);
+
+    /* match Length */
+    stats->priceDumpsLength += ((matchLength >= MaxML)<<3) + ((matchLength >= 255+MaxML)<<4) + ((matchLength>=(1<<15))<<3);
+    stats->priceMatchLength += ZSTD_highbit(stats->matchLengthSum+1) - ZSTD_highbit(stats->matchLengthFreq[(matchLength >= MaxML) ? MaxML : matchLength]+1);
+
+    if (litLength) {
+        /* literals */
+        U32 u;
+        stats->priceLiteral += litLength * ZSTD_highbit(stats->litSum+1);
+        for (u=0; u < litLength; u++)
+            stats->priceLiteral -= ZSTD_highbit(stats->litFreq[literals[u]]+1);
+
+        /* literal Length */
+        stats->priceDumpsLength += ((litLength >= MaxLL)<<3) + ((litLength >= 255+MaxLL)<<4) + ((litLength>=(1<<15))<<3);
+        stats->priceLitLength += ZSTD_highbit(stats->litLengthSum+1) - ZSTD_highbit(stats->litLengthFreq[(litLength >= MaxLL) ? MaxLL : litLength]+1);
+    } else {
+        stats->priceLitLength += ZSTD_highbit(stats->litLengthSum+1) - ZSTD_highbit(stats->litLengthFreq[0]+1);
+    }
+
+
+    if (offset == 0) stats->totalRepSum++;
+    stats->totalSeqSum++;
+    stats->totalMatchSum += matchLength;
+    stats->totalLitSum += litLength;
+        
+    U32 u;
+    /* literals */
+    stats->litSum += litLength;
+    for (u=0; u < litLength; u++)
+        stats->litFreq[literals[u]]++;
+
+    /* literal Length */
+    stats->litLengthSum++;
+    if (litLength >= MaxLL)
+        stats->litLengthFreq[MaxLL]++;
+    else
+        stats->litLengthFreq[litLength]++;
+
+    /* match offset */
+    stats->offCodeSum++;
+    stats->offCodeFreq[offCode]++;
+
+    /* match Length */
+    stats->matchLengthSum++;
+    if (matchLength >= MaxML)
+        stats->matchLengthFreq[MaxML]++;
+    else
+        stats->matchLengthFreq[matchLength]++;
+}
+
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* ZSTD_STATIC_H */
diff --git a/lib/Makefile b/lib/Makefile
index 6bdf2f87..9bf83f1a 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -51,8 +51,8 @@ FLAGS   = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MOREFLAGS)
 LIBDIR ?= $(PREFIX)/lib
 INCLUDEDIR=$(PREFIX)/include
 
-ZSTD_FILES := zstd_compress.c zstd_decompress.c fse.c huff0.c zdict.c divsufsort.c
-ZSTD_LEGACY:= legacy/zstd_v01.c legacy/zstd_v02.c legacy/zstd_v03.c legacy/zstd_v04.c
+ZSTD_FILES := zstd_compress.c zstd_decompress.c fse.c huff0.c zbuff.c zdict.c divsufsort.c
+ZSTD_LEGACY:= legacy/zstd_v01.c legacy/zstd_v02.c legacy/zstd_v03.c legacy/zstd_v04.c legacy/zstd_v05.c
 
 ifeq ($(ZSTD_LEGACY_SUPPORT), 0)
 CPPFLAGS  += -DZSTD_LEGACY_SUPPORT=0
@@ -118,8 +118,8 @@ install: libzstd libzstd.pc
 	@cp -a libzstd.pc $(DESTDIR)$(LIBDIR)/pkgconfig/
 	@install -m 644 libzstd.a $(DESTDIR)$(LIBDIR)/libzstd.a
 	@install -m 644 zstd.h $(DESTDIR)$(INCLUDEDIR)/zstd.h
-	@install -m 644 zstd.h $(DESTDIR)$(INCLUDEDIR)/zbuff.h
-	@install -m 644 zstd.h $(DESTDIR)$(INCLUDEDIR)/zdict.h
+	@install -m 644 zbuff.h $(DESTDIR)$(INCLUDEDIR)/zbuff.h
+	@install -m 644 zdict.h $(DESTDIR)$(INCLUDEDIR)/zdict.h
 	@echo zstd static and shared library installed
 
 uninstall:
@@ -129,8 +129,8 @@ uninstall:
 	@[ -x $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_VER) ] && rm -f $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_VER)
 	@[ -f $(DESTDIR)$(LIBDIR)/libzstd.a ] && rm -f $(DESTDIR)$(LIBDIR)/libzstd.a
 	@[ -f $(DESTDIR)$(INCLUDEDIR)/zstd.h ] && rm -f $(DESTDIR)$(INCLUDEDIR)/zstd.h
-	@[ -f $(DESTDIR)$(INCLUDEDIR)/zstd.h ] && rm -f $(DESTDIR)$(INCLUDEDIR)/zbuff.h
-	@[ -f $(DESTDIR)$(INCLUDEDIR)/zstd.h ] && rm -f $(DESTDIR)$(INCLUDEDIR)/zdict.h
+	@[ -f $(DESTDIR)$(INCLUDEDIR)/zbuff.h ] && rm -f $(DESTDIR)$(INCLUDEDIR)/zbuff.h
+	@[ -f $(DESTDIR)$(INCLUDEDIR)/zdict.h ] && rm -f $(DESTDIR)$(INCLUDEDIR)/zdict.h
 	@echo zstd libraries successfully uninstalled
 
 endif
diff --git a/lib/README.md b/lib/README.md
index a0445548..91ef00c4 100644
--- a/lib/README.md
+++ b/lib/README.md
@@ -13,6 +13,11 @@ The __lib__ directory contains several files, but depending on target use case,
 
 ##### zstd core compression
 
+Stable API is exposed in [zstd.h].
+Advanced and experimental API is exposed in `zstd_static.h`.
+`zstd_static.h` API elements should be used with static linking only,
+as their definition may change in future version of the library.
+
 - [bitstream.h](bitstream.h)
 - fse.c
 - fse.h
@@ -24,13 +29,15 @@ The __lib__ directory contains several files, but depending on target use case,
 - zstd_decompress.c
 - zstd_internal.h
 - zstd_opt.h
-- zstd.h
+- [zstd.h]
 - zstd_static.h
 
+[zstd.h]: zstd.h
+
 #### Buffered streaming
 
 This complementary API makes streaming integration easier.
-It is used by `zstd` command line utility :
+It is used by `zstd` command line utility, and [7zip plugin](http://mcmilk.de/projects/7-Zip-ZStd) :
 
 - zbuff.c
 - zbuff.h
diff --git a/lib/bitstream.h b/lib/bitstream.h
index e0487e87..fd114e55 100644
--- a/lib/bitstream.h
+++ b/lib/bitstream.h
@@ -41,7 +41,7 @@ extern "C" {
 
 
 /*
-*  This API consists of small unitary functions, which highly benefit from being inlined.
+*  This API consists of small unitary functions, which must be inlined for best performance.
 *  Since link-time-optimization is not available for all compilers,
 *  these functions are defined into a .h to be included.
 */
@@ -53,13 +53,20 @@ extern "C" {
 #include "error_private.h"  /* error codes and messages */
 
 
+/*=========================================
+*  Target specific
+=========================================*/
+#if defined(__BMI__) && defined(__GNUC__)
+#  include <immintrin.h>   /* support for bextr (experimental) */
+#endif
+
+
 /*-******************************************
 *  bitStream encoding API (write forward)
 ********************************************/
-/*!
-* bitStream can mix input from multiple sources.
-* A critical property of these streams is that they encode and decode in **reverse** direction.
-* So the first bit sequence you add will be the last to be read, like a LIFO stack.
+/* bitStream can mix input from multiple sources.
+*  A critical property of these streams is that they encode and decode in **reverse** direction.
+*  So the first bit sequence you add will be the last to be read, like a LIFO stack.
 */
 typedef struct
 {
@@ -75,22 +82,21 @@ MEM_STATIC void   BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits
 MEM_STATIC void   BIT_flushBits(BIT_CStream_t* bitC);
 MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
 
-/*!
-* Start by initCStream, providing the size of buffer to write into.
-* bitStream will never write outside of this buffer.
-* @dstCapacity must be >= sizeof(size_t), otherwise @return will be an error code.
+/* Start with initCStream, providing the size of buffer to write into.
+*  bitStream will never write outside of this buffer.
+*  `dstCapacity` must be >= sizeof(size_t), otherwise @return will be an error code.
 *
-* bits are first added to a local register.
-* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
-* Writing data into memory is an explicit operation, performed by the flushBits function.
-* Hence keep track how many bits are potentially stored into local register to avoid register overflow.
-* After a flushBits, a maximum of 7 bits might still be stored into local register.
+*  bits are first added to a local register.
+*  Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
+*  Writing data into memory is an explicit operation, performed by the flushBits function.
+*  Hence keep track how many bits are potentially stored into local register to avoid register overflow.
+*  After a flushBits, a maximum of 7 bits might still be stored into local register.
 *
-* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
+*  Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
 *
-* Last operation is to close the bitStream.
-* The function returns the final size of CStream in bytes.
-* If data couldn't fit into @dstBuffer, it will return a 0 ( == not storable)
+*  Last operation is to close the bitStream.
+*  The function returns the final size of CStream in bytes.
+*  If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)
 */
 
 
@@ -117,15 +123,14 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
 MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
 
 
-/*!
-* Start by invoking BIT_initDStream().
-* A chunk of the bitStream is then stored into a local register.
-* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
-* You can then retrieve bitFields stored into the local register, **in reverse order**.
-* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
-* A reload guarantee a minimum of ((8*sizeof(size_t))-7) bits when its result is BIT_DStream_unfinished.
-* Otherwise, it can be less than that, so proceed accordingly.
-* Checking if DStream has reached its end can be performed with BIT_endOfDStream()
+/* Start by invoking BIT_initDStream().
+*  A chunk of the bitStream is then stored into a local register.
+*  Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+*  You can then retrieve bitFields stored into the local register, **in reverse order**.
+*  Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
+*  A reload guarantee a minimum of ((8*sizeof(size_t))-7) bits when its result is BIT_DStream_unfinished.
+*  Otherwise, it can be less than that, so proceed accordingly.
+*  Checking if DStream has reached its end can be performed with BIT_endOfDStream().
 */
 
 
@@ -144,7 +149,7 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
 
 
 /*-**************************************************************
-*  Helper functions
+*  Internal functions
 ****************************************************************/
 MEM_STATIC unsigned BIT_highbit32 (register U32 val)
 {
@@ -168,29 +173,38 @@ MEM_STATIC unsigned BIT_highbit32 (register U32 val)
 #   endif
 }
 
+/*=====    Local Constants   =====*/
+static const unsigned BIT_mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF,  0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF };   /* up to 26 bits */
+
 
 /*-**************************************************************
 *  bitStream encoding
 ****************************************************************/
-MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* startPtr, size_t maxSize)
+/*! BIT_initCStream() :
+ *  `dstCapacity` must be > sizeof(void*)
+ *  @return : 0 if success,
+              otherwise an error code (can be tested using ERR_isError() ) */
+MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* startPtr, size_t dstCapacity)
 {
     bitC->bitContainer = 0;
     bitC->bitPos = 0;
     bitC->startPtr = (char*)startPtr;
     bitC->ptr = bitC->startPtr;
-    bitC->endPtr = bitC->startPtr + maxSize - sizeof(bitC->ptr);
-    if (maxSize < sizeof(bitC->ptr)) return ERROR(dstSize_tooSmall);
+    bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->ptr);
+    if (dstCapacity <= sizeof(bitC->ptr)) return ERROR(dstSize_tooSmall);
     return 0;
 }
 
+/*! BIT_addBits() :
+    can add up to 26 bits into `bitC`.
+    Does not check for register overflow ! */
 MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits)
 {
-    static const unsigned mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF,  0xFFFFFF, 0x1FFFFFF };   /* up to 25 bits */
-    bitC->bitContainer |= (value & mask[nbBits]) << bitC->bitPos;
+    bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
     bitC->bitPos += nbBits;
 }
 
-/*! BIT_addBitsFast
+/*! BIT_addBitsFast() :
  *  works only if `value` is _clean_, meaning all high bits above nbBits are 0 */
 MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits)
 {
@@ -198,20 +212,23 @@ MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBi
     bitC->bitPos += nbBits;
 }
 
-/*! BIT_flushBitsFast
+/*! BIT_flushBitsFast() :
  *  unsafe version; does not check buffer overflow */
 MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
 {
-    size_t nbBytes = bitC->bitPos >> 3;
+    size_t const nbBytes = bitC->bitPos >> 3;
     MEM_writeLEST(bitC->ptr, bitC->bitContainer);
     bitC->ptr += nbBytes;
     bitC->bitPos &= 7;
     bitC->bitContainer >>= nbBytes*8;   /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */
 }
 
+/*! BIT_flushBits() :
+ *  safe version; check for buffer overflow, and prevents it.
+ *  note : does not signal buffer overflow. This will be revealed later on using BIT_closeCStream() */
 MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
 {
-    size_t nbBytes = bitC->bitPos >> 3;
+    size_t const nbBytes = bitC->bitPos >> 3;
     MEM_writeLEST(bitC->ptr, bitC->bitContainer);
     bitC->ptr += nbBytes;
     if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
@@ -219,49 +236,41 @@ MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
     bitC->bitContainer >>= nbBytes*8;   /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */
 }
 
-/*! BIT_closeCStream
- *  @result : size of CStream, in bytes, or 0 if it cannot fit into dstBuffer */
+/*! BIT_closeCStream() :
+ *  @return : size of CStream, in bytes,
+              or 0 if it could not fit into dstBuffer */
 MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
 {
-    char* endPtr;
-
     BIT_addBitsFast(bitC, 1, 1);   /* endMark */
     BIT_flushBits(bitC);
 
-    if (bitC->ptr >= bitC->endPtr) /* too close to buffer's end */
-        return 0;   /* not storable */
+    if (bitC->ptr >= bitC->endPtr) return 0; /* doesn't fit within authorized budget : cancel */
 
-    endPtr = bitC->ptr;
-    endPtr += bitC->bitPos > 0;    /* remaining bits (incomplete byte) */
-
-    return (endPtr - bitC->startPtr);
+    return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
 }
 
 
 /*-********************************************************
 * bitStream decoding
 **********************************************************/
-/*!BIT_initDStream
-*  Initialize a BIT_DStream_t.
-*  @bitD : a pointer to an already allocated BIT_DStream_t structure
-*  @srcBuffer must point at the beginning of a bitStream
-*  @srcSize must be the exact size of the bitStream
-*  @result : size of stream (== srcSize) or an errorCode if a problem is detected
+/*! BIT_initDStream() :
+*   Initialize a BIT_DStream_t.
+*   `bitD` : a pointer to an already allocated BIT_DStream_t structure.
+*   `srcSize` must be the *exact* size of the bitStream, in bytes.
+*   @return : size of stream (== srcSize) or an errorCode if a problem is detected
 */
 MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
 {
     if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
 
     if (srcSize >=  sizeof(size_t)) {  /* normal case */
-        U32 contain32;
         bitD->start = (const char*)srcBuffer;
         bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(size_t);
         bitD->bitContainer = MEM_readLEST(bitD->ptr);
-        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
-        if (contain32 == 0) return ERROR(GENERIC);   /* endMark not present */
-        bitD->bitsConsumed = 8 - BIT_highbit32(contain32);
+        { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
+          if (lastByte == 0) return ERROR(GENERIC);   /* endMark not present */
+          bitD->bitsConsumed = 8 - BIT_highbit32(lastByte); }
     } else {
-        U32 contain32;
         bitD->start = (const char*)srcBuffer;
         bitD->ptr   = bitD->start;
         bitD->bitContainer = *(const BYTE*)(bitD->start);
@@ -275,33 +284,56 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
             case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) <<  8;
             default:;
         }
-        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
-        if (contain32 == 0) return ERROR(GENERIC);   /* endMark not present */
-        bitD->bitsConsumed = 8 - BIT_highbit32(contain32);
+        { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
+          if (lastByte == 0) return ERROR(GENERIC);   /* endMark not present */
+          bitD->bitsConsumed = 8 - BIT_highbit32(lastByte); }
         bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
     }
 
     return srcSize;
 }
 
-/*!BIT_lookBits
- * Provides next n bits from local register
- * local register is not modified (bits are still present for next read/look)
- * On 32-bits, maxNbBits==25
- * On 64-bits, maxNbBits==57
- * @return : value extracted
- */
-MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits)
+MEM_STATIC size_t BIT_getUpperBits(size_t bitD, U32 const start)
 {
-    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
-    return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
+    return bitD >> start;
 }
 
-/*! BIT_lookBitsFast :
-*   unsafe version; only works only if nbBits >= 1 */
-MEM_STATIC size_t BIT_lookBitsFast(BIT_DStream_t* bitD, U32 nbBits)
+MEM_STATIC size_t BIT_getMiddleBits(size_t bitD, U32 const nbBits, U32 const start)
 {
-    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
+#if defined(__BMI__) && defined(__GNUC__)   /* experimental */
+    return __builtin_ia32_bextr_u64(bitD, (nbBits<<8) | start );
+#else
+    return (bitD >> start) & BIT_mask[nbBits];
+#endif
+}
+
+MEM_STATIC size_t BIT_getLowerBits(size_t bitD, U32 const nbBits)
+{
+    return bitD & BIT_mask[nbBits];
+}
+
+/*! BIT_lookBits() :
+ *  Provides next n bits from local register.
+ *  local register is not modified (bits are still present for next read/look).
+ *  On 32-bits, maxNbBits==24.
+ *  On 64-bits, maxNbBits==56.
+ *  @return : value extracted
+ */
+ MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
+{
+#if defined(__BMI__) && defined(__GNUC__)   /* experimental */
+    return __builtin_ia32_bextr_u64(bitD->bitContainer, (nbBits<<8) | (64 - bitD->bitsConsumed - nbBits) );
+#else
+    U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1;
+    return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
+#endif
+}
+
+/*! BIT_lookBitsFast() :
+*   unsafe version; only works only if nbBits >= 1 */
+MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
+{
+    U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1;
     return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
 }
 
@@ -310,27 +342,32 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
     bitD->bitsConsumed += nbBits;
 }
 
-/*!BIT_readBits
- * Read next n bits from local register.
- * pay attention to not read more than nbBits contained into local register.
- * @return : extracted value.
+/*! BIT_readBits() :
+ *  Read (consume) next n bits from local register and update.
+ *  Pay attention to not read more than nbBits contained into local register.
+ *  @return : extracted value.
  */
 MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
 {
-    size_t value = BIT_lookBits(bitD, nbBits);
+    size_t const value = BIT_lookBits(bitD, nbBits);
     BIT_skipBits(bitD, nbBits);
     return value;
 }
 
-/*!BIT_readBitsFast :
-*  unsafe version; only works only if nbBits >= 1 */
+/*! BIT_readBitsFast() :
+*   unsafe version; only works only if nbBits >= 1 */
 MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
 {
-    size_t value = BIT_lookBitsFast(bitD, nbBits);
+    size_t const value = BIT_lookBitsFast(bitD, nbBits);
     BIT_skipBits(bitD, nbBits);
     return value;
 }
 
+/*! BIT_reloadDStream() :
+*   Refill `BIT_DStream_t` from src buffer previously defined (see BIT_initDStream() ).
+*   This function is safe, it guarantees it will not read beyond src buffer.
+*   @return : status of `BIT_DStream_t` internal register.
+              if status == unfinished, internal register is filled with >= (sizeof(size_t)*8 - 7) bits */
 MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
 {
 	if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* should never happen */
@@ -346,8 +383,7 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
         if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
         return BIT_DStream_completed;
     }
-    {
-        U32 nbBytes = bitD->bitsConsumed >> 3;
+    {   U32 nbBytes = bitD->bitsConsumed >> 3;
         BIT_DStream_status result = BIT_DStream_unfinished;
         if (bitD->ptr - nbBytes < bitD->start) {
             nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
@@ -360,8 +396,8 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
     }
 }
 
-/*! BIT_endOfDStream
-*   @return Tells if DStream has reached its exact end
+/*! BIT_endOfDStream() :
+*   @return Tells if DStream has exactly reached its end (all bits consumed).
 */
 MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
 {
diff --git a/lib/error_private.h b/lib/error_private.h
index c0c3f490..7bd03065 100644
--- a/lib/error_private.h
+++ b/lib/error_private.h
@@ -28,7 +28,7 @@
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
    You can contact the author at :
-   - Source repository : https://github.com/Cyan4973/zstd
+   - Homepage : http://www.zstd.net
 ****************************************************************** */
 /* Note : this module is expected to remain private, do not expose it */
 
@@ -62,7 +62,7 @@ extern "C" {
 
 
 /*-****************************************
-*  Customization
+*  Customization (error_public.h)
 ******************************************/
 typedef ZSTD_ErrorCode ERR_enum;
 #define PREFIX(name) ZSTD_error_##name
@@ -74,7 +74,7 @@ typedef ZSTD_ErrorCode ERR_enum;
 #ifdef ERROR
 #  undef ERROR   /* reported already defined on VS 2015 (Rich Geldreich) */
 #endif
-#define ERROR(name) (size_t)-PREFIX(name)
+#define ERROR(name) ((size_t)-PREFIX(name))
 
 ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
 
@@ -95,18 +95,19 @@ ERR_STATIC const char* ERR_getErrorName(size_t code)
     case PREFIX(prefix_unknown): return "Unknown frame descriptor";
     case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
     case PREFIX(frameParameter_unsupportedBy32bits): return "Frame parameter unsupported in 32-bits mode";
+    case PREFIX(compressionParameter_unsupported): return "Compression parameter is out of bound";
     case PREFIX(init_missing): return "Context should be init first";
     case PREFIX(memory_allocation): return "Allocation error : not enough memory";
     case PREFIX(stage_wrong): return "Operation not authorized at current processing stage";
     case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
     case PREFIX(srcSize_wrong): return "Src size incorrect";
     case PREFIX(corruption_detected): return "Corrupted block detected";
-    case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory";
-    case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max possible Symbol Value : too large";
+    case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
+    case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
     case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
     case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
     case PREFIX(maxCode):
-    default: return notErrorCode;   /* should be impossible, due to ERR_getError() */
+    default: return notErrorCode;   /* impossible, due to ERR_getError() */
     }
 }
 
diff --git a/lib/error_public.h b/lib/error_public.h
index 655e28e0..6fcf802e 100644
--- a/lib/error_public.h
+++ b/lib/error_public.h
@@ -28,7 +28,7 @@
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
    You can contact the author at :
-   - Source repository : https://github.com/Cyan4973/zstd
+   - Homepage : http://www.zstd.net
 ****************************************************************** */
 #ifndef ERROR_PUBLIC_H_MODULE
 #define ERROR_PUBLIC_H_MODULE
@@ -47,6 +47,7 @@ typedef enum {
   ZSTD_error_prefix_unknown,
   ZSTD_error_frameParameter_unsupported,
   ZSTD_error_frameParameter_unsupportedBy32bits,
+  ZSTD_error_compressionParameter_unsupported,
   ZSTD_error_init_missing,
   ZSTD_error_memory_allocation,
   ZSTD_error_stage_wrong,
@@ -60,8 +61,7 @@ typedef enum {
   ZSTD_error_maxCode
 } ZSTD_ErrorCode;
 
-/* note : functions provide error codes in reverse negative order,
-          so compare with (size_t)(0-enum) */
+/* note : compare with size_t function results using ZSTD_getError() */
 
 
 #if defined (__cplusplus)
diff --git a/lib/fse.c b/lib/fse.c
index 986a0da1..63898ab1 100644
--- a/lib/fse.c
+++ b/lib/fse.c
@@ -145,21 +145,18 @@ static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3)
 
 size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
 {
-    const unsigned tableSize = 1 << tableLog;
-    const unsigned tableMask = tableSize - 1;
+    U32 const tableSize = 1 << tableLog;
+    U32 const tableMask = tableSize - 1;
     void* const ptr = ct;
     U16* const tableU16 = ( (U16*) ptr) + 2;
     void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
     FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
-    const unsigned step = FSE_tableStep(tableSize);
-    unsigned cumul[FSE_MAX_SYMBOL_VALUE+2];
-    U32 position = 0;
+    U32 const step = FSE_tableStep(tableSize);
+    U32 cumul[FSE_MAX_SYMBOL_VALUE+2];
     FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */
     U32 highThreshold = tableSize-1;
-    unsigned symbol;
-    unsigned i;
 
-    /* header */
+    /* CTable header */
     tableU16[-2] = (U16) tableLog;
     tableU16[-1] = (U16) maxSymbolValue;
 
@@ -167,42 +164,44 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned
     *  http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
 
     /* symbol start positions */
-    cumul[0] = 0;
-    for (i=1; i<=maxSymbolValue+1; i++) {
-        if (normalizedCounter[i-1]==-1) {  /* Low proba symbol */
-            cumul[i] = cumul[i-1] + 1;
-            tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(i-1);
-        } else {
-            cumul[i] = cumul[i-1] + normalizedCounter[i-1];
-    }   }
-    cumul[maxSymbolValue+1] = tableSize+1;
-
-    /* Spread symbols */
-    for (symbol=0; symbol<=maxSymbolValue; symbol++) {
-        int nbOccurences;
-        for (nbOccurences=0; nbOccurences<normalizedCounter[symbol]; nbOccurences++) {
-            tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
-            position = (position + step) & tableMask;
-            while (position > highThreshold) position = (position + step) & tableMask;   /* Low proba area */
-    }   }
-
-    if (position!=0) return ERROR(GENERIC);   /* Must have gone through all positions */
-
-    /* Build table */
-    for (i=0; i<tableSize; i++) {
-        FSE_FUNCTION_TYPE s = tableSymbol[i];   /* note : static analyzer may not understand tableSymbol is properly initialized */
-        tableU16[cumul[s]++] = (U16) (tableSize+i);   /* TableU16 : sorted by symbol order; gives next state value */
+    {   U32 u;
+        cumul[0] = 0;
+        for (u=1; u<=maxSymbolValue+1; u++) {
+            if (normalizedCounter[u-1]==-1) {  /* Low proba symbol */
+                cumul[u] = cumul[u-1] + 1;
+                tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
+            } else {
+                cumul[u] = cumul[u-1] + normalizedCounter[u-1];
+        }   }
+        cumul[maxSymbolValue+1] = tableSize+1;
     }
 
+    /* Spread symbols */
+    {   U32 position = 0;
+        U32 symbol;
+        for (symbol=0; symbol<=maxSymbolValue; symbol++) {
+            int nbOccurences;
+            for (nbOccurences=0; nbOccurences<normalizedCounter[symbol]; nbOccurences++) {
+                tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
+                position = (position + step) & tableMask;
+                while (position > highThreshold) position = (position + step) & tableMask;   /* Low proba area */
+        }   }
+        if (position!=0) return ERROR(GENERIC);   /* Must have gone through all positions */
+    }
+
+    /* Build table */
+    { U32 u; for (u=0; u<tableSize; u++) {
+        FSE_FUNCTION_TYPE s = tableSymbol[u];   /* note : static analyzer may not understand tableSymbol is properly initialized */
+        tableU16[cumul[s]++] = (U16) (tableSize+u);   /* TableU16 : sorted by symbol order; gives next state value */
+    }}
+
     /* Build Symbol Transformation Table */
-    {
+    {   unsigned total = 0;
         unsigned s;
-        unsigned total = 0;
         for (s=0; s<=maxSymbolValue; s++) {
             switch (normalizedCounter[s])
             {
-            case  0:
-                break;
+            case  0: break;
             case -1:
             case  1:
                 symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
@@ -211,8 +210,8 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned
                 break;
             default :
                 {
-                    U32 maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
-                    U32 minStatePlus = normalizedCounter[s] << maxBitsOut;
+                    U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
+                    U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
                     symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
                     symbolTT[s].deltaFindState = total - normalizedCounter[s];
                     total +=  normalizedCounter[s];
@@ -242,9 +241,8 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
     const U32 tableMask = tableSize-1;
     const U32 step = FSE_tableStep(tableSize);
     U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
-    U32 position = 0;
     U32 highThreshold = tableSize-1;
-    const S16 largeLimit= (S16)(1 << (tableLog-1));
+    S16 const largeLimit= (S16)(1 << (tableLog-1));
     U32 noLarge = 1;
     U32 s;
 
@@ -264,24 +262,24 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
     }   }
 
     /* Spread symbols */
-    for (s=0; s<=maxSymbolValue; s++) {
-        int i;
-        for (i=0; i<normalizedCounter[s]; i++) {
-            tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
-            position = (position + step) & tableMask;
-            while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
-    }   }
-
-    if (position!=0) return ERROR(GENERIC);   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+    {   U32 position = 0;
+        for (s=0; s<=maxSymbolValue; s++) {
+            int i;
+            for (i=0; i<normalizedCounter[s]; i++) {
+                tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
+                position = (position + step) & tableMask;
+                while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+        }   }
+        if (position!=0) return ERROR(GENERIC);   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+    }
 
     /* Build Decoding table */
-    {
-        U32 i;
-        for (i=0; i<tableSize; i++) {
-            FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
+    {   U32 u;
+        for (u=0; u<tableSize; u++) {
+            FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
             U16 nextState = symbolNext[symbol]++;
-            tableDecode[i].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) );
-            tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
+            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) );
+            tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
     }   }
 
     DTableH.fastMode = (U16)noLarge;
@@ -365,8 +363,7 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
                 bitStream >>= 16;
                 bitCount -= 16;
         }   }
-        {
-            short count = normalizedCounter[charnum++];
+        {   short count = normalizedCounter[charnum++];
             const short max = (short)((2*threshold-1)-remaining);
             remaining -= FSE_abs(count);
             if (remaining<1) return ERROR(GENERIC);
@@ -465,8 +462,7 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
             else
                 bitStream >>= 2;
         }
-        {
-            const short max = (short)((2*threshold-1)-remaining);
+        {   short const max = (short)((2*threshold-1)-remaining);
             short count;
 
             if ((bitStream & (threshold-1)) < (U32)max) {
@@ -509,11 +505,11 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
 *  Counting histogram
 ****************************************************************/
 /*! FSE_count_simple
-    This function just counts byte values within @src,
-    and store the histogram into @count.
-    This function is unsafe : it doesn't check that all values within @src can fit into @count.
-    For this reason, prefer using a table @count with 256 elements.
-    @return : highest count for a single element
+    This function just counts byte values within `src`,
+    and store the histogram into table `count`.
+    This function is unsafe : it doesn't check that all values within `src` can fit into `count`.
+    For this reason, prefer using a table `count` with 256 elements.
+    @return : count of most numerous element
 */
 static size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
                                const void* src, size_t srcSize)
@@ -522,7 +518,6 @@ static size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
     const BYTE* const end = ip + srcSize;
     unsigned maxSymbolValue = *maxSymbolValuePtr;
     unsigned max=0;
-    U32 s;
 
     memset(count, 0, (maxSymbolValue+1)*sizeof(*count));
     if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
@@ -532,7 +527,7 @@ static size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
     while (!count[maxSymbolValue]) maxSymbolValue--;
     *maxSymbolValuePtr = maxSymbolValue;
 
-    for (s=0; s<=maxSymbolValue; s++) if (count[s] > max) max = count[s];
+    { U32 s; for (s=0; s<=maxSymbolValue; s++) if (count[s] > max) max = count[s]; }
 
     return (size_t)max;
 }
@@ -546,7 +541,6 @@ static size_t FSE_count_parallel(unsigned* count, unsigned* maxSymbolValuePtr,
     const BYTE* const iend = ip+sourceSize;
     unsigned maxSymbolValue = *maxSymbolValuePtr;
     unsigned max=0;
-    U32 s;
 
     U32 Counting1[256] = { 0 };
     U32 Counting2[256] = { 0 };
@@ -561,8 +555,8 @@ static size_t FSE_count_parallel(unsigned* count, unsigned* maxSymbolValuePtr,
     }
     if (!maxSymbolValue) maxSymbolValue = 255;            /* 0 == default */
 
-    {   /* by stripes of 16 bytes */
-        U32 cached = MEM_read32(ip); ip += 4;
+    /* by stripes of 16 bytes */
+    {   U32 cached = MEM_read32(ip); ip += 4;
         while (ip < iend-15) {
             U32 c = cached; cached = MEM_read32(ip); ip += 4;
             Counting1[(BYTE) c     ]++;
@@ -592,15 +586,15 @@ static size_t FSE_count_parallel(unsigned* count, unsigned* maxSymbolValuePtr,
     while (ip<iend) Counting1[*ip++]++;
 
     if (checkMax) {   /* verify stats will fit into destination table */
-        for (s=255; s>maxSymbolValue; s--) {
+        U32 s; for (s=255; s>maxSymbolValue; s--) {
             Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
             if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
     }   }
 
-    for (s=0; s<=maxSymbolValue; s++) {
+    { U32 s; for (s=0; s<=maxSymbolValue; s++) {
         count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
         if (count[s] > max) max = count[s];
-    }
+    }}
 
     while (!count[maxSymbolValue]) maxSymbolValue--;
     *maxSymbolValuePtr = maxSymbolValue;
@@ -628,13 +622,13 @@ size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr,
 /*-**************************************************************
 *  FSE Compression Code
 ****************************************************************/
-/*!
-FSE_CTable is a variable size structure which contains :
-    U16 tableLog;
-    U16 maxSymbolValue;
-    U16 nextStateNumber[1 << tableLog];                         // This size is variable
-    FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];  // This size is variable
-Allocation is manual, since C standard does not support variable-size structures.
+/*! FSE_sizeof_CTable() :
+    FSE_CTable is a variable size structure which contains :
+    `U16 tableLog;`
+    `U16 maxSymbolValue;`
+    `U16 nextStateNumber[1 << tableLog];`                         // This size is variable
+    `FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];`  // This size is variable
+Allocation is manual (C standard does not support variable-size structures).
 */
 
 size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog)
@@ -654,10 +648,7 @@ FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
     return (FSE_CTable*)malloc(size);
 }
 
-void  FSE_freeCTable (FSE_CTable* ct)
-{
-    free(ct);
-}
+void  FSE_freeCTable (FSE_CTable* ct) { free(ct); }
 
 /* provides the minimum logSize to safely represent a distribution */
 static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
@@ -733,7 +724,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
         /* all values are pretty poor;
            probably incompressible data (should have already been detected);
            find max, then give all remaining points to max */
-        U32 maxV = 0, maxC =0;
+        U32 maxV = 0, maxC = 0;
         for (s=0; s<=maxSymbolValue; s++)
             if (count[s] > maxC) maxV=s, maxC=count[s];
         norm[maxV] += (short)ToDistribute;
@@ -771,8 +762,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
     if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);   /* Unsupported size */
     if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC);   /* Too small tableLog, compression potentially impossible */
 
-    {
-        U32 const rtbTable[] = {     0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
+    {   U32 const rtbTable[] = {     0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
         U64 const scale = 62 - tableLog;
         U64 const step = ((U64)1<<62) / total;   /* <== here, one division ! */
         U64 const vStep = 1ULL<<(scale-20);
@@ -848,13 +838,11 @@ size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
         tableU16[s] = (U16)(tableSize + s);
 
     /* Build Symbol Transformation Table */
-    {
-        const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
+    {   const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
         for (s=0; s<=maxSymbolValue; s++) {
             symbolTT[s].deltaNbBits = deltaNbBits;
             symbolTT[s].deltaFindState = s-1;
-        }
-    }
+    }   }
 
     return 0;
 }
@@ -888,31 +876,30 @@ static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
                            const FSE_CTable* ct, const unsigned fast)
 {
     const BYTE* const istart = (const BYTE*) src;
-    const BYTE* ip;
     const BYTE* const iend = istart + srcSize;
-
-    size_t errorCode;
+    const BYTE* ip=iend;
     BIT_CStream_t bitC;
     FSE_CState_t CState1, CState2;
 
-
     /* init */
-    errorCode = BIT_initCStream(&bitC, dst, dstSize);
-    if (FSE_isError(errorCode)) return 0;
-    FSE_initCState(&CState1, ct);
-    CState2 = CState1;
-
-    ip=iend;
+    if (srcSize <= 2) return 0;
+    { size_t const errorCode = BIT_initCStream(&bitC, dst, dstSize);
+      if (FSE_isError(errorCode)) return 0; }
 
 #define FSE_FLUSHBITS(s)  (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
 
-    /* join to even */
     if (srcSize & 1) {
+        FSE_initCState2(&CState1, ct, *--ip);
+        FSE_initCState2(&CState2, ct, *--ip);
         FSE_encodeSymbol(&bitC, &CState1, *--ip);
         FSE_FLUSHBITS(&bitC);
+    } else {
+        FSE_initCState2(&CState2, ct, *--ip);
+        FSE_initCState2(&CState1, ct, *--ip);
     }
 
     /* join to mod 4 */
+    srcSize -= 2;
     if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) {  /* test bit 2 */
         FSE_encodeSymbol(&bitC, &CState2, *--ip);
         FSE_encodeSymbol(&bitC, &CState1, *--ip);
@@ -920,8 +907,7 @@ static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
     }
 
     /* 2 or 4 encoding per loop */
-    for ( ; ip>istart ; )
-    {
+    for ( ; ip>istart ; ) {
         FSE_encodeSymbol(&bitC, &CState2, *--ip);
 
         if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 )   /* this test must be static */
@@ -1106,24 +1092,25 @@ FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
     /* tail */
     /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
     while (1) {
-        if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
-            break;
+        if (op>(omax-2)) return ERROR(dstSize_tooSmall);
 
         *op++ = FSE_GETSYMBOL(&state1);
 
-        if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
+        if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
+            *op++ = FSE_GETSYMBOL(&state2);
             break;
+        }
+
+        if (op>(omax-2)) return ERROR(dstSize_tooSmall);
 
         *op++ = FSE_GETSYMBOL(&state2);
-    }
 
-    /* end ? */
-    if (BIT_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
-        return op-ostart;
+        if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
+            *op++ = FSE_GETSYMBOL(&state1);
+            break;
+    }   }
 
-    if (op==omax) return ERROR(dstSize_tooSmall);   /* dst buffer is full, but cSrc unfinished */
-
-    return ERROR(corruption_detected);
+    return op-ostart;
 }
 
 
diff --git a/lib/fse.h b/lib/fse.h
index db6f49cf..6dce6830 100644
--- a/lib/fse.h
+++ b/lib/fse.h
@@ -1,7 +1,7 @@
 /* ******************************************************************
-   FSE : Finite State Entropy coder
-   header file
-   Copyright (C) 2013-2015, Yann Collet.
+   FSE : Finite State Entropy codec
+   Public Prototypes declaration
+   Copyright (C) 2013-2016, Yann Collet.
 
    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 
@@ -30,7 +30,6 @@
 
    You can contact the author at :
    - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
-   - Public forum : https://groups.google.com/forum/#!forum/lz4c
 ****************************************************************** */
 #ifndef FSE_H
 #define FSE_H
@@ -40,8 +39,8 @@ extern "C" {
 #endif
 
 
-/* *****************************************
-*  Includes
+/*-*****************************************
+*  Dependencies
 ******************************************/
 #include <stddef.h>    /* size_t, ptrdiff_t */
 
@@ -49,32 +48,32 @@ extern "C" {
 /*-****************************************
 *  FSE simple functions
 ******************************************/
-size_t FSE_compress(void* dst, size_t maxDstSize,
-              const void* src, size_t srcSize);
-size_t FSE_decompress(void* dst,  size_t maxDstSize,
-                const void* cSrc, size_t cSrcSize);
-/*!
-FSE_compress():
+/*! FSE_compress() :
     Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
-    'dst' buffer must be already allocated. Compression runs faster is maxDstSize >= FSE_compressBound(srcSize)
-    return : size of compressed data (<= maxDstSize)
+    'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize).
+    @return : size of compressed data (<= dstCapacity).
     Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
                      if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
                      if FSE_isError(return), compression failed (more details using FSE_getErrorName())
+*/
+size_t FSE_compress(void* dst, size_t dstCapacity,
+              const void* src, size_t srcSize);
 
-FSE_decompress():
+/*! FSE_decompress():
     Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
-    into already allocated destination buffer 'dst', of size 'maxDstSize'.
-    return : size of regenerated data (<= maxDstSize)
-             or an error code, which can be tested using FSE_isError()
+    into already allocated destination buffer 'dst', of size 'dstCapacity'.
+    @return : size of regenerated data (<= maxDstSize),
+              or an error code, which can be tested using FSE_isError() .
 
-    ** Important ** : FSE_decompress() doesn't decompress non-compressible nor RLE data !!!
+    ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!!
     Why ? : making this distinction requires a header.
     Header management is intentionally delegated to the user layer, which can better manage special cases.
 */
+size_t FSE_decompress(void* dst,  size_t dstCapacity,
+                const void* cSrc, size_t cSrcSize);
 
 
-/* *****************************************
+/*-*****************************************
 *  Tool functions
 ******************************************/
 size_t FSE_compressBound(size_t size);       /* maximum compressed size */
@@ -84,14 +83,13 @@ unsigned    FSE_isError(size_t code);        /* tells if a return value is an er
 const char* FSE_getErrorName(size_t code);   /* provides error code string (useful for debugging) */
 
 
-/* *****************************************
+/*-*****************************************
 *  FSE advanced functions
 ******************************************/
-/*!
-FSE_compress2():
+/*! FSE_compress2() :
     Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
     Both parameters can be defined as '0' to mean : use default value
-    return : size of compressed data
+    @return : size of compressed data
     Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
                      if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
                      if FSE_isError(return), it's an error code.
@@ -99,7 +97,7 @@ FSE_compress2():
 size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
 
 
-/* *****************************************
+/*-*****************************************
 *  FSE detailed API
 ******************************************/
 /*!
@@ -122,65 +120,56 @@ or to save and provide normalized distribution using external method.
 
 /* *** COMPRESSION *** */
 
-/*!
-FSE_count():
-   Provides the precise count of each byte within a table 'count'
-   'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
-   *maxSymbolValuePtr will be updated if detected smaller than initial value.
-   @return : the count of the most frequent symbol (which is not identified)
-             if return == srcSize, there is only one symbol.
-             Can also return an error code, which can be tested with FSE_isError() */
+/*! FSE_count():
+    Provides the precise count of each byte within a table 'count'.
+    'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
+    *maxSymbolValuePtr will be updated if detected smaller than initial value.
+    @return : the count of the most frequent symbol (which is not identified).
+              if return == srcSize, there is only one symbol.
+              Can also return an error code, which can be tested with FSE_isError(). */
 size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
 
-/*!
-FSE_optimalTableLog():
-   dynamically downsize 'tableLog' when conditions are met.
-   It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
-   return : recommended tableLog (necessarily <= initial 'tableLog') */
+/*! FSE_optimalTableLog():
+    dynamically downsize 'tableLog' when conditions are met.
+    It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
+    @return : recommended tableLog (necessarily <= initial 'tableLog') */
 unsigned FSE_optimalTableLog(unsigned tableLog, size_t srcSize, unsigned maxSymbolValue);
 
-/*!
-FSE_normalizeCount():
-   normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
-   'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
-   return : tableLog,
-            or an errorCode, which can be tested using FSE_isError() */
+/*! FSE_normalizeCount():
+    normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
+    'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
+    @return : tableLog,
+              or an errorCode, which can be tested using FSE_isError() */
 size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
 
-/*!
-FSE_NCountWriteBound():
-   Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'
-   Typically useful for allocation purpose. */
+/*! FSE_NCountWriteBound():
+    Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
+    Typically useful for allocation purpose. */
 size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
 
-/*!
-FSE_writeNCount():
-   Compactly save 'normalizedCounter' into 'buffer'.
-   return : size of the compressed table
-            or an errorCode, which can be tested using FSE_isError() */
+/*! FSE_writeNCount():
+    Compactly save 'normalizedCounter' into 'buffer'.
+    @return : size of the compressed table,
+              or an errorCode, which can be tested using FSE_isError(). */
 size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
 
 
-/*!
-Constructor and Destructor of type FSE_CTable
-    Note that its size depends on 'tableLog' and 'maxSymbolValue' */
+/*! Constructor and Destructor of FSE_CTable.
+    Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
 typedef unsigned FSE_CTable;   /* don't allocate that. It's only meant to be more restrictive than void* */
 FSE_CTable* FSE_createCTable (unsigned tableLog, unsigned maxSymbolValue);
 void        FSE_freeCTable (FSE_CTable* ct);
 
-/*!
-FSE_buildCTable():
-   Builds @ct, which must be already allocated, using FSE_createCTable()
-   return : 0
-            or an errorCode, which can be tested using FSE_isError() */
+/*! FSE_buildCTable():
+    Builds `ct`, which must be already allocated, using FSE_createCTable().
+    @return : 0, or an errorCode, which can be tested using FSE_isError() */
 size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
 
-/*!
-FSE_compress_usingCTable():
-   Compress @src using @ct into @dst which must be already allocated
-   return : size of compressed data (<= @dstCapacity)
-            or 0 if compressed data could not fit into @dst
-            or an errorCode, which can be tested using FSE_isError() */
+/*! FSE_compress_usingCTable():
+    Compress `src` using `ct` into `dst` which must be already allocated.
+    @return : size of compressed data (<= `dstCapacity`),
+              or 0 if compressed data could not fit into `dst`,
+              or an errorCode, which can be tested using FSE_isError() */
 size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct);
 
 /*!
@@ -221,7 +210,7 @@ If there is an error, both functions will return an ErrorCode (which can be test
 
 'CTable' can then be used to compress 'src', with FSE_compress_usingCTable().
 Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize'
-The function returns the size of compressed data (without header), necessarily <= @dstCapacity.
+The function returns the size of compressed data (without header), necessarily <= `dstCapacity`.
 If it returns '0', compressed data could not fit into 'dst'.
 If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
 */
@@ -229,34 +218,29 @@ If there is an error, the function will return an ErrorCode (which can be tested
 
 /* *** DECOMPRESSION *** */
 
-/*!
-FSE_readNCount():
-   Read compactly saved 'normalizedCounter' from 'rBuffer'.
-   return : size read from 'rBuffer'
-            or an errorCode, which can be tested using FSE_isError()
-            maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
+/*! FSE_readNCount():
+    Read compactly saved 'normalizedCounter' from 'rBuffer'.
+    @return : size read from 'rBuffer',
+              or an errorCode, which can be tested using FSE_isError().
+              maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
 size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
 
-/*!
-Constructor and Destructor of type FSE_DTable
+/*! Constructor and Destructor of FSE_DTable.
     Note that its size depends on 'tableLog' */
 typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
 FSE_DTable* FSE_createDTable(unsigned tableLog);
 void        FSE_freeDTable(FSE_DTable* dt);
 
-/*!
-FSE_buildDTable():
-   Builds 'dt', which must be already allocated, using FSE_createDTable()
-   return : 0,
-            or an errorCode, which can be tested using FSE_isError() */
+/*! FSE_buildDTable():
+    Builds 'dt', which must be already allocated, using FSE_createDTable().
+    return : 0, or an errorCode, which can be tested using FSE_isError() */
 size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
 
-/*!
-FSE_decompress_usingDTable():
-   Decompress compressed source @cSrc of size @cSrcSize using @dt
-   into @dst which must be already allocated.
-   return : size of regenerated data (necessarily <= @dstCapacity)
-            or an errorCode, which can be tested using FSE_isError() */
+/*! FSE_decompress_usingDTable():
+    Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
+    into `dst` which must be already allocated.
+    @return : size of regenerated data (necessarily <= `dstCapacity`),
+              or an errorCode, which can be tested using FSE_isError() */
 size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
 
 /*!
@@ -281,9 +265,9 @@ This is performed by the function FSE_buildDTable().
 The space required by 'FSE_DTable' must be already allocated using FSE_createDTable().
 If there is an error, the function will return an error code, which can be tested using FSE_isError().
 
-'FSE_DTable' can then be used to decompress 'cSrc', with FSE_decompress_usingDTable().
-'cSrcSize' must be strictly correct, otherwise decompression will fail.
-FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=maxDstSize).
+`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable().
+`cSrcSize` must be strictly correct, otherwise decompression will fail.
+FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`).
 If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
 */
 
diff --git a/lib/fse_static.h b/lib/fse_static.h
index ca303db8..f3c3d44e 100644
--- a/lib/fse_static.h
+++ b/lib/fse_static.h
@@ -267,7 +267,7 @@ MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePt
     BIT_flushBits(bitC);
 }
 
-/* decompression */
+/*<=====    Decompression    =====>*/
 
 typedef struct {
     U16 tableLog;
@@ -290,34 +290,39 @@ MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, con
     DStatePtr->table = dt + 1;
 }
 
-MEM_STATIC size_t FSE_getStateValue(FSE_DState_t* DStatePtr)
+MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr)
 {
-    return DStatePtr->state;
+    FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    return DInfo.symbol;
 }
 
-MEM_STATIC BYTE FSE_peakSymbol(FSE_DState_t* DStatePtr)
+MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
 {
-    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
-    return DInfo.symbol;
+    FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    U32 const nbBits = DInfo.nbBits;
+    size_t const lowBits = BIT_readBits(bitD, nbBits);
+    DStatePtr->state = DInfo.newState + lowBits;
 }
 
 MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
 {
-    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
-    const U32  nbBits = DInfo.nbBits;
-    BYTE symbol = DInfo.symbol;
-    size_t lowBits = BIT_readBits(bitD, nbBits);
+    FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    U32 const nbBits = DInfo.nbBits;
+    BYTE const symbol = DInfo.symbol;
+    size_t const lowBits = BIT_readBits(bitD, nbBits);
 
     DStatePtr->state = DInfo.newState + lowBits;
     return symbol;
 }
 
+/*! FSE_decodeSymbolFast() :
+    unsafe, only works if no symbol has a probability > 50% */
 MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
 {
-    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
-    const U32 nbBits = DInfo.nbBits;
-    BYTE symbol = DInfo.symbol;
-    size_t lowBits = BIT_readBitsFast(bitD, nbBits);
+    FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    U32 const nbBits = DInfo.nbBits;
+    BYTE const symbol = DInfo.symbol;
+    size_t const lowBits = BIT_readBitsFast(bitD, nbBits);
 
     DStatePtr->state = DInfo.newState + lowBits;
     return symbol;
diff --git a/lib/huff0.c b/lib/huff0.c
index 929bc87b..505adcec 100644
--- a/lib/huff0.c
+++ b/lib/huff0.c
@@ -1,6 +1,6 @@
 /* ******************************************************************
    Huff0 : Huffman coder, part of New Generation Entropy library
-   Copyright (C) 2013-2015, Yann Collet.
+   Copyright (C) 2013-2016, Yann Collet.
 
    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 
@@ -103,8 +103,7 @@ typedef struct nodeElt_s {
 } nodeElt;
 
 /*! HUF_writeCTable() :
-    @dst : destination buffer
-    @CTable : huffman tree to save, using huff0 representation
+    `CTable` : huffman tree to save, using huff0 representation.
     @return : size of saved CTable */
 size_t HUF_writeCTable (void* dst, size_t maxDstSize,
                         const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog)
@@ -181,66 +180,58 @@ size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, si
     BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
     U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];   /* large enough for values from 0 to 16 */
     U32 tableLog = 0;
-    size_t iSize;
+    size_t readSize;
     U32 nbSymbols = 0;
-    U32 n;
-    U32 nextRankStart;
     //memset(huffWeight, 0, sizeof(huffWeight));   /* is not necessary, even though some analyzer complain ... */
 
     /* get symbol weights */
-    iSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE+1, rankVal, &nbSymbols, &tableLog, src, srcSize);
-    if (HUF_isError(iSize)) return iSize;
+    readSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE+1, rankVal, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(readSize)) return readSize;
 
     /* check result */
     if (tableLog > HUF_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
     if (nbSymbols > maxSymbolValue+1) return ERROR(maxSymbolValue_tooSmall);
 
     /* Prepare base value per rank */
-    nextRankStart = 0;
-    for (n=1; n<=tableLog; n++) {
-        U32 current = nextRankStart;
-        nextRankStart += (rankVal[n] << (n-1));
-        rankVal[n] = current;
-    }
+    {   U32 n, nextRankStart = 0;
+        for (n=1; n<=tableLog; n++) {
+            U32 current = nextRankStart;
+            nextRankStart += (rankVal[n] << (n-1));
+            rankVal[n] = current;
+    }   }
 
     /* fill nbBits */
-    for (n=0; n<nbSymbols; n++) {
+    { U32 n; for (n=0; n<nbSymbols; n++) {
         const U32 w = huffWeight[n];
         CTable[n].nbBits = (BYTE)(tableLog + 1 - w);
-    }
+    }}
 
     /* fill val */
-    {
-        U16 nbPerRank[HUF_MAX_TABLELOG+1] = {0};
+    {   U16 nbPerRank[HUF_MAX_TABLELOG+1] = {0};
         U16 valPerRank[HUF_MAX_TABLELOG+1] = {0};
-        for (n=0; n<nbSymbols; n++)
-            nbPerRank[CTable[n].nbBits]++;
-        {
-            /* determine stating value per rank */
-            U16 min = 0;
-            for (n=HUF_MAX_TABLELOG; n>0; n--) {
+        { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
+        /* determine stating value per rank */
+        {   U16 min = 0;
+            U32 n; for (n=HUF_MAX_TABLELOG; n>0; n--) {
                 valPerRank[n] = min;      /* get starting value within each rank */
                 min += nbPerRank[n];
                 min >>= 1;
         }   }
-        for (n=0; n<=maxSymbolValue; n++)
-            CTable[n].val = valPerRank[CTable[n].nbBits]++;   /* assign value within rank, symbol order */
+        /* assign value within rank, symbol order */
+        { U32 n; for (n=0; n<=maxSymbolValue; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
     }
 
-    return iSize;
+    return readSize;
 }
 
 
 static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
 {
-    int totalCost = 0;
     const U32 largestBits = huffNode[lastNonNull].nbBits;
-
-    /* early exit : all is fine */
-    if (largestBits <= maxNbBits) return largestBits;
+    if (largestBits <= maxNbBits) return largestBits;   /* early exit : no elt > maxNbBits */
 
     /* there are several too large elements (at least >= 2) */
-    {
+    {   int totalCost = 0;
         const U32 baseCost = 1 << (largestBits - maxNbBits);
         U32 n = lastNonNull;
 
@@ -248,26 +239,25 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
             totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
             huffNode[n].nbBits = (BYTE)maxNbBits;
             n --;
-        } /* n stops at huffNode[n].nbBits <= maxNbBits */
-        while (huffNode[n].nbBits == maxNbBits) n--;   /* n end at index of smallest symbol using (maxNbBits-1) */
+        }  /* n stops at huffNode[n].nbBits <= maxNbBits */
+        while (huffNode[n].nbBits == maxNbBits) n--;   /* n end at index of smallest symbol using < maxNbBits */
 
         /* renorm totalCost */
         totalCost >>= (largestBits - maxNbBits);  /* note : totalCost is necessarily a multiple of baseCost */
 
         /* repay normalized cost */
-        {
-            const U32 noSymbol = 0xF0F0F0F0;
+        {   U32 const noSymbol = 0xF0F0F0F0;
             U32 rankLast[HUF_MAX_TABLELOG+1];
-            U32 currentNbBits = maxNbBits;
             int pos;
 
             /* Get pos of last (smallest) symbol per rank */
             memset(rankLast, 0xF0, sizeof(rankLast));
-            for (pos=n ; pos >= 0; pos--) {
-                if (huffNode[pos].nbBits >= currentNbBits) continue;
-                currentNbBits = huffNode[pos].nbBits;   /* < maxNbBits */
-                rankLast[maxNbBits-currentNbBits] = pos;
-            }
+            {   U32 currentNbBits = maxNbBits;
+                for (pos=n ; pos >= 0; pos--) {
+                    if (huffNode[pos].nbBits >= currentNbBits) continue;
+                    currentNbBits = huffNode[pos].nbBits;   /* < maxNbBits */
+                    rankLast[maxNbBits-currentNbBits] = pos;
+            }   }
 
             while (totalCost > 0) {
                 U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1;
@@ -276,9 +266,8 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
                     U32 lowPos = rankLast[nBitsToDecrease-1];
                     if (highPos == noSymbol) continue;
                     if (lowPos == noSymbol) break;
-                    {
-                        U32 highTotal = huffNode[highPos].count;
-                        U32 lowTotal = 2 * huffNode[lowPos].count;
+                    {   U32 const highTotal = huffNode[highPos].count;
+                        U32 const lowTotal = 2 * huffNode[lowPos].count;
                         if (highTotal <= lowTotal) break;
                 }   }
                 /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
@@ -294,7 +283,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
                     rankLast[nBitsToDecrease]--;
                     if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
                         rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
-            }   }
+            }   }   /* while (totalCost > 0) */
 
             while (totalCost < 0) {  /* Sometimes, cost correction overshoot */
                 if (rankLast[1] == noSymbol) {  /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
@@ -307,7 +296,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
                 huffNode[ rankLast[1] + 1 ].nbBits--;
                 rankLast[1]++;
                 totalCost ++;
-    }   }   }
+    }   }   }   /* there are several too large elements (at least >= 2) */
 
     return maxNbBits;
 }
@@ -331,8 +320,8 @@ static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
     for (n=30; n>0; n--) rank[n-1].base += rank[n].base;
     for (n=0; n<32; n++) rank[n].current = rank[n].base;
     for (n=0; n<=maxSymbolValue; n++) {
-        U32 c = count[n];
-        U32 r = BIT_highbit32(c+1) + 1;
+        U32 const c = count[n];
+        U32 const r = BIT_highbit32(c+1) + 1;
         U32 pos = rank[r].current++;
         while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) huffNode[pos]=huffNode[pos-1], pos--;
         huffNode[pos].count = c;
@@ -389,21 +378,18 @@ size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U3
     maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);
 
     /* fill result into tree (val, nbBits) */
-    {
-        U16 nbPerRank[HUF_MAX_TABLELOG+1] = {0};
+    {   U16 nbPerRank[HUF_MAX_TABLELOG+1] = {0};
         U16 valPerRank[HUF_MAX_TABLELOG+1] = {0};
         if (maxNbBits > HUF_MAX_TABLELOG) return ERROR(GENERIC);   /* check fit into table */
         for (n=0; n<=nonNullRank; n++)
             nbPerRank[huffNode[n].nbBits]++;
-        {
-            /* determine stating value per rank */
-            U16 min = 0;
+        /* determine stating value per rank */
+        {   U16 min = 0;
             for (n=maxNbBits; n>0; n--) {
                 valPerRank[n] = min;      /* get starting value within each rank */
                 min += nbPerRank[n];
                 min >>= 1;
-            }
-        }
+        }   }
         for (n=0; n<=maxSymbolValue; n++)
             tree[huffNode[n].byte].nbBits = huffNode[n].nbBits;   /* push nbBits per symbol, symbol order */
         for (n=0; n<=maxSymbolValue; n++)
@@ -432,17 +418,16 @@ size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, si
 {
     const BYTE* ip = (const BYTE*) src;
     BYTE* const ostart = (BYTE*)dst;
-    BYTE* op = ostart;
     BYTE* const oend = ostart + dstSize;
+    BYTE* op = ostart;
     size_t n;
     const unsigned fast = (dstSize >= HUF_BLOCKBOUND(srcSize));
-    size_t errorCode;
     BIT_CStream_t bitC;
 
     /* init */
     if (dstSize < 8) return 0;   /* not enough space to compress */
-    errorCode = BIT_initCStream(&bitC, op, oend-op);
-    if (HUF_isError(errorCode)) return 0;
+    { size_t const errorCode = BIT_initCStream(&bitC, op, oend-op);
+      if (HUF_isError(errorCode)) return 0; }
 
     n = srcSize & ~3;  /* join to mod 4 */
     switch (srcSize & 3)
@@ -475,12 +460,12 @@ size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, si
 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
 {
     size_t segmentSize = (srcSize+3)/4;   /* first 3 segments */
-    size_t errorCode;
     const BYTE* ip = (const BYTE*) src;
     const BYTE* const iend = ip + srcSize;
     BYTE* const ostart = (BYTE*) dst;
-    BYTE* op = ostart;
     BYTE* const oend = ostart + dstSize;
+    BYTE* op = ostart;
+    size_t errorCode;
 
     if (dstSize < 6 + 1 + 1 + 1 + 8) return 0;   /* minimum space to compress successfully */
     if (srcSize < 12) return 0;   /* no saving possible : too small input */
@@ -523,8 +508,8 @@ static size_t HUF_compress_internal (
                 unsigned singleStream)
 {
     BYTE* const ostart = (BYTE*)dst;
-    BYTE* op = ostart;
     BYTE* const oend = ostart + dstSize;
+    BYTE* op = ostart;
 
     U32 count[HUF_MAX_SYMBOL_VALUE+1];
     HUF_CElt CTable[HUF_MAX_SYMBOL_VALUE+1];
@@ -573,8 +558,8 @@ static size_t HUF_compress_internal (
 
 
 size_t HUF_compress1X (void* dst, size_t dstSize,
-                const void* src, size_t srcSize,
-                unsigned maxSymbolValue, unsigned huffLog)
+                 const void* src, size_t srcSize,
+                 unsigned maxSymbolValue, unsigned huffLog)
 {
     return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1);
 }
@@ -602,9 +587,9 @@ typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4;  /* doubl
 
 typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
 
-/*! HUF_readStats
-    Read compact Huffman tree, saved by HUF_writeCTable
-    @huffWeight : destination buffer
+/*! HUF_readStats() :
+    Read compact Huffman tree, saved by HUF_writeCTable().
+    `huffWeight` is destination buffer.
     @return : size read from `src`
 */
 static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
@@ -616,13 +601,12 @@ static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
     const BYTE* ip = (const BYTE*) src;
     size_t iSize = ip[0];
     size_t oSize;
-    U32 n;
 
     //memset(huffWeight, 0, hwSize);   /* is not necessary, even though some analyzer complain ... */
 
     if (iSize >= 128)  { /* special header */
         if (iSize >= (242)) {  /* RLE */
-            static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
+            static U32 l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
             oSize = l[iSize-242];
             memset(huffWeight, 1, hwSize);
             iSize = 0;
@@ -633,10 +617,11 @@ static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
             if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
             if (oSize >= hwSize) return ERROR(corruption_detected);
             ip += 1;
-            for (n=0; n<oSize; n+=2) {
-                huffWeight[n]   = ip[n/2] >> 4;
-                huffWeight[n+1] = ip[n/2] & 15;
-    }   }   }
+            {   U32 n;
+                for (n=0; n<oSize; n+=2) {
+                    huffWeight[n]   = ip[n/2] >> 4;
+                    huffWeight[n+1] = ip[n/2] & 15;
+    }   }   }   }
     else  {   /* header compressed with FSE (normal case) */
         if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
         oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize);   /* max (hwSize-1) values decoded, as last one is implied */
@@ -646,20 +631,20 @@ static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
     /* collect weight stats */
     memset(rankStats, 0, (HUF_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32));
     weightTotal = 0;
-    for (n=0; n<oSize; n++) {
+    { U32 n; for (n=0; n<oSize; n++) {
         if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
         rankStats[huffWeight[n]]++;
         weightTotal += (1 << huffWeight[n]) >> 1;
-    }
+    }}
 
     /* get last non-null symbol weight (implied, total must be 2^n) */
     tableLog = BIT_highbit32(weightTotal) + 1;
     if (tableLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
-    {   /* determine last weight */
-        U32 total = 1 << tableLog;
-        U32 rest = total - weightTotal;
-        U32 verif = 1 << BIT_highbit32(rest);
-        U32 lastWeight = BIT_highbit32(rest) + 1;
+    /* determine last weight */
+    {   U32 const total = 1 << tableLog;
+        U32 const rest = total - weightTotal;
+        U32 const verif = 1 << BIT_highbit32(rest);
+        U32 const lastWeight = BIT_highbit32(rest) + 1;
         if (verif != rest) return ERROR(corruption_detected);    /* last value must be a clean power of 2 */
         huffWeight[oSize] = (BYTE)lastWeight;
         rankStats[lastWeight]++;
@@ -724,12 +709,13 @@ size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
     return iSize;
 }
 
+
 static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
 {
-        const size_t val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
-        const BYTE c = dt[val].byte;
-        BIT_skipBits(Dstream, dt[val].nbBits);
-        return c;
+    const size_t val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+    const BYTE c = dt[val].byte;
+    BIT_skipBits(Dstream, dt[val].nbBits);
+    return c;
 }
 
 #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
@@ -773,13 +759,13 @@ size_t HUF_decompress1X2_usingDTable(
 {
     BYTE* op = (BYTE*)dst;
     BYTE* const oend = op + dstSize;
-    size_t errorCode;
     const U32 dtLog = DTable[0];
     const void* dtPtr = DTable;
     const HUF_DEltX2* const dt = ((const HUF_DEltX2*)dtPtr)+1;
     BIT_DStream_t bitD;
-    errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
-    if (HUF_isError(errorCode)) return errorCode;
+
+    { size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
+      if (HUF_isError(errorCode)) return errorCode; }
 
     HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog);
 
@@ -793,9 +779,8 @@ size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cS
 {
     HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG);
     const BYTE* ip = (const BYTE*) cSrc;
-    size_t errorCode;
 
-    errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize);
+    size_t const errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize);
     if (HUF_isError(errorCode)) return errorCode;
     if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);
     ip += errorCode;
@@ -810,91 +795,92 @@ size_t HUF_decompress4X2_usingDTable(
     const void* cSrc, size_t cSrcSize,
     const U16* DTable)
 {
-    const BYTE* const istart = (const BYTE*) cSrc;
-    BYTE* const ostart = (BYTE*) dst;
-    BYTE* const oend = ostart + dstSize;
-    const void* const dtPtr = DTable;
-    const HUF_DEltX2* const dt = ((const HUF_DEltX2*)dtPtr) +1;
-    const U32 dtLog = DTable[0];
-    size_t errorCode;
-
     /* Check */
-    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+    if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */
 
-    /* Init */
-    BIT_DStream_t bitD1;
-    BIT_DStream_t bitD2;
-    BIT_DStream_t bitD3;
-    BIT_DStream_t bitD4;
-    const size_t length1 = MEM_readLE16(istart);
-    const size_t length2 = MEM_readLE16(istart+2);
-    const size_t length3 = MEM_readLE16(istart+4);
-    size_t length4;
-    const BYTE* const istart1 = istart + 6;  /* jumpTable */
-    const BYTE* const istart2 = istart1 + length1;
-    const BYTE* const istart3 = istart2 + length2;
-    const BYTE* const istart4 = istart3 + length3;
-    const size_t segmentSize = (dstSize+3) / 4;
-    BYTE* const opStart2 = ostart + segmentSize;
-    BYTE* const opStart3 = opStart2 + segmentSize;
-    BYTE* const opStart4 = opStart3 + segmentSize;
-    BYTE* op1 = ostart;
-    BYTE* op2 = opStart2;
-    BYTE* op3 = opStart3;
-    BYTE* op4 = opStart4;
-    U32 endSignal;
+    {   const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+        const void* const dtPtr = DTable;
+        const HUF_DEltX2* const dt = ((const HUF_DEltX2*)dtPtr) +1;
+        const U32 dtLog = DTable[0];
+        size_t errorCode;
 
-    length4 = cSrcSize - (length1 + length2 + length3 + 6);
-    if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
-    errorCode = BIT_initDStream(&bitD1, istart1, length1);
-    if (HUF_isError(errorCode)) return errorCode;
-    errorCode = BIT_initDStream(&bitD2, istart2, length2);
-    if (HUF_isError(errorCode)) return errorCode;
-    errorCode = BIT_initDStream(&bitD3, istart3, length3);
-    if (HUF_isError(errorCode)) return errorCode;
-    errorCode = BIT_initDStream(&bitD4, istart4, length4);
-    if (HUF_isError(errorCode)) return errorCode;
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
 
-    /* 16-32 symbols per loop (4-8 symbols per stream) */
-    endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
-    for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; ) {
-        HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
-        HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
-        HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
-        HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
-        HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
-        HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
-        HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
-        HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
-        HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
-        HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
-        HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
-        HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
-        HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
-        HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
-        HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
-        HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        errorCode = BIT_initDStream(&bitD1, istart1, length1);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD2, istart2, length2);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD3, istart3, length3);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD4, istart4, length4);
+        if (HUF_isError(errorCode)) return errorCode;
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
         endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; ) {
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
     }
-
-    /* check corruption */
-    if (op1 > opStart2) return ERROR(corruption_detected);
-    if (op2 > opStart3) return ERROR(corruption_detected);
-    if (op3 > opStart4) return ERROR(corruption_detected);
-    /* note : op4 supposed already verified within main loop */
-
-    /* finish bitStreams one by one */
-    HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
-    HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
-    HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
-    HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
-
-    /* check */
-    endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
-    if (!endSignal) return ERROR(corruption_detected);
-
-    /* decoded size */
-    return dstSize;
 }
 
 
@@ -902,9 +888,8 @@ size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cS
 {
     HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG);
     const BYTE* ip = (const BYTE*) cSrc;
-    size_t errorCode;
 
-    errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize);
+    size_t const errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize);
     if (HUF_isError(errorCode)) return errorCode;
     if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);
     ip += errorCode;
@@ -925,7 +910,6 @@ static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 co
 {
     HUF_DEltX4 DElt;
     U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
-    U32 s;
 
     /* get pre-calculated rankVal */
     memcpy(rankVal, rankValOrigin, sizeof(rankVal));
@@ -941,7 +925,7 @@ static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 co
     }
 
     /* fill DTable */
-    for (s=0; s<sortedListSize; s++) {   /* note : sortedSymbols already skipped */
+    { U32 s; for (s=0; s<sortedListSize; s++) {   /* note : sortedSymbols already skipped */
         const U32 symbol = sortedSymbols[s].symbol;
         const U32 weight = sortedSymbols[s].weight;
         const U32 nbBits = nbBitsBaseline - weight;
@@ -956,7 +940,7 @@ static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 co
         do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */
 
         rankVal[weight] += length;
-    }
+    }}
 }
 
 typedef U32 rankVal_t[HUF_ABSOLUTEMAX_TABLELOG][HUF_ABSOLUTEMAX_TABLELOG + 1];
@@ -991,16 +975,14 @@ static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
                            sortedList+sortedRank, sortedListSize-sortedRank,
                            nbBitsBaseline, symbol);
         } else {
-            U32 i;
-            const U32 end = start + length;
             HUF_DEltX4 DElt;
-
             MEM_writeLE16(&(DElt.sequence), symbol);
-            DElt.nbBits   = (BYTE)(nbBits);
-            DElt.length   = 1;
-            for (i = start; i < end; i++)
-                DTable[i] = DElt;
-        }
+            DElt.nbBits = (BYTE)(nbBits);
+            DElt.length = 1;
+            {   U32 u;
+                const U32 end = start + length;
+                for (u = start; u < end; u++) DTable[u] = DElt;
+        }   }
         rankVal[weight] += length;
     }
 }
@@ -1033,8 +1015,7 @@ size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
     for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
 
     /* Get start index of each weight */
-    {
-        U32 w, nextRankStart = 0;
+    {   U32 w, nextRankStart = 0;
         for (w=1; w<=maxW; w++) {
             U32 current = nextRankStart;
             nextRankStart += rankStats[w];
@@ -1045,8 +1026,7 @@ size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
     }
 
     /* sort symbols by weight */
-    {
-        U32 s;
+    {   U32 s;
         for (s=0; s<nbSymbols; s++) {
             U32 w = weightList[s];
             U32 r = rankStart[w]++;
@@ -1057,8 +1037,7 @@ size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
     }
 
     /* Build rankVal */
-    {
-        const U32 minBits = tableLog+1 - maxW;
+    {   const U32 minBits = tableLog+1 - maxW;
         U32 nextRankVal = 0;
         U32 w, consumed;
         const int rescale = (memLog-tableLog) - 1;   /* tableLog <= memLog */
@@ -1155,15 +1134,14 @@ size_t HUF_decompress1X4_usingDTable(
     const U32 dtLog = DTable[0];
     const void* const dtPtr = DTable;
     const HUF_DEltX4* const dt = ((const HUF_DEltX4*)dtPtr) +1;
-    size_t errorCode;
 
     /* Init */
     BIT_DStream_t bitD;
-    errorCode = BIT_initDStream(&bitD, istart, cSrcSize);
-    if (HUF_isError(errorCode)) return errorCode;
+    { size_t const errorCode = BIT_initDStream(&bitD, istart, cSrcSize);
+      if (HUF_isError(errorCode)) return errorCode; }
 
-    /* finish bitStreams one by one */
-    HUF_decodeStreamX4(ostart, &bitD, oend,     dt, dtLog);
+    /* decode */
+    HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtLog);
 
     /* check */
     if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
@@ -1177,7 +1155,7 @@ size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cS
     HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_MAX_TABLELOG);
     const BYTE* ip = (const BYTE*) cSrc;
 
-    size_t hSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize);
+    size_t const hSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize);
     if (HUF_isError(hSize)) return hSize;
     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
     ip += hSize;
@@ -1193,8 +1171,7 @@ size_t HUF_decompress4X4_usingDTable(
 {
     if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
 
-    {
-        const BYTE* const istart = (const BYTE*) cSrc;
+    {   const BYTE* const istart = (const BYTE*) cSrc;
         BYTE* const ostart = (BYTE*) dst;
         BYTE* const oend = ostart + dstSize;
         const void* const dtPtr = DTable;
@@ -1381,11 +1358,10 @@ size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize)
     if (tableLog > memLog) return ERROR(tableLog_tooLarge);   /* DTable is too small */
 
     /* find maxWeight */
-    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
+    for (maxW = tableLog; maxW && rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
 
     /* Get start index of each weight */
-    {
-        U32 w, nextRankStart = 0;
+    {   U32 w, nextRankStart = 0;
         for (w=1; w<=maxW; w++) {
             U32 current = nextRankStart;
             nextRankStart += rankStats[w];
@@ -1396,8 +1372,7 @@ size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize)
     }
 
     /* sort symbols by weight */
-    {
-        U32 s;
+    {   U32 s;
         for (s=0; s<nbSymbols; s++) {
             U32 w = weightList[s];
             U32 r = rankStart[w]++;
@@ -1408,8 +1383,7 @@ size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize)
     }
 
     /* Build rankVal */
-    {
-        const U32 minBits = tableLog+1 - maxW;
+    {   const U32 minBits = tableLog+1 - maxW;
         U32 nextRankVal = 0;
         U32 w, consumed;
         const int rescale = (memLog-tableLog) - 1;   /* tableLog <= memLog */
@@ -1426,8 +1400,7 @@ size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize)
     }   }   }
 
     /* fill tables */
-    {
-        void* ddPtr = DTable+1;
+    {   void* ddPtr = DTable+1;
         HUF_DDescX6* DDescription = (HUF_DDescX6*)ddPtr;
         void* dsPtr = DTable + 1 + ((size_t)1<<(memLog-1));
         HUF_DSeqX6* DSequence = (HUF_DSeqX6*)dsPtr;
@@ -1559,95 +1532,96 @@ size_t HUF_decompress4X6_usingDTable(
     const void* cSrc, size_t cSrcSize,
     const U32* DTable)
 {
-    const BYTE* const istart = (const BYTE*) cSrc;
-    BYTE* const ostart = (BYTE*) dst;
-    BYTE* const oend = ostart + dstSize;
-
-    const U32 dtLog = DTable[0];
-    const void* const ddPtr = DTable+1;
-    const HUF_DDescX6* dd = (const HUF_DDescX6*)ddPtr;
-    const void* const dsPtr = DTable + 1 + ((size_t)1<<(dtLog-1));
-    const HUF_DSeqX6* ds = (const HUF_DSeqX6*)dsPtr;
-    size_t errorCode;
-
     /* Check */
     if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
 
-    /* Init */
-    BIT_DStream_t bitD1;
-    BIT_DStream_t bitD2;
-    BIT_DStream_t bitD3;
-    BIT_DStream_t bitD4;
-    const size_t length1 = MEM_readLE16(istart);
-    const size_t length2 = MEM_readLE16(istart+2);
-    const size_t length3 = MEM_readLE16(istart+4);
-    size_t length4;
-    const BYTE* const istart1 = istart + 6;  /* jumpTable */
-    const BYTE* const istart2 = istart1 + length1;
-    const BYTE* const istart3 = istart2 + length2;
-    const BYTE* const istart4 = istart3 + length3;
-    const size_t segmentSize = (dstSize+3) / 4;
-    BYTE* const opStart2 = ostart + segmentSize;
-    BYTE* const opStart3 = opStart2 + segmentSize;
-    BYTE* const opStart4 = opStart3 + segmentSize;
-    BYTE* op1 = ostart;
-    BYTE* op2 = opStart2;
-    BYTE* op3 = opStart3;
-    BYTE* op4 = opStart4;
-    U32 endSignal;
+    {   const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
 
-    length4 = cSrcSize - (length1 + length2 + length3 + 6);
-    if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
-    errorCode = BIT_initDStream(&bitD1, istart1, length1);
-    if (HUF_isError(errorCode)) return errorCode;
-    errorCode = BIT_initDStream(&bitD2, istart2, length2);
-    if (HUF_isError(errorCode)) return errorCode;
-    errorCode = BIT_initDStream(&bitD3, istart3, length3);
-    if (HUF_isError(errorCode)) return errorCode;
-    errorCode = BIT_initDStream(&bitD4, istart4, length4);
-    if (HUF_isError(errorCode)) return errorCode;
+        const U32 dtLog = DTable[0];
+        const void* const ddPtr = DTable+1;
+        const HUF_DDescX6* dd = (const HUF_DDescX6*)ddPtr;
+        const void* const dsPtr = DTable + 1 + ((size_t)1<<(dtLog-1));
+        const HUF_DSeqX6* ds = (const HUF_DSeqX6*)dsPtr;
+        size_t errorCode;
 
-    /* 16-64 symbols per loop (4-16 symbols per stream) */
-    endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
-    for ( ; (op3 <= opStart4) && (endSignal==BIT_DStream_unfinished) && (op4<=(oend-16)) ; ) {
-        HUF_DECODE_SYMBOLX6_2(op1, &bitD1);
-        HUF_DECODE_SYMBOLX6_2(op2, &bitD2);
-        HUF_DECODE_SYMBOLX6_2(op3, &bitD3);
-        HUF_DECODE_SYMBOLX6_2(op4, &bitD4);
-        HUF_DECODE_SYMBOLX6_1(op1, &bitD1);
-        HUF_DECODE_SYMBOLX6_1(op2, &bitD2);
-        HUF_DECODE_SYMBOLX6_1(op3, &bitD3);
-        HUF_DECODE_SYMBOLX6_1(op4, &bitD4);
-        HUF_DECODE_SYMBOLX6_2(op1, &bitD1);
-        HUF_DECODE_SYMBOLX6_2(op2, &bitD2);
-        HUF_DECODE_SYMBOLX6_2(op3, &bitD3);
-        HUF_DECODE_SYMBOLX6_2(op4, &bitD4);
-        HUF_DECODE_SYMBOLX6_0(op1, &bitD1);
-        HUF_DECODE_SYMBOLX6_0(op2, &bitD2);
-        HUF_DECODE_SYMBOLX6_0(op3, &bitD3);
-        HUF_DECODE_SYMBOLX6_0(op4, &bitD4);
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
 
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        errorCode = BIT_initDStream(&bitD1, istart1, length1);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD2, istart2, length2);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD3, istart3, length3);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD4, istart4, length4);
+        if (HUF_isError(errorCode)) return errorCode;
+
+        /* 16-64 symbols per loop (4-16 symbols per stream) */
         endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        for ( ; (op3 <= opStart4) && (endSignal==BIT_DStream_unfinished) && (op4<=(oend-16)) ; ) {
+            HUF_DECODE_SYMBOLX6_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX6_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX6_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX6_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX6_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX6_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX6_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX6_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX6_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX6_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX6_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX6_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX6_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX6_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX6_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX6_0(op4, &bitD4);
+
+            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX6(op1, &bitD1, opStart2, DTable, dtLog);
+        HUF_decodeStreamX6(op2, &bitD2, opStart3, DTable, dtLog);
+        HUF_decodeStreamX6(op3, &bitD3, opStart4, DTable, dtLog);
+        HUF_decodeStreamX6(op4, &bitD4, oend,     DTable, dtLog);
+
+        /* check */
+        endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
     }
-
-    /* check corruption */
-    if (op1 > opStart2) return ERROR(corruption_detected);
-    if (op2 > opStart3) return ERROR(corruption_detected);
-    if (op3 > opStart4) return ERROR(corruption_detected);
-    /* note : op4 supposed already verified within main loop */
-
-    /* finish bitStreams one by one */
-    HUF_decodeStreamX6(op1, &bitD1, opStart2, DTable, dtLog);
-    HUF_decodeStreamX6(op2, &bitD2, opStart3, DTable, dtLog);
-    HUF_decodeStreamX6(op3, &bitD3, opStart4, DTable, dtLog);
-    HUF_decodeStreamX6(op4, &bitD4, oend,     DTable, dtLog);
-
-    /* check */
-    endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
-    if (!endSignal) return ERROR(corruption_detected);
-
-    /* decoded size */
-    return dstSize;
 }
 
 
@@ -1656,7 +1630,7 @@ size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cS
     HUF_CREATE_STATIC_DTABLEX6(DTable, HUF_MAX_TABLELOG);
     const BYTE* ip = (const BYTE*) cSrc;
 
-    size_t hSize = HUF_readDTableX6 (DTable, cSrc, cSrcSize);
+    size_t const hSize = HUF_readDTableX6 (DTable, cSrc, cSrcSize);
     if (HUF_isError(hSize)) return hSize;
     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
     ip += hSize;
diff --git a/lib/huff0.h b/lib/huff0.h
index fe28d7be..9d6e11f7 100644
--- a/lib/huff0.h
+++ b/lib/huff0.h
@@ -48,24 +48,24 @@ extern "C" {
 /* ****************************************
 *  Huff0 simple functions
 ******************************************/
-size_t HUF_compress(void* dst, size_t maxDstSize,
+size_t HUF_compress(void* dst, size_t dstCapacity,
               const void* src, size_t srcSize);
 size_t HUF_decompress(void* dst,  size_t dstSize,
                 const void* cSrc, size_t cSrcSize);
-/*!
-HUF_compress():
+/*
+HUF_compress() :
     Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
-    'dst' buffer must be already allocated. Compression runs faster if maxDstSize >= HUF_compressBound(srcSize).
+    'dst' buffer must be already allocated. Compression runs faster if dstCapacity >= HUF_compressBound(srcSize).
     Note : srcSize must be <= 128 KB
-    @return : size of compressed data (<= maxDstSize)
+    @return : size of compressed data (<= dstCapacity)
     Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
-                     if return == 1, srcData is a single repeated byte symbol (RLE compression)
+                     if return == 1, srcData is a single repeated byte symbol (RLE compression).
                      if HUF_isError(return), compression failed (more details using HUF_getErrorName())
 
-HUF_decompress():
+HUF_decompress() :
     Decompress Huff0 data from buffer 'cSrc', of size 'cSrcSize',
     into already allocated destination buffer 'dst', of size 'dstSize'.
-    @dstSize : must be the **exact** size of original (uncompressed) data.
+    `dstSize` : must be the **exact** size of original (uncompressed) data.
     Note : in contrast with FSE, HUF_decompress can regenerate
            RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
            because it knows size to regenerate.
@@ -77,11 +77,11 @@ HUF_decompress():
 /* ****************************************
 *  Tool functions
 ******************************************/
-size_t HUF_compressBound(size_t size);       /* maximum compressed size */
+size_t HUF_compressBound(size_t size);       /**< maximum compressed size */
 
 /* Error Management */
-unsigned    HUF_isError(size_t code);        /* tells if a return value is an error code */
-const char* HUF_getErrorName(size_t code);   /* provides error code string (useful for debugging) */
+unsigned    HUF_isError(size_t code);        /**< tells if a return value is an error code */
+const char* HUF_getErrorName(size_t code);   /**< provides error code string (useful for debugging) */
 
 
 /* ****************************************
diff --git a/lib/huff0_static.h b/lib/huff0_static.h
index 84033964..32d66d35 100644
--- a/lib/huff0_static.h
+++ b/lib/huff0_static.h
@@ -85,7 +85,7 @@ HUF_compress() does the following:
 1. count symbol occurrence from source[] into table count[] using FSE_count()
 2. build Huffman table from count using HUF_buildCTable()
 3. save Huffman table to memory buffer using HUF_writeCTable()
-4. encode the data stream using HUF_compress_usingCTable()
+4. encode the data stream using HUF_compress4X_usingCTable()
 
 The following API allows targeting specific sub-functions for advanced tasks.
 For example, it's possible to compress several blocks using the same 'CTable',
@@ -95,7 +95,7 @@ or to save and regenerate 'CTable' using external methods.
 typedef struct HUF_CElt_s HUF_CElt;   /* incomplete type */
 size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits);
 size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
-size_t HUF_compress4X_into4Segments(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
+size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
 
 
 /*!
diff --git a/lib/legacy/zstd_legacy.h b/lib/legacy/zstd_legacy.h
index 4ae1deb6..3285bbeb 100644
--- a/lib/legacy/zstd_legacy.h
+++ b/lib/legacy/zstd_legacy.h
@@ -46,6 +46,7 @@ extern "C" {
 #include "zstd_v02.h"
 #include "zstd_v03.h"
 #include "zstd_v04.h"
+#include "zstd_v05.h"
 
 MEM_STATIC unsigned ZSTD_isLegacy (U32 magicNumberLE)
 {
@@ -53,28 +54,32 @@ MEM_STATIC unsigned ZSTD_isLegacy (U32 magicNumberLE)
 	{
 		case ZSTDv01_magicNumberLE :
 		case ZSTDv02_magicNumber :
-		case ZSTDv03_magicNumber : 
-		case ZSTDv04_magicNumber : return 1;
+		case ZSTDv03_magicNumber :
+		case ZSTDv04_magicNumber :
+		case ZSTDv05_MAGICNUMBER :
+            return 1;
 		default : return 0;
 	}
 }
 
 
 MEM_STATIC size_t ZSTD_decompressLegacy(
-                     void* dst, size_t maxOriginalSize,
+                     void* dst, size_t dstCapacity,
                const void* src, size_t compressedSize,
                      U32 magicNumberLE)
 {
 	switch(magicNumberLE)
 	{
 		case ZSTDv01_magicNumberLE :
-			return ZSTDv01_decompress(dst, maxOriginalSize, src, compressedSize);
+			return ZSTDv01_decompress(dst, dstCapacity, src, compressedSize);
 		case ZSTDv02_magicNumber :
-			return ZSTDv02_decompress(dst, maxOriginalSize, src, compressedSize);
+			return ZSTDv02_decompress(dst, dstCapacity, src, compressedSize);
 		case ZSTDv03_magicNumber :
-			return ZSTDv03_decompress(dst, maxOriginalSize, src, compressedSize);
+			return ZSTDv03_decompress(dst, dstCapacity, src, compressedSize);
 		case ZSTDv04_magicNumber :
-			return ZSTDv04_decompress(dst, maxOriginalSize, src, compressedSize);
+			return ZSTDv04_decompress(dst, dstCapacity, src, compressedSize);
+		case ZSTDv05_MAGICNUMBER :
+			return ZSTDv05_decompress(dst, dstCapacity, src, compressedSize);
 		default :
 		    return ERROR(prefix_unknown);
 	}
diff --git a/lib/legacy/zstd_v04.h b/lib/legacy/zstd_v04.h
index a6129823..9279ebf9 100644
--- a/lib/legacy/zstd_v04.h
+++ b/lib/legacy/zstd_v04.h
@@ -95,7 +95,7 @@ size_t ZSTDv04_decompressContinue(ZSTDv04_Dctx* dctx, void* dst, size_t maxDstSi
 ***************************************/
 typedef struct ZBUFFv04_DCtx_s ZBUFFv04_DCtx;
 ZBUFFv04_DCtx* ZBUFFv04_createDCtx(void);
-size_t      ZBUFFv04_freeDCtx(ZBUFFv04_DCtx* dctx);
+size_t         ZBUFFv04_freeDCtx(ZBUFFv04_DCtx* dctx);
 
 size_t ZBUFFv04_decompressInit(ZBUFFv04_DCtx* dctx);
 size_t ZBUFFv04_decompressWithDictionary(ZBUFFv04_DCtx* dctx, const void* dict, size_t dictSize);
diff --git a/lib/legacy/zstd_v05.c b/lib/legacy/zstd_v05.c
new file mode 100644
index 00000000..d5072479
--- /dev/null
+++ b/lib/legacy/zstd_v05.c
@@ -0,0 +1,4726 @@
+/* ******************************************************************
+   zstd_v05.c
+   Decompression module for ZSTD v0.5 legacy format
+   Copyright (C) 2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - Homepage : http://www.zstd.net/
+****************************************************************** */
+
+/*- Dependencies -*/
+#include "zstd_v05.h"
+
+
+/* ******************************************************************
+   mem.h
+   low-level memory access routines
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSEv05 source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*-****************************************
+*  Dependencies
+******************************************/
+#include <stddef.h>    /* size_t, ptrdiff_t */
+#include <string.h>    /* memcpy */
+
+
+/*-****************************************
+*  Compiler specifics
+******************************************/
+#if defined(__GNUC__)
+#  define MEM_STATIC static __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define MEM_STATIC static inline
+#elif defined(_MSC_VER)
+#  define MEM_STATIC static __inline
+#else
+#  define MEM_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/*-**************************************************************
+*  Basic Types
+*****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# include <stdint.h>
+  typedef  uint8_t BYTE;
+  typedef uint16_t U16;
+  typedef  int16_t S16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+  typedef  int64_t S64;
+#else
+  typedef unsigned char       BYTE;
+  typedef unsigned short      U16;
+  typedef   signed short      S16;
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+  typedef unsigned long long  U64;
+  typedef   signed long long  S64;
+#endif
+
+
+/*-**************************************************************
+*  Memory I/O
+*****************************************************************/
+/* MEM_FORCE_MEMORY_ACCESS :
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The below switch allow to select different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violate C standard.
+ *            It can generate buggy code on targets depending on alignment.
+ *            In some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef MEM_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#    define MEM_FORCE_MEMORY_ACCESS 2
+#  elif defined(__INTEL_COMPILER) || \
+  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+#    define MEM_FORCE_MEMORY_ACCESS 1
+#  endif
+#endif
+
+MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; }
+MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; }
+
+MEM_STATIC unsigned MEM_isLittleEndian(void)
+{
+    const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
+    return one.c[0];
+}
+
+#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
+
+/* violates C standard, by lying on structure alignment.
+Only use if no other choice to achieve best performance on target platform */
+MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
+MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
+MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
+MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
+MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
+
+#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign;
+
+MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
+MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
+MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
+MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; }
+
+#else
+
+/* default method, safe and standard.
+   can sometimes prove slower */
+
+MEM_STATIC U16 MEM_read16(const void* memPtr)
+{
+    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U32 MEM_read32(const void* memPtr)
+{
+    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U64 MEM_read64(const void* memPtr)
+{
+    U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value)
+{
+    memcpy(memPtr, &value, sizeof(value));
+}
+
+MEM_STATIC void MEM_write32(void* memPtr, U32 value)
+{
+    memcpy(memPtr, &value, sizeof(value));
+}
+
+MEM_STATIC void MEM_write64(void* memPtr, U64 value)
+{
+    memcpy(memPtr, &value, sizeof(value));
+}
+
+#endif /* MEM_FORCE_MEMORY_ACCESS */
+
+
+MEM_STATIC U16 MEM_readLE16(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read16(memPtr);
+    else {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U16)(p[0] + (p[1]<<8));
+    }
+}
+
+MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
+{
+    if (MEM_isLittleEndian()) {
+        MEM_write16(memPtr, val);
+    } else {
+        BYTE* p = (BYTE*)memPtr;
+        p[0] = (BYTE)val;
+        p[1] = (BYTE)(val>>8);
+    }
+}
+
+MEM_STATIC U32 MEM_readLE32(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read32(memPtr);
+    else {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
+    }
+}
+
+MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32)
+{
+    if (MEM_isLittleEndian()) {
+        MEM_write32(memPtr, val32);
+    } else {
+        BYTE* p = (BYTE*)memPtr;
+        p[0] = (BYTE)val32;
+        p[1] = (BYTE)(val32>>8);
+        p[2] = (BYTE)(val32>>16);
+        p[3] = (BYTE)(val32>>24);
+    }
+}
+
+MEM_STATIC U64 MEM_readLE64(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read64(memPtr);
+    else {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
+                     + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
+    }
+}
+
+MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64)
+{
+    if (MEM_isLittleEndian()) {
+        MEM_write64(memPtr, val64);
+    } else {
+        BYTE* p = (BYTE*)memPtr;
+        p[0] = (BYTE)val64;
+        p[1] = (BYTE)(val64>>8);
+        p[2] = (BYTE)(val64>>16);
+        p[3] = (BYTE)(val64>>24);
+        p[4] = (BYTE)(val64>>32);
+        p[5] = (BYTE)(val64>>40);
+        p[6] = (BYTE)(val64>>48);
+        p[7] = (BYTE)(val64>>56);
+    }
+}
+
+MEM_STATIC size_t MEM_readLEST(const void* memPtr)
+{
+    if (MEM_32bits())
+        return (size_t)MEM_readLE32(memPtr);
+    else
+        return (size_t)MEM_readLE64(memPtr);
+}
+
+MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val)
+{
+    if (MEM_32bits())
+        MEM_writeLE32(memPtr, (U32)val);
+    else
+        MEM_writeLE64(memPtr, (U64)val);
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* MEM_H_MODULE */
+
+/* ******************************************************************
+   Error codes list
+   Copyright (C) 2016, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/zstd
+****************************************************************** */
+#ifndef ERROR_PUBLIC_H_MODULE
+#define ERROR_PUBLIC_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* ****************************************
+*  error codes list
+******************************************/
+typedef enum {
+  ZSTDv05_error_no_error,
+  ZSTDv05_error_GENERIC,
+  ZSTDv05_error_prefix_unknown,
+  ZSTDv05_error_frameParameter_unsupported,
+  ZSTDv05_error_frameParameter_unsupportedBy32bits,
+  ZSTDv05_error_init_missing,
+  ZSTDv05_error_memory_allocation,
+  ZSTDv05_error_stage_wrong,
+  ZSTDv05_error_dstSize_tooSmall,
+  ZSTDv05_error_srcSize_wrong,
+  ZSTDv05_error_corruption_detected,
+  ZSTDv05_error_tableLog_tooLarge,
+  ZSTDv05_error_maxSymbolValue_tooLarge,
+  ZSTDv05_error_maxSymbolValue_tooSmall,
+  ZSTDv05_error_dictionary_corrupted,
+  ZSTDv05_error_maxCode
+} ZSTDv05_ErrorCode;
+
+/* note : functions provide error codes in reverse negative order,
+          so compare with (size_t)(0-enum) */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ERROR_PUBLIC_H_MODULE */
+
+
+/*
+    zstd - standard compression library
+    Header File for static linking only
+    Copyright (C) 2014-2016, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd homepage : http://www.zstd.net
+*/
+#ifndef ZSTD_STATIC_H
+#define ZSTD_STATIC_H
+
+/* The prototypes defined within this file are considered experimental.
+ * They should not be used in the context DLL as they may change in the future.
+ * Prefer static linking if you need them, to control breaking version changes issues.
+ */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+
+/*-*************************************
+*  Types
+***************************************/
+#define ZSTDv05_WINDOWLOG_ABSOLUTEMIN 11
+
+/* from faster to stronger */
+typedef enum { ZSTDv05_fast, ZSTDv05_greedy, ZSTDv05_lazy, ZSTDv05_lazy2, ZSTDv05_btlazy2, ZSTDv05_opt, ZSTDv05_btopt } ZSTDv05_strategy;
+
+typedef struct
+{
+    U64 srcSize;       /* optional : tells how much bytes are present in the frame. Use 0 if not known. */
+    U32 windowLog;     /* largest match distance : larger == more compression, more memory needed during decompression */
+    U32 contentLog;    /* full search segment : larger == more compression, slower, more memory (useless for fast) */
+    U32 hashLog;       /* dispatch table : larger == faster, more memory */
+    U32 searchLog;     /* nb of searches : larger == more compression, slower */
+    U32 searchLength;  /* match length searched : larger == faster decompression, sometimes less compression */
+    U32 targetLength;  /* acceptable match size for optimal parser (only) : larger == more compression, slower */
+    ZSTDv05_strategy strategy;
+} ZSTDv05_parameters;
+
+
+/*-*************************************
+*  Advanced functions
+***************************************/
+/*- Advanced Decompression functions -*/
+
+/*! ZSTDv05_decompress_usingPreparedDCtx() :
+*   Same as ZSTDv05_decompress_usingDict, but using a reference context `preparedDCtx`, where dictionary has been loaded.
+*   It avoids reloading the dictionary each time.
+*   `preparedDCtx` must have been properly initialized using ZSTDv05_decompressBegin_usingDict().
+*   Requires 2 contexts : 1 for reference, which will not be modified, and 1 to run the decompression operation */
+size_t ZSTDv05_decompress_usingPreparedDCtx(
+                                             ZSTDv05_DCtx* dctx, const ZSTDv05_DCtx* preparedDCtx,
+                                             void* dst, size_t dstCapacity,
+                                       const void* src, size_t srcSize);
+
+
+/* **************************************
+*  Streaming functions (direct mode)
+****************************************/
+size_t ZSTDv05_decompressBegin(ZSTDv05_DCtx* dctx);
+size_t ZSTDv05_decompressBegin_usingDict(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize);
+void   ZSTDv05_copyDCtx(ZSTDv05_DCtx* dctx, const ZSTDv05_DCtx* preparedDCtx);
+
+size_t ZSTDv05_getFrameParams(ZSTDv05_parameters* params, const void* src, size_t srcSize);
+
+size_t ZSTDv05_nextSrcSizeToDecompress(ZSTDv05_DCtx* dctx);
+size_t ZSTDv05_decompressContinue(ZSTDv05_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+/*
+  Streaming decompression, direct mode (bufferless)
+
+  A ZSTDv05_DCtx object is required to track streaming operations.
+  Use ZSTDv05_createDCtx() / ZSTDv05_freeDCtx() to manage it.
+  A ZSTDv05_DCtx object can be re-used multiple times.
+
+  First typical operation is to retrieve frame parameters, using ZSTDv05_getFrameParams().
+  This operation is independent, and just needs enough input data to properly decode the frame header.
+  Objective is to retrieve *params.windowlog, to know minimum amount of memory required during decoding.
+  Result : 0 when successful, it means the ZSTDv05_parameters structure has been filled.
+           >0 : means there is not enough data into src. Provides the expected size to successfully decode header.
+           errorCode, which can be tested using ZSTDv05_isError()
+
+  Start decompression, with ZSTDv05_decompressBegin() or ZSTDv05_decompressBegin_usingDict()
+  Alternatively, you can copy a prepared context, using ZSTDv05_copyDCtx()
+
+  Then use ZSTDv05_nextSrcSizeToDecompress() and ZSTDv05_decompressContinue() alternatively.
+  ZSTDv05_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTDv05_decompressContinue().
+  ZSTDv05_decompressContinue() requires this exact amount of bytes, or it will fail.
+  ZSTDv05_decompressContinue() needs previous data blocks during decompression, up to (1 << windowlog).
+  They should preferably be located contiguously, prior to current block. Alternatively, a round buffer is also possible.
+
+  @result of ZSTDv05_decompressContinue() is the number of bytes regenerated within 'dst'.
+  It can be zero, which is not an error; it just means ZSTDv05_decompressContinue() has decoded some header.
+
+  A frame is fully decoded when ZSTDv05_nextSrcSizeToDecompress() returns zero.
+  Context can then be reset to start a new decompression.
+*/
+
+
+/* **************************************
+*  Block functions
+****************************************/
+/*! Block functions produce and decode raw zstd blocks, without frame metadata.
+    User will have to take in charge required information to regenerate data, such as block sizes.
+
+    A few rules to respect :
+    - Uncompressed block size must be <= 128 KB
+    - Compressing or decompressing requires a context structure
+      + Use ZSTDv05_createCCtx() and ZSTDv05_createDCtx()
+    - It is necessary to init context before starting
+      + compression : ZSTDv05_compressBegin()
+      + decompression : ZSTDv05_decompressBegin()
+      + variants _usingDict() are also allowed
+      + copyCCtx() and copyDCtx() work too
+    - When a block is considered not compressible enough, ZSTDv05_compressBlock() result will be zero.
+      In which case, nothing is produced into `dst`.
+      + User must test for such outcome and deal directly with uncompressed data
+      + ZSTDv05_decompressBlock() doesn't accept uncompressed data as input !!
+*/
+
+size_t ZSTDv05_decompressBlock(ZSTDv05_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* ZSTDv05_STATIC_H */
+
+
+
+/* ******************************************************************
+   Error codes and messages
+   Copyright (C) 2013-2016, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/zstd
+****************************************************************** */
+/* Note : this module is expected to remain private, do not expose it */
+
+#ifndef ERROR_H_MODULE
+#define ERROR_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+
+/* ****************************************
+*  Compiler-specific
+******************************************/
+#if defined(__GNUC__)
+#  define ERR_STATIC static __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define ERR_STATIC static inline
+#elif defined(_MSC_VER)
+#  define ERR_STATIC static __inline
+#else
+#  define ERR_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/*-****************************************
+*  Customization
+******************************************/
+typedef ZSTDv05_ErrorCode ERR_enum;
+#define PREFIX(name) ZSTDv05_error_##name
+
+
+/*-****************************************
+*  Error codes handling
+******************************************/
+#ifdef ERROR
+#  undef ERROR   /* reported already defined on VS 2015 (Rich Geldreich) */
+#endif
+#define ERROR(name) (size_t)-PREFIX(name)
+
+ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
+
+ERR_STATIC ERR_enum ERR_getError(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); }
+
+
+/*-****************************************
+*  Error Strings
+******************************************/
+
+ERR_STATIC const char* ERR_getErrorName(size_t code)
+{
+    static const char* notErrorCode = "Unspecified error code";
+    switch( ERR_getError(code) )
+    {
+    case PREFIX(no_error): return "No error detected";
+    case PREFIX(GENERIC):  return "Error (generic)";
+    case PREFIX(prefix_unknown): return "Unknown frame descriptor";
+    case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
+    case PREFIX(frameParameter_unsupportedBy32bits): return "Frame parameter unsupported in 32-bits mode";
+    case PREFIX(init_missing): return "Context should be init first";
+    case PREFIX(memory_allocation): return "Allocation error : not enough memory";
+    case PREFIX(stage_wrong): return "Operation not authorized at current processing stage";
+    case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
+    case PREFIX(srcSize_wrong): return "Src size incorrect";
+    case PREFIX(corruption_detected): return "Corrupted block detected";
+    case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory";
+    case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max possible Symbol Value : too large";
+    case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
+    case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
+    case PREFIX(maxCode):
+    default: return notErrorCode;   /* should be impossible, due to ERR_getError() */
+    }
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ERROR_H_MODULE */
+/*
+    zstd_internal - common functions to include
+    Header File for include
+    Copyright (C) 2014-2016, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+*/
+#ifndef ZSTD_CCOMMON_H_MODULE
+#define ZSTD_CCOMMON_H_MODULE
+
+
+
+/*-*************************************
+*  Common macros
+***************************************/
+#define MIN(a,b) ((a)<(b) ? (a) : (b))
+#define MAX(a,b) ((a)>(b) ? (a) : (b))
+
+
+/*-*************************************
+*  Common constants
+***************************************/
+#define ZSTDv05_DICT_MAGIC  0xEC30A435
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define BLOCKSIZE (128 KB)                 /* define, for static allocation */
+
+static const size_t ZSTDv05_blockHeaderSize = 3;
+static const size_t ZSTDv05_frameHeaderSize_min = 5;
+#define ZSTDv05_frameHeaderSize_max 5         /* define, for static allocation */
+
+#define BITv057 128
+#define BITv056  64
+#define BITv055  32
+#define BITv054  16
+#define BITv051   2
+#define BITv050   1
+
+#define IS_HUFv05 0
+#define IS_PCH 1
+#define IS_RAW 2
+#define IS_RLE 3
+
+#define MINMATCH 4
+#define REPCODE_STARTVALUE 1
+
+#define Litbits  8
+#define MLbits   7
+#define LLbits   6
+#define Offbits  5
+#define MaxLit ((1<<Litbits) - 1)
+#define MaxML  ((1<<MLbits) - 1)
+#define MaxLL  ((1<<LLbits) - 1)
+#define MaxOff ((1<<Offbits)- 1)
+#define MLFSEv05Log   10
+#define LLFSEv05Log   10
+#define OffFSEv05Log   9
+#define MaxSeq MAX(MaxLL, MaxML)
+
+#define FSEv05_ENCODING_RAW     0
+#define FSEv05_ENCODING_RLE     1
+#define FSEv05_ENCODING_STATIC  2
+#define FSEv05_ENCODING_DYNAMIC 3
+
+
+#define HufLog 12
+
+#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
+#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */)   /* for a non-null block */
+
+#define WILDCOPY_OVERLENGTH 8
+
+typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
+
+
+/*-*******************************************
+*  Shared functions to include for inlining
+*********************************************/
+static void ZSTDv05_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
+
+#define COPY8(d,s) { ZSTDv05_copy8(d,s); d+=8; s+=8; }
+
+/*! ZSTDv05_wildcopy() :
+*   custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */
+MEM_STATIC void ZSTDv05_wildcopy(void* dst, const void* src, size_t length)
+{
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + length;
+    do
+        COPY8(op, ip)
+    while (op < oend);
+}
+
+MEM_STATIC unsigned ZSTDv05_highbit(U32 val)
+{
+#   if defined(_MSC_VER)   /* Visual */
+    unsigned long r=0;
+    _BitScanReverse(&r, val);
+    return (unsigned)r;
+#   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* GCC Intrinsic */
+    return 31 - __builtin_clz(val);
+#   else   /* Software version */
+    static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+    U32 v = val;
+    int r;
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27];
+    return r;
+#   endif
+}
+
+
+/*-*******************************************
+*  Private interfaces
+*********************************************/
+typedef struct {
+    void* buffer;
+    U32*  offsetStart;
+    U32*  offset;
+    BYTE* offCodeStart;
+    BYTE* offCode;
+    BYTE* litStart;
+    BYTE* lit;
+    BYTE* litLengthStart;
+    BYTE* litLength;
+    BYTE* matchLengthStart;
+    BYTE* matchLength;
+    BYTE* dumpsStart;
+    BYTE* dumps;
+    /* opt */
+    U32* matchLengthFreq;
+    U32* litLengthFreq;
+    U32* litFreq;
+    U32* offCodeFreq;
+    U32  matchLengthSum;
+    U32  litLengthSum;
+    U32  litSum;
+    U32  offCodeSum;
+} seqStore_t;
+
+
+
+#endif   /* ZSTDv05_CCOMMON_H_MODULE */
+/* ******************************************************************
+   FSEv05 : Finite State Entropy coder
+   header file
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef FSEv05_H
+#define FSEv05_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* *****************************************
+*  Includes
+******************************************/
+#include <stddef.h>    /* size_t, ptrdiff_t */
+
+
+/*-****************************************
+*  FSEv05 simple functions
+******************************************/
+size_t FSEv05_decompress(void* dst,  size_t maxDstSize,
+                const void* cSrc, size_t cSrcSize);
+/*!
+FSEv05_decompress():
+    Decompress FSEv05 data from buffer 'cSrc', of size 'cSrcSize',
+    into already allocated destination buffer 'dst', of size 'maxDstSize'.
+    return : size of regenerated data (<= maxDstSize)
+             or an error code, which can be tested using FSEv05_isError()
+
+    ** Important ** : FSEv05_decompress() doesn't decompress non-compressible nor RLE data !!!
+    Why ? : making this distinction requires a header.
+    Header management is intentionally delegated to the user layer, which can better manage special cases.
+*/
+
+
+/* *****************************************
+*  Tool functions
+******************************************/
+/* Error Management */
+unsigned    FSEv05_isError(size_t code);        /* tells if a return value is an error code */
+const char* FSEv05_getErrorName(size_t code);   /* provides error code string (useful for debugging) */
+
+
+
+
+/* *****************************************
+*  FSEv05 detailed API
+******************************************/
+/* *** DECOMPRESSION *** */
+
+/*!
+FSEv05_readNCount():
+   Read compactly saved 'normalizedCounter' from 'rBuffer'.
+   return : size read from 'rBuffer'
+            or an errorCode, which can be tested using FSEv05_isError()
+            maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
+size_t FSEv05_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
+
+/*!
+Constructor and Destructor of type FSEv05_DTable
+    Note that its size depends on 'tableLog' */
+typedef unsigned FSEv05_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+FSEv05_DTable* FSEv05_createDTable(unsigned tableLog);
+void        FSEv05_freeDTable(FSEv05_DTable* dt);
+
+/*!
+FSEv05_buildDTable():
+   Builds 'dt', which must be already allocated, using FSEv05_createDTable()
+   return : 0,
+            or an errorCode, which can be tested using FSEv05_isError() */
+size_t FSEv05_buildDTable (FSEv05_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+/*!
+FSEv05_decompress_usingDTable():
+   Decompress compressed source @cSrc of size @cSrcSize using @dt
+   into @dst which must be already allocated.
+   return : size of regenerated data (necessarily <= @dstCapacity)
+            or an errorCode, which can be tested using FSEv05_isError() */
+size_t FSEv05_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSEv05_DTable* dt);
+
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* FSEv05_H */
+/* ******************************************************************
+   bitstream
+   Part of FSEv05 library
+   header file (to include)
+   Copyright (C) 2013-2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+****************************************************************** */
+#ifndef BITv05STREAM_H_MODULE
+#define BITv05STREAM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*
+*  This API consists of small unitary functions, which highly benefit from being inlined.
+*  Since link-time-optimization is not available for all compilers,
+*  these functions are defined into a .h to be included.
+*/
+
+
+
+/*-********************************************
+*  bitStream decoding API (read backward)
+**********************************************/
+typedef struct
+{
+    size_t   bitContainer;
+    unsigned bitsConsumed;
+    const char* ptr;
+    const char* start;
+} BITv05_DStream_t;
+
+typedef enum { BITv05_DStream_unfinished = 0,
+               BITv05_DStream_endOfBuffer = 1,
+               BITv05_DStream_completed = 2,
+               BITv05_DStream_overflow = 3 } BITv05_DStream_status;  /* result of BITv05_reloadDStream() */
+               /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
+
+MEM_STATIC size_t   BITv05_initDStream(BITv05_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
+MEM_STATIC size_t   BITv05_readBits(BITv05_DStream_t* bitD, unsigned nbBits);
+MEM_STATIC BITv05_DStream_status BITv05_reloadDStream(BITv05_DStream_t* bitD);
+MEM_STATIC unsigned BITv05_endOfDStream(const BITv05_DStream_t* bitD);
+
+
+/*!
+* Start by invoking BITv05_initDStream().
+* A chunk of the bitStream is then stored into a local register.
+* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+* You can then retrieve bitFields stored into the local register, **in reverse order**.
+* Local register is explicitly reloaded from memory by the BITv05_reloadDStream() method.
+* A reload guarantee a minimum of ((8*sizeof(size_t))-7) bits when its result is BITv05_DStream_unfinished.
+* Otherwise, it can be less than that, so proceed accordingly.
+* Checking if DStream has reached its end can be performed with BITv05_endOfDStream()
+*/
+
+
+/*-****************************************
+*  unsafe API
+******************************************/
+MEM_STATIC size_t BITv05_readBitsFast(BITv05_DStream_t* bitD, unsigned nbBits);
+/* faster, but works only if nbBits >= 1 */
+
+
+
+/*-**************************************************************
+*  Helper functions
+****************************************************************/
+MEM_STATIC unsigned BITv05_highbit32 (register U32 val)
+{
+#   if defined(_MSC_VER)   /* Visual */
+    unsigned long r=0;
+    _BitScanReverse ( &r, val );
+    return (unsigned) r;
+#   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
+    return 31 - __builtin_clz (val);
+#   else   /* Software version */
+    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+    U32 v = val;
+    unsigned r;
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
+    return r;
+#   endif
+}
+
+
+
+/*-********************************************************
+* bitStream decoding
+**********************************************************/
+/*!BITv05_initDStream
+*  Initialize a BITv05_DStream_t.
+*  @bitD : a pointer to an already allocated BITv05_DStream_t structure
+*  @srcBuffer must point at the beginning of a bitStream
+*  @srcSize must be the exact size of the bitStream
+*  @result : size of stream (== srcSize) or an errorCode if a problem is detected
+*/
+MEM_STATIC size_t BITv05_initDStream(BITv05_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
+{
+    if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
+
+    if (srcSize >=  sizeof(size_t)) {  /* normal case */
+        U32 contain32;
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(size_t);
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
+        if (contain32 == 0) return ERROR(GENERIC);   /* endMark not present */
+        bitD->bitsConsumed = 8 - BITv05_highbit32(contain32);
+    } else {
+        U32 contain32;
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = bitD->start;
+        bitD->bitContainer = *(const BYTE*)(bitD->start);
+        switch(srcSize)
+        {
+            case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
+            case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
+            case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
+            case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
+            case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
+            case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) <<  8;
+            default:;
+        }
+        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
+        if (contain32 == 0) return ERROR(GENERIC);   /* endMark not present */
+        bitD->bitsConsumed = 8 - BITv05_highbit32(contain32);
+        bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
+    }
+
+    return srcSize;
+}
+
+/*!BITv05_lookBits
+ * Provides next n bits from local register
+ * local register is not modified (bits are still present for next read/look)
+ * On 32-bits, maxNbBits==25
+ * On 64-bits, maxNbBits==57
+ * @return : value extracted
+ */
+MEM_STATIC size_t BITv05_lookBits(BITv05_DStream_t* bitD, U32 nbBits)
+{
+    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
+    return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
+}
+
+/*! BITv05_lookBitsFast :
+*   unsafe version; only works only if nbBits >= 1 */
+MEM_STATIC size_t BITv05_lookBitsFast(BITv05_DStream_t* bitD, U32 nbBits)
+{
+    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
+    return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
+}
+
+MEM_STATIC void BITv05_skipBits(BITv05_DStream_t* bitD, U32 nbBits)
+{
+    bitD->bitsConsumed += nbBits;
+}
+
+/*!BITv05_readBits
+ * Read next n bits from local register.
+ * pay attention to not read more than nbBits contained into local register.
+ * @return : extracted value.
+ */
+MEM_STATIC size_t BITv05_readBits(BITv05_DStream_t* bitD, U32 nbBits)
+{
+    size_t value = BITv05_lookBits(bitD, nbBits);
+    BITv05_skipBits(bitD, nbBits);
+    return value;
+}
+
+/*!BITv05_readBitsFast :
+*  unsafe version; only works only if nbBits >= 1 */
+MEM_STATIC size_t BITv05_readBitsFast(BITv05_DStream_t* bitD, U32 nbBits)
+{
+    size_t value = BITv05_lookBitsFast(bitD, nbBits);
+    BITv05_skipBits(bitD, nbBits);
+    return value;
+}
+
+MEM_STATIC BITv05_DStream_status BITv05_reloadDStream(BITv05_DStream_t* bitD)
+{
+	if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* should never happen */
+		return BITv05_DStream_overflow;
+
+    if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) {
+        bitD->ptr -= bitD->bitsConsumed >> 3;
+        bitD->bitsConsumed &= 7;
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        return BITv05_DStream_unfinished;
+    }
+    if (bitD->ptr == bitD->start) {
+        if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BITv05_DStream_endOfBuffer;
+        return BITv05_DStream_completed;
+    }
+    {
+        U32 nbBytes = bitD->bitsConsumed >> 3;
+        BITv05_DStream_status result = BITv05_DStream_unfinished;
+        if (bitD->ptr - nbBytes < bitD->start) {
+            nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
+            result = BITv05_DStream_endOfBuffer;
+        }
+        bitD->ptr -= nbBytes;
+        bitD->bitsConsumed -= nbBytes*8;
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD) */
+        return result;
+    }
+}
+
+/*! BITv05_endOfDStream
+*   @return Tells if DStream has reached its exact end
+*/
+MEM_STATIC unsigned BITv05_endOfDStream(const BITv05_DStream_t* DStream)
+{
+    return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* BITv05STREAM_H_MODULE */
+/* ******************************************************************
+   FSEv05 : Finite State Entropy coder
+   header file for static linking (only)
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef FSEv05_STATIC_H
+#define FSEv05_STATIC_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+
+/* *****************************************
+*  Static allocation
+*******************************************/
+/* It is possible to statically allocate FSEv05 CTable/DTable as a table of unsigned using below macros */
+#define FSEv05_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))
+
+
+/* *****************************************
+*  FSEv05 advanced API
+*******************************************/
+size_t FSEv05_buildDTable_raw (FSEv05_DTable* dt, unsigned nbBits);
+/* build a fake FSEv05_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
+
+size_t FSEv05_buildDTable_rle (FSEv05_DTable* dt, unsigned char symbolValue);
+/* build a fake FSEv05_DTable, designed to always generate the same symbolValue */
+
+
+
+/* *****************************************
+*  FSEv05 symbol decompression API
+*******************************************/
+typedef struct
+{
+    size_t      state;
+    const void* table;   /* precise table may vary, depending on U16 */
+} FSEv05_DState_t;
+
+
+static void     FSEv05_initDState(FSEv05_DState_t* DStatePtr, BITv05_DStream_t* bitD, const FSEv05_DTable* dt);
+
+static unsigned char FSEv05_decodeSymbol(FSEv05_DState_t* DStatePtr, BITv05_DStream_t* bitD);
+
+static unsigned FSEv05_endOfDState(const FSEv05_DState_t* DStatePtr);
+
+/*!
+Let's now decompose FSEv05_decompress_usingDTable() into its unitary components.
+You will decode FSEv05-encoded symbols from the bitStream,
+and also any other bitFields you put in, **in reverse order**.
+
+You will need a few variables to track your bitStream. They are :
+
+BITv05_DStream_t DStream;    // Stream context
+FSEv05_DState_t  DState;     // State context. Multiple ones are possible
+FSEv05_DTable*   DTablePtr;  // Decoding table, provided by FSEv05_buildDTable()
+
+The first thing to do is to init the bitStream.
+    errorCode = BITv05_initDStream(&DStream, srcBuffer, srcSize);
+
+You should then retrieve your initial state(s)
+(in reverse flushing order if you have several ones) :
+    errorCode = FSEv05_initDState(&DState, &DStream, DTablePtr);
+
+You can then decode your data, symbol after symbol.
+For information the maximum number of bits read by FSEv05_decodeSymbol() is 'tableLog'.
+Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
+    unsigned char symbol = FSEv05_decodeSymbol(&DState, &DStream);
+
+You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
+Note : maximum allowed nbBits is 25, for 32-bits compatibility
+    size_t bitField = BITv05_readBits(&DStream, nbBits);
+
+All above operations only read from local register (which size depends on size_t).
+Refueling the register from memory is manually performed by the reload method.
+    endSignal = FSEv05_reloadDStream(&DStream);
+
+BITv05_reloadDStream() result tells if there is still some more data to read from DStream.
+BITv05_DStream_unfinished : there is still some data left into the DStream.
+BITv05_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
+BITv05_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
+BITv05_DStream_tooFar : Dstream went too far. Decompression result is corrupted.
+
+When reaching end of buffer (BITv05_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
+to properly detect the exact end of stream.
+After each decoded symbol, check if DStream is fully consumed using this simple test :
+    BITv05_reloadDStream(&DStream) >= BITv05_DStream_completed
+
+When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
+Checking if DStream has reached its end is performed by :
+    BITv05_endOfDStream(&DStream);
+Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
+    FSEv05_endOfDState(&DState);
+*/
+
+
+/* *****************************************
+*  FSEv05 unsafe API
+*******************************************/
+static unsigned char FSEv05_decodeSymbolFast(FSEv05_DState_t* DStatePtr, BITv05_DStream_t* bitD);
+/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
+
+
+/* *****************************************
+*  Implementation of inlined functions
+*******************************************/
+/* decompression */
+
+typedef struct {
+    U16 tableLog;
+    U16 fastMode;
+} FSEv05_DTableHeader;   /* sizeof U32 */
+
+typedef struct
+{
+    unsigned short newState;
+    unsigned char  symbol;
+    unsigned char  nbBits;
+} FSEv05_decode_t;   /* size == U32 */
+
+MEM_STATIC void FSEv05_initDState(FSEv05_DState_t* DStatePtr, BITv05_DStream_t* bitD, const FSEv05_DTable* dt)
+{
+    const void* ptr = dt;
+    const FSEv05_DTableHeader* const DTableH = (const FSEv05_DTableHeader*)ptr;
+    DStatePtr->state = BITv05_readBits(bitD, DTableH->tableLog);
+    BITv05_reloadDStream(bitD);
+    DStatePtr->table = dt + 1;
+}
+
+MEM_STATIC size_t FSEv05_getStateValue(FSEv05_DState_t* DStatePtr)
+{
+    return DStatePtr->state;
+}
+
+MEM_STATIC BYTE FSEv05_peakSymbol(FSEv05_DState_t* DStatePtr)
+{
+    const FSEv05_decode_t DInfo = ((const FSEv05_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    return DInfo.symbol;
+}
+
+MEM_STATIC BYTE FSEv05_decodeSymbol(FSEv05_DState_t* DStatePtr, BITv05_DStream_t* bitD)
+{
+    const FSEv05_decode_t DInfo = ((const FSEv05_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    const U32  nbBits = DInfo.nbBits;
+    BYTE symbol = DInfo.symbol;
+    size_t lowBits = BITv05_readBits(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
+
+MEM_STATIC BYTE FSEv05_decodeSymbolFast(FSEv05_DState_t* DStatePtr, BITv05_DStream_t* bitD)
+{
+    const FSEv05_decode_t DInfo = ((const FSEv05_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    const U32 nbBits = DInfo.nbBits;
+    BYTE symbol = DInfo.symbol;
+    size_t lowBits = BITv05_readBitsFast(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
+
+MEM_STATIC unsigned FSEv05_endOfDState(const FSEv05_DState_t* DStatePtr)
+{
+    return DStatePtr->state == 0;
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* FSEv05_STATIC_H */
+/* ******************************************************************
+   FSEv05 : Finite State Entropy coder
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSEv05 source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+#ifndef FSEv05_COMMONDEFS_ONLY
+
+/* **************************************************************
+*  Tuning parameters
+****************************************************************/
+/*!MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#define FSEv05_MAX_MEMORY_USAGE 14
+#define FSEv05_DEFAULT_MEMORY_USAGE 13
+
+/*!FSEv05_MAX_SYMBOL_VALUE :
+*  Maximum symbol value authorized.
+*  Required for proper stack allocation */
+#define FSEv05_MAX_SYMBOL_VALUE 255
+
+
+/* **************************************************************
+*  template functions type & suffix
+****************************************************************/
+#define FSEv05_FUNCTION_TYPE BYTE
+#define FSEv05_FUNCTION_EXTENSION
+#define FSEv05_DECODE_TYPE FSEv05_decode_t
+
+
+#endif   /* !FSEv05_COMMONDEFS_ONLY */
+
+/* **************************************************************
+*  Compiler specifics
+****************************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
+#else
+#  ifdef __GNUC__
+#    define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#    define FORCE_INLINE static inline __attribute__((always_inline))
+#  else
+#    define FORCE_INLINE static inline
+#  endif
+#endif
+
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+
+
+
+/* ***************************************************************
+*  Constants
+*****************************************************************/
+#define FSEv05_MAX_TABLELOG  (FSEv05_MAX_MEMORY_USAGE-2)
+#define FSEv05_MAX_TABLESIZE (1U<<FSEv05_MAX_TABLELOG)
+#define FSEv05_MAXTABLESIZE_MASK (FSEv05_MAX_TABLESIZE-1)
+#define FSEv05_DEFAULT_TABLELOG (FSEv05_DEFAULT_MEMORY_USAGE-2)
+#define FSEv05_MIN_TABLELOG 5
+
+#define FSEv05_TABLELOG_ABSOLUTE_MAX 15
+#if FSEv05_MAX_TABLELOG > FSEv05_TABLELOG_ABSOLUTE_MAX
+#error "FSEv05_MAX_TABLELOG > FSEv05_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define FSEv05_STATIC_ASSERT(c) { enum { FSEv05_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/* **************************************************************
+*  Complex types
+****************************************************************/
+typedef U32 DTable_max_t[FSEv05_DTABLE_SIZE_U32(FSEv05_MAX_TABLELOG)];
+
+
+/* **************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSEv05_FUNCTION_EXTENSION
+#  error "FSEv05_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSEv05_FUNCTION_TYPE
+#  error "FSEv05_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSEv05_CAT(X,Y) X##Y
+#define FSEv05_FUNCTION_NAME(X,Y) FSEv05_CAT(X,Y)
+#define FSEv05_TYPE_NAME(X,Y) FSEv05_CAT(X,Y)
+
+
+/* Function templates */
+static U32 FSEv05_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
+
+
+
+FSEv05_DTable* FSEv05_createDTable (unsigned tableLog)
+{
+    if (tableLog > FSEv05_TABLELOG_ABSOLUTE_MAX) tableLog = FSEv05_TABLELOG_ABSOLUTE_MAX;
+    return (FSEv05_DTable*)malloc( FSEv05_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
+}
+
+void FSEv05_freeDTable (FSEv05_DTable* dt)
+{
+    free(dt);
+}
+
+size_t FSEv05_buildDTable(FSEv05_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+    FSEv05_DTableHeader DTableH;
+    void* const tdPtr = dt+1;   /* because dt is unsigned, 32-bits aligned on 32-bits */
+    FSEv05_DECODE_TYPE* const tableDecode = (FSEv05_DECODE_TYPE*) (tdPtr);
+    const U32 tableSize = 1 << tableLog;
+    const U32 tableMask = tableSize-1;
+    const U32 step = FSEv05_tableStep(tableSize);
+    U16 symbolNext[FSEv05_MAX_SYMBOL_VALUE+1];
+    U32 position = 0;
+    U32 highThreshold = tableSize-1;
+    const S16 largeLimit= (S16)(1 << (tableLog-1));
+    U32 noLarge = 1;
+    U32 s;
+
+    /* Sanity Checks */
+    if (maxSymbolValue > FSEv05_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
+    if (tableLog > FSEv05_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+
+    /* Init, lay down lowprob symbols */
+    DTableH.tableLog = (U16)tableLog;
+    for (s=0; s<=maxSymbolValue; s++) {
+        if (normalizedCounter[s]==-1) {
+            tableDecode[highThreshold--].symbol = (FSEv05_FUNCTION_TYPE)s;
+            symbolNext[s] = 1;
+        } else {
+            if (normalizedCounter[s] >= largeLimit) noLarge=0;
+            symbolNext[s] = normalizedCounter[s];
+    }   }
+
+    /* Spread symbols */
+    for (s=0; s<=maxSymbolValue; s++) {
+        int i;
+        for (i=0; i<normalizedCounter[s]; i++) {
+            tableDecode[position].symbol = (FSEv05_FUNCTION_TYPE)s;
+            position = (position + step) & tableMask;
+            while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+    }   }
+
+    if (position!=0) return ERROR(GENERIC);   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+
+    /* Build Decoding table */
+    {
+        U32 i;
+        for (i=0; i<tableSize; i++) {
+            FSEv05_FUNCTION_TYPE symbol = (FSEv05_FUNCTION_TYPE)(tableDecode[i].symbol);
+            U16 nextState = symbolNext[symbol]++;
+            tableDecode[i].nbBits = (BYTE) (tableLog - BITv05_highbit32 ((U32)nextState) );
+            tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
+    }   }
+
+    DTableH.fastMode = (U16)noLarge;
+    memcpy(dt, &DTableH, sizeof(DTableH));
+    return 0;
+}
+
+
+#ifndef FSEv05_COMMONDEFS_ONLY
+/*-****************************************
+*  FSEv05 helper functions
+******************************************/
+unsigned FSEv05_isError(size_t code) { return ERR_isError(code); }
+
+const char* FSEv05_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+
+/*-**************************************************************
+*  FSEv05 NCount encoding-decoding
+****************************************************************/
+static short FSEv05_abs(short a) { return a<0 ? -a : a; }
+
+
+size_t FSEv05_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+                 const void* headerBuffer, size_t hbSize)
+{
+    const BYTE* const istart = (const BYTE*) headerBuffer;
+    const BYTE* const iend = istart + hbSize;
+    const BYTE* ip = istart;
+    int nbBits;
+    int remaining;
+    int threshold;
+    U32 bitStream;
+    int bitCount;
+    unsigned charnum = 0;
+    int previous0 = 0;
+
+    if (hbSize < 4) return ERROR(srcSize_wrong);
+    bitStream = MEM_readLE32(ip);
+    nbBits = (bitStream & 0xF) + FSEv05_MIN_TABLELOG;   /* extract tableLog */
+    if (nbBits > FSEv05_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
+    bitStream >>= 4;
+    bitCount = 4;
+    *tableLogPtr = nbBits;
+    remaining = (1<<nbBits)+1;
+    threshold = 1<<nbBits;
+    nbBits++;
+
+    while ((remaining>1) && (charnum<=*maxSVPtr)) {
+        if (previous0) {
+            unsigned n0 = charnum;
+            while ((bitStream & 0xFFFF) == 0xFFFF) {
+                n0+=24;
+                if (ip < iend-5) {
+                    ip+=2;
+                    bitStream = MEM_readLE32(ip) >> bitCount;
+                } else {
+                    bitStream >>= 16;
+                    bitCount+=16;
+            }   }
+            while ((bitStream & 3) == 3) {
+                n0+=3;
+                bitStream>>=2;
+                bitCount+=2;
+            }
+            n0 += bitStream & 3;
+            bitCount += 2;
+            if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
+            while (charnum < n0) normalizedCounter[charnum++] = 0;
+            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
+                ip += bitCount>>3;
+                bitCount &= 7;
+                bitStream = MEM_readLE32(ip) >> bitCount;
+            }
+            else
+                bitStream >>= 2;
+        }
+        {
+            const short max = (short)((2*threshold-1)-remaining);
+            short count;
+
+            if ((bitStream & (threshold-1)) < (U32)max) {
+                count = (short)(bitStream & (threshold-1));
+                bitCount   += nbBits-1;
+            } else {
+                count = (short)(bitStream & (2*threshold-1));
+                if (count >= threshold) count -= max;
+                bitCount   += nbBits;
+            }
+
+            count--;   /* extra accuracy */
+            remaining -= FSEv05_abs(count);
+            normalizedCounter[charnum++] = count;
+            previous0 = !count;
+            while (remaining < threshold) {
+                nbBits--;
+                threshold >>= 1;
+            }
+
+            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
+                ip += bitCount>>3;
+                bitCount &= 7;
+            } else {
+                bitCount -= (int)(8 * (iend - 4 - ip));
+                ip = iend - 4;
+            }
+            bitStream = MEM_readLE32(ip) >> (bitCount & 31);
+    }   }
+    if (remaining != 1) return ERROR(GENERIC);
+    *maxSVPtr = charnum-1;
+
+    ip += (bitCount+7)>>3;
+    if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong);
+    return ip-istart;
+}
+
+
+
+/*-*******************************************************
+*  Decompression (Byte symbols)
+*********************************************************/
+size_t FSEv05_buildDTable_rle (FSEv05_DTable* dt, BYTE symbolValue)
+{
+    void* ptr = dt;
+    FSEv05_DTableHeader* const DTableH = (FSEv05_DTableHeader*)ptr;
+    void* dPtr = dt + 1;
+    FSEv05_decode_t* const cell = (FSEv05_decode_t*)dPtr;
+
+    DTableH->tableLog = 0;
+    DTableH->fastMode = 0;
+
+    cell->newState = 0;
+    cell->symbol = symbolValue;
+    cell->nbBits = 0;
+
+    return 0;
+}
+
+
+size_t FSEv05_buildDTable_raw (FSEv05_DTable* dt, unsigned nbBits)
+{
+    void* ptr = dt;
+    FSEv05_DTableHeader* const DTableH = (FSEv05_DTableHeader*)ptr;
+    void* dPtr = dt + 1;
+    FSEv05_decode_t* const dinfo = (FSEv05_decode_t*)dPtr;
+    const unsigned tableSize = 1 << nbBits;
+    const unsigned tableMask = tableSize - 1;
+    const unsigned maxSymbolValue = tableMask;
+    unsigned s;
+
+    /* Sanity checks */
+    if (nbBits < 1) return ERROR(GENERIC);         /* min size */
+
+    /* Build Decoding Table */
+    DTableH->tableLog = (U16)nbBits;
+    DTableH->fastMode = 1;
+    for (s=0; s<=maxSymbolValue; s++) {
+        dinfo[s].newState = 0;
+        dinfo[s].symbol = (BYTE)s;
+        dinfo[s].nbBits = (BYTE)nbBits;
+    }
+
+    return 0;
+}
+
+FORCE_INLINE size_t FSEv05_decompress_usingDTable_generic(
+          void* dst, size_t maxDstSize,
+    const void* cSrc, size_t cSrcSize,
+    const FSEv05_DTable* dt, const unsigned fast)
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const omax = op + maxDstSize;
+    BYTE* const olimit = omax-3;
+
+    BITv05_DStream_t bitD;
+    FSEv05_DState_t state1;
+    FSEv05_DState_t state2;
+    size_t errorCode;
+
+    /* Init */
+    errorCode = BITv05_initDStream(&bitD, cSrc, cSrcSize);   /* replaced last arg by maxCompressed Size */
+    if (FSEv05_isError(errorCode)) return errorCode;
+
+    FSEv05_initDState(&state1, &bitD, dt);
+    FSEv05_initDState(&state2, &bitD, dt);
+
+#define FSEv05_GETSYMBOL(statePtr) fast ? FSEv05_decodeSymbolFast(statePtr, &bitD) : FSEv05_decodeSymbol(statePtr, &bitD)
+
+    /* 4 symbols per loop */
+    for ( ; (BITv05_reloadDStream(&bitD)==BITv05_DStream_unfinished) && (op<olimit) ; op+=4) {
+        op[0] = FSEv05_GETSYMBOL(&state1);
+
+        if (FSEv05_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BITv05_reloadDStream(&bitD);
+
+        op[1] = FSEv05_GETSYMBOL(&state2);
+
+        if (FSEv05_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            { if (BITv05_reloadDStream(&bitD) > BITv05_DStream_unfinished) { op+=2; break; } }
+
+        op[2] = FSEv05_GETSYMBOL(&state1);
+
+        if (FSEv05_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BITv05_reloadDStream(&bitD);
+
+        op[3] = FSEv05_GETSYMBOL(&state2);
+    }
+
+    /* tail */
+    /* note : BITv05_reloadDStream(&bitD) >= FSEv05_DStream_partiallyFilled; Ends at exactly BITv05_DStream_completed */
+    while (1) {
+        if ( (BITv05_reloadDStream(&bitD)>BITv05_DStream_completed) || (op==omax) || (BITv05_endOfDStream(&bitD) && (fast || FSEv05_endOfDState(&state1))) )
+            break;
+
+        *op++ = FSEv05_GETSYMBOL(&state1);
+
+        if ( (BITv05_reloadDStream(&bitD)>BITv05_DStream_completed) || (op==omax) || (BITv05_endOfDStream(&bitD) && (fast || FSEv05_endOfDState(&state2))) )
+            break;
+
+        *op++ = FSEv05_GETSYMBOL(&state2);
+    }
+
+    /* end ? */
+    if (BITv05_endOfDStream(&bitD) && FSEv05_endOfDState(&state1) && FSEv05_endOfDState(&state2))
+        return op-ostart;
+
+    if (op==omax) return ERROR(dstSize_tooSmall);   /* dst buffer is full, but cSrc unfinished */
+
+    return ERROR(corruption_detected);
+}
+
+
+size_t FSEv05_decompress_usingDTable(void* dst, size_t originalSize,
+                            const void* cSrc, size_t cSrcSize,
+                            const FSEv05_DTable* dt)
+{
+    const void* ptr = dt;
+    const FSEv05_DTableHeader* DTableH = (const FSEv05_DTableHeader*)ptr;
+    const U32 fastMode = DTableH->fastMode;
+
+    /* select fast mode (static) */
+    if (fastMode) return FSEv05_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+    return FSEv05_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+}
+
+
+size_t FSEv05_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+{
+    const BYTE* const istart = (const BYTE*)cSrc;
+    const BYTE* ip = istart;
+    short counting[FSEv05_MAX_SYMBOL_VALUE+1];
+    DTable_max_t dt;   /* Static analyzer seems unable to understand this table will be properly initialized later */
+    unsigned tableLog;
+    unsigned maxSymbolValue = FSEv05_MAX_SYMBOL_VALUE;
+    size_t errorCode;
+
+    if (cSrcSize<2) return ERROR(srcSize_wrong);   /* too small input size */
+
+    /* normal FSEv05 decoding mode */
+    errorCode = FSEv05_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
+    if (FSEv05_isError(errorCode)) return errorCode;
+    if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);   /* too small input size */
+    ip += errorCode;
+    cSrcSize -= errorCode;
+
+    errorCode = FSEv05_buildDTable (dt, counting, maxSymbolValue, tableLog);
+    if (FSEv05_isError(errorCode)) return errorCode;
+
+    /* always return, even if it is an error code */
+    return FSEv05_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
+}
+
+
+
+#endif   /* FSEv05_COMMONDEFS_ONLY */
+/* ******************************************************************
+   Huff0 : Huffman coder, part of New Generation Entropy library
+   header file
+   Copyright (C) 2013-2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+****************************************************************** */
+#ifndef HUFF0_H
+#define HUFF0_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+
+/* ****************************************
+*  Huff0 simple functions
+******************************************/
+size_t HUFv05_decompress(void* dst,  size_t dstSize,
+                const void* cSrc, size_t cSrcSize);
+/*!
+HUFv05_decompress():
+    Decompress Huff0 data from buffer 'cSrc', of size 'cSrcSize',
+    into already allocated destination buffer 'dst', of size 'dstSize'.
+    @dstSize : must be the **exact** size of original (uncompressed) data.
+    Note : in contrast with FSEv05, HUFv05_decompress can regenerate
+           RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
+           because it knows size to regenerate.
+    @return : size of regenerated data (== dstSize)
+              or an error code, which can be tested using HUFv05_isError()
+*/
+
+
+/* ****************************************
+*  Tool functions
+******************************************/
+/* Error Management */
+unsigned    HUFv05_isError(size_t code);        /* tells if a return value is an error code */
+const char* HUFv05_getErrorName(size_t code);   /* provides error code string (useful for debugging) */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif   /* HUF0_H */
+/* ******************************************************************
+   Huff0 : Huffman codec, part of New Generation Entropy library
+   header file, for static linking only
+   Copyright (C) 2013-2016, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+****************************************************************** */
+#ifndef HUF0_STATIC_H
+#define HUF0_STATIC_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+
+/* ****************************************
+*  Static allocation
+******************************************/
+/* static allocation of Huff0's DTable */
+#define HUFv05_DTABLE_SIZE(maxTableLog)   (1 + (1<<maxTableLog))
+#define HUFv05_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
+        unsigned short DTable[HUFv05_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUFv05_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
+        unsigned int DTable[HUFv05_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUFv05_CREATE_STATIC_DTABLEX6(DTable, maxTableLog) \
+        unsigned int DTable[HUFv05_DTABLE_SIZE(maxTableLog) * 3 / 2] = { maxTableLog }
+
+
+/* ****************************************
+*  Advanced decompression functions
+******************************************/
+size_t HUFv05_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
+size_t HUFv05_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* double-symbols decoder */
+size_t HUFv05_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* quad-symbols decoder */
+
+
+/* ****************************************
+*  Huff0 detailed API
+******************************************/
+/*!
+HUFv05_decompress() does the following:
+1. select the decompression algorithm (X2, X4, X6) based on pre-computed heuristics
+2. build Huffman table from save, using HUFv05_readDTableXn()
+3. decode 1 or 4 segments in parallel using HUFv05_decompressSXn_usingDTable
+*/
+size_t HUFv05_readDTableX2 (unsigned short* DTable, const void* src, size_t srcSize);
+size_t HUFv05_readDTableX4 (unsigned* DTable, const void* src, size_t srcSize);
+size_t HUFv05_readDTableX6 (unsigned* DTable, const void* src, size_t srcSize);
+
+size_t HUFv05_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned short* DTable);
+size_t HUFv05_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable);
+size_t HUFv05_decompress4X6_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable);
+
+
+/* single stream variants */
+
+size_t HUFv05_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
+size_t HUFv05_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* double-symbol decoder */
+size_t HUFv05_decompress1X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* quad-symbol decoder */
+
+size_t HUFv05_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned short* DTable);
+size_t HUFv05_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable);
+size_t HUFv05_decompress1X6_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable);
+
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* HUF0_STATIC_H */
+/* ******************************************************************
+   Huff0 : Huffman coder, part of New Generation Entropy library
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSEv05+Huff0 source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+/* **************************************************************
+*  Compiler specifics
+****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+/* inline is defined */
+#elif defined(_MSC_VER)
+#  define inline __inline
+#else
+#  define inline /* disable inline */
+#endif
+
+
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#else
+#  ifdef __GNUC__
+#    define FORCE_INLINE static inline __attribute__((always_inline))
+#  else
+#    define FORCE_INLINE static inline
+#  endif
+#endif
+
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+
+
+/* **************************************************************
+*  Constants
+****************************************************************/
+#define HUFv05_ABSOLUTEMAX_TABLELOG  16   /* absolute limit of HUFv05_MAX_TABLELOG. Beyond that value, code does not work */
+#define HUFv05_MAX_TABLELOG  12           /* max configured tableLog (for static allocation); can be modified up to HUFv05_ABSOLUTEMAX_TABLELOG */
+#define HUFv05_DEFAULT_TABLELOG  HUFv05_MAX_TABLELOG   /* tableLog by default, when not specified */
+#define HUFv05_MAX_SYMBOL_VALUE 255
+#if (HUFv05_MAX_TABLELOG > HUFv05_ABSOLUTEMAX_TABLELOG)
+#  error "HUFv05_MAX_TABLELOG is too large !"
+#endif
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+unsigned HUFv05_isError(size_t code) { return ERR_isError(code); }
+const char* HUFv05_getErrorName(size_t code) { return ERR_getErrorName(code); }
+#define HUFv05_STATIC_ASSERT(c) { enum { HUFv05_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/* *******************************************************
+*  Huff0 : Huffman block decompression
+*********************************************************/
+typedef struct { BYTE byte; BYTE nbBits; } HUFv05_DEltX2;   /* single-symbol decoding */
+
+typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUFv05_DEltX4;  /* double-symbols decoding */
+
+typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
+
+/*! HUFv05_readStats
+    Read compact Huffman tree, saved by HUFv05_writeCTable
+    @huffWeight : destination buffer
+    @return : size read from `src`
+*/
+static size_t HUFv05_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                            U32* nbSymbolsPtr, U32* tableLogPtr,
+                            const void* src, size_t srcSize)
+{
+    U32 weightTotal;
+    U32 tableLog;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize = ip[0];
+    size_t oSize;
+    U32 n;
+
+    //memset(huffWeight, 0, hwSize);   /* is not necessary, even though some analyzer complain ... */
+
+    if (iSize >= 128)  { /* special header */
+        if (iSize >= (242)) {  /* RLE */
+            static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
+            oSize = l[iSize-242];
+            memset(huffWeight, 1, hwSize);
+            iSize = 0;
+        }
+        else {   /* Incompressible */
+            oSize = iSize - 127;
+            iSize = ((oSize+1)/2);
+            if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+            if (oSize >= hwSize) return ERROR(corruption_detected);
+            ip += 1;
+            for (n=0; n<oSize; n+=2) {
+                huffWeight[n]   = ip[n/2] >> 4;
+                huffWeight[n+1] = ip[n/2] & 15;
+    }   }   }
+    else  {   /* header compressed with FSEv05 (normal case) */
+        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+        oSize = FSEv05_decompress(huffWeight, hwSize-1, ip+1, iSize);   /* max (hwSize-1) values decoded, as last one is implied */
+        if (FSEv05_isError(oSize)) return oSize;
+    }
+
+    /* collect weight stats */
+    memset(rankStats, 0, (HUFv05_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32));
+    weightTotal = 0;
+    for (n=0; n<oSize; n++) {
+        if (huffWeight[n] >= HUFv05_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+        rankStats[huffWeight[n]]++;
+        weightTotal += (1 << huffWeight[n]) >> 1;
+    }
+
+    /* get last non-null symbol weight (implied, total must be 2^n) */
+    tableLog = BITv05_highbit32(weightTotal) + 1;
+    if (tableLog > HUFv05_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+    {   /* determine last weight */
+        U32 total = 1 << tableLog;
+        U32 rest = total - weightTotal;
+        U32 verif = 1 << BITv05_highbit32(rest);
+        U32 lastWeight = BITv05_highbit32(rest) + 1;
+        if (verif != rest) return ERROR(corruption_detected);    /* last value must be a clean power of 2 */
+        huffWeight[oSize] = (BYTE)lastWeight;
+        rankStats[lastWeight]++;
+    }
+
+    /* check tree construction validity */
+    if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected);   /* by construction : at least 2 elts of rank 1, must be even */
+
+    /* results */
+    *nbSymbolsPtr = (U32)(oSize+1);
+    *tableLogPtr = tableLog;
+    return iSize+1;
+}
+
+
+/*-***************************/
+/*  single-symbol decoding   */
+/*-***************************/
+
+size_t HUFv05_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
+{
+    BYTE huffWeight[HUFv05_MAX_SYMBOL_VALUE + 1];
+    U32 rankVal[HUFv05_ABSOLUTEMAX_TABLELOG + 1];   /* large enough for values from 0 to 16 */
+    U32 tableLog = 0;
+    size_t iSize;
+    U32 nbSymbols = 0;
+    U32 n;
+    U32 nextRankStart;
+    void* const dtPtr = DTable + 1;
+    HUFv05_DEltX2* const dt = (HUFv05_DEltX2*)dtPtr;
+
+    HUFv05_STATIC_ASSERT(sizeof(HUFv05_DEltX2) == sizeof(U16));   /* if compilation fails here, assertion is false */
+    //memset(huffWeight, 0, sizeof(huffWeight));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUFv05_readStats(huffWeight, HUFv05_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
+    if (HUFv05_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > DTable[0]) return ERROR(tableLog_tooLarge);   /* DTable is too small */
+    DTable[0] = (U16)tableLog;   /* maybe should separate sizeof allocated DTable, from used size of DTable, in case of re-use */
+
+    /* Prepare ranks */
+    nextRankStart = 0;
+    for (n=1; n<=tableLog; n++) {
+        U32 current = nextRankStart;
+        nextRankStart += (rankVal[n] << (n-1));
+        rankVal[n] = current;
+    }
+
+    /* fill DTable */
+    for (n=0; n<nbSymbols; n++) {
+        const U32 w = huffWeight[n];
+        const U32 length = (1 << w) >> 1;
+        U32 i;
+        HUFv05_DEltX2 D;
+        D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
+        for (i = rankVal[w]; i < rankVal[w] + length; i++)
+            dt[i] = D;
+        rankVal[w] += length;
+    }
+
+    return iSize;
+}
+
+static BYTE HUFv05_decodeSymbolX2(BITv05_DStream_t* Dstream, const HUFv05_DEltX2* dt, const U32 dtLog)
+{
+        const size_t val = BITv05_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+        const BYTE c = dt[val].byte;
+        BITv05_skipBits(Dstream, dt[val].nbBits);
+        return c;
+}
+
+#define HUFv05_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
+    *ptr++ = HUFv05_decodeSymbolX2(DStreamPtr, dt, dtLog)
+
+#define HUFv05_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUFv05_MAX_TABLELOG<=12)) \
+        HUFv05_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+#define HUFv05_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        HUFv05_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+static inline size_t HUFv05_decodeStreamX2(BYTE* p, BITv05_DStream_t* const bitDPtr, BYTE* const pEnd, const HUFv05_DEltX2* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 4 symbols at a time */
+    while ((BITv05_reloadDStream(bitDPtr) == BITv05_DStream_unfinished) && (p <= pEnd-4)) {
+        HUFv05_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUFv05_DECODE_SYMBOLX2_1(p, bitDPtr);
+        HUFv05_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUFv05_DECODE_SYMBOLX2_0(p, bitDPtr);
+    }
+
+    /* closer to the end */
+    while ((BITv05_reloadDStream(bitDPtr) == BITv05_DStream_unfinished) && (p < pEnd))
+        HUFv05_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    /* no more data to retrieve from bitstream, hence no need to reload */
+    while (p < pEnd)
+        HUFv05_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    return pEnd-pStart;
+}
+
+size_t HUFv05_decompress1X2_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U16* DTable)
+{
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + dstSize;
+    size_t errorCode;
+    const U32 dtLog = DTable[0];
+    const void* dtPtr = DTable;
+    const HUFv05_DEltX2* const dt = ((const HUFv05_DEltX2*)dtPtr)+1;
+    BITv05_DStream_t bitD;
+    errorCode = BITv05_initDStream(&bitD, cSrc, cSrcSize);
+    if (HUFv05_isError(errorCode)) return errorCode;
+
+    HUFv05_decodeStreamX2(op, &bitD, oend, dt, dtLog);
+
+    /* check */
+    if (!BITv05_endOfDStream(&bitD)) return ERROR(corruption_detected);
+
+    return dstSize;
+}
+
+size_t HUFv05_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUFv05_CREATE_STATIC_DTABLEX2(DTable, HUFv05_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+    size_t errorCode;
+
+    errorCode = HUFv05_readDTableX2 (DTable, cSrc, cSrcSize);
+    if (HUFv05_isError(errorCode)) return errorCode;
+    if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += errorCode;
+    cSrcSize -= errorCode;
+
+    return HUFv05_decompress1X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+size_t HUFv05_decompress4X2_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U16* DTable)
+{
+    const BYTE* const istart = (const BYTE*) cSrc;
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* const oend = ostart + dstSize;
+    const void* const dtPtr = DTable;
+    const HUFv05_DEltX2* const dt = ((const HUFv05_DEltX2*)dtPtr) +1;
+    const U32 dtLog = DTable[0];
+    size_t errorCode;
+
+    /* Check */
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    /* Init */
+    BITv05_DStream_t bitD1;
+    BITv05_DStream_t bitD2;
+    BITv05_DStream_t bitD3;
+    BITv05_DStream_t bitD4;
+    const size_t length1 = MEM_readLE16(istart);
+    const size_t length2 = MEM_readLE16(istart+2);
+    const size_t length3 = MEM_readLE16(istart+4);
+    size_t length4;
+    const BYTE* const istart1 = istart + 6;  /* jumpTable */
+    const BYTE* const istart2 = istart1 + length1;
+    const BYTE* const istart3 = istart2 + length2;
+    const BYTE* const istart4 = istart3 + length3;
+    const size_t segmentSize = (dstSize+3) / 4;
+    BYTE* const opStart2 = ostart + segmentSize;
+    BYTE* const opStart3 = opStart2 + segmentSize;
+    BYTE* const opStart4 = opStart3 + segmentSize;
+    BYTE* op1 = ostart;
+    BYTE* op2 = opStart2;
+    BYTE* op3 = opStart3;
+    BYTE* op4 = opStart4;
+    U32 endSignal;
+
+    length4 = cSrcSize - (length1 + length2 + length3 + 6);
+    if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+    errorCode = BITv05_initDStream(&bitD1, istart1, length1);
+    if (HUFv05_isError(errorCode)) return errorCode;
+    errorCode = BITv05_initDStream(&bitD2, istart2, length2);
+    if (HUFv05_isError(errorCode)) return errorCode;
+    errorCode = BITv05_initDStream(&bitD3, istart3, length3);
+    if (HUFv05_isError(errorCode)) return errorCode;
+    errorCode = BITv05_initDStream(&bitD4, istart4, length4);
+    if (HUFv05_isError(errorCode)) return errorCode;
+
+    /* 16-32 symbols per loop (4-8 symbols per stream) */
+    endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
+    for ( ; (endSignal==BITv05_DStream_unfinished) && (op4<(oend-7)) ; ) {
+        HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
+        HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
+        HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
+        HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
+        HUFv05_DECODE_SYMBOLX2_1(op1, &bitD1);
+        HUFv05_DECODE_SYMBOLX2_1(op2, &bitD2);
+        HUFv05_DECODE_SYMBOLX2_1(op3, &bitD3);
+        HUFv05_DECODE_SYMBOLX2_1(op4, &bitD4);
+        HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
+        HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
+        HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
+        HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
+        HUFv05_DECODE_SYMBOLX2_0(op1, &bitD1);
+        HUFv05_DECODE_SYMBOLX2_0(op2, &bitD2);
+        HUFv05_DECODE_SYMBOLX2_0(op3, &bitD3);
+        HUFv05_DECODE_SYMBOLX2_0(op4, &bitD4);
+        endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
+    }
+
+    /* check corruption */
+    if (op1 > opStart2) return ERROR(corruption_detected);
+    if (op2 > opStart3) return ERROR(corruption_detected);
+    if (op3 > opStart4) return ERROR(corruption_detected);
+    /* note : op4 supposed already verified within main loop */
+
+    /* finish bitStreams one by one */
+    HUFv05_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+    HUFv05_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+    HUFv05_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+    HUFv05_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
+
+    /* check */
+    endSignal = BITv05_endOfDStream(&bitD1) & BITv05_endOfDStream(&bitD2) & BITv05_endOfDStream(&bitD3) & BITv05_endOfDStream(&bitD4);
+    if (!endSignal) return ERROR(corruption_detected);
+
+    /* decoded size */
+    return dstSize;
+}
+
+
+size_t HUFv05_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUFv05_CREATE_STATIC_DTABLEX2(DTable, HUFv05_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+    size_t errorCode;
+
+    errorCode = HUFv05_readDTableX2 (DTable, cSrc, cSrcSize);
+    if (HUFv05_isError(errorCode)) return errorCode;
+    if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += errorCode;
+    cSrcSize -= errorCode;
+
+    return HUFv05_decompress4X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/* *************************/
+/* double-symbols decoding */
+/* *************************/
+
+static void HUFv05_fillDTableX4Level2(HUFv05_DEltX4* DTable, U32 sizeLog, const U32 consumed,
+                           const U32* rankValOrigin, const int minWeight,
+                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
+                           U32 nbBitsBaseline, U16 baseSeq)
+{
+    HUFv05_DEltX4 DElt;
+    U32 rankVal[HUFv05_ABSOLUTEMAX_TABLELOG + 1];
+    U32 s;
+
+    /* get pre-calculated rankVal */
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+    /* fill skipped values */
+    if (minWeight>1) {
+        U32 i, skipSize = rankVal[minWeight];
+        MEM_writeLE16(&(DElt.sequence), baseSeq);
+        DElt.nbBits   = (BYTE)(consumed);
+        DElt.length   = 1;
+        for (i = 0; i < skipSize; i++)
+            DTable[i] = DElt;
+    }
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++) {   /* note : sortedSymbols already skipped */
+        const U32 symbol = sortedSymbols[s].symbol;
+        const U32 weight = sortedSymbols[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 length = 1 << (sizeLog-nbBits);
+        const U32 start = rankVal[weight];
+        U32 i = start;
+        const U32 end = start + length;
+
+        MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
+        DElt.nbBits = (BYTE)(nbBits + consumed);
+        DElt.length = 2;
+        do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */
+
+        rankVal[weight] += length;
+    }
+}
+
+typedef U32 rankVal_t[HUFv05_ABSOLUTEMAX_TABLELOG][HUFv05_ABSOLUTEMAX_TABLELOG + 1];
+
+static void HUFv05_fillDTableX4(HUFv05_DEltX4* DTable, const U32 targetLog,
+                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
+                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+                           const U32 nbBitsBaseline)
+{
+    U32 rankVal[HUFv05_ABSOLUTEMAX_TABLELOG + 1];
+    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+    const U32 minBits  = nbBitsBaseline - maxWeight;
+    U32 s;
+
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++) {
+        const U16 symbol = sortedList[s].symbol;
+        const U32 weight = sortedList[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 start = rankVal[weight];
+        const U32 length = 1 << (targetLog-nbBits);
+
+        if (targetLog-nbBits >= minBits) {   /* enough room for a second symbol */
+            U32 sortedRank;
+            int minWeight = nbBits + scaleLog;
+            if (minWeight < 1) minWeight = 1;
+            sortedRank = rankStart[minWeight];
+            HUFv05_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
+                           rankValOrigin[nbBits], minWeight,
+                           sortedList+sortedRank, sortedListSize-sortedRank,
+                           nbBitsBaseline, symbol);
+        } else {
+            U32 i;
+            const U32 end = start + length;
+            HUFv05_DEltX4 DElt;
+
+            MEM_writeLE16(&(DElt.sequence), symbol);
+            DElt.nbBits   = (BYTE)(nbBits);
+            DElt.length   = 1;
+            for (i = start; i < end; i++)
+                DTable[i] = DElt;
+        }
+        rankVal[weight] += length;
+    }
+}
+
+size_t HUFv05_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
+{
+    BYTE weightList[HUFv05_MAX_SYMBOL_VALUE + 1];
+    sortedSymbol_t sortedSymbol[HUFv05_MAX_SYMBOL_VALUE + 1];
+    U32 rankStats[HUFv05_ABSOLUTEMAX_TABLELOG + 1] = { 0 };
+    U32 rankStart0[HUFv05_ABSOLUTEMAX_TABLELOG + 2] = { 0 };
+    U32* const rankStart = rankStart0+1;
+    rankVal_t rankVal;
+    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    const U32 memLog = DTable[0];
+    size_t iSize;
+    void* dtPtr = DTable;
+    HUFv05_DEltX4* const dt = ((HUFv05_DEltX4*)dtPtr) + 1;
+
+    HUFv05_STATIC_ASSERT(sizeof(HUFv05_DEltX4) == sizeof(U32));   /* if compilation fails here, assertion is false */
+    if (memLog > HUFv05_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
+    //memset(weightList, 0, sizeof(weightList));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUFv05_readStats(weightList, HUFv05_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+    if (HUFv05_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > memLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
+
+    /* find maxWeight */
+    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
+
+    /* Get start index of each weight */
+    {
+        U32 w, nextRankStart = 0;
+        for (w=1; w<=maxW; w++) {
+            U32 current = nextRankStart;
+            nextRankStart += rankStats[w];
+            rankStart[w] = current;
+        }
+        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
+        sizeOfSort = nextRankStart;
+    }
+
+    /* sort symbols by weight */
+    {
+        U32 s;
+        for (s=0; s<nbSymbols; s++) {
+            U32 w = weightList[s];
+            U32 r = rankStart[w]++;
+            sortedSymbol[r].symbol = (BYTE)s;
+            sortedSymbol[r].weight = (BYTE)w;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+    }
+
+    /* Build rankVal */
+    {
+        const U32 minBits = tableLog+1 - maxW;
+        U32 nextRankVal = 0;
+        U32 w, consumed;
+        const int rescale = (memLog-tableLog) - 1;   /* tableLog <= memLog */
+        U32* rankVal0 = rankVal[0];
+        for (w=1; w<=maxW; w++) {
+            U32 current = nextRankVal;
+            nextRankVal += rankStats[w] << (w+rescale);
+            rankVal0[w] = current;
+        }
+        for (consumed = minBits; consumed <= memLog - minBits; consumed++) {
+            U32* rankValPtr = rankVal[consumed];
+            for (w = 1; w <= maxW; w++) {
+                rankValPtr[w] = rankVal0[w] >> consumed;
+    }   }   }
+
+    HUFv05_fillDTableX4(dt, memLog,
+                   sortedSymbol, sizeOfSort,
+                   rankStart0, rankVal, maxW,
+                   tableLog+1);
+
+    return iSize;
+}
+
+
+static U32 HUFv05_decodeSymbolX4(void* op, BITv05_DStream_t* DStream, const HUFv05_DEltX4* dt, const U32 dtLog)
+{
+    const size_t val = BITv05_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, dt+val, 2);
+    BITv05_skipBits(DStream, dt[val].nbBits);
+    return dt[val].length;
+}
+
+static U32 HUFv05_decodeLastSymbolX4(void* op, BITv05_DStream_t* DStream, const HUFv05_DEltX4* dt, const U32 dtLog)
+{
+    const size_t val = BITv05_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, dt+val, 1);
+    if (dt[val].length==1) BITv05_skipBits(DStream, dt[val].nbBits);
+    else {
+        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
+            BITv05_skipBits(DStream, dt[val].nbBits);
+            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);   /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+    }   }
+    return 1;
+}
+
+
+#define HUFv05_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
+    ptr += HUFv05_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUFv05_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUFv05_MAX_TABLELOG<=12)) \
+        ptr += HUFv05_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUFv05_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        ptr += HUFv05_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+static inline size_t HUFv05_decodeStreamX4(BYTE* p, BITv05_DStream_t* bitDPtr, BYTE* const pEnd, const HUFv05_DEltX4* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 8 symbols at a time */
+    while ((BITv05_reloadDStream(bitDPtr) == BITv05_DStream_unfinished) && (p < pEnd-7)) {
+        HUFv05_DECODE_SYMBOLX4_2(p, bitDPtr);
+        HUFv05_DECODE_SYMBOLX4_1(p, bitDPtr);
+        HUFv05_DECODE_SYMBOLX4_2(p, bitDPtr);
+        HUFv05_DECODE_SYMBOLX4_0(p, bitDPtr);
+    }
+
+    /* closer to the end */
+    while ((BITv05_reloadDStream(bitDPtr) == BITv05_DStream_unfinished) && (p <= pEnd-2))
+        HUFv05_DECODE_SYMBOLX4_0(p, bitDPtr);
+
+    while (p <= pEnd-2)
+        HUFv05_DECODE_SYMBOLX4_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
+
+    if (p < pEnd)
+        p += HUFv05_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
+
+    return p-pStart;
+}
+
+
+size_t HUFv05_decompress1X4_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U32* DTable)
+{
+    const BYTE* const istart = (const BYTE*) cSrc;
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* const oend = ostart + dstSize;
+
+    const U32 dtLog = DTable[0];
+    const void* const dtPtr = DTable;
+    const HUFv05_DEltX4* const dt = ((const HUFv05_DEltX4*)dtPtr) +1;
+    size_t errorCode;
+
+    /* Init */
+    BITv05_DStream_t bitD;
+    errorCode = BITv05_initDStream(&bitD, istart, cSrcSize);
+    if (HUFv05_isError(errorCode)) return errorCode;
+
+    /* finish bitStreams one by one */
+    HUFv05_decodeStreamX4(ostart, &bitD, oend,     dt, dtLog);
+
+    /* check */
+    if (!BITv05_endOfDStream(&bitD)) return ERROR(corruption_detected);
+
+    /* decoded size */
+    return dstSize;
+}
+
+size_t HUFv05_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUFv05_CREATE_STATIC_DTABLEX4(DTable, HUFv05_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t hSize = HUFv05_readDTableX4 (DTable, cSrc, cSrcSize);
+    if (HUFv05_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize;
+    cSrcSize -= hSize;
+
+    return HUFv05_decompress1X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+size_t HUFv05_decompress4X4_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U32* DTable)
+{
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {
+        const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+        const void* const dtPtr = DTable;
+        const HUFv05_DEltX4* const dt = ((const HUFv05_DEltX4*)dtPtr) +1;
+        const U32 dtLog = DTable[0];
+        size_t errorCode;
+
+        /* Init */
+        BITv05_DStream_t bitD1;
+        BITv05_DStream_t bitD2;
+        BITv05_DStream_t bitD3;
+        BITv05_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        errorCode = BITv05_initDStream(&bitD1, istart1, length1);
+        if (HUFv05_isError(errorCode)) return errorCode;
+        errorCode = BITv05_initDStream(&bitD2, istart2, length2);
+        if (HUFv05_isError(errorCode)) return errorCode;
+        errorCode = BITv05_initDStream(&bitD3, istart3, length3);
+        if (HUFv05_isError(errorCode)) return errorCode;
+        errorCode = BITv05_initDStream(&bitD4, istart4, length4);
+        if (HUFv05_isError(errorCode)) return errorCode;
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
+        for ( ; (endSignal==BITv05_DStream_unfinished) && (op4<(oend-7)) ; ) {
+            HUFv05_DECODE_SYMBOLX4_2(op1, &bitD1);
+            HUFv05_DECODE_SYMBOLX4_2(op2, &bitD2);
+            HUFv05_DECODE_SYMBOLX4_2(op3, &bitD3);
+            HUFv05_DECODE_SYMBOLX4_2(op4, &bitD4);
+            HUFv05_DECODE_SYMBOLX4_1(op1, &bitD1);
+            HUFv05_DECODE_SYMBOLX4_1(op2, &bitD2);
+            HUFv05_DECODE_SYMBOLX4_1(op3, &bitD3);
+            HUFv05_DECODE_SYMBOLX4_1(op4, &bitD4);
+            HUFv05_DECODE_SYMBOLX4_2(op1, &bitD1);
+            HUFv05_DECODE_SYMBOLX4_2(op2, &bitD2);
+            HUFv05_DECODE_SYMBOLX4_2(op3, &bitD3);
+            HUFv05_DECODE_SYMBOLX4_2(op4, &bitD4);
+            HUFv05_DECODE_SYMBOLX4_0(op1, &bitD1);
+            HUFv05_DECODE_SYMBOLX4_0(op2, &bitD2);
+            HUFv05_DECODE_SYMBOLX4_0(op3, &bitD3);
+            HUFv05_DECODE_SYMBOLX4_0(op4, &bitD4);
+
+            endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUFv05_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
+        HUFv05_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
+        HUFv05_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
+        HUFv05_decodeStreamX4(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        endSignal = BITv05_endOfDStream(&bitD1) & BITv05_endOfDStream(&bitD2) & BITv05_endOfDStream(&bitD3) & BITv05_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+size_t HUFv05_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUFv05_CREATE_STATIC_DTABLEX4(DTable, HUFv05_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t hSize = HUFv05_readDTableX4 (DTable, cSrc, cSrcSize);
+    if (HUFv05_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize;
+    cSrcSize -= hSize;
+
+    return HUFv05_decompress4X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/* ********************************/
+/* quad-symbol decoding           */
+/* ********************************/
+typedef struct { BYTE nbBits; BYTE nbBytes; } HUFv05_DDescX6;
+typedef union { BYTE byte[4]; U32 sequence; } HUFv05_DSeqX6;
+
+/* recursive, up to level 3; may benefit from <template>-like strategy to nest each level inline */
+static void HUFv05_fillDTableX6LevelN(HUFv05_DDescX6* DDescription, HUFv05_DSeqX6* DSequence, int sizeLog,
+                           const rankVal_t rankValOrigin, const U32 consumed, const int minWeight, const U32 maxWeight,
+                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, const U32* rankStart,
+                           const U32 nbBitsBaseline, HUFv05_DSeqX6 baseSeq, HUFv05_DDescX6 DDesc)
+{
+    const int scaleLog = nbBitsBaseline - sizeLog;   /* note : targetLog >= (nbBitsBaseline-1), hence scaleLog <= 1 */
+    const int minBits  = nbBitsBaseline - maxWeight;
+    const U32 level = DDesc.nbBytes;
+    U32 rankVal[HUFv05_ABSOLUTEMAX_TABLELOG + 1];
+    U32 symbolStartPos, s;
+
+    /* local rankVal, will be modified */
+    memcpy(rankVal, rankValOrigin[consumed], sizeof(rankVal));
+
+    /* fill skipped values */
+    if (minWeight>1) {
+        U32 i;
+        const U32 skipSize = rankVal[minWeight];
+        for (i = 0; i < skipSize; i++) {
+            DSequence[i] = baseSeq;
+            DDescription[i] = DDesc;
+    }   }
+
+    /* fill DTable */
+    DDesc.nbBytes++;
+    symbolStartPos = rankStart[minWeight];
+    for (s=symbolStartPos; s<sortedListSize; s++) {
+        const BYTE symbol = sortedSymbols[s].symbol;
+        const U32  weight = sortedSymbols[s].weight;   /* >= 1 (sorted) */
+        const int  nbBits = nbBitsBaseline - weight;   /* >= 1 (by construction) */
+        const int  totalBits = consumed+nbBits;
+        const U32  start  = rankVal[weight];
+        const U32  length = 1 << (sizeLog-nbBits);
+        baseSeq.byte[level] = symbol;
+        DDesc.nbBits = (BYTE)totalBits;
+
+        if ((level<3) && (sizeLog-totalBits >= minBits)) {  /* enough room for another symbol */
+            int nextMinWeight = totalBits + scaleLog;
+            if (nextMinWeight < 1) nextMinWeight = 1;
+            HUFv05_fillDTableX6LevelN(DDescription+start, DSequence+start, sizeLog-nbBits,
+                           rankValOrigin, totalBits, nextMinWeight, maxWeight,
+                           sortedSymbols, sortedListSize, rankStart,
+                           nbBitsBaseline, baseSeq, DDesc);   /* recursive (max : level 3) */
+        } else {
+            U32 i;
+            const U32 end = start + length;
+            for (i = start; i < end; i++) {
+                DDescription[i] = DDesc;
+                DSequence[i] = baseSeq;
+        }   }
+        rankVal[weight] += length;
+    }
+}
+
+
+/* note : same preparation as X4 */
+size_t HUFv05_readDTableX6 (U32* DTable, const void* src, size_t srcSize)
+{
+    BYTE weightList[HUFv05_MAX_SYMBOL_VALUE + 1];
+    sortedSymbol_t sortedSymbol[HUFv05_MAX_SYMBOL_VALUE + 1];
+    U32 rankStats[HUFv05_ABSOLUTEMAX_TABLELOG + 1] = { 0 };
+    U32 rankStart0[HUFv05_ABSOLUTEMAX_TABLELOG + 2] = { 0 };
+    U32* const rankStart = rankStart0+1;
+    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    rankVal_t rankVal;
+    const U32 memLog = DTable[0];
+    size_t iSize;
+
+    if (memLog > HUFv05_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
+    //memset(weightList, 0, sizeof(weightList));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUFv05_readStats(weightList, HUFv05_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+    if (HUFv05_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > memLog) return ERROR(tableLog_tooLarge);   /* DTable is too small */
+
+    /* find maxWeight */
+    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
+
+    /* Get start index of each weight */
+    {
+        U32 w, nextRankStart = 0;
+        for (w=1; w<=maxW; w++) {
+            U32 current = nextRankStart;
+            nextRankStart += rankStats[w];
+            rankStart[w] = current;
+        }
+        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
+        sizeOfSort = nextRankStart;
+    }
+
+    /* sort symbols by weight */
+    {
+        U32 s;
+        for (s=0; s<nbSymbols; s++) {
+            U32 w = weightList[s];
+            U32 r = rankStart[w]++;
+            sortedSymbol[r].symbol = (BYTE)s;
+            sortedSymbol[r].weight = (BYTE)w;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+    }
+
+    /* Build rankVal */
+    {
+        const U32 minBits = tableLog+1 - maxW;
+        U32 nextRankVal = 0;
+        U32 w, consumed;
+        const int rescale = (memLog-tableLog) - 1;   /* tableLog <= memLog */
+        U32* rankVal0 = rankVal[0];
+        for (w=1; w<=maxW; w++) {
+            U32 current = nextRankVal;
+            nextRankVal += rankStats[w] << (w+rescale);
+            rankVal0[w] = current;
+        }
+        for (consumed = minBits; consumed <= memLog - minBits; consumed++) {
+            U32* rankValPtr = rankVal[consumed];
+            for (w = 1; w <= maxW; w++) {
+                rankValPtr[w] = rankVal0[w] >> consumed;
+    }   }   }
+
+    /* fill tables */
+    {
+        void* ddPtr = DTable+1;
+        HUFv05_DDescX6* DDescription = (HUFv05_DDescX6*)ddPtr;
+        void* dsPtr = DTable + 1 + ((size_t)1<<(memLog-1));
+        HUFv05_DSeqX6* DSequence = (HUFv05_DSeqX6*)dsPtr;
+        HUFv05_DSeqX6 DSeq;
+        HUFv05_DDescX6 DDesc;
+        DSeq.sequence = 0;
+        DDesc.nbBits = 0;
+        DDesc.nbBytes = 0;
+        HUFv05_fillDTableX6LevelN(DDescription, DSequence, memLog,
+                       (const U32 (*)[HUFv05_ABSOLUTEMAX_TABLELOG + 1])rankVal, 0, 1, maxW,
+                       sortedSymbol, sizeOfSort, rankStart0,
+                       tableLog+1, DSeq, DDesc);
+    }
+
+    return iSize;
+}
+
+
+static U32 HUFv05_decodeSymbolX6(void* op, BITv05_DStream_t* DStream, const HUFv05_DDescX6* dd, const HUFv05_DSeqX6* ds, const U32 dtLog)
+{
+    const size_t val = BITv05_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, ds+val, sizeof(HUFv05_DSeqX6));
+    BITv05_skipBits(DStream, dd[val].nbBits);
+    return dd[val].nbBytes;
+}
+
+static U32 HUFv05_decodeLastSymbolsX6(void* op, const U32 maxL, BITv05_DStream_t* DStream,
+                                  const HUFv05_DDescX6* dd, const HUFv05_DSeqX6* ds, const U32 dtLog)
+{
+    const size_t val = BITv05_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    U32 length = dd[val].nbBytes;
+    if (length <= maxL) {
+        memcpy(op, ds+val, length);
+        BITv05_skipBits(DStream, dd[val].nbBits);
+        return length;
+    }
+    memcpy(op, ds+val, maxL);
+    if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
+        BITv05_skipBits(DStream, dd[val].nbBits);
+        if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+            DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);   /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+    }
+    return maxL;
+}
+
+
+#define HUFv05_DECODE_SYMBOLX6_0(ptr, DStreamPtr) \
+    ptr += HUFv05_decodeSymbolX6(ptr, DStreamPtr, dd, ds, dtLog)
+
+#define HUFv05_DECODE_SYMBOLX6_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUFv05_MAX_TABLELOG<=12)) \
+        HUFv05_DECODE_SYMBOLX6_0(ptr, DStreamPtr)
+
+#define HUFv05_DECODE_SYMBOLX6_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        HUFv05_DECODE_SYMBOLX6_0(ptr, DStreamPtr)
+
+static inline size_t HUFv05_decodeStreamX6(BYTE* p, BITv05_DStream_t* bitDPtr, BYTE* const pEnd, const U32* DTable, const U32 dtLog)
+{
+    const void* const ddPtr = DTable+1;
+    const HUFv05_DDescX6* dd = (const HUFv05_DDescX6*)ddPtr;
+    const void* const dsPtr = DTable + 1 + ((size_t)1<<(dtLog-1));
+    const HUFv05_DSeqX6* ds = (const HUFv05_DSeqX6*)dsPtr;
+    BYTE* const pStart = p;
+
+    /* up to 16 symbols at a time */
+    while ((BITv05_reloadDStream(bitDPtr) == BITv05_DStream_unfinished) && (p <= pEnd-16)) {
+        HUFv05_DECODE_SYMBOLX6_2(p, bitDPtr);
+        HUFv05_DECODE_SYMBOLX6_1(p, bitDPtr);
+        HUFv05_DECODE_SYMBOLX6_2(p, bitDPtr);
+        HUFv05_DECODE_SYMBOLX6_0(p, bitDPtr);
+    }
+
+    /* closer to the end, up to 4 symbols at a time */
+    while ((BITv05_reloadDStream(bitDPtr) == BITv05_DStream_unfinished) && (p <= pEnd-4))
+        HUFv05_DECODE_SYMBOLX6_0(p, bitDPtr);
+
+    while ((BITv05_reloadDStream(bitDPtr) <= BITv05_DStream_endOfBuffer) && (p < pEnd))
+        p += HUFv05_decodeLastSymbolsX6(p, (U32)(pEnd-p), bitDPtr, dd, ds, dtLog);
+
+    return p-pStart;
+}
+
+
+size_t HUFv05_decompress1X6_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U32* DTable)
+{
+    const BYTE* const istart = (const BYTE*) cSrc;
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* const oend = ostart + dstSize;
+
+    const U32 dtLog = DTable[0];
+    size_t errorCode;
+
+    /* Init */
+    BITv05_DStream_t bitD;
+    errorCode = BITv05_initDStream(&bitD, istart, cSrcSize);
+    if (HUFv05_isError(errorCode)) return errorCode;
+
+    /* finish bitStreams one by one */
+    HUFv05_decodeStreamX6(ostart, &bitD, oend, DTable, dtLog);
+
+    /* check */
+    if (!BITv05_endOfDStream(&bitD)) return ERROR(corruption_detected);
+
+    /* decoded size */
+    return dstSize;
+}
+
+size_t HUFv05_decompress1X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUFv05_CREATE_STATIC_DTABLEX6(DTable, HUFv05_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t hSize = HUFv05_readDTableX6 (DTable, cSrc, cSrcSize);
+    if (HUFv05_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize;
+    cSrcSize -= hSize;
+
+    return HUFv05_decompress1X6_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+size_t HUFv05_decompress4X6_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U32* DTable)
+{
+    const BYTE* const istart = (const BYTE*) cSrc;
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* const oend = ostart + dstSize;
+
+    const U32 dtLog = DTable[0];
+    const void* const ddPtr = DTable+1;
+    const HUFv05_DDescX6* dd = (const HUFv05_DDescX6*)ddPtr;
+    const void* const dsPtr = DTable + 1 + ((size_t)1<<(dtLog-1));
+    const HUFv05_DSeqX6* ds = (const HUFv05_DSeqX6*)dsPtr;
+    size_t errorCode;
+
+    /* Check */
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    /* Init */
+    BITv05_DStream_t bitD1;
+    BITv05_DStream_t bitD2;
+    BITv05_DStream_t bitD3;
+    BITv05_DStream_t bitD4;
+    const size_t length1 = MEM_readLE16(istart);
+    const size_t length2 = MEM_readLE16(istart+2);
+    const size_t length3 = MEM_readLE16(istart+4);
+    size_t length4;
+    const BYTE* const istart1 = istart + 6;  /* jumpTable */
+    const BYTE* const istart2 = istart1 + length1;
+    const BYTE* const istart3 = istart2 + length2;
+    const BYTE* const istart4 = istart3 + length3;
+    const size_t segmentSize = (dstSize+3) / 4;
+    BYTE* const opStart2 = ostart + segmentSize;
+    BYTE* const opStart3 = opStart2 + segmentSize;
+    BYTE* const opStart4 = opStart3 + segmentSize;
+    BYTE* op1 = ostart;
+    BYTE* op2 = opStart2;
+    BYTE* op3 = opStart3;
+    BYTE* op4 = opStart4;
+    U32 endSignal;
+
+    length4 = cSrcSize - (length1 + length2 + length3 + 6);
+    if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+    errorCode = BITv05_initDStream(&bitD1, istart1, length1);
+    if (HUFv05_isError(errorCode)) return errorCode;
+    errorCode = BITv05_initDStream(&bitD2, istart2, length2);
+    if (HUFv05_isError(errorCode)) return errorCode;
+    errorCode = BITv05_initDStream(&bitD3, istart3, length3);
+    if (HUFv05_isError(errorCode)) return errorCode;
+    errorCode = BITv05_initDStream(&bitD4, istart4, length4);
+    if (HUFv05_isError(errorCode)) return errorCode;
+
+    /* 16-64 symbols per loop (4-16 symbols per stream) */
+    endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
+    for ( ; (op3 <= opStart4) && (endSignal==BITv05_DStream_unfinished) && (op4<=(oend-16)) ; ) {
+        HUFv05_DECODE_SYMBOLX6_2(op1, &bitD1);
+        HUFv05_DECODE_SYMBOLX6_2(op2, &bitD2);
+        HUFv05_DECODE_SYMBOLX6_2(op3, &bitD3);
+        HUFv05_DECODE_SYMBOLX6_2(op4, &bitD4);
+        HUFv05_DECODE_SYMBOLX6_1(op1, &bitD1);
+        HUFv05_DECODE_SYMBOLX6_1(op2, &bitD2);
+        HUFv05_DECODE_SYMBOLX6_1(op3, &bitD3);
+        HUFv05_DECODE_SYMBOLX6_1(op4, &bitD4);
+        HUFv05_DECODE_SYMBOLX6_2(op1, &bitD1);
+        HUFv05_DECODE_SYMBOLX6_2(op2, &bitD2);
+        HUFv05_DECODE_SYMBOLX6_2(op3, &bitD3);
+        HUFv05_DECODE_SYMBOLX6_2(op4, &bitD4);
+        HUFv05_DECODE_SYMBOLX6_0(op1, &bitD1);
+        HUFv05_DECODE_SYMBOLX6_0(op2, &bitD2);
+        HUFv05_DECODE_SYMBOLX6_0(op3, &bitD3);
+        HUFv05_DECODE_SYMBOLX6_0(op4, &bitD4);
+
+        endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
+    }
+
+    /* check corruption */
+    if (op1 > opStart2) return ERROR(corruption_detected);
+    if (op2 > opStart3) return ERROR(corruption_detected);
+    if (op3 > opStart4) return ERROR(corruption_detected);
+    /* note : op4 supposed already verified within main loop */
+
+    /* finish bitStreams one by one */
+    HUFv05_decodeStreamX6(op1, &bitD1, opStart2, DTable, dtLog);
+    HUFv05_decodeStreamX6(op2, &bitD2, opStart3, DTable, dtLog);
+    HUFv05_decodeStreamX6(op3, &bitD3, opStart4, DTable, dtLog);
+    HUFv05_decodeStreamX6(op4, &bitD4, oend,     DTable, dtLog);
+
+    /* check */
+    endSignal = BITv05_endOfDStream(&bitD1) & BITv05_endOfDStream(&bitD2) & BITv05_endOfDStream(&bitD3) & BITv05_endOfDStream(&bitD4);
+    if (!endSignal) return ERROR(corruption_detected);
+
+    /* decoded size */
+    return dstSize;
+}
+
+
+size_t HUFv05_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUFv05_CREATE_STATIC_DTABLEX6(DTable, HUFv05_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t hSize = HUFv05_readDTableX6 (DTable, cSrc, cSrcSize);
+    if (HUFv05_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize;
+    cSrcSize -= hSize;
+
+    return HUFv05_decompress4X6_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/* ********************************/
+/* Generic decompression selector */
+/* ********************************/
+
+typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
+static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
+{
+    /* single, double, quad */
+    {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
+    {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
+    {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
+    {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
+    {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
+    {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
+    {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
+    {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
+    {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
+    {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
+    {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
+    {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
+    {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
+    {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
+    {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
+    {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
+};
+
+typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+
+size_t HUFv05_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    static const decompressionAlgo decompress[3] = { HUFv05_decompress4X2, HUFv05_decompress4X4, HUFv05_decompress4X6 };
+    /* estimate decompression time */
+    U32 Q;
+    const U32 D256 = (U32)(dstSize >> 8);
+    U32 Dtime[3];
+    U32 algoNb = 0;
+    int n;
+
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
+    if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
+    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
+
+    /* decoder timing evaluation */
+    Q = (U32)(cSrcSize * 16 / dstSize);   /* Q < 16 since dstSize > cSrcSize */
+    for (n=0; n<3; n++)
+        Dtime[n] = algoTime[Q][n].tableTime + (algoTime[Q][n].decode256Time * D256);
+
+    Dtime[1] += Dtime[1] >> 4; Dtime[2] += Dtime[2] >> 3; /* advantage to algorithms using less memory, for cache eviction */
+
+    if (Dtime[1] < Dtime[0]) algoNb = 1;
+    if (Dtime[2] < Dtime[algoNb]) algoNb = 2;
+
+    return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
+
+    //return HUFv05_decompress4X2(dst, dstSize, cSrc, cSrcSize);   /* multi-streams single-symbol decoding */
+    //return HUFv05_decompress4X4(dst, dstSize, cSrc, cSrcSize);   /* multi-streams double-symbols decoding */
+    //return HUFv05_decompress4X6(dst, dstSize, cSrc, cSrcSize);   /* multi-streams quad-symbols decoding */
+}
+/*
+    zstd - standard compression library
+    Copyright (C) 2014-2016, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+*/
+
+/* ***************************************************************
+*  Tuning parameters
+*****************************************************************/
+/*!
+ * HEAPMODE :
+ * Select how default decompression function ZSTDv05_decompress() will allocate memory,
+ * in memory stack (0), or in memory heap (1, requires malloc())
+ */
+#ifndef ZSTDv05_HEAPMODE
+#  define ZSTDv05_HEAPMODE 1
+#endif
+
+
+/*-*******************************************************
+*  Dependencies
+*********************************************************/
+#include <stdlib.h>      /* calloc */
+#include <string.h>      /* memcpy, memmove */
+#include <stdio.h>       /* debug only : printf */
+
+
+/*-*******************************************************
+*  Compiler specifics
+*********************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
+#else
+#  define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#  ifdef __GNUC__
+#    define FORCE_INLINE static inline __attribute__((always_inline))
+#  else
+#    define FORCE_INLINE static inline
+#  endif
+#endif
+
+
+/*-*************************************
+*  Local types
+***************************************/
+typedef struct
+{
+    blockType_t blockType;
+    U32 origSize;
+} blockProperties_t;
+
+
+/* *******************************************************
+*  Memory operations
+**********************************************************/
+static void ZSTDv05_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
+
+
+/* *************************************
+*  Error Management
+***************************************/
+/*! ZSTDv05_isError() :
+*   tells if a return value is an error code */
+unsigned ZSTDv05_isError(size_t code) { return ERR_isError(code); }
+
+/*! ZSTDv05_getError() :
+*   convert a `size_t` function result into a proper ZSTDv05_errorCode enum */
+ZSTDv05_ErrorCode ZSTDv05_getError(size_t code) { return ERR_getError(code); }
+
+/*! ZSTDv05_getErrorName() :
+*   provides error code string (useful for debugging) */
+const char* ZSTDv05_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+
+/* *************************************************************
+*   Context management
+***************************************************************/
+typedef enum { ZSTDv05ds_getFrameHeaderSize, ZSTDv05ds_decodeFrameHeader,
+               ZSTDv05ds_decodeBlockHeader, ZSTDv05ds_decompressBlock } ZSTDv05_dStage;
+
+struct ZSTDv05_DCtx_s
+{
+    FSEv05_DTable LLTable[FSEv05_DTABLE_SIZE_U32(LLFSEv05Log)];
+    FSEv05_DTable OffTable[FSEv05_DTABLE_SIZE_U32(OffFSEv05Log)];
+    FSEv05_DTable MLTable[FSEv05_DTABLE_SIZE_U32(MLFSEv05Log)];
+    unsigned   hufTableX4[HUFv05_DTABLE_SIZE(HufLog)];
+    const void* previousDstEnd;
+    const void* base;
+    const void* vBase;
+    const void* dictEnd;
+    size_t expected;
+    size_t headerSize;
+    ZSTDv05_parameters params;
+    blockType_t bType;   /* used in ZSTDv05_decompressContinue(), to transfer blockType between header decoding and block decoding stages */
+    ZSTDv05_dStage stage;
+    U32 flagStaticTables;
+    const BYTE* litPtr;
+    size_t litBufSize;
+    size_t litSize;
+    BYTE litBuffer[BLOCKSIZE + WILDCOPY_OVERLENGTH];
+    BYTE headerBuffer[ZSTDv05_frameHeaderSize_max];
+};  /* typedef'd to ZSTDv05_DCtx within "zstd_static.h" */
+
+size_t ZSTDv05_sizeofDCtx (void) { return sizeof(ZSTDv05_DCtx); }
+
+size_t ZSTDv05_decompressBegin(ZSTDv05_DCtx* dctx)
+{
+    dctx->expected = ZSTDv05_frameHeaderSize_min;
+    dctx->stage = ZSTDv05ds_getFrameHeaderSize;
+    dctx->previousDstEnd = NULL;
+    dctx->base = NULL;
+    dctx->vBase = NULL;
+    dctx->dictEnd = NULL;
+    dctx->hufTableX4[0] = HufLog;
+    dctx->flagStaticTables = 0;
+    return 0;
+}
+
+ZSTDv05_DCtx* ZSTDv05_createDCtx(void)
+{
+    ZSTDv05_DCtx* dctx = (ZSTDv05_DCtx*)malloc(sizeof(ZSTDv05_DCtx));
+    if (dctx==NULL) return NULL;
+    ZSTDv05_decompressBegin(dctx);
+    return dctx;
+}
+
+size_t ZSTDv05_freeDCtx(ZSTDv05_DCtx* dctx)
+{
+    free(dctx);
+    return 0;   /* reserved as a potential error code in the future */
+}
+
+void ZSTDv05_copyDCtx(ZSTDv05_DCtx* dstDCtx, const ZSTDv05_DCtx* srcDCtx)
+{
+    memcpy(dstDCtx, srcDCtx,
+           sizeof(ZSTDv05_DCtx) - (BLOCKSIZE+WILDCOPY_OVERLENGTH + ZSTDv05_frameHeaderSize_max));  /* no need to copy workspace */
+}
+
+
+/* *************************************************************
+*   Decompression section
+***************************************************************/
+
+/* Frame format description
+   Frame Header -  [ Block Header - Block ] - Frame End
+   1) Frame Header
+      - 4 bytes - Magic Number : ZSTDv05_MAGICNUMBER (defined within zstd_internal.h)
+      - 1 byte  - Window Descriptor
+   2) Block Header
+      - 3 bytes, starting with a 2-bits descriptor
+                 Uncompressed, Compressed, Frame End, unused
+   3) Block
+      See Block Format Description
+   4) Frame End
+      - 3 bytes, compatible with Block Header
+*/
+
+/* Block format description
+
+   Block = Literal Section - Sequences Section
+   Prerequisite : size of (compressed) block, maximum size of regenerated data
+
+   1) Literal Section
+
+   1.1) Header : 1-5 bytes
+        flags: 2 bits
+            00 compressed by Huff0
+            01 unused
+            10 is Raw (uncompressed)
+            11 is Rle
+            Note : using 01 => Huff0 with precomputed table ?
+            Note : delta map ? => compressed ?
+
+   1.1.1) Huff0-compressed literal block : 3-5 bytes
+            srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream
+            srcSize < 1 KB => 3 bytes (2-2-10-10)
+            srcSize < 16KB => 4 bytes (2-2-14-14)
+            else           => 5 bytes (2-2-18-18)
+            big endian convention
+
+   1.1.2) Raw (uncompressed) literal block header : 1-3 bytes
+        size :  5 bits: (IS_RAW<<6) + (0<<4) + size
+               12 bits: (IS_RAW<<6) + (2<<4) + (size>>8)
+                        size&255
+               20 bits: (IS_RAW<<6) + (3<<4) + (size>>16)
+                        size>>8&255
+                        size&255
+
+   1.1.3) Rle (repeated single byte) literal block header : 1-3 bytes
+        size :  5 bits: (IS_RLE<<6) + (0<<4) + size
+               12 bits: (IS_RLE<<6) + (2<<4) + (size>>8)
+                        size&255
+               20 bits: (IS_RLE<<6) + (3<<4) + (size>>16)
+                        size>>8&255
+                        size&255
+
+   1.1.4) Huff0-compressed literal block, using precomputed CTables : 3-5 bytes
+            srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream
+            srcSize < 1 KB => 3 bytes (2-2-10-10)
+            srcSize < 16KB => 4 bytes (2-2-14-14)
+            else           => 5 bytes (2-2-18-18)
+            big endian convention
+
+        1- CTable available (stored into workspace ?)
+        2- Small input (fast heuristic ? Full comparison ? depend on clevel ?)
+
+
+   1.2) Literal block content
+
+   1.2.1) Huff0 block, using sizes from header
+        See Huff0 format
+
+   1.2.2) Huff0 block, using prepared table
+
+   1.2.3) Raw content
+
+   1.2.4) single byte
+
+
+   2) Sequences section
+      TO DO
+*/
+
+
+/** ZSTDv05_decodeFrameHeader_Part1() :
+*   decode the 1st part of the Frame Header, which tells Frame Header size.
+*   srcSize must be == ZSTDv05_frameHeaderSize_min.
+*   @return : the full size of the Frame Header */
+static size_t ZSTDv05_decodeFrameHeader_Part1(ZSTDv05_DCtx* zc, const void* src, size_t srcSize)
+{
+    U32 magicNumber;
+    if (srcSize != ZSTDv05_frameHeaderSize_min)
+        return ERROR(srcSize_wrong);
+    magicNumber = MEM_readLE32(src);
+    if (magicNumber != ZSTDv05_MAGICNUMBER) return ERROR(prefix_unknown);
+    zc->headerSize = ZSTDv05_frameHeaderSize_min;
+    return zc->headerSize;
+}
+
+
+size_t ZSTDv05_getFrameParams(ZSTDv05_parameters* params, const void* src, size_t srcSize)
+{
+    U32 magicNumber;
+    if (srcSize < ZSTDv05_frameHeaderSize_min) return ZSTDv05_frameHeaderSize_max;
+    magicNumber = MEM_readLE32(src);
+    if (magicNumber != ZSTDv05_MAGICNUMBER) return ERROR(prefix_unknown);
+    memset(params, 0, sizeof(*params));
+    params->windowLog = (((const BYTE*)src)[4] & 15) + ZSTDv05_WINDOWLOG_ABSOLUTEMIN;
+    if ((((const BYTE*)src)[4] >> 4) != 0) return ERROR(frameParameter_unsupported);   /* reserved bits */
+    return 0;
+}
+
+/** ZSTDv05_decodeFrameHeader_Part2() :
+*   decode the full Frame Header.
+*   srcSize must be the size provided by ZSTDv05_decodeFrameHeader_Part1().
+*   @return : 0, or an error code, which can be tested using ZSTDv05_isError() */
+static size_t ZSTDv05_decodeFrameHeader_Part2(ZSTDv05_DCtx* zc, const void* src, size_t srcSize)
+{
+    size_t result;
+    if (srcSize != zc->headerSize)
+        return ERROR(srcSize_wrong);
+    result = ZSTDv05_getFrameParams(&(zc->params), src, srcSize);
+    if ((MEM_32bits()) && (zc->params.windowLog > 25)) return ERROR(frameParameter_unsupportedBy32bits);
+    return result;
+}
+
+
+size_t ZSTDv05_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
+{
+    const BYTE* const in = (const BYTE* const)src;
+    BYTE headerFlags;
+    U32 cSize;
+
+    if (srcSize < 3)
+        return ERROR(srcSize_wrong);
+
+    headerFlags = *in;
+    cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
+
+    bpPtr->blockType = (blockType_t)(headerFlags >> 6);
+    bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
+
+    if (bpPtr->blockType == bt_end) return 0;
+    if (bpPtr->blockType == bt_rle) return 1;
+    return cSize;
+}
+
+
+static size_t ZSTDv05_copyRawBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
+    memcpy(dst, src, srcSize);
+    return srcSize;
+}
+
+
+/*! ZSTDv05_decodeLiteralsBlock() :
+    @return : nb of bytes read from src (< srcSize ) */
+size_t ZSTDv05_decodeLiteralsBlock(ZSTDv05_DCtx* dctx,
+                          const void* src, size_t srcSize)   /* note : srcSize < BLOCKSIZE */
+{
+    const BYTE* const istart = (const BYTE*) src;
+
+    /* any compressed block with literals segment must be at least this size */
+    if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
+
+    switch(istart[0]>> 6)
+    {
+    case IS_HUFv05:
+        {
+            size_t litSize, litCSize, singleStream=0;
+            U32 lhSize = ((istart[0]) >> 4) & 3;
+            switch(lhSize)
+            {
+            case 0: case 1: default:   /* note : default is impossible, since lhSize into [0..3] */
+                /* 2 - 2 - 10 - 10 */
+                lhSize=3;
+                singleStream = istart[0] & 16;
+                litSize  = ((istart[0] & 15) << 6) + (istart[1] >> 2);
+                litCSize = ((istart[1] &  3) << 8) + istart[2];
+                break;
+            case 2:
+                /* 2 - 2 - 14 - 14 */
+                lhSize=4;
+                litSize  = ((istart[0] & 15) << 10) + (istart[1] << 2) + (istart[2] >> 6);
+                litCSize = ((istart[2] & 63) <<  8) + istart[3];
+                break;
+            case 3:
+                /* 2 - 2 - 18 - 18 */
+                lhSize=5;
+                litSize  = ((istart[0] & 15) << 14) + (istart[1] << 6) + (istart[2] >> 2);
+                litCSize = ((istart[2] &  3) << 16) + (istart[3] << 8) + istart[4];
+                break;
+            }
+            if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
+
+            if (HUFv05_isError(singleStream ?
+                            HUFv05_decompress1X2(dctx->litBuffer, litSize, istart+lhSize, litCSize) :
+                            HUFv05_decompress   (dctx->litBuffer, litSize, istart+lhSize, litCSize) ))
+                return ERROR(corruption_detected);
+
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litBufSize = BLOCKSIZE+8;
+            dctx->litSize = litSize;
+            return litCSize + lhSize;
+        }
+    case IS_PCH:
+        {
+            size_t errorCode;
+            size_t litSize, litCSize;
+            U32 lhSize = ((istart[0]) >> 4) & 3;
+            if (lhSize != 1)  /* only case supported for now : small litSize, single stream */
+                return ERROR(corruption_detected);
+            if (!dctx->flagStaticTables)
+                return ERROR(dictionary_corrupted);
+
+            /* 2 - 2 - 10 - 10 */
+            lhSize=3;
+            litSize  = ((istart[0] & 15) << 6) + (istart[1] >> 2);
+            litCSize = ((istart[1] &  3) << 8) + istart[2];
+
+            errorCode = HUFv05_decompress1X4_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTableX4);
+            if (HUFv05_isError(errorCode)) return ERROR(corruption_detected);
+
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litBufSize = BLOCKSIZE+WILDCOPY_OVERLENGTH;
+            dctx->litSize = litSize;
+            return litCSize + lhSize;
+        }
+    case IS_RAW:
+        {
+            size_t litSize;
+            U32 lhSize = ((istart[0]) >> 4) & 3;
+            switch(lhSize)
+            {
+            case 0: case 1: default:   /* note : default is impossible, since lhSize into [0..3] */
+                lhSize=1;
+                litSize = istart[0] & 31;
+                break;
+            case 2:
+                litSize = ((istart[0] & 15) << 8) + istart[1];
+                break;
+            case 3:
+                litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2];
+                break;
+            }
+
+            if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
+                if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
+                memcpy(dctx->litBuffer, istart+lhSize, litSize);
+                dctx->litPtr = dctx->litBuffer;
+                dctx->litBufSize = BLOCKSIZE+8;
+                dctx->litSize = litSize;
+                return lhSize+litSize;
+            }
+            /* direct reference into compressed stream */
+            dctx->litPtr = istart+lhSize;
+            dctx->litBufSize = srcSize-lhSize;
+            dctx->litSize = litSize;
+            return lhSize+litSize;
+        }
+    case IS_RLE:
+        {
+            size_t litSize;
+            U32 lhSize = ((istart[0]) >> 4) & 3;
+            switch(lhSize)
+            {
+            case 0: case 1: default:   /* note : default is impossible, since lhSize into [0..3] */
+                lhSize = 1;
+                litSize = istart[0] & 31;
+                break;
+            case 2:
+                litSize = ((istart[0] & 15) << 8) + istart[1];
+                break;
+            case 3:
+                litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2];
+                break;
+            }
+            if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
+            memset(dctx->litBuffer, istart[lhSize], litSize);
+            dctx->litPtr = dctx->litBuffer;
+            dctx->litBufSize = BLOCKSIZE+WILDCOPY_OVERLENGTH;
+            dctx->litSize = litSize;
+            return lhSize+1;
+        }
+    default:
+        return ERROR(corruption_detected);   /* impossible */
+    }
+}
+
+
+size_t ZSTDv05_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
+                         FSEv05_DTable* DTableLL, FSEv05_DTable* DTableML, FSEv05_DTable* DTableOffb,
+                         const void* src, size_t srcSize)
+{
+    const BYTE* const istart = (const BYTE* const)src;
+    const BYTE* ip = istart;
+    const BYTE* const iend = istart + srcSize;
+    U32 LLtype, Offtype, MLtype;
+    U32 LLlog, Offlog, MLlog;
+    size_t dumpsLength;
+
+    /* check */
+    if (srcSize < MIN_SEQUENCES_SIZE)
+        return ERROR(srcSize_wrong);
+
+    /* SeqHead */
+    *nbSeq = *ip++;
+    if (*nbSeq==0) return 1;
+    if (*nbSeq >= 128)
+        *nbSeq = ((nbSeq[0]-128)<<8) + *ip++;
+
+    LLtype  = *ip >> 6;
+    Offtype = (*ip >> 4) & 3;
+    MLtype  = (*ip >> 2) & 3;
+    if (*ip & 2) {
+        dumpsLength  = ip[2];
+        dumpsLength += ip[1] << 8;
+        ip += 3;
+    } else {
+        dumpsLength  = ip[1];
+        dumpsLength += (ip[0] & 1) << 8;
+        ip += 2;
+    }
+    *dumpsPtr = ip;
+    ip += dumpsLength;
+    *dumpsLengthPtr = dumpsLength;
+
+    /* check */
+    if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
+
+    /* sequences */
+    {
+        S16 norm[MaxML+1];    /* assumption : MaxML >= MaxLL >= MaxOff */
+        size_t headerSize;
+
+        /* Build DTables */
+        switch(LLtype)
+        {
+        U32 max;
+        case FSEv05_ENCODING_RLE :
+            LLlog = 0;
+            FSEv05_buildDTable_rle(DTableLL, *ip++);
+            break;
+        case FSEv05_ENCODING_RAW :
+            LLlog = LLbits;
+            FSEv05_buildDTable_raw(DTableLL, LLbits);
+            break;
+        case FSEv05_ENCODING_STATIC:
+            break;
+        case FSEv05_ENCODING_DYNAMIC :
+        default :   /* impossible */
+            max = MaxLL;
+            headerSize = FSEv05_readNCount(norm, &max, &LLlog, ip, iend-ip);
+            if (FSEv05_isError(headerSize)) return ERROR(GENERIC);
+            if (LLlog > LLFSEv05Log) return ERROR(corruption_detected);
+            ip += headerSize;
+            FSEv05_buildDTable(DTableLL, norm, max, LLlog);
+        }
+
+        switch(Offtype)
+        {
+        U32 max;
+        case FSEv05_ENCODING_RLE :
+            Offlog = 0;
+            if (ip > iend-2) return ERROR(srcSize_wrong);   /* min : "raw", hence no header, but at least xxLog bits */
+            FSEv05_buildDTable_rle(DTableOffb, *ip++ & MaxOff); /* if *ip > MaxOff, data is corrupted */
+            break;
+        case FSEv05_ENCODING_RAW :
+            Offlog = Offbits;
+            FSEv05_buildDTable_raw(DTableOffb, Offbits);
+            break;
+        case FSEv05_ENCODING_STATIC:
+            break;
+        case FSEv05_ENCODING_DYNAMIC :
+        default :   /* impossible */
+            max = MaxOff;
+            headerSize = FSEv05_readNCount(norm, &max, &Offlog, ip, iend-ip);
+            if (FSEv05_isError(headerSize)) return ERROR(GENERIC);
+            if (Offlog > OffFSEv05Log) return ERROR(corruption_detected);
+            ip += headerSize;
+            FSEv05_buildDTable(DTableOffb, norm, max, Offlog);
+        }
+
+        switch(MLtype)
+        {
+        U32 max;
+        case FSEv05_ENCODING_RLE :
+            MLlog = 0;
+            if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
+            FSEv05_buildDTable_rle(DTableML, *ip++);
+            break;
+        case FSEv05_ENCODING_RAW :
+            MLlog = MLbits;
+            FSEv05_buildDTable_raw(DTableML, MLbits);
+            break;
+        case FSEv05_ENCODING_STATIC:
+            break;
+        case FSEv05_ENCODING_DYNAMIC :
+        default :   /* impossible */
+            max = MaxML;
+            headerSize = FSEv05_readNCount(norm, &max, &MLlog, ip, iend-ip);
+            if (FSEv05_isError(headerSize)) return ERROR(GENERIC);
+            if (MLlog > MLFSEv05Log) return ERROR(corruption_detected);
+            ip += headerSize;
+            FSEv05_buildDTable(DTableML, norm, max, MLlog);
+    }   }
+
+    return ip-istart;
+}
+
+
+typedef struct {
+    size_t litLength;
+    size_t matchLength;
+    size_t offset;
+} seq_t;
+
+typedef struct {
+    BITv05_DStream_t DStream;
+    FSEv05_DState_t stateLL;
+    FSEv05_DState_t stateOffb;
+    FSEv05_DState_t stateML;
+    size_t prevOffset;
+    const BYTE* dumps;
+    const BYTE* dumpsEnd;
+} seqState_t;
+
+
+
+static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
+{
+    size_t litLength;
+    size_t prevOffset;
+    size_t offset;
+    size_t matchLength;
+    const BYTE* dumps = seqState->dumps;
+    const BYTE* const de = seqState->dumpsEnd;
+
+    /* Literal length */
+    litLength = FSEv05_peakSymbol(&(seqState->stateLL));
+    prevOffset = litLength ? seq->offset : seqState->prevOffset;
+    if (litLength == MaxLL) {
+        U32 add = *dumps++;
+        if (add < 255) litLength += add;
+        else {
+            litLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no risk : dumps is always followed by seq tables > 1 byte */
+            if (litLength&1) litLength>>=1, dumps += 3;
+            else litLength = (U16)(litLength)>>1, dumps += 2;
+        }
+        if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
+    }
+
+    /* Offset */
+    {
+        static const U32 offsetPrefix[MaxOff+1] = {
+                1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256,
+                512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
+                524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 };
+        U32 offsetCode = FSEv05_peakSymbol(&(seqState->stateOffb));   /* <= maxOff, by table construction */
+        U32 nbBits = offsetCode - 1;
+        if (offsetCode==0) nbBits = 0;   /* cmove */
+        offset = offsetPrefix[offsetCode] + BITv05_readBits(&(seqState->DStream), nbBits);
+        if (MEM_32bits()) BITv05_reloadDStream(&(seqState->DStream));
+        if (offsetCode==0) offset = prevOffset;   /* repcode, cmove */
+        if (offsetCode | !litLength) seqState->prevOffset = seq->offset;   /* cmove */
+        FSEv05_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));    /* update */
+    }
+
+    /* Literal length update */
+    FSEv05_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));   /* update */
+    if (MEM_32bits()) BITv05_reloadDStream(&(seqState->DStream));
+
+    /* MatchLength */
+    matchLength = FSEv05_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
+    if (matchLength == MaxML) {
+        U32 add = *dumps++;
+        if (add < 255) matchLength += add;
+        else {
+            matchLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+            if (matchLength&1) matchLength>>=1, dumps += 3;
+            else matchLength = (U16)(matchLength)>>1, dumps += 2;
+        }
+        if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
+    }
+    matchLength += MINMATCH;
+
+    /* save result */
+    seq->litLength = litLength;
+    seq->offset = offset;
+    seq->matchLength = matchLength;
+    seqState->dumps = dumps;
+
+#if 0   /* debug */
+    {
+        static U64 totalDecoded = 0;
+        printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n",
+           (U32)(totalDecoded), (U32)litLength, (U32)matchLength, (U32)offset);
+        totalDecoded += litLength + matchLength;
+    }
+#endif
+}
+
+
+static size_t ZSTDv05_execSequence(BYTE* op,
+                                BYTE* const oend, seq_t sequence,
+                                const BYTE** litPtr, const BYTE* const litLimit_8,
+                                const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
+{
+    static const int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
+    static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* substracted */
+    BYTE* const oLitEnd = op + sequence.litLength;
+    const size_t sequenceLength = sequence.litLength + sequence.matchLength;
+    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
+    BYTE* const oend_8 = oend-8;
+    const BYTE* const litEnd = *litPtr + sequence.litLength;
+    const BYTE* match = oLitEnd - sequence.offset;
+
+    /* check */
+    if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall);   /* last match must start at a minimum distance of 8 from oend */
+    if (oMatchEnd > oend) return ERROR(dstSize_tooSmall);   /* overwrite beyond dst buffer */
+    if (litEnd > litLimit_8) return ERROR(corruption_detected);   /* risk read beyond lit buffer */
+
+    /* copy Literals */
+    ZSTDv05_wildcopy(op, *litPtr, sequence.litLength);   /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
+    op = oLitEnd;
+    *litPtr = litEnd;   /* update for next sequence */
+
+    /* copy Match */
+    if (sequence.offset > (size_t)(oLitEnd - base)) {
+        /* offset beyond prefix */
+        if (sequence.offset > (size_t)(oLitEnd - vBase))
+            return ERROR(corruption_detected);
+        match = dictEnd - (base-match);
+        if (match + sequence.matchLength <= dictEnd) {
+            memmove(oLitEnd, match, sequence.matchLength);
+            return sequenceLength;
+        }
+        /* span extDict & currentPrefixSegment */
+        {
+            size_t length1 = dictEnd - match;
+            memmove(oLitEnd, match, length1);
+            op = oLitEnd + length1;
+            sequence.matchLength -= length1;
+            match = base;
+    }   }
+
+    /* match within prefix */
+    if (sequence.offset < 8) {
+        /* close range match, overlap */
+        const int sub2 = dec64table[sequence.offset];
+        op[0] = match[0];
+        op[1] = match[1];
+        op[2] = match[2];
+        op[3] = match[3];
+        match += dec32table[sequence.offset];
+        ZSTDv05_copy4(op+4, match);
+        match -= sub2;
+    } else {
+        ZSTDv05_copy8(op, match);
+    }
+    op += 8; match += 8;
+
+    if (oMatchEnd > oend-12) {
+        if (op < oend_8) {
+            ZSTDv05_wildcopy(op, match, oend_8 - op);
+            match += oend_8 - op;
+            op = oend_8;
+        }
+        while (op < oMatchEnd)
+            *op++ = *match++;
+    } else {
+        ZSTDv05_wildcopy(op, match, sequence.matchLength-8);   /* works even if matchLength < 8 */
+    }
+    return sequenceLength;
+}
+
+
+static size_t ZSTDv05_decompressSequences(
+                               ZSTDv05_DCtx* dctx,
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize)
+{
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t errorCode, dumpsLength;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litLimit_8 = litPtr + dctx->litBufSize - 8;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    int nbSeq;
+    const BYTE* dumps;
+    U32* DTableLL = dctx->LLTable;
+    U32* DTableML = dctx->MLTable;
+    U32* DTableOffb = dctx->OffTable;
+    const BYTE* const base = (const BYTE*) (dctx->base);
+    const BYTE* const vBase = (const BYTE*) (dctx->vBase);
+    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+
+    /* Build Decoding Tables */
+    errorCode = ZSTDv05_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
+                                      DTableLL, DTableML, DTableOffb,
+                                      ip, seqSize);
+    if (ZSTDv05_isError(errorCode)) return errorCode;
+    ip += errorCode;
+
+    /* Regen sequences */
+    if (nbSeq) {
+        seq_t sequence;
+        seqState_t seqState;
+
+        memset(&sequence, 0, sizeof(sequence));
+        sequence.offset = REPCODE_STARTVALUE;
+        seqState.dumps = dumps;
+        seqState.dumpsEnd = dumps + dumpsLength;
+        seqState.prevOffset = REPCODE_STARTVALUE;
+        errorCode = BITv05_initDStream(&(seqState.DStream), ip, iend-ip);
+        if (ERR_isError(errorCode)) return ERROR(corruption_detected);
+        FSEv05_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
+        FSEv05_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
+        FSEv05_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
+
+        for ( ; (BITv05_reloadDStream(&(seqState.DStream)) <= BITv05_DStream_completed) && nbSeq ; ) {
+            size_t oneSeqSize;
+            nbSeq--;
+            ZSTDv05_decodeSequence(&sequence, &seqState);
+            oneSeqSize = ZSTDv05_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd);
+            if (ZSTDv05_isError(oneSeqSize)) return oneSeqSize;
+            op += oneSeqSize;
+        }
+
+        /* check if reached exact end */
+        if (nbSeq) return ERROR(corruption_detected);
+    }
+
+    /* last literal segment */
+    {
+        size_t lastLLSize = litEnd - litPtr;
+        if (litPtr > litEnd) return ERROR(corruption_detected);   /* too many literals already used */
+        if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
+        memcpy(op, litPtr, lastLLSize);
+        op += lastLLSize;
+    }
+
+    return op-ostart;
+}
+
+
+static void ZSTDv05_checkContinuity(ZSTDv05_DCtx* dctx, const void* dst)
+{
+    if (dst != dctx->previousDstEnd) {   /* not contiguous */
+        dctx->dictEnd = dctx->previousDstEnd;
+        dctx->vBase = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base));
+        dctx->base = dst;
+        dctx->previousDstEnd = dst;
+    }
+}
+
+
+static size_t ZSTDv05_decompressBlock_internal(ZSTDv05_DCtx* dctx,
+                            void* dst, size_t dstCapacity,
+                      const void* src, size_t srcSize)
+{   /* blockType == blockCompressed */
+    const BYTE* ip = (const BYTE*)src;
+    size_t litCSize;
+
+    if (srcSize >= BLOCKSIZE) return ERROR(srcSize_wrong);
+
+    /* Decode literals sub-block */
+    litCSize = ZSTDv05_decodeLiteralsBlock(dctx, src, srcSize);
+    if (ZSTDv05_isError(litCSize)) return litCSize;
+    ip += litCSize;
+    srcSize -= litCSize;
+
+    return ZSTDv05_decompressSequences(dctx, dst, dstCapacity, ip, srcSize);
+}
+
+
+size_t ZSTDv05_decompressBlock(ZSTDv05_DCtx* dctx,
+                            void* dst, size_t dstCapacity,
+                      const void* src, size_t srcSize)
+{
+    ZSTDv05_checkContinuity(dctx, dst);
+    return ZSTDv05_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize);
+}
+
+
+/*! ZSTDv05_decompress_continueDCtx
+*   dctx must have been properly initialized */
+static size_t ZSTDv05_decompress_continueDCtx(ZSTDv05_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                                 const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    const BYTE* iend = ip + srcSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t remainingSize = srcSize;
+    blockProperties_t blockProperties;
+
+    /* Frame Header */
+    {
+        size_t frameHeaderSize;
+        if (srcSize < ZSTDv05_frameHeaderSize_min+ZSTDv05_blockHeaderSize) return ERROR(srcSize_wrong);
+        frameHeaderSize = ZSTDv05_decodeFrameHeader_Part1(dctx, src, ZSTDv05_frameHeaderSize_min);
+        if (ZSTDv05_isError(frameHeaderSize)) return frameHeaderSize;
+        if (srcSize < frameHeaderSize+ZSTDv05_blockHeaderSize) return ERROR(srcSize_wrong);
+        ip += frameHeaderSize; remainingSize -= frameHeaderSize;
+        frameHeaderSize = ZSTDv05_decodeFrameHeader_Part2(dctx, src, frameHeaderSize);
+        if (ZSTDv05_isError(frameHeaderSize)) return frameHeaderSize;
+    }
+
+    /* Loop on each block */
+    while (1)
+    {
+        size_t decodedSize=0;
+        size_t cBlockSize = ZSTDv05_getcBlockSize(ip, iend-ip, &blockProperties);
+        if (ZSTDv05_isError(cBlockSize)) return cBlockSize;
+
+        ip += ZSTDv05_blockHeaderSize;
+        remainingSize -= ZSTDv05_blockHeaderSize;
+        if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+
+        switch(blockProperties.blockType)
+        {
+        case bt_compressed:
+            decodedSize = ZSTDv05_decompressBlock_internal(dctx, op, oend-op, ip, cBlockSize);
+            break;
+        case bt_raw :
+            decodedSize = ZSTDv05_copyRawBlock(op, oend-op, ip, cBlockSize);
+            break;
+        case bt_rle :
+            return ERROR(GENERIC);   /* not yet supported */
+            break;
+        case bt_end :
+            /* end of frame */
+            if (remainingSize) return ERROR(srcSize_wrong);
+            break;
+        default:
+            return ERROR(GENERIC);   /* impossible */
+        }
+        if (cBlockSize == 0) break;   /* bt_end */
+
+        if (ZSTDv05_isError(decodedSize)) return decodedSize;
+        op += decodedSize;
+        ip += cBlockSize;
+        remainingSize -= cBlockSize;
+    }
+
+    return op-ostart;
+}
+
+
+size_t ZSTDv05_decompress_usingPreparedDCtx(ZSTDv05_DCtx* dctx, const ZSTDv05_DCtx* refDCtx,
+                                         void* dst, size_t maxDstSize,
+                                   const void* src, size_t srcSize)
+{
+    ZSTDv05_copyDCtx(dctx, refDCtx);
+    ZSTDv05_checkContinuity(dctx, dst);
+    return ZSTDv05_decompress_continueDCtx(dctx, dst, maxDstSize, src, srcSize);
+}
+
+
+size_t ZSTDv05_decompress_usingDict(ZSTDv05_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                                 const void* src, size_t srcSize,
+                                 const void* dict, size_t dictSize)
+{
+    ZSTDv05_decompressBegin_usingDict(dctx, dict, dictSize);
+    ZSTDv05_checkContinuity(dctx, dst);
+    return ZSTDv05_decompress_continueDCtx(dctx, dst, maxDstSize, src, srcSize);
+}
+
+
+size_t ZSTDv05_decompressDCtx(ZSTDv05_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    return ZSTDv05_decompress_usingDict(dctx, dst, maxDstSize, src, srcSize, NULL, 0);
+}
+
+size_t ZSTDv05_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+#if defined(ZSTDv05_HEAPMODE) && (ZSTDv05_HEAPMODE==1)
+    size_t regenSize;
+    ZSTDv05_DCtx* dctx = ZSTDv05_createDCtx();
+    if (dctx==NULL) return ERROR(memory_allocation);
+    regenSize = ZSTDv05_decompressDCtx(dctx, dst, maxDstSize, src, srcSize);
+    ZSTDv05_freeDCtx(dctx);
+    return regenSize;
+#else
+    ZSTDv05_DCtx dctx;
+    return ZSTDv05_decompressDCtx(&dctx, dst, maxDstSize, src, srcSize);
+#endif
+}
+
+
+/* ******************************
+*  Streaming Decompression API
+********************************/
+size_t ZSTDv05_nextSrcSizeToDecompress(ZSTDv05_DCtx* dctx)
+{
+    return dctx->expected;
+}
+
+size_t ZSTDv05_decompressContinue(ZSTDv05_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    /* Sanity check */
+    if (srcSize != dctx->expected) return ERROR(srcSize_wrong);
+    ZSTDv05_checkContinuity(dctx, dst);
+
+    /* Decompress : frame header; part 1 */
+    switch (dctx->stage)
+    {
+    case ZSTDv05ds_getFrameHeaderSize :
+        {
+            /* get frame header size */
+            if (srcSize != ZSTDv05_frameHeaderSize_min) return ERROR(srcSize_wrong);   /* impossible */
+            dctx->headerSize = ZSTDv05_decodeFrameHeader_Part1(dctx, src, ZSTDv05_frameHeaderSize_min);
+            if (ZSTDv05_isError(dctx->headerSize)) return dctx->headerSize;
+            memcpy(dctx->headerBuffer, src, ZSTDv05_frameHeaderSize_min);
+            if (dctx->headerSize > ZSTDv05_frameHeaderSize_min) {
+                dctx->expected = dctx->headerSize - ZSTDv05_frameHeaderSize_min;
+                dctx->stage = ZSTDv05ds_decodeFrameHeader;
+                return 0;
+            }
+            dctx->expected = 0;   /* not necessary to copy more */
+        }
+    case ZSTDv05ds_decodeFrameHeader:
+        {
+            /* get frame header */
+            size_t result;
+            memcpy(dctx->headerBuffer + ZSTDv05_frameHeaderSize_min, src, dctx->expected);
+            result = ZSTDv05_decodeFrameHeader_Part2(dctx, dctx->headerBuffer, dctx->headerSize);
+            if (ZSTDv05_isError(result)) return result;
+            dctx->expected = ZSTDv05_blockHeaderSize;
+            dctx->stage = ZSTDv05ds_decodeBlockHeader;
+            return 0;
+        }
+    case ZSTDv05ds_decodeBlockHeader:
+        {
+            /* Decode block header */
+            blockProperties_t bp;
+            size_t blockSize = ZSTDv05_getcBlockSize(src, ZSTDv05_blockHeaderSize, &bp);
+            if (ZSTDv05_isError(blockSize)) return blockSize;
+            if (bp.blockType == bt_end) {
+                dctx->expected = 0;
+                dctx->stage = ZSTDv05ds_getFrameHeaderSize;
+            }
+            else {
+                dctx->expected = blockSize;
+                dctx->bType = bp.blockType;
+                dctx->stage = ZSTDv05ds_decompressBlock;
+            }
+            return 0;
+        }
+    case ZSTDv05ds_decompressBlock:
+        {
+            /* Decompress : block content */
+            size_t rSize;
+            switch(dctx->bType)
+            {
+            case bt_compressed:
+                rSize = ZSTDv05_decompressBlock_internal(dctx, dst, maxDstSize, src, srcSize);
+                break;
+            case bt_raw :
+                rSize = ZSTDv05_copyRawBlock(dst, maxDstSize, src, srcSize);
+                break;
+            case bt_rle :
+                return ERROR(GENERIC);   /* not yet handled */
+                break;
+            case bt_end :   /* should never happen (filtered at phase 1) */
+                rSize = 0;
+                break;
+            default:
+                return ERROR(GENERIC);   /* impossible */
+            }
+            dctx->stage = ZSTDv05ds_decodeBlockHeader;
+            dctx->expected = ZSTDv05_blockHeaderSize;
+            dctx->previousDstEnd = (char*)dst + rSize;
+            return rSize;
+        }
+    default:
+        return ERROR(GENERIC);   /* impossible */
+    }
+}
+
+
+static void ZSTDv05_refDictContent(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    dctx->dictEnd = dctx->previousDstEnd;
+    dctx->vBase = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base));
+    dctx->base = dict;
+    dctx->previousDstEnd = (const char*)dict + dictSize;
+}
+
+static size_t ZSTDv05_loadEntropy(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    size_t hSize, offcodeHeaderSize, matchlengthHeaderSize, errorCode, litlengthHeaderSize;
+    short offcodeNCount[MaxOff+1];
+    U32 offcodeMaxValue=MaxOff, offcodeLog=OffFSEv05Log;
+    short matchlengthNCount[MaxML+1];
+    unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSEv05Log;
+    short litlengthNCount[MaxLL+1];
+    unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSEv05Log;
+
+    hSize = HUFv05_readDTableX4(dctx->hufTableX4, dict, dictSize);
+    if (HUFv05_isError(hSize)) return ERROR(dictionary_corrupted);
+    dict = (const char*)dict + hSize;
+    dictSize -= hSize;
+
+    offcodeHeaderSize = FSEv05_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dict, dictSize);
+    if (FSEv05_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
+    errorCode = FSEv05_buildDTable(dctx->OffTable, offcodeNCount, offcodeMaxValue, offcodeLog);
+    if (FSEv05_isError(errorCode)) return ERROR(dictionary_corrupted);
+    dict = (const char*)dict + offcodeHeaderSize;
+    dictSize -= offcodeHeaderSize;
+
+    matchlengthHeaderSize = FSEv05_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dict, dictSize);
+    if (FSEv05_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
+    errorCode = FSEv05_buildDTable(dctx->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog);
+    if (FSEv05_isError(errorCode)) return ERROR(dictionary_corrupted);
+    dict = (const char*)dict + matchlengthHeaderSize;
+    dictSize -= matchlengthHeaderSize;
+
+    litlengthHeaderSize = FSEv05_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dict, dictSize);
+    if (FSEv05_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
+    errorCode = FSEv05_buildDTable(dctx->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog);
+    if (FSEv05_isError(errorCode)) return ERROR(dictionary_corrupted);
+
+    dctx->flagStaticTables = 1;
+    return hSize + offcodeHeaderSize + matchlengthHeaderSize + litlengthHeaderSize;
+}
+
+static size_t ZSTDv05_decompress_insertDictionary(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    size_t eSize;
+    U32 magic = MEM_readLE32(dict);
+    if (magic != ZSTDv05_DICT_MAGIC) {
+        /* pure content mode */
+        ZSTDv05_refDictContent(dctx, dict, dictSize);
+        return 0;
+    }
+    /* load entropy tables */
+    dict = (const char*)dict + 4;
+    dictSize -= 4;
+    eSize = ZSTDv05_loadEntropy(dctx, dict, dictSize);
+    if (ZSTDv05_isError(eSize)) return ERROR(dictionary_corrupted);
+
+    /* reference dictionary content */
+    dict = (const char*)dict + eSize;
+    dictSize -= eSize;
+    ZSTDv05_refDictContent(dctx, dict, dictSize);
+
+    return 0;
+}
+
+
+size_t ZSTDv05_decompressBegin_usingDict(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    size_t errorCode;
+    errorCode = ZSTDv05_decompressBegin(dctx);
+    if (ZSTDv05_isError(errorCode)) return errorCode;
+
+    if (dict && dictSize) {
+        errorCode = ZSTDv05_decompress_insertDictionary(dctx, dict, dictSize);
+        if (ZSTDv05_isError(errorCode)) return ERROR(dictionary_corrupted);
+    }
+
+    return 0;
+}
+
+/*
+    Buffered version of Zstd compression library
+    Copyright (C) 2015-2016, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* The objects defined into this file should be considered experimental.
+ * They are not labelled stable, as their prototype may change in the future.
+ * You can use them for tests, provide feedback, or if you can endure risk of future changes.
+ */
+
+
+
+/* *************************************
+*  Constants
+***************************************/
+static size_t ZBUFFv05_blockHeaderSize = 3;
+
+
+
+/* *** Compression *** */
+
+static size_t ZBUFFv05_limitCopy(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    size_t length = MIN(maxDstSize, srcSize);
+    memcpy(dst, src, length);
+    return length;
+}
+
+
+
+
+/** ************************************************
+*  Streaming decompression
+*
+*  A ZBUFFv05_DCtx object is required to track streaming operation.
+*  Use ZBUFFv05_createDCtx() and ZBUFFv05_freeDCtx() to create/release resources.
+*  Use ZBUFFv05_decompressInit() to start a new decompression operation.
+*  ZBUFFv05_DCtx objects can be reused multiple times.
+*
+*  Use ZBUFFv05_decompressContinue() repetitively to consume your input.
+*  *srcSizePtr and *maxDstSizePtr can be any size.
+*  The function will report how many bytes were read or written by modifying *srcSizePtr and *maxDstSizePtr.
+*  Note that it may not consume the entire input, in which case it's up to the caller to call again the function with remaining input.
+*  The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or change dst .
+*  @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency)
+*            or 0 when a frame is completely decoded
+*            or an error code, which can be tested using ZBUFFv05_isError().
+*
+*  Hint : recommended buffer sizes (not compulsory)
+*  output : 128 KB block size is the internal unit, it ensures it's always possible to write a full block when it's decoded.
+*  input : just follow indications from ZBUFFv05_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
+* **************************************************/
+
+typedef enum { ZBUFFv05ds_init, ZBUFFv05ds_readHeader, ZBUFFv05ds_loadHeader, ZBUFFv05ds_decodeHeader,
+               ZBUFFv05ds_read, ZBUFFv05ds_load, ZBUFFv05ds_flush } ZBUFFv05_dStage;
+
+/* *** Resource management *** */
+
+#define ZSTDv05_frameHeaderSize_max 5   /* too magical, should come from reference */
+struct ZBUFFv05_DCtx_s {
+    ZSTDv05_DCtx* zc;
+    ZSTDv05_parameters params;
+    char* inBuff;
+    size_t inBuffSize;
+    size_t inPos;
+    char* outBuff;
+    size_t outBuffSize;
+    size_t outStart;
+    size_t outEnd;
+    size_t hPos;
+    ZBUFFv05_dStage stage;
+    unsigned char headerBuffer[ZSTDv05_frameHeaderSize_max];
+};   /* typedef'd to ZBUFFv05_DCtx within "zstd_buffered.h" */
+
+
+ZBUFFv05_DCtx* ZBUFFv05_createDCtx(void)
+{
+    ZBUFFv05_DCtx* zbc = (ZBUFFv05_DCtx*)malloc(sizeof(ZBUFFv05_DCtx));
+    if (zbc==NULL) return NULL;
+    memset(zbc, 0, sizeof(*zbc));
+    zbc->zc = ZSTDv05_createDCtx();
+    zbc->stage = ZBUFFv05ds_init;
+    return zbc;
+}
+
+size_t ZBUFFv05_freeDCtx(ZBUFFv05_DCtx* zbc)
+{
+    if (zbc==NULL) return 0;   /* support free on null */
+    ZSTDv05_freeDCtx(zbc->zc);
+    free(zbc->inBuff);
+    free(zbc->outBuff);
+    free(zbc);
+    return 0;
+}
+
+
+/* *** Initialization *** */
+
+size_t ZBUFFv05_decompressInitDictionary(ZBUFFv05_DCtx* zbc, const void* dict, size_t dictSize)
+{
+    zbc->stage = ZBUFFv05ds_readHeader;
+    zbc->hPos = zbc->inPos = zbc->outStart = zbc->outEnd = 0;
+    return ZSTDv05_decompressBegin_usingDict(zbc->zc, dict, dictSize);
+}
+
+size_t ZBUFFv05_decompressInit(ZBUFFv05_DCtx* zbc)
+{
+    return ZBUFFv05_decompressInitDictionary(zbc, NULL, 0);
+}
+
+
+/* *** Decompression *** */
+
+size_t ZBUFFv05_decompressContinue(ZBUFFv05_DCtx* zbc, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr)
+{
+    const char* const istart = (const char*)src;
+    const char* ip = istart;
+    const char* const iend = istart + *srcSizePtr;
+    char* const ostart = (char*)dst;
+    char* op = ostart;
+    char* const oend = ostart + *maxDstSizePtr;
+    U32 notDone = 1;
+
+    while (notDone) {
+        switch(zbc->stage)
+        {
+        case ZBUFFv05ds_init :
+            return ERROR(init_missing);
+
+        case ZBUFFv05ds_readHeader :
+            /* read header from src */
+            {
+                size_t headerSize = ZSTDv05_getFrameParams(&(zbc->params), src, *srcSizePtr);
+                if (ZSTDv05_isError(headerSize)) return headerSize;
+                if (headerSize) {
+                    /* not enough input to decode header : tell how many bytes would be necessary */
+                    memcpy(zbc->headerBuffer+zbc->hPos, src, *srcSizePtr);
+                    zbc->hPos += *srcSizePtr;
+                    *maxDstSizePtr = 0;
+                    zbc->stage = ZBUFFv05ds_loadHeader;
+                    return headerSize - zbc->hPos;
+                }
+                zbc->stage = ZBUFFv05ds_decodeHeader;
+                break;
+            }
+
+        case ZBUFFv05ds_loadHeader:
+            /* complete header from src */
+            {
+                size_t headerSize = ZBUFFv05_limitCopy(
+                    zbc->headerBuffer + zbc->hPos, ZSTDv05_frameHeaderSize_max - zbc->hPos,
+                    src, *srcSizePtr);
+                zbc->hPos += headerSize;
+                ip += headerSize;
+                headerSize = ZSTDv05_getFrameParams(&(zbc->params), zbc->headerBuffer, zbc->hPos);
+                if (ZSTDv05_isError(headerSize)) return headerSize;
+                if (headerSize) {
+                    /* not enough input to decode header : tell how many bytes would be necessary */
+                    *maxDstSizePtr = 0;
+                    return headerSize - zbc->hPos;
+                }
+                // zbc->stage = ZBUFFv05ds_decodeHeader; break;   /* useless : stage follows */
+            }
+
+        case ZBUFFv05ds_decodeHeader:
+                /* apply header to create / resize buffers */
+                {
+                    size_t neededOutSize = (size_t)1 << zbc->params.windowLog;
+                    size_t neededInSize = BLOCKSIZE;   /* a block is never > BLOCKSIZE */
+                    if (zbc->inBuffSize < neededInSize) {
+                        free(zbc->inBuff);
+                        zbc->inBuffSize = neededInSize;
+                        zbc->inBuff = (char*)malloc(neededInSize);
+                        if (zbc->inBuff == NULL) return ERROR(memory_allocation);
+                    }
+                    if (zbc->outBuffSize < neededOutSize) {
+                        free(zbc->outBuff);
+                        zbc->outBuffSize = neededOutSize;
+                        zbc->outBuff = (char*)malloc(neededOutSize);
+                        if (zbc->outBuff == NULL) return ERROR(memory_allocation);
+                }   }
+                if (zbc->hPos) {
+                    /* some data already loaded into headerBuffer : transfer into inBuff */
+                    memcpy(zbc->inBuff, zbc->headerBuffer, zbc->hPos);
+                    zbc->inPos = zbc->hPos;
+                    zbc->hPos = 0;
+                    zbc->stage = ZBUFFv05ds_load;
+                    break;
+                }
+                zbc->stage = ZBUFFv05ds_read;
+
+        case ZBUFFv05ds_read:
+            {
+                size_t neededInSize = ZSTDv05_nextSrcSizeToDecompress(zbc->zc);
+                if (neededInSize==0) {  /* end of frame */
+                    zbc->stage = ZBUFFv05ds_init;
+                    notDone = 0;
+                    break;
+                }
+                if ((size_t)(iend-ip) >= neededInSize) {
+                    /* directly decode from src */
+                    size_t decodedSize = ZSTDv05_decompressContinue(zbc->zc,
+                        zbc->outBuff + zbc->outStart, zbc->outBuffSize - zbc->outStart,
+                        ip, neededInSize);
+                    if (ZSTDv05_isError(decodedSize)) return decodedSize;
+                    ip += neededInSize;
+                    if (!decodedSize) break;   /* this was just a header */
+                    zbc->outEnd = zbc->outStart +  decodedSize;
+                    zbc->stage = ZBUFFv05ds_flush;
+                    break;
+                }
+                if (ip==iend) { notDone = 0; break; }   /* no more input */
+                zbc->stage = ZBUFFv05ds_load;
+            }
+
+        case ZBUFFv05ds_load:
+            {
+                size_t neededInSize = ZSTDv05_nextSrcSizeToDecompress(zbc->zc);
+                size_t toLoad = neededInSize - zbc->inPos;   /* should always be <= remaining space within inBuff */
+                size_t loadedSize;
+                if (toLoad > zbc->inBuffSize - zbc->inPos) return ERROR(corruption_detected);   /* should never happen */
+                loadedSize = ZBUFFv05_limitCopy(zbc->inBuff + zbc->inPos, toLoad, ip, iend-ip);
+                ip += loadedSize;
+                zbc->inPos += loadedSize;
+                if (loadedSize < toLoad) { notDone = 0; break; }   /* not enough input, wait for more */
+                {
+                    size_t decodedSize = ZSTDv05_decompressContinue(zbc->zc,
+                        zbc->outBuff + zbc->outStart, zbc->outBuffSize - zbc->outStart,
+                        zbc->inBuff, neededInSize);
+                    if (ZSTDv05_isError(decodedSize)) return decodedSize;
+                    zbc->inPos = 0;   /* input is consumed */
+                    if (!decodedSize) { zbc->stage = ZBUFFv05ds_read; break; }   /* this was just a header */
+                    zbc->outEnd = zbc->outStart +  decodedSize;
+                    zbc->stage = ZBUFFv05ds_flush;
+                    // break; /* ZBUFFv05ds_flush follows */
+            }   }
+        case ZBUFFv05ds_flush:
+            {
+                size_t toFlushSize = zbc->outEnd - zbc->outStart;
+                size_t flushedSize = ZBUFFv05_limitCopy(op, oend-op, zbc->outBuff + zbc->outStart, toFlushSize);
+                op += flushedSize;
+                zbc->outStart += flushedSize;
+                if (flushedSize == toFlushSize) {
+                    zbc->stage = ZBUFFv05ds_read;
+                    if (zbc->outStart + BLOCKSIZE > zbc->outBuffSize)
+                        zbc->outStart = zbc->outEnd = 0;
+                    break;
+                }
+                /* cannot flush everything */
+                notDone = 0;
+                break;
+            }
+        default: return ERROR(GENERIC);   /* impossible */
+    }   }
+
+    *srcSizePtr = ip-istart;
+    *maxDstSizePtr = op-ostart;
+
+    {
+        size_t nextSrcSizeHint = ZSTDv05_nextSrcSizeToDecompress(zbc->zc);
+        if (nextSrcSizeHint > ZBUFFv05_blockHeaderSize) nextSrcSizeHint+= ZBUFFv05_blockHeaderSize;   /* get next block header too */
+        nextSrcSizeHint -= zbc->inPos;   /* already loaded*/
+        return nextSrcSizeHint;
+    }
+}
+
+
+
+/* *************************************
+*  Tool functions
+***************************************/
+unsigned ZBUFFv05_isError(size_t errorCode) { return ERR_isError(errorCode); }
+const char* ZBUFFv05_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
+
+size_t ZBUFFv05_recommendedDInSize(void)  { return BLOCKSIZE + ZBUFFv05_blockHeaderSize /* block header size*/ ; }
+size_t ZBUFFv05_recommendedDOutSize(void) { return BLOCKSIZE; }
diff --git a/lib/legacy/zstd_v05.h b/lib/legacy/zstd_v05.h
new file mode 100644
index 00000000..d6de3236
--- /dev/null
+++ b/lib/legacy/zstd_v05.h
@@ -0,0 +1,156 @@
+/*
+    zstd_v05 - decoder for 0.5 format
+    Header File
+    Copyright (C) 2014-2016, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+*/
+#ifndef ZSTDv05_H
+#define ZSTDv05_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include <stddef.h>   /* size_t */
+
+
+
+/* *************************************
+*  Simple functions
+***************************************/
+/*! ZSTDv05_decompress() :
+    `compressedSize` : is the _exact_ size of the compressed blob, otherwise decompression will fail.
+    `dstCapacity` must be large enough, equal or larger than originalSize.
+    @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
+              or an errorCode if it fails (which can be tested using ZSTDv05_isError()) */
+size_t ZSTDv05_decompress( void* dst, size_t dstCapacity,
+                     const void* src, size_t compressedSize);
+
+
+/* *************************************
+*  Helper functions
+***************************************/
+/* Error Management */
+unsigned    ZSTDv05_isError(size_t code);          /*!< tells if a `size_t` function result is an error code */
+const char* ZSTDv05_getErrorName(size_t code);     /*!< provides readable string for an error code */
+
+
+/* *************************************
+*  Explicit memory management
+***************************************/
+/** Decompression context */
+typedef struct ZSTDv05_DCtx_s ZSTDv05_DCtx;
+ZSTDv05_DCtx* ZSTDv05_createDCtx(void);
+size_t     ZSTDv05_freeDCtx(ZSTDv05_DCtx* dctx);      /*!< @return : errorCode */
+
+/** ZSTDv05_decompressDCtx() :
+*   Same as ZSTDv05_decompress(), but requires an already allocated ZSTDv05_DCtx (see ZSTDv05_createDCtx()) */
+size_t ZSTDv05_decompressDCtx(ZSTDv05_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+
+/*-***********************
+*  Dictionary API
+*************************/
+/*! ZSTDv05_decompress_usingDict() :
+*   Decompression using a pre-defined Dictionary content (see dictBuilder).
+*   Dictionary must be identical to the one used during compression, otherwise regenerated data will be corrupted.
+*   Note : dict can be NULL, in which case, it's equivalent to ZSTDv05_decompressDCtx() */
+size_t ZSTDv05_decompress_usingDict(ZSTDv05_DCtx* dctx,
+                                             void* dst, size_t dstCapacity,
+                                       const void* src, size_t srcSize,
+                                       const void* dict,size_t dictSize);
+
+
+
+
+
+
+typedef struct ZBUFFv05_DCtx_s ZBUFFv05_DCtx;
+ZBUFFv05_DCtx* ZBUFFv05_createDCtx(void);
+size_t      ZBUFFv05_freeDCtx(ZBUFFv05_DCtx* dctx);
+
+size_t ZBUFFv05_decompressInit(ZBUFFv05_DCtx* dctx);
+size_t ZBUFFv05_decompressInitDictionary(ZBUFFv05_DCtx* dctx, const void* dict, size_t dictSize);
+
+size_t ZBUFFv05_decompressContinue(ZBUFFv05_DCtx* dctx,
+                                            void* dst, size_t* dstCapacityPtr,
+                                      const void* src, size_t* srcSizePtr);
+
+/*-***************************************************************************
+*  Streaming decompression
+*
+*  A ZBUFFv05_DCtx object is required to track streaming operations.
+*  Use ZBUFFv05_createDCtx() and ZBUFFv05_freeDCtx() to create/release resources.
+*  Use ZBUFFv05_decompressInit() to start a new decompression operation,
+*   or ZBUFFv05_decompressInitDictionary() if decompression requires a dictionary.
+*  Note that ZBUFFv05_DCtx objects can be reused multiple times.
+*
+*  Use ZBUFFv05_decompressContinue() repetitively to consume your input.
+*  *srcSizePtr and *dstCapacityPtr can be any size.
+*  The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr.
+*  Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again.
+*  The content of @dst will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters or change @dst.
+*  @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency)
+*            or 0 when a frame is completely decoded
+*            or an error code, which can be tested using ZBUFFv05_isError().
+*
+*  Hint : recommended buffer sizes (not compulsory) : ZBUFFv05_recommendedDInSize() / ZBUFFv05_recommendedDOutSize()
+*  output : ZBUFFv05_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded.
+*  input  : ZBUFFv05_recommendedDInSize==128Kb+3; just follow indications from ZBUFFv05_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
+* *******************************************************************************/
+
+
+/* *************************************
+*  Tool functions
+***************************************/
+unsigned ZBUFFv05_isError(size_t errorCode);
+const char* ZBUFFv05_getErrorName(size_t errorCode);
+
+/** Functions below provide recommended buffer sizes for Compression or Decompression operations.
+*   These sizes are just hints, and tend to offer better latency */
+size_t ZBUFFv05_recommendedDInSize(void);
+size_t ZBUFFv05_recommendedDOutSize(void);
+
+
+
+/*-*************************************
+*  Constants
+***************************************/
+#define ZSTDv05_MAGICNUMBER 0xFD2FB525   /* v0.5 */
+
+
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* ZSTDv0505_H */
diff --git a/lib/mem.h b/lib/mem.h
index 0e357e53..ceafd57b 100644
--- a/lib/mem.h
+++ b/lib/mem.h
@@ -187,7 +187,6 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value)
 
 #endif /* MEM_FORCE_MEMORY_ACCESS */
 
-
 MEM_STATIC U16 MEM_readLE16(const void* memPtr)
 {
     if (MEM_isLittleEndian())
@@ -276,6 +275,20 @@ MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val)
         MEM_writeLE64(memPtr, (U64)val);
 }
 
+ /* function safe only for comparisons */
+MEM_STATIC U32 MEM_readMINMATCH(const void* memPtr, U32 length)
+{
+    switch (length)
+    {
+    default :
+    case 4 : return MEM_read32(memPtr);
+    case 3 : if (MEM_isLittleEndian())
+                return MEM_read32(memPtr)<<8;
+             else
+                return MEM_read32(memPtr)>>8;
+    }
+}
+
 #if defined (__cplusplus)
 }
 #endif
diff --git a/lib/zbuff.c b/lib/zbuff.c
index 4c1eb2cf..eab0c482 100644
--- a/lib/zbuff.c
+++ b/lib/zbuff.c
@@ -26,31 +26,27 @@
     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
     You can contact the author at :
-    - zstd source repository : https://github.com/Cyan4973/zstd
-    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+    - zstd homepage : http://www.zstd.net/
 */
 
-/* The objects defined into this file should be considered experimental.
- * They are not labelled stable, as their prototype may change in the future.
- * You can use them for tests, provide feedback, or if you can endure risk of future changes.
- */
 
 /* *************************************
 *  Dependencies
 ***************************************/
 #include <stdlib.h>
 #include "error_private.h"
-#include "zstd_static.h"
+#include "zstd_internal.h"  /* MIN, ZSTD_blockHeaderSize */
+#include "zstd_static.h"    /* ZSTD_BLOCKSIZE_MAX */
 #include "zbuff_static.h"
 
 
 /* *************************************
 *  Constants
 ***************************************/
-static size_t ZBUFF_blockHeaderSize = 3;
-static size_t ZBUFF_endFrameSize = 3;
+static size_t const ZBUFF_endFrameSize = ZSTD_BLOCKHEADERSIZE;
 
-/** ************************************************
+
+/*_**************************************************
 *  Streaming compression
 *
 *  A ZBUFF_CCtx object is required to track streaming operation.
@@ -59,28 +55,28 @@ static size_t ZBUFF_endFrameSize = 3;
 *  ZBUFF_CCtx objects can be reused multiple times.
 *
 *  Use ZBUFF_compressContinue() repetitively to consume your input.
-*  *srcSizePtr and *maxDstSizePtr can be any size.
-*  The function will report how many bytes were read or written by modifying *srcSizePtr and *maxDstSizePtr.
+*  *srcSizePtr and *dstCapacityPtr can be any size.
+*  The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr.
 *  Note that it may not consume the entire input, in which case it's up to the caller to call again the function with remaining input.
-*  The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or change dst .
+*  The content of dst will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters or change dst .
 *  @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency)
 *            or an error code, which can be tested using ZBUFF_isError().
 *
 *  ZBUFF_compressFlush() can be used to instruct ZBUFF to compress and output whatever remains within its buffer.
-*  Note that it will not output more than *maxDstSizePtr.
+*  Note that it will not output more than *dstCapacityPtr.
 *  Therefore, some content might still be left into its internal buffer if dst buffer is too small.
 *  @return : nb of bytes still present into internal buffer (0 if it's empty)
 *            or an error code, which can be tested using ZBUFF_isError().
 *
 *  ZBUFF_compressEnd() instructs to finish a frame.
 *  It will perform a flush and write frame epilogue.
-*  Similar to ZBUFF_compressFlush(), it may not be able to output the entire internal buffer content if *maxDstSizePtr is too small.
+*  Similar to ZBUFF_compressFlush(), it may not be able to output the entire internal buffer content if *dstCapacityPtr is too small.
 *  @return : nb of bytes still present into internal buffer (0 if it's empty)
 *            or an error code, which can be tested using ZBUFF_isError().
 *
 *  Hint : recommended buffer sizes (not compulsory)
-*  input : 128 KB block size is the internal unit, it improves latency to use this value.
-*  output : ZSTD_compressBound(128 KB) + 3 + 3 : ensures it's always possible to write/flush/end a full block at best speed.
+*  input : ZSTD_BLOCKSIZE_MAX (128 KB), internal unit size, it improves latency to use this value.
+*  output : ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + ZBUFF_endFrameSize : ensures it's always possible to write/flush/end a full block at best speed.
 * **************************************************/
 
 typedef enum { ZBUFFcs_init, ZBUFFcs_load, ZBUFFcs_flush } ZBUFF_cStage;
@@ -88,13 +84,13 @@ typedef enum { ZBUFFcs_init, ZBUFFcs_load, ZBUFFcs_flush } ZBUFF_cStage;
 /* *** Ressources *** */
 struct ZBUFF_CCtx_s {
     ZSTD_CCtx* zc;
-    char* inBuff;
+    char*  inBuff;
     size_t inBuffSize;
     size_t inToCompress;
     size_t inBuffPos;
     size_t inBuffTarget;
     size_t blockSize;
-    char* outBuff;
+    char*  outBuff;
     size_t outBuffSize;
     size_t outBuffContentSize;
     size_t outBuffFlushedSize;
@@ -123,23 +119,20 @@ size_t ZBUFF_freeCCtx(ZBUFF_CCtx* zbc)
 
 /* *** Initialization *** */
 
-#define MIN(a,b)    ( ((a)<(b)) ? (a) : (b) )
-#define BLOCKSIZE   (128 * 1024)   /* a bit too "magic", should come from reference */
-size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, const void* dict, size_t dictSize, ZSTD_parameters params)
+size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,
+                                   const void* dict, size_t dictSize,
+                                   ZSTD_parameters params, U64 pledgedSrcSize)
 {
-    size_t neededInBuffSize;
-
-    ZSTD_validateParams(&params);
-    neededInBuffSize = (size_t)1 << params.windowLog;
-
     /* allocate buffers */
-    if (zbc->inBuffSize < neededInBuffSize) {
-        zbc->inBuffSize = neededInBuffSize;
-        free(zbc->inBuff);   /* should not be necessary */
-        zbc->inBuff = (char*)malloc(neededInBuffSize);
-        if (zbc->inBuff == NULL) return ERROR(memory_allocation);
+    {   size_t const neededInBuffSize = (size_t)1 << params.cParams.windowLog;
+        if (zbc->inBuffSize < neededInBuffSize) {
+            zbc->inBuffSize = neededInBuffSize;
+            free(zbc->inBuff);   /* should not be necessary */
+            zbc->inBuff = (char*)malloc(neededInBuffSize);
+            if (zbc->inBuff == NULL) return ERROR(memory_allocation);
+        }
+        zbc->blockSize = MIN(ZSTD_BLOCKSIZE_MAX, neededInBuffSize/2);
     }
-    zbc->blockSize = MIN(BLOCKSIZE, zbc->inBuffSize);
     if (zbc->outBuffSize < ZSTD_compressBound(zbc->blockSize)+1) {
         zbc->outBuffSize = ZSTD_compressBound(zbc->blockSize)+1;
         free(zbc->outBuff);   /* should not be necessary */
@@ -147,50 +140,54 @@ size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, const void* dict, size_t dic
         if (zbc->outBuff == NULL) return ERROR(memory_allocation);
     }
 
-    zbc->outBuffContentSize = ZSTD_compressBegin_advanced(zbc->zc, dict, dictSize, params);
-    if (ZSTD_isError(zbc->outBuffContentSize)) return zbc->outBuffContentSize;
+    { size_t const errorCode = ZSTD_compressBegin_advanced(zbc->zc, dict, dictSize, params, pledgedSrcSize);
+      if (ZSTD_isError(errorCode)) return errorCode; }
 
     zbc->inToCompress = 0;
     zbc->inBuffPos = 0;
     zbc->inBuffTarget = zbc->blockSize;
     zbc->outBuffFlushedSize = 0;
-    zbc->stage = ZBUFFcs_flush;   /* starts by flushing the header */
+    zbc->stage = ZBUFFcs_load;
     return 0;   /* ready to go */
 }
 
+
+size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* zbc, const void* dict, size_t dictSize, int compressionLevel)
+{
+    ZSTD_parameters params;
+    params.cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
+    params.fParams.contentSizeFlag = 0;
+    ZSTD_adjustCParams(&params.cParams, 0, dictSize);
+    return ZBUFF_compressInit_advanced(zbc, dict, dictSize, params, 0);
+}
+
 size_t ZBUFF_compressInit(ZBUFF_CCtx* zbc, int compressionLevel)
 {
-    return ZBUFF_compressInit_advanced(zbc, NULL, 0, ZSTD_getParams(compressionLevel, 0));
-}
-
-
-ZSTDLIB_API size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* zbc, const void* dict, size_t dictSize, int compressionLevel)
-{
-    return ZBUFF_compressInit_advanced(zbc, dict, dictSize, ZSTD_getParams(compressionLevel, 0));
+    return ZBUFF_compressInitDictionary(zbc, NULL, 0, compressionLevel);
 }
 
 
 /* *** Compression *** */
 
-static size_t ZBUFF_limitCopy(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+static size_t ZBUFF_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
-    size_t length = MIN(maxDstSize, srcSize);
+    size_t length = MIN(dstCapacity, srcSize);
     memcpy(dst, src, length);
     return length;
 }
 
 static size_t ZBUFF_compressContinue_generic(ZBUFF_CCtx* zbc,
-                              void* dst, size_t* maxDstSizePtr,
+                              void* dst, size_t* dstCapacityPtr,
                         const void* src, size_t* srcSizePtr,
                               int flush)   /* aggregate : wait for full block before compressing */
 {
     U32 notDone = 1;
     const char* const istart = (const char*)src;
-    const char* ip = istart;
     const char* const iend = istart + *srcSizePtr;
+    const char* ip = istart;
     char* const ostart = (char*)dst;
+    char* const oend = ostart + *dstCapacityPtr;
     char* op = ostart;
-    char* const oend = ostart + *maxDstSizePtr;
 
     while (notDone) {
         switch(zbc->stage)
@@ -199,19 +196,17 @@ static size_t ZBUFF_compressContinue_generic(ZBUFF_CCtx* zbc,
 
         case ZBUFFcs_load:
             /* complete inBuffer */
-            {
-                size_t toLoad = zbc->inBuffTarget - zbc->inBuffPos;
-                size_t loaded = ZBUFF_limitCopy(zbc->inBuff + zbc->inBuffPos, toLoad, ip, iend-ip);
+            {   size_t const toLoad = zbc->inBuffTarget - zbc->inBuffPos;
+                size_t const loaded = ZBUFF_limitCopy(zbc->inBuff + zbc->inBuffPos, toLoad, ip, iend-ip);
                 zbc->inBuffPos += loaded;
                 ip += loaded;
                 if ( (zbc->inBuffPos==zbc->inToCompress) || (!flush && (toLoad != loaded)) ) {
                     notDone = 0; break;  /* not enough input to get a full block : stop there, wait for more */
             }   }
             /* compress current block (note : this stage cannot be stopped in the middle) */
-            {
-                void* cDst;
+            {   void* cDst;
                 size_t cSize;
-                size_t iSize = zbc->inBuffPos - zbc->inToCompress;
+                size_t const iSize = zbc->inBuffPos - zbc->inToCompress;
                 size_t oSize = oend-op;
                 if (oSize >= ZSTD_compressBound(iSize))
                     cDst = op;   /* compress directly into output buffer (avoid flush stage) */
@@ -222,20 +217,18 @@ static size_t ZBUFF_compressContinue_generic(ZBUFF_CCtx* zbc,
                 /* prepare next block */
                 zbc->inBuffTarget = zbc->inBuffPos + zbc->blockSize;
                 if (zbc->inBuffTarget > zbc->inBuffSize)
-                    { zbc->inBuffPos = 0; zbc->inBuffTarget = zbc->blockSize; }   /* note : inBuffSize >= blockSize */
+                    zbc->inBuffPos = 0, zbc->inBuffTarget = zbc->blockSize;   /* note : inBuffSize >= blockSize */
                 zbc->inToCompress = zbc->inBuffPos;
                 if (cDst == op) { op += cSize; break; }   /* no need to flush */
                 zbc->outBuffContentSize = cSize;
                 zbc->outBuffFlushedSize = 0;
-                zbc->stage = ZBUFFcs_flush;
-                // break;   /* flush stage follows */
+                zbc->stage = ZBUFFcs_flush;   /* continue to flush stage */
             }
 
         case ZBUFFcs_flush:
             /* flush into dst */
-            {
-                size_t toFlush = zbc->outBuffContentSize - zbc->outBuffFlushedSize;
-                size_t flushed = ZBUFF_limitCopy(op, oend-op, zbc->outBuff + zbc->outBuffFlushedSize, toFlush);
+            {   size_t const toFlush = zbc->outBuffContentSize - zbc->outBuffFlushedSize;
+                size_t const flushed = ZBUFF_limitCopy(op, oend-op, zbc->outBuff + zbc->outBuffFlushedSize, toFlush);
                 op += flushed;
                 zbc->outBuffFlushedSize += flushed;
                 if (toFlush!=flushed) { notDone = 0; break; } /* not enough space within dst to store compressed block : stop there */
@@ -250,41 +243,40 @@ static size_t ZBUFF_compressContinue_generic(ZBUFF_CCtx* zbc,
     }
 
     *srcSizePtr = ip - istart;
-    *maxDstSizePtr = op - ostart;
-    {
-        size_t hintInSize = zbc->inBuffTarget - zbc->inBuffPos;
+    *dstCapacityPtr = op - ostart;
+    {   size_t hintInSize = zbc->inBuffTarget - zbc->inBuffPos;
         if (hintInSize==0) hintInSize = zbc->blockSize;
         return hintInSize;
     }
 }
 
 size_t ZBUFF_compressContinue(ZBUFF_CCtx* zbc,
-                              void* dst, size_t* maxDstSizePtr,
+                              void* dst, size_t* dstCapacityPtr,
                         const void* src, size_t* srcSizePtr)
 {
-    return ZBUFF_compressContinue_generic(zbc, dst, maxDstSizePtr, src, srcSizePtr, 0);
+    return ZBUFF_compressContinue_generic(zbc, dst, dstCapacityPtr, src, srcSizePtr, 0);
 }
 
 
 
 /* *** Finalize *** */
 
-size_t ZBUFF_compressFlush(ZBUFF_CCtx* zbc, void* dst, size_t* maxDstSizePtr)
+size_t ZBUFF_compressFlush(ZBUFF_CCtx* zbc, void* dst, size_t* dstCapacityPtr)
 {
     size_t srcSize = 0;
-    ZBUFF_compressContinue_generic(zbc, dst, maxDstSizePtr, &srcSize, &srcSize, 1);  /* use a valid src address instead of NULL, as some sanitizer don't like it */
+    ZBUFF_compressContinue_generic(zbc, dst, dstCapacityPtr, &srcSize, &srcSize, 1);  /* use a valid src address instead of NULL */
     return zbc->outBuffContentSize - zbc->outBuffFlushedSize;
 }
 
 
-size_t ZBUFF_compressEnd(ZBUFF_CCtx* zbc, void* dst, size_t* maxDstSizePtr)
+size_t ZBUFF_compressEnd(ZBUFF_CCtx* zbc, void* dst, size_t* dstCapacityPtr)
 {
     BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + *dstCapacityPtr;
     BYTE* op = ostart;
-    BYTE* const oend = ostart + *maxDstSizePtr;
-    size_t outSize = *maxDstSizePtr;
+    size_t outSize = *dstCapacityPtr;
     size_t epilogueSize, remaining;
-    ZBUFF_compressFlush(zbc, dst, &outSize);    /* flush any remaining inBuff */
+    ZBUFF_compressFlush(zbc, dst, &outSize);     /* flush any remaining inBuff */
     op += outSize;
     epilogueSize = ZSTD_compressEnd(zbc->zc, zbc->outBuff + zbc->outBuffContentSize, zbc->outBuffSize - zbc->outBuffContentSize);   /* epilogue into outBuff */
     zbc->outBuffContentSize += epilogueSize;
@@ -292,228 +284,193 @@ size_t ZBUFF_compressEnd(ZBUFF_CCtx* zbc, void* dst, size_t* maxDstSizePtr)
     zbc->stage = ZBUFFcs_flush;
     remaining = ZBUFF_compressFlush(zbc, op, &outSize);   /* attempt to flush epilogue into dst */
     op += outSize;
-    if (!remaining) zbc->stage = ZBUFFcs_init;  /* close only if nothing left to flush */
-    *maxDstSizePtr = op-ostart;                 /* tells how many bytes were written */
+    if (!remaining) zbc->stage = ZBUFFcs_init;   /* close only if nothing left to flush */
+    *dstCapacityPtr = op-ostart;                 /* tells how many bytes were written */
     return remaining;
 }
 
 
-
-/** ************************************************
-*  Streaming decompression
+/*-***************************************************************************
+*  Streaming decompression howto
 *
-*  A ZBUFF_DCtx object is required to track streaming operation.
+*  A ZBUFF_DCtx object is required to track streaming operations.
 *  Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources.
-*  Use ZBUFF_decompressInit() to start a new decompression operation.
-*  ZBUFF_DCtx objects can be reused multiple times.
+*  Use ZBUFF_decompressInit() to start a new decompression operation,
+*   or ZBUFF_decompressInitDictionary() if decompression requires a dictionary.
+*  Note that ZBUFF_DCtx objects can be re-init multiple times.
 *
 *  Use ZBUFF_decompressContinue() repetitively to consume your input.
-*  *srcSizePtr and *maxDstSizePtr can be any size.
-*  The function will report how many bytes were read or written by modifying *srcSizePtr and *maxDstSizePtr.
-*  Note that it may not consume the entire input, in which case it's up to the caller to call again the function with remaining input.
-*  The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or change dst .
-*  @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency)
-*            or 0 when a frame is completely decoded
+*  *srcSizePtr and *dstCapacityPtr can be any size.
+*  The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr.
+*  Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again.
+*  The content of @dst will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change @dst.
+*  @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency),
+*            or 0 when a frame is completely decoded,
 *            or an error code, which can be tested using ZBUFF_isError().
 *
-*  Hint : recommended buffer sizes (not compulsory)
-*  output : 128 KB block size is the internal unit, it ensures it's always possible to write a full block when it's decoded.
-*  input : just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
-* **************************************************/
+*  Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedDInSize() and ZBUFF_recommendedDOutSize()
+*  output : ZBUFF_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded.
+*  input  : ZBUFF_recommendedDInSize == 128KB + 3;
+*           just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
+* *******************************************************************************/
 
-typedef enum { ZBUFFds_init, ZBUFFds_readHeader, ZBUFFds_loadHeader, ZBUFFds_decodeHeader,
+typedef enum { ZBUFFds_init, ZBUFFds_readHeader,
                ZBUFFds_read, ZBUFFds_load, ZBUFFds_flush } ZBUFF_dStage;
 
 /* *** Resource management *** */
-
-#define ZSTD_frameHeaderSize_max 5   /* too magical, should come from reference */
 struct ZBUFF_DCtx_s {
-    ZSTD_DCtx* zc;
-    ZSTD_parameters params;
-    char* inBuff;
+    ZSTD_DCtx* zd;
+    ZSTD_frameParams fParams;
+    size_t blockSize;
+    char*  inBuff;
     size_t inBuffSize;
     size_t inPos;
-    char* outBuff;
+    char*  outBuff;
     size_t outBuffSize;
     size_t outStart;
     size_t outEnd;
-    size_t hPos;
     ZBUFF_dStage stage;
-    unsigned char headerBuffer[ZSTD_frameHeaderSize_max];
 };   /* typedef'd to ZBUFF_DCtx within "zstd_buffered.h" */
 
 
 ZBUFF_DCtx* ZBUFF_createDCtx(void)
 {
-    ZBUFF_DCtx* zbc = (ZBUFF_DCtx*)malloc(sizeof(ZBUFF_DCtx));
-    if (zbc==NULL) return NULL;
-    memset(zbc, 0, sizeof(*zbc));
-    zbc->zc = ZSTD_createDCtx();
-    zbc->stage = ZBUFFds_init;
-    return zbc;
+    ZBUFF_DCtx* zbd = (ZBUFF_DCtx*)malloc(sizeof(ZBUFF_DCtx));
+    if (zbd==NULL) return NULL;
+    memset(zbd, 0, sizeof(*zbd));
+    zbd->zd = ZSTD_createDCtx();
+    zbd->stage = ZBUFFds_init;
+    return zbd;
 }
 
-size_t ZBUFF_freeDCtx(ZBUFF_DCtx* zbc)
+size_t ZBUFF_freeDCtx(ZBUFF_DCtx* zbd)
 {
-    if (zbc==NULL) return 0;   /* support free on null */
-    ZSTD_freeDCtx(zbc->zc);
-    free(zbc->inBuff);
-    free(zbc->outBuff);
-    free(zbc);
+    if (zbd==NULL) return 0;   /* support free on null */
+    ZSTD_freeDCtx(zbd->zd);
+    free(zbd->inBuff);
+    free(zbd->outBuff);
+    free(zbd);
     return 0;
 }
 
 
 /* *** Initialization *** */
 
-size_t ZBUFF_decompressInitDictionary(ZBUFF_DCtx* zbc, const void* dict, size_t dictSize)
+size_t ZBUFF_decompressInitDictionary(ZBUFF_DCtx* zbd, const void* dict, size_t dictSize)
 {
-    zbc->stage = ZBUFFds_readHeader;
-    zbc->hPos = zbc->inPos = zbc->outStart = zbc->outEnd = 0;
-    return ZSTD_decompressBegin_usingDict(zbc->zc, dict, dictSize);
+    zbd->stage = ZBUFFds_readHeader;
+    zbd->inPos = zbd->outStart = zbd->outEnd = 0;
+    return ZSTD_decompressBegin_usingDict(zbd->zd, dict, dictSize);
 }
 
-size_t ZBUFF_decompressInit(ZBUFF_DCtx* zbc)
+size_t ZBUFF_decompressInit(ZBUFF_DCtx* zbd)
 {
-    return ZBUFF_decompressInitDictionary(zbc, NULL, 0);
+    return ZBUFF_decompressInitDictionary(zbd, NULL, 0);
 }
 
 
 /* *** Decompression *** */
 
-size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr)
+size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbd,
+                                void* dst, size_t* dstCapacityPtr,
+                          const void* src, size_t* srcSizePtr)
 {
     const char* const istart = (const char*)src;
-    const char* ip = istart;
     const char* const iend = istart + *srcSizePtr;
+    const char* ip = istart;
     char* const ostart = (char*)dst;
+    char* const oend = ostart + *dstCapacityPtr;
     char* op = ostart;
-    char* const oend = ostart + *maxDstSizePtr;
     U32 notDone = 1;
 
     while (notDone) {
-        switch(zbc->stage)
+        switch(zbd->stage)
         {
         case ZBUFFds_init :
             return ERROR(init_missing);
 
         case ZBUFFds_readHeader :
             /* read header from src */
-            {
-                size_t headerSize = ZSTD_getFrameParams(&(zbc->params), src, *srcSizePtr);
+            {   size_t const headerSize = ZSTD_getFrameParams(&(zbd->fParams), src, *srcSizePtr);
                 if (ZSTD_isError(headerSize)) return headerSize;
                 if (headerSize) {
-                    /* not enough input to decode header : tell how many bytes would be necessary */
-                    memcpy(zbc->headerBuffer+zbc->hPos, src, *srcSizePtr);
-                    zbc->hPos += *srcSizePtr;
-                    *maxDstSizePtr = 0;
-                    zbc->stage = ZBUFFds_loadHeader;
-                    return headerSize - zbc->hPos;
-                }
-                zbc->stage = ZBUFFds_decodeHeader;
-                break;
-            }
+                    /* not enough input to decode header : needs headerSize > *srcSizePtr */
+                    *dstCapacityPtr = 0;
+                    *srcSizePtr = 0;
+                    return headerSize;
+            }   }
 
-        case ZBUFFds_loadHeader:
-            /* complete header from src */
-            {
-                size_t headerSize = ZBUFF_limitCopy(
-                    zbc->headerBuffer + zbc->hPos, ZSTD_frameHeaderSize_max - zbc->hPos,
-                    src, *srcSizePtr);
-                zbc->hPos += headerSize;
-                ip += headerSize;
-                headerSize = ZSTD_getFrameParams(&(zbc->params), zbc->headerBuffer, zbc->hPos);
-                if (ZSTD_isError(headerSize)) return headerSize;
-                if (headerSize) {
-                    /* not enough input to decode header : tell how many bytes would be necessary */
-                    *maxDstSizePtr = 0;
-                    return headerSize - zbc->hPos;
+            /* Frame header instruct buffer sizes */
+            {   size_t const blockSize = MIN(1 << zbd->fParams.windowLog, ZSTD_BLOCKSIZE_MAX);
+                zbd->blockSize = blockSize;
+                if (zbd->inBuffSize < blockSize) {
+                    free(zbd->inBuff);
+                    zbd->inBuffSize = blockSize;
+                    zbd->inBuff = (char*)malloc(blockSize);
+                    if (zbd->inBuff == NULL) return ERROR(memory_allocation);
                 }
-                // zbc->stage = ZBUFFds_decodeHeader; break;   /* useless : stage follows */
-            }
-
-        case ZBUFFds_decodeHeader:
-                /* apply header to create / resize buffers */
-                {
-                    size_t neededOutSize = (size_t)1 << zbc->params.windowLog;
-                    size_t neededInSize = BLOCKSIZE;   /* a block is never > BLOCKSIZE */
-                    if (zbc->inBuffSize < neededInSize) {
-                        free(zbc->inBuff);
-                        zbc->inBuffSize = neededInSize;
-                        zbc->inBuff = (char*)malloc(neededInSize);
-                        if (zbc->inBuff == NULL) return ERROR(memory_allocation);
-                    }
-                    if (zbc->outBuffSize < neededOutSize) {
-                        free(zbc->outBuff);
-                        zbc->outBuffSize = neededOutSize;
-                        zbc->outBuff = (char*)malloc(neededOutSize);
-                        if (zbc->outBuff == NULL) return ERROR(memory_allocation);
-                }   }
-                if (zbc->hPos) {
-                    /* some data already loaded into headerBuffer : transfer into inBuff */
-                    memcpy(zbc->inBuff, zbc->headerBuffer, zbc->hPos);
-                    zbc->inPos = zbc->hPos;
-                    zbc->hPos = 0;
-                    zbc->stage = ZBUFFds_load;
-                    break;
-                }
-                zbc->stage = ZBUFFds_read;
+                {   size_t const neededOutSize = ((size_t)1 << zbd->fParams.windowLog) + blockSize;
+                    if (zbd->outBuffSize < neededOutSize) {
+                        free(zbd->outBuff);
+                        zbd->outBuffSize = neededOutSize;
+                        zbd->outBuff = (char*)malloc(neededOutSize);
+                        if (zbd->outBuff == NULL) return ERROR(memory_allocation);
+            }   }   }
+            zbd->stage = ZBUFFds_read;
 
         case ZBUFFds_read:
-            {
-                size_t neededInSize = ZSTD_nextSrcSizeToDecompress(zbc->zc);
+            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zbd->zd);
                 if (neededInSize==0) {  /* end of frame */
-                    zbc->stage = ZBUFFds_init;
+                    zbd->stage = ZBUFFds_init;
                     notDone = 0;
                     break;
                 }
                 if ((size_t)(iend-ip) >= neededInSize) {
                     /* directly decode from src */
-                    size_t decodedSize = ZSTD_decompressContinue(zbc->zc,
-                        zbc->outBuff + zbc->outStart, zbc->outBuffSize - zbc->outStart,
+                    size_t const decodedSize = ZSTD_decompressContinue(zbd->zd,
+                        zbd->outBuff + zbd->outStart, zbd->outBuffSize - zbd->outStart,
                         ip, neededInSize);
                     if (ZSTD_isError(decodedSize)) return decodedSize;
                     ip += neededInSize;
                     if (!decodedSize) break;   /* this was just a header */
-                    zbc->outEnd = zbc->outStart +  decodedSize;
-                    zbc->stage = ZBUFFds_flush;
+                    zbd->outEnd = zbd->outStart +  decodedSize;
+                    zbd->stage = ZBUFFds_flush;
                     break;
                 }
                 if (ip==iend) { notDone = 0; break; }   /* no more input */
-                zbc->stage = ZBUFFds_load;
+                zbd->stage = ZBUFFds_load;
             }
 
         case ZBUFFds_load:
-            {
-                size_t neededInSize = ZSTD_nextSrcSizeToDecompress(zbc->zc);
-                size_t toLoad = neededInSize - zbc->inPos;   /* should always be <= remaining space within inBuff */
+            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zbd->zd);
+                size_t const toLoad = neededInSize - zbd->inPos;   /* should always be <= remaining space within inBuff */
                 size_t loadedSize;
-                if (toLoad > zbc->inBuffSize - zbc->inPos) return ERROR(corruption_detected);   /* should never happen */
-                loadedSize = ZBUFF_limitCopy(zbc->inBuff + zbc->inPos, toLoad, ip, iend-ip);
+                if (toLoad > zbd->inBuffSize - zbd->inPos) return ERROR(corruption_detected);   /* should never happen */
+                loadedSize = ZBUFF_limitCopy(zbd->inBuff + zbd->inPos, toLoad, ip, iend-ip);
                 ip += loadedSize;
-                zbc->inPos += loadedSize;
+                zbd->inPos += loadedSize;
                 if (loadedSize < toLoad) { notDone = 0; break; }   /* not enough input, wait for more */
-                {
-                    size_t decodedSize = ZSTD_decompressContinue(zbc->zc,
-                        zbc->outBuff + zbc->outStart, zbc->outBuffSize - zbc->outStart,
-                        zbc->inBuff, neededInSize);
+                /* decode loaded input */
+                {   size_t const decodedSize = ZSTD_decompressContinue(zbd->zd,
+                        zbd->outBuff + zbd->outStart, zbd->outBuffSize - zbd->outStart,
+                        zbd->inBuff, neededInSize);
                     if (ZSTD_isError(decodedSize)) return decodedSize;
-                    zbc->inPos = 0;   /* input is consumed */
-                    if (!decodedSize) { zbc->stage = ZBUFFds_read; break; }   /* this was just a header */
-                    zbc->outEnd = zbc->outStart +  decodedSize;
-                    zbc->stage = ZBUFFds_flush;
+                    zbd->inPos = 0;   /* input is consumed */
+                    if (!decodedSize) { zbd->stage = ZBUFFds_read; break; }   /* this was just a header */
+                    zbd->outEnd = zbd->outStart +  decodedSize;
+                    zbd->stage = ZBUFFds_flush;
                     // break; /* ZBUFFds_flush follows */
             }   }
+
         case ZBUFFds_flush:
-            {
-                size_t toFlushSize = zbc->outEnd - zbc->outStart;
-                size_t flushedSize = ZBUFF_limitCopy(op, oend-op, zbc->outBuff + zbc->outStart, toFlushSize);
+            {   size_t const toFlushSize = zbd->outEnd - zbd->outStart;
+                size_t const flushedSize = ZBUFF_limitCopy(op, oend-op, zbd->outBuff + zbd->outStart, toFlushSize);
                 op += flushedSize;
-                zbc->outStart += flushedSize;
+                zbd->outStart += flushedSize;
                 if (flushedSize == toFlushSize) {
-                    zbc->stage = ZBUFFds_read;
-                    if (zbc->outStart + BLOCKSIZE > zbc->outBuffSize)
-                        zbc->outStart = zbc->outEnd = 0;
+                    zbd->stage = ZBUFFds_read;
+                    if (zbd->outStart + zbd->blockSize > zbd->outBuffSize)
+                        zbd->outStart = zbd->outEnd = 0;
                     break;
                 }
                 /* cannot flush everything */
@@ -523,13 +480,12 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDstSizePt
         default: return ERROR(GENERIC);   /* impossible */
     }   }
 
+    /* result */
     *srcSizePtr = ip-istart;
-    *maxDstSizePtr = op-ostart;
-
-    {
-        size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zbc->zc);
-        if (nextSrcSizeHint > ZBUFF_blockHeaderSize) nextSrcSizeHint+= ZBUFF_blockHeaderSize;   /* get next block header too */
-        nextSrcSizeHint -= zbc->inPos;   /* already loaded*/
+    *dstCapacityPtr = op-ostart;
+    {   size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zbd->zd);
+        if (nextSrcSizeHint > ZSTD_blockHeaderSize) nextSrcSizeHint+= ZSTD_blockHeaderSize;   /* get following block header too */
+        nextSrcSizeHint -= zbd->inPos;   /* already loaded*/
         return nextSrcSizeHint;
     }
 }
@@ -542,7 +498,7 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDstSizePt
 unsigned ZBUFF_isError(size_t errorCode) { return ERR_isError(errorCode); }
 const char* ZBUFF_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
 
-size_t ZBUFF_recommendedCInSize(void)  { return BLOCKSIZE; }
-size_t ZBUFF_recommendedCOutSize(void) { return ZSTD_compressBound(BLOCKSIZE) + ZBUFF_blockHeaderSize + ZBUFF_endFrameSize; }
-size_t ZBUFF_recommendedDInSize(void)  { return BLOCKSIZE + ZBUFF_blockHeaderSize /* block header size*/ ; }
-size_t ZBUFF_recommendedDOutSize(void) { return BLOCKSIZE; }
+size_t ZBUFF_recommendedCInSize(void)  { return ZSTD_BLOCKSIZE_MAX; }
+size_t ZBUFF_recommendedCOutSize(void) { return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + ZBUFF_endFrameSize; }
+size_t ZBUFF_recommendedDInSize(void)  { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize /* block header size*/ ; }
+size_t ZBUFF_recommendedDOutSize(void) { return ZSTD_BLOCKSIZE_MAX; }
diff --git a/lib/zbuff.h b/lib/zbuff.h
index d3275b7d..e2482054 100644
--- a/lib/zbuff.h
+++ b/lib/zbuff.h
@@ -31,11 +31,6 @@
 #ifndef ZSTD_BUFFERED_H
 #define ZSTD_BUFFERED_H
 
-/* The objects defined into this file should be considered experimental.
- * They are not considered stable, as their prototype may change in the future.
- * You can use them for tests, provide feedback, or if you can endure risk of future changes.
- */
-
 #if defined (__cplusplus)
 extern "C" {
 #endif
@@ -75,7 +70,7 @@ ZSTDLIB_API size_t ZBUFF_compressFlush(ZBUFF_CCtx* cctx, void* dst, size_t* dstC
 ZSTDLIB_API size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr);
 
 /*-*************************************************
-*  Streaming compression
+*  Streaming compression - howto
 *
 *  A ZBUFF_CCtx object is required to track streaming operation.
 *  Use ZBUFF_createCCtx() and ZBUFF_freeCCtx() to create/release resources.
@@ -89,12 +84,12 @@ ZSTDLIB_API size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* dstCap
 *  *srcSizePtr and *dstCapacityPtr can be any size.
 *  The function will report how many bytes were read or written within *srcSizePtr and *dstCapacityPtr.
 *  Note that it may not consume the entire input, in which case it's up to the caller to present again remaining data.
-*  The content of @dst will be overwritten (up to *dstCapacityPtr) at each call, so save its content if it matters or change @dst .
+*  The content of `dst` will be overwritten (up to *dstCapacityPtr) at each call, so save its content if it matters or change @dst .
 *  @return : a hint to preferred nb of bytes to use as input for next function call (it's just a hint, to improve latency)
 *            or an error code, which can be tested using ZBUFF_isError().
 *
 *  At any moment, it's possible to flush whatever data remains within buffer, using ZBUFF_compressFlush().
-*  The nb of bytes written into @dst will be reported into *dstCapacityPtr.
+*  The nb of bytes written into `dst` will be reported into *dstCapacityPtr.
 *  Note that the function cannot output more than *dstCapacityPtr,
 *  therefore, some content might still be left into internal buffer if *dstCapacityPtr is too small.
 *  @return : nb of bytes still present into internal buffer (0 if it's empty)
@@ -127,26 +122,27 @@ ZSTDLIB_API size_t ZBUFF_decompressContinue(ZBUFF_DCtx* dctx,
                                       const void* src, size_t* srcSizePtr);
 
 /*-***************************************************************************
-*  Streaming decompression
+*  Streaming decompression howto
 *
 *  A ZBUFF_DCtx object is required to track streaming operations.
 *  Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources.
 *  Use ZBUFF_decompressInit() to start a new decompression operation,
 *   or ZBUFF_decompressInitDictionary() if decompression requires a dictionary.
-*  Note that ZBUFF_DCtx objects can be reused multiple times.
+*  Note that ZBUFF_DCtx objects can be re-init multiple times.
 *
 *  Use ZBUFF_decompressContinue() repetitively to consume your input.
 *  *srcSizePtr and *dstCapacityPtr can be any size.
 *  The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr.
 *  Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again.
-*  The content of @dst will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters or change @dst.
-*  @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency)
-*            or 0 when a frame is completely decoded
+*  The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`.
+*  @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency),
+*            or 0 when a frame is completely decoded,
 *            or an error code, which can be tested using ZBUFF_isError().
 *
-*  Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedDInSize() / ZBUFF_recommendedDOutSize()
-*  output : ZBUFF_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded.
-*  input  : ZBUFF_recommendedDInSize==128Kb+3; just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
+*  Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedDInSize() and ZBUFF_recommendedDOutSize()
+*  output : ZBUFF_recommendedDOutSize== 128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded.
+*  input  : ZBUFF_recommendedDInSize == 128KB + 3;
+*           just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
 * *******************************************************************************/
 
 
diff --git a/lib/zbuff_static.h b/lib/zbuff_static.h
index 40550890..9fb522e5 100644
--- a/lib/zbuff_static.h
+++ b/lib/zbuff_static.h
@@ -51,7 +51,9 @@ extern "C" {
 /* *************************************
 *  Advanced Streaming functions
 ***************************************/
-ZSTDLIB_API size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params);
+ZSTDLIB_API size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* cctx,
+                                               const void* dict, size_t dictSize,
+                                               ZSTD_parameters params, U64 pledgedSrcSize);
 
 
 #if defined (__cplusplus)
diff --git a/lib/zdict.c b/lib/zdict.c
index d3d5784d..122ac8cb 100644
--- a/lib/zdict.c
+++ b/lib/zdict.c
@@ -284,8 +284,7 @@ static dictItem ZDICT_analyzePos(
         return solution;
     }
 
-    {
-        int i;
+    {   int i;
         U32 searchLength;
         U32 refinedStart = start;
         U32 refinedEnd = end;
@@ -575,7 +574,6 @@ static void ZDICT_fillNoise(void* buffer, size_t length)
 {
     unsigned acc = PRIME1;
     size_t p=0;;
-
     for (p=0; p<length; p++) {
         acc *= PRIME2;
         ((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21);
@@ -587,7 +585,7 @@ typedef struct
 {
     ZSTD_CCtx* ref;
     ZSTD_CCtx* zc;
-    void* workPlace;   /* must be BLOCKSIZE allocated */
+    void* workPlace;   /* must be ZSTD_BLOCKSIZE_MAX allocated */
 } EStats_ress_t;
 
 
@@ -595,29 +593,57 @@ static void ZDICT_countEStats(EStats_ress_t esr,
                             U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount,
                             const void* src, size_t srcSize)
 {
-    const BYTE* bytePtr;
-    const U32* u32Ptr;
-    seqStore_t seqStore;
+    const seqStore_t* seqStorePtr;
 
-    if (srcSize > BLOCKSIZE) srcSize = BLOCKSIZE;   /* protection vs large samples */
+    if (srcSize > ZSTD_BLOCKSIZE_MAX) srcSize = ZSTD_BLOCKSIZE_MAX;   /* protection vs large samples */
     ZSTD_copyCCtx(esr.zc, esr.ref);
-    ZSTD_compressBlock(esr.zc, esr.workPlace, BLOCKSIZE, src, srcSize);
-    seqStore = ZSTD_copySeqStore(esr.zc);
+    ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
+    seqStorePtr = ZSTD_getSeqStore(esr.zc);
 
-    /* count stats */
-    for(bytePtr = seqStore.litStart; bytePtr < seqStore.lit; bytePtr++)
-        countLit[*bytePtr]++;
-    for(u32Ptr = seqStore.offsetStart; u32Ptr < seqStore.offset; u32Ptr++) {
-        BYTE offcode = (BYTE)ZSTD_highbit(*u32Ptr) + 1;
-        if (*u32Ptr==0) offcode=0;
-        offsetcodeCount[offcode]++;
+    /* literals stats */
+    {   const BYTE* bytePtr;
+        for(bytePtr = seqStorePtr->litStart; bytePtr < seqStorePtr->lit; bytePtr++)
+            countLit[*bytePtr]++;
     }
-    for(bytePtr = seqStore.matchLengthStart; bytePtr < seqStore.matchLength; bytePtr++)
-        matchlengthCount[*bytePtr]++;
-    for(bytePtr = seqStore.litLengthStart; bytePtr < seqStore.litLength; bytePtr++)
-        litlengthCount[*bytePtr]++;
+
+    /* seqStats */
+    {   size_t const nbSeq = (size_t)(seqStorePtr->offset - seqStorePtr->offsetStart);
+        ZSTD_seqToCodes(seqStorePtr, nbSeq);
+
+        {   const BYTE* codePtr = seqStorePtr->offCodeStart;
+            size_t u;
+            for (u=0; u<nbSeq; u++) offsetcodeCount[codePtr[u]]++;
+        }
+
+        {   const BYTE* codePtr = seqStorePtr->mlCodeStart;
+            size_t u;
+            for (u=0; u<nbSeq; u++) matchlengthCount[codePtr[u]]++;
+        }
+
+        {   const BYTE* codePtr = seqStorePtr->llCodeStart;
+            size_t u;
+            for (u=0; u<nbSeq; u++) litlengthCount[codePtr[u]]++;
+    }   }
 }
 
+/*
+static size_t ZDICT_maxSampleSize(const size_t* fileSizes, unsigned nbFiles)
+{
+    unsigned u;
+    size_t max=0;
+    for (u=0; u<nbFiles; u++)
+        if (max < fileSizes[u]) max = fileSizes[u];
+    return max;
+}
+*/
+
+static size_t ZDICT_totalSampleSize(const size_t* fileSizes, unsigned nbFiles)
+{
+    size_t total;
+    unsigned u;
+    for (u=0, total=0; u<nbFiles; u++) total += fileSizes[u];
+    return total;
+}
 
 #define OFFCODE_MAX 18  /* only applicable to first block */
 static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
@@ -626,41 +652,44 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
                            const void* dictBuffer, size_t  dictBufferSize)
 {
     U32 countLit[256];
-    U32 offcodeCount[MaxOff+1];
     HUF_CREATE_STATIC_CTABLE(hufTable, 255);
-    short offcodeNCount[MaxOff+1];
+    U32 offcodeCount[OFFCODE_MAX+1];
+    short offcodeNCount[OFFCODE_MAX+1];
     U32 matchLengthCount[MaxML+1];
     short matchLengthNCount[MaxML+1];
-    U32 litlengthCount[MaxLL+1];
-    short litlengthNCount[MaxLL+1];
+    U32 litLengthCount[MaxLL+1];
+    short litLengthNCount[MaxLL+1];
     EStats_ress_t esr;
     ZSTD_parameters params;
     U32 u, huffLog = 12, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total;
     size_t pos = 0, errorCode;
     size_t eSize = 0;
+    size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles);
+    size_t const averageSampleSize = totalSrcSize / nbFiles;
 
     /* init */
     for (u=0; u<256; u++) countLit[u]=1;   /* any character must be described */
     for (u=0; u<=OFFCODE_MAX; u++) offcodeCount[u]=1;
     for (u=0; u<=MaxML; u++) matchLengthCount[u]=1;
-    for (u=0; u<=MaxLL; u++) litlengthCount[u]=1;
+    for (u=0; u<=MaxLL; u++) litLengthCount[u]=1;
     esr.ref = ZSTD_createCCtx();
     esr.zc = ZSTD_createCCtx();
-    esr.workPlace = malloc(BLOCKSIZE);
+    esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
     if (!esr.ref || !esr.zc || !esr.workPlace) {
             eSize = ERROR(memory_allocation);
             DISPLAYLEVEL(1, "Not enough memory");
             goto _cleanup;
     }
     if (compressionLevel==0) compressionLevel=g_compressionLevel_default;
-    params = ZSTD_getParams(compressionLevel, dictBufferSize + 15 KB);
-    params.strategy = ZSTD_greedy;
-    ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params);
+    params.cParams = ZSTD_getCParams(compressionLevel, averageSampleSize, dictBufferSize);
+    params.cParams.strategy = ZSTD_greedy;
+    params.fParams.contentSizeFlag = 0;
+    ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
 
     /* collect stats on all files */
     for (u=0; u<nbFiles; u++) {
         ZDICT_countEStats(esr,
-                        countLit, offcodeCount, matchLengthCount, litlengthCount,
+                        countLit, offcodeCount, matchLengthCount, litLengthCount,
            (const char*)srcBuffer + pos, fileSizes[u]);
         pos += fileSizes[u];
     }
@@ -692,11 +721,11 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
     }
     mlLog = (U32)errorCode;
 
-    total=0; for (u=0; u<=MaxLL; u++) total+=litlengthCount[u];
-    errorCode = FSE_normalizeCount(litlengthNCount, llLog, litlengthCount, total, MaxLL);
+    total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u];
+    errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL);
     if (FSE_isError(errorCode)) {
         eSize = ERROR(GENERIC);
-        DISPLAYLEVEL(1, "FSE_normalizeCount error with litlengthCount");
+        DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount");
         goto _cleanup;
     }
     llLog = (U32)errorCode;
@@ -732,7 +761,7 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
     maxDstSize -= errorCode;
     eSize += errorCode;
 
-    errorCode = FSE_writeNCount(dstBuffer, maxDstSize, litlengthNCount, MaxLL, llLog);
+    errorCode = FSE_writeNCount(dstBuffer, maxDstSize, litLengthNCount, MaxLL, llLog);
     if (FSE_isError(errorCode)) {
         eSize = ERROR(GENERIC);
         DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount");
@@ -791,25 +820,31 @@ static size_t ZDICT_fastSampling(void* dictBuffer, size_t dictSize,
 }
 
 
+#define DIB_MINSAMPLESSIZE (DIB_FASTSEGMENTSIZE*3)
+/*! ZDICT_trainFromBuffer_unsafe() :
+*   `samplesBuffer` must be followed by noisy guard band.
+*   @return : size of dictionary.
+*/
 size_t ZDICT_trainFromBuffer_unsafe(
                             void* dictBuffer, size_t maxDictSize,
                             const void* samplesBuffer, const size_t* sampleSizes, unsigned nbSamples,
                             ZDICT_params_t params)
 {
-    const U32 dictListSize = MAX( MAX(DICTLISTSIZE, nbSamples), (U32)(maxDictSize/16));
+    U32 const dictListSize = MAX( MAX(DICTLISTSIZE, nbSamples), (U32)(maxDictSize/16));
     dictItem* dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList));
     unsigned selectivity = params.selectivityLevel;
     unsigned compressionLevel = params.compressionLevel;
-    size_t targetDictSize = maxDictSize - g_provision_entropySize;
+    size_t targetDictSize = maxDictSize;
     size_t sBuffSize;
     size_t dictSize = 0;
 
     /* checks */
     if (maxDictSize <= g_provision_entropySize + g_min_fast_dictContent) return ERROR(dstSize_tooSmall);
+    if (!dictList) return ERROR(memory_allocation);
 
     /* init */
     { unsigned u; for (u=0, sBuffSize=0; u<nbSamples; u++) sBuffSize += sampleSizes[u]; }
-    if (!dictList) return ERROR(memory_allocation);
+    if (sBuffSize < DIB_MINSAMPLESSIZE) return 0;   /* not enough source to create dictionary */
     ZDICT_initDictItem(dictList);
     g_displayLevel = params.notificationLevel;
     if (selectivity==0) selectivity = g_selectivity_default;
@@ -824,9 +859,9 @@ size_t ZDICT_trainFromBuffer_unsafe(
 
         /* display best matches */
         if (g_displayLevel>= 3) {
-            const U32 nb = 25;
+            U32 const nb = 25;
+            U32 const dictContentSize = ZDICT_dictSize(dictList);
             U32 u;
-            U32 dictContentSize = ZDICT_dictSize(dictList);
             DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize);
             DISPLAYLEVEL(3, "list %u best segments \n", nb);
             for (u=1; u<=nb; u++) {
@@ -840,8 +875,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
     }   }   }
 
     /* create dictionary */
-    {
-        U32 dictContentSize = ZDICT_dictSize(dictList);
+    {   U32 dictContentSize = ZDICT_dictSize(dictList);
         size_t hSize;
         BYTE* ptr;
         U32 u;
@@ -859,8 +893,8 @@ size_t ZDICT_trainFromBuffer_unsafe(
         if (selectivity==1) {  /* note could also be used to complete a dictionary, but not necessarily better */
             DISPLAYLEVEL(3, "\r%70s\r", "");   /* clean display line */
             DISPLAYLEVEL(3, "Adding %u KB with fast sampling \n", (U32)(targetDictSize>>10));
-            dictContentSize = (U32)ZDICT_fastSampling((char*)dictBuffer + g_provision_entropySize,
-                                               targetDictSize, samplesBuffer, sBuffSize);
+            dictContentSize = (U32)ZDICT_fastSampling(dictBuffer, targetDictSize,
+                                                      samplesBuffer, sBuffSize);
         }
 
        /* dictionary header */
@@ -886,31 +920,32 @@ size_t ZDICT_trainFromBuffer_unsafe(
 }
 
 
+/* issue : samplesBuffer need to be followed by a noisy guard band.
+*  work around : duplicate the buffer, and add the noise */
 size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
                            const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
                            ZDICT_params_t params)
 {
-    size_t sBuffSize;
     void* newBuff;
-    size_t result;
+    size_t sBuffSize;
 
     { unsigned u; for (u=0, sBuffSize=0; u<nbSamples; u++) sBuffSize += samplesSizes[u]; }
+    if (sBuffSize==0) return 0;   /* empty content => no dictionary */
     newBuff = malloc(sBuffSize + NOISELENGTH);
     if (!newBuff) return ERROR(memory_allocation);
 
     memcpy(newBuff, samplesBuffer, sBuffSize);
     ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH);   /* guard band, for end of buffer condition */
 
-    result = ZDICT_trainFromBuffer_unsafe(dictBuffer, dictBufferCapacity,
+    { size_t const result = ZDICT_trainFromBuffer_unsafe(
+                                        dictBuffer, dictBufferCapacity,
                                         newBuff, samplesSizes, nbSamples,
                                         params);
-    free(newBuff);
-    return result;
+      free(newBuff);
+      return result; }
 }
 
 
-/* issue : samplesBuffer need to be followed by a noisy guard band.
-*  work around : duplicate the buffer, and add the noise ? */
 size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
                              const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
 {
diff --git a/lib/zstd.h b/lib/zstd.h
index 53ed6973..26c6e275 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -60,8 +60,8 @@ extern "C" {
 *  Version
 ***************************************/
 #define ZSTD_VERSION_MAJOR    0    /* for breaking interface changes  */
-#define ZSTD_VERSION_MINOR    5    /* for new (non-breaking) interface capabilities */
-#define ZSTD_VERSION_RELEASE  1    /* for tweaks, bug-fixes, or development */
+#define ZSTD_VERSION_MINOR    6    /* for new (non-breaking) interface capabilities */
+#define ZSTD_VERSION_RELEASE  0    /* for tweaks, bug-fixes, or development */
 #define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
 ZSTDLIB_API unsigned ZSTD_versionNumber (void);
 
diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c
index 7bea6abe..8ce18f81 100644
--- a/lib/zstd_compress.c
+++ b/lib/zstd_compress.c
@@ -62,7 +62,7 @@
 /*-*************************************
 *  Constants
 ***************************************/
-static const U32 g_searchStrength = 8;
+static const U32 g_searchStrength = 8;   /* control skip over incompressible data */
 
 
 /*-*************************************
@@ -74,32 +74,13 @@ size_t ZSTD_compressBound(size_t srcSize) { return FSE_compressBound(srcSize) +
 /*-*************************************
 *  Sequence storage
 ***************************************/
-/** ZSTD_resetFreqs() : for opt variants */
-static void ZSTD_resetFreqs(seqStore_t* ssPtr)
-{
-    unsigned u;
-    ssPtr->matchLengthSum = 512; // (1<<MLbits);
-    ssPtr->litLengthSum = 256; // (1<<LLbits);
-    ssPtr->litSum = (1<<Litbits);
-    ssPtr->offCodeSum = (1<<Offbits);
-
-    for (u=0; u<=MaxLit; u++)
-        ssPtr->litFreq[u] = 1;
-    for (u=0; u<=MaxLL; u++)
-        ssPtr->litLengthFreq[u] = 1;
-    for (u=0; u<=MaxML; u++)
-        ssPtr->matchLengthFreq[u] = 1;
-    for (u=0; u<=MaxOff; u++)
-        ssPtr->offCodeFreq[u] = 1;
-}
-
 static void ZSTD_resetSeqStore(seqStore_t* ssPtr)
 {
     ssPtr->offset = ssPtr->offsetStart;
     ssPtr->lit = ssPtr->litStart;
     ssPtr->litLength = ssPtr->litLengthStart;
     ssPtr->matchLength = ssPtr->matchLengthStart;
-    ssPtr->dumps = ssPtr->dumpsStart;
+    ssPtr->longLengthID = 0;
 }
 
 
@@ -114,18 +95,19 @@ struct ZSTD_CCtx_s
     U32   dictLimit;        /* below that point, need extDict */
     U32   lowLimit;         /* below that point, no more data */
     U32   nextToUpdate;     /* index from which to continue dictionary update */
+    U32   nextToUpdate3;    /* index from which to continue dictionary update */
+    U32   hashLog3;         /* dispatch table : larger == faster, more memory */
     U32   loadedDictEnd;
-    U32   stage;
+    U32   stage;            /* 0: created; 1: init,dictLoad; 2:started */
     ZSTD_parameters params;
     void* workSpace;
     size_t workSpaceSize;
     size_t blockSize;
-    size_t hbSize;
-    char headerBuffer[ZSTD_frameHeaderSize_max];
 
     seqStore_t seqStore;    /* sequences storage ptrs */
     U32* hashTable;
-    U32* contentTable;
+    U32* hashTable3;
+    U32* chainTable;
     HUF_CElt* hufTable;
     U32 flagStaticTables;
     FSE_CTable offcodeCTable   [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
@@ -145,64 +127,121 @@ size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
     return 0;   /* reserved as a potential error code in the future */
 }
 
-seqStore_t ZSTD_copySeqStore(const ZSTD_CCtx* ctx)
+const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx)   /* hidden interface */
 {
-    return ctx->seqStore;
+    return &(ctx->seqStore);
+}
+
+
+#define CLAMP(val,min,max) { if (val<min) val=min; else if (val>max) val=max; }
+#define CLAMPCHECK(val,min,max) { if ((val<min) || (val>max)) return ERROR(compressionParameter_unsupported); }
+
+/** ZSTD_checkParams() :
+    ensure param values remain within authorized range.
+    @return : 0, or an error code if one value is beyond authorized range */
+size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
+{
+    CLAMPCHECK(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
+    CLAMPCHECK(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX);
+    CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
+    CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
+    { U32 const searchLengthMin = (cParams.strategy == ZSTD_fast || cParams.strategy == ZSTD_greedy) ? ZSTD_SEARCHLENGTH_MIN+1 : ZSTD_SEARCHLENGTH_MIN;
+      U32 const searchLengthMax = (cParams.strategy == ZSTD_fast) ? ZSTD_SEARCHLENGTH_MAX : ZSTD_SEARCHLENGTH_MAX-1;
+      CLAMPCHECK(cParams.searchLength, searchLengthMin, searchLengthMax); }
+    CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
+    if ((U32)(cParams.strategy) > (U32)ZSTD_btopt) return ERROR(compressionParameter_unsupported);
+    return 0;
 }
 
 
 static unsigned ZSTD_highbit(U32 val);
 
-#define CLAMP(val,min,max) { if (val<min) val=min; else if (val>max) val=max; }
-
-/** ZSTD_validateParams() :
-    correct params value to remain within authorized range,
-    optimize for `srcSize` if srcSize > 0 */
-void ZSTD_validateParams(ZSTD_parameters* params)
+/** ZSTD_checkCParams_advanced() :
+    temporary work-around, while the compressor compatibility remains limited regarding windowLog < 18 */
+size_t ZSTD_checkCParams_advanced(ZSTD_compressionParameters cParams, U64 srcSize)
 {
-    const U32 btPlus = (params->strategy == ZSTD_btlazy2) || (params->strategy == ZSTD_btopt);
-
-    /* validate params */
-    if (MEM_32bits()) if (params->windowLog > 25) params->windowLog = 25;   /* 32 bits mode cannot flush > 24 bits */
-    CLAMP(params->windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
-    CLAMP(params->contentLog, ZSTD_CONTENTLOG_MIN, ZSTD_CONTENTLOG_MAX);
-    CLAMP(params->hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
-    CLAMP(params->searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
-    CLAMP(params->searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
-    CLAMP(params->targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
-    if ((U32)params->strategy>(U32)ZSTD_btopt) params->strategy = ZSTD_btopt;
-
-    /* correct params, to use less memory */
-    if ((params->srcSize > 0) && (params->srcSize < (1<<ZSTD_WINDOWLOG_MAX))) {
-        U32 srcLog = ZSTD_highbit((U32)(params->srcSize)-1) + 1;
-        if (params->windowLog > srcLog) params->windowLog = srcLog;
-    }
-    if (params->windowLog   < ZSTD_WINDOWLOG_ABSOLUTEMIN) params->windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN;  /* required for frame header */
-    if (params->contentLog  > params->windowLog+btPlus) params->contentLog = params->windowLog+btPlus;   /* <= ZSTD_CONTENTLOG_MAX */
+    if (srcSize > (1ULL << ZSTD_WINDOWLOG_MIN)) return ZSTD_checkCParams(cParams);
+    if (cParams.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) return ERROR(compressionParameter_unsupported);
+    if (srcSize <= (1ULL << cParams.windowLog)) cParams.windowLog = ZSTD_WINDOWLOG_MIN; /* fake value - temporary work around */
+    if (srcSize <= (1ULL << cParams.chainLog)) cParams.chainLog = ZSTD_CHAINLOG_MIN;    /* fake value - temporary work around */
+    if ((srcSize <= (1ULL << cParams.hashLog)) && ((U32)cParams.strategy < (U32)ZSTD_btlazy2)) cParams.hashLog = ZSTD_HASHLOG_MIN;       /* fake value - temporary work around */
+    return ZSTD_checkCParams(cParams);
 }
 
 
+/** ZSTD_adjustParams() :
+    optimize params for q given input (`srcSize` and `dictSize`).
+    mostly downsizing to reduce memory consumption and initialization.
+    Both `srcSize` and `dictSize` are optional (use 0 if unknown),
+    but if both are 0, no optimization can be done.
+    Note : params is considered validated at this stage. Use ZSTD_checkParams() to ensure that. */
+void ZSTD_adjustCParams(ZSTD_compressionParameters* params, U64 srcSize, size_t dictSize)
+{
+    if (srcSize+dictSize == 0) return;   /* no size information available : no adjustment */
+
+    /* resize params, to use less memory when necessary */
+    {   U32 const minSrcSize = (srcSize==0) ? 500 : 0;
+        U64 const rSize = srcSize + dictSize + minSrcSize;
+        if (rSize < ((U64)1<<ZSTD_WINDOWLOG_MAX)) {
+            U32 const srcLog = ZSTD_highbit((U32)(rSize)-1) + 1;
+            if (params->windowLog > srcLog) params->windowLog = srcLog;
+    }   }
+    if (params->hashLog > params->windowLog) params->hashLog = params->windowLog;
+    {   U32 const btPlus = (params->strategy == ZSTD_btlazy2) || (params->strategy == ZSTD_btopt);
+        U32 const maxChainLog = params->windowLog+btPlus;
+        if (params->chainLog > maxChainLog) params->chainLog = maxChainLog; }   /* <= ZSTD_CHAINLOG_MAX */
+
+    if (params->windowLog  < ZSTD_WINDOWLOG_ABSOLUTEMIN) params->windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN;  /* required for frame header */
+    if ((params->hashLog  < ZSTD_HASHLOG_MIN) && ((U32)params->strategy >= (U32)ZSTD_btlazy2)) params->hashLog = ZSTD_HASHLOG_MIN;  /* required to ensure collision resistance in bt */
+}
+
+
+size_t ZSTD_sizeofCCtx(ZSTD_compressionParameters cParams)   /* hidden interface, for paramagrill */
+{
+    ZSTD_CCtx* zc = ZSTD_createCCtx();
+    ZSTD_parameters params;
+    params.cParams = cParams;
+    params.fParams.contentSizeFlag = 1;
+    ZSTD_compressBegin_advanced(zc, NULL, 0, params, 0);
+    { size_t const ccsize = sizeof(*zc) + zc->workSpaceSize;
+      ZSTD_freeCCtx(zc);
+      return ccsize; }
+}
+
+/*! ZSTD_resetCCtx_advanced() :
+    note : 'params' is expected to be validated */
 static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
-                                       ZSTD_parameters params)
+                                       ZSTD_parameters params, U32 reset)
 {   /* note : params considered validated here */
-    const size_t blockSize = MIN(BLOCKSIZE, (size_t)1 << params.windowLog);
-    /* reserve table memory */
-    const U32 contentLog = (params.strategy == ZSTD_fast) ? 1 : params.contentLog;
-    const size_t tableSpace = ((1 << contentLog) + (1 << params.hashLog)) * sizeof(U32);
-    const size_t neededSpace = tableSpace + (256*sizeof(U32)) + (3*blockSize) + ((1<<MLbits) + (1<<LLbits) + (1<<Offbits) + (1<<Litbits))*sizeof(U32);
-    if (zc->workSpaceSize < neededSpace) {
-        free(zc->workSpace);
-        zc->workSpace = malloc(neededSpace);
-        if (zc->workSpace == NULL) return ERROR(memory_allocation);
-        zc->workSpaceSize = neededSpace;
-    }
-    memset(zc->workSpace, 0, tableSpace );   /* reset only tables */
-    zc->hashTable = (U32*)(zc->workSpace);
-    zc->contentTable = zc->hashTable + ((size_t)1 << params.hashLog);
-    zc->seqStore.buffer = zc->contentTable + ((size_t)1 << contentLog);
+    const size_t blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params.cParams.windowLog);
+    const U32    divider = (params.cParams.searchLength==3) ? 3 : 4;
+    const size_t maxNbSeq = blockSize / divider;
+    const size_t tokenSpace = blockSize + 11*maxNbSeq;
+    const size_t chainSize = (params.cParams.strategy == ZSTD_fast) ? 0 : (1 << params.cParams.chainLog);
+    const size_t hSize = 1 << params.cParams.hashLog;
+    const size_t h3Size = (zc->hashLog3) ? 1 << zc->hashLog3 : 0;
+    const size_t tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
+
+    /* Check if workSpace is large enough, alloc a new one if needed */
+    {   size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits))*sizeof(U32)
+                              + (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
+        size_t const neededSpace = tableSpace + (256*sizeof(U32)) /* huffTable */ + tokenSpace
+                              + ((params.cParams.strategy == ZSTD_btopt) ? optSpace : 0);
+        if (zc->workSpaceSize < neededSpace) {
+            free(zc->workSpace);
+            zc->workSpace = malloc(neededSpace);
+            if (zc->workSpace == NULL) return ERROR(memory_allocation);
+            zc->workSpaceSize = neededSpace;
+    }   }
+
+    if (reset) memset(zc->workSpace, 0, tableSpace );   /* reset only tables */
+    zc->hashTable3 = (U32*)(zc->workSpace);
+    zc->hashTable = zc->hashTable3 + h3Size;
+    zc->chainTable = zc->hashTable + hSize;
+    zc->seqStore.buffer = zc->chainTable + chainSize;
     zc->hufTable = (HUF_CElt*)zc->seqStore.buffer;
     zc->flagStaticTables = 0;
-    zc->seqStore.buffer = (U32*)(zc->seqStore.buffer) + 256;
+    zc->seqStore.buffer = ((U32*)(zc->seqStore.buffer)) + 256;
 
     zc->nextToUpdate = 1;
     zc->nextSrc = NULL;
@@ -213,55 +252,60 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
     zc->params = params;
     zc->blockSize = blockSize;
 
-    zc->seqStore.litFreq = (U32*) (zc->seqStore.buffer);
-    zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<<Litbits);
-    zc->seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (1<<LLbits);
-    zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (1<<MLbits);
+    if (params.cParams.strategy == ZSTD_btopt) {
+        zc->seqStore.litFreq = (U32*)(zc->seqStore.buffer);
+        zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<<Litbits);
+        zc->seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1);
+        zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML+1);
+        zc->seqStore.matchTable = (ZSTD_match_t*)((void*)(zc->seqStore.offCodeFreq + (MaxOff+1)));
+        zc->seqStore.priceTable = (ZSTD_optimal_t*)((void*)(zc->seqStore.matchTable + ZSTD_OPT_NUM+1));
+        zc->seqStore.buffer = zc->seqStore.priceTable + ZSTD_OPT_NUM+1;
+        zc->seqStore.litLengthSum = 0;
+    }
+    zc->seqStore.offsetStart = (U32*) (zc->seqStore.buffer);
+    zc->seqStore.litLengthStart = (U16*) (void*)(zc->seqStore.offsetStart + maxNbSeq);
+    zc->seqStore.matchLengthStart = (U16*) (void*)(zc->seqStore.litLengthStart + maxNbSeq);
+    zc->seqStore.llCodeStart = (BYTE*) (zc->seqStore.matchLengthStart + maxNbSeq);
+    zc->seqStore.mlCodeStart = zc->seqStore.llCodeStart + maxNbSeq;
+    zc->seqStore.offCodeStart = zc->seqStore.mlCodeStart + maxNbSeq;
+    zc->seqStore.litStart = zc->seqStore.offCodeStart + maxNbSeq;
 
-    zc->seqStore.offsetStart = zc->seqStore.offCodeFreq + (1<<Offbits);
-    zc->seqStore.offCodeStart = (BYTE*) (zc->seqStore.offsetStart + (blockSize>>2));
-    zc->seqStore.litStart = zc->seqStore.offCodeStart + (blockSize>>2);
-    zc->seqStore.litLengthStart =  zc->seqStore.litStart + blockSize;
-    zc->seqStore.matchLengthStart = zc->seqStore.litLengthStart + (blockSize>>2);
-    zc->seqStore.dumpsStart = zc->seqStore.matchLengthStart + (blockSize>>2);
-    // zc->seqStore.XXX = zc->seqStore.dumpsStart + (blockSize>>4);
-
-    zc->hbSize = 0;
-    zc->stage = 0;
+    zc->stage = 1;
     zc->loadedDictEnd = 0;
 
     return 0;
 }
 
 
-/*! ZSTD_copyCCtx
-*   Duplicate an existing context @srcCCtx into another one @dstCCtx.
-*   Only works during stage 0 (i.e. before first call to ZSTD_compressContinue())
+/*! ZSTD_copyCCtx() :
+*   Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
+*   Only works during stage 1 (i.e. after creation, but before first call to ZSTD_compressContinue()).
 *   @return : 0, or an error code */
 size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx)
 {
-    const U32 contentLog = (srcCCtx->params.strategy == ZSTD_fast) ? 1 : srcCCtx->params.contentLog;
-    const size_t tableSpace = ((1 << contentLog) + (1 << srcCCtx->params.hashLog)) * sizeof(U32);
+    if (srcCCtx->stage!=1) return ERROR(stage_wrong);
 
-    if (srcCCtx->stage!=0) return ERROR(stage_wrong);
-
-    ZSTD_resetCCtx_advanced(dstCCtx, srcCCtx->params);
+    dstCCtx->hashLog3 = srcCCtx->hashLog3; /* must be before ZSTD_resetCCtx_advanced */
+    ZSTD_resetCCtx_advanced(dstCCtx, srcCCtx->params, 0);
+    dstCCtx->params.fParams.contentSizeFlag = 0;   /* content size different from the one set during srcCCtx init */
 
     /* copy tables */
-    memcpy(dstCCtx->hashTable, srcCCtx->hashTable, tableSpace);
-
-    /* copy frame header */
-    dstCCtx->hbSize = srcCCtx->hbSize;
-    memcpy(dstCCtx->headerBuffer , srcCCtx->headerBuffer, srcCCtx->hbSize);
+    {   const size_t chainSize = (srcCCtx->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << srcCCtx->params.cParams.chainLog);
+        const size_t hSize = 1 << srcCCtx->params.cParams.hashLog;
+        const size_t h3Size = (srcCCtx->hashLog3) ? 1 << srcCCtx->hashLog3 : 0;
+        const size_t tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
+        memcpy(dstCCtx->workSpace, srcCCtx->workSpace, tableSpace);
+    }
 
     /* copy dictionary pointers */
-    dstCCtx->nextToUpdate= srcCCtx->nextToUpdate;
-    dstCCtx->nextSrc     = srcCCtx->nextSrc;
-    dstCCtx->base        = srcCCtx->base;
-    dstCCtx->dictBase    = srcCCtx->dictBase;
-    dstCCtx->dictLimit   = srcCCtx->dictLimit;
-    dstCCtx->lowLimit    = srcCCtx->lowLimit;
-    dstCCtx->loadedDictEnd = srcCCtx->loadedDictEnd;
+    dstCCtx->nextToUpdate = srcCCtx->nextToUpdate;
+    dstCCtx->nextToUpdate3= srcCCtx->nextToUpdate3;
+    dstCCtx->nextSrc      = srcCCtx->nextSrc;
+    dstCCtx->base         = srcCCtx->base;
+    dstCCtx->dictBase     = srcCCtx->dictBase;
+    dstCCtx->dictLimit    = srcCCtx->dictLimit;
+    dstCCtx->lowLimit     = srcCCtx->lowLimit;
+    dstCCtx->loadedDictEnd= srcCCtx->loadedDictEnd;
 
     /* copy entropy tables */
     dstCCtx->flagStaticTables = srcCCtx->flagStaticTables;
@@ -276,27 +320,67 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx)
 }
 
 
-/*! ZSTD_reduceIndex
-*   rescale indexes to avoid future overflow (indexes are U32) */
-static void ZSTD_reduceIndex (ZSTD_CCtx* zc,
-                        const U32 reducerValue)
+/*! ZSTD_reduceTable() :
+*   reduce table indexes by `reducerValue` */
+static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reducerValue)
 {
-    const U32 contentLog = (zc->params.strategy == ZSTD_fast) ? 1 : zc->params.contentLog;
-    const U32 tableSpaceU32 = (1 << contentLog) + (1 << zc->params.hashLog);
-    U32* table32 = zc->hashTable;
-    U32 index;
-
-    for (index=0 ; index < tableSpaceU32 ; index++) {
-        if (table32[index] < reducerValue) table32[index] = 0;
-        else table32[index] -= reducerValue;
+    U32 u;
+    for (u=0 ; u < size ; u++) {
+        if (table[u] < reducerValue) table[u] = 0;
+        else table[u] -= reducerValue;
     }
 }
 
+/*! ZSTD_reduceIndex() :
+*   rescale all indexes to avoid future overflow (indexes are U32) */
+static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
+{
+    { const U32 hSize = 1 << zc->params.cParams.hashLog;
+      ZSTD_reduceTable(zc->hashTable, hSize, reducerValue); }
+
+    { const U32 chainSize = (zc->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << zc->params.cParams.chainLog);
+      ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue); }
+
+    { const U32 h3Size = (zc->hashLog3) ? 1 << zc->hashLog3 : 0;
+      ZSTD_reduceTable(zc->hashTable3, h3Size, reducerValue); }
+}
+
 
 /*-*******************************************************
 *  Block entropic compression
 *********************************************************/
 
+/* Frame format description
+   Frame Header -  [ Block Header - Block ] - Frame End
+   1) Frame Header
+      - 4 bytes - Magic Number : ZSTD_MAGICNUMBER (defined within zstd_static.h)
+      - 1 byte  - Frame Descriptor
+   2) Block Header
+      - 3 bytes, starting with a 2-bits descriptor
+                 Uncompressed, Compressed, Frame End, unused
+   3) Block
+      See Block Format Description
+   4) Frame End
+      - 3 bytes, compatible with Block Header
+*/
+
+
+/* Frame descriptor
+
+   1 byte, using :
+   bit 0-3 : windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN   (see zstd_internal.h)
+   bit 4   : minmatch 4(0) or 3(1)
+   bit 5   : reserved (must be zero)
+   bit 6-7 : Frame content size : unknown, 1 byte, 2 bytes, 8 bytes
+
+   Optional : content size (0, 1, 2 or 8 bytes)
+   0 : unknown
+   1 : 0-255 bytes
+   2 : 256 - 65535+256
+   8 : up to 16 exa
+*/
+
+
 /* Block format description
 
    Block = Literal Section - Sequences Section
@@ -383,11 +467,11 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc,
       FSE_ENCODING_DYNAMIC : read NCount
 */
 
-size_t ZSTD_noCompressBlock (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
     BYTE* const ostart = (BYTE* const)dst;
 
-    if (srcSize + ZSTD_blockHeaderSize > maxDstSize) return ERROR(dstSize_tooSmall);
+    if (srcSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall);
     memcpy(ostart + ZSTD_blockHeaderSize, src, srcSize);
 
     /* Build header */
@@ -400,12 +484,12 @@ size_t ZSTD_noCompressBlock (void* dst, size_t maxDstSize, const void* src, size
 }
 
 
-static size_t ZSTD_noCompressLiterals (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
     BYTE* const ostart = (BYTE* const)dst;
-    const U32 flSize = 1 + (srcSize>31) + (srcSize>4095);
+    U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
 
-    if (srcSize + flSize > maxDstSize) return ERROR(dstSize_tooSmall);
+    if (srcSize + flSize > dstCapacity) return ERROR(dstSize_tooSmall);
 
     switch(flSize)
     {
@@ -428,12 +512,12 @@ static size_t ZSTD_noCompressLiterals (void* dst, size_t maxDstSize, const void*
     return srcSize + flSize;
 }
 
-static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
     BYTE* const ostart = (BYTE* const)dst;
-    U32 flSize = 1 + (srcSize>31) + (srcSize>4095);
+    U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
 
-    (void)maxDstSize;  /* maxDstSize guaranteed to be >=4, hence large enough */
+    (void)dstCapacity;  /* dstCapacity guaranteed to be >=4, hence large enough */
 
     switch(flSize)
     {
@@ -444,7 +528,7 @@ static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t maxDstSize, const
             ostart[0] = (BYTE)((IS_RLE<<6) + (2<<4) + (srcSize >> 8));
             ostart[1] = (BYTE)srcSize;
             break;
-        default:   /*note : should not be necessary : flSize is necessary within {1,2,3} */
+        default:   /*note : should not be necessary : flSize is necessarily within {1,2,3} */
         case 3: /* 2 - 2 - 20 */
             ostart[0] = (BYTE)((IS_RLE<<6) + (3<<4) + (srcSize >> 16));
             ostart[1] = (BYTE)(srcSize>>8);
@@ -457,324 +541,337 @@ static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t maxDstSize, const
 }
 
 
-size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; }
+static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; }
 
 static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
-                                     void* dst, size_t maxDstSize,
+                                     void* dst, size_t dstCapacity,
                                const void* src, size_t srcSize)
 {
-    const size_t minGain = ZSTD_minGain(srcSize);
+    size_t const minGain = ZSTD_minGain(srcSize);
+    size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
     BYTE* const ostart = (BYTE*)dst;
-    const size_t lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
     U32 singleStream = srcSize < 256;
     U32 hType = IS_HUF;
-    size_t clitSize;
+    size_t cLitSize;
 
-    if (maxDstSize < lhSize+1) return ERROR(dstSize_tooSmall);   /* not enough space for compression */
 
+    /* small ? don't even attempt compression (speed opt) */
+#   define LITERAL_NOENTROPY 63
+    {   size_t const minLitSize = zc->flagStaticTables ? 6 : LITERAL_NOENTROPY;
+        if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
+    }
+
+    if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall);   /* not enough space for compression */
     if (zc->flagStaticTables && (lhSize==3)) {
         hType = IS_PCH;
         singleStream = 1;
-        clitSize = HUF_compress1X_usingCTable(ostart+lhSize, maxDstSize-lhSize, src, srcSize, zc->hufTable);
+        cLitSize = HUF_compress1X_usingCTable(ostart+lhSize, dstCapacity-lhSize, src, srcSize, zc->hufTable);
     } else {
-        clitSize = singleStream ? HUF_compress1X(ostart+lhSize, maxDstSize-lhSize, src, srcSize, 255, 12)
-                                : HUF_compress2 (ostart+lhSize, maxDstSize-lhSize, src, srcSize, 255, 12);
+        cLitSize = singleStream ? HUF_compress1X(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 12)
+                                : HUF_compress2 (ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 12);
     }
 
-    if ((clitSize==0) || (clitSize >= srcSize - minGain)) return ZSTD_noCompressLiterals(dst, maxDstSize, src, srcSize);
-    if (clitSize==1) return ZSTD_compressRleLiteralsBlock(dst, maxDstSize, src, srcSize);
+    if ((cLitSize==0) || (cLitSize >= srcSize - minGain))
+        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
+    if (cLitSize==1)
+        return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
 
     /* Build header */
     switch(lhSize)
     {
     case 3: /* 2 - 2 - 10 - 10 */
         ostart[0] = (BYTE)((srcSize>>6) + (singleStream << 4) + (hType<<6));
-        ostart[1] = (BYTE)((srcSize<<2) + (clitSize>>8));
-        ostart[2] = (BYTE)(clitSize);
+        ostart[1] = (BYTE)((srcSize<<2) + (cLitSize>>8));
+        ostart[2] = (BYTE)(cLitSize);
         break;
     case 4: /* 2 - 2 - 14 - 14 */
         ostart[0] = (BYTE)((srcSize>>10) + (2<<4) +  (hType<<6));
         ostart[1] = (BYTE)(srcSize>> 2);
-        ostart[2] = (BYTE)((srcSize<<6) + (clitSize>>8));
-        ostart[3] = (BYTE)(clitSize);
+        ostart[2] = (BYTE)((srcSize<<6) + (cLitSize>>8));
+        ostart[3] = (BYTE)(cLitSize);
         break;
-    default:   /* should not be necessary, lhSize is {3,4,5} */
+    default:   /* should not be necessary, lhSize is only {3,4,5} */
     case 5: /* 2 - 2 - 18 - 18 */
         ostart[0] = (BYTE)((srcSize>>14) + (3<<4) +  (hType<<6));
         ostart[1] = (BYTE)(srcSize>>6);
-        ostart[2] = (BYTE)((srcSize<<2) + (clitSize>>16));
-        ostart[3] = (BYTE)(clitSize>>8);
-        ostart[4] = (BYTE)(clitSize);
+        ostart[2] = (BYTE)((srcSize<<2) + (cLitSize>>16));
+        ostart[3] = (BYTE)(cLitSize>>8);
+        ostart[4] = (BYTE)(cLitSize);
         break;
     }
-
-    return lhSize+clitSize;
+    return lhSize+cLitSize;
 }
 
 
-#define LITERAL_NOENTROPY 63   /* don't even attempt to compress literals below this threshold (cheap heuristic) */
+void ZSTD_seqToCodes(const seqStore_t* seqStorePtr, size_t const nbSeq)
+{
+    /* LL codes */
+    {   static const BYTE LL_Code[64] = {  0,  1,  2,  3,  4,  5,  6,  7,
+                                           8,  9, 10, 11, 12, 13, 14, 15,
+                                          16, 16, 17, 17, 18, 18, 19, 19,
+                                          20, 20, 20, 20, 21, 21, 21, 21,
+                                          22, 22, 22, 22, 22, 22, 22, 22,
+                                          23, 23, 23, 23, 23, 23, 23, 23,
+                                          24, 24, 24, 24, 24, 24, 24, 24,
+                                          24, 24, 24, 24, 24, 24, 24, 24 };
+        const BYTE LL_deltaCode = 19;
+        const U16* const llTable = seqStorePtr->litLengthStart;
+        BYTE* const llCodeTable = seqStorePtr->llCodeStart;
+        size_t u;
+        for (u=0; u<nbSeq; u++) {
+            U32 const  ll = llTable[u];
+            llCodeTable[u] = (ll>63) ? (BYTE)ZSTD_highbit(ll) + LL_deltaCode : LL_Code[ll];
+        }
+        if (seqStorePtr->longLengthID==1)
+            llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
+    }
+
+    /* Offset codes */
+    {   const U32* const offsetTable = seqStorePtr->offsetStart;
+        BYTE* const ofCodeTable = seqStorePtr->offCodeStart;
+        size_t u;
+        for (u=0; u<nbSeq; u++) ofCodeTable[u] = (BYTE)ZSTD_highbit(offsetTable[u]);
+    }
+
+    /* ML codes */
+    {   static const BYTE ML_Code[128] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+                                          16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+                                          32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
+                                          38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
+                                          40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
+                                          41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
+                                          42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
+                                          42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
+        const BYTE ML_deltaCode = 36;
+        const U16* const mlTable = seqStorePtr->matchLengthStart;
+        BYTE* const mlCodeTable = seqStorePtr->mlCodeStart;
+        size_t u;
+        for (u=0; u<nbSeq; u++) {
+            U32 const ml = mlTable[u];
+            mlCodeTable[u] = (ml>127) ? (BYTE)ZSTD_highbit(ml) + ML_deltaCode : ML_Code[ml];
+        }
+        if (seqStorePtr->longLengthID==2)
+            mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
+    }
+}
+
 
 size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
-                              void* dst, size_t maxDstSize,
+                              void* dst, size_t dstCapacity,
                               size_t srcSize)
 {
     const seqStore_t* seqStorePtr = &(zc->seqStore);
     U32 count[MaxSeq+1];
     S16 norm[MaxSeq+1];
-    size_t mostFrequent;
-    U32 max;
     FSE_CTable* CTable_LitLength = zc->litlengthCTable;
     FSE_CTable* CTable_OffsetBits = zc->offcodeCTable;
     FSE_CTable* CTable_MatchLength = zc->matchlengthCTable;
     U32 LLtype, Offtype, MLtype;   /* compressed, raw or rle */
-    const BYTE* const op_lit_start = seqStorePtr->litStart;
-    const BYTE* const llTable = seqStorePtr->litLengthStart;
-    const BYTE* const llPtr = seqStorePtr->litLength;
-    const BYTE* const mlTable = seqStorePtr->matchLengthStart;
+    U16*  const llTable = seqStorePtr->litLengthStart;
+    U16*  const mlTable = seqStorePtr->matchLengthStart;
     const U32*  const offsetTable = seqStorePtr->offsetStart;
-    BYTE* const offCodeTable = seqStorePtr->offCodeStart;
+    const U32*  const offsetTableEnd = seqStorePtr->offset;
+    BYTE* const ofCodeTable = seqStorePtr->offCodeStart;
+    BYTE* const llCodeTable = seqStorePtr->llCodeStart;
+    BYTE* const mlCodeTable = seqStorePtr->mlCodeStart;
     BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstCapacity;
     BYTE* op = ostart;
-    BYTE* const oend = ostart + maxDstSize;
-    const size_t nbSeq = llPtr - llTable;
-    const size_t minGain = ZSTD_minGain(srcSize);
-    const size_t maxCSize = srcSize - minGain;
+    size_t const nbSeq = offsetTableEnd - offsetTable;
     BYTE* seqHead;
 
     /* Compress literals */
-    {
-        size_t cSize;
-        size_t litSize = seqStorePtr->lit - op_lit_start;
-        const size_t minLitSize = zc->flagStaticTables ? 6 : LITERAL_NOENTROPY;
-
-        if (litSize <= minLitSize)
-            cSize = ZSTD_noCompressLiterals(op, maxDstSize, op_lit_start, litSize);
-        else
-            cSize = ZSTD_compressLiterals(zc, op, maxDstSize, op_lit_start, litSize);
+    {   const BYTE* const literals = seqStorePtr->litStart;
+        size_t const litSize = seqStorePtr->lit - literals;
+        size_t const cSize = ZSTD_compressLiterals(zc, op, dstCapacity, literals, litSize);
         if (ZSTD_isError(cSize)) return cSize;
         op += cSize;
     }
 
     /* Sequences Header */
-    if ((oend-op) < MIN_SEQUENCES_SIZE) return ERROR(dstSize_tooSmall);
-    if (nbSeq < 128) *op++ = (BYTE)nbSeq;
-    else {
-        op[0] = (BYTE)((nbSeq>>8) + 128); op[1] = (BYTE)nbSeq; op+=2;
-    }
+    if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall);
+    if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq;
+    else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
+    else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
     if (nbSeq==0) goto _check_compressibility;
 
-    /* dumps : contains rests of large lengths */
-    if ((oend-op) < 3 /* dumps */ + 1 /*seqHead*/)
-        return ERROR(dstSize_tooSmall);
-    seqHead = op;
-    {
-        size_t dumpsLength = seqStorePtr->dumps - seqStorePtr->dumpsStart;
-        if (dumpsLength < 512) {
-            op[0] = (BYTE)(dumpsLength >> 8);
-            op[1] = (BYTE)(dumpsLength);
-            op += 2;
-        } else {
-            op[0] = 2;
-            op[1] = (BYTE)(dumpsLength>>8);
-            op[2] = (BYTE)(dumpsLength);
-            op += 3;
-        }
-        if ((size_t)(oend-op) < dumpsLength+6) return ERROR(dstSize_tooSmall);
-        memcpy(op, seqStorePtr->dumpsStart, dumpsLength);
-        op += dumpsLength;
-    }
+    /* seqHead : flags for FSE encoding type */
+    seqHead = op++;
 
 #define MIN_SEQ_FOR_DYNAMIC_FSE   64
 #define MAX_SEQ_FOR_STATIC_FSE  1000
 
-    /* CTable for Literal Lengths */
-    max = MaxLL;
-    mostFrequent = FSE_countFast(count, &max, llTable, nbSeq);
-    if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
-        *op++ = llTable[0];
-        FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
-        LLtype = FSE_ENCODING_RLE;
-    } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
-        LLtype = FSE_ENCODING_STATIC;
-    } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LLbits-1)))) {
-        FSE_buildCTable_raw(CTable_LitLength, LLbits);
-        LLtype = FSE_ENCODING_RAW;
-    } else {
-        size_t NCountSize;
-        size_t nbSeq_1 = nbSeq;
-        U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
-        if (count[llTable[nbSeq-1]]>1) { count[llTable[nbSeq-1]]--; nbSeq_1--; }
-        FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
-        NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
-        if (FSE_isError(NCountSize)) return ERROR(GENERIC);
-        op += NCountSize;
-        FSE_buildCTable(CTable_LitLength, norm, max, tableLog);
-        LLtype = FSE_ENCODING_DYNAMIC;
-    }
+    /* convert length/distances into codes */
+    ZSTD_seqToCodes(seqStorePtr, nbSeq);
 
-    /* CTable for Offset codes */
-    {   /* create Offset codes */
-        size_t i; for (i=0; i<nbSeq; i++) {
-            offCodeTable[i] = (BYTE)ZSTD_highbit(offsetTable[i]) + 1;
-            if (offsetTable[i]==0) offCodeTable[i]=0;
-        }
-    }
-    max = MaxOff;
-    mostFrequent = FSE_countFast(count, &max, offCodeTable, nbSeq);
-    if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
-        *op++ = offCodeTable[0];
-        FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
-        Offtype = FSE_ENCODING_RLE;
-    } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
-        Offtype = FSE_ENCODING_STATIC;
-    } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (Offbits-1)))) {
-        FSE_buildCTable_raw(CTable_OffsetBits, Offbits);
-        Offtype = FSE_ENCODING_RAW;
-    } else {
-        size_t NCountSize;
-        size_t nbSeq_1 = nbSeq;
-        U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
-        if (count[offCodeTable[nbSeq-1]]>1) { count[offCodeTable[nbSeq-1]]--; nbSeq_1--; }
-        FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
-        NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
-        if (FSE_isError(NCountSize)) return ERROR(GENERIC);
-        op += NCountSize;
-        FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog);
-        Offtype = FSE_ENCODING_DYNAMIC;
-    }
+    /* CTable for Literal Lengths */
+    {   U32 max = MaxLL;
+        size_t const mostFrequent = FSE_countFast(count, &max, llCodeTable, nbSeq);
+        if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
+            *op++ = llCodeTable[0];
+            FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
+            LLtype = FSE_ENCODING_RLE;
+        } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
+            LLtype = FSE_ENCODING_STATIC;
+        } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) {
+            FSE_buildCTable(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog);
+            LLtype = FSE_ENCODING_RAW;
+        } else {
+            size_t nbSeq_1 = nbSeq;
+            const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
+            if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; }
+            FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
+            { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
+              if (FSE_isError(NCountSize)) return ERROR(GENERIC);
+              op += NCountSize; }
+            FSE_buildCTable(CTable_LitLength, norm, max, tableLog);
+            LLtype = FSE_ENCODING_DYNAMIC;
+    }   }
+
+    /* CTable for Offsets */
+    {   U32 max = MaxOff;
+        size_t const mostFrequent = FSE_countFast(count, &max, ofCodeTable, nbSeq);
+        if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
+            *op++ = ofCodeTable[0];
+            FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
+            Offtype = FSE_ENCODING_RLE;
+        } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
+            Offtype = FSE_ENCODING_STATIC;
+        } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog-1)))) {
+            FSE_buildCTable(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog);
+            Offtype = FSE_ENCODING_RAW;
+        } else {
+            size_t nbSeq_1 = nbSeq;
+            const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
+            if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; }
+            FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
+            { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
+              if (FSE_isError(NCountSize)) return ERROR(GENERIC);
+              op += NCountSize; }
+            FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog);
+            Offtype = FSE_ENCODING_DYNAMIC;
+    }   }
 
     /* CTable for MatchLengths */
-    max = MaxML;
-    mostFrequent = FSE_countFast(count, &max, mlTable, nbSeq);
-    if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
-        *op++ = *mlTable;
-        FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
-        MLtype = FSE_ENCODING_RLE;
-    } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
-        MLtype = FSE_ENCODING_STATIC;
-    } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (MLbits-1)))) {
-        FSE_buildCTable_raw(CTable_MatchLength, MLbits);
-        MLtype = FSE_ENCODING_RAW;
-    } else {
-        size_t NCountSize;
-        U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
-        FSE_normalizeCount(norm, tableLog, count, nbSeq, max);
-        NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
-        if (FSE_isError(NCountSize)) return ERROR(GENERIC);
-        op += NCountSize;
-        FSE_buildCTable(CTable_MatchLength, norm, max, tableLog);
-        MLtype = FSE_ENCODING_DYNAMIC;
-    }
+    {   U32 max = MaxML;
+        size_t const mostFrequent = FSE_countFast(count, &max, mlCodeTable, nbSeq);
+        if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
+            *op++ = *mlCodeTable;
+            FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
+            MLtype = FSE_ENCODING_RLE;
+        } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
+            MLtype = FSE_ENCODING_STATIC;
+        } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog-1)))) {
+            FSE_buildCTable(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog);
+            MLtype = FSE_ENCODING_RAW;
+        } else {
+            size_t nbSeq_1 = nbSeq;
+            const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
+            if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; }
+            FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
+            { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
+              if (FSE_isError(NCountSize)) return ERROR(GENERIC);
+              op += NCountSize; }
+            FSE_buildCTable(CTable_MatchLength, norm, max, tableLog);
+            MLtype = FSE_ENCODING_DYNAMIC;
+    }   }
 
-    seqHead[0] += (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
+    *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
     zc->flagStaticTables = 0;
 
     /* Encoding Sequences */
-    {
-        size_t streamSize, errorCode;
-        BIT_CStream_t blockStream;
-        FSE_CState_t stateMatchLength;
-        FSE_CState_t stateOffsetBits;
-        FSE_CState_t stateLitLength;
-        int i;
+    {   BIT_CStream_t blockStream;
+        FSE_CState_t  stateMatchLength;
+        FSE_CState_t  stateOffsetBits;
+        FSE_CState_t  stateLitLength;
 
-        errorCode = BIT_initCStream(&blockStream, op, oend-op);
-        if (ERR_isError(errorCode)) return ERROR(dstSize_tooSmall);   /* not enough space remaining */
+        { size_t const errorCode = BIT_initCStream(&blockStream, op, oend-op);
+          if (ERR_isError(errorCode)) return ERROR(dstSize_tooSmall); }   /* not enough space remaining */
 
         /* first symbols */
-        FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlTable[nbSeq-1]);
-        FSE_initCState2(&stateOffsetBits,  CTable_OffsetBits,  offCodeTable[nbSeq-1]);
-        FSE_initCState2(&stateLitLength,   CTable_LitLength,   llTable[nbSeq-1]);
-        BIT_addBits(&blockStream, offsetTable[nbSeq-1], offCodeTable[nbSeq-1] ? (offCodeTable[nbSeq-1]-1) : 0);
+        FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
+        FSE_initCState2(&stateOffsetBits,  CTable_OffsetBits,  ofCodeTable[nbSeq-1]);
+        FSE_initCState2(&stateLitLength,   CTable_LitLength,   llCodeTable[nbSeq-1]);
+        BIT_addBits(&blockStream, llTable[nbSeq-1], LL_bits[llCodeTable[nbSeq-1]]);
+        if (MEM_32bits()) BIT_flushBits(&blockStream);
+        BIT_addBits(&blockStream, mlTable[nbSeq-1], ML_bits[mlCodeTable[nbSeq-1]]);
+        if (MEM_32bits()) BIT_flushBits(&blockStream);
+        BIT_addBits(&blockStream, offsetTable[nbSeq-1], ofCodeTable[nbSeq-1]);
         BIT_flushBits(&blockStream);
 
-        for (i=(int)nbSeq-2; i>=0; i--) {
-            BYTE mlCode = mlTable[i];
-            U32  offset = offsetTable[i];
-            BYTE offCode = offCodeTable[i];                                 /* 32b*/  /* 64b*/
-            U32 nbBits = (offCode-1) + (!offCode);
-            BYTE litLength = llTable[i];                                    /* (7)*/  /* (7)*/
-            FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode);      /* 17 */  /* 17 */
-            if (MEM_32bits()) BIT_flushBits(&blockStream);                  /*  7 */
-            FSE_encodeSymbol(&blockStream, &stateLitLength, litLength);     /* 26 */  /* 61 */
-            FSE_encodeSymbol(&blockStream, &stateOffsetBits, offCode);      /* 16 */  /* 51 */
-            if (MEM_32bits()) BIT_flushBits(&blockStream);                  /*  7 */
-            BIT_addBits(&blockStream, offset, nbBits);                      /* 31 */  /* 42 */   /* 24 bits max in 32-bits mode */
-            BIT_flushBits(&blockStream);                                    /*  7 */  /*  7 */
-        }
+        {   size_t n;
+            for (n=nbSeq-2 ; n<nbSeq ; n--) {      /* intentional underflow */
+                const BYTE ofCode = ofCodeTable[n];
+                const BYTE mlCode = mlCodeTable[n];
+                const BYTE llCode = llCodeTable[n];
+                const U32  llBits = LL_bits[llCode];
+                const U32  mlBits = ML_bits[mlCode];
+                const U32  ofBits = ofCode;                                     /* 32b*/  /* 64b*/
+                                                                                /* (7)*/  /* (7)*/
+                FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);       /* 15 */  /* 15 */
+                FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode);      /* 24 */  /* 24 */
+                if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
+                FSE_encodeSymbol(&blockStream, &stateLitLength, llCode);        /* 16 */  /* 33 */
+                if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
+                    BIT_flushBits(&blockStream);                                /* (7)*/
+                BIT_addBits(&blockStream, llTable[n], llBits);
+                if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
+                BIT_addBits(&blockStream, mlTable[n], mlBits);
+                if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
+                BIT_addBits(&blockStream, offsetTable[n], ofBits);              /* 31 */
+                BIT_flushBits(&blockStream);                                    /* (7)*/
+        }   }
 
         FSE_flushCState(&blockStream, &stateMatchLength);
         FSE_flushCState(&blockStream, &stateOffsetBits);
         FSE_flushCState(&blockStream, &stateLitLength);
 
-        streamSize = BIT_closeCStream(&blockStream);
-        if (streamSize==0) return ERROR(dstSize_tooSmall);   /* not enough space */
-        op += streamSize;
-    }
+        {   size_t const streamSize = BIT_closeCStream(&blockStream);
+            if (streamSize==0) return ERROR(dstSize_tooSmall);   /* not enough space */
+            op += streamSize;
+    }   }
 
     /* check compressibility */
 _check_compressibility:
-    if ((size_t)(op-ostart) >= maxCSize) return 0;
+    { size_t const minGain = ZSTD_minGain(srcSize);
+      size_t const maxCSize = srcSize - minGain;
+      if ((size_t)(op-ostart) >= maxCSize) return 0; }
 
     return op - ostart;
 }
 
 
-/*! ZSTD_storeSeq
-    Store a sequence (literal length, literals, offset code and match length code) into seqStore_t
-    @offsetCode : distance to match, or 0 == repCode
-    @matchCode : matchLength - MINMATCH
+/*! ZSTD_storeSeq() :
+    Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
+    `offsetCode` : distance to match, or 0 == repCode.
+    `matchCode` : matchLength - MINMATCH
 */
 MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, size_t offsetCode, size_t matchCode)
 {
 #if 0  /* for debug */
     static const BYTE* g_start = NULL;
+    const U32 pos = (U32)(literals - g_start);
     if (g_start==NULL) g_start = literals;
-    //if (literals - g_start == 8695)
-    printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n",
-           (U32)(literals - g_start), (U32)litLength, (U32)matchCode+4, (U32)offsetCode);
+    if ((pos > 5810300) && (pos < 5810500))
+        printf("Cpos %6u :%5u literals & match %3u bytes at distance %6u \n",
+               pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode);
 #endif
+    ZSTD_statsUpdatePrices(&seqStorePtr->stats, litLength, literals, offsetCode, matchCode);
 
     /* copy Literals */
     ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
     seqStorePtr->lit += litLength;
 
     /* literal Length */
-    if (litLength >= MaxLL) {
-        *(seqStorePtr->litLength++) = MaxLL;
-        if (litLength<255 + MaxLL) {
-            *(seqStorePtr->dumps++) = (BYTE)(litLength - MaxLL);
-        } else {
-            *(seqStorePtr->dumps++) = 255;
-            if (litLength < (1<<15)) {
-                MEM_writeLE16(seqStorePtr->dumps, (U16)(litLength<<1));
-                seqStorePtr->dumps += 2;
-            } else {
-                MEM_writeLE32(seqStorePtr->dumps, (U32)((litLength<<1)+1));
-                seqStorePtr->dumps += 3;
-            }
-    }   }
-    else *(seqStorePtr->litLength++) = (BYTE)litLength;
+    if (litLength>0xFFFF) { seqStorePtr->longLengthID = 1; seqStorePtr->longLengthPos = (U32)(seqStorePtr->litLength - seqStorePtr->litLengthStart); }
+    *seqStorePtr->litLength++ = (U16)litLength;
 
     /* match offset */
-    *(seqStorePtr->offset++) = (U32)offsetCode;
+    *(seqStorePtr->offset++) = (U32)offsetCode + 1;
 
     /* match Length */
-    if (matchCode >= MaxML) {
-        *(seqStorePtr->matchLength++) = MaxML;
-        if (matchCode < 255+MaxML) {
-            *(seqStorePtr->dumps++) = (BYTE)(matchCode - MaxML);
-        } else {
-            *(seqStorePtr->dumps++) = 255;
-            if (matchCode < (1<<15)) {
-                MEM_writeLE16(seqStorePtr->dumps, (U16)(matchCode<<1));
-                seqStorePtr->dumps += 2;
-            } else {
-                MEM_writeLE32(seqStorePtr->dumps, (U32)((matchCode<<1)+1));
-                seqStorePtr->dumps += 3;
-            }
-    }   }
-    else *(seqStorePtr->matchLength++) = (BYTE)matchCode;
+    if (matchCode>0xFFFF) { seqStorePtr->longLengthID = 2; seqStorePtr->longLengthPos = (U32)(seqStorePtr->matchLength - seqStorePtr->matchLengthStart); }
+    *seqStorePtr->matchLength++ = (U16)matchCode;
 }
 
 
@@ -875,6 +972,10 @@ static size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE
 /*-*************************************
 *  Hashes
 ***************************************/
+static const U32 prime3bytes = 506832829U;
+static U32    ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes)  >> (32-h) ; }
+static size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); }
+
 static const U32 prime4bytes = 2654435761U;
 static U32    ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
 static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }
@@ -907,18 +1008,18 @@ static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
 /*-*************************************
 *  Fast Scan
 ***************************************/
-#define FILLHASHSTEP 3
 static void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls)
 {
     U32* const hashTable = zc->hashTable;
-    const U32 hBits = zc->params.hashLog;
+    const U32 hBits = zc->params.cParams.hashLog;
     const BYTE* const base = zc->base;
     const BYTE* ip = base + zc->nextToUpdate;
     const BYTE* const iend = ((const BYTE*)end) - 8;
+    const size_t fastHashFillStep = 3;
 
     while(ip <= iend) {
         hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base);
-        ip += FILLHASHSTEP;
+        ip += fastHashFillStep;
     }
 }
 
@@ -929,7 +1030,7 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc,
                                  const U32 mls)
 {
     U32* const hashTable = zc->hashTable;
-    const U32 hBits = zc->params.hashLog;
+    const U32 hBits = zc->params.cParams.hashLog;
     seqStore_t* seqStorePtr = &(zc->seqStore);
     const BYTE* const base = zc->base;
     const BYTE* const istart = (const BYTE*)src;
@@ -939,10 +1040,8 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc,
     const BYTE* const lowest = base + lowIndex;
     const BYTE* const iend = istart + srcSize;
     const BYTE* const ilimit = iend - 8;
-
     size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE;
 
-
     /* init */
     ZSTD_resetSeqStore(seqStorePtr);
     if (ip < lowest+REPCODE_STARTVALUE) ip = lowest+REPCODE_STARTVALUE;
@@ -958,25 +1057,26 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc,
         hashTable[h] = current;   /* update hash table */
 
         if (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)) {   /* note : by construction, offset_1 <= current */
-            mlCode = ZSTD_count(ip+1+MINMATCH, ip+1+MINMATCH-offset_1, iend);
+            mlCode = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
             ip++;
-            offset = 0;
-        } else {
+            ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mlCode-MINMATCH);
+       } else {
             if ( (matchIndex <= lowIndex) ||
                  (MEM_read32(match) != MEM_read32(ip)) ) {
                 ip += ((ip-anchor) >> g_searchStrength) + 1;
                 continue;
             }
-            mlCode = ZSTD_count(ip+MINMATCH, match+MINMATCH, iend);
+            mlCode = ZSTD_count(ip+EQUAL_READ32, match+EQUAL_READ32, iend) + EQUAL_READ32;
             offset = ip-match;
             while ((ip>anchor) && (match>lowest) && (ip[-1] == match[-1])) { ip--; match--; mlCode++; }  /* catch up */
             offset_2 = offset_1;
             offset_1 = offset;
+
+            ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mlCode-MINMATCH);
         }
 
         /* match found */
-        ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset, mlCode);
-        ip += mlCode + MINMATCH;
+        ip += mlCode;
         anchor = ip;
 
         if (ip <= ilimit) {
@@ -987,17 +1087,17 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc,
             while ( (ip <= ilimit)
                  && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
                 /* store sequence */
-                size_t rlCode = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend);
-                size_t tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;   /* swap offset_2 <=> offset_1 */
+                size_t const rlCode = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_2, iend) + EQUAL_READ32;
+                { size_t const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
                 hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base);
-                ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rlCode);
-                ip += rlCode+MINMATCH;
+                ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rlCode-MINMATCH);
+                ip += rlCode;
                 anchor = ip;
                 continue;   /* faster when present ... (?) */
     }   }   }
 
-    {   /* Last Literals */
-        size_t lastLLSize = iend - anchor;
+    /* Last Literals */
+    {   size_t const lastLLSize = iend - anchor;
         memcpy(seqStorePtr->lit, anchor, lastLLSize);
         seqStorePtr->lit += lastLLSize;
     }
@@ -1007,7 +1107,7 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc,
 static void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx,
                        const void* src, size_t srcSize)
 {
-    const U32 mls = ctx->params.searchLength;
+    const U32 mls = ctx->params.cParams.searchLength;
     switch(mls)
     {
     default:
@@ -1028,7 +1128,7 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
                                  const U32 mls)
 {
     U32* hashTable = ctx->hashTable;
-    const U32 hBits = ctx->params.hashLog;
+    const U32 hBits = ctx->params.cParams.hashLog;
     seqStore_t* seqStorePtr = &(ctx->seqStore);
     const BYTE* const base = ctx->base;
     const BYTE* const dictBase = ctx->dictBase;
@@ -1066,29 +1166,30 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
         U32 offset;
         hashTable[h] = current;   /* update hash table */
 
-        if ( ((repIndex <= dictLimit-4) || (repIndex >= dictLimit))
+        if ( ((repIndex >= dictLimit) || (repIndex <= dictLimit-4))
           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
             const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
-            mlCode = ZSTD_count_2segments(ip+1+MINMATCH, repMatch+MINMATCH, iend, repMatchEnd, lowPrefixPtr);
+            mlCode = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repMatchEnd, lowPrefixPtr) + EQUAL_READ32;
             ip++;
-            offset = 0;
+            ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mlCode-MINMATCH);
         } else {
             if ( (matchIndex < lowLimit) ||
-                 (MEM_read32(match) != MEM_read32(ip)) )
-            { ip += ((ip-anchor) >> g_searchStrength) + 1; continue; }
-            {
-                const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
+                 (MEM_read32(match) != MEM_read32(ip)) ) {
+                ip += ((ip-anchor) >> g_searchStrength) + 1;
+                continue;
+            }
+            {   const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
                 const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
-                mlCode = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iend, matchEnd, lowPrefixPtr);
+                mlCode = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iend, matchEnd, lowPrefixPtr) + EQUAL_READ32;
                 while ((ip>anchor) && (match>lowMatchPtr) && (ip[-1] == match[-1])) { ip--; match--; mlCode++; }   /* catch up */
                 offset = current - matchIndex;
                 offset_2 = offset_1;
                 offset_1 = offset;
+                ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mlCode-MINMATCH);
         }   }
 
         /* found a match : store it */
-        ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset, mlCode);
-        ip += mlCode + MINMATCH;
+        ip += mlCode;
         anchor = ip;
 
         if (ip <= ilimit) {
@@ -1097,17 +1198,17 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
             hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base);
             /* check immediate repcode */
             while (ip <= ilimit) {
-                U32 current2 = (U32)(ip-base);
-                const U32 repIndex2 = current2 - offset_2;
+                U32 const current2 = (U32)(ip-base);
+                U32 const repIndex2 = current2 - offset_2;
                 const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
                 if ( ((repIndex2 <= dictLimit-4) || (repIndex2 >= dictLimit))
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                     const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
-                    size_t repLength2 = ZSTD_count_2segments(ip+MINMATCH, repMatch2+MINMATCH, iend, repEnd2, lowPrefixPtr);
+                    size_t repLength2 = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch2+EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32;
                     U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2);
+                    ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
                     hashTable[ZSTD_hashPtr(ip, hBits, mls)] = current2;
-                    ip += repLength2+MINMATCH;
+                    ip += repLength2;
                     anchor = ip;
                     continue;
                 }
@@ -1115,8 +1216,7 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
     }   }   }
 
     /* Last Literals */
-    {
-        size_t lastLLSize = iend - anchor;
+    {   size_t const lastLLSize = iend - anchor;
         memcpy(seqStorePtr->lit, anchor, lastLLSize);
         seqStorePtr->lit += lastLLSize;
     }
@@ -1126,7 +1226,7 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
 static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx,
                          const void* src, size_t srcSize)
 {
-    const U32 mls = ctx->params.searchLength;
+    const U32 mls = ctx->params.cParams.searchLength;
     switch(mls)
     {
     default:
@@ -1142,6 +1242,8 @@ static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx,
 }
 
 
+
+
 /*-*************************************
 *  Binary Tree search
 ***************************************/
@@ -1152,10 +1254,10 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
                           U32 extDict)
 {
     U32* const hashTable = zc->hashTable;
-    const U32 hashLog = zc->params.hashLog;
+    const U32 hashLog = zc->params.cParams.hashLog;
     const size_t h  = ZSTD_hashPtr(ip, hashLog, mls);
-    U32* const bt   = zc->contentTable;
-    const U32 btLog = zc->params.contentLog - 1;
+    U32* const bt   = zc->chainTable;
+    const U32 btLog = zc->params.cParams.chainLog - 1;
     const U32 btMask= (1 << btLog) - 1;
     U32 matchIndex  = hashTable[h];
     size_t commonLengthSmaller=0, commonLengthLarger=0;
@@ -1183,7 +1285,7 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
     while (nbCompares-- && (matchIndex > windowLow)) {
         U32* nextPtr = bt + 2*(matchIndex & btMask);
         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
-#if 1   /* note : can create issues when hlog small <= 11 */
+#if 0   /* note : can create issues when hlog small <= 11 */
         const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
         if (matchIndex == predictedSmall) {
             /* no need to check length, result known */
@@ -1254,10 +1356,10 @@ static size_t ZSTD_insertBtAndFindBestMatch (
                         U32 extDict)
 {
     U32* const hashTable = zc->hashTable;
-    const U32 hashLog = zc->params.hashLog;
+    const U32 hashLog = zc->params.cParams.hashLog;
     const size_t h  = ZSTD_hashPtr(ip, hashLog, mls);
-    U32* const bt   = zc->contentTable;
-    const U32 btLog = zc->params.contentLog - 1;
+    U32* const bt   = zc->chainTable;
+    const U32 btLog = zc->params.cParams.chainLog - 1;
     const U32 btMask= (1 << btLog) - 1;
     U32 matchIndex  = hashTable[h];
     size_t commonLengthSmaller=0, commonLengthLarger=0;
@@ -1271,9 +1373,9 @@ static size_t ZSTD_insertBtAndFindBestMatch (
     const U32 windowLow = zc->lowLimit;
     U32* smallerPtr = bt + 2*(current&btMask);
     U32* largerPtr  = bt + 2*(current&btMask) + 1;
-    size_t bestLength = 0;
     U32 matchEndIdx = current+8;
     U32 dummy32;   /* to be nullified at the end */
+    size_t bestLength = 0;
 
     hashTable[h] = current;   /* Update Hash Table */
 
@@ -1297,7 +1399,7 @@ static size_t ZSTD_insertBtAndFindBestMatch (
             if (matchLength > matchEndIdx - matchIndex)
                 matchEndIdx = matchIndex + (U32)matchLength;
             if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit(current-matchIndex+1) - ZSTD_highbit((U32)offsetPtr[0]+1)) )
-                bestLength = matchLength, *offsetPtr = current - matchIndex;
+                bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
             if (ip+matchLength == iend)   /* equal : no way to know if inf or sup */
                 break;   /* drop, to guarantee consistency (miss a little bit of compression) */
         }
@@ -1335,7 +1437,7 @@ static void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* con
         idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 0);
 }
 
-/** Tree updater, providing best match */
+/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
 static size_t ZSTD_BtFindBestMatch (
                         ZSTD_CCtx* zc,
                         const BYTE* const ip, const BYTE* const iLimit,
@@ -1374,6 +1476,7 @@ static void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const B
 }
 
 
+
 /** Tree updater, providing best match */
 static size_t ZSTD_BtFindBestMatch_extDict (
                         ZSTD_CCtx* zc,
@@ -1403,27 +1506,29 @@ static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
 }
 
 
+
 /* ***********************
 *  Hash Chain
 *************************/
 
 #define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & mask]
 
+
 /* Update chains up to ip (excluded)
    Assumption : always within prefix (ie. not within extDict) */
 FORCE_INLINE
 U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
 {
     U32* const hashTable  = zc->hashTable;
-    const U32 hashLog = zc->params.hashLog;
-    U32* const chainTable = zc->contentTable;
-    const U32 chainMask = (1 << zc->params.contentLog) - 1;
+    const U32 hashLog = zc->params.cParams.hashLog;
+    U32* const chainTable = zc->chainTable;
+    const U32 chainMask = (1 << zc->params.cParams.chainLog) - 1;
     const BYTE* const base = zc->base;
     const U32 target = (U32)(ip - base);
     U32 idx = zc->nextToUpdate;
 
     while(idx < target) {
-        size_t h = ZSTD_hashPtr(base+idx, hashLog, mls);
+        size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
         NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
         hashTable[h] = idx;
         idx++;
@@ -1434,6 +1539,7 @@ U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
 }
 
 
+
 FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
 size_t ZSTD_HcFindBestMatch_generic (
                         ZSTD_CCtx* zc,   /* Index table will be updated */
@@ -1441,8 +1547,8 @@ size_t ZSTD_HcFindBestMatch_generic (
                         size_t* offsetPtr,
                         const U32 maxNbAttempts, const U32 mls, const U32 extDict)
 {
-    U32* const chainTable = zc->contentTable;
-    const U32 chainSize = (1 << zc->params.contentLog);
+    U32* const chainTable = zc->chainTable;
+    const U32 chainSize = (1 << zc->params.cParams.chainLog);
     const U32 chainMask = chainSize-1;
     const BYTE* const base = zc->base;
     const BYTE* const dictBase = zc->dictBase;
@@ -1452,17 +1558,15 @@ size_t ZSTD_HcFindBestMatch_generic (
     const U32 lowLimit = zc->lowLimit;
     const U32 current = (U32)(ip-base);
     const U32 minChain = current > chainSize ? current - chainSize : 0;
-    U32 matchIndex;
-    const BYTE* match;
     int nbAttempts=maxNbAttempts;
-    size_t ml=MINMATCH-1;
+    size_t ml=EQUAL_READ32-1;
 
     /* HC4 match finder */
-    matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls);
+    U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls);
 
-    while ((matchIndex>lowLimit) && (nbAttempts)) {
+    for ( ; (matchIndex>lowLimit) && (nbAttempts) ; nbAttempts--) {
+        const BYTE* match;
         size_t currentMl=0;
-        nbAttempts--;
         if ((!extDict) || matchIndex >= dictLimit) {
             match = base + matchIndex;
             if (match[ml] == ip[ml])   /* potentially better */
@@ -1470,11 +1574,11 @@ size_t ZSTD_HcFindBestMatch_generic (
         } else {
             match = dictBase + matchIndex;
             if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
-                currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH;
+                currentMl = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iLimit, dictEnd, prefixStart) + EQUAL_READ32;
         }
 
         /* save best solution */
-        if (currentMl > ml) { ml = currentMl; *offsetPtr = current - matchIndex; if (ip+currentMl == iLimit) break; /* best possible, and avoid read overflow*/ }
+        if (currentMl > ml) { ml = currentMl; *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; if (ip+currentMl == iLimit) break; /* best possible, and avoid read overflow*/ }
 
         if (matchIndex <= minChain) break;
         matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
@@ -1515,6 +1619,9 @@ FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
     }
 }
 
+/* The optimal parser */
+#include "zstd_opt.h"
+
 
 /* *******************************
 *  Common parser - lazy strategy
@@ -1532,9 +1639,8 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
     const BYTE* const ilimit = iend - 8;
     const BYTE* const base = ctx->base + ctx->dictLimit;
 
-    size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE;
-    const U32 maxSearches = 1 << ctx->params.searchLog;
-    const U32 mls = ctx->params.searchLength;
+    U32 const maxSearches = 1 << ctx->params.cParams.searchLog;
+    U32 const mls = ctx->params.cParams.searchLength;
 
     typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
                         size_t* offsetPtr,
@@ -1542,6 +1648,10 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
     searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
 
     /* init */
+    U32 rep[ZSTD_REP_INIT];
+    { U32 i ; for (i=0; i<ZSTD_REP_INIT; i++) rep[i]=REPCODE_STARTVALUE; }
+
+    ctx->nextToUpdate3 = ctx->nextToUpdate;
     ZSTD_resetSeqStore(seqStorePtr);
     if ((ip-base) < REPCODE_STARTVALUE) ip = base + REPCODE_STARTVALUE;
 
@@ -1552,21 +1662,20 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
         const BYTE* start=ip+1;
 
         /* check repCode */
-        if (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1)) {
+        if (MEM_read32(ip+1) == MEM_read32(ip+1 - rep[0])) {
             /* repcode : we take it */
-            matchLength = ZSTD_count(ip+1+MINMATCH, ip+1+MINMATCH-offset_1, iend) + MINMATCH;
+            matchLength = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-rep[0], iend) + EQUAL_READ32;
             if (depth==0) goto _storeSequence;
         }
 
-        {
-            /* first search (depth 0) */
-            size_t offsetFound = 99999999;
-            size_t ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
+        /* first search (depth 0) */
+        {   size_t offsetFound = 99999999;
+            size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
             if (ml2 > matchLength)
                 matchLength = ml2, start = ip, offset=offsetFound;
         }
 
-        if (matchLength < MINMATCH) {
+        if (matchLength < EQUAL_READ32) {
             ip += ((ip-anchor) >> g_searchStrength) + 1;   /* jump faster over incompressible sections */
             continue;
         }
@@ -1575,19 +1684,18 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
         if (depth>=1)
         while (ip<ilimit) {
             ip ++;
-            if ((offset) && (MEM_read32(ip) == MEM_read32(ip - offset_1))) {
-                size_t mlRep = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_1, iend) + MINMATCH;
-                int gain2 = (int)(mlRep * 3);
-                int gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1);
-                if ((mlRep >= MINMATCH) && (gain2 > gain1))
+            if ((offset) && (MEM_read32(ip) == MEM_read32(ip - rep[0]))) {
+                size_t const mlRep = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-rep[0], iend) + EQUAL_READ32;
+                int const gain2 = (int)(mlRep * 3);
+                int const gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1);
+                if ((mlRep >= EQUAL_READ32) && (gain2 > gain1))
                     matchLength = mlRep, offset = 0, start = ip;
             }
-            {
-                size_t offset2=999999;
-                size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
-                int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1));   /* raw approx */
-                int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4);
-                if ((ml2 >= MINMATCH) && (gain2 > gain1)) {
+            {   size_t offset2=99999999;
+                size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
+                int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1));   /* raw approx */
+                int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4);
+                if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
                     matchLength = ml2, offset = offset2, start = ip;
                     continue;   /* search a better one */
             }   }
@@ -1595,19 +1703,18 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
             /* let's find an even better one */
             if ((depth==2) && (ip<ilimit)) {
                 ip ++;
-                if ((offset) && (MEM_read32(ip) == MEM_read32(ip - offset_1))) {
-                    size_t ml2 = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_1, iend) + MINMATCH;
-                    int gain2 = (int)(ml2 * 4);
-                    int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 1);
-                    if ((ml2 >= MINMATCH) && (gain2 > gain1))
+                if ((offset) && (MEM_read32(ip) == MEM_read32(ip - rep[0]))) {
+                    size_t const ml2 = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-rep[0], iend) + EQUAL_READ32;
+                    int const gain2 = (int)(ml2 * 4);
+                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 1);
+                    if ((ml2 >= EQUAL_READ32) && (gain2 > gain1))
                         matchLength = ml2, offset = 0, start = ip;
                 }
-                {
-                    size_t offset2=999999;
-                    size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
-                    int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1));   /* raw approx */
-                    int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7);
-                    if ((ml2 >= MINMATCH) && (gain2 > gain1)) {
+                {   size_t offset2=99999999;
+                    size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
+                    int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1));   /* raw approx */
+                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7);
+                    if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
                         matchLength = ml2, offset = offset2, start = ip;
                         continue;
             }   }   }
@@ -1616,51 +1723,43 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
 
         /* catch up */
         if (offset) {
-            while ((start>anchor) && (start>base+offset) && (start[-1] == start[-1-offset]))   /* only search for offset within prefix */
+            while ((start>anchor) && (start>base+offset-ZSTD_REP_MOVE) && (start[-1] == start[-1-offset+ZSTD_REP_MOVE]))   /* only search for offset within prefix */
                 { start--; matchLength++; }
-            offset_2 = offset_1; offset_1 = offset;
+            rep[1] = rep[0]; rep[0] = (U32)(offset - ZSTD_REP_MOVE);
         }
 
         /* store sequence */
 _storeSequence:
-        {
-            size_t litLength = start - anchor;
+        {   size_t const litLength = start - anchor;
             ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH);
             anchor = ip = start + matchLength;
         }
 
         /* check immediate repcode */
         while ( (ip <= ilimit)
-             && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
+             && (MEM_read32(ip) == MEM_read32(ip - rep[1])) ) {
             /* store sequence */
-            matchLength = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend);
-            offset = offset_2;
-            offset_2 = offset_1;
-            offset_1 = offset;
-            ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength);
-            ip += matchLength+MINMATCH;
+            matchLength = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-rep[1], iend) + EQUAL_READ32;
+            offset = rep[1]; rep[1] = rep[0]; rep[0] = (U32)offset; /* swap repcodes */
+            ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
+            ip += matchLength;
             anchor = ip;
             continue;   /* faster when present ... (?) */
     }   }
 
     /* Last Literals */
-    {
-        size_t lastLLSize = iend - anchor;
+    {   size_t const lastLLSize = iend - anchor;
         memcpy(seqStorePtr->lit, anchor, lastLLSize);
         seqStorePtr->lit += lastLLSize;
+        ZSTD_statsUpdatePrices(&seqStorePtr->stats, lastLLSize, anchor, 0, 0);
     }
 }
 
-#include "zstd_opt.h"
 
-static void ZSTD_compressBlock_opt_bt(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
-{
-    ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1, 2);
-}
 
-static void ZSTD_compressBlock_opt(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+static void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
-    ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0, 2);
+    ZSTD_compressBlock_opt_generic(ctx, src, srcSize);
 }
 
 static void ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
@@ -1702,9 +1801,8 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
     const BYTE* const dictEnd  = dictBase + dictLimit;
     const BYTE* const dictStart  = dictBase + ctx->lowLimit;
 
-    size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE;
-    const U32 maxSearches = 1 << ctx->params.searchLog;
-    const U32 mls = ctx->params.searchLength;
+    const U32 maxSearches = 1 << ctx->params.cParams.searchLog;
+    const U32 mls = ctx->params.cParams.searchLength;
 
     typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
                         size_t* offsetPtr,
@@ -1712,6 +1810,10 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
     searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS;
 
     /* init */
+    U32 rep[ZSTD_REP_INIT];
+    { U32 i; for (i=0; i<ZSTD_REP_INIT; i++) rep[i]=REPCODE_STARTVALUE; }
+
+    ctx->nextToUpdate3 = ctx->nextToUpdate;
     ZSTD_resetSeqStore(seqStorePtr);
     if ((ip - prefixStart) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE;
 
@@ -1724,26 +1826,25 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
 
         /* check repCode */
         {
-            const U32 repIndex = (U32)(current+1 - offset_1);
+            const U32 repIndex = (U32)(current+1 - rep[0]);
             const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
             const BYTE* const repMatch = repBase + repIndex;
             if ((U32)((dictLimit-1) - repIndex) >= 3)   /* intentional overflow */
             if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
                 /* repcode detected we should take it */
                 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
-                matchLength = ZSTD_count_2segments(ip+1+MINMATCH, repMatch+MINMATCH, iend, repEnd, prefixStart) + MINMATCH;
+                matchLength = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
                 if (depth==0) goto _storeSequence;
         }   }
 
-        {
-            /* first search (depth 0) */
-            size_t offsetFound = 99999999;
-            size_t ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
+        /* first search (depth 0) */
+        {   size_t offsetFound = 99999999;
+            size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
             if (ml2 > matchLength)
                 matchLength = ml2, start = ip, offset=offsetFound;
         }
 
-         if (matchLength < MINMATCH) {
+         if (matchLength < EQUAL_READ32) {
             ip += ((ip-anchor) >> g_searchStrength) + 1;   /* jump faster over incompressible sections */
             continue;
         }
@@ -1755,27 +1856,26 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
             current++;
             /* check repCode */
             if (offset) {
-                const U32 repIndex = (U32)(current - offset_1);
+                const U32 repIndex = (U32)(current - rep[0]);
                 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                 const BYTE* const repMatch = repBase + repIndex;
                 if ((U32)((dictLimit-1) - repIndex) >= 3)   /* intentional overflow */
                 if (MEM_read32(ip) == MEM_read32(repMatch)) {
                     /* repcode detected */
                     const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
-                    size_t repLength = ZSTD_count_2segments(ip+MINMATCH, repMatch+MINMATCH, iend, repEnd, prefixStart) + MINMATCH;
-                    int gain2 = (int)(repLength * 3);
-                    int gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1);
-                    if ((repLength >= MINMATCH) && (gain2 > gain1))
+                    size_t const repLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
+                    int const gain2 = (int)(repLength * 3);
+                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1);
+                    if ((repLength >= EQUAL_READ32) && (gain2 > gain1))
                         matchLength = repLength, offset = 0, start = ip;
             }   }
 
             /* search match, depth 1 */
-            {
-                size_t offset2=999999;
-                size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
-                int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1));   /* raw approx */
-                int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4);
-                if ((ml2 >= MINMATCH) && (gain2 > gain1)) {
+            {   size_t offset2=99999999;
+                size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
+                int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1));   /* raw approx */
+                int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4);
+                if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
                     matchLength = ml2, offset = offset2, start = ip;
                     continue;   /* search a better one */
             }   }
@@ -1786,27 +1886,26 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
                 current++;
                 /* check repCode */
                 if (offset) {
-                    const U32 repIndex = (U32)(current - offset_1);
+                    const U32 repIndex = (U32)(current - rep[0]);
                     const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                     const BYTE* const repMatch = repBase + repIndex;
                     if ((U32)((dictLimit-1) - repIndex) >= 3)   /* intentional overflow */
                     if (MEM_read32(ip) == MEM_read32(repMatch)) {
                         /* repcode detected */
                         const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
-                        size_t repLength = ZSTD_count_2segments(ip+MINMATCH, repMatch+MINMATCH, iend, repEnd, prefixStart) + MINMATCH;
+                        size_t repLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
                         int gain2 = (int)(repLength * 4);
                         int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 1);
-                        if ((repLength >= MINMATCH) && (gain2 > gain1))
+                        if ((repLength >= EQUAL_READ32) && (gain2 > gain1))
                             matchLength = repLength, offset = 0, start = ip;
                 }   }
 
                 /* search match, depth 2 */
-                {
-                    size_t offset2=999999;
-                    size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
-                    int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1));   /* raw approx */
-                    int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7);
-                    if ((ml2 >= MINMATCH) && (gain2 > gain1)) {
+                {   size_t offset2=99999999;
+                    size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
+                    int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1));   /* raw approx */
+                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7);
+                    if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
                         matchLength = ml2, offset = offset2, start = ip;
                         continue;
             }   }   }
@@ -1815,32 +1914,31 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
 
         /* catch up */
         if (offset) {
-            U32 matchIndex = (U32)((start-base) - offset);
+            U32 matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
             const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
             const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
             while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
-            offset_2 = offset_1; offset_1 = offset;
+            rep[1] = rep[0]; rep[0] = (U32)(offset - ZSTD_REP_MOVE);
         }
 
         /* store sequence */
 _storeSequence:
-        {
-            size_t litLength = start - anchor;
+        {   size_t const litLength = start - anchor;
             ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH);
             anchor = ip = start + matchLength;
         }
 
         /* check immediate repcode */
         while (ip <= ilimit) {
-            const U32 repIndex = (U32)((ip-base) - offset_2);
+            const U32 repIndex = (U32)((ip-base) - rep[1]);
             const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
             const BYTE* const repMatch = repBase + repIndex;
             if ((U32)((dictLimit-1) - repIndex) >= 3)   /* intentional overflow */
             if (MEM_read32(ip) == MEM_read32(repMatch)) {
                 /* repcode detected we should take it */
                 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
-                matchLength = ZSTD_count_2segments(ip+MINMATCH, repMatch+MINMATCH, iend, repEnd, prefixStart) + MINMATCH;
-                offset = offset_2; offset_2 = offset_1; offset_1 = offset;   /* swap offset history */
+                matchLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
+                offset = rep[1]; rep[1] = rep[0]; rep[0] = (U32)offset;   /* swap offset history */
                 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
                 ip += matchLength;
                 anchor = ip;
@@ -1850,13 +1948,13 @@ _storeSequence:
     }   }
 
     /* Last Literals */
-    {
-        size_t lastLLSize = iend - anchor;
+    {   size_t const lastLLSize = iend - anchor;
         memcpy(seqStorePtr->lit, anchor, lastLLSize);
         seqStorePtr->lit += lastLLSize;
     }
 }
 
+
 void ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
     ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0);
@@ -1877,14 +1975,9 @@ static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src,
     ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2);
 }
 
-static void ZSTD_compressBlock_opt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
-    ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0, 2);
-}
-
-static void ZSTD_compressBlock_opt_bt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
-{
-    ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1, 2);
+    ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize);
 }
 
 
@@ -1892,26 +1985,32 @@ typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t sr
 
 static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
 {
-    static const ZSTD_blockCompressor blockCompressor[2][7] = {
-        { ZSTD_compressBlock_fast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy,ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_opt, ZSTD_compressBlock_opt_bt },
-        { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_opt_extDict, ZSTD_compressBlock_opt_bt_extDict }
+    static const ZSTD_blockCompressor blockCompressor[2][6] = {
+#if 1
+        { ZSTD_compressBlock_fast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt },
+#else
+        { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict },
+#endif
+        { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict }
     };
 
     return blockCompressor[extDict][(U32)strat];
 }
 
 
-static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
-    ZSTD_blockCompressor blockCompressor = ZSTD_selectBlockCompressor(zc->params.strategy, zc->lowLimit < zc->dictLimit);
+    ZSTD_blockCompressor blockCompressor = ZSTD_selectBlockCompressor(zc->params.cParams.strategy, zc->lowLimit < zc->dictLimit);
     if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) return 0;   /* don't even attempt compression below a certain srcSize */
     blockCompressor(zc, src, srcSize);
-    return ZSTD_compressSequences(zc, dst, maxDstSize, srcSize);
+    return ZSTD_compressSequences(zc, dst, dstCapacity, srcSize);
 }
 
 
+
+
 static size_t ZSTD_compress_generic (ZSTD_CCtx* zc,
-                                        void* dst, size_t maxDstSize,
+                                        void* dst, size_t dstCapacity,
                                   const void* src, size_t srcSize)
 {
     size_t blockSize = zc->blockSize;
@@ -1919,25 +2018,30 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* zc,
     const BYTE* ip = (const BYTE*)src;
     BYTE* const ostart = (BYTE*)dst;
     BYTE* op = ostart;
-    const U32 maxDist = 1 << zc->params.windowLog;
+    const U32 maxDist = 1 << zc->params.cParams.windowLog;
+    ZSTD_stats_t* stats = &zc->seqStore.stats;
+
+    ZSTD_statsInit(stats);
 
     while (remaining) {
         size_t cSize;
+        ZSTD_statsResetFreqs(stats);
 
-        if (maxDstSize < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE) return ERROR(dstSize_tooSmall);   /* not enough space to store compressed block */
+        if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE) return ERROR(dstSize_tooSmall);   /* not enough space to store compressed block */
         if (remaining < blockSize) blockSize = remaining;
 
-        if ((U32)(ip+blockSize - zc->base) > zc->loadedDictEnd + maxDist) { /* enforce maxDist */
-            U32 newLowLimit = (U32)(ip+blockSize - zc->base) - maxDist;
+        if ((U32)(ip+blockSize - zc->base) > zc->loadedDictEnd + maxDist) {
+            /* enforce maxDist */
+            U32 const newLowLimit = (U32)(ip+blockSize - zc->base) - maxDist;
             if (zc->lowLimit < newLowLimit) zc->lowLimit = newLowLimit;
             if (zc->dictLimit < zc->lowLimit) zc->dictLimit = zc->lowLimit;
         }
 
-        cSize = ZSTD_compressBlock_internal(zc, op+ZSTD_blockHeaderSize, maxDstSize-ZSTD_blockHeaderSize, ip, blockSize);
+        cSize = ZSTD_compressBlock_internal(zc, op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, ip, blockSize);
         if (ZSTD_isError(cSize)) return cSize;
 
         if (cSize == 0) {  /* block is not compressible */
-            cSize = ZSTD_noCompressBlock(op, maxDstSize, ip, blockSize);
+            cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize);
             if (ZSTD_isError(cSize)) return cSize;
         } else {
             op[0] = (BYTE)(cSize>>16);
@@ -1948,36 +2052,62 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* zc,
         }
 
         remaining -= blockSize;
-        maxDstSize -= cSize;
+        dstCapacity -= cSize;
         ip += blockSize;
         op += cSize;
     }
 
+    ZSTD_statsPrint(stats, zc->params.cParams.searchLength);
     return op-ostart;
 }
 
 
+static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
+                                    ZSTD_parameters params, U64 pledgedSrcSize)
+{   BYTE* const op = (BYTE*)dst;
+    U32 const fcsId = params.fParams.contentSizeFlag ?
+                     (pledgedSrcSize>0) + (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) :   /* 0-3 */
+                      0;
+    BYTE const fdescriptor = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN)   /* windowLog : 4 KB - 128 MB */
+                                  | (fcsId << 6) );
+    size_t const hSize = ZSTD_frameHeaderSize_min + ZSTD_fcs_fieldSize[fcsId];
+    if (hSize > dstCapacity) return ERROR(dstSize_tooSmall);
+
+    MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
+    op[4] = fdescriptor;
+    switch(fcsId)
+    {
+        default:   /* impossible */
+        case 0 : break;
+        case 1 : op[5] = (BYTE)(pledgedSrcSize); break;
+        case 2 : MEM_writeLE16(op+5, (U16)(pledgedSrcSize-256)); break;
+        case 3 : MEM_writeLE64(op+5, (U64)(pledgedSrcSize)); break;
+    }
+    return hSize;
+}
+
+
 static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* zc,
-                              void* dst, size_t dstSize,
+                              void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
                                U32 frame)
 {
     const BYTE* const ip = (const BYTE*) src;
-    size_t hbSize = 0;
+    size_t fhSize = 0;
 
-    if (frame && (zc->stage==0)) {
-        hbSize = zc->hbSize;
-        if (dstSize <= hbSize) return ERROR(dstSize_tooSmall);
-        zc->stage = 1;
-        memcpy(dst, zc->headerBuffer, hbSize);
-        dstSize -= hbSize;
-        dst = (char*)dst + hbSize;
+    if (zc->stage==0) return ERROR(stage_wrong);
+    if (frame && (zc->stage==1)) {   /* copy saved header */
+        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, zc->params, srcSize);
+        if (ZSTD_isError(fhSize)) return fhSize;
+        dstCapacity -= fhSize;
+        dst = (char*)dst + fhSize;
+        zc->stage = 2;
     }
 
     /* Check if blocks follow each other */
     if (src != zc->nextSrc) {
         /* not contiguous */
-        size_t delta = zc->nextSrc - ip;
+        size_t const delta = zc->nextSrc - ip;
         zc->lowLimit = zc->dictLimit;
         zc->dictLimit = (U32)(zc->nextSrc - zc->base);
         zc->dictBase = zc->base;
@@ -1988,10 +2118,10 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* zc,
 
     /* preemptive overflow correction */
     if (zc->lowLimit > (1<<30)) {
-        U32 btplus = (zc->params.strategy == ZSTD_btlazy2) || (zc->params.strategy == ZSTD_btopt);
-        U32 contentMask = (1 << (zc->params.contentLog - btplus)) - 1;
-        U32 newLowLimit = zc->lowLimit & contentMask;   /* preserve position % contentSize */
-        U32 correction = zc->lowLimit - newLowLimit;
+        U32 const btplus = (zc->params.cParams.strategy == ZSTD_btlazy2) || (zc->params.cParams.strategy == ZSTD_btopt);
+        U32 const chainMask = (1 << (zc->params.cParams.chainLog - btplus)) - 1;
+        U32 const newLowLimit = zc->lowLimit & chainMask;   /* preserve position % chainSize */
+        U32 const correction = zc->lowLimit - newLowLimit;
         ZSTD_reduceIndex(zc, correction);
         zc->base += correction;
         zc->dictBase += correction;
@@ -2008,28 +2138,28 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* zc,
     }
 
     zc->nextSrc = ip + srcSize;
-    {
-        size_t cSize;
-        if (frame) cSize = ZSTD_compress_generic (zc, dst, dstSize, src, srcSize);
-        else cSize = ZSTD_compressBlock_internal (zc, dst, dstSize, src, srcSize);
+    {   size_t const cSize = frame ?
+                             ZSTD_compress_generic (zc, dst, dstCapacity, src, srcSize) :
+                             ZSTD_compressBlock_internal (zc, dst, dstCapacity, src, srcSize);
         if (ZSTD_isError(cSize)) return cSize;
-        return cSize + hbSize;
+        return cSize + fhSize;
     }
 }
 
 
 size_t ZSTD_compressContinue (ZSTD_CCtx* zc,
-                              void* dst, size_t dstSize,
+                              void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize)
 {
-    return ZSTD_compressContinue_internal(zc, dst, dstSize, src, srcSize, 1);
+    return ZSTD_compressContinue_internal(zc, dst, dstCapacity, src, srcSize, 1);
 }
 
 
-size_t ZSTD_compressBlock(ZSTD_CCtx* zc, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+size_t ZSTD_compressBlock(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
-    if (srcSize > BLOCKSIZE) return ERROR(srcSize_wrong);
-    return ZSTD_compressContinue_internal(zc, dst, maxDstSize, src, srcSize, 0);
+    if (srcSize > ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);
+    ZSTD_LOG_BLOCK("%p: ZSTD_compressBlock searchLength=%d\n", zc->base, zc->params.cParams.searchLength);
+    return ZSTD_compressContinue_internal(zc, dst, dstCapacity, src, srcSize, 0);
 }
 
 
@@ -2049,22 +2179,21 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t
     zc->nextSrc = iend;
     if (srcSize <= 8) return 0;
 
-    switch(zc->params.strategy)
+    switch(zc->params.cParams.strategy)
     {
     case ZSTD_fast:
-        ZSTD_fillHashTable (zc, iend, zc->params.searchLength);
+        ZSTD_fillHashTable (zc, iend, zc->params.cParams.searchLength);
         break;
 
     case ZSTD_greedy:
     case ZSTD_lazy:
     case ZSTD_lazy2:
-    case ZSTD_opt:
-        ZSTD_insertAndFindFirstIndex (zc, iend-8, zc->params.searchLength);
+        ZSTD_insertAndFindFirstIndex (zc, iend-8, zc->params.cParams.searchLength);
         break;
 
     case ZSTD_btlazy2:
     case ZSTD_btopt:
-        ZSTD_updateTree(zc, iend-8, iend, 1 << zc->params.searchLog, zc->params.searchLength);
+        ZSTD_updateTree(zc, iend-8, iend, 1 << zc->params.cParams.searchLog, zc->params.cParams.searchLength);
         break;
 
     default:
@@ -2078,10 +2207,10 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t
 
 /* Dictionary format :
      Magic == ZSTD_DICT_MAGIC (4 bytes)
-     Huff0 CTable (256 * 4 bytes)  => to be changed to read from writeCTable
+     HUF_writeCTable(256)
      Dictionary content
 */
-/*! ZSTD_loadDictEntropyStats
+/*! ZSTD_loadDictEntropyStats() :
     @return : size read from dictionary */
 static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* zc, const void* dict, size_t dictSize)
 {
@@ -2094,7 +2223,7 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* zc, const void* dict, size_t
     short litlengthNCount[MaxLL+1];
     unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog;
 
-    const size_t hufHeaderSize = HUF_readCTable(zc->hufTable, 255, dict, dictSize);
+    size_t const hufHeaderSize = HUF_readCTable(zc->hufTable, 255, dict, dictSize);
     if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
     zc->flagStaticTables = 1;
     dict = (const char*)dict + hufHeaderSize;
@@ -2122,145 +2251,190 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* zc, const void* dict, size_t
     return hufHeaderSize + offcodeHeaderSize + matchlengthHeaderSize + litlengthHeaderSize;
 }
 
-
+/** ZSTD_compress_insertDictionary() :
+*   @return : 0, or an error code */
 static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, size_t dictSize)
 {
-    if (dict && (dictSize>4)) {
-        U32 magic = MEM_readLE32(dict);
-        size_t eSize;
-        if (magic != ZSTD_DICT_MAGIC)
-            return ZSTD_loadDictionaryContent(zc, dict, dictSize);
+    if ((dict==NULL) || (dictSize<=4)) return 0;
 
-        eSize = ZSTD_loadDictEntropyStats(zc, (const char*)dict+4, dictSize-4) + 4;
+    /* default : dict is pure content */
+    if (MEM_readLE32(dict) != ZSTD_DICT_MAGIC) return ZSTD_loadDictionaryContent(zc, dict, dictSize);
+
+    /* known magic number : dict is parsed for entropy stats and content */
+    {   size_t const eSize = ZSTD_loadDictEntropyStats(zc, (const char*)dict+4 /* skip magic */, dictSize-4) + 4;
         if (ZSTD_isError(eSize)) return eSize;
         return ZSTD_loadDictionaryContent(zc, (const char*)dict+eSize, dictSize-eSize);
     }
-    return 0;
 }
 
 
-/*! ZSTD_compressBegin_advanced
+/*! ZSTD_compressBegin_internal() :
 *   @return : 0, or an error code */
-size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* zc,
+static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* zc,
                              const void* dict, size_t dictSize,
-                                   ZSTD_parameters params)
+                                   ZSTD_parameters params, U64 pledgedSrcSize)
 {
-    size_t errorCode;
+    { U32 const hashLog3 = (pledgedSrcSize || pledgedSrcSize >= 8192) ? ZSTD_HASHLOG3_MAX : ((pledgedSrcSize >= 2048) ? ZSTD_HASHLOG3_MIN + 1 : ZSTD_HASHLOG3_MIN);
+      zc->hashLog3 = (params.cParams.searchLength==3) ? hashLog3 : 0; }
 
-    ZSTD_validateParams(&params);
-
-    errorCode = ZSTD_resetCCtx_advanced(zc, params);
-    if (ZSTD_isError(errorCode)) return errorCode;
-
-    MEM_writeLE32(zc->headerBuffer, ZSTD_MAGICNUMBER);   /* Write Header */
-    ((BYTE*)zc->headerBuffer)[4] = (BYTE)(params.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN);
-    zc->hbSize = ZSTD_frameHeaderSize_min;
-    zc->stage = 0;
+    { size_t const resetError = ZSTD_resetCCtx_advanced(zc, params, 1);
+      if (ZSTD_isError(resetError)) return resetError; }
 
     return ZSTD_compress_insertDictionary(zc, dict, dictSize);
 }
 
 
+/*! ZSTD_compressBegin_advanced() :
+*   @return : 0, or an error code */
+size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* zc,
+                             const void* dict, size_t dictSize,
+                                   ZSTD_parameters params, U64 pledgedSrcSize)
+{
+    /* compression parameters verification and optimization */
+    { size_t const errorCode = ZSTD_checkCParams_advanced(params.cParams, pledgedSrcSize);
+      if (ZSTD_isError(errorCode)) return errorCode; }
+
+    return ZSTD_compressBegin_internal(zc, dict, dictSize, params, pledgedSrcSize);
+}
+
+
 size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* zc, const void* dict, size_t dictSize, int compressionLevel)
 {
-    return ZSTD_compressBegin_advanced(zc, dict, dictSize, ZSTD_getParams(compressionLevel, MAX(128 KB, dictSize)));
+    ZSTD_parameters params;
+    params.cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
+    params.fParams.contentSizeFlag = 0;
+    ZSTD_adjustCParams(&params.cParams, 0, dictSize);
+    ZSTD_LOG_BLOCK("%p: ZSTD_compressBegin_usingDict compressionLevel=%d\n", zc->base, compressionLevel);
+    return ZSTD_compressBegin_internal(zc, dict, dictSize, params, 0);
 }
 
+
+size_t ZSTD_compressBegin_targetSrcSize(ZSTD_CCtx* zc, const void* dict, size_t dictSize, size_t targetSrcSize, int compressionLevel)
+{
+    ZSTD_parameters params;
+    params.cParams = ZSTD_getCParams(compressionLevel, targetSrcSize, dictSize);
+    params.fParams.contentSizeFlag = 1;
+    ZSTD_adjustCParams(&params.cParams, targetSrcSize, dictSize);
+    ZSTD_LOG_BLOCK("%p: ZSTD_compressBegin_targetSrcSize compressionLevel=%d\n", zc->base, compressionLevel);
+    return ZSTD_compressBegin_internal(zc, dict, dictSize, params, targetSrcSize);
+}
+
+
 size_t ZSTD_compressBegin(ZSTD_CCtx* zc, int compressionLevel)
 {
-    return ZSTD_compressBegin_advanced(zc, NULL, 0, ZSTD_getParams(compressionLevel, 0));
+    ZSTD_LOG_BLOCK("%p: ZSTD_compressBegin compressionLevel=%d\n", zc->base, compressionLevel);
+    return ZSTD_compressBegin_usingDict(zc, NULL, 0, compressionLevel);
 }
 
 
-/*! ZSTD_compressEnd
-*   Write frame epilogue
+/*! ZSTD_compressEnd() :
+*   Write frame epilogue.
 *   @return : nb of bytes written into dst (or an error code) */
-size_t ZSTD_compressEnd(ZSTD_CCtx* zc, void* dst, size_t maxDstSize)
+size_t ZSTD_compressEnd(ZSTD_CCtx* zc, void* dst, size_t dstCapacity)
 {
     BYTE* op = (BYTE*)dst;
-    size_t hbSize = 0;
+    size_t fhSize = 0;
 
-    /* empty frame */
-    if (zc->stage==0) {
-        hbSize = zc->hbSize;
-        if (maxDstSize <= hbSize) return ERROR(dstSize_tooSmall);
-        zc->stage = 1;
-        memcpy(dst, zc->headerBuffer, hbSize);
-        maxDstSize -= hbSize;
-        op += hbSize;
+    /* not even init ! */
+    if (zc->stage==0) return ERROR(stage_wrong);
+
+    /* special case : empty frame */
+    if (zc->stage==1) {
+        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, zc->params, 0);
+        if (ZSTD_isError(fhSize)) return fhSize;
+        dstCapacity -= fhSize;
+        op += fhSize;
+        zc->stage = 2;
     }
 
     /* frame epilogue */
-    if (maxDstSize < 3) return ERROR(dstSize_tooSmall);
+    if (dstCapacity < 3) return ERROR(dstSize_tooSmall);
     op[0] = (BYTE)(bt_end << 6);
     op[1] = 0;
     op[2] = 0;
 
-    return 3+hbSize;
+    zc->stage = 0;  /* return to "created by not init" status */
+    return 3+fhSize;
 }
 
 
 size_t ZSTD_compress_usingPreparedCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx,
-                                       void* dst, size_t maxDstSize,
+                                       void* dst, size_t dstCapacity,
                                  const void* src, size_t srcSize)
 {
-    size_t outSize;
-    size_t errorCode = ZSTD_copyCCtx(cctx, preparedCCtx);
-    if (ZSTD_isError(errorCode)) return errorCode;
-    errorCode = ZSTD_compressContinue(cctx, dst, maxDstSize, src, srcSize);
-    if (ZSTD_isError(errorCode)) return errorCode;
-    outSize = errorCode;
-    errorCode = ZSTD_compressEnd(cctx, (char*)dst+outSize, maxDstSize-outSize);
-    if (ZSTD_isError(errorCode)) return errorCode;
-    outSize += errorCode;
-    return outSize;
+    {   size_t const errorCode = ZSTD_copyCCtx(cctx, preparedCCtx);
+        if (ZSTD_isError(errorCode)) return errorCode;
+    }
+    {   size_t const cSize = ZSTD_compressContinue(cctx, dst, dstCapacity, src, srcSize);
+        if (ZSTD_isError(cSize)) return cSize;
+
+        {   size_t const endSize = ZSTD_compressEnd(cctx, (char*)dst+cSize, dstCapacity-cSize);
+            if (ZSTD_isError(endSize)) return endSize;
+            return cSize + endSize;
+    }   }
 }
 
 
-size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
-                               void* dst, size_t maxDstSize,
+static size_t ZSTD_compress_internal (ZSTD_CCtx* ctx,
+                               void* dst, size_t dstCapacity,
                          const void* src, size_t srcSize,
                          const void* dict,size_t dictSize,
                                ZSTD_parameters params)
 {
     BYTE* const ostart = (BYTE*)dst;
     BYTE* op = ostart;
-    size_t oSize;
 
     /* Init */
-    oSize = ZSTD_compressBegin_advanced(ctx, dict, dictSize, params);
-    if(ZSTD_isError(oSize)) return oSize;
+    { size_t const errorCode = ZSTD_compressBegin_internal(ctx, dict, dictSize, params, srcSize);
+      if(ZSTD_isError(errorCode)) return errorCode; }
 
     /* body (compression) */
-    oSize = ZSTD_compressContinue (ctx, op,  maxDstSize, src, srcSize);
-    if(ZSTD_isError(oSize)) return oSize;
-    op += oSize;
-    maxDstSize -= oSize;
+    { size_t const oSize = ZSTD_compressContinue (ctx, op,  dstCapacity, src, srcSize);
+      if(ZSTD_isError(oSize)) return oSize;
+      op += oSize;
+      dstCapacity -= oSize; }
 
     /* Close frame */
-    oSize = ZSTD_compressEnd(ctx, op, maxDstSize);
-    if(ZSTD_isError(oSize)) return oSize;
-    op += oSize;
+    { size_t const oSize = ZSTD_compressEnd(ctx, op, dstCapacity);
+      if(ZSTD_isError(oSize)) return oSize;
+      op += oSize; }
 
     return (op - ostart);
 }
 
-size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel)
+size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
+                               void* dst, size_t dstCapacity,
+                         const void* src, size_t srcSize,
+                         const void* dict,size_t dictSize,
+                               ZSTD_parameters params)
 {
-    return ZSTD_compress_advanced(ctx, dst, maxDstSize, src, srcSize, dict, dictSize, ZSTD_getParams(compressionLevel, srcSize));
+    size_t const errorCode = ZSTD_checkCParams_advanced(params.cParams, srcSize);
+    if (ZSTD_isError(errorCode)) return errorCode;
+    return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
 }
 
-size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize, int compressionLevel)
+size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel)
 {
-    return ZSTD_compress_advanced(ctx, dst, maxDstSize, src, srcSize, NULL, 0, ZSTD_getParams(compressionLevel, srcSize));
+    ZSTD_parameters params;
+    ZSTD_LOG_BLOCK("%p: ZSTD_compress_usingDict srcSize=%d dictSize=%d compressionLevel=%d\n", ctx->base, (int)srcSize, (int)dictSize, compressionLevel);
+    params.cParams =  ZSTD_getCParams(compressionLevel, srcSize, dictSize);
+    params.fParams.contentSizeFlag = 1;
+    ZSTD_adjustCParams(&params.cParams, srcSize, dictSize);
+    return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
 }
 
-size_t ZSTD_compress(void* dst, size_t maxDstSize, const void* src, size_t srcSize, int compressionLevel)
+size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
+{
+    ZSTD_LOG_BLOCK("%p: ZSTD_compressCCtx srcSize=%d compressionLevel=%d\n", ctx->base, (int)srcSize, compressionLevel);
+    return ZSTD_compress_usingDict(ctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
+}
+
+size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
 {
     size_t result;
     ZSTD_CCtx ctxBody;
     memset(&ctxBody, 0, sizeof(ctxBody));
-    result = ZSTD_compressCCtx(&ctxBody, dst, maxDstSize, src, srcSize, compressionLevel);
+    result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel);
     free(ctxBody.workSpace);   /* can't free ctxBody, since it's on stack; just free heap content */
     return result;
 }
@@ -2268,127 +2442,132 @@ size_t ZSTD_compress(void* dst, size_t maxDstSize, const void* src, size_t srcSi
 
 /*-=====  Pre-defined compression levels  =====-*/
 
-#define ZSTD_MAX_CLEVEL 21
+#define ZSTD_MAX_CLEVEL 22
 unsigned ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
 
-static const ZSTD_parameters ZSTD_defaultParameters[4][ZSTD_MAX_CLEVEL+1] = {
+static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
 {   /* "default" */
-    /* l,  W,  C,  H,  S,  L, SL, strat */
-    {  0,  0,  0,  0,  0,  0,  0, ZSTD_fast    },  /* level  0 - never used */
-    {  0, 19, 13, 14,  1,  7,  4, ZSTD_fast    },  /* level  1 */
-    {  0, 19, 15, 16,  1,  6,  4, ZSTD_fast    },  /* level  2 */
-    {  0, 20, 18, 20,  1,  6,  4, ZSTD_fast    },  /* level  3 */
-    {  0, 21, 19, 21,  1,  6,  4, ZSTD_fast    },  /* level  4 */
-    {  0, 20, 14, 18,  3,  5,  4, ZSTD_greedy  },  /* level  5 */
-    {  0, 20, 18, 19,  3,  5,  4, ZSTD_greedy  },  /* level  6 */
-    {  0, 21, 17, 20,  3,  5,  4, ZSTD_lazy    },  /* level  7 */
-    {  0, 21, 19, 20,  3,  5,  4, ZSTD_lazy    },  /* level  8 */
-    {  0, 21, 20, 20,  3,  5,  4, ZSTD_lazy2   },  /* level  9 */
-    {  0, 21, 19, 21,  4,  5,  4, ZSTD_lazy2   },  /* level 10 */
-    {  0, 22, 20, 22,  4,  5,  4, ZSTD_lazy2   },  /* level 11 */
-    {  0, 22, 20, 22,  5,  5,  4, ZSTD_lazy2   },  /* level 12 */
-    {  0, 22, 21, 22,  5,  5,  4, ZSTD_lazy2   },  /* level 13 */
-    {  0, 22, 22, 23,  5,  5,  4, ZSTD_lazy2   },  /* level 14 */
-    {  0, 23, 23, 23,  5,  5,  4, ZSTD_lazy2   },  /* level 15 */
-    {  0, 23, 22, 22,  5,  5,  4, ZSTD_btlazy2 },  /* level 16 */
-    {  0, 24, 24, 23,  4,  5,  4, ZSTD_btlazy2 },  /* level 17 */
-    {  0, 24, 24, 23,  5,  5, 30, ZSTD_btopt   },  /* level 18 */
-    {  0, 25, 25, 24,  5,  4, 40, ZSTD_btopt   },  /* level 19 */
-    {  0, 26, 26, 25,  8,  4,256, ZSTD_btopt   },  /* level 20 */
-    {  0, 26, 27, 25, 10,  4,256, ZSTD_btopt   },  /* level 21 */
+    /* W,  C,  H,  S,  L, TL, strat */
+    {  0,  0,  0,  0,  0,  0, ZSTD_fast    },  /* level  0 - never used */
+    { 19, 13, 14,  1,  7,  4, ZSTD_fast    },  /* level  1 */
+    { 19, 15, 16,  1,  6,  4, ZSTD_fast    },  /* level  2 */
+    { 20, 18, 20,  1,  6,  4, ZSTD_fast    },  /* level  3 */
+    { 20, 13, 17,  2,  5,  4, ZSTD_greedy  },  /* level  4.*/
+    { 20, 15, 18,  3,  5,  4, ZSTD_greedy  },  /* level  5 */
+    { 21, 16, 19,  2,  5,  4, ZSTD_lazy    },  /* level  6 */
+    { 21, 17, 20,  3,  5,  4, ZSTD_lazy    },  /* level  7 */
+    { 21, 18, 20,  3,  5,  4, ZSTD_lazy2   },  /* level  8.*/
+    { 21, 20, 20,  3,  5,  4, ZSTD_lazy2   },  /* level  9 */
+    { 21, 19, 21,  4,  5,  4, ZSTD_lazy2   },  /* level 10 */
+    { 22, 20, 22,  4,  5,  4, ZSTD_lazy2   },  /* level 11 */
+    { 22, 20, 22,  5,  5,  4, ZSTD_lazy2   },  /* level 12 */
+    { 22, 21, 22,  5,  5,  4, ZSTD_lazy2   },  /* level 13 */
+    { 22, 21, 22,  6,  5,  4, ZSTD_lazy2   },  /* level 14 */
+    { 22, 21, 21,  5,  5,  4, ZSTD_btlazy2 },  /* level 15 */
+    { 23, 22, 22,  5,  5,  4, ZSTD_btlazy2 },  /* level 16 */
+    { 23, 23, 22,  5,  5,  4, ZSTD_btlazy2 },  /* level 17.*/
+    { 23, 23, 22,  6,  5, 24, ZSTD_btopt   },  /* level 18.*/
+    { 23, 23, 22,  6,  3, 48, ZSTD_btopt   },  /* level 19.*/
+    { 25, 26, 23,  7,  3, 64, ZSTD_btopt   },  /* level 20.*/
+    { 26, 26, 23,  7,  3,256, ZSTD_btopt   },  /* level 21.*/
+    { 27, 27, 25,  9,  3,512, ZSTD_btopt   },  /* level 22.*/
 },
 {   /* for srcSize <= 256 KB */
-    /* l,  W,  C,  H,  S,  L,  T, strat */
-    {  0,  0,  0,  0,  0,  0,  0, ZSTD_fast    },  /* level  0 */
-    {  0, 18, 14, 15,  1,  6,  4, ZSTD_fast    },  /* level  1 */
-    {  0, 18, 14, 16,  1,  5,  4, ZSTD_fast    },  /* level  2 */
-    {  0, 18, 14, 17,  1,  5,  4, ZSTD_fast    },  /* level  3.*/
-    {  0, 18, 14, 15,  4,  4,  4, ZSTD_greedy  },  /* level  4 */
-    {  0, 18, 16, 17,  4,  4,  4, ZSTD_greedy  },  /* level  5 */
-    {  0, 18, 17, 17,  3,  4,  4, ZSTD_lazy    },  /* level  6 */
-    {  0, 18, 17, 17,  4,  4,  4, ZSTD_lazy    },  /* level  7 */
-    {  0, 18, 17, 17,  4,  4,  4, ZSTD_lazy2   },  /* level  8 */
-    {  0, 18, 17, 17,  5,  4,  4, ZSTD_lazy2   },  /* level  9 */
-    {  0, 18, 17, 17,  6,  4,  4, ZSTD_lazy2   },  /* level 10 */
-    {  0, 18, 17, 17,  7,  4,  4, ZSTD_lazy2   },  /* level 11 */
-    {  0, 18, 18, 17,  4,  4,  4, ZSTD_btlazy2 },  /* level 12 */
-    {  0, 18, 19, 17,  7,  4,  4, ZSTD_btlazy2 },  /* level 13.*/
-    {  0, 18, 17, 19,  8,  4, 24, ZSTD_btopt   },  /* level 14.*/
-    {  0, 18, 19, 19,  8,  4, 48, ZSTD_btopt   },  /* level 15.*/
-    {  0, 18, 19, 18,  9,  4,128, ZSTD_btopt   },  /* level 16.*/
-    {  0, 18, 19, 18,  9,  4,192, ZSTD_btopt   },  /* level 17.*/
-    {  0, 18, 19, 18,  9,  4,256, ZSTD_btopt   },  /* level 18.*/
-    {  0, 18, 19, 18, 10,  4,256, ZSTD_btopt   },  /* level 19.*/
-    {  0, 18, 19, 18, 11,  4,256, ZSTD_btopt   },  /* level 20.*/
-    {  0, 18, 19, 18, 12,  4,256, ZSTD_btopt   },  /* level 21.*/
+    /* W,  C,  H,  S,  L,  T, strat */
+    {  0,  0,  0,  0,  0,  0, ZSTD_fast    },  /* level  0 */
+    { 18, 13, 14,  1,  6,  4, ZSTD_fast    },  /* level  1 */
+    { 18, 15, 17,  1,  5,  4, ZSTD_fast    },  /* level  2 */
+    { 18, 13, 15,  1,  5,  4, ZSTD_greedy  },  /* level  3.*/
+    { 18, 15, 17,  1,  5,  4, ZSTD_greedy  },  /* level  4.*/
+    { 18, 16, 17,  4,  5,  4, ZSTD_greedy  },  /* level  5 */
+    { 18, 17, 17,  5,  5,  4, ZSTD_greedy  },  /* level  6 */
+    { 18, 17, 17,  4,  4,  4, ZSTD_lazy    },  /* level  7 */
+    { 18, 17, 17,  4,  4,  4, ZSTD_lazy2   },  /* level  8 */
+    { 18, 17, 17,  5,  4,  4, ZSTD_lazy2   },  /* level  9 */
+    { 18, 17, 17,  6,  4,  4, ZSTD_lazy2   },  /* level 10 */
+    { 18, 18, 17,  6,  4,  4, ZSTD_lazy2   },  /* level 11.*/
+    { 18, 18, 17,  7,  4,  4, ZSTD_lazy2   },  /* level 12.*/
+    { 18, 19, 17,  7,  4,  4, ZSTD_btlazy2 },  /* level 13 */
+    { 18, 18, 18,  4,  4, 16, ZSTD_btopt   },  /* level 14.*/
+    { 18, 18, 18,  8,  4, 24, ZSTD_btopt   },  /* level 15.*/
+    { 18, 19, 18,  8,  3, 48, ZSTD_btopt   },  /* level 16.*/
+    { 18, 19, 18,  8,  3, 96, ZSTD_btopt   },  /* level 17.*/
+    { 18, 19, 18,  9,  3,128, ZSTD_btopt   },  /* level 18.*/
+    { 18, 19, 18, 10,  3,256, ZSTD_btopt   },  /* level 19.*/
+    { 18, 19, 18, 11,  3,512, ZSTD_btopt   },  /* level 20.*/
+    { 18, 19, 18, 12,  3,512, ZSTD_btopt   },  /* level 21.*/
+    { 18, 19, 18, 13,  3,512, ZSTD_btopt   },  /* level 22.*/
 },
 {   /* for srcSize <= 128 KB */
-    /* l,  W,  C,  H,  S,  L,  T, strat */
-    {  0,  0,  0,  0,  0,  0,  0, ZSTD_fast    },  /* level  0 - never used */
-    {  0, 17, 12, 13,  1,  6,  4, ZSTD_fast    },  /* level  1 */
-    {  0, 17, 13, 16,  1,  5,  4, ZSTD_fast    },  /* level  2 */
-    {  0, 17, 13, 14,  2,  5,  4, ZSTD_greedy  },  /* level  3 */
-    {  0, 17, 13, 15,  3,  4,  4, ZSTD_greedy  },  /* level  4 */
-    {  0, 17, 15, 17,  4,  4,  4, ZSTD_greedy  },  /* level  5 */
-    {  0, 17, 16, 17,  3,  4,  4, ZSTD_lazy    },  /* level  6 */
-    {  0, 17, 16, 17,  4,  4,  4, ZSTD_lazy    },  /* level  7 */
-    {  0, 17, 17, 16,  4,  4,  4, ZSTD_lazy2   },  /* level  8 */
-    {  0, 17, 17, 16,  5,  4,  4, ZSTD_lazy2   },  /* level  9 */
-    {  0, 17, 17, 16,  6,  4,  4, ZSTD_lazy2   },  /* level 10 */
-    {  0, 17, 17, 17,  7,  4,  4, ZSTD_lazy2   },  /* level 11 */
-    {  0, 17, 17, 17,  8,  4,  4, ZSTD_lazy2   },  /* level 12 */
-    {  0, 17, 17, 17,  9,  4,  4, ZSTD_lazy2   },  /* level 13 */
-    {  0, 17, 18, 16,  5,  4, 20, ZSTD_btopt   },  /* level 14 */
-    {  0, 17, 18, 16,  9,  4, 48, ZSTD_btopt   },  /* level 15 */
-    {  0, 17, 18, 17,  7,  4,128, ZSTD_btopt   },  /* level 16 */
-    {  0, 17, 18, 17,  8,  4,128, ZSTD_btopt   },  /* level 17 */
-    {  0, 17, 18, 17,  8,  4,256, ZSTD_btopt   },  /* level 18 */
-    {  0, 17, 18, 17,  9,  4,256, ZSTD_btopt   },  /* level 19 */
-    {  0, 17, 18, 17, 10,  4,512, ZSTD_btopt   },  /* level 20 */
-    {  0, 17, 18, 17, 11,  4,512, ZSTD_btopt   },  /* level 21 */
-
+    /* W,  C,  H,  S,  L,  T, strat */
+    {  0,  0,  0,  0,  0,  0, ZSTD_fast    },  /* level  0 - never used */
+    { 17, 12, 13,  1,  6,  4, ZSTD_fast    },  /* level  1 */
+    { 17, 13, 16,  1,  5,  4, ZSTD_fast    },  /* level  2 */
+    { 17, 13, 14,  2,  5,  4, ZSTD_greedy  },  /* level  3 */
+    { 17, 13, 15,  3,  4,  4, ZSTD_greedy  },  /* level  4 */
+    { 17, 15, 17,  4,  4,  4, ZSTD_greedy  },  /* level  5 */
+    { 17, 16, 17,  3,  4,  4, ZSTD_lazy    },  /* level  6 */
+    { 17, 15, 17,  4,  4,  4, ZSTD_lazy2   },  /* level  7 */
+    { 17, 17, 17,  4,  4,  4, ZSTD_lazy2   },  /* level  8 */
+    { 17, 17, 17,  5,  4,  4, ZSTD_lazy2   },  /* level  9 */
+    { 17, 17, 17,  6,  4,  4, ZSTD_lazy2   },  /* level 10 */
+    { 17, 17, 17,  7,  4,  4, ZSTD_lazy2   },  /* level 11 */
+    { 17, 17, 17,  8,  4,  4, ZSTD_lazy2   },  /* level 12 */
+    { 17, 18, 17,  6,  4,  4, ZSTD_btlazy2 },  /* level 13.*/
+    { 17, 17, 17,  7,  3,  8, ZSTD_btopt   },  /* level 14.*/
+    { 17, 17, 17,  7,  3, 16, ZSTD_btopt   },  /* level 15.*/
+    { 17, 18, 17,  7,  3, 32, ZSTD_btopt   },  /* level 16.*/
+    { 17, 18, 17,  7,  3, 64, ZSTD_btopt   },  /* level 17.*/
+    { 17, 18, 17,  7,  3,256, ZSTD_btopt   },  /* level 18.*/
+    { 17, 18, 17,  8,  3,256, ZSTD_btopt   },  /* level 19.*/
+    { 17, 18, 17,  9,  3,256, ZSTD_btopt   },  /* level 20.*/
+    { 17, 18, 17, 10,  3,256, ZSTD_btopt   },  /* level 21.*/
+    { 17, 18, 17, 11,  3,256, ZSTD_btopt   },  /* level 22.*/
 },
 {   /* for srcSize <= 16 KB */
-    /* l,  W,  C,  H,  S,  L,  T, strat */
-    {  0,  0,  0,  0,  0,  0,  0, ZSTD_fast    },  /* level  0 -- never used */
-    {  0, 14, 14, 14,  1,  4,  4, ZSTD_fast    },  /* level  1 */
-    {  0, 14, 14, 15,  1,  4,  4, ZSTD_fast    },  /* level  2 */
-    {  0, 14, 13, 15,  4,  4,  4, ZSTD_greedy  },  /* level  3 */
-    {  0, 14, 14, 15,  3,  4,  4, ZSTD_lazy    },  /* level  4 */
-    {  0, 14, 14, 14,  6,  4,  4, ZSTD_lazy    },  /* level  5 */
-    {  0, 14, 14, 14,  5,  4,  4, ZSTD_lazy2   },  /* level  6 */
-    {  0, 14, 14, 14,  7,  4,  4, ZSTD_lazy2   },  /* level  7 */
-    {  0, 14, 14, 14,  8,  4,  4, ZSTD_lazy2   },  /* level  8 */
-    {  0, 14, 14, 14,  9,  4,  4, ZSTD_lazy2   },  /* level  9 */
-    {  0, 14, 14, 14, 10,  4,  4, ZSTD_lazy2   },  /* level 10 */
-    {  0, 14, 14, 14, 11,  4,  4, ZSTD_lazy2   },  /* level 11 */
-    {  0, 14, 15, 15, 12,  4, 32, ZSTD_btopt   },  /* level 12 */
-    {  0, 14, 15, 15, 12,  4, 64, ZSTD_btopt   },  /* level 13 */
-    {  0, 14, 15, 15, 12,  4, 96, ZSTD_btopt   },  /* level 14 */
-    {  0, 14, 15, 15, 12,  4,128, ZSTD_btopt   },  /* level 15 */
-    {  0, 14, 15, 15, 12,  4,256, ZSTD_btopt   },  /* level 16 */
-    {  0, 14, 15, 15, 13,  4,256, ZSTD_btopt   },  /* level 17 */
-    {  0, 14, 15, 15, 14,  4,256, ZSTD_btopt   },  /* level 18 */
-    {  0, 14, 15, 15, 15,  4,256, ZSTD_btopt   },  /* level 19 */
-    {  0, 14, 15, 15, 16,  4,256, ZSTD_btopt   },  /* level 20 */
-    {  0, 14, 15, 15, 17,  4,256, ZSTD_btopt   },  /* level 21 */
+    /* W,  C,  H,  S,  L,  T, strat */
+    {  0,  0,  0,  0,  0,  0, ZSTD_fast    },  /* level  0 -- never used */
+    { 14, 14, 14,  1,  4,  4, ZSTD_fast    },  /* level  1 */
+    { 14, 14, 15,  1,  4,  4, ZSTD_fast    },  /* level  2 */
+    { 14, 14, 14,  4,  4,  4, ZSTD_greedy  },  /* level  3.*/
+    { 14, 14, 14,  3,  4,  4, ZSTD_lazy    },  /* level  4.*/
+    { 14, 14, 14,  4,  4,  4, ZSTD_lazy2   },  /* level  5 */
+    { 14, 14, 14,  5,  4,  4, ZSTD_lazy2   },  /* level  6 */
+    { 14, 14, 14,  6,  4,  4, ZSTD_lazy2   },  /* level  7.*/
+    { 14, 14, 14,  7,  4,  4, ZSTD_lazy2   },  /* level  8.*/
+    { 14, 15, 14,  6,  4,  4, ZSTD_btlazy2 },  /* level  9.*/
+    { 14, 15, 14,  3,  3,  6, ZSTD_btopt   },  /* level 10.*/
+    { 14, 15, 14,  6,  3,  8, ZSTD_btopt   },  /* level 11.*/
+    { 14, 15, 14,  6,  3, 16, ZSTD_btopt   },  /* level 12.*/
+    { 14, 15, 14,  6,  3, 24, ZSTD_btopt   },  /* level 13.*/
+    { 14, 15, 15,  6,  3, 48, ZSTD_btopt   },  /* level 14.*/
+    { 14, 15, 15,  6,  3, 64, ZSTD_btopt   },  /* level 15.*/
+    { 14, 15, 15,  6,  3, 96, ZSTD_btopt   },  /* level 16.*/
+    { 14, 15, 15,  6,  3,128, ZSTD_btopt   },  /* level 17.*/
+    { 14, 15, 15,  6,  3,256, ZSTD_btopt   },  /* level 18.*/
+    { 14, 15, 15,  7,  3,256, ZSTD_btopt   },  /* level 19.*/
+    { 14, 15, 15,  8,  3,256, ZSTD_btopt   },  /* level 20.*/
+    { 14, 15, 15,  9,  3,256, ZSTD_btopt   },  /* level 21.*/
+    { 14, 15, 15, 10,  3,256, ZSTD_btopt   },  /* level 22.*/
 },
 };
 
-/*! ZSTD_getParams
+/*! ZSTD_getParams() :
 *   @return ZSTD_parameters structure for a selected compression level and srcSize.
-*   @srcSizeHint value is optional, select 0 if not known */
-ZSTD_parameters ZSTD_getParams(int compressionLevel, U64 srcSizeHint)
+*   `srcSize` value is optional, select 0 if not known */
+ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, U64 srcSize, size_t dictSize)
 {
-    ZSTD_parameters result;
-    int tableID = ((srcSizeHint-1) <= 256 KB) + ((srcSizeHint-1) <= 128 KB) + ((srcSizeHint-1) <= 16 KB);   /* intentional underflow for srcSizeHint == 0 */
+    ZSTD_compressionParameters cp;
+    size_t const addedSize = srcSize ? 0 : 500;
+    U64 const rSize = srcSize+dictSize ? srcSize+dictSize+addedSize : (U64)-1;
+    U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB);   /* intentional underflow for srcSizeHint == 0 */
     if (compressionLevel<=0) compressionLevel = 1;
     if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL;
-#if ZSTD_OPT_DEBUG >= 1
-    tableID=0;
-#endif
-    result = ZSTD_defaultParameters[tableID][compressionLevel];
-    result.srcSize = srcSizeHint;
-    return result;
+    cp = ZSTD_defaultCParameters[tableID][compressionLevel];
+    if (MEM_32bits()) {   /* auto-correction, for 32-bits mode */
+        if (cp.windowLog > ZSTD_WINDOWLOG_MAX) cp.windowLog = ZSTD_WINDOWLOG_MAX;
+        if (cp.chainLog > ZSTD_CHAINLOG_MAX) cp.chainLog = ZSTD_CHAINLOG_MAX;
+        if (cp.hashLog > ZSTD_HASHLOG_MAX) cp.hashLog = ZSTD_HASHLOG_MAX;
+    }
+    return cp;
 }
-
diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c
index bfa0ea33..5d3f4422 100644
--- a/lib/zstd_decompress.c
+++ b/lib/zstd_decompress.c
@@ -26,7 +26,7 @@
     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
     You can contact the author at :
-    - zstd source repository : https://github.com/Cyan4973/zstd
+    - zstd homepage : http://www.zstd.net
 */
 
 /* ***************************************************************
@@ -75,7 +75,6 @@
 #  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
 #  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
 #else
-#  define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
 #  ifdef __GNUC__
 #    define FORCE_INLINE static inline __attribute__((always_inline))
 #  else
@@ -84,23 +83,13 @@
 #endif
 
 
-/*-*************************************
-*  Local types
-***************************************/
-typedef struct
-{
-    blockType_t blockType;
-    U32 origSize;
-} blockProperties_t;
-
-
-/* *******************************************************
+/*_*******************************************************
 *  Memory operations
 **********************************************************/
 static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
 
 
-/* *************************************
+/*-*************************************
 *  Error Management
 ***************************************/
 unsigned ZSTD_versionNumber (void) { return ZSTD_VERSION_NUMBER; }
@@ -118,7 +107,7 @@ ZSTD_ErrorCode ZSTD_getError(size_t code) { return ERR_getError(code); }
 const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); }
 
 
-/* *************************************************************
+/*-*************************************************************
 *   Context management
 ***************************************************************/
 typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
@@ -136,18 +125,18 @@ struct ZSTD_DCtx_s
     const void* dictEnd;
     size_t expected;
     size_t headerSize;
-    ZSTD_parameters params;
+    ZSTD_frameParams fParams;
     blockType_t bType;   /* used in ZSTD_decompressContinue(), to transfer blockType between header decoding and block decoding stages */
     ZSTD_dStage stage;
-    U32 flagStaticTables;
+    U32 flagRepeatTable;
     const BYTE* litPtr;
     size_t litBufSize;
     size_t litSize;
-    BYTE litBuffer[BLOCKSIZE + WILDCOPY_OVERLENGTH];
-    BYTE headerBuffer[ZSTD_frameHeaderSize_max];
+    BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
+    BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
 };  /* typedef'd to ZSTD_DCtx within "zstd_static.h" */
 
-size_t sizeofDCtx (void) { return sizeof(ZSTD_DCtx); }
+size_t ZSTD_sizeofDCtx (void) { return sizeof(ZSTD_DCtx); }   /* non published interface */
 
 size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
 {
@@ -158,7 +147,7 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
     dctx->vBase = NULL;
     dctx->dictEnd = NULL;
     dctx->hufTableX4[0] = HufLog;
-    dctx->flagStaticTables = 0;
+    dctx->flagRepeatTable = 0;
     return 0;
 }
 
@@ -179,19 +168,19 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
 void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
 {
     memcpy(dstDCtx, srcDCtx,
-           sizeof(ZSTD_DCtx) - (BLOCKSIZE+WILDCOPY_OVERLENGTH + ZSTD_frameHeaderSize_max));  /* no need to copy workspace */
+           sizeof(ZSTD_DCtx) - (ZSTD_BLOCKSIZE_MAX+WILDCOPY_OVERLENGTH + ZSTD_frameHeaderSize_max));  /* no need to copy workspace */
 }
 
 
-/* *************************************************************
+/*-*************************************************************
 *   Decompression section
 ***************************************************************/
 
 /* Frame format description
    Frame Header -  [ Block Header - Block ] - Frame End
    1) Frame Header
-      - 4 bytes - Magic Number : ZSTD_MAGICNUMBER (defined within zstd_internal.h)
-      - 1 byte  - Window Descriptor
+      - 4 bytes - Magic Number : ZSTD_MAGICNUMBER (defined within zstd_static.h)
+      - 1 byte  - Frame Descriptor
    2) Block Header
       - 3 bytes, starting with a 2-bits descriptor
                  Uncompressed, Compressed, Frame End, unused
@@ -201,7 +190,24 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
       - 3 bytes, compatible with Block Header
 */
 
-/* Block format description
+
+/* Frame descriptor
+
+   1 byte, using :
+   bit 0-3 : windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN   (see zstd_internal.h)
+   bit 4   : minmatch 4(0) or 3(1)
+   bit 5   : reserved (must be zero)
+   bit 6-7 : Frame content size : unknown, 1 byte, 2 bytes, 8 bytes
+
+   Optional : content size (0, 1, 2 or 8 bytes)
+   0 : unknown
+   1 : 0-255 bytes
+   2 : 256 - 65535+256
+   8 : up to 16 exa
+*/
+
+
+/* Compressed Block, format description
 
    Block = Literal Section - Sequences Section
    Prerequisite : size of (compressed) block, maximum size of regenerated data
@@ -267,63 +273,78 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
       TO DO
 */
 
-
-/** ZSTD_decodeFrameHeader_Part1() :
-*   decode the 1st part of the Frame Header, which tells Frame Header size.
-*   srcSize must be == ZSTD_frameHeaderSize_min.
-*   @return : the full size of the Frame Header */
-static size_t ZSTD_decodeFrameHeader_Part1(ZSTD_DCtx* zc, const void* src, size_t srcSize)
+/** ZSTD_frameHeaderSize() :
+*   srcSize must be >= ZSTD_frameHeaderSize_min.
+*   @return : size of the Frame Header */
+static size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
 {
-    U32 magicNumber;
-    if (srcSize != ZSTD_frameHeaderSize_min)
-        return ERROR(srcSize_wrong);
-    magicNumber = MEM_readLE32(src);
-    if (magicNumber != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown);
-    zc->headerSize = ZSTD_frameHeaderSize_min;
-    return zc->headerSize;
+    U32 fcsId;
+    if (srcSize < ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong);
+    fcsId = (((const BYTE*)src)[4]) >> 6;
+    return ZSTD_frameHeaderSize_min + ZSTD_fcs_fieldSize[fcsId];
 }
 
 
-size_t ZSTD_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcSize)
+/** ZSTD_getFrameParams() :
+*   decode Frame Header, or provide expected `srcSize`.
+*   @return : 0, `fparamsPtr` is correctly filled,
+*            >0, `srcSize` is too small, result is expected `srcSize`,
+*             or an error code, which can be tested using ZSTD_isError() */
+size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize)
 {
-    U32 magicNumber;
-    if (srcSize < ZSTD_frameHeaderSize_min) return ZSTD_frameHeaderSize_max;
-    magicNumber = MEM_readLE32(src);
-    if (magicNumber != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown);
-    memset(params, 0, sizeof(*params));
-    params->windowLog = (((const BYTE*)src)[4] & 15) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
-    if ((((const BYTE*)src)[4] >> 4) != 0) return ERROR(frameParameter_unsupported);   /* reserved bits */
+    const BYTE* ip = (const BYTE*)src;
+
+    if (srcSize < ZSTD_frameHeaderSize_min) return ZSTD_frameHeaderSize_min;
+    if (MEM_readLE32(src) != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown);
+
+    /* ensure there is enough `srcSize` to fully read/decode frame header */
+    { size_t const fhsize = ZSTD_frameHeaderSize(src, srcSize);
+      if (srcSize < fhsize) return fhsize; }
+
+    memset(fparamsPtr, 0, sizeof(*fparamsPtr));
+    {   BYTE const frameDesc = ip[4];
+        fparamsPtr->windowLog = (frameDesc & 0xF) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
+        if ((frameDesc & 0x20) != 0) return ERROR(frameParameter_unsupported);   /* reserved 1 bit */
+        switch(frameDesc >> 6)  /* fcsId */
+        {
+            default:   /* impossible */
+            case 0 : fparamsPtr->frameContentSize = 0; break;
+            case 1 : fparamsPtr->frameContentSize = ip[5]; break;
+            case 2 : fparamsPtr->frameContentSize = MEM_readLE16(ip+5)+256; break;
+            case 3 : fparamsPtr->frameContentSize = MEM_readLE64(ip+5); break;
+    }   }
     return 0;
 }
 
-/** ZSTD_decodeFrameHeader_Part2() :
-*   decode the full Frame Header.
-*   srcSize must be the size provided by ZSTD_decodeFrameHeader_Part1().
-*   @return : 0, or an error code, which can be tested using ZSTD_isError() */
-static size_t ZSTD_decodeFrameHeader_Part2(ZSTD_DCtx* zc, const void* src, size_t srcSize)
+
+/** ZSTD_decodeFrameHeader() :
+*   `srcSize` must be the size provided by ZSTD_frameHeaderSize().
+*   @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */
+static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* zc, const void* src, size_t srcSize)
 {
-    size_t result;
-    if (srcSize != zc->headerSize)
-        return ERROR(srcSize_wrong);
-    result = ZSTD_getFrameParams(&(zc->params), src, srcSize);
-    if ((MEM_32bits()) && (zc->params.windowLog > 25)) return ERROR(frameParameter_unsupportedBy32bits);
+    size_t const result = ZSTD_getFrameParams(&(zc->fParams), src, srcSize);
+    if ((MEM_32bits()) && (zc->fParams.windowLog > 25)) return ERROR(frameParameter_unsupportedBy32bits);
     return result;
 }
 
 
+typedef struct
+{
+    blockType_t blockType;
+    U32 origSize;
+} blockProperties_t;
+
+/*! ZSTD_getcBlockSize() :
+*   Provides the size of compressed block from block header `src` */
 size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
 {
     const BYTE* const in = (const BYTE* const)src;
-    BYTE headerFlags;
     U32 cSize;
 
-    if (srcSize < 3)
-        return ERROR(srcSize_wrong);
+    if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
 
-    headerFlags = *in;
+    bpPtr->blockType = (blockType_t)((*in) >> 6);
     cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
-
-    bpPtr->blockType = (blockType_t)(headerFlags >> 6);
     bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
 
     if (bpPtr->blockType == bt_end) return 0;
@@ -332,9 +353,9 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bp
 }
 
 
-static size_t ZSTD_copyRawBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
-    if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
+    if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall);
     memcpy(dst, src, srcSize);
     return srcSize;
 }
@@ -353,9 +374,9 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
     switch(istart[0]>> 6)
     {
     case IS_HUF:
-        {
-            size_t litSize, litCSize, singleStream=0;
+        {   size_t litSize, litCSize, singleStream=0;
             U32 lhSize = ((istart[0]) >> 4) & 3;
+            if (srcSize < 5) return ERROR(corruption_detected);   /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for lhSize, + cSize (+nbSeq) */
             switch(lhSize)
             {
             case 0: case 1: default:   /* note : default is impossible, since lhSize into [0..3] */
@@ -378,7 +399,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                 litCSize = ((istart[2] &  3) << 16) + (istart[3] << 8) + istart[4];
                 break;
             }
-            if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
+            if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
+            if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
 
             if (HUF_isError(singleStream ?
                             HUF_decompress1X2(dctx->litBuffer, litSize, istart+lhSize, litCSize) :
@@ -386,18 +408,16 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                 return ERROR(corruption_detected);
 
             dctx->litPtr = dctx->litBuffer;
-            dctx->litBufSize = BLOCKSIZE+8;
+            dctx->litBufSize = ZSTD_BLOCKSIZE_MAX+8;
             dctx->litSize = litSize;
             return litCSize + lhSize;
         }
     case IS_PCH:
-        {
-            size_t errorCode;
-            size_t litSize, litCSize;
+        {   size_t litSize, litCSize;
             U32 lhSize = ((istart[0]) >> 4) & 3;
             if (lhSize != 1)  /* only case supported for now : small litSize, single stream */
                 return ERROR(corruption_detected);
-            if (!dctx->flagStaticTables)
+            if (!dctx->flagRepeatTable)
                 return ERROR(dictionary_corrupted);
 
             /* 2 - 2 - 10 - 10 */
@@ -405,17 +425,16 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
             litSize  = ((istart[0] & 15) << 6) + (istart[1] >> 2);
             litCSize = ((istart[1] &  3) << 8) + istart[2];
 
-            errorCode = HUF_decompress1X4_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTableX4);
-            if (HUF_isError(errorCode)) return ERROR(corruption_detected);
-
+            {   size_t const errorCode = HUF_decompress1X4_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTableX4);
+                if (HUF_isError(errorCode)) return ERROR(corruption_detected);
+            }
             dctx->litPtr = dctx->litBuffer;
-            dctx->litBufSize = BLOCKSIZE+WILDCOPY_OVERLENGTH;
+            dctx->litBufSize = ZSTD_BLOCKSIZE_MAX+WILDCOPY_OVERLENGTH;
             dctx->litSize = litSize;
             return litCSize + lhSize;
         }
     case IS_RAW:
-        {
-            size_t litSize;
+        {   size_t litSize;
             U32 lhSize = ((istart[0]) >> 4) & 3;
             switch(lhSize)
             {
@@ -435,7 +454,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                 if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
                 memcpy(dctx->litBuffer, istart+lhSize, litSize);
                 dctx->litPtr = dctx->litBuffer;
-                dctx->litBufSize = BLOCKSIZE+8;
+                dctx->litBufSize = ZSTD_BLOCKSIZE_MAX+8;
                 dctx->litSize = litSize;
                 return lhSize+litSize;
             }
@@ -446,8 +465,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
             return lhSize+litSize;
         }
     case IS_RLE:
-        {
-            size_t litSize;
+        {   size_t litSize;
             U32 lhSize = ((istart[0]) >> 4) & 3;
             switch(lhSize)
             {
@@ -460,12 +478,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                 break;
             case 3:
                 litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2];
+                if (srcSize<4) return ERROR(corruption_detected);   /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */
                 break;
             }
-            if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
+            if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
             memset(dctx->litBuffer, istart[lhSize], litSize);
             dctx->litPtr = dctx->litBuffer;
-            dctx->litBufSize = BLOCKSIZE+WILDCOPY_OVERLENGTH;
+            dctx->litBufSize = ZSTD_BLOCKSIZE_MAX+WILDCOPY_OVERLENGTH;
             dctx->litSize = litSize;
             return lhSize+1;
         }
@@ -475,121 +494,84 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
 }
 
 
-size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
-                         FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
-                         const void* src, size_t srcSize)
+/*! ZSTD_buildSeqTable() :
+    @return : nb bytes read from src,
+              or an error code if it fails, testable with ZSTD_isError()
+*/
+FORCE_INLINE size_t ZSTD_buildSeqTable(FSE_DTable* DTable, U32 type, U32 max, U32 maxLog,
+                                 const void* src, size_t srcSize,
+                                 const S16* defaultNorm, U32 defaultLog, U32 flagRepeatTable)
+{
+    switch(type)
+    {
+    case FSE_ENCODING_RLE :
+        if (!srcSize) return ERROR(srcSize_wrong);
+        if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected);
+        FSE_buildDTable_rle(DTable, *(const BYTE*)src);   /* if *src > max, data is corrupted */
+        return 1;
+    case FSE_ENCODING_RAW :
+        FSE_buildDTable(DTable, defaultNorm, max, defaultLog);
+        return 0;
+    case FSE_ENCODING_STATIC:
+        if (!flagRepeatTable) return ERROR(corruption_detected);
+        return 0;
+    default :   /* impossible */
+    case FSE_ENCODING_DYNAMIC :
+        {   U32 tableLog;
+            S16 norm[MaxSeq+1];
+            size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
+            if (FSE_isError(headerSize)) return ERROR(corruption_detected);
+            if (tableLog > maxLog) return ERROR(corruption_detected);
+            FSE_buildDTable(DTable, norm, max, tableLog);
+            return headerSize;
+    }   }
+}
+
+
+size_t ZSTD_decodeSeqHeaders(int* nbSeqPtr,
+                             FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, U32 flagRepeatTable,
+                             const void* src, size_t srcSize)
 {
     const BYTE* const istart = (const BYTE* const)src;
-    const BYTE* ip = istart;
     const BYTE* const iend = istart + srcSize;
-    U32 LLtype, Offtype, MLtype;
-    U32 LLlog, Offlog, MLlog;
-    size_t dumpsLength;
+    const BYTE* ip = istart;
 
     /* check */
-    if (srcSize < MIN_SEQUENCES_SIZE)
-        return ERROR(srcSize_wrong);
+    if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong);
 
     /* SeqHead */
-    *nbSeq = *ip++;
-    if (*nbSeq==0) return 1;
-    if (*nbSeq >= 128)
-        *nbSeq = ((nbSeq[0]-128)<<8) + *ip++;
-
-    LLtype  = *ip >> 6;
-    Offtype = (*ip >> 4) & 3;
-    MLtype  = (*ip >> 2) & 3;
-    if (*ip & 2) {
-        dumpsLength  = ip[2];
-        dumpsLength += ip[1] << 8;
-        ip += 3;
-    } else {
-        dumpsLength  = ip[1];
-        dumpsLength += (ip[0] & 1) << 8;
-        ip += 2;
+    {   int nbSeq = *ip++;
+        if (!nbSeq) { *nbSeqPtr=0; return 1; }
+        if (nbSeq > 0x7F) {
+            if (nbSeq == 0xFF)
+                nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
+            else
+                nbSeq = ((nbSeq-0x80)<<8) + *ip++;
+        }
+        *nbSeqPtr = nbSeq;
     }
-    *dumpsPtr = ip;
-    ip += dumpsLength;
-    *dumpsLengthPtr = dumpsLength;
 
-    /* check */
-    if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
+    /* FSE table descriptors */
+    {   U32 const LLtype  = *ip >> 6;
+        U32 const Offtype = (*ip >> 4) & 3;
+        U32 const MLtype  = (*ip >> 2) & 3;
+        ip++;
 
-    /* sequences */
-    {
-        S16 norm[MaxML+1];    /* assumption : MaxML >= MaxLL >= MaxOff */
-        size_t headerSize;
+        /* check */
+        if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
 
         /* Build DTables */
-        switch(LLtype)
-        {
-        U32 max;
-        case FSE_ENCODING_RLE :
-            LLlog = 0;
-            FSE_buildDTable_rle(DTableLL, *ip++);
-            break;
-        case FSE_ENCODING_RAW :
-            LLlog = LLbits;
-            FSE_buildDTable_raw(DTableLL, LLbits);
-            break;
-        case FSE_ENCODING_STATIC:
-            break;
-        case FSE_ENCODING_DYNAMIC :
-        default :   /* impossible */
-            max = MaxLL;
-            headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
-            if (FSE_isError(headerSize)) return ERROR(GENERIC);
-            if (LLlog > LLFSELog) return ERROR(corruption_detected);
-            ip += headerSize;
-            FSE_buildDTable(DTableLL, norm, max, LLlog);
+        {   size_t const bhSize = ZSTD_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog, flagRepeatTable);
+            if (ZSTD_isError(bhSize)) return ERROR(corruption_detected);
+            ip += bhSize;
         }
-
-        switch(Offtype)
-        {
-        U32 max;
-        case FSE_ENCODING_RLE :
-            Offlog = 0;
-            if (ip > iend-2) return ERROR(srcSize_wrong);   /* min : "raw", hence no header, but at least xxLog bits */
-            FSE_buildDTable_rle(DTableOffb, *ip++ & MaxOff); /* if *ip > MaxOff, data is corrupted */
-            break;
-        case FSE_ENCODING_RAW :
-            Offlog = Offbits;
-            FSE_buildDTable_raw(DTableOffb, Offbits);
-            break;
-        case FSE_ENCODING_STATIC:
-            break;
-        case FSE_ENCODING_DYNAMIC :
-        default :   /* impossible */
-            max = MaxOff;
-            headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
-            if (FSE_isError(headerSize)) return ERROR(GENERIC);
-            if (Offlog > OffFSELog) return ERROR(corruption_detected);
-            ip += headerSize;
-            FSE_buildDTable(DTableOffb, norm, max, Offlog);
+        {   size_t const bhSize = ZSTD_buildSeqTable(DTableOffb, Offtype, MaxOff, OffFSELog, ip, iend-ip, OF_defaultNorm, OF_defaultNormLog, flagRepeatTable);
+            if (ZSTD_isError(bhSize)) return ERROR(corruption_detected);
+            ip += bhSize;
         }
-
-        switch(MLtype)
-        {
-        U32 max;
-        case FSE_ENCODING_RLE :
-            MLlog = 0;
-            if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
-            FSE_buildDTable_rle(DTableML, *ip++);
-            break;
-        case FSE_ENCODING_RAW :
-            MLlog = MLbits;
-            FSE_buildDTable_raw(DTableML, MLbits);
-            break;
-        case FSE_ENCODING_STATIC:
-            break;
-        case FSE_ENCODING_DYNAMIC :
-        default :   /* impossible */
-            max = MaxML;
-            headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
-            if (FSE_isError(headerSize)) return ERROR(GENERIC);
-            if (MLlog > MLFSELog) return ERROR(corruption_detected);
-            ip += headerSize;
-            FSE_buildDTable(DTableML, norm, max, MLlog);
+        {   size_t const bhSize = ZSTD_buildSeqTable(DTableML, MLtype, MaxML, MLFSELog, ip, iend-ip, ML_defaultNorm, ML_defaultNormLog, flagRepeatTable);
+            if (ZSTD_isError(bhSize)) return ERROR(corruption_detected);
+            ip += bhSize;
     }   }
 
     return ip-istart;
@@ -607,84 +589,84 @@ typedef struct {
     FSE_DState_t stateLL;
     FSE_DState_t stateOffb;
     FSE_DState_t stateML;
-    size_t prevOffset;
-    const BYTE* dumps;
-    const BYTE* dumpsEnd;
+    size_t prevOffset[ZSTD_REP_INIT];
 } seqState_t;
 
 
 
 static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
 {
-    size_t litLength;
-    size_t prevOffset;
-    size_t offset;
-    size_t matchLength;
-    const BYTE* dumps = seqState->dumps;
-    const BYTE* const de = seqState->dumpsEnd;
-
     /* Literal length */
-    litLength = FSE_peakSymbol(&(seqState->stateLL));
-    prevOffset = litLength ? seq->offset : seqState->prevOffset;
-    if (litLength == MaxLL) {
-        U32 add = *dumps++;
-        if (add < 255) litLength += add;
+    U32 const llCode = FSE_peekSymbol(&(seqState->stateLL));
+    U32 const mlCode = FSE_peekSymbol(&(seqState->stateML));
+    U32 const ofCode = FSE_peekSymbol(&(seqState->stateOffb));   /* <= maxOff, by table construction */
+
+    U32 const llBits = LL_bits[llCode];
+    U32 const mlBits = ML_bits[mlCode];
+    U32 const ofBits = ofCode;
+    U32 const totalBits = llBits+mlBits+ofBits;
+
+    static const U32 LL_base[MaxLL+1] = {
+                             0,  1,  2,  3,  4,  5,  6,  7,  8,  9,   10,    11,    12,    13,    14,     15,
+                            16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
+                            0x2000, 0x4000, 0x8000, 0x10000 };
+
+    static const U32 ML_base[MaxML+1] = {
+                             0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10,   11,    12,    13,    14,    15,
+                            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,   27,    28,    29,    30,    31,
+                            32, 34, 36, 38, 40, 44, 48, 56, 64, 80, 96, 0x80, 0x100, 0x200, 0x400, 0x800,
+                            0x1000, 0x2000, 0x4000, 0x8000, 0x10000 };
+
+    static const U32 OF_base[MaxOff+1] = {
+                 0,        1,       3,       7,     0xF,     0x1F,     0x3F,     0x7F,
+                 0xFF,   0x1FF,   0x3FF,   0x7FF,   0xFFF,   0x1FFF,   0x3FFF,   0x7FFF,
+                 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF,
+                 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, /*fake*/ 1, 1 };
+
+    /* sequence */
+    {   size_t offset;
+        if (!ofCode)
+            offset = 0;
         else {
-            litLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no risk : dumps is always followed by seq tables > 1 byte */
-            if (litLength&1) litLength>>=1, dumps += 3;
-            else litLength = (U16)(litLength)>>1, dumps += 2;
+            offset = OF_base[ofCode] + BIT_readBits(&(seqState->DStream), ofBits);   /* <=  26 bits */
+            if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
         }
-        if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
-    }
 
-    /* Offset */
-    {
-        static const U32 offsetPrefix[MaxOff+1] = {
-                1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256,
-                512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
-                524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 };
-        U32 offsetCode = FSE_peakSymbol(&(seqState->stateOffb));   /* <= maxOff, by table construction */
-        U32 nbBits = offsetCode - 1;
-        if (offsetCode==0) nbBits = 0;   /* cmove */
-        offset = offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits);
-        if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
-        if (offsetCode==0) offset = prevOffset;   /* repcode, cmove */
-        if (offsetCode | !litLength) seqState->prevOffset = seq->offset;   /* cmove */
-        FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));    /* update */
-    }
+        if (offset < ZSTD_REP_NUM) {
+            if (llCode == 0 && offset <= 1) offset = 1-offset;
 
-    /* Literal length update */
-    FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));   /* update */
-    if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
+            if (offset != 0) {
+                size_t temp = seqState->prevOffset[offset];
+                if (offset != 1) {
+                    seqState->prevOffset[2] = seqState->prevOffset[1];
+                }
+                seqState->prevOffset[1] = seqState->prevOffset[0];
+                seqState->prevOffset[0] = offset = temp;
 
-    /* MatchLength */
-    matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
-    if (matchLength == MaxML) {
-        U32 add = *dumps++;
-        if (add < 255) matchLength += add;
-        else {
-            matchLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
-            if (matchLength&1) matchLength>>=1, dumps += 3;
-            else matchLength = (U16)(matchLength)>>1, dumps += 2;
+            } else {
+                offset = seqState->prevOffset[0];
+            }
+        } else {
+            offset -= ZSTD_REP_MOVE;
+            seqState->prevOffset[2] = seqState->prevOffset[1];
+            seqState->prevOffset[1] = seqState->prevOffset[0];
+            seqState->prevOffset[0] = offset;
         }
-        if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
+        seq->offset = offset;
     }
-    matchLength += MINMATCH;
 
-    /* save result */
-    seq->litLength = litLength;
-    seq->offset = offset;
-    seq->matchLength = matchLength;
-    seqState->dumps = dumps;
+    seq->matchLength = ML_base[mlCode] + MINMATCH + ((mlCode>31) ? BIT_readBits(&(seqState->DStream), mlBits) : 0);   /* <=  16 bits */
+    if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&(seqState->DStream));
 
-#if 0   /* debug */
-    {
-        static U64 totalDecoded = 0;
-        printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n",
-           (U32)(totalDecoded), (U32)litLength, (U32)matchLength, (U32)offset);
-        totalDecoded += litLength + matchLength;
-    }
-#endif
+    seq->litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBits(&(seqState->DStream), llBits) : 0);   /* <=  16 bits */
+    if (MEM_32bits() ||
+       (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&(seqState->DStream));
+
+    /* ANS state update */
+    FSE_updateState(&(seqState->stateLL), &(seqState->DStream));   /* <=  9 bits */
+    FSE_updateState(&(seqState->stateML), &(seqState->DStream));   /* <=  9 bits */
+    if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));     /* <= 18 bits */
+    FSE_updateState(&(seqState->stateOffb), &(seqState->DStream)); /* <=  8 bits */
 }
 
 
@@ -693,38 +675,34 @@ FORCE_INLINE size_t ZSTD_execSequence(BYTE* op,
                                 const BYTE** litPtr, const BYTE* const litLimit_8,
                                 const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
 {
-    static const int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
-    static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* substracted */
     BYTE* const oLitEnd = op + sequence.litLength;
-    const size_t sequenceLength = sequence.litLength + sequence.matchLength;
+    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
     BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
     BYTE* const oend_8 = oend-8;
-    const BYTE* const litEnd = *litPtr + sequence.litLength;
+    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
     const BYTE* match = oLitEnd - sequence.offset;
 
     /* check */
     if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall);   /* last match must start at a minimum distance of 8 from oend */
     if (oMatchEnd > oend) return ERROR(dstSize_tooSmall);   /* overwrite beyond dst buffer */
-    if (litEnd > litLimit_8) return ERROR(corruption_detected);   /* risk read beyond lit buffer */
+    if (iLitEnd > litLimit_8) return ERROR(corruption_detected);   /* over-read beyond lit buffer */
 
     /* copy Literals */
     ZSTD_wildcopy(op, *litPtr, sequence.litLength);   /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
     op = oLitEnd;
-    *litPtr = litEnd;   /* update for next sequence */
+    *litPtr = iLitEnd;   /* update for next sequence */
 
     /* copy Match */
     if (sequence.offset > (size_t)(oLitEnd - base)) {
         /* offset beyond prefix */
-        if (sequence.offset > (size_t)(oLitEnd - vBase))
-            return ERROR(corruption_detected);
+        if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected);
         match = dictEnd - (base-match);
         if (match + sequence.matchLength <= dictEnd) {
             memmove(oLitEnd, match, sequence.matchLength);
             return sequenceLength;
         }
         /* span extDict & currentPrefixSegment */
-        {
-            size_t length1 = dictEnd - match;
+        {   size_t const length1 = dictEnd - match;
             memmove(oLitEnd, match, length1);
             op = oLitEnd + length1;
             sequence.matchLength -= length1;
@@ -734,7 +712,9 @@ FORCE_INLINE size_t ZSTD_execSequence(BYTE* op,
     /* match within prefix */
     if (sequence.offset < 8) {
         /* close range match, overlap */
-        const int sub2 = dec64table[sequence.offset];
+        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
+        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* substracted */
+        int const sub2 = dec64table[sequence.offset];
         op[0] = match[0];
         op[1] = match[1];
         op[2] = match[2];
@@ -747,14 +727,13 @@ FORCE_INLINE size_t ZSTD_execSequence(BYTE* op,
     }
     op += 8; match += 8;
 
-    if (oMatchEnd > oend-12) {
+    if (oMatchEnd > oend-(16-MINMATCH)) {
         if (op < oend_8) {
             ZSTD_wildcopy(op, match, oend_8 - op);
             match += oend_8 - op;
             op = oend_8;
         }
-        while (op < oMatchEnd)
-            *op++ = *match++;
+        while (op < oMatchEnd) *op++ = *match++;
     } else {
         ZSTD_wildcopy(op, match, sequence.matchLength-8);   /* works even if matchLength < 8 */
     }
@@ -770,27 +749,25 @@ static size_t ZSTD_decompressSequences(
     const BYTE* ip = (const BYTE*)seqStart;
     const BYTE* const iend = ip + seqSize;
     BYTE* const ostart = (BYTE* const)dst;
-    BYTE* op = ostart;
     BYTE* const oend = ostart + maxDstSize;
-    size_t errorCode, dumpsLength;
+    BYTE* op = ostart;
     const BYTE* litPtr = dctx->litPtr;
     const BYTE* const litLimit_8 = litPtr + dctx->litBufSize - 8;
     const BYTE* const litEnd = litPtr + dctx->litSize;
-    int nbSeq;
-    const BYTE* dumps;
-    U32* DTableLL = dctx->LLTable;
-    U32* DTableML = dctx->MLTable;
-    U32* DTableOffb = dctx->OffTable;
+    FSE_DTable* DTableLL = dctx->LLTable;
+    FSE_DTable* DTableML = dctx->MLTable;
+    FSE_DTable* DTableOffb = dctx->OffTable;
     const BYTE* const base = (const BYTE*) (dctx->base);
     const BYTE* const vBase = (const BYTE*) (dctx->vBase);
     const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+    int nbSeq;
 
     /* Build Decoding Tables */
-    errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
-                                      DTableLL, DTableML, DTableOffb,
-                                      ip, seqSize);
-    if (ZSTD_isError(errorCode)) return errorCode;
-    ip += errorCode;
+    {   size_t const seqHSize = ZSTD_decodeSeqHeaders(&nbSeq, DTableLL, DTableML, DTableOffb, dctx->flagRepeatTable, ip, seqSize);
+        if (ZSTD_isError(seqHSize)) return seqHSize;
+        ip += seqHSize;
+        dctx->flagRepeatTable = 0;
+    }
 
     /* Regen sequences */
     if (nbSeq) {
@@ -799,31 +776,37 @@ static size_t ZSTD_decompressSequences(
 
         memset(&sequence, 0, sizeof(sequence));
         sequence.offset = REPCODE_STARTVALUE;
-        seqState.dumps = dumps;
-        seqState.dumpsEnd = dumps + dumpsLength;
-        seqState.prevOffset = REPCODE_STARTVALUE;
-        errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip);
-        if (ERR_isError(errorCode)) return ERROR(corruption_detected);
+        { U32 i; for (i=0; i<ZSTD_REP_INIT; i++) seqState.prevOffset[i] = REPCODE_STARTVALUE; }
+        { size_t const errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip);
+          if (ERR_isError(errorCode)) return ERROR(corruption_detected); }
         FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
         FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
         FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
 
         for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
-            size_t oneSeqSize;
             nbSeq--;
             ZSTD_decodeSequence(&sequence, &seqState);
-            oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd);
-            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
-            op += oneSeqSize;
-        }
+
+#if 0  /* debug */
+            static BYTE* start = NULL;
+            if (start==NULL) start = op;
+            size_t pos = (size_t)(op-start);
+            if ((pos >= 5810037) && (pos < 5810400))
+                printf("Dpos %6u :%5u literals & match %3u bytes at distance %6u \n",
+                       pos, (U32)sequence.litLength, (U32)sequence.matchLength, (U32)sequence.offset);
+#endif
+
+            {   size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd);
+                if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+                op += oneSeqSize;
+        }   }
 
         /* check if reached exact end */
         if (nbSeq) return ERROR(corruption_detected);
     }
 
     /* last literal segment */
-    {
-        size_t lastLLSize = litEnd - litPtr;
+    {   size_t const lastLLSize = litEnd - litPtr;
         if (litPtr > litEnd) return ERROR(corruption_detected);   /* too many literals already used */
         if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
         memcpy(op, litPtr, lastLLSize);
@@ -850,16 +833,15 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
                       const void* src, size_t srcSize)
 {   /* blockType == blockCompressed */
     const BYTE* ip = (const BYTE*)src;
-    size_t litCSize;
 
-    if (srcSize >= BLOCKSIZE) return ERROR(srcSize_wrong);
+    if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);
 
     /* Decode literals sub-block */
-    litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
-    if (ZSTD_isError(litCSize)) return litCSize;
-    ip += litCSize;
-    srcSize -= litCSize;
-
+    {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
+        if (ZSTD_isError(litCSize)) return litCSize;
+        ip += litCSize;
+        srcSize -= litCSize;
+    }
     return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize);
 }
 
@@ -873,44 +855,41 @@ size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
 }
 
 
-/*! ZSTD_decompress_continueDCtx
-*   dctx must have been properly initialized */
-static size_t ZSTD_decompress_continueDCtx(ZSTD_DCtx* dctx,
-                                 void* dst, size_t maxDstSize,
+/*! ZSTD_decompress_continueDCtx() :
+*   `dctx` must have been properly initialized */
+static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
+                                 void* dst, size_t dstCapacity,
                                  const void* src, size_t srcSize)
 {
     const BYTE* ip = (const BYTE*)src;
     const BYTE* iend = ip + srcSize;
     BYTE* const ostart = (BYTE* const)dst;
     BYTE* op = ostart;
-    BYTE* const oend = ostart + maxDstSize;
+    BYTE* const oend = ostart + dstCapacity;
     size_t remainingSize = srcSize;
     blockProperties_t blockProperties;
 
-    /* Frame Header */
-    {
-        size_t frameHeaderSize;
-        if (srcSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+    /* check */
+    if (srcSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
-        {
-            const U32 magicNumber = MEM_readLE32(src);
-            if (ZSTD_isLegacy(magicNumber))
-                return ZSTD_decompressLegacy(dst, maxDstSize, src, srcSize, magicNumber);
-        }
+    {   const U32 magicNumber = MEM_readLE32(src);
+        if (ZSTD_isLegacy(magicNumber))
+            return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, magicNumber);
+    }
 #endif
-        frameHeaderSize = ZSTD_decodeFrameHeader_Part1(dctx, src, ZSTD_frameHeaderSize_min);
+
+    /* Frame Header */
+    {   size_t const frameHeaderSize = ZSTD_frameHeaderSize(src, ZSTD_frameHeaderSize_min);
         if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
         if (srcSize < frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+        if (ZSTD_decodeFrameHeader(dctx, src, frameHeaderSize)) return ERROR(corruption_detected);
         ip += frameHeaderSize; remainingSize -= frameHeaderSize;
-        frameHeaderSize = ZSTD_decodeFrameHeader_Part2(dctx, src, frameHeaderSize);
-        if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
     }
 
     /* Loop on each block */
-    while (1)
-    {
+    while (1) {
         size_t decodedSize=0;
-        size_t cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
+        size_t const cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
         if (ZSTD_isError(cBlockSize)) return cBlockSize;
 
         ip += ZSTD_blockHeaderSize;
@@ -948,48 +927,49 @@ static size_t ZSTD_decompress_continueDCtx(ZSTD_DCtx* dctx,
 
 
 size_t ZSTD_decompress_usingPreparedDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* refDCtx,
-                                         void* dst, size_t maxDstSize,
+                                         void* dst, size_t dstCapacity,
                                    const void* src, size_t srcSize)
 {
     ZSTD_copyDCtx(dctx, refDCtx);
     ZSTD_checkContinuity(dctx, dst);
-    return ZSTD_decompress_continueDCtx(dctx, dst, maxDstSize, src, srcSize);
+    return ZSTD_decompressFrame(dctx, dst, dstCapacity, src, srcSize);
 }
 
 
 size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
-                                 void* dst, size_t maxDstSize,
+                                 void* dst, size_t dstCapacity,
                                  const void* src, size_t srcSize,
                                  const void* dict, size_t dictSize)
 {
     ZSTD_decompressBegin_usingDict(dctx, dict, dictSize);
     ZSTD_checkContinuity(dctx, dst);
-    return ZSTD_decompress_continueDCtx(dctx, dst, maxDstSize, src, srcSize);
+    return ZSTD_decompressFrame(dctx, dst, dstCapacity, src, srcSize);
 }
 
 
-size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
-    return ZSTD_decompress_usingDict(dctx, dst, maxDstSize, src, srcSize, NULL, 0);
+    return ZSTD_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, NULL, 0);
 }
 
-size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+
+size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
 #if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE==1)
     size_t regenSize;
     ZSTD_DCtx* dctx = ZSTD_createDCtx();
     if (dctx==NULL) return ERROR(memory_allocation);
-    regenSize = ZSTD_decompressDCtx(dctx, dst, maxDstSize, src, srcSize);
+    regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
     ZSTD_freeDCtx(dctx);
     return regenSize;
 #else
     ZSTD_DCtx dctx;
-    return ZSTD_decompressDCtx(&dctx, dst, maxDstSize, src, srcSize);
+    return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize);
 #endif
 }
 
 
-/* ******************************
+/*_******************************
 *  Streaming Decompression API
 ********************************/
 size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx)
@@ -1008,9 +988,8 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, co
     {
     case ZSTDds_getFrameHeaderSize :
         {
-            /* get frame header size */
             if (srcSize != ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong);   /* impossible */
-            dctx->headerSize = ZSTD_decodeFrameHeader_Part1(dctx, src, ZSTD_frameHeaderSize_min);
+            dctx->headerSize = ZSTD_frameHeaderSize(src, ZSTD_frameHeaderSize_min);
             if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize;
             memcpy(dctx->headerBuffer, src, ZSTD_frameHeaderSize_min);
             if (dctx->headerSize > ZSTD_frameHeaderSize_min) {
@@ -1022,10 +1001,9 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, co
         }
     case ZSTDds_decodeFrameHeader:
         {
-            /* get frame header */
             size_t result;
             memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_min, src, dctx->expected);
-            result = ZSTD_decodeFrameHeader_Part2(dctx, dctx->headerBuffer, dctx->headerSize);
+            result = ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize);
             if (ZSTD_isError(result)) return result;
             dctx->expected = ZSTD_blockHeaderSize;
             dctx->stage = ZSTDds_decodeBlockHeader;
@@ -1033,16 +1011,14 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, co
         }
     case ZSTDds_decodeBlockHeader:
         {
-            /* Decode block header */
             blockProperties_t bp;
-            size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
-            if (ZSTD_isError(blockSize)) return blockSize;
+            size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
+            if (ZSTD_isError(cBlockSize)) return cBlockSize;
             if (bp.blockType == bt_end) {
                 dctx->expected = 0;
                 dctx->stage = ZSTDds_getFrameHeaderSize;
-            }
-            else {
-                dctx->expected = blockSize;
+            } else {
+                dctx->expected = cBlockSize;
                 dctx->bType = bp.blockType;
                 dctx->stage = ZSTDds_decompressBlock;
             }
@@ -1122,14 +1098,14 @@ static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* dict, size_t dictSiz
     errorCode = FSE_buildDTable(dctx->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog);
     if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted);
 
-    dctx->flagStaticTables = 1;
+    dctx->flagRepeatTable = 1;
     return hSize + offcodeHeaderSize + matchlengthHeaderSize + litlengthHeaderSize;
 }
 
 static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
 {
     size_t eSize;
-    U32 magic = MEM_readLE32(dict);
+    U32 const magic = MEM_readLE32(dict);
     if (magic != ZSTD_DICT_MAGIC) {
         /* pure content mode */
         ZSTD_refDictContent(dctx, dict, dictSize);
@@ -1152,12 +1128,11 @@ static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict
 
 size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
 {
-    size_t errorCode;
-    errorCode = ZSTD_decompressBegin(dctx);
-    if (ZSTD_isError(errorCode)) return errorCode;
+    { size_t const errorCode = ZSTD_decompressBegin(dctx);
+      if (ZSTD_isError(errorCode)) return errorCode; }
 
     if (dict && dictSize) {
-        errorCode = ZSTD_decompress_insertDictionary(dctx, dict, dictSize);
+        size_t const errorCode = ZSTD_decompress_insertDictionary(dctx, dict, dictSize);
         if (ZSTD_isError(errorCode)) return ERROR(dictionary_corrupted);
     }
 
diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h
index 26fc8578..9b1874f3 100644
--- a/lib/zstd_internal.h
+++ b/lib/zstd_internal.h
@@ -27,7 +27,7 @@
     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
     You can contact the author at :
-    - zstd source repository : https://github.com/Cyan4973/zstd
+    - zstd homepage : https://www.zstd.net
 */
 #ifndef ZSTD_CCOMMON_H_MODULE
 #define ZSTD_CCOMMON_H_MODULE
@@ -50,18 +50,29 @@
 /*-*************************************
 *  Common constants
 ***************************************/
-#define ZSTD_DICT_MAGIC  0xEC30A435
+#define ZSTD_OPT_DEBUG 0     // 3 = compression stats;  5 = check encoded sequences;  9 = full logs
+#include <stdio.h>
+#if defined(ZSTD_OPT_DEBUG) && ZSTD_OPT_DEBUG>=9
+    #define ZSTD_LOG_PARSER(...) printf(__VA_ARGS__)
+    #define ZSTD_LOG_ENCODE(...) printf(__VA_ARGS__)
+    #define ZSTD_LOG_BLOCK(...) printf(__VA_ARGS__)
+#else
+    #define ZSTD_LOG_PARSER(...)
+    #define ZSTD_LOG_ENCODE(...)
+    #define ZSTD_LOG_BLOCK(...)
+#endif
+
+#define ZSTD_OPT_NUM    (1<<12)
+#define ZSTD_DICT_MAGIC  0xEC30A436
+
+#define ZSTD_REP_NUM    3
+#define ZSTD_REP_INIT   ZSTD_REP_NUM
+#define ZSTD_REP_MOVE   (ZSTD_REP_NUM-1)
 
 #define KB *(1 <<10)
 #define MB *(1 <<20)
 #define GB *(1U<<30)
 
-#define BLOCKSIZE (128 KB)                 /* define, for static allocation */
-
-static const size_t ZSTD_blockHeaderSize = 3;
-static const size_t ZSTD_frameHeaderSize_min = 5;
-#define ZSTD_frameHeaderSize_max 5         /* define, for static allocation */
-
 #define BIT7 128
 #define BIT6  64
 #define BIT5  32
@@ -69,52 +80,76 @@ static const size_t ZSTD_frameHeaderSize_min = 5;
 #define BIT1   2
 #define BIT0   1
 
+#define ZSTD_WINDOWLOG_ABSOLUTEMIN 12
+static const size_t ZSTD_fcs_fieldSize[4] = { 0, 1, 2, 8 };
+
+#define ZSTD_BLOCKHEADERSIZE 3   /* because C standard does not allow a static const value to be defined using another static const value .... :( */
+static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
+typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
+
+#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
+#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */)   /* for a non-null block */
+
+#define HufLog 12
+
 #define IS_HUF 0
 #define IS_PCH 1
 #define IS_RAW 2
 #define IS_RLE 3
 
-#define MINMATCH 4
+#define LONGNBSEQ 0x7F00
+
+#define MINMATCH 3
+#define EQUAL_READ32 4
 #define REPCODE_STARTVALUE 1
 
 #define Litbits  8
-#define MLbits   7
-#define LLbits   6
-#define Offbits  5
 #define MaxLit ((1<<Litbits) - 1)
-#define MaxML  ((1<<MLbits) - 1)
-#define MaxLL  ((1<<LLbits) - 1)
-#define MaxOff ((1<<Offbits)- 1)
-#define MLFSELog   10
-#define LLFSELog   10
-#define OffFSELog   9
-#define MaxSeq MAX(MaxLL, MaxML)
+#define MaxML  52
+#define MaxLL  35
+#define MaxOff 28
+#define MaxSeq MAX(MaxLL, MaxML)   /* Assumption : MaxOff < MaxLL,MaxML */
+#define MLFSELog    9
+#define LLFSELog    9
+#define OffFSELog   8
 
 #define FSE_ENCODING_RAW     0
 #define FSE_ENCODING_RLE     1
 #define FSE_ENCODING_STATIC  2
 #define FSE_ENCODING_DYNAMIC 3
 
+static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                      1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9,10,11,12,
+                                     13,14,15,16 };
+static const S16 LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,
+                                             2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1,
+                                            -1,-1,-1,-1 };
+static const U32 LL_defaultNormLog = 6;
 
-#define HufLog 12
+static const U32 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                      1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9,10,11,
+                                     12,13,14,15,16 };
+static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
+                                             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,
+                                            -1,-1,-1,-1,-1 };
+static const U32 ML_defaultNormLog = 6;
 
-#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
-#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */)   /* for a non-null block */
-
-#define WILDCOPY_OVERLENGTH 8
-
-typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
+static const S16 OF_defaultNorm[MaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
+                                              1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 };
+static const U32 OF_defaultNormLog = 5;
 
 
 /*-*******************************************
 *  Shared functions to include for inlining
 *********************************************/
 static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
-
 #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
 
 /*! ZSTD_wildcopy() :
 *   custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */
+#define WILDCOPY_OVERLENGTH 8
 MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, size_t length)
 {
     const BYTE* ip = (const BYTE*)src;
@@ -151,32 +186,68 @@ MEM_STATIC unsigned ZSTD_highbit(U32 val)
 /*-*******************************************
 *  Private interfaces
 *********************************************/
+typedef struct {
+    U32 off;
+    U32 len;
+} ZSTD_match_t;
+
+typedef struct {
+    U32 price;
+    U32 off;
+    U32 mlen;
+    U32 litlen;
+    U32 rep[ZSTD_REP_INIT];
+} ZSTD_optimal_t;
+
+#if ZSTD_OPT_DEBUG == 3
+    #include ".debug/zstd_stats.h"
+#else
+    typedef struct { U32  unused; } ZSTD_stats_t;
+    MEM_STATIC void ZSTD_statsPrint(ZSTD_stats_t* stats, U32 searchLength) { (void)stats; (void)searchLength; };
+    MEM_STATIC void ZSTD_statsInit(ZSTD_stats_t* stats) { (void)stats; };
+    MEM_STATIC void ZSTD_statsResetFreqs(ZSTD_stats_t* stats) { (void)stats; };
+    MEM_STATIC void ZSTD_statsUpdatePrices(ZSTD_stats_t* stats, size_t litLength, const BYTE* literals, size_t offset, size_t matchLength) { (void)stats; (void)litLength; (void)literals; (void)offset; (void)matchLength; };
+#endif
+
 typedef struct {
     void* buffer;
     U32*  offsetStart;
     U32*  offset;
     BYTE* offCodeStart;
-    BYTE* offCode;
     BYTE* litStart;
     BYTE* lit;
-    BYTE* litLengthStart;
-    BYTE* litLength;
-    BYTE* matchLengthStart;
-    BYTE* matchLength;
-    BYTE* dumpsStart;
-    BYTE* dumps;
+    U16*  litLengthStart;
+    U16*  litLength;
+    BYTE* llCodeStart;
+    U16*  matchLengthStart;
+    U16*  matchLength;
+    BYTE* mlCodeStart;
+    U32   longLengthID;   /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
+    U32   longLengthPos;
     /* opt */
+    ZSTD_optimal_t* priceTable;
+    ZSTD_match_t* matchTable;
     U32* matchLengthFreq;
     U32* litLengthFreq;
     U32* litFreq;
     U32* offCodeFreq;
     U32  matchLengthSum;
+    U32  matchSum;
     U32  litLengthSum;
     U32  litSum;
     U32  offCodeSum;
+    U32  log2matchLengthSum;
+    U32  log2matchSum;
+    U32  log2litLengthSum;
+    U32  log2litSum;
+    U32  log2offCodeSum;
+    U32  factor;
+    ZSTD_stats_t stats;
 } seqStore_t;
 
-seqStore_t ZSTD_copySeqStore(const ZSTD_CCtx* ctx);
+const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);
+void ZSTD_seqToCodes(const seqStore_t* seqStorePtr, size_t const nbSeq);
+size_t ZSTD_compressBegin_targetSrcSize(ZSTD_CCtx* zc, const void* dict, size_t dictSize, size_t targetSrcSize, int compressionLevel);
 
 
 #endif   /* ZSTD_CCOMMON_H_MODULE */
diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h
index ec9a2a15..5f4d4a62 100644
--- a/lib/zstd_opt.h
+++ b/lib/zstd_opt.h
@@ -33,86 +33,96 @@
 
 /* Note : this file is intended to be included within zstd_compress.c */
 
-/*-  Dependencies  -*/
-#include <stdio.h>  /* for debug */
 
+#define ZSTD_FREQ_DIV   5
 
-/*-  Local types  -*/
-typedef struct {
-    U32 off;
-    U32 len;
-    U32 back;
-} ZSTD_match_t;
-
-typedef struct {
-    U32 price;
-    U32 off;
-    U32 mlen;
-    U32 litlen;
-    U32 rep;
-    U32 rep2;
-} ZSTD_optimal_t;
-
-
-/*-  Constants  -*/
-#define ZSTD_OPT_NUM   (1<<12)
-#define ZSTD_FREQ_THRESHOLD (256)
-
-/*-  Debug  -*/
-#define ZSTD_OPT_DEBUG 0     // 1 = tableID=0;    5 = check encoded sequences
-
-#if defined(ZSTD_OPT_DEBUG) && ZSTD_OPT_DEBUG>=1
-    #define ZSTD_LOG_PARSER(...) printf(__VA_ARGS__)
-    #define ZSTD_LOG_ENCODE(...) printf(__VA_ARGS__)
-    #define ZSTD_LOG_TRY_PRICE(...) printf(__VA_ARGS__)
-#else
-    #define ZSTD_LOG_PARSER(...)
-    #define ZSTD_LOG_ENCODE(...)
-    #define ZSTD_LOG_TRY_PRICE(...)
-#endif
-
-
-FORCE_INLINE U32 ZSTD_getLiteralPriceReal(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals)
+/*-*************************************
+*  Price functions for optimal parser
+***************************************/
+FORCE_INLINE void ZSTD_setLog2Prices(seqStore_t* ssPtr)
 {
-    U32 price, freq, u;
+    ssPtr->log2matchLengthSum = ZSTD_highbit(ssPtr->matchLengthSum+1);
+    ssPtr->log2litLengthSum = ZSTD_highbit(ssPtr->litLengthSum+1);
+    ssPtr->log2litSum = ZSTD_highbit(ssPtr->litSum+1);
+    ssPtr->log2offCodeSum = ZSTD_highbit(ssPtr->offCodeSum+1);
+    ssPtr->factor = 1 + ((ssPtr->litSum>>5) / ssPtr->litLengthSum) + ((ssPtr->litSum<<1) / (ssPtr->litSum + ssPtr->matchSum));
+}
 
-    if (!litLength) return 1;   /* special case */
 
-    /* literals */
-    price = litLength * ZSTD_highbit(seqStorePtr->litSum);
-    for (u=0; u < litLength; u++)
-        price -= ZSTD_highbit(seqStorePtr->litFreq[literals[u]]);
+MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr)
+{
+    unsigned u;
 
-    /* literal Length */
-    price += ((litLength >= MaxLL)*8) + ((litLength >= 255+MaxLL)*16) + ((litLength>=(1<<15))*8);
-    if (litLength >= MaxLL) litLength = MaxLL;
-    freq = seqStorePtr->litLengthFreq[litLength];
-    price += ZSTD_highbit(seqStorePtr->litLengthSum) - ZSTD_highbit(freq);
+    if (ssPtr->litLengthSum == 0) {
+        ssPtr->litSum = (2<<Litbits);
+        ssPtr->litLengthSum = MaxLL+1;
+        ssPtr->matchLengthSum = MaxML+1;
+        ssPtr->offCodeSum = (MaxOff+1);
+        ssPtr->matchSum = (2<<Litbits);
 
-    return price;
+        for (u=0; u<=MaxLit; u++)
+            ssPtr->litFreq[u] = 2;
+        for (u=0; u<=MaxLL; u++)
+            ssPtr->litLengthFreq[u] = 1;
+        for (u=0; u<=MaxML; u++)
+            ssPtr->matchLengthFreq[u] = 1;
+        for (u=0; u<=MaxOff; u++)
+            ssPtr->offCodeFreq[u] = 1;
+    } else {
+        ssPtr->matchLengthSum = 0;
+        ssPtr->litLengthSum = 0;
+        ssPtr->offCodeSum = 0;
+        ssPtr->matchSum = 0;
+        ssPtr->litSum = 0;
+
+        for (u=0; u<=MaxLit; u++) {
+            ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV);
+            ssPtr->litSum += ssPtr->litFreq[u];
+        }
+        for (u=0; u<=MaxLL; u++) {
+            ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u]>>ZSTD_FREQ_DIV);
+            ssPtr->litLengthSum += ssPtr->litLengthFreq[u];
+        }
+        for (u=0; u<=MaxML; u++) {
+            ssPtr->matchLengthFreq[u] = 1 + (ssPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV);
+            ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u];
+            ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3);
+        }
+        for (u=0; u<=MaxOff; u++) {
+            ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV);
+            ssPtr->offCodeSum += ssPtr->offCodeFreq[u];
+        }
+    }
+
+    ZSTD_setLog2Prices(ssPtr);
 }
 
 
 FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals)
 {
-    if (seqStorePtr->litSum > ZSTD_FREQ_THRESHOLD)
-        return ZSTD_getLiteralPriceReal(seqStorePtr, litLength, literals);
-    /* backup eval */
-    return 1 + (litLength<<3);
-}
+    U32 price, u;
 
+    if (litLength == 0)
+        return seqStorePtr->log2litLengthSum - ZSTD_highbit(seqStorePtr->litLengthFreq[0]+1);
 
-FORCE_INLINE U32 ZSTD_getMatchPriceReal(seqStore_t* seqStorePtr, U32 offset, U32 matchLength)
-{
-    /* offset */
-    BYTE offCode = offset ? (BYTE)ZSTD_highbit(offset) + 1 : 0;
-    U32 price = ZSTD_highbit(seqStorePtr->offCodeSum) - ZSTD_highbit(seqStorePtr->offCodeFreq[offCode]);
-    price += offCode;
+    /* literals */
+    price = litLength * seqStorePtr->log2litSum;
+    for (u=0; u < litLength; u++)
+        price -= ZSTD_highbit(seqStorePtr->litFreq[literals[u]]+1);
 
-    /* match Length */
-    price += ((matchLength >= MaxML)*8) + ((matchLength >= 255+MaxML)*16) + ((matchLength>=(1<<15))*8);
-    if (matchLength >= MaxML) matchLength = MaxML;
-    price += ZSTD_highbit(seqStorePtr->matchLengthSum) - ZSTD_highbit(seqStorePtr->matchLengthFreq[matchLength]);
+    /* literal Length */
+    {   static const BYTE LL_Code[64] = {  0,  1,  2,  3,  4,  5,  6,  7,
+                                           8,  9, 10, 11, 12, 13, 14, 15,
+                                          16, 16, 17, 17, 18, 18, 19, 19,
+                                          20, 20, 20, 20, 21, 21, 21, 21,
+                                          22, 22, 22, 22, 22, 22, 22, 22,
+                                          23, 23, 23, 23, 23, 23, 23, 23,
+                                          24, 24, 24, 24, 24, 24, 24, 24,
+                                          24, 24, 24, 24, 24, 24, 24, 24 };
+        const BYTE LL_deltaCode = 19;
+        const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit(litLength) + LL_deltaCode : LL_Code[litLength];
+        price += LL_bits[llCode] + seqStorePtr->log2litLengthSum - ZSTD_highbit(seqStorePtr->litLengthFreq[llCode]+1);
+    }
 
     return price;
 }
@@ -120,10 +130,25 @@ FORCE_INLINE U32 ZSTD_getMatchPriceReal(seqStore_t* seqStorePtr, U32 offset, U32
 
 FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength)
 {
-    if (seqStorePtr->litSum > ZSTD_FREQ_THRESHOLD)
-        return ZSTD_getLiteralPriceReal(seqStorePtr, litLength, literals) + ZSTD_getMatchPriceReal(seqStorePtr, offset, matchLength);
-    /* backup eval */
-    return (litLength<<3) + ZSTD_highbit((U32)matchLength+1) + Offbits + ZSTD_highbit((U32)offset+1);
+    /* offset */
+    BYTE offCode = (BYTE)ZSTD_highbit(offset+1);
+    U32 price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit(seqStorePtr->offCodeFreq[offCode]+1);
+
+    /* match Length */
+    {   static const BYTE ML_Code[128] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+                                          16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+                                          32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
+                                          38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
+                                          40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
+                                          41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
+                                          42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
+                                          42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
+        const BYTE ML_deltaCode = 36;
+        const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit(matchLength) + ML_deltaCode : ML_Code[matchLength];
+        price += ML_bits[mlCode] + seqStorePtr->log2matchLengthSum - ZSTD_highbit(seqStorePtr->matchLengthFreq[mlCode]+1);
+    }
+
+    return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + seqStorePtr->factor;
 }
 
 
@@ -137,24 +162,41 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B
         seqStorePtr->litFreq[literals[u]]++;
 
     /* literal Length */
-    seqStorePtr->litLengthSum++;
-    if (litLength >= MaxLL)
-        seqStorePtr->litLengthFreq[MaxLL]++;
-    else
-        seqStorePtr->litLengthFreq[litLength]++;
+    {   static const BYTE LL_Code[64] = {  0,  1,  2,  3,  4,  5,  6,  7,
+                                           8,  9, 10, 11, 12, 13, 14, 15,
+                                          16, 16, 17, 17, 18, 18, 19, 19,
+                                          20, 20, 20, 20, 21, 21, 21, 21,
+                                          22, 22, 22, 22, 22, 22, 22, 22,
+                                          23, 23, 23, 23, 23, 23, 23, 23,
+                                          24, 24, 24, 24, 24, 24, 24, 24,
+                                          24, 24, 24, 24, 24, 24, 24, 24 };
+        const BYTE LL_deltaCode = 19;
+        const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit(litLength) + LL_deltaCode : LL_Code[litLength];
+        seqStorePtr->litLengthFreq[llCode]++;
+        seqStorePtr->litLengthSum++;
+    }
 
     /* match offset */
     seqStorePtr->offCodeSum++;
-    BYTE offCode = (BYTE)ZSTD_highbit(offset) + 1;
-    if (offset==0) offCode=0;
+    BYTE offCode = (BYTE)ZSTD_highbit(offset+1);
     seqStorePtr->offCodeFreq[offCode]++;
 
     /* match Length */
-    seqStorePtr->matchLengthSum++;
-    if (matchLength >= MaxML)
-        seqStorePtr->matchLengthFreq[MaxML]++;
-    else
-        seqStorePtr->matchLengthFreq[matchLength]++;
+    {   static const BYTE ML_Code[128] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+                                          16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+                                          32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
+                                          38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
+                                          40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
+                                          41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
+                                          42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
+                                          42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
+        const BYTE ML_deltaCode = 36;
+        const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit(matchLength) + ML_deltaCode : ML_Code[matchLength];
+        seqStorePtr->matchLengthFreq[mlCode]++;
+        seqStorePtr->matchLengthSum++;
+    }
+
+    ZSTD_setLog2Prices(seqStorePtr);
 }
 
 
@@ -170,23 +212,45 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B
 
 
 
+
+/* Update hashTable3 up to ip (excluded)
+   Assumption : always within prefix (ie. not within extDict) */
+FORCE_INLINE
+U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip)
+{
+    U32* const hashTable3  = zc->hashTable3;
+    U32 const hashLog3  = zc->hashLog3;
+    const BYTE* const base = zc->base;
+    U32 idx = zc->nextToUpdate3;
+    const U32 target = zc->nextToUpdate3 = (U32)(ip - base);
+    const size_t hash3 = ZSTD_hash3Ptr(ip, hashLog3);
+
+    while(idx < target) {
+        hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
+        idx++;
+    }
+
+    return hashTable3[hash3];
+}
+
+
 /*-*************************************
 *  Binary Tree search
 ***************************************/
 static U32 ZSTD_insertBtAndGetAllMatches (
                         ZSTD_CCtx* zc,
-                        const BYTE* const ip, const BYTE* const iend,
+                        const BYTE* const ip, const BYTE* const iLimit,
                         U32 nbCompares, const U32 mls,
-                        U32 extDict, ZSTD_match_t* matches, size_t bestLength)
+                        U32 extDict, ZSTD_match_t* matches)
 {
     const BYTE* const base = zc->base;
     const U32 current = (U32)(ip-base);
-    const U32 hashLog = zc->params.hashLog;
+    const U32 hashLog = zc->params.cParams.hashLog;
     const size_t h  = ZSTD_hashPtr(ip, hashLog, mls);
     U32* const hashTable = zc->hashTable;
     U32 matchIndex  = hashTable[h];
-    U32* const bt   = zc->contentTable;
-    const U32 btLog = zc->params.contentLog - 1;
+    U32* const bt   = zc->chainTable;
+    const U32 btLog = zc->params.cParams.chainLog - 1;
     const U32 btMask= (1U << btLog) - 1;
     size_t commonLengthSmaller=0, commonLengthLarger=0;
     const BYTE* const dictBase = zc->dictBase;
@@ -201,7 +265,35 @@ static U32 ZSTD_insertBtAndGetAllMatches (
     U32 dummy32;   /* to be nullified at the end */
     U32 mnum = 0;
 
-    bestLength = MINMATCH-1;
+    const U32 minMatch = (mls == 3) ? 3 : 4;
+    size_t bestLength = minMatch-1;
+
+    if (minMatch == 3) { /* HC3 match finder */
+        U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip);
+        if (matchIndex3>windowLow && (current - matchIndex3 < (1<<18))) {
+            const BYTE* match;
+            size_t currentMl=0;
+            if ((!extDict) || matchIndex3 >= dictLimit) {
+                match = base + matchIndex3;
+                if (match[bestLength] == ip[bestLength]) currentMl = ZSTD_count(ip, match, iLimit);
+            } else {
+                match = dictBase + matchIndex3;
+                if (MEM_readMINMATCH(match, MINMATCH) == MEM_readMINMATCH(ip, MINMATCH))    /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */
+                    currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH;
+            }
+
+            /* save best solution */
+            if (currentMl > bestLength) {
+                bestLength = currentMl;
+                matches[mnum].off = ZSTD_REP_MOVE + current - matchIndex3;
+                matches[mnum].len = (U32)currentMl;
+                mnum++;
+                if (currentMl > ZSTD_OPT_NUM) goto update;
+                if (ip+currentMl == iLimit) goto update; /* best possible, and avoid read overflow*/
+            }
+        }
+    }
+
     hashTable[h] = current;   /* Update Hash Table */
 
     while (nbCompares-- && (matchIndex > windowLow)) {
@@ -211,11 +303,26 @@ static U32 ZSTD_insertBtAndGetAllMatches (
 
         if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
             match = base + matchIndex;
-            if (match[matchLength] == ip[matchLength])
-                matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
+            if (match[matchLength] == ip[matchLength]) {
+#if ZSTD_OPT_DEBUG >= 5
+            size_t ml;
+            if (matchIndex < dictLimit)
+                ml = ZSTD_count_2segments(ip, dictBase + matchIndex, iLimit, dictEnd, prefixStart);
+            else
+                ml = ZSTD_count(ip, match, ip+matchLength);
+            if (ml < matchLength)
+                printf("%d: ERROR_NOEXT: offset=%d matchLength=%d matchIndex=%d dictLimit=%d ml=%d\n", current, (int)(current - matchIndex), (int)matchLength, (int)matchIndex, (int)dictLimit, (int)ml), exit(0);
+#endif
+                matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iLimit) +1;
+            }
         } else {
             match = dictBase + matchIndex;
-            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
+#if ZSTD_OPT_DEBUG >= 5
+            if (memcmp(match, ip, matchLength) != 0)
+                 printf("%d: ERROR_EXT: matchLength=%d ZSTD_count=%d\n", current, (int)matchLength, (int)ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart)), exit(0);
+#endif
+            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
+            ZSTD_LOG_PARSER("%d: ZSTD_INSERTBTANDGETALLMATCHES=%d offset=%d dictBase=%p dictEnd=%p prefixStart=%p ip=%p match=%p\n", (int)current, (int)matchLength, (int)(current - matchIndex), dictBase, dictEnd, prefixStart, ip, match);
             if (matchIndex+matchLength >= dictLimit)
                 match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
         }
@@ -223,12 +330,11 @@ static U32 ZSTD_insertBtAndGetAllMatches (
         if (matchLength > bestLength) {
             if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength;
             bestLength = matchLength;
-            matches[mnum].off = current - matchIndex;
+            matches[mnum].off = ZSTD_REP_MOVE + current - matchIndex;
             matches[mnum].len = (U32)matchLength;
-            matches[mnum].back = 0;
             mnum++;
             if (matchLength > ZSTD_OPT_NUM) break;
-            if (ip+matchLength == iend)   /* equal : no way to know if inf or sup */
+            if (ip+matchLength == iLimit)   /* equal : no way to know if inf or sup */
                 break;   /* drop, to guarantee consistency (miss a little bit of compression) */
         }
 
@@ -250,6 +356,7 @@ static U32 ZSTD_insertBtAndGetAllMatches (
 
     *smallerPtr = *largerPtr = 0;
 
+update:
     zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1;
     return mnum;
 }
@@ -259,26 +366,26 @@ static U32 ZSTD_insertBtAndGetAllMatches (
 static U32 ZSTD_BtGetAllMatches (
                         ZSTD_CCtx* zc,
                         const BYTE* const ip, const BYTE* const iLimit,
-                        const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, U32 minml)
+                        const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches)
 {
     if (ip < zc->base + zc->nextToUpdate) return 0;   /* skipped area */
     ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
-    return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches, minml);
+    return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches);
 }
 
 
 static U32 ZSTD_BtGetAllMatches_selectMLS (
                         ZSTD_CCtx* zc,   /* Index table will be updated */
-                        const BYTE* ip, const BYTE* const iLowLimit, const BYTE* const iHighLimit,
-                        const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, U32 minml)
+                        const BYTE* ip, const BYTE* const iHighLimit,
+                        const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches)
 {
-    (void)iLowLimit;  /* unused */
     switch(matchLengthSearch)
     {
+    case 3 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches);
     default :
-    case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minml);
-    case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minml);
-    case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minml);
+    case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches);
+    case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches);
+    case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches);
     }
 }
 
@@ -286,123 +393,26 @@ static U32 ZSTD_BtGetAllMatches_selectMLS (
 static U32 ZSTD_BtGetAllMatches_extDict (
                         ZSTD_CCtx* zc,
                         const BYTE* const ip, const BYTE* const iLimit,
-                        const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, U32 minml)
+                        const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches)
 {
     if (ip < zc->base + zc->nextToUpdate) return 0;   /* skipped area */
     ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
-    return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches, minml);
+    return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches);
 }
 
 
 static U32 ZSTD_BtGetAllMatches_selectMLS_extDict (
                         ZSTD_CCtx* zc,   /* Index table will be updated */
-                        const BYTE* ip, const BYTE* const iLowLimit, const BYTE* const iHighLimit,
-                        const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, U32 minml)
-{
-    (void)iLowLimit;
-    switch(matchLengthSearch)
-    {
-    default :
-    case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minml);
-    case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minml);
-    case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minml);
-    }
-}
-
-
-/* ***********************
-*  Hash Chain
-*************************/
-FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
-U32 ZSTD_HcGetAllMatches_generic (
-                        ZSTD_CCtx* zc,   /* Index table will be updated */
-                        const BYTE* const ip, const BYTE* const iLowLimit, const BYTE* const iHighLimit,
-                        const U32 maxNbAttempts, const U32 mls, const U32 extDict, ZSTD_match_t* matches, size_t minml)
-{
-    U32* const chainTable = zc->contentTable;
-    const U32 chainSize = (1U << zc->params.contentLog);
-    const U32 chainMask = chainSize-1;
-    const BYTE* const base = zc->base;
-    const BYTE* const dictBase = zc->dictBase;
-    const U32 dictLimit = zc->dictLimit;
-    const BYTE* const prefixStart = base + dictLimit;
-    const BYTE* const dictEnd = dictBase + dictLimit;
-    const BYTE* const dictStart  = dictBase + zc->lowLimit;
-    const U32 lowLimit = zc->lowLimit;
-    const U32 current = (U32)(ip-base);
-    const U32 minChain = current > chainSize ? current - chainSize : 0;
-    U32 matchIndex;
-    U32 mnum = 0;
-    const BYTE* match;
-    U32 nbAttempts=maxNbAttempts;
-    minml=MINMATCH-1;
-
-    /* HC4 match finder */
-    matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls);
-
-    while ((matchIndex>lowLimit) && (nbAttempts)) {
-        size_t currentMl=0;
-        int back = 0;
-        nbAttempts--;
-        if ((!extDict) || matchIndex >= dictLimit) {
-            match = base + matchIndex;
-            if (match[minml] == ip[minml]) currentMl = ZSTD_count(ip, match, iHighLimit); if (currentMl>0) {   // faster
-            //if (MEM_read32(match) == MEM_read32(ip)) { currentMl = ZSTD_count(ip+MINMATCH, match+MINMATCH, iHighLimit)+MINMATCH;  // stronger
-                while ((match-back > prefixStart) && (ip-back > iLowLimit) && (ip[-back-1] == match[-back-1])) back++;
-                currentMl += back;
-            }
-        } else {
-            match = dictBase + matchIndex;
-            if (MEM_read32(match) == MEM_read32(ip)) {   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
-                currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iHighLimit, dictEnd, prefixStart) + MINMATCH;
-                while ((match-back > dictStart) && (ip-back > iLowLimit) && (ip[-back-1] == match[-back-1])) back++;   /* backward match extension */
-                currentMl += back;
-        }   }
-
-        /* save best solution */
-        if (currentMl > minml) {
-            minml = currentMl;
-            matches[mnum].off = current - matchIndex;
-            matches[mnum].len = (U32)currentMl;
-            matches[mnum].back = back;
-            mnum++;
-            if (currentMl > ZSTD_OPT_NUM) break;
-            if (ip+currentMl == iHighLimit) break; /* best possible, and avoid read overflow*/
-        }
-
-        if (matchIndex <= minChain) break;
-        matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
-    }
-
-    return mnum;
-}
-
-
-static U32 ZSTD_HcGetAllMatches_selectMLS (
-                        ZSTD_CCtx* zc,
-                        const BYTE* ip, const BYTE* const iLowLimit, const BYTE* const iHighLimit,
-                        const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, U32 minml)
+                        const BYTE* ip, const BYTE* const iHighLimit,
+                        const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches)
 {
     switch(matchLengthSearch)
     {
+    case 3 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches);
     default :
-    case 4 : return ZSTD_HcGetAllMatches_generic(zc, ip, iLowLimit, iHighLimit, maxNbAttempts, 4, 0, matches, minml);
-    case 5 : return ZSTD_HcGetAllMatches_generic(zc, ip, iLowLimit, iHighLimit, maxNbAttempts, 5, 0, matches, minml);
-    case 6 : return ZSTD_HcGetAllMatches_generic(zc, ip, iLowLimit, iHighLimit, maxNbAttempts, 6, 0, matches, minml);
-    }
-}
-
-static U32 ZSTD_HcGetAllMatches_selectMLS_extDict (
-                        ZSTD_CCtx* zc,
-                        const BYTE* ip, const BYTE* const iLowLimit, const BYTE* const iHighLimit,
-                        const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, U32 minml)
-{
-    switch(matchLengthSearch)
-    {
-    default :
-    case 4 : return ZSTD_HcGetAllMatches_generic(zc, ip, iLowLimit, iHighLimit, maxNbAttempts, 4, 1, matches, minml);
-    case 5 : return ZSTD_HcGetAllMatches_generic(zc, ip, iLowLimit, iHighLimit, maxNbAttempts, 5, 1, matches, minml);
-    case 6 : return ZSTD_HcGetAllMatches_generic(zc, ip, iLowLimit, iHighLimit, maxNbAttempts, 6, 1, matches, minml);
+    case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches);
+    case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches);
+    case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches);
     }
 }
 
@@ -412,38 +422,38 @@ static U32 ZSTD_HcGetAllMatches_selectMLS_extDict (
 *********************************/
 FORCE_INLINE
 void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
-                                    const void* src, size_t srcSize,
-                                    const U32 searchMethod, const U32 depth)
+                                    const void* src, size_t srcSize)
 {
     seqStore_t* seqStorePtr = &(ctx->seqStore);
     const BYTE* const istart = (const BYTE*)src;
     const BYTE* ip = istart;
     const BYTE* anchor = istart;
+    const BYTE* litstart;
     const BYTE* const iend = istart + srcSize;
     const BYTE* const ilimit = iend - 8;
-    const BYTE* const base = ctx->base + ctx->dictLimit;
+    const BYTE* const base = ctx->base;
+    const BYTE* const prefixStart = base + ctx->dictLimit;
 
-    U32 rep_2=REPCODE_STARTVALUE, rep_1=REPCODE_STARTVALUE;
-    const U32 maxSearches = 1U << ctx->params.searchLog;
-    const U32 mls = ctx->params.searchLength;
+    const U32 maxSearches = 1U << ctx->params.cParams.searchLog;
+    const U32 sufficient_len = ctx->params.cParams.targetLength;
+    const U32 mls = ctx->params.cParams.searchLength;
+    const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4;
 
-    typedef U32 (*getAllMatches_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLowLimit, const BYTE* iHighLimit,
-                        U32 maxNbAttempts, U32 matchLengthSearch, ZSTD_match_t* matches, U32 minml);
-    getAllMatches_f getAllMatches = searchMethod ? ZSTD_BtGetAllMatches_selectMLS : ZSTD_HcGetAllMatches_selectMLS;
-
-    ZSTD_optimal_t opt[ZSTD_OPT_NUM+4];
-    ZSTD_match_t matches[ZSTD_OPT_NUM+1];
+    ZSTD_optimal_t* opt = seqStorePtr->priceTable;
+    ZSTD_match_t* matches = seqStorePtr->matchTable;
     const BYTE* inr;
-    U32 skip_num, cur, cur2, match_num, last_pos, litlen, price;
-
-    const U32 sufficient_len = ctx->params.targetLength;
-    const U32 faster_get_matches = (ctx->params.strategy == ZSTD_opt);
-
+    U32 cur, match_num, last_pos, litlen, price;
 
     /* init */
+    U32 rep[ZSTD_REP_INIT];
+    { U32 i; for (i=0; i<ZSTD_REP_INIT; i++) rep[i]=REPCODE_STARTVALUE; }
+
+    ctx->nextToUpdate3 = ctx->nextToUpdate;
     ZSTD_resetSeqStore(seqStorePtr);
-    ZSTD_resetFreqs(seqStorePtr);
-    if ((ip-base) < REPCODE_STARTVALUE) ip = base + REPCODE_STARTVALUE;
+    ZSTD_rescaleFreqs(seqStorePtr);
+    if ((ip-prefixStart) < REPCODE_STARTVALUE) ip = prefixStart + REPCODE_STARTVALUE;
+
+    ZSTD_LOG_BLOCK("%d: COMPBLOCK_OPT_GENERIC srcSz=%d maxSrch=%d mls=%d sufLen=%d\n", (int)(ip-base), (int)srcSize, maxSearches, mls, sufficient_len);
 
     /* Match Loop */
     while (ip < ilimit) {
@@ -454,40 +464,37 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
         memset(opt, 0, sizeof(ZSTD_optimal_t));
         last_pos = 0;
         inr = ip;
-        opt[0].litlen = (U32)(ip - anchor);
+        litstart = ((U32)(ip - anchor) > 128) ? ip - 128 : anchor;
+        opt[0].litlen = (U32)(ip - litstart);
 
         /* check repCode */
-        if (MEM_read32(ip+1) == MEM_read32(ip+1 - rep_1)) {
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++)
+        if (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(ip - rep[i], minMatch)) {
             /* repcode : we take it */
-            mlen = (U32)ZSTD_count(ip+1+MINMATCH, ip+1+MINMATCH-rep_1, iend) + MINMATCH;
+            mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-rep[i], iend) + minMatch;
 
-            ZSTD_LOG_PARSER("%d: start try REP rep=%d mlen=%d\n", (int)(ip-base), (int)rep_1, (int)mlen);
-            if (depth==0 || mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
-                ip+=1; best_mlen = mlen; best_off = 0; cur = 0; last_pos = 1;
+            ZSTD_LOG_PARSER("%d: start try REP rep[%d]=%d mlen=%d\n", (int)(ip-base), i, (int)rep[i], (int)mlen);
+            if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
+                best_mlen = mlen; best_off = i; cur = 0; last_pos = 1;
                 goto _storeSequence;
             }
 
-            litlen = opt[0].litlen + 1;
+            best_off = (i<=1 && ip == anchor) ? 1-i : i;
+            litlen = opt[0].litlen;
             do {
-                price = ZSTD_getPrice(seqStorePtr, litlen, anchor, 0, mlen - MINMATCH);
-                if (mlen + 1 > last_pos || price < opt[mlen + 1].price)
-                    SET_PRICE(mlen + 1, mlen, 0, litlen, price);   /* note : macro modifies last_pos */
+                price = ZSTD_getPrice(seqStorePtr, litlen, litstart, best_off, mlen - MINMATCH);
+                if (mlen > last_pos || price < opt[mlen].price)
+                    SET_PRICE(mlen, mlen, i, litlen, price);   /* note : macro modifies last_pos */
                 mlen--;
-            } while (mlen >= MINMATCH);
-        }
+            } while (mlen >= minMatch);
+        } }
 
-        best_mlen = (last_pos) ? last_pos : MINMATCH;
-
-        if (faster_get_matches && last_pos)
-           match_num = 0;
-        else
-           match_num = getAllMatches(ctx, ip, ip, iend, maxSearches, mls, matches, best_mlen); /* first search (depth 0) */
+        match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches); /* first search (depth 0) */
 
         ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos);
         if (!last_pos && !match_num) { ip++; continue; }
 
-        opt[0].rep = rep_1;
-        opt[0].rep2 = rep_2;
+        { U32 i ; for (i=0; i<ZSTD_REP_INIT; i++) opt[0].rep[i] = rep[i]; }
         opt[0].mlen = 1;
 
         if (match_num && matches[match_num-1].len > sufficient_len) {
@@ -498,6 +505,8 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
             goto _storeSequence;
         }
 
+       best_mlen = (last_pos) ? last_pos : minMatch;
+
        // set prices using matches at position = 0
        for (u = 0; u < match_num; u++) {
            mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
@@ -505,34 +514,29 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
            ZSTD_LOG_PARSER("%d: start Found mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(ip-base), matches[u].len, matches[u].off, (int)best_mlen, (int)last_pos);
            litlen = opt[0].litlen;
            while (mlen <= best_mlen) {
-                price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off, mlen - MINMATCH);
+                price = ZSTD_getPrice(seqStorePtr, litlen, litstart, matches[u].off, mlen - MINMATCH);
                 if (mlen > last_pos || price < opt[mlen].price)
                     SET_PRICE(mlen, mlen, matches[u].off, litlen, price);
                 mlen++;
         }  }
 
-        if (last_pos < MINMATCH) { ip++; continue; }
+        if (last_pos < minMatch) { ip++; continue; }
 
          /* check further positions */
-        for (skip_num = 0, cur = 1; cur <= last_pos; cur++) {
-           size_t cur_rep;
+        for (cur = 1; cur <= last_pos; cur++) {
            inr = ip + cur;
 
            if (opt[cur-1].mlen == 1) {
                 litlen = opt[cur-1].litlen + 1;
                 if (cur > litlen) {
                     price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-litlen);
-                    ZSTD_LOG_TRY_PRICE("%d: TRY1 opt[%d].price=%d price=%d cur=%d litlen=%d\n", (int)(inr-base), cur - litlen, opt[cur - litlen].price, price, cur, litlen);
                 } else
-                    price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor);
+                    price = ZSTD_getLiteralPrice(seqStorePtr, litlen, litstart);
            } else {
                 litlen = 1;
                 price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1);
-                ZSTD_LOG_TRY_PRICE("%d: TRY3 price=%d cur=%d litlen=%d litonly=%d\n", (int)(inr-base), price, cur, litlen, (int)ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1));
            }
 
-           ZSTD_LOG_TRY_PRICE("%d: TRY4 price=%d opt[%d].price=%d\n", (int)(inr-base), price, cur, opt[cur].price);
-
            if (cur > last_pos || price <= opt[cur].price) // || ((price == opt[cur].price) && (opt[cur-1].mlen == 1) && (cur != litlen)))
                 SET_PRICE(cur, 1, 0, litlen, price);
 
@@ -541,134 +545,108 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
            if (inr > ilimit)  /* last match must start at a minimum distance of 8 from oend */
                continue;
 
-            mlen = opt[cur].mlen;
-
-            if (opt[cur-mlen].off) {
-                opt[cur].rep2 = opt[cur-mlen].rep;
-                opt[cur].rep = opt[cur-mlen].off;
-                ZSTD_LOG_PARSER("%d: COPYREP1 cur=%d mlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2);
-            } else {
-                if (cur!=mlen && opt[cur-mlen].litlen == 0) {
-                    opt[cur].rep2 = opt[cur-mlen].rep;
-                    opt[cur].rep = opt[cur-mlen].rep2;
-                    ZSTD_LOG_PARSER("%d: COPYREP2 cur=%d mlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2);
-                } else {
-                    opt[cur].rep2 = opt[cur-mlen].rep2;
-                    opt[cur].rep = opt[cur-mlen].rep;
-                    ZSTD_LOG_PARSER("%d: COPYREP3 cur=%d mlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2);
-            }   }
-
-           ZSTD_LOG_PARSER("%d: CURRENT price[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep, opt[cur].rep2);
-
-           best_mlen = 0;
-
-           if (!opt[cur].off && opt[cur].mlen != 1) {
-               cur_rep = opt[cur].rep2;
-               ZSTD_LOG_PARSER("%d: try REP2 rep2=%u mlen=%u\n", (int)(inr-base), (U32)cur_rep, mlen);
+           mlen = opt[cur].mlen;
+           if (opt[cur].off >= ZSTD_REP_NUM) {
+                opt[cur].rep[2] = opt[cur-mlen].rep[1];
+                opt[cur].rep[1] = opt[cur-mlen].rep[0];
+                opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE;
+                ZSTD_LOG_ENCODE("%d: COPYREP_OFF cur=%d mlen=%d rep[0]=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]);
            } else {
-               cur_rep = opt[cur].rep;
-               ZSTD_LOG_PARSER("%d: try REP1 rep=%u mlen=%u\n", (int)(inr-base), (U32)cur_rep, mlen);
+                opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2];
+                opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1];
+                opt[cur].rep[0] = opt[cur-mlen].rep[opt[cur].off];
+                ZSTD_LOG_ENCODE("%d: COPYREP_NOR cur=%d mlen=%d rep[0]=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]);
            }
 
-           if (MEM_read32(inr) == MEM_read32(inr - cur_rep)) {  // check rep
-               mlen = (U32)ZSTD_count(inr+MINMATCH, inr+MINMATCH - cur_rep, iend) + MINMATCH;
-               ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d rep=%d opt[%d].off=%d\n", (int)(inr-base), mlen, 0, opt[cur].rep, cur, opt[cur].off);
+           ZSTD_LOG_PARSER("%d: CURRENT_NoExt price[%d/%d]=%d off=%d mlen=%d litlen=%d rep[0]=%d rep[1]=%d\n", (int)(inr-base), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep[0], opt[cur].rep[1]);
+           best_mlen = 0;
+
+           { U32 i; for (i=0; i<ZSTD_REP_NUM; i++)
+           if (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - opt[cur].rep[i], minMatch)) {  /* check rep */
+               mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - opt[cur].rep[i], iend) + minMatch;
+               ZSTD_LOG_PARSER("%d: Found REP %d/%d mlen=%d off=%d rep=%d opt[%d].off=%d\n", (int)(inr-base), i, ZSTD_REP_NUM, mlen, i, opt[cur].rep[i], cur, opt[cur].off);
 
                if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
-                    best_mlen = mlen;
-                    best_off = 0;
                     ZSTD_LOG_PARSER("%d: REP sufficient_len=%d best_mlen=%d best_off=%d last_pos=%d\n", (int)(inr-base), sufficient_len, best_mlen, best_off, last_pos);
-                    last_pos = cur + 1;
+                    best_mlen = mlen; best_off = i; last_pos = cur + 1;
                     goto _storeSequence;
                }
 
+               best_off = (i<=1 && opt[cur].mlen != 1) ? 1-i : i;
                if (opt[cur].mlen == 1) {
                     litlen = opt[cur].litlen;
                     if (cur > litlen) {
-                        price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, 0, mlen - MINMATCH);
-                        ZSTD_LOG_TRY_PRICE("%d: TRY5 opt[%d].price=%d price=%d cur=%d litlen=%d\n", (int)(inr-base), cur - litlen, opt[cur - litlen].price, price, cur, litlen);
+                        price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH);
                     } else
-                        price = ZSTD_getPrice(seqStorePtr, litlen, anchor, 0, mlen - MINMATCH);
+                        price = ZSTD_getPrice(seqStorePtr, litlen, litstart, best_off, mlen - MINMATCH);
                 } else {
                     litlen = 0;
-                    price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, 0, mlen - MINMATCH);
-                    ZSTD_LOG_TRY_PRICE("%d: TRY7 price=%d cur=%d litlen=0 getprice=%d\n", (int)(inr-base), price, cur, (int)ZSTD_getPrice(seqStorePtr, 0, NULL, 0, mlen - MINMATCH));
+                    price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH);
                 }
 
                 best_mlen = mlen;
-                if (faster_get_matches) skip_num = best_mlen;
-                ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d price=%d litlen=%d price[%d]=%d\n", (int)(inr-base), mlen, 0, price, litlen, cur - litlen, opt[cur - litlen].price);
+                ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, best_off, price, litlen);
 
                 do {
                     if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
-                        SET_PRICE(cur + mlen, mlen, 0, litlen, price);
+                        SET_PRICE(cur + mlen, mlen, i, litlen, price);
                     mlen--;
-                } while (mlen >= MINMATCH);
-            }
+                } while (mlen >= minMatch);
+            } }
 
-            if (faster_get_matches && skip_num > 0) { skip_num--; continue; }
 
-            best_mlen = (best_mlen > MINMATCH) ? best_mlen : MINMATCH;
-
-            match_num = getAllMatches(ctx, inr, ip, iend, maxSearches, mls, matches, best_mlen);
+            match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches);
             ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num);
 
             if (match_num > 0 && matches[match_num-1].len > sufficient_len) {
-                cur -= matches[match_num-1].back;
                 best_mlen = matches[match_num-1].len;
                 best_off = matches[match_num-1].off;
                 last_pos = cur + 1;
                 goto _storeSequence;
             }
 
+            best_mlen = (best_mlen > minMatch) ? best_mlen : minMatch;
+
             /* set prices using matches at position = cur */
             for (u = 0; u < match_num; u++) {
                 mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
-                cur2 = cur - matches[u].back;
-                best_mlen = (cur2 + matches[u].len < ZSTD_OPT_NUM) ? matches[u].len : ZSTD_OPT_NUM - cur2;
-
-                ZSTD_LOG_PARSER("%d: Found1 cur=%d cur2=%d mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(inr-base), cur, cur2, matches[u].len, matches[u].off, best_mlen, last_pos);
-                if (mlen < matches[u].back + 1)
-                    mlen = matches[u].back + 1;
+                best_mlen = (cur + matches[u].len < ZSTD_OPT_NUM) ? matches[u].len : ZSTD_OPT_NUM - cur;
 
+              //  ZSTD_LOG_PARSER("%d: Found1 cur=%d mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(inr-base), cur, matches[u].len, matches[u].off, best_mlen, last_pos);
                 while (mlen <= best_mlen) {
-                    if (opt[cur2].mlen == 1) {
-                        litlen = opt[cur2].litlen;
-                        if (cur2 > litlen)
-                            price = opt[cur2 - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur2-litlen, matches[u].off, mlen - MINMATCH);
+                    if (opt[cur].mlen == 1) {
+                        litlen = opt[cur].litlen;
+                        if (cur > litlen)
+                            price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off, mlen - MINMATCH);
                         else
-                            price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off, mlen - MINMATCH);
+                            price = ZSTD_getPrice(seqStorePtr, litlen, litstart, matches[u].off, mlen - MINMATCH);
                     } else {
                         litlen = 0;
-                        price = opt[cur2].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off, mlen - MINMATCH);
+                        price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off, mlen - MINMATCH);
                     }
 
-                    ZSTD_LOG_PARSER("%d: Found2 pred=%d mlen=%d best_mlen=%d off=%d price=%d litlen=%d price[%d]=%d\n", (int)(inr-base), matches[u].back, mlen, best_mlen, matches[u].off, price, litlen, cur - litlen, opt[cur - litlen].price);
-                    ZSTD_LOG_TRY_PRICE("%d: TRY8 price=%d opt[%d].price=%d\n", (int)(inr-base), price, cur2 + mlen, opt[cur2 + mlen].price);
-
-                    if (cur2 + mlen > last_pos || (price < opt[cur2 + mlen].price))
-                        SET_PRICE(cur2 + mlen, mlen, matches[u].off, litlen, price);
+                  //  ZSTD_LOG_PARSER("%d: Found2 mlen=%d best_mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, best_mlen, matches[u].off, price, litlen);
+                    if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
+                        SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price);
 
                     mlen++;
-        }   }   }   //  for (skip_num = 0, cur = 1; cur <= last_pos; cur++)
+        }   }   }   //  for (cur = 1; cur <= last_pos; cur++)
 
         best_mlen = opt[last_pos].mlen;
         best_off = opt[last_pos].off;
         cur = last_pos - best_mlen;
-        // printf("%d: start=%d best_mlen=%d best_off=%d cur=%d\n", (int)(ip - base), (int)(start - ip), (int)best_mlen, (int)best_off, cur);
 
         /* store sequence */
 _storeSequence:   /* cur, last_pos, best_mlen, best_off have to be set */
         for (u = 1; u <= last_pos; u++)
-            ZSTD_LOG_PARSER("%d: price[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep2=%d\n", (int)(ip-base+u), u, last_pos, opt[u].price, opt[u].off, opt[u].mlen, opt[u].litlen, opt[u].rep, opt[u].rep2);
-        ZSTD_LOG_PARSER("%d: cur=%d/%d best_mlen=%d best_off=%d rep=%d\n", (int)(ip-base+cur), (int)cur, (int)last_pos, (int)best_mlen, (int)best_off, opt[cur].rep);
+            ZSTD_LOG_PARSER("%d: price[%d/%d]=%d off=%d mlen=%d litlen=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base+u), u, last_pos, opt[u].price, opt[u].off, opt[u].mlen, opt[u].litlen, opt[u].rep[0], opt[u].rep[1]);
+        ZSTD_LOG_PARSER("%d: cur=%d/%d best_mlen=%d best_off=%d rep[0]=%d\n", (int)(ip-base+cur), (int)cur, (int)last_pos, (int)best_mlen, (int)best_off, opt[cur].rep[0]);
 
         opt[0].mlen = 1;
         U32 offset;
 
         while (1) {
             mlen = opt[cur].mlen;
-            ZSTD_LOG_PARSER("%d: cur=%d mlen=%d\n", (int)(ip-base), cur, mlen);
             offset = opt[cur].off;
             opt[cur].mlen = best_mlen;
             opt[cur].off = best_off;
@@ -679,73 +657,54 @@ _storeSequence:   /* cur, last_pos, best_mlen, best_off have to be set */
         }
 
         for (u = 0; u <= last_pos;) {
-            ZSTD_LOG_PARSER("%d: price2[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep2=%d\n", (int)(ip-base+u), u, last_pos, opt[u].price, opt[u].off, opt[u].mlen, opt[u].litlen, opt[u].rep, opt[u].rep2);
+            ZSTD_LOG_PARSER("%d: price2[%d/%d]=%d off=%d mlen=%d litlen=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base+u), u, last_pos, opt[u].price, opt[u].off, opt[u].mlen, opt[u].litlen, opt[u].rep[0], opt[u].rep[1]);
             u += opt[u].mlen;
         }
 
         for (cur=0; cur < last_pos; ) {
-            ZSTD_LOG_PARSER("%d: price3[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep2=%d\n", (int)(ip-base+cur), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep, opt[cur].rep2);
+            ZSTD_LOG_PARSER("%d: price3[%d/%d]=%d off=%d mlen=%d litlen=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base+cur), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep[0], opt[cur].rep[1]);
             mlen = opt[cur].mlen;
             if (mlen == 1) { ip++; cur++; continue; }
             offset = opt[cur].off;
             cur += mlen;
 
-            U32 litLength = (U32)(ip - anchor);
-            ZSTD_LOG_ENCODE("%d/%d: ENCODE1 literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep_1, (int)rep_2);
+            U32 const litLength = (U32)(ip - anchor);
+           // ZSTD_LOG_ENCODE("%d/%d: ENCODE literals=%d mlen=%d off=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[0], (int)rep[1]);
 
-            if (offset) {
-                rep_2 = rep_1;
-                rep_1 = offset;
+            if (offset >= ZSTD_REP_NUM) {
+                rep[2] = rep[1];
+                rep[1] = rep[0];
+                rep[0] = offset - ZSTD_REP_MOVE;
             } else {
-                if (litLength == 0) {
-                    best_off = rep_2;
-                    rep_2 = rep_1;
-                    rep_1 = best_off;
-            }   }
+                if (offset != 0) {
+                    best_off = rep[offset];
+                    if (offset != 1) rep[2] = rep[1];
+                    rep[1] = rep[0];
+                    rep[0] = best_off;
+                }
+                if (litLength == 0 && offset<=1) offset = 1-offset;
+            }
 
-            ZSTD_LOG_ENCODE("%d/%d: ENCODE2 literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep_1, (int)rep_2);
+            ZSTD_LOG_ENCODE("%d/%d: ENCODE literals=%d mlen=%d off=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[0], (int)rep[1]);
 
 #if ZSTD_OPT_DEBUG >= 5
-            int ml2;
-            if (offset)
-                ml2 = ZSTD_count(ip, ip-offset, iend);
+            U32 ml2;
+            if (offset >= ZSTD_REP_NUM)
+                ml2 = (U32)ZSTD_count(ip, ip-(offset-ZSTD_REP_MOVE), iend);
             else
-                ml2 = ZSTD_count(ip, ip-rep_1, iend);
-            if (ml2 < mlen && ml2 < MINMATCH) {
-                printf("%d: ERROR iend=%d mlen=%d offset=%d ml2=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset, (int)ml2); exit(0); }
+                ml2 = (U32)ZSTD_count(ip, ip-rep[0], iend);
+            if ((offset >= 8) && (ml2 < mlen || ml2 < minMatch)) {
+                printf("%d: ERROR_NoExt iend=%d mlen=%d offset=%d ml2=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset, (int)ml2); exit(0); }
             if (ip < anchor) {
-                printf("%d: ERROR ip < anchor iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); }
-            if (ip - offset < ctx->base) {
-                printf("%d: ERROR ip - offset < base iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); }
-            if ((int)offset >= (1 << ctx->params.windowLog)) {
-                printf("%d: offset >= (1 << params.windowLog) iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); }
-            if (mlen < MINMATCH) {
-                printf("%d: ERROR mlen < MINMATCH iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); }
+                printf("%d: ERROR_NoExt ip < anchor iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); }
             if (ip + mlen > iend) {
-                printf("%d: ERROR ip + mlen >= iend iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); }
+                printf("%d: ERROR_NoExt ip + mlen >= iend iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); }
 #endif
 
             ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
             ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
             anchor = ip = ip + mlen;
-        }   /* for (cur=0; cur < last_pos; ) */
-
-        /* check immediate repcode */
-        while ( (anchor <= ilimit)
-             && (MEM_read32(anchor) == MEM_read32(anchor - rep_2)) ) {
-            /* store sequence */
-            best_mlen = (U32)ZSTD_count(anchor+MINMATCH, anchor+MINMATCH-rep_2, iend);
-            best_off = rep_2;
-            rep_2 = rep_1;
-            rep_1 = best_off;
-            ZSTD_LOG_ENCODE("%d/%d: ENCODE REP literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(anchor-base), (int)(iend-base), (int)(0), (int)best_mlen, (int)(0), (int)rep_1, (int)rep_2);
-            ZSTD_updatePrice(seqStorePtr, 0, anchor, 0, best_mlen);
-            ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, best_mlen);
-            anchor += best_mlen+MINMATCH;
-            continue;   /* faster when present ... (?) */
-        }
-        if (anchor > ip) ip = anchor;
-    }
+    }    }   /* for (cur=0; cur < last_pos; ) */
 
     {   /* Last Literals */
         size_t lastLLSize = iend - anchor;
@@ -758,13 +717,13 @@ _storeSequence:   /* cur, last_pos, best_mlen, best_off have to be set */
 
 FORCE_INLINE
 void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
-                                     const void* src, size_t srcSize,
-                                     const U32 searchMethod, const U32 depth)
+                                     const void* src, size_t srcSize)
 {
     seqStore_t* seqStorePtr = &(ctx->seqStore);
     const BYTE* const istart = (const BYTE*)src;
     const BYTE* ip = istart;
     const BYTE* anchor = istart;
+    const BYTE* litstart;
     const BYTE* const iend = istart + srcSize;
     const BYTE* const ilimit = iend - 8;
     const BYTE* const base = ctx->base;
@@ -773,84 +732,84 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
     const BYTE* const dictBase = ctx->dictBase;
     const BYTE* const dictEnd  = dictBase + dictLimit;
 
-    U32 rep_2=REPCODE_STARTVALUE, rep_1=REPCODE_STARTVALUE;
-    const U32 maxSearches = 1U << ctx->params.searchLog;
-    const U32 mls = ctx->params.searchLength;
+    const U32 maxSearches = 1U << ctx->params.cParams.searchLog;
+    const U32 sufficient_len = ctx->params.cParams.targetLength;
+    const U32 mls = ctx->params.cParams.searchLength;
+    const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4;
 
-    typedef U32 (*getAllMatches_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLowLimit, const BYTE* iHighLimit,
-                        U32 maxNbAttempts, U32 matchLengthSearch, ZSTD_match_t* matches, U32 minml);
-    getAllMatches_f getAllMatches = searchMethod ? ZSTD_BtGetAllMatches_selectMLS_extDict : ZSTD_HcGetAllMatches_selectMLS_extDict;
-
-    ZSTD_optimal_t opt[ZSTD_OPT_NUM+4];
-    ZSTD_match_t matches[ZSTD_OPT_NUM+1];
+    ZSTD_optimal_t* opt = seqStorePtr->priceTable;
+    ZSTD_match_t* matches = seqStorePtr->matchTable;
     const BYTE* inr;
-    U32 skip_num, cur, cur2, match_num, last_pos, litlen, price;
-
-    const U32 sufficient_len = ctx->params.targetLength;
-    const U32 faster_get_matches = (ctx->params.strategy == ZSTD_opt);
+    U32 cur, match_num, last_pos, litlen, price;
 
     /* init */
+    U32 rep[ZSTD_REP_INIT];
+    { U32 i; for (i=0; i<ZSTD_REP_INIT; i++) rep[i]=REPCODE_STARTVALUE; }
+
+    ctx->nextToUpdate3 = ctx->nextToUpdate;
     ZSTD_resetSeqStore(seqStorePtr);
-    ZSTD_resetFreqs(seqStorePtr);
+    ZSTD_rescaleFreqs(seqStorePtr);
     if ((ip - prefixStart) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE;
 
+    ZSTD_LOG_BLOCK("%d: COMPBLOCK_OPT_EXTDICT srcSz=%d maxSrch=%d mls=%d sufLen=%d\n", (int)(ip-base), (int)srcSize, maxSearches, mls, sufficient_len);
+
     /* Match Loop */
     while (ip < ilimit) {
-        U32 u, offset, best_off=0;
-        U32 mlen=0, best_mlen=0;
+        U32 u;
+        U32 mlen=0;
+        U32 best_mlen=0;
+        U32 best_off=0;
         U32 current = (U32)(ip-base);
         memset(opt, 0, sizeof(ZSTD_optimal_t));
         last_pos = 0;
         inr = ip;
-        opt[0].litlen = (U32)(ip - anchor);
+        litstart = ((U32)(ip - anchor) > 128) ? ip - 128 : anchor;
+        opt[0].litlen = (U32)(ip - litstart);
 
         /* check repCode */
-        {
-            const U32 repIndex = (U32)(current+1 - rep_1);
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) {
+            const U32 repIndex = (U32)(current - rep[i]);
             const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
             const BYTE* const repMatch = repBase + repIndex;
             if ( ((U32)((dictLimit-1) - repIndex) >= 3)   /* intentional overflow */
-               && (MEM_read32(ip+1) == MEM_read32(repMatch)) ) {
+               && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) {
                 /* repcode detected we should take it */
                 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
-                mlen = (U32)ZSTD_count_2segments(ip+1+MINMATCH, repMatch+MINMATCH, iend, repEnd, prefixStart) + MINMATCH;
+                mlen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch;
 
-                ZSTD_LOG_PARSER("%d: start try REP rep=%d mlen=%d\n", (int)(ip-base), (int)rep_1, (int)mlen);
-                if (depth==0 || mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
-                    ip+=1; best_mlen = mlen; best_off = 0; cur = 0; last_pos = 1;
+                ZSTD_LOG_PARSER("%d: start try REP rep[%d]=%d mlen=%d\n", (int)(ip-base), i, (int)rep[i], (int)mlen);
+                if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
+                    best_mlen = mlen; best_off = i; cur = 0; last_pos = 1;
                     goto _storeSequence;
                 }
 
-                litlen = opt[0].litlen + 1;
+                best_off = (i<=1 && ip == anchor) ? 1-i : i;
+                litlen = opt[0].litlen;
                 do {
-                    price = ZSTD_getPrice(seqStorePtr, litlen, anchor, 0, mlen - MINMATCH);
-                    if (mlen + 1 > last_pos || price < opt[mlen + 1].price)
-                        SET_PRICE(mlen + 1, mlen, 0, litlen, price);
+                    price = ZSTD_getPrice(seqStorePtr, litlen, litstart, best_off, mlen - MINMATCH);
+                    if (mlen > last_pos || price < opt[mlen].price)
+                        SET_PRICE(mlen, mlen, i, litlen, price);   /* note : macro modifies last_pos */
                     mlen--;
-                } while (mlen >= MINMATCH);
-        }   }
+                } while (mlen >= minMatch);
+        }   } }
 
-       best_mlen = (last_pos) ? last_pos : MINMATCH;
+        match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches);  /* first search (depth 0) */
 
-       if (faster_get_matches && last_pos)
-           match_num = 0;
-       else
-           match_num = getAllMatches(ctx, ip, ip, iend, maxSearches, mls, matches, best_mlen);  /* first search (depth 0) */
+        ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos);
+        if (!last_pos && !match_num) { ip++; continue; }
 
-       ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos);
-       if (!last_pos && !match_num) { ip++; continue; }
+        { U32 i; for (i=0; i<ZSTD_REP_INIT; i++) opt[0].rep[i] = rep[i]; }
+        opt[0].mlen = 1;
 
-       opt[0].rep = rep_1;
-       opt[0].rep2 = rep_2;
-       opt[0].mlen = 1;
-
-       if (match_num && matches[match_num-1].len > sufficient_len) {
+        if (match_num && matches[match_num-1].len > sufficient_len) {
             best_mlen = matches[match_num-1].len;
             best_off = matches[match_num-1].off;
             cur = 0;
             last_pos = 1;
             goto _storeSequence;
-       }
+        }
+
+        best_mlen = (last_pos) ? last_pos : minMatch;
 
         // set prices using matches at position = 0
         for (u = 0; u < match_num; u++) {
@@ -859,179 +818,147 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
             ZSTD_LOG_PARSER("%d: start Found mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(ip-base), matches[u].len, matches[u].off, (int)best_mlen, (int)last_pos);
             litlen = opt[0].litlen;
             while (mlen <= best_mlen) {
-                price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off, mlen - MINMATCH);
+                price = ZSTD_getPrice(seqStorePtr, litlen, litstart, matches[u].off, mlen - MINMATCH);
                 if (mlen > last_pos || price < opt[mlen].price)
                     SET_PRICE(mlen, mlen, matches[u].off, litlen, price);
                 mlen++;
         }   }
 
-        if (last_pos < MINMATCH) {
+        if (last_pos < minMatch) {
             // ip += ((ip-anchor) >> g_searchStrength) + 1;   /* jump faster over incompressible sections */
             ip++; continue;
         }
 
         /* check further positions */
-        for (skip_num = 0, cur = 1; cur <= last_pos; cur++) {
-           size_t cur_rep;
-           inr = ip + cur;
+        for (cur = 1; cur <= last_pos; cur++) {
+            inr = ip + cur;
 
-           if (opt[cur-1].mlen == 1) {
+            if (opt[cur-1].mlen == 1) {
                 litlen = opt[cur-1].litlen + 1;
                 if (cur > litlen) {
                     price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-litlen);
-                    ZSTD_LOG_TRY_PRICE("%d: TRY1 opt[%d].price=%d price=%d cur=%d litlen=%d\n", (int)(inr-base), cur - litlen, opt[cur - litlen].price, price, cur, litlen);
                 } else
-                    price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor);
-           } else {
+                    price = ZSTD_getLiteralPrice(seqStorePtr, litlen, litstart);
+            } else {
                 litlen = 1;
                 price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1);
-                ZSTD_LOG_TRY_PRICE("%d: TRY3 price=%d cur=%d litlen=%d litonly=%d\n", (int)(inr-base), price, cur, litlen, (int)ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1));
-           }
-
-           ZSTD_LOG_TRY_PRICE("%d: TRY4 price=%d opt[%d].price=%d\n", (int)(inr-base), price, cur, opt[cur].price);
-
-           if (cur > last_pos || price <= opt[cur].price) // || ((price == opt[cur].price) && (opt[cur-1].mlen == 1) && (cur != litlen)))
-                SET_PRICE(cur, 1, 0, litlen, price);
-
-           if (cur == last_pos) break;
-
-           if (inr > ilimit) // last match must start at a minimum distance of 8 from oend
-               continue;
-
-            mlen = opt[cur].mlen;
-
-            if (opt[cur-mlen].off) {
-                opt[cur].rep2 = opt[cur-mlen].rep;
-                opt[cur].rep = opt[cur-mlen].off;
-                ZSTD_LOG_PARSER("%d: COPYREP1 cur=%d mlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2);
-            } else {
-                if (cur!=mlen && opt[cur-mlen].litlen == 0) {
-                    opt[cur].rep2 = opt[cur-mlen].rep;
-                    opt[cur].rep = opt[cur-mlen].rep2;
-                    ZSTD_LOG_PARSER("%d: COPYREP2 cur=%d mlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2);
-                } else {
-                    opt[cur].rep2 = opt[cur-mlen].rep2;
-                    opt[cur].rep = opt[cur-mlen].rep;
-                    ZSTD_LOG_PARSER("%d: COPYREP3 cur=%d mlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2);
-            }   }
-
-           ZSTD_LOG_PARSER("%d: CURRENT price[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep, opt[cur].rep2);
-
-           best_mlen = 0;
-
-           if (!opt[cur].off && opt[cur].mlen != 1) {
-               cur_rep = opt[cur].rep2;
-               ZSTD_LOG_PARSER("%d: try REP2 rep2=%u mlen=%u\n", (int)(inr-base), (U32)cur_rep, mlen);
-           } else {
-               cur_rep = opt[cur].rep;
-               ZSTD_LOG_PARSER("%d: try REP1 rep=%u mlen=%u\n", (int)(inr-base), (U32)cur_rep, mlen);
-           }
-
-           const U32 repIndex = (U32)(current+cur - cur_rep);
-           const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
-           const BYTE* const repMatch = repBase + repIndex;
-           if ( ((U32)((dictLimit-1) - repIndex) >= 3)   /* intentional overflow */
-              &&(MEM_read32(inr) == MEM_read32(repMatch)) ) {
-                /* repcode detected */
-                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
-                mlen = (U32)ZSTD_count_2segments(inr+MINMATCH, repMatch+MINMATCH, iend, repEnd, prefixStart) + MINMATCH;
-                ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d rep=%d opt[%d].off=%d\n", (int)(inr-base), mlen, 0, opt[cur].rep, cur, opt[cur].off);
-
-                if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
-                    best_mlen = mlen;
-                    best_off = 0;
-                    ZSTD_LOG_PARSER("%d: REP sufficient_len=%d best_mlen=%d best_off=%d last_pos=%d\n", (int)(inr-base), sufficient_len, best_mlen, best_off, last_pos);
-                    last_pos = cur + 1;
-                    goto _storeSequence;
-                }
-
-                if (opt[cur].mlen == 1) {
-                    litlen = opt[cur].litlen;
-                    if (cur > litlen) {
-                        price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, 0, mlen - MINMATCH);
-                        ZSTD_LOG_TRY_PRICE("%d: TRY5 opt[%d].price=%d price=%d cur=%d litlen=%d\n", (int)(inr-base), cur - litlen, opt[cur - litlen].price, price, cur, litlen);
-                    } else
-                        price = ZSTD_getPrice(seqStorePtr, litlen, anchor, 0, mlen - MINMATCH);
-                } else {
-                    litlen = 0;
-                    price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, 0, mlen - MINMATCH);
-                    ZSTD_LOG_TRY_PRICE("%d: TRY7 price=%d cur=%d litlen=0 getprice=%d\n", (int)(inr-base), price, cur, (int)ZSTD_getPrice(seqStorePtr, 0, NULL, 0, mlen - MINMATCH));
-                }
-
-                best_mlen = mlen;
-                if (faster_get_matches) skip_num = best_mlen;
-
-                ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d price=%d litlen=%d price[%d]=%d\n", (int)(inr-base), mlen, 0, price, litlen, cur - litlen, opt[cur - litlen].price);
-
-                do {
-                    if (cur + mlen > last_pos || price <= opt[cur + mlen].price) // || ((price == opt[cur + mlen].price) && (opt[cur].mlen == 1) && (cur != litlen))) // at equal price prefer REP instead of MATCH
-                        SET_PRICE(cur + mlen, mlen, 0, litlen, price);
-                    mlen--;
-                } while (mlen >= MINMATCH);
             }
 
-            if (faster_get_matches && skip_num > 0) { skip_num--; continue; }
+            if (cur > last_pos || price <= opt[cur].price) // || ((price == opt[cur].price) && (opt[cur-1].mlen == 1) && (cur != litlen)))
+                SET_PRICE(cur, 1, 0, litlen, price);
 
-            best_mlen = (best_mlen > MINMATCH) ? best_mlen : MINMATCH;
+            if (cur == last_pos) break;
 
-            match_num = getAllMatches(ctx, inr, ip, iend, maxSearches, mls, matches, best_mlen);
+            if (inr > ilimit)  /* last match must start at a minimum distance of 8 from oend */
+                continue;
+
+            mlen = opt[cur].mlen;
+            if (opt[cur].off >= ZSTD_REP_NUM) {
+                opt[cur].rep[2] = opt[cur-mlen].rep[1];
+                opt[cur].rep[1] = opt[cur-mlen].rep[0];
+                opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE;
+                ZSTD_LOG_ENCODE("%d: COPYREP_OFF cur=%d mlen=%d rep[0]=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]);
+            } else {
+                opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2];
+                opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1];
+                opt[cur].rep[0] = opt[cur-mlen].rep[opt[cur].off];
+                ZSTD_LOG_ENCODE("%d: COPYREP_NOR cur=%d mlen=%d rep[0]=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]);
+            }
+
+            ZSTD_LOG_PARSER("%d: CURRENT_Ext price[%d/%d]=%d off=%d mlen=%d litlen=%d rep[0]=%d rep[1]=%d\n", (int)(inr-base), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep[0], opt[cur].rep[1]);
+            best_mlen = 0;
+
+            { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) {
+                const U32 repIndex = (U32)(current+cur - opt[cur].rep[i]);
+                const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+                const BYTE* const repMatch = repBase + repIndex;
+                if ( ((U32)((dictLimit-1) - repIndex) >= 3)   /* intentional overflow */
+                  && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) {
+                    /* repcode detected */
+                    const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+                    mlen = (U32)ZSTD_count_2segments(inr+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch;
+                    ZSTD_LOG_PARSER("%d: Found REP %d/%d mlen=%d off=%d rep=%d opt[%d].off=%d\n", (int)(inr-base), i, ZSTD_REP_NUM, mlen, i, opt[cur].rep[i], cur, opt[cur].off);
+
+                    if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
+                        ZSTD_LOG_PARSER("%d: REP sufficient_len=%d best_mlen=%d best_off=%d last_pos=%d\n", (int)(inr-base), sufficient_len, best_mlen, best_off, last_pos);
+                        best_mlen = mlen; best_off = i; last_pos = cur + 1;
+                        goto _storeSequence;
+                    }
+
+                    best_off = (i<=1 && opt[cur].mlen != 1) ? 1-i : i;
+                    if (opt[cur].mlen == 1) {
+                        litlen = opt[cur].litlen;
+                        if (cur > litlen) {
+                            price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH);
+                        } else
+                            price = ZSTD_getPrice(seqStorePtr, litlen, litstart, best_off, mlen - MINMATCH);
+                    } else {
+                        litlen = 0;
+                        price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH);
+                    }
+
+                    best_mlen = mlen;
+                    ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, best_off, price, litlen);
+
+                    do {
+                        if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
+                            SET_PRICE(cur + mlen, mlen, i, litlen, price);
+                        mlen--;
+                    } while (mlen >= minMatch);
+            }   } }
+
+            match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches);
             ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num);
 
             if (match_num > 0 && matches[match_num-1].len > sufficient_len) {
-                cur -= matches[match_num-1].back;
                 best_mlen = matches[match_num-1].len;
                 best_off = matches[match_num-1].off;
                 last_pos = cur + 1;
                 goto _storeSequence;
             }
 
-            // set prices using matches at position = cur
+            best_mlen = (best_mlen > minMatch) ? best_mlen : minMatch;
+
+            /* set prices using matches at position = cur */
             for (u = 0; u < match_num; u++) {
                 mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
-                cur2 = cur - matches[u].back;
-                best_mlen = (cur2 + matches[u].len < ZSTD_OPT_NUM) ? matches[u].len : ZSTD_OPT_NUM - cur2;
-
-                ZSTD_LOG_PARSER("%d: Found1 cur=%d cur2=%d mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(inr-base), cur, cur2, matches[u].len, matches[u].off, best_mlen, last_pos);
-                if (mlen < matches[u].back + 1)
-                    mlen = matches[u].back + 1;
+                best_mlen = (cur + matches[u].len < ZSTD_OPT_NUM) ? matches[u].len : ZSTD_OPT_NUM - cur;
 
+            //    ZSTD_LOG_PARSER("%d: Found1 cur=%d mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(inr-base), cur, matches[u].len, matches[u].off, best_mlen, last_pos);
                 while (mlen <= best_mlen) {
-                    if (opt[cur2].mlen == 1) {
-                        litlen = opt[cur2].litlen;
-                        if (cur2 > litlen)
-                            price = opt[cur2 - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur2-litlen, matches[u].off, mlen - MINMATCH);
+                    if (opt[cur].mlen == 1) {
+                        litlen = opt[cur].litlen;
+                        if (cur > litlen)
+                            price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off, mlen - MINMATCH);
                         else
-                            price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off, mlen - MINMATCH);
+                            price = ZSTD_getPrice(seqStorePtr, litlen, litstart, matches[u].off, mlen - MINMATCH);
                     } else {
                         litlen = 0;
-                        price = opt[cur2].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off, mlen - MINMATCH);
+                        price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off, mlen - MINMATCH);
                     }
 
-                    ZSTD_LOG_PARSER("%d: Found2 pred=%d mlen=%d best_mlen=%d off=%d price=%d litlen=%d price[%d]=%d\n", (int)(inr-base), matches[u].back, mlen, best_mlen, matches[u].off, price, litlen, cur - litlen, opt[cur - litlen].price);
-                    ZSTD_LOG_TRY_PRICE("%d: TRY8 price=%d opt[%d].price=%d\n", (int)(inr-base), price, cur2 + mlen, opt[cur2 + mlen].price);
-
-                    if (cur2 + mlen > last_pos || (price < opt[cur2 + mlen].price))
-                        SET_PRICE(cur2 + mlen, mlen, matches[u].off, litlen, price);
+                //    ZSTD_LOG_PARSER("%d: Found2 mlen=%d best_mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, best_mlen, matches[u].off, price, litlen);
+                    if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
+                        SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price);
 
                     mlen++;
-        }   }   }   //  for (skip_num = 0, cur = 1; cur <= last_pos; cur++)
+        }   }   }   /* for (cur = 1; cur <= last_pos; cur++) */
 
         best_mlen = opt[last_pos].mlen;
         best_off = opt[last_pos].off;
         cur = last_pos - best_mlen;
-        // printf("%d: start=%d best_mlen=%d best_off=%d cur=%d\n", (int)(ip - base), (int)(start - ip), (int)best_mlen, (int)best_off, cur);
 
         /* store sequence */
-_storeSequence: // cur, last_pos, best_mlen, best_off have to be set
+_storeSequence:   /* cur, last_pos, best_mlen, best_off have to be set */
         for (u = 1; u <= last_pos; u++)
-            ZSTD_LOG_PARSER("%d: price[%u/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep2=%d\n", (int)(ip-base+u), u, last_pos, opt[u].price, opt[u].off, opt[u].mlen, opt[u].litlen, opt[u].rep, opt[u].rep2);
-        ZSTD_LOG_PARSER("%d: cur=%d/%d best_mlen=%d best_off=%d rep=%d\n", (int)(ip-base+cur), (int)cur, (int)last_pos, (int)best_mlen, (int)best_off, opt[cur].rep);
+            ZSTD_LOG_PARSER("%d: price[%u/%d]=%d off=%d mlen=%d litlen=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base+u), u, last_pos, opt[u].price, opt[u].off, opt[u].mlen, opt[u].litlen, opt[u].rep[0], opt[u].rep[1]);
+        ZSTD_LOG_PARSER("%d: cur=%d/%d best_mlen=%d best_off=%d rep[0]=%d\n", (int)(ip-base+cur), (int)cur, (int)last_pos, (int)best_mlen, (int)best_off, opt[cur].rep[0]);
 
         opt[0].mlen = 1;
+        U32 offset;
 
         while (1) {
             mlen = opt[cur].mlen;
-            ZSTD_LOG_PARSER("%d: cur=%d mlen=%d\n", (int)(ip-base), cur, mlen);
             offset = opt[cur].off;
             opt[cur].mlen = best_mlen;
             opt[cur].off = best_off;
@@ -1042,79 +969,60 @@ _storeSequence: // cur, last_pos, best_mlen, best_off have to be set
         }
 
         for (u = 0; u <= last_pos; ) {
-            ZSTD_LOG_PARSER("%d: price2[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep2=%d\n", (int)(ip-base+u), u, last_pos, opt[u].price, opt[u].off, opt[u].mlen, opt[u].litlen, opt[u].rep, opt[u].rep2);
+            ZSTD_LOG_PARSER("%d: price2[%d/%d]=%d off=%d mlen=%d litlen=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base+u), u, last_pos, opt[u].price, opt[u].off, opt[u].mlen, opt[u].litlen, opt[u].rep[0], opt[u].rep[1]);
             u += opt[u].mlen;
         }
 
         for (cur=0; cur < last_pos; ) {
-            U32 litLength;
-            ZSTD_LOG_PARSER("%d: price3[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep2=%d\n", (int)(ip-base+cur), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep, opt[cur].rep2);
+            ZSTD_LOG_PARSER("%d: price3[%d/%d]=%d off=%d mlen=%d litlen=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base+cur), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep[0], opt[cur].rep[1]);
             mlen = opt[cur].mlen;
             if (mlen == 1) { ip++; cur++; continue; }
             offset = opt[cur].off;
             cur += mlen;
 
-            litLength = (U32)(ip - anchor);
-            ZSTD_LOG_ENCODE("%d/%d: ENCODE1 literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep_1, (int)rep_2);
+            U32 const litLength = (U32)(ip - anchor);
+         //   ZSTD_LOG_ENCODE("%d/%d: ENCODE1 literals=%d mlen=%d off=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[0], (int)rep[1]);
 
-            if (offset) {
-                rep_2 = rep_1;
-                rep_1 = offset;
+            if (offset >= ZSTD_REP_NUM) {
+                rep[2] = rep[1];
+                rep[1] = rep[0];
+                rep[0] = offset - ZSTD_REP_MOVE;
             } else {
-                if (litLength == 0) {
-                    best_off = rep_2;
-                    rep_2 = rep_1;
-                    rep_1 = best_off;
-            }   }
+                if (offset != 0) {
+                    best_off = rep[offset];
+                    if (offset != 1) rep[2] = rep[1];
+                    rep[1] = rep[0];
+                    rep[0] = best_off;
+                 }
+                 if (litLength == 0 && offset<=1) offset = 1-offset;
+            }
 
-            ZSTD_LOG_ENCODE("%d/%d: ENCODE2 literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep_1, (int)rep_2);
+            ZSTD_LOG_ENCODE("%d/%d: ENCODE literals=%d mlen=%d off=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[0], (int)rep[1]);
 
 #if ZSTD_OPT_DEBUG >= 5
-            int ml2;
-            if (offset)
-                ml2 = ZSTD_count(ip, ip-offset, iend);
-            else
-                ml2 = ZSTD_count(ip, ip-rep_1, iend);
-            if (ml2 < mlen && ml2 < MINMATCH) {
-                printf("%d: ERROR iend=%d mlen=%d offset=%d ml2=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset, (int)ml2); exit(0); }
+            U32 ml2;
+            if (offset >= ZSTD_REP_NUM) {
+                best_off = offset - ZSTD_REP_MOVE;
+                if (best_off > (size_t)(ip - prefixStart))  {
+                    const BYTE* match = dictEnd - (best_off - (ip - prefixStart));
+                    ml2 = ZSTD_count_2segments(ip, match, iend, dictEnd, prefixStart);
+                    ZSTD_LOG_PARSER("%d: ZSTD_count_2segments=%d offset=%d dictBase=%p dictEnd=%p prefixStart=%p ip=%p match=%p\n", (int)current, (int)ml2, (int)best_off, dictBase, dictEnd, prefixStart, ip, match);
+                }
+                else ml2 = (U32)ZSTD_count(ip, ip-offset, iend);
+            }
+            else ml2 = (U32)ZSTD_count(ip, ip-rep[0], iend);
+            if ((offset >= 8) && (ml2 < mlen || ml2 < minMatch)) {
+                printf("%d: ERROR_Ext iend=%d mlen=%d offset=%d ml2=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset, (int)ml2); exit(0); }
             if (ip < anchor) {
-                printf("%d: ERROR ip < anchor iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); }
-            if (ip - offset < ctx->base) {
-                printf("%d: ERROR ip - offset < base iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); }
-            if ((int)offset >= (1 << ctx->params.windowLog)) {
-                printf("%d: offset >= (1 << params.windowLog) iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); }
-            if (mlen < MINMATCH) {
-                printf("%d: ERROR mlen < MINMATCH iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); }
+                printf("%d: ERROR_Ext ip < anchor iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); }
             if (ip + mlen > iend) {
-                printf("%d: ERROR ip + mlen >= iend iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); }
+                printf("%d: ERROR_Ext ip + mlen >= iend iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); }
 #endif
 
             ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
             ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
             anchor = ip = ip + mlen;
-        }
-
-        /* check immediate repcode */
-        while (anchor <= ilimit) {
-            const U32 repIndex = (U32)((anchor-base) - rep_2);
-            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
-            const BYTE* const repMatch = repBase + repIndex;
-            if ( ((U32)((dictLimit-1) - repIndex) >= 3)   /* intentional overflow */
-               && (MEM_read32(anchor) == MEM_read32(repMatch)) ) {
-                /* repcode detected, let's take it */
-                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
-                mlen = (U32)ZSTD_count_2segments(anchor+MINMATCH, repMatch+MINMATCH, iend, repEnd, prefixStart) + MINMATCH;
-                offset = rep_2; rep_2 = rep_1; rep_1 = offset;   /* swap offset history */
-                ZSTD_LOG_ENCODE("%d/%d: ENCODE REP literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(anchor-base), (int)(iend-base), (int)(0), (int)best_mlen, (int)(0), (int)rep_1, (int)rep_2);
-                ZSTD_updatePrice(seqStorePtr, 0, anchor, 0, mlen-MINMATCH);
-                ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, mlen-MINMATCH);
-                anchor += mlen;
-                continue;   /* faster when present ... (?) */
-            }
-            break;
-        }
-        if (anchor > ip) ip = anchor;
-    }
+    }    }   /* for (cur=0; cur < last_pos; ) */
 
     {   /* Last Literals */
         size_t lastLLSize = iend - anchor;
diff --git a/lib/zstd_static.h b/lib/zstd_static.h
index 61216535..960808bc 100644
--- a/lib/zstd_static.h
+++ b/lib/zstd_static.h
@@ -51,39 +51,47 @@ extern "C" {
 /*-*************************************
 *  Constants
 ***************************************/
-#define ZSTD_MAGICNUMBER 0xFD2FB525   /* v0.5 */
+#define ZSTD_MAGICNUMBER 0xFD2FB526   /* v0.6 */
 
 
 /*-*************************************
 *  Types
 ***************************************/
-#define ZSTD_WINDOWLOG_MAX 26
-#define ZSTD_WINDOWLOG_MIN 18
-#define ZSTD_WINDOWLOG_ABSOLUTEMIN 11
-#define ZSTD_CONTENTLOG_MAX (ZSTD_WINDOWLOG_MAX+1)
-#define ZSTD_CONTENTLOG_MIN 4
-#define ZSTD_HASHLOG_MAX 28
-#define ZSTD_HASHLOG_MIN 12
-#define ZSTD_SEARCHLOG_MAX (ZSTD_CONTENTLOG_MAX-1)
-#define ZSTD_SEARCHLOG_MIN 1
-#define ZSTD_SEARCHLENGTH_MAX 7
-#define ZSTD_SEARCHLENGTH_MIN 4
-#define ZSTD_TARGETLENGTH_MIN 4
+#define ZSTD_WINDOWLOG_MAX    ((U32)(MEM_32bits() ? 25 : 27))
+#define ZSTD_WINDOWLOG_MIN     18
+#define ZSTD_CHAINLOG_MAX     (ZSTD_WINDOWLOG_MAX+1)
+#define ZSTD_CHAINLOG_MIN       4
+#define ZSTD_HASHLOG_MAX       ZSTD_WINDOWLOG_MAX
+#define ZSTD_HASHLOG_MIN       12
+#define ZSTD_HASHLOG3_MAX      17
+#define ZSTD_HASHLOG3_MIN      15
+#define ZSTD_SEARCHLOG_MAX    (ZSTD_WINDOWLOG_MAX-1)
+#define ZSTD_SEARCHLOG_MIN      1
+#define ZSTD_SEARCHLENGTH_MAX   7
+#define ZSTD_SEARCHLENGTH_MIN   3
+#define ZSTD_TARGETLENGTH_MIN   4
 #define ZSTD_TARGETLENGTH_MAX 999
 
 /* from faster to stronger */
-typedef enum { ZSTD_fast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_opt, ZSTD_btopt } ZSTD_strategy;
+typedef enum { ZSTD_fast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_btopt } ZSTD_strategy;
 
-typedef struct
-{
-    U64 srcSize;       /* optional : tells how much bytes are present in the frame. Use 0 if not known. */
+typedef struct {
     U32 windowLog;     /* largest match distance : larger == more compression, more memory needed during decompression */
-    U32 contentLog;    /* full search segment : larger == more compression, slower, more memory (useless for fast) */
+    U32 chainLog;      /* fully searched segment : larger == more compression, slower, more memory (useless for fast) */
     U32 hashLog;       /* dispatch table : larger == faster, more memory */
     U32 searchLog;     /* nb of searches : larger == more compression, slower */
     U32 searchLength;  /* match length searched : larger == faster decompression, sometimes less compression */
     U32 targetLength;  /* acceptable match size for optimal parser (only) : larger == more compression, slower */
     ZSTD_strategy strategy;
+} ZSTD_compressionParameters;
+
+typedef struct {
+    U32 contentSizeFlag;   /* 1: content size will be in frame header (if known). */
+} ZSTD_frameParameters;
+
+typedef struct {
+    ZSTD_compressionParameters cParams;
+    ZSTD_frameParameters fParams;
 } ZSTD_parameters;
 
 
@@ -92,14 +100,19 @@ typedef struct
 ***************************************/
 ZSTDLIB_API unsigned ZSTD_maxCLevel (void);
 
-/*! ZSTD_getParams() :
-*   @return ZSTD_parameters structure for a selected compression level and srcSize.
-*   `srcSizeHint` value is optional, select 0 if not known */
-ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, U64 srcSizeHint);
+/*! ZSTD_getCParams() :
+*   @return ZSTD_compressionParameters structure for a selected compression level and srcSize.
+*   `srcSize` value is optional, select 0 if not known */
+ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, U64 srcSize, size_t dictSize);
 
-/*! ZSTD_validateParams() :
-*   correct params value to remain within authorized range */
-ZSTDLIB_API void ZSTD_validateParams(ZSTD_parameters* params);
+/*! ZSTD_checkParams() :
+*   Ensure param values remain within authorized range */
+ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
+
+/*! ZSTD_adjustParams() :
+*   optimize params for a given `srcSize` and `dictSize`.
+*   both values are optional, select `0` if unknown. */
+ZSTDLIB_API void ZSTD_adjustCParams(ZSTD_compressionParameters* params, U64 srcSize, size_t dictSize);
 
 /*! ZSTD_compress_advanced() :
 *   Same as ZSTD_compress_usingDict(), with fine-tune control of each compression parameter */
@@ -113,7 +126,7 @@ ZSTDLIB_API size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
 *   Same as ZSTD_compress_usingDict, but using a reference context `preparedCCtx`, where dictionary has been loaded.
 *   It avoids reloading the dictionary each time.
 *   `preparedCCtx` must have been properly initialized using ZSTD_compressBegin_usingDict() or ZSTD_compressBegin_advanced().
-*   Requires 2 contexts : 1 for reference, which will not be modified, and 1 to run the compression operation */
+*   Requires 2 contexts : 1 for reference (preparedCCtx) which will not be modified, and 1 to run the compression operation (cctx) */
 ZSTDLIB_API size_t ZSTD_compress_usingPreparedCCtx(
                                            ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx,
                                            void* dst, size_t dstCapacity,
@@ -125,19 +138,19 @@ ZSTDLIB_API size_t ZSTD_compress_usingPreparedCCtx(
 *   Same as ZSTD_decompress_usingDict, but using a reference context `preparedDCtx`, where dictionary has been loaded.
 *   It avoids reloading the dictionary each time.
 *   `preparedDCtx` must have been properly initialized using ZSTD_decompressBegin_usingDict().
-*   Requires 2 contexts : 1 for reference, which will not be modified, and 1 to run the decompression operation */
+*   Requires 2 contexts : 1 for reference (preparedDCtx), which will not be modified, and 1 to run the decompression operation (dctx) */
 ZSTDLIB_API size_t ZSTD_decompress_usingPreparedDCtx(
-                                             ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx,
-                                             void* dst, size_t dstCapacity,
-                                       const void* src, size_t srcSize);
+                                           ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx,
+                                           void* dst, size_t dstCapacity,
+                                     const void* src, size_t srcSize);
 
 
 /* **************************************
 *  Streaming functions (direct mode)
 ****************************************/
 ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
-ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict,size_t dictSize, int compressionLevel);
-ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict,size_t dictSize, ZSTD_parameters params);
+ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, U64 pledgedSrcSize);
 ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx);
 
 ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
@@ -153,7 +166,7 @@ ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapaci
   Start by initializing a context.
   Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression,
   or ZSTD_compressBegin_advanced(), for finer parameter control.
-  It's also possible to duplicate a reference context which has been initialized, using ZSTD_copyCCtx()
+  It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx()
 
   Then, consume your input using ZSTD_compressContinue().
   The interface is synchronous, so all input will be consumed and produce a compressed output.
@@ -166,13 +179,17 @@ ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapaci
   You can then reuse ZSTD_CCtx to compress some new frame.
 */
 
+typedef struct { U64 frameContentSize; U32 windowLog; } ZSTD_frameParams;
+
+#define ZSTD_FRAMEHEADERSIZE_MAX 13    /* for static allocation */
+static const size_t ZSTD_frameHeaderSize_min = 5;
+static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX;
+ZSTDLIB_API size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize);   /**< doesn't consume input */
 
 ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
 ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
 ZSTDLIB_API void   ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
 
-ZSTDLIB_API size_t ZSTD_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcSize);
-
 ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
 ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 
@@ -183,15 +200,19 @@ ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t ds
   Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
   A ZSTD_DCtx object can be re-used multiple times.
 
-  First typical operation is to retrieve frame parameters, using ZSTD_getFrameParams().
-  This operation is independent, and just needs enough input data to properly decode the frame header.
-  Objective is to retrieve *params.windowlog, to know minimum amount of memory required during decoding.
-  Result : 0 when successful, it means the ZSTD_parameters structure has been filled.
-           >0 : means there is not enough data into src. Provides the expected size to successfully decode header.
+  First optional operation is to retrieve frame parameters, using ZSTD_getFrameParams(), which doesn't consume the input.
+  It can provide the minimum size of rolling buffer required to properly decompress data,
+  and optionally the final size of uncompressed content.
+  (Note : content size is an optional info that may not be present. 0 means : content size unknown)
+  Frame parameters are extracted from the beginning of compressed frame.
+  The amount of data to read is variable, from ZSTD_frameHeaderSize_min to ZSTD_frameHeaderSize_max (so if `srcSize` >= ZSTD_frameHeaderSize_max, it will always work)
+  If `srcSize` is too small for operation to succeed, function will return the minimum size it requires to produce a result.
+  Result : 0 when successful, it means the ZSTD_frameParams structure has been filled.
+          >0 : means there is not enough data into `src`. Provides the expected size to successfully decode header.
            errorCode, which can be tested using ZSTD_isError()
 
-  Start decompression, with ZSTD_decompressBegin() or ZSTD_decompressBegin_usingDict()
-  Alternatively, you can copy a prepared context, using ZSTD_copyDCtx()
+  Start decompression, with ZSTD_decompressBegin() or ZSTD_decompressBegin_usingDict().
+  Alternatively, you can copy a prepared context, using ZSTD_copyDCtx().
 
   Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively.
   ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue().
@@ -199,7 +220,7 @@ ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t ds
   ZSTD_decompressContinue() needs previous data blocks during decompression, up to (1 << windowlog).
   They should preferably be located contiguously, prior to current block. Alternatively, a round buffer is also possible.
 
-  @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst'.
+  @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity)
   It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
 
   A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero.
@@ -211,10 +232,10 @@ ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t ds
 *  Block functions
 ****************************************/
 /*! Block functions produce and decode raw zstd blocks, without frame metadata.
-    User will have to take in charge required information to regenerate data, such as block sizes.
+    User will have to take in charge required information to regenerate data, such as compressed and content sizes.
 
     A few rules to respect :
-    - Uncompressed block size must be <= 128 KB
+    - Uncompressed block size must be <= ZSTD_BLOCKSIZE_MAX (128 KB)
     - Compressing or decompressing requires a context structure
       + Use ZSTD_createCCtx() and ZSTD_createDCtx()
     - It is necessary to init context before starting
@@ -228,6 +249,7 @@ ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t ds
       + ZSTD_decompressBlock() doesn't accept uncompressed data as input !!
 */
 
+#define ZSTD_BLOCKSIZE_MAX (128 * 1024)   /* define, for static allocation */
 size_t ZSTD_compressBlock  (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 
@@ -237,7 +259,7 @@ size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, cons
 ***************************************/
 #include "error_public.h"
 /*! ZSTD_getErrorCode() :
-    convert a `size_t` function result into a `ZSTD_error_code` enum type,
+    convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
     which can be used to compare directly with enum list published into "error_public.h" */
 ZSTD_ErrorCode ZSTD_getError(size_t code);
 
diff --git a/programs/.gitignore b/programs/.gitignore
index 021e8937..886af777 100644
--- a/programs/.gitignore
+++ b/programs/.gitignore
@@ -5,7 +5,10 @@ fullbench
 fullbench32
 fuzzer
 fuzzer32
+zbufftest
+zbufftest32
 datagen
+paramgrill
 
 # Object files
 *.o
@@ -29,3 +32,15 @@ datagen
 # Visual solution files
 *.suo
 *.user
+
+# Default result files
+dictionary
+grillResults.txt
+_*
+
+# fuzzer
+afl
+
+# Misc files
+*.bat
+fileTests.sh
diff --git a/programs/Makefile b/programs/Makefile
index 4a650c48..36ab3d86 100644
--- a/programs/Makefile
+++ b/programs/Makefile
@@ -53,7 +53,7 @@ BINDIR  = $(PREFIX)/bin
 MANDIR  = $(PREFIX)/share/man/man1
 ZSTDDIR = ../lib
 
-ZSTD_FILES := $(ZSTDDIR)/huff0.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/zstd_compress.c $(ZSTDDIR)/zstd_decompress.c
+ZSTD_FILES := $(ZSTDDIR)/huff0.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/zstd_decompress.c $(ZSTDDIR)/zstd_compress.c
 
 ifeq ($(ZSTD_LEGACY_SUPPORT), 0)
 CPPFLAGS  += -DZSTD_LEGACY_SUPPORT=0
@@ -61,7 +61,8 @@ ZSTD_FILES_LEGACY:=
 else
 ZSTD_LEGACY_SUPPORT:=1
 CPPFLAGS  += -I../lib/legacy -I./legacy
-ZSTD_FILES_LEGACY:= $(ZSTDDIR)/legacy/zstd_v01.c $(ZSTDDIR)/legacy/zstd_v02.c $(ZSTDDIR)/legacy/zstd_v03.c $(ZSTDDIR)/legacy/zstd_v04.c legacy/fileio_legacy.c
+ZSTD_FILES_LEGACY:= $(ZSTDDIR)/legacy/zstd_v01.c $(ZSTDDIR)/legacy/zstd_v02.c $(ZSTDDIR)/legacy/zstd_v03.c \
+                    $(ZSTDDIR)/legacy/zstd_v04.c $(ZSTDDIR)/legacy/zstd_v05.c legacy/fileio_legacy.c
 endif
 
 
@@ -112,11 +113,11 @@ zstd-frugal: $(ZSTD_FILES) $(ZSTDDIR)/zbuff.c zstdcli.c fileio.c
 zstd-small: clean 
 	CFLAGS="-Os -s" $(MAKE) zstd-frugal 
 
-fullbench  : $(ZSTD_FILES) \
+fullbench  : $(ZSTD_FILES) $(ZSTDDIR)/zbuff.c \
         datagen.c fullbench.c
 	$(CC)      $(FLAGS) $^ -o $@$(EXT)
 
-fullbench32: $(ZSTD_FILES) \
+fullbench32: $(ZSTD_FILES) $(ZSTDDIR)/zbuff.c \
       datagen.c fullbench.c
 	$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
 
@@ -215,19 +216,19 @@ test-zbuff: zbufftest
 test-zbuff32: zbufftest32
 	./zbufftest32 $(ZBUFFTEST)
 
+valgrindTest: VALGRIND = valgrind --leak-check=full --error-exitcode=1
 valgrindTest: zstd datagen fuzzer fullbench zbufftest
 	@echo "\n ---- valgrind tests : memory analyzer ----"
-	valgrind --leak-check=yes --error-exitcode=1 ./datagen -g50M > $(VOID)
-	./datagen -g16KB > tmp
-	valgrind --leak-check=yes --error-exitcode=1 ./zstd -vf tmp -o $(VOID)
-	./datagen -g2930KB > tmp
-	valgrind --leak-check=yes --error-exitcode=1 ./zstd -5 -vf tmp -o tmp2
-	valgrind --leak-check=yes --error-exitcode=1 ./zstd -vdf tmp2 -o $(VOID)
-	./datagen -g64MB > tmp
-	valgrind --leak-check=yes --error-exitcode=1 ./zstd -vf tmp -o $(VOID)
+	$(VALGRIND) ./datagen -g50M > $(VOID)
+	$(VALGRIND) ./zstd ; if [ $$? -eq 0 ] ; then echo "zstd without argument should have failed"; false; fi
+	./datagen -g80 | $(VALGRIND) ./zstd - -c > $(VOID)
+	./datagen -g16KB | $(VALGRIND) ./zstd -vf - -o $(VOID)
+	./datagen -g2930KB | $(VALGRIND) ./zstd -5 -vf - -o tmp
+	$(VALGRIND) ./zstd -vdf tmp -o $(VOID)
+	./datagen -g64MB | $(VALGRIND) ./zstd -vf - -o $(VOID)
 	@rm tmp
-	valgrind --leak-check=yes --error-exitcode=1 ./fuzzer -T1mn -t1
-	valgrind --leak-check=yes --error-exitcode=1 ./fullbench -i1
-	valgrind --leak-check=yes --error-exitcode=1 ./zbufftest -T1mn
+	$(VALGRIND) ./fuzzer -T1mn -t1
+	$(VALGRIND) ./fullbench -i1
+	$(VALGRIND) ./zbufftest -T1mn
 
 endif
diff --git a/programs/bench.c b/programs/bench.c
index 1c776c4e..37c1e9f7 100644
--- a/programs/bench.c
+++ b/programs/bench.c
@@ -1,6 +1,6 @@
 /*
-    bench.c - Demo module to benchmark open-source compression algorithms
-    Copyright (C) Yann Collet 2012-2015
+    bench.c - open-source compression benchmark module
+    Copyright (C) Yann Collet 2012-2016
 
     GPL v2 License
 
@@ -19,8 +19,8 @@
     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 
     You can contact the author at :
+    - zstd homepage : http://www.zstd.net
     - zstd source repository : https://github.com/Cyan4973/zstd
-    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
 */
 
 /* **************************************
@@ -40,32 +40,61 @@
 #  define _LARGEFILE64_SOURCE
 #endif
 
-/* S_ISREG & gettimeofday() are not supported by MSVC */
-#if defined(_MSC_VER) || defined(_WIN32)
-#  define BMK_LEGACY_TIMER 1
-#endif
-
 
 /* *************************************
 *  Includes
 ***************************************/
+#define _POSIX_C_SOURCE 199309L   /* before <time.h> - needed for nanosleep() */
 #include <stdlib.h>      /* malloc, free */
 #include <string.h>      /* memset */
 #include <stdio.h>       /* fprintf, fopen, ftello64 */
 #include <sys/types.h>   /* stat64 */
 #include <sys/stat.h>    /* stat64 */
+#include <time.h>        /* clock_t, nanosleep, clock, CLOCKS_PER_SEC */
 
-/* Use ftime() if gettimeofday() is not available */
-#if defined(BMK_LEGACY_TIMER)
-#  include <sys/timeb.h>  /* timeb, ftime */
+/* sleep : posix - windows - others */
+#if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__)))
+#  include <unistd.h>
+#  include <sys/resource.h> /* setpriority */
+#  define BMK_sleep(s) sleep(s)
+#  define mili_sleep(mili) { struct timespec t; t.tv_sec=0; t.tv_nsec=mili*1000000ULL; nanosleep(&t, NULL); }
+#  define SET_HIGH_PRIORITY setpriority(PRIO_PROCESS, 0, -20)
+#elif defined(_WIN32)
+#  include <windows.h>
+#  define BMK_sleep(s) Sleep(1000*s)
+#  define mili_sleep(mili) Sleep(mili)
+#  define SET_HIGH_PRIORITY SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS)
 #else
-#  include <sys/time.h>   /* gettimeofday */
+#  define BMK_sleep(s)   /* disabled */
+#  define mili_sleep(mili) /* disabled */
+#  define SET_HIGH_PRIORITY /* disabled */
+#endif
+
+#if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__)))
+   typedef clock_t BMK_time_t;
+#  define BMK_initTimer(ticksPerSecond) ticksPerSecond=0
+#  define BMK_getTime(x) x = clock()
+#  define BMK_getSpanTimeMicro(ticksPerSecond, clockStart, clockEnd) (1000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC)
+#  define BMK_getSpanTimeNano(ticksPerSecond, clockStart, clockEnd) (1000000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC)
+#elif defined(_WIN32)
+   typedef LARGE_INTEGER BMK_time_t;
+#  define BMK_initTimer(x) if (!QueryPerformanceFrequency(&x)) { fprintf(stderr, "ERROR: QueryPerformance not present\n"); }
+#  define BMK_getTime(x) QueryPerformanceCounter(&x)
+#  define BMK_getSpanTimeMicro(ticksPerSecond, clockStart, clockEnd) (1000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart)
+#  define BMK_getSpanTimeNano(ticksPerSecond, clockStart, clockEnd) (1000000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart)
+#else
+   typedef int BMK_time_t;
+#  define BMK_initTimer(ticksPerSecond) ticksPerSecond=0
+#  define BMK_getTimeMicro(clockStart) clockStart=1
+#  define BMK_getSpanTimeMicro(ticksPerSecond, clockStart, clockEnd) (TIMELOOP_S*1000000ULL+clockEnd-clockStart)
+#  define BMK_getSpanTimeNano(ticksPerSecond, clockStart, clockEnd) (TIMELOOP_S*1000000000ULL+clockEnd-clockStart)
 #endif
 
 #include "mem.h"
 #include "zstd_static.h"
+#include "zstd_internal.h" /* ZSTD_compressBegin_targetSrcSize */
+#include "datagen.h"       /* RDG_genBuffer */
 #include "xxhash.h"
-#include "datagen.h"      /* RDG_genBuffer */
 
 
 /* *************************************
@@ -75,23 +104,32 @@
 #  define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
 #endif
 
-#ifdef _MSC_VER
-#define snprintf sprintf_s
+#if defined(_MSC_VER)
+#  define snprintf sprintf_s
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L))
+  /* part of <stdio.h> */
+#else
+  extern int snprintf (char* s, size_t maxlen, const char* format, ...);   /* not declared in <stdio.h> when C version < c99 */
 #endif
 
 
 /* *************************************
 *  Constants
 ***************************************/
-#define NBLOOPS    3
-#define TIMELOOP   2500
+#ifndef ZSTD_VERSION
+#  define ZSTD_VERSION ""
+#endif
+
+#define NBLOOPS          3
+#define TIMELOOP_S       1
+#define ACTIVEPERIOD_S  70
+#define COOLPERIOD_S    10
 
 #define KB *(1 <<10)
 #define MB *(1 <<20)
 #define GB *(1U<<30)
 
 static const size_t maxMemory = (sizeof(size_t)==4)  ?  (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31));
-#define DEFAULT_CHUNKSIZE   (4 MB)
 
 static U32 g_compressibilityDefault = 50;
 
@@ -124,62 +162,38 @@ static U32 g_displayLevel = 2;   /* 0 : no display;   1: errors;   2 : + result
 /* *************************************
 *  Benchmark Parameters
 ***************************************/
-static int nbIterations = NBLOOPS;
+static U32 g_nbIterations = NBLOOPS;
 static size_t g_blockSize = 0;
+int g_additionalParam = 0;
 
-void BMK_SetNbIterations(int nbLoops)
+void BMK_setNotificationLevel(unsigned level) { g_displayLevel=level; }
+
+void BMK_setAdditionalParam(int additionalParam) { g_additionalParam=additionalParam; }
+
+void BMK_SetNbIterations(unsigned nbLoops)
 {
-    nbIterations = nbLoops;
-    DISPLAY("- %i iterations -\n", nbIterations);
+    g_nbIterations = nbLoops;
+    DISPLAYLEVEL(2, "- %i iterations -\n", g_nbIterations);
 }
 
 void BMK_SetBlockSize(size_t blockSize)
 {
     g_blockSize = blockSize;
-    DISPLAY("using blocks of size %u KB \n", (U32)(blockSize>>10));
+    DISPLAYLEVEL(2, "using blocks of size %u KB \n", (U32)(blockSize>>10));
 }
 
 
 /* ********************************************************
 *  Private functions
 **********************************************************/
-
-#if defined(BMK_LEGACY_TIMER)
-
-static int BMK_GetMilliStart(void)
+/* returns time span in microseconds */
+static U64 BMK_clockSpan( BMK_time_t clockStart, BMK_time_t ticksPerSecond )
 {
-  /* Based on Legacy ftime()
-  *  Rolls over every ~ 12.1 days (0x100000/24/60/60)
-  *  Use GetMilliSpan to correct for rollover */
-  struct timeb tb;
-  int nCount;
-  ftime( &tb );
-  nCount = (int) (tb.millitm + (tb.time & 0xfffff) * 1000);
-  return nCount;
-}
+    BMK_time_t clockEnd;
 
-#else
-
-static int BMK_GetMilliStart(void)
-{
-  /* Based on newer gettimeofday()
-  *  Use GetMilliSpan to correct for rollover */
-  struct timeval tv;
-  int nCount;
-  gettimeofday(&tv, NULL);
-  nCount = (int) (tv.tv_usec/1000 + (tv.tv_sec & 0xfffff) * 1000);
-  return nCount;
-}
-
-#endif
-
-
-static int BMK_GetMilliSpan( int nTimeStart )
-{
-  int nSpan = BMK_GetMilliStart() - nTimeStart;
-  if ( nSpan < 0 )
-    nSpan += 0x100000 * 1000;
-  return nSpan;
+    (void)ticksPerSecond;
+    BMK_getTime(clockEnd);
+    return BMK_getSpanTimeMicro(ticksPerSecond, clockStart, clockEnd);
 }
 
 static U64 BMK_getFileSize(const char* infilename)
@@ -211,47 +225,55 @@ typedef struct
     size_t resSize;
 } blockParam_t;
 
+typedef struct
+{
+    double ratio;
+    size_t cSize;
+    double cSpeed;
+    double dSpeed;
+} benchResult_t;
+
+
 #define MIN(a,b) ((a)<(b) ? (a) : (b))
 #define MAX(a,b) ((a)>(b) ? (a) : (b))
 
 static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
                         const char* displayName, int cLevel,
                         const size_t* fileSizes, U32 nbFiles,
-                        const void* dictBuffer, size_t dictBufferSize)
+                        const void* dictBuffer, size_t dictBufferSize, benchResult_t *result)
 {
-    const size_t blockSize = (g_blockSize ? g_blockSize : srcSize) + (!srcSize);   /* avoid div by 0 */
-    const U32 maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles;
-    size_t largestBlockSize = 0;
+    size_t const blockSize = (g_blockSize ? g_blockSize : srcSize) + (!srcSize);   /* avoid div by 0 */
+    U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles;
     blockParam_t* const blockTable = (blockParam_t*) malloc(maxNbBlocks * sizeof(blockParam_t));
-    const size_t maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024);   /* add some room for safety */
+    size_t const maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024);   /* add some room for safety */
     void* const compressedBuffer = malloc(maxCompressedSize);
     void* const resultBuffer = malloc(srcSize);
     ZSTD_CCtx* refCtx = ZSTD_createCCtx();
     ZSTD_CCtx* ctx = ZSTD_createCCtx();
     ZSTD_DCtx* refDCtx = ZSTD_createDCtx();
     ZSTD_DCtx* dctx = ZSTD_createDCtx();
-    U64 crcOrig = XXH64(srcBuffer, srcSize, 0);
-    U32 nbBlocks = 0;
+    U32 nbBlocks;
+    BMK_time_t ticksPerSecond;
 
-    /* init */
-    if (strlen(displayName)>17) displayName += strlen(displayName)-17;   /* can only display 17 characters */
-
-    /* Memory allocation & restrictions */
+    /* checks */
     if (!compressedBuffer || !resultBuffer || !blockTable || !refCtx || !ctx || !refDCtx || !dctx)
         EXM_THROW(31, "not enough memory");
 
+    /* init */
+    if (strlen(displayName)>17) displayName += strlen(displayName)-17;   /* can only display 17 characters */
+    BMK_initTimer(ticksPerSecond);
+
     /* Init blockTable data */
-    {
-        U32 fileNb;
-        const char* srcPtr = (const char*)srcBuffer;
+    {   const char* srcPtr = (const char*)srcBuffer;
         char* cPtr = (char*)compressedBuffer;
         char* resPtr = (char*)resultBuffer;
-        for (fileNb=0; fileNb<nbFiles; fileNb++) {
+        U32 fileNb;
+        for (nbBlocks=0, fileNb=0; fileNb<nbFiles; fileNb++) {
             size_t remaining = fileSizes[fileNb];
-            U32 nbBlocksforThisFile = (U32)((remaining + (blockSize-1)) / blockSize);
-            U32 blockEnd = nbBlocks + nbBlocksforThisFile;
+            U32 const nbBlocksforThisFile = (U32)((remaining + (blockSize-1)) / blockSize);
+            U32 const blockEnd = nbBlocks + nbBlocksforThisFile;
             for ( ; nbBlocks<blockEnd; nbBlocks++) {
-                size_t thisBlockSize = MIN(remaining, blockSize);
+                size_t const thisBlockSize = MIN(remaining, blockSize);
                 blockTable[nbBlocks].srcPtr = srcPtr;
                 blockTable[nbBlocks].cPtr = cPtr;
                 blockTable[nbBlocks].resPtr = resPtr;
@@ -261,114 +283,137 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
                 cPtr += blockTable[nbBlocks].cRoom;
                 resPtr += thisBlockSize;
                 remaining -= thisBlockSize;
-                if (thisBlockSize > largestBlockSize) largestBlockSize = thisBlockSize;
     }   }   }
 
     /* warmimg up memory */
     RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.50, 1);
 
     /* Bench */
-    {
-        int loopNb;
-        size_t cSize = 0;
-        double fastestC = 100000000., fastestD = 100000000.;
-        double ratio = 0.;
+    {   double fastestC = 100000000., fastestD = 100000000.;
+        U64 const crcOrig = XXH64(srcBuffer, srcSize, 0);
         U64 crcCheck = 0;
+        BMK_time_t coolTime;
+        U32 testNb;
+        size_t cSize = 0;
+        double ratio = 0.;
 
-        DISPLAY("\r%79s\r", "");
-        for (loopNb = 1; loopNb <= nbIterations; loopNb++) {
-            int nbLoops;
-            int milliTime;
-            U32 blockNb;
+        BMK_getTime(coolTime);
+        DISPLAYLEVEL(2, "\r%79s\r", "");
+        for (testNb = 1; testNb <= (g_nbIterations + !g_nbIterations); testNb++) {
+            BMK_time_t clockStart, clockEnd;
+            U64 clockLoop = g_nbIterations ? TIMELOOP_S*1000000ULL : 10;
+
+            /* overheat protection */
+            if (BMK_clockSpan(coolTime, ticksPerSecond) > ACTIVEPERIOD_S*1000000ULL) {
+                DISPLAY("\rcooling down ...    \r");
+                BMK_sleep(COOLPERIOD_S);
+                BMK_getTime(coolTime);
+            }
 
             /* Compression */
-            DISPLAY("%2i-%-17.17s :%10u ->\r", loopNb, displayName, (U32)srcSize);
-            memset(compressedBuffer, 0xE5, maxCompressedSize);
+            DISPLAYLEVEL(2, "%2i-%-17.17s :%10u ->\r", testNb, displayName, (U32)srcSize);
+            memset(compressedBuffer, 0xE5, maxCompressedSize);  /* warm up and erase result buffer */
 
-            nbLoops = 0;
-            milliTime = BMK_GetMilliStart();
-            while (BMK_GetMilliStart() == milliTime);
-            milliTime = BMK_GetMilliStart();
-            while (BMK_GetMilliSpan(milliTime) < TIMELOOP) {
-                ZSTD_compressBegin_advanced(refCtx, dictBuffer, dictBufferSize, ZSTD_getParams(cLevel, MAX(dictBufferSize, largestBlockSize)));
-                for (blockNb=0; blockNb<nbBlocks; blockNb++) {
-                    size_t rSize = ZSTD_compress_usingPreparedCCtx(ctx, refCtx,
-                                        blockTable[blockNb].cPtr,  blockTable[blockNb].cRoom,
-                                        blockTable[blockNb].srcPtr,blockTable[blockNb].srcSize);
-                    if (ZSTD_isError(rSize)) EXM_THROW(1, "ZSTD_compress_usingPreparedCCtx() failed : %s", ZSTD_getErrorName(rSize));
-                    blockTable[blockNb].cSize = rSize;
-                }
-                nbLoops++;
-            }
-            milliTime = BMK_GetMilliSpan(milliTime);
+            mili_sleep(1); /* give processor time to other processes */
+            BMK_getTime(clockStart);
+            do { BMK_getTime(clockEnd); }
+            while (BMK_getSpanTimeNano(ticksPerSecond, clockStart, clockEnd) == 0);
+            BMK_getTime(clockStart);
+
+            {   U32 nbLoops;
+                for (nbLoops = 0 ; BMK_clockSpan(clockStart, ticksPerSecond) < clockLoop ; nbLoops++) {
+                    U32 blockNb;
+                    ZSTD_compressBegin_targetSrcSize(refCtx, dictBuffer, dictBufferSize, blockSize, cLevel);
+                  //  ZSTD_compressBegin_usingDict(refCtx, dictBuffer, dictBufferSize, cLevel);
+                    for (blockNb=0; blockNb<nbBlocks; blockNb++) {
+                        size_t const rSize = ZSTD_compress_usingPreparedCCtx(ctx, refCtx,
+                                            blockTable[blockNb].cPtr,  blockTable[blockNb].cRoom,
+                                            blockTable[blockNb].srcPtr,blockTable[blockNb].srcSize);
+                        if (ZSTD_isError(rSize)) EXM_THROW(1, "ZSTD_compress_usingPreparedCCtx() failed : %s", ZSTD_getErrorName(rSize));
+                        blockTable[blockNb].cSize = rSize;
+                }   }
+                {   U64 const clockSpan = BMK_clockSpan(clockStart, ticksPerSecond);
+                    if ((double)clockSpan < fastestC*nbLoops) fastestC = (double)clockSpan / nbLoops;
+            }   }
 
             cSize = 0;
-            for (blockNb=0; blockNb<nbBlocks; blockNb++)
-                cSize += blockTable[blockNb].cSize;
-            if ((double)milliTime < fastestC*nbLoops) fastestC = (double)milliTime / nbLoops;
+            { U32 blockNb; for (blockNb=0; blockNb<nbBlocks; blockNb++) cSize += blockTable[blockNb].cSize; }
             ratio = (double)srcSize / (double)cSize;
-            DISPLAY("%2i-%-17.17s :%10i ->%10i (%5.3f),%6.1f MB/s\r", loopNb, displayName, (int)srcSize, (int)cSize, ratio, (double)srcSize / fastestC / 1000.);
+            DISPLAYLEVEL(2, "%2i-%-17.17s :%10u ->%10u (%5.3f),%6.1f MB/s\r",
+                    testNb, displayName, (U32)srcSize, (U32)cSize, ratio,
+                    (double)srcSize / fastestC );
 
+            (void)fastestD; (void)crcOrig;   /*  unused when decompression disabled */
 #if 1
             /* Decompression */
             memset(resultBuffer, 0xD6, srcSize);  /* warm result buffer */
 
-            nbLoops = 0;
-            milliTime = BMK_GetMilliStart();
-            while (BMK_GetMilliStart() == milliTime);
-            milliTime = BMK_GetMilliStart();
+            mili_sleep(1); /* give processor time to other processes */
+            BMK_getTime(clockStart);
+            do { BMK_getTime(clockEnd); }
+            while (BMK_getSpanTimeNano(ticksPerSecond, clockStart, clockEnd) == 0);
+            BMK_getTime(clockStart);
 
-            for ( ; BMK_GetMilliSpan(milliTime) < TIMELOOP; nbLoops++) {
-                ZSTD_decompressBegin_usingDict(refDCtx, dictBuffer, dictBufferSize);
-                for (blockNb=0; blockNb<nbBlocks; blockNb++) {
-                    size_t regenSize = ZSTD_decompress_usingPreparedDCtx(dctx, refDCtx,
-                        blockTable[blockNb].resPtr, blockTable[blockNb].srcSize,
-                        blockTable[blockNb].cPtr, blockTable[blockNb].cSize);
-                    if (ZSTD_isError(regenSize)) {
-                        DISPLAY("ZSTD_decompress_usingPreparedDCtx() failed on block %u : %s",
-                                  blockNb, ZSTD_getErrorName(regenSize));
-                        goto _findError;
-                    }
-                    blockTable[blockNb].resSize = regenSize;
+            {   U32 nbLoops;
+                for (nbLoops = 0 ; BMK_clockSpan(clockStart, ticksPerSecond) < clockLoop ; nbLoops++) {
+                    U32 blockNb;
+                    ZSTD_decompressBegin_usingDict(refDCtx, dictBuffer, dictBufferSize);
+                    for (blockNb=0; blockNb<nbBlocks; blockNb++) {
+                        size_t const regenSize = ZSTD_decompress_usingPreparedDCtx(dctx, refDCtx,
+                            blockTable[blockNb].resPtr, blockTable[blockNb].srcSize,
+                            blockTable[blockNb].cPtr, blockTable[blockNb].cSize);
+                        if (ZSTD_isError(regenSize)) {
+                            DISPLAY("ZSTD_decompress_usingPreparedDCtx() failed on block %u : %s  \n",
+                                      blockNb, ZSTD_getErrorName(regenSize));
+                            clockLoop = 0;   /* force immediate test end */
+                            break;
+                        }
+                        blockTable[blockNb].resSize = regenSize;
+                }   }
+                {   U64 const clockSpan = BMK_clockSpan(clockStart, ticksPerSecond);
+                    if ((double)clockSpan < fastestD*nbLoops) fastestD = (double)clockSpan / nbLoops;
             }   }
 
-            milliTime = BMK_GetMilliSpan(milliTime);
-            if ((double)milliTime < fastestD*nbLoops) fastestD = (double)milliTime / nbLoops;
-            DISPLAY("%2i-%-17.17s :%10i ->%10i (%5.3f),%6.1f MB/s ,%6.1f MB/s\r", loopNb, displayName, (int)srcSize, (int)cSize, ratio, (double)srcSize / fastestC / 1000., (double)srcSize / fastestD / 1000.);
+            DISPLAYLEVEL(2, "%2i-%-17.17s :%10u ->%10u (%5.3f),%6.1f MB/s ,%6.1f MB/s\r",
+                    testNb, displayName, (U32)srcSize, (U32)cSize, ratio,
+                    (double)srcSize / fastestC,
+                    (double)srcSize / fastestD );
 
             /* CRC Checking */
-_findError:
-            crcCheck = XXH64(resultBuffer, srcSize, 0);
-            if (crcOrig!=crcCheck) {
-                size_t u;
-                DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", displayName, (unsigned)crcOrig, (unsigned)crcCheck);
-                for (u=0; u<srcSize; u++) {
-                    if (((const BYTE*)srcBuffer)[u] != ((const BYTE*)resultBuffer)[u]) {
-                        U32 segNb, bNb, pos;
-                        size_t bacc = 0;
-                        printf("Decoding error at pos %u ", (U32)u);
-                        for (segNb = 0; segNb < nbBlocks; segNb++) {
-                            if (bacc + blockTable[segNb].srcSize > u) break;
-                            bacc += blockTable[segNb].srcSize;
+            {   crcCheck = XXH64(resultBuffer, srcSize, 0);
+                if (crcOrig!=crcCheck) {
+                    size_t u;
+                    DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x   \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck);
+                    for (u=0; u<srcSize; u++) {
+                        if (((const BYTE*)srcBuffer)[u] != ((const BYTE*)resultBuffer)[u]) {
+                            U32 segNb, bNb, pos;
+                            size_t bacc = 0;
+                            DISPLAY("Decoding error at pos %u ", (U32)u);
+                            for (segNb = 0; segNb < nbBlocks; segNb++) {
+                                if (bacc + blockTable[segNb].srcSize > u) break;
+                                bacc += blockTable[segNb].srcSize;
+                            }
+                            pos = (U32)(u - bacc);
+                            bNb = pos / (128 KB);
+                            DISPLAY("(block %u, sub %u, pos %u) \n", segNb, bNb, pos);
+                            break;
                         }
-                        pos = (U32)(u - bacc);
-                        bNb = pos / (128 KB);
-                        printf("(block %u, sub %u, pos %u) \n", segNb, bNb, pos);
-                        break;
-                    }
-                    if (u==srcSize-1) {  /* should never happen */
-                        printf("no difference detected\n");
-                }   }
-                break;
-            }
+                        if (u==srcSize-1) {  /* should never happen */
+                            DISPLAY("no difference detected\n");
+                    }   }
+                    break;
+            }   }   /* CRC Checking */
 #endif
-        }
+        }   /* for (testNb = 1; testNb <= (g_nbIterations + !g_nbIterations); testNb++) */
 
-        if (crcOrig == crcCheck)
-            DISPLAY("%2i-%-17.17s :%10i ->%10i (%5.3f),%6.1f MB/s ,%6.1f MB/s \n", cLevel, displayName, (int)srcSize, (int)cSize, ratio, (double)srcSize / fastestC / 1000., (double)srcSize / fastestD / 1000.);
-        else
-            DISPLAY("%2i-\n", cLevel);
-    }
+        if (crcOrig == crcCheck) {
+            result->ratio = ratio;
+            result->cSize = cSize;
+            result->cSpeed = (double)srcSize / fastestC;
+            result->dSpeed = (double)srcSize / fastestD;
+        }
+        DISPLAYLEVEL(2, "%2i#\n", cLevel);
+    }   /* Bench */
 
     /* clean up */
     free(compressedBuffer);
@@ -383,39 +428,66 @@ _findError:
 
 static size_t BMK_findMaxMem(U64 requiredMem)
 {
-    size_t step = 64 MB;
+    size_t const step = 64 MB;
     BYTE* testmem = NULL;
 
     requiredMem = (((requiredMem >> 26) + 1) << 26);
-    requiredMem += 2 * step;
+    requiredMem += step;
     if (requiredMem > maxMemory) requiredMem = maxMemory;
 
-    while (!testmem) {
-        requiredMem -= step;
+    do {
         testmem = (BYTE*)malloc((size_t)requiredMem);
-    }
+        requiredMem -= step;
+    } while (!testmem);
+
     free(testmem);
-    return (size_t)(requiredMem - step);
+    return (size_t)(requiredMem);
 }
 
 static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize,
-                            const char* displayName, int cLevel,
+                            const char* displayName, int cLevel, int cLevelLast,
                             const size_t* fileSizes, unsigned nbFiles,
                             const void* dictBuffer, size_t dictBufferSize)
 {
-    if (cLevel < 0) {
-        int l;
-        for (l=1; l <= -cLevel; l++)
-            BMK_benchMem(srcBuffer, benchedSize,
-                         displayName, l,
-                         fileSizes, nbFiles,
-                         dictBuffer, dictBufferSize);
-        return;
+    benchResult_t result, total;
+    int l;
+
+    SET_HIGH_PRIORITY;
+
+    const char* pch = strrchr(displayName, '\\'); /* Windows */
+    if (!pch) pch = strrchr(displayName, '/'); /* Linux */
+    if (pch) displayName = pch+1;
+
+    memset(&result, 0, sizeof(result));
+    memset(&total, 0, sizeof(total));
+
+    if (g_displayLevel == 1 && !g_additionalParam)
+        DISPLAY("bench %s: input %u bytes, %i iterations, %u KB blocks\n", ZSTD_VERSION, (U32)benchedSize, g_nbIterations, (U32)(g_blockSize>>10));
+
+    if (cLevelLast < cLevel) cLevelLast = cLevel;
+
+    for (l=cLevel; l <= cLevelLast; l++) {
+        BMK_benchMem(srcBuffer, benchedSize,
+                     displayName, l,
+                     fileSizes, nbFiles,
+                     dictBuffer, dictBufferSize, &result);
+        if (g_displayLevel == 1) {
+            if (g_additionalParam)
+                DISPLAY("%-3i%11i (%5.3f) %6.1f MB/s %6.1f MB/s  %s (param=%d)\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName, g_additionalParam);
+            else
+                DISPLAY("%-3i%11i (%5.3f) %6.1f MB/s %6.1f MB/s  %s\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName);
+            total.cSize += result.cSize;
+            total.cSpeed += result.cSpeed;
+            total.dSpeed += result.dSpeed;
+            total.ratio += result.ratio;
+    }   }
+    if (g_displayLevel == 1 && cLevelLast > cLevel) {
+        total.cSize /= 1+cLevelLast-cLevel;
+        total.cSpeed /= 1+cLevelLast-cLevel;
+        total.dSpeed /= 1+cLevelLast-cLevel;
+        total.ratio /= 1+cLevelLast-cLevel;
+        DISPLAY("avg%11i (%5.3f) %6.1f MB/s %6.1f MB/s  %s\n", (int)total.cSize, total.ratio, total.cSpeed, total.dSpeed, displayName);
     }
-    BMK_benchMem(srcBuffer, benchedSize,
-                 displayName, cLevel,
-                 fileSizes, nbFiles,
-                 dictBuffer, dictBufferSize);
 }
 
 static U64 BMK_getTotalFileSize(const char** fileNamesTable, unsigned nbFiles)
@@ -427,31 +499,32 @@ static U64 BMK_getTotalFileSize(const char** fileNamesTable, unsigned nbFiles)
     return total;
 }
 
+/*! BMK_loadFiles() :
+    Loads `buffer` with content of files listed within `fileNamesTable`.
+    At most, fills `buffer` entirely */
 static void BMK_loadFiles(void* buffer, size_t bufferSize,
                           size_t* fileSizes,
                           const char** fileNamesTable, unsigned nbFiles)
 {
-    BYTE* buff = (BYTE*)buffer;
     size_t pos = 0;
-    unsigned n;
 
+    unsigned n;
     for (n=0; n<nbFiles; n++) {
-        size_t readSize;
         U64 fileSize = BMK_getFileSize(fileNamesTable[n]);
-        FILE* f = fopen(fileNamesTable[n], "rb");
+        FILE* const f = fopen(fileNamesTable[n], "rb");
         if (f==NULL) EXM_THROW(10, "impossible to open file %s", fileNamesTable[n]);
         DISPLAYLEVEL(2, "Loading %s...       \r", fileNamesTable[n]);
-        if (fileSize > bufferSize-pos) fileSize = bufferSize-pos;
-        readSize = fread(buff+pos, 1, (size_t)fileSize, f);
-        if (readSize != (size_t)fileSize) EXM_THROW(11, "could not read %s", fileNamesTable[n]);
-        pos += readSize;
+        if (fileSize > bufferSize-pos) fileSize = bufferSize-pos, nbFiles=n;   /* buffer too small - stop after this file */
+        { size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
+          if (readSize != (size_t)fileSize) EXM_THROW(11, "could not read %s", fileNamesTable[n]);
+          pos += readSize; }
         fileSizes[n] = (size_t)fileSize;
         fclose(f);
     }
 }
 
 static void BMK_benchFileTable(const char** fileNamesTable, unsigned nbFiles,
-                               const char* dictFileName, int cLevel)
+                               const char* dictFileName, int cLevel, int cLevelLast)
 {
     void* srcBuffer;
     size_t benchedSize;
@@ -491,7 +564,7 @@ static void BMK_benchFileTable(const char** fileNamesTable, unsigned nbFiles,
     else displayName = fileNamesTable[0];
 
     BMK_benchCLevel(srcBuffer, benchedSize,
-                    displayName, cLevel,
+                    displayName, cLevel, cLevelLast,
                     fileSizes, nbFiles,
                     dictBuffer, dictBufferSize);
 
@@ -502,7 +575,7 @@ static void BMK_benchFileTable(const char** fileNamesTable, unsigned nbFiles,
 }
 
 
-static void BMK_syntheticTest(int cLevel, double compressibility)
+static void BMK_syntheticTest(int cLevel, int cLevelLast, double compressibility)
 {
     char name[20] = {0};
     size_t benchedSize = 10000000;
@@ -516,7 +589,7 @@ static void BMK_syntheticTest(int cLevel, double compressibility)
 
     /* Bench */
     snprintf (name, sizeof(name), "Synthetic %2u%%", (unsigned)(compressibility*100));
-    BMK_benchCLevel(srcBuffer, benchedSize, name, cLevel, &benchedSize, 1, NULL, 0);
+    BMK_benchCLevel(srcBuffer, benchedSize, name, cLevel, cLevelLast, &benchedSize, 1, NULL, 0);
 
     /* clean up */
     free(srcBuffer);
@@ -524,14 +597,14 @@ static void BMK_syntheticTest(int cLevel, double compressibility)
 
 
 int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles,
-                   const char* dictFileName, int cLevel)
+                   const char* dictFileName, int cLevel, int cLevelLast)
 {
-    double compressibility = (double)g_compressibilityDefault / 100;
+    double const compressibility = (double)g_compressibilityDefault / 100;
 
     if (nbFiles == 0)
-        BMK_syntheticTest(cLevel, compressibility);
+        BMK_syntheticTest(cLevel, cLevelLast, compressibility);
     else
-        BMK_benchFileTable(fileNamesTable, nbFiles, dictFileName, cLevel);
+        BMK_benchFileTable(fileNamesTable, nbFiles, dictFileName, cLevel, cLevelLast);
     return 0;
 }
 
diff --git a/programs/bench.h b/programs/bench.h
index 9ae83690..3a1ca3a2 100644
--- a/programs/bench.h
+++ b/programs/bench.h
@@ -27,10 +27,11 @@
 
 /* Main function */
 int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles,
-                   const char* dictFileName, int cLevel);
+                   const char* dictFileName, int cLevel, int cLevelLast);
 
 /* Set Parameters */
-void BMK_SetNbIterations(int nbLoops);
+void BMK_SetNbIterations(unsigned nbLoops);
 void BMK_SetBlockSize(size_t blockSize);
-
+void BMK_setAdditionalParam(int additionalParam);
+void BMK_setNotificationLevel(unsigned level);
 
diff --git a/programs/datagen.c b/programs/datagen.c
index ff3a8cdc..b7e33e3c 100644
--- a/programs/datagen.c
+++ b/programs/datagen.c
@@ -1,6 +1,6 @@
 /*
     datagen.c - compressible data generator test tool
-    Copyright (C) Yann Collet 2012-2015
+    Copyright (C) Yann Collet 2012-2016
 
     GPL v2 License
 
@@ -19,8 +19,8 @@
     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 
     You can contact the author at :
-   - ZSTD source repository : https://github.com/Cyan4973/zstd
-   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+    - zstd homepage : http://www.zstd.net/
+    - source repository : https://github.com/Cyan4973/zstd
 */
 
 /*-************************************
@@ -67,9 +67,6 @@
 **************************************/
 #define KB *(1 <<10)
 
-#define PRIME1   2654435761U
-#define PRIME2   2246822519U
-
 
 /*-************************************
 *  Local types
@@ -87,9 +84,11 @@ typedef BYTE litDistribTable[LTSIZE];
 #define RDG_rotl32(x,r) ((x << r) | (x >> (32 - r)))
 static unsigned int RDG_rand(U32* src)
 {
+    static const U32 prime1 = 2654435761U;
+    static const U32 prime2 = 2246822519U;
     U32 rand32 = *src;
-    rand32 *= PRIME1;
-    rand32 ^= PRIME2;
+    rand32 *= prime1;
+    rand32 ^= prime2;
     rand32  = RDG_rotl32(rand32, 13);
     *src = rand32;
     return rand32;
@@ -103,7 +102,7 @@ static void RDG_fillLiteralDistrib(litDistribTable lt, double ld)
     BYTE firstChar = '(';
     BYTE lastChar = '}';
 
-    if (ld==0.0) {
+    if (ld<=0.0) {
         character = 0;
         firstChar = 0;
         lastChar =255;
@@ -122,7 +121,7 @@ static void RDG_fillLiteralDistrib(litDistribTable lt, double ld)
 
 static BYTE RDG_genChar(U32* seed, const litDistribTable lt)
 {
-    U32 id = RDG_rand(seed) & LTMASK;
+    U32 const id = RDG_rand(seed) & LTMASK;
     return (lt[id]);
 }
 
@@ -194,28 +193,28 @@ void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba
 
 #define RDG_DICTSIZE  (32 KB)
 #define RDG_BLOCKSIZE (128 KB)
+#define MIN(a,b)  ( (a) < (b) ? (a) : (b) )
 void RDG_genStdout(unsigned long long size, double matchProba, double litProba, unsigned seed)
 {
     BYTE* buff = (BYTE*)malloc(RDG_DICTSIZE + RDG_BLOCKSIZE);
     U64 total = 0;
-    size_t genBlockSize = RDG_BLOCKSIZE;
-    litDistribTable lt;
+    litDistribTable ldt;
 
     /* init */
     if (buff==NULL) { fprintf(stdout, "not enough memory\n"); exit(1); }
-    if (litProba==0.0) litProba = matchProba / 4.5;
-    RDG_fillLiteralDistrib(lt, litProba);
+    if (litProba<=0.0) litProba = matchProba / 4.5;
+    RDG_fillLiteralDistrib(ldt, litProba);
     SET_BINARY_MODE(stdout);
 
     /* Generate initial dict */
-    RDG_genBlock(buff, RDG_DICTSIZE, 0, matchProba, lt, &seed);
+    RDG_genBlock(buff, RDG_DICTSIZE, 0, matchProba, ldt, &seed);
 
     /* Generate compressible data */
     while (total < size) {
-        RDG_genBlock(buff, RDG_DICTSIZE+RDG_BLOCKSIZE, RDG_DICTSIZE, matchProba, lt, &seed);
-        if (size-total < RDG_BLOCKSIZE) genBlockSize = (size_t)(size-total);
+        size_t const genBlockSize = (size_t) (MIN (RDG_BLOCKSIZE, size-total));
+        RDG_genBlock(buff, RDG_DICTSIZE+RDG_BLOCKSIZE, RDG_DICTSIZE, matchProba, ldt, &seed);
         total += genBlockSize;
-        fwrite(buff, 1, genBlockSize, stdout);
+        { size_t const unused = fwrite(buff, 1, genBlockSize, stdout); (void)unused; }
         /* update dict */
         memcpy(buff, buff + RDG_BLOCKSIZE, RDG_DICTSIZE);
     }
diff --git a/programs/dibio.c b/programs/dibio.c
index 646fe2c6..17f89586 100644
--- a/programs/dibio.c
+++ b/programs/dibio.c
@@ -259,7 +259,7 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
                         srcBuffer, fileSizes, nbFiles,
                         params);
     if (ZDICT_isError(dictSize)) {
-        DISPLAYLEVEL(1, "dictionary training failed : %s", ZDICT_getErrorName(dictSize));   /* should not happen */
+        DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize));   /* should not happen */
         result = 1;
         goto _cleanup;
     }
diff --git a/programs/fileio.c b/programs/fileio.c
index 028c7db4..ec0d19f2 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -64,7 +64,7 @@
 #include <sys/stat.h>   /* stat64 */
 #include "mem.h"
 #include "fileio.h"
-#include "zstd_static.h"   /* ZSTD_magicNumber */
+#include "zstd_static.h"   /* ZSTD_magicNumber, ZSTD_frameHeaderSize_max */
 #include "zbuff_static.h"
 
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
@@ -126,6 +126,7 @@
 #define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
 #define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
 static U32 g_displayLevel = 2;   /* 0 : no display;   1: errors;   2 : + result + interaction + warnings;   3 : + progression;   4 : + information */
+void FIO_setNotificationLevel(unsigned level) { g_displayLevel=level; }
 
 #define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
             if ((FIO_GetMilliSpan(g_time) > refreshRate) || (g_displayLevel>=4)) \
@@ -142,7 +143,8 @@ static clock_t g_time = 0;
 ***************************************/
 static U32 g_overwrite = 0;
 void FIO_overwriteMode(void) { g_overwrite=1; }
-void FIO_setNotificationLevel(unsigned level) { g_displayLevel=level; }
+static U32 g_maxWLog = 23;
+void FIO_setMaxWLog(unsigned maxWLog) { g_maxWLog = maxWLog; }
 
 
 /*-*************************************
@@ -239,10 +241,10 @@ static FILE* FIO_openDstFile(const char* dstFileName)
 }
 
 
-/*!FIO_loadFile
-*  creates a buffer, pointed by *bufferPtr,
-*  loads "filename" content into it
-*  up to MAX_DICT_SIZE bytes
+/*! FIO_loadFile() :
+*   creates a buffer, pointed by `*bufferPtr`,
+*   loads `filename` content into it,
+*   up to MAX_DICT_SIZE bytes
 */
 static size_t FIO_loadFile(void** bufferPtr, const char* fileName)
 {
@@ -274,7 +276,7 @@ static size_t FIO_loadFile(void** bufferPtr, const char* fileName)
 }
 
 
-/* **********************************************************************
+/*-**********************************************************************
 *  Compression
 ************************************************************************/
 typedef struct {
@@ -321,7 +323,7 @@ static void FIO_freeCResources(cRess_t ress)
 
 
 /*! FIO_compressFilename_internal() :
- *  same as FIO_compressFilename_extRess(), with ress.desFile already opened
+ *  same as FIO_compressFilename_extRess(), with `ress.desFile` already opened.
  *  @return : 0 : compression completed correctly,
  *            1 : missing or pb opening srcFileName
  */
@@ -331,24 +333,28 @@ static int FIO_compressFilename_internal(cRess_t ress,
 {
     FILE* srcFile = ress.srcFile;
     FILE* dstFile = ress.dstFile;
-    U64 filesize = 0;
+    U64 readsize = 0;
     U64 compressedfilesize = 0;
     size_t dictSize = ress.dictBufferSize;
-    size_t sizeCheck, errorCode;
+    size_t sizeCheck;
+    ZSTD_parameters params;
+    U64 const fileSize = FIO_getFileSize(srcFileName);
 
     /* init */
-    filesize = MAX(FIO_getFileSize(srcFileName),dictSize);
-    errorCode = ZBUFF_compressInit_advanced(ress.ctx, ress.dictBuffer, ress.dictBufferSize, ZSTD_getParams(cLevel, filesize));
-    if (ZBUFF_isError(errorCode)) EXM_THROW(21, "Error initializing compression : %s", ZBUFF_getErrorName(errorCode));
+    params.cParams = ZSTD_getCParams(cLevel, fileSize, dictSize);
+    params.fParams.contentSizeFlag = 1;
+    if (g_maxWLog) if (params.cParams.windowLog > g_maxWLog) params.cParams.windowLog = g_maxWLog;
+    { size_t const errorCode = ZBUFF_compressInit_advanced(ress.ctx, ress.dictBuffer, ress.dictBufferSize, params, fileSize);
+      if (ZBUFF_isError(errorCode)) EXM_THROW(21, "Error initializing compression : %s", ZBUFF_getErrorName(errorCode)); }
 
     /* Main compression loop */
-    filesize = 0;
+    readsize = 0;
     while (1) {
         /* Fill input Buffer */
-        size_t inSize = fread(ress.srcBuffer, (size_t)1, ress.srcBufferSize, srcFile);
+        size_t const inSize = fread(ress.srcBuffer, (size_t)1, ress.srcBufferSize, srcFile);
         if (inSize==0) break;
-        filesize += inSize;
-        DISPLAYUPDATE(2, "\rRead : %u MB  ", (U32)(filesize>>20));
+        readsize += inSize;
+        DISPLAYUPDATE(2, "\rRead : %u MB  ", (U32)(readsize>>20));
 
         {   /* Compress using buffered streaming */
             size_t usedInSize = inSize;
@@ -365,13 +371,12 @@ static int FIO_compressFilename_internal(cRess_t ress,
             if (sizeCheck!=cSize) EXM_THROW(25, "Write error : cannot write compressed block into %s", dstFileName);
             compressedfilesize += cSize;
         }
-        DISPLAYUPDATE(2, "\rRead : %u MB  ==> %.2f%%   ", (U32)(filesize>>20), (double)compressedfilesize/filesize*100);
+        DISPLAYUPDATE(2, "\rRead : %u MB  ==> %.2f%%   ", (U32)(readsize>>20), (double)compressedfilesize/readsize*100);
     }
 
     /* End of Frame */
-    {
-        size_t cSize = ress.dstBufferSize;
-        size_t result = ZBUFF_compressEnd(ress.ctx, ress.dstBuffer, &cSize);
+    {   size_t cSize = ress.dstBufferSize;
+        size_t const result = ZBUFF_compressEnd(ress.ctx, ress.dstBuffer, &cSize);
         if (result!=0) EXM_THROW(26, "Compression error : cannot create frame end");
 
         sizeCheck = fwrite(ress.dstBuffer, 1, cSize, dstFile);
@@ -382,14 +387,14 @@ static int FIO_compressFilename_internal(cRess_t ress,
     /* Status */
     DISPLAYLEVEL(2, "\r%79s\r", "");
     DISPLAYLEVEL(2,"Compressed %llu bytes into %llu bytes ==> %.2f%%\n",
-        (unsigned long long) filesize, (unsigned long long) compressedfilesize, (double)compressedfilesize/filesize*100);
+        (unsigned long long)readsize, (unsigned long long) compressedfilesize, (double)compressedfilesize/readsize*100);
 
     return 0;
 }
 
 
 /*! FIO_compressFilename_internal() :
- *  same as FIO_compressFilename_extRess(), with ress.desFile already opened
+ *  same as FIO_compressFilename_extRess(), with ress.destFile already opened (typically stdout)
  *  @return : 0 : compression completed correctly,
  *            1 : missing or pb opening srcFileName
  */
@@ -427,6 +432,7 @@ static int FIO_compressFilename_extRess(cRess_t ress,
     if (ress.dstFile==0) { fclose(ress.srcFile); return 1; }
 
     result = FIO_compressFilename_internal(ress, dstFileName, srcFileName, cLevel);
+    if (result!=0) remove(dstFileName);   /* remove operation artefact */
 
     fclose(ress.srcFile);   /* no pb to expect : only reading */
     if (fclose(ress.dstFile)) EXM_THROW(28, "Write error : cannot properly close %s", dstFileName);
@@ -482,7 +488,7 @@ int FIO_compressMultipleFilenames(const char** inFileNamesTable, unsigned nbFile
         ress.dstFile = stdout;
         for (u=0; u<nbFiles; u++)
             missed_files += FIO_compressFilename_srcFile(ress, stdoutmark,
-                                                          inFileNamesTable[u], compressionLevel);
+                                                         inFileNamesTable[u], compressionLevel);
         if (fclose(ress.dstFile)) EXM_THROW(29, "Write error : cannot properly close %s", stdoutmark);
     } else {
         for (u=0; u<nbFiles; u++) {
@@ -547,35 +553,46 @@ static void FIO_freeDResources(dRess_t ress)
 }
 
 
+/** FIO_decompressFrame() :
+    @return : size of decoded frame
+*/
 unsigned long long FIO_decompressFrame(dRess_t ress,
                                        FILE* foutput, FILE* finput, size_t alreadyLoaded)
 {
     U64    frameSize = 0;
-    size_t readSize=alreadyLoaded;
+    size_t readSize;
+
+    ZBUFF_decompressInitDictionary(ress.dctx, ress.dictBuffer, ress.dictBufferSize);
+
+    /* Complete Header loading */
+    {   size_t const toLoad = ZSTD_frameHeaderSize_max - alreadyLoaded;   /* assumption : alreadyLoaded <= ZSTD_frameHeaderSize_max */
+        size_t const checkSize = fread(((char*)ress.srcBuffer) + alreadyLoaded, 1, toLoad, finput);
+        if (checkSize != toLoad) EXM_THROW(32, "Read error");   /* assumption : srcSize >= ZSTD_frameHeaderSize_max */
+    }
+    readSize = ZSTD_frameHeaderSize_max;
 
     /* Main decompression Loop */
-    ZBUFF_decompressInitDictionary(ress.dctx, ress.dictBuffer, ress.dictBufferSize);
     while (1) {
         /* Decode */
-        size_t sizeCheck;
         size_t inSize=readSize, decodedSize=ress.dstBufferSize;
         size_t toRead = ZBUFF_decompressContinue(ress.dctx, ress.dstBuffer, &decodedSize, ress.srcBuffer, &inSize);
         if (ZBUFF_isError(toRead)) EXM_THROW(36, "Decoding error : %s", ZBUFF_getErrorName(toRead));
         readSize -= inSize;
 
         /* Write block */
-        sizeCheck = fwrite(ress.dstBuffer, 1, decodedSize, foutput);
-        if (sizeCheck != decodedSize) EXM_THROW(37, "Write error : unable to write data block to destination file");
+        { size_t const sizeCheck = fwrite(ress.dstBuffer, 1, decodedSize, foutput);
+          if (sizeCheck != decodedSize) EXM_THROW(37, "Write error : unable to write data block into destination"); }
         frameSize += decodedSize;
         DISPLAYUPDATE(2, "\rDecoded : %u MB...     ", (U32)(frameSize>>20) );
 
-        if (toRead == 0) break;
+        if (toRead == 0) break;   /* end of frame */
         if (readSize) EXM_THROW(38, "Decoding error : should consume entire input");
 
         /* Fill input buffer */
         if (toRead > ress.srcBufferSize) EXM_THROW(34, "too large block");
         readSize = fread(ress.srcBuffer, 1, toRead, finput);
-        if (readSize != toRead) EXM_THROW(35, "Read error");
+        if (readSize != toRead)
+            EXM_THROW(35, "Read error");
     }
 
     return frameSize;
@@ -596,10 +613,9 @@ static int FIO_decompressSrcFile(dRess_t ress, const char* srcFileName)
 
     /* for each frame */
     for ( ; ; ) {
-        size_t sizeCheck;
         /* check magic number -> version */
-        size_t toRead = 4;
-        sizeCheck = fread(ress.srcBuffer, (size_t)1, toRead, srcFile);
+        size_t const toRead = 4;
+        size_t const sizeCheck = fread(ress.srcBuffer, (size_t)1, toRead, srcFile);
         if (sizeCheck==0) break;   /* no more input */
         if (sizeCheck != toRead) EXM_THROW(31, "zstd: %s read error : cannot read header", srcFileName);
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
@@ -633,13 +649,17 @@ static int FIO_decompressSrcFile(dRess_t ress, const char* srcFileName)
 static int FIO_decompressFile_extRess(dRess_t ress,
                                       const char* dstFileName, const char* srcFileName)
 {
+    int result;
     ress.dstFile = FIO_openDstFile(dstFileName);
     if (ress.dstFile==0) return 1;
 
-    FIO_decompressSrcFile(ress, srcFileName);
+    result = FIO_decompressSrcFile(ress, srcFileName);
+    if (result != 0) {
+      remove(dstFileName);
+    }
 
     if (fclose(ress.dstFile)) EXM_THROW(38, "Write error : cannot properly close %s", dstFileName);
-    return 0;
+    return result;
 }
 
 
diff --git a/programs/fileio.h b/programs/fileio.h
index ee3cf227..d5aae449 100644
--- a/programs/fileio.h
+++ b/programs/fileio.h
@@ -41,14 +41,15 @@ extern "C" {
 #endif
 
 
-/* *************************************
+/*-*************************************
 *  Parameters
 ***************************************/
 void FIO_overwriteMode(void);
 void FIO_setNotificationLevel(unsigned level);
+void FIO_setMaxWLog(unsigned maxWLog);   /**< if `maxWLog` == 0, no max enforced */
 
 
-/* *************************************
+/*-*************************************
 *  Single File functions
 ***************************************/
 /** FIO_compressFilename() :
@@ -60,7 +61,7 @@ int FIO_compressFilename (const char* outfilename, const char* infilename, const
 int FIO_decompressFilename (const char* outfilename, const char* infilename, const char* dictFileName);
 
 
-/* *************************************
+/*-*************************************
 *  Multiple File functions
 ***************************************/
 /** FIO_compressMultipleFilenames() :
diff --git a/programs/fullbench.c b/programs/fullbench.c
index 0eeb88e8..d9bce181 100644
--- a/programs/fullbench.c
+++ b/programs/fullbench.c
@@ -1,6 +1,6 @@
 /*
     fullbench.c - Detailed bench program for zstd
-    Copyright (C) Yann Collet 2014-2015
+    Copyright (C) Yann Collet 2014-2016
 
     GPL v2 License
 
@@ -19,11 +19,10 @@
     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 
     You can contact the author at :
-    - zstd source repository : https://github.com/Cyan4973/zstd
-    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+    - zstd homepage : http://www.zstd.net
 */
 
-/**************************************
+/*_************************************
 *  Compiler Options
 **************************************/
 /* Disable some Visual warning messages */
@@ -38,13 +37,8 @@
 #  define _LARGEFILE64_SOURCE
 #endif
 
-/* S_ISREG & gettimeofday() are not supported by MSVC */
-#if defined(_MSC_VER) || defined(_WIN32)
-#  define BMK_LEGACY_TIMER 1
-#endif
 
-
-/**************************************
+/*_************************************
 *  Includes
 **************************************/
 #include <stdlib.h>       /* malloc */
@@ -52,21 +46,16 @@
 #include <sys/types.h>    /* stat64 */
 #include <sys/stat.h>     /* stat64 */
 #include <string.h>       /* strcmp */
-
-/* Use ftime() if gettimeofday() is not available on your target */
-#if defined(BMK_LEGACY_TIMER)
-#  include <sys/timeb.h>  /* timeb, ftime */
-#else
-#  include <sys/time.h>   /* gettimeofday */
-#endif
+#include <time.h>         /* clock_t, clock, CLOCKS_PER_SEC */
 
 #include "mem.h"
 #include "zstd_static.h"
 #include "fse_static.h"
+#include "zbuff.h"
 #include "datagen.h"
 
 
-/**************************************
+/*_************************************
 *  Compiler Options
 **************************************/
 /* S_ISREG & gettimeofday() are not supported by MSVC */
@@ -75,7 +64,7 @@
 #endif
 
 
-/**************************************
+/*_************************************
 *  Constants
 **************************************/
 #define PROGRAM_DESCRIPTION "Zstandard speed analyzer"
@@ -90,99 +79,63 @@
 #define MB *(1<<20)
 
 #define NBLOOPS    6
-#define TIMELOOP   2500
+#define TIMELOOP_S 2
 
 #define KNUTH      2654435761U
 #define MAX_MEM    (1984 MB)
-#define DEFAULT_CHUNKSIZE   (4<<20)
 
 #define COMPRESSIBILITY_DEFAULT 0.50
-static const size_t sampleSize = 10000000;
+static const size_t g_sampleSize = 10000000;
 
 
-/**************************************
+/*_************************************
 *  Macros
 **************************************/
 #define DISPLAY(...)  fprintf(stderr, __VA_ARGS__)
 
 
-/**************************************
+/*_************************************
 *  Benchmark Parameters
 **************************************/
-static int nbIterations = NBLOOPS;
+static U32 g_nbIterations = NBLOOPS;
 static double g_compressibility = COMPRESSIBILITY_DEFAULT;
 
-void BMK_SetNbIterations(int nbLoops)
+static void BMK_SetNbIterations(U32 nbLoops)
 {
-    nbIterations = nbLoops;
-    DISPLAY("- %i iterations -\n", nbIterations);
+    g_nbIterations = nbLoops;
+    DISPLAY("- %i iterations -\n", g_nbIterations);
 }
 
 
-/*********************************************************
+/*_*******************************************************
 *  Private functions
 *********************************************************/
-
-#if defined(BMK_LEGACY_TIMER)
-
-static int BMK_GetMilliStart(void)
+static clock_t BMK_clockSpan( clock_t clockStart )
 {
-  /* Based on Legacy ftime()
-  *  Rolls over every ~ 12.1 days (0x100000/24/60/60)
-  *  Use GetMilliSpan to correct for rollover */
-  struct timeb tb;
-  int nCount;
-  ftime( &tb );
-  nCount = (int) (tb.millitm + (tb.time & 0xfffff) * 1000);
-  return nCount;
-}
-
-#else
-
-static int BMK_GetMilliStart(void)
-{
-  /* Based on newer gettimeofday()
-  *  Use GetMilliSpan to correct for rollover */
-  struct timeval tv;
-  int nCount;
-  gettimeofday(&tv, NULL);
-  nCount = (int) (tv.tv_usec/1000 + (tv.tv_sec & 0xfffff) * 1000);
-  return nCount;
-}
-
-#endif
-
-
-static int BMK_GetMilliSpan( int nTimeStart )
-{
-  int nSpan = BMK_GetMilliStart() - nTimeStart;
-  if ( nSpan < 0 )
-    nSpan += 0x100000 * 1000;
-  return nSpan;
+    return clock() - clockStart;   /* works even if overflow, span limited to <= ~30mn */
 }
 
 
 static size_t BMK_findMaxMem(U64 requiredMem)
 {
-    size_t step = 64 MB;
-    BYTE* testmem=NULL;
+    size_t const step = 64 MB;
+    void* testmem = NULL;
 
     requiredMem = (((requiredMem >> 26) + 1) << 26);
     if (requiredMem > MAX_MEM) requiredMem = MAX_MEM;
 
-    requiredMem += 2*step;
-    while (!testmem)
-    {
+    requiredMem += step;
+    do {
+        testmem = malloc ((size_t)requiredMem);
         requiredMem -= step;
-        testmem = (BYTE*) malloc ((size_t)requiredMem);
-    }
+    } while (!testmem);
 
     free (testmem);
-    return (size_t) (requiredMem - step);
+    return (size_t) requiredMem;
 }
 
 
-static U64 BMK_GetFileSize(char* infilename)
+static U64 BMK_GetFileSize(const char* infilename)
 {
     int r;
 #if defined(_MSC_VER)
@@ -197,67 +150,118 @@ static U64 BMK_GetFileSize(char* infilename)
 }
 
 
-/*********************************************************
+/*_*******************************************************
 *  Benchmark wrappers
 *********************************************************/
 typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
-typedef struct
-{
+typedef struct {
     blockType_t blockType;
     U32 unusedBits;
     U32 origSize;
 } blockProperties_t;
 
-static size_t g_cSize = 0;
-static ZSTD_DCtx* g_dctxPtr = NULL;
-
-extern size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr);
-extern size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr, FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, const void* src, size_t srcSize);
-
 size_t local_ZSTD_compress(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize)
 {
     (void)buff2;
     return ZSTD_compress(dst, dstSize, src, srcSize, 1);
 }
 
+static size_t g_cSize = 0;
 size_t local_ZSTD_decompress(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize)
 {
     (void)src; (void)srcSize;
     return ZSTD_decompress(dst, dstSize, buff2, g_cSize);
 }
 
+static ZSTD_DCtx* g_zdc = NULL;
 extern size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* ctx, const void* src, size_t srcSize);
 size_t local_ZSTD_decodeLiteralsBlock(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize)
 {
     (void)src; (void)srcSize; (void)dst; (void)dstSize;
-    return ZSTD_decodeLiteralsBlock((ZSTD_DCtx*)g_dctxPtr, buff2, g_cSize);
+    return ZSTD_decodeLiteralsBlock((ZSTD_DCtx*)g_zdc, buff2, g_cSize);
 }
 
+extern size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr);
+extern size_t ZSTD_decodeSeqHeaders(int* nbSeq, FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, U32 tableRepeatFlag, const void* src, size_t srcSize);
 size_t local_ZSTD_decodeSeqHeaders(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize)
 {
     U32 DTableML[FSE_DTABLE_SIZE_U32(10)], DTableLL[FSE_DTABLE_SIZE_U32(10)], DTableOffb[FSE_DTABLE_SIZE_U32(9)];   /* MLFSELog, LLFSELog and OffFSELog are not public values */
-    const BYTE* dumps;
-    size_t length;
     int nbSeq;
     (void)src; (void)srcSize; (void)dst; (void)dstSize;
-    return ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &length, DTableLL, DTableML, DTableOffb, buff2, g_cSize);
+    return ZSTD_decodeSeqHeaders(&nbSeq, DTableLL, DTableML, DTableOffb, 0, buff2, g_cSize);
 }
 
 
+static ZBUFF_CCtx* g_zbcc = NULL;
+size_t local_ZBUFF_compress(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize)
+{
+    size_t compressedSize;
+    size_t srcRead = srcSize, dstWritten = dstCapacity;
+    (void)buff2;
+    ZBUFF_compressInit(g_zbcc, 1);
+    ZBUFF_compressContinue(g_zbcc, dst, &dstWritten, src, &srcRead);
+    compressedSize = dstWritten;
+    dstWritten = dstCapacity-compressedSize;
+    ZBUFF_compressEnd(g_zbcc, ((char*)dst)+compressedSize, &dstWritten);
+    compressedSize += dstWritten;
+    return compressedSize;
+}
 
-/*********************************************************
+static ZBUFF_DCtx* g_zbdc = NULL;
+static size_t local_ZBUFF_decompress(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize)
+{
+    size_t srcRead = g_cSize, dstWritten = dstCapacity;
+    (void)src; (void)srcSize;
+    ZBUFF_decompressInit(g_zbdc);
+    ZBUFF_decompressContinue(g_zbdc, dst, &dstWritten, buff2, &srcRead);
+    return dstWritten;
+}
+
+static ZSTD_CCtx* g_zcc = NULL;
+size_t local_ZSTD_compressContinue(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize)
+{
+    size_t compressedSize;
+    (void)buff2;
+    ZSTD_compressBegin(g_zcc, 1);
+    compressedSize = ZSTD_compressContinue(g_zcc, dst, dstCapacity, src, srcSize);
+    compressedSize += ZSTD_compressEnd(g_zcc, ((char*)dst)+compressedSize, dstCapacity-compressedSize);
+    return compressedSize;
+}
+
+size_t local_ZSTD_decompressContinue(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize)
+{
+    size_t regeneratedSize = 0;
+    const BYTE* ip = (const BYTE*)buff2;
+    const BYTE* const iend = ip + g_cSize;
+    BYTE* op = (BYTE*)dst;
+    size_t remainingCapacity = dstCapacity;
+
+    (void)src; (void)srcSize;
+    ZSTD_decompressBegin(g_zdc);
+    while (ip < iend) {
+        size_t const iSize = ZSTD_nextSrcSizeToDecompress(g_zdc);
+        size_t const decodedSize = ZSTD_decompressContinue(g_zdc, op, remainingCapacity, ip, iSize);
+        ip += iSize;
+        regeneratedSize += decodedSize;
+        op += decodedSize;
+        remainingCapacity -= decodedSize;
+    }
+
+    return regeneratedSize;
+}
+
+
+/*_*******************************************************
 *  Bench functions
 *********************************************************/
-size_t benchMem(void* src, size_t srcSize, U32 benchNb)
+static size_t benchMem(const void* src, size_t srcSize, U32 benchNb)
 {
     BYTE*  dstBuff;
     size_t dstBuffSize;
     BYTE*  buff2;
-    int loopNb;
     const char* benchName;
     size_t (*benchFunction)(void* dst, size_t dstSize, void* verifBuff, const void* src, size_t srcSize);
     double bestTime = 100000000.;
-    size_t errorCode = 0;
 
     /* Selection */
     switch(benchNb)
@@ -265,15 +269,27 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb)
     case 1:
         benchFunction = local_ZSTD_compress; benchName = "ZSTD_compress";
         break;
-    case 11:
+    case 2:
         benchFunction = local_ZSTD_decompress; benchName = "ZSTD_decompress";
         break;
+    case 11:
+        benchFunction = local_ZSTD_compressContinue; benchName = "ZSTD_compressContinue";
+        break;
+    case 12:
+        benchFunction = local_ZSTD_decompressContinue; benchName = "ZSTD_decompressContinue";
+        break;
     case 31:
         benchFunction = local_ZSTD_decodeLiteralsBlock; benchName = "ZSTD_decodeLiteralsBlock";
         break;
     case 32:
         benchFunction = local_ZSTD_decodeSeqHeaders; benchName = "ZSTD_decodeSeqHeaders";
         break;
+    case 41:
+        benchFunction = local_ZBUFF_compress; benchName = "ZBUFF_compressContinue";
+        break;
+    case 42:
+        benchFunction = local_ZBUFF_decompress; benchName = "ZBUFF_decompressContinue";
+        break;
     default :
         return 0;
     }
@@ -282,9 +298,7 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb)
     dstBuffSize = ZSTD_compressBound(srcSize);
     dstBuff = (BYTE*)malloc(dstBuffSize);
     buff2 = (BYTE*)malloc(dstBuffSize);
-    g_dctxPtr = ZSTD_createDCtx();
-    if ((!dstBuff) || (!buff2))
-    {
+    if ((!dstBuff) || (!buff2)) {
         DISPLAY("\nError: not enough memory!\n");
         free(dstBuff); free(buff2);
         return 12;
@@ -293,45 +307,66 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb)
     /* Preparation */
     switch(benchNb)
     {
-    case 11:
+    case 2:
+        g_cSize = ZSTD_compress(buff2, dstBuffSize, src, srcSize, 1);
+        break;
+    case 11 :
+        if (g_zcc==NULL) g_zcc = ZSTD_createCCtx();
+        break;
+    case 12 :
+        if (g_zdc==NULL) g_zdc = ZSTD_createDCtx();
         g_cSize = ZSTD_compress(buff2, dstBuffSize, src, srcSize, 1);
         break;
     case 31:  /* ZSTD_decodeLiteralsBlock */
-        {
-            blockProperties_t bp;
+        if (g_zdc==NULL) g_zdc = ZSTD_createDCtx();
+        {   blockProperties_t bp;
+            ZSTD_frameParams zfp;
+            size_t frameHeaderSize, skippedSize;
             g_cSize = ZSTD_compress(dstBuff, dstBuffSize, src, srcSize, 1);
-            ZSTD_getcBlockSize(dstBuff+4, dstBuffSize, &bp);  /* Get 1st block type */
-            if (bp.blockType != bt_compressed)
-            {
+            frameHeaderSize = ZSTD_getFrameParams(&zfp, dstBuff, ZSTD_frameHeaderSize_min);
+            if (frameHeaderSize==0) frameHeaderSize = ZSTD_frameHeaderSize_min;
+            ZSTD_getcBlockSize(dstBuff+frameHeaderSize, dstBuffSize, &bp);  /* Get 1st block type */
+            if (bp.blockType != bt_compressed) {
                 DISPLAY("ZSTD_decodeLiteralsBlock : impossible to test on this sample (not compressible)\n");
                 goto _cleanOut;
             }
-            memcpy(buff2, dstBuff+8, g_cSize-8);
+            skippedSize = frameHeaderSize + 3 /* ZSTD_blockHeaderSize */;
+            memcpy(buff2, dstBuff+skippedSize, g_cSize-skippedSize);
             srcSize = srcSize > 128 KB ? 128 KB : srcSize;    /* speed relative to block */
             break;
         }
     case 32:   /* ZSTD_decodeSeqHeaders */
-        {
-            blockProperties_t bp;
+        if (g_zdc==NULL) g_zdc = ZSTD_createDCtx();
+        {   blockProperties_t bp;
+            ZSTD_frameParams zfp;
             const BYTE* ip = dstBuff;
             const BYTE* iend;
-            size_t blockSize;
+            size_t frameHeaderSize, cBlockSize;
             ZSTD_compress(dstBuff, dstBuffSize, src, srcSize, 1);   /* it would be better to use direct block compression here */
-            ip += 5;   /* Skip frame Header */
-            blockSize = ZSTD_getcBlockSize(ip, dstBuffSize, &bp);   /* Get 1st block type */
-            if (bp.blockType != bt_compressed)
-            {
+            g_cSize = ZSTD_compress(dstBuff, dstBuffSize, src, srcSize, 1);
+            frameHeaderSize = ZSTD_getFrameParams(&zfp, dstBuff, ZSTD_frameHeaderSize_min);
+            if (frameHeaderSize==0) frameHeaderSize = ZSTD_frameHeaderSize_min;
+            ip += frameHeaderSize;   /* Skip frame Header */
+            cBlockSize = ZSTD_getcBlockSize(ip, dstBuffSize, &bp);   /* Get 1st block type */
+            if (bp.blockType != bt_compressed) {
                 DISPLAY("ZSTD_decodeSeqHeaders : impossible to test on this sample (not compressible)\n");
                 goto _cleanOut;
             }
-            iend = ip + 3 + blockSize;   /* End of first block */
-            ip += 3;                     /* skip block header */
-            ip += ZSTD_decodeLiteralsBlock(g_dctxPtr, ip, iend-ip);  /* skip literal segment */
+            iend = ip + 3 /* ZSTD_blockHeaderSize */ + cBlockSize;   /* End of first block */
+            ip += 3 /* ZSTD_blockHeaderSize */;                     /* skip block header */
+            ip += ZSTD_decodeLiteralsBlock(g_zdc, ip, iend-ip);  /* skip literal segment */
             g_cSize = iend-ip;
             memcpy(buff2, ip, g_cSize);   /* copy rest of block (it starts by SeqHeader) */
             srcSize = srcSize > 128 KB ? 128 KB : srcSize;   /* speed relative to block */
             break;
         }
+    case 41 :
+        if (g_zbcc==NULL) g_zbcc = ZBUFF_createCCtx();
+        break;
+    case 42 :
+        if (g_zbdc==NULL) g_zbdc = ZBUFF_createDCtx();
+        g_cSize = ZSTD_compress(buff2, dstBuffSize, src, srcSize, 1);
+        break;
 
     /* test functions */
     /* by convention, test functions can be added > 100 */
@@ -341,53 +376,44 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb)
 
     { size_t i; for (i=0; i<dstBuffSize; i++) dstBuff[i]=(BYTE)i; }     /* warming up memory */
 
-    for (loopNb = 1; loopNb <= nbIterations; loopNb++)
-    {
+    { U32 loopNb;
+    for (loopNb = 1; loopNb <= g_nbIterations; loopNb++) {
+        clock_t const timeLoop = TIMELOOP_S * CLOCKS_PER_SEC;
+        clock_t clockStart;
+        U32 nbRounds;
+        size_t benchResult=0;
         double averageTime;
-        int milliTime;
-        U32 nbRounds=0;
 
         DISPLAY("%2i- %-30.30s : \r", loopNb, benchName);
 
-        milliTime = BMK_GetMilliStart();
-        while(BMK_GetMilliStart() == milliTime);
-        milliTime = BMK_GetMilliStart();
-        while(BMK_GetMilliSpan(milliTime) < TIMELOOP)
-        {
-            errorCode = benchFunction(dstBuff, dstBuffSize, buff2, src, srcSize);
-            if (ZSTD_isError(errorCode)) { DISPLAY("ERROR ! %s() => %s !! \n", benchName, ZSTD_getErrorName(errorCode)); exit(1); }
-            nbRounds++;
+        clockStart = clock();
+        while (clock() == clockStart);
+        clockStart = clock();
+        for (nbRounds=0; BMK_clockSpan(clockStart) < timeLoop; nbRounds++) {
+            benchResult = benchFunction(dstBuff, dstBuffSize, buff2, src, srcSize);
+            if (ZSTD_isError(benchResult)) { DISPLAY("ERROR ! %s() => %s !! \n", benchName, ZSTD_getErrorName(benchResult)); exit(1); }
         }
-        milliTime = BMK_GetMilliSpan(milliTime);
-
-        averageTime = (double)milliTime / nbRounds;
+        averageTime = (((double)BMK_clockSpan(clockStart)) / CLOCKS_PER_SEC) / nbRounds;
         if (averageTime < bestTime) bestTime = averageTime;
-        DISPLAY("%2i- %-30.30s : %7.1f MB/s  (%9u)\r", loopNb, benchName, (double)srcSize / bestTime / 1000., (U32)errorCode);
-    }
-
-    DISPLAY("%2u- %-30.30s : %7.1f MB/s  (%9u)\n", benchNb, benchName, (double)srcSize / bestTime / 1000., (U32)errorCode);
+        DISPLAY("%2i- %-30.30s : %7.1f MB/s  (%9u)\r", loopNb, benchName, (double)srcSize / (1 MB) / bestTime, (U32)benchResult);
+    }}
+    DISPLAY("%2u\n", benchNb);
 
 _cleanOut:
     free(dstBuff);
     free(buff2);
-    ZSTD_freeDCtx(g_dctxPtr);
     return 0;
 }
 
 
-int benchSample(U32 benchNb)
+static int benchSample(U32 benchNb)
 {
-    char* origBuff;
-    size_t benchedSize = sampleSize;
+    size_t const benchedSize = g_sampleSize;
     const char* name = "Sample 10MiB";
 
     /* Allocation */
-    origBuff = (char*) malloc((size_t)benchedSize);
-    if(!origBuff)
-    {
-        DISPLAY("\nError: not enough memory!\n");
-        return 12;
-    }
+    void* origBuff = malloc(benchedSize);
+    if (!origBuff) { DISPLAY("\nError: not enough memory!\n"); return 12; }
 
     /* Fill buffer */
     RDG_genBuffer(origBuff, benchedSize, g_compressibility, 0.0, 0);
@@ -405,58 +431,41 @@ int benchSample(U32 benchNb)
 }
 
 
-int benchFiles(char** fileNamesTable, int nbFiles, U32 benchNb)
+static int benchFiles(const char** fileNamesTable, const int nbFiles, U32 benchNb)
 {
-    int fileIdx=0;
-
     /* Loop for each file */
-    while (fileIdx<nbFiles)
-    {
-        FILE* inFile;
-        char* inFileName;
+    int fileIdx;
+    for (fileIdx=0; fileIdx<nbFiles; fileIdx++) {
+        const char* inFileName = fileNamesTable[fileIdx];
+        FILE* inFile = fopen( inFileName, "rb" );
         U64   inFileSize;
         size_t benchedSize;
-        size_t readSize;
-        char* origBuff;
+        void* origBuff;
 
         /* Check file existence */
-        inFileName = fileNamesTable[fileIdx++];
-        inFile = fopen( inFileName, "rb" );
-        if (inFile==NULL)
-        {
-            DISPLAY( "Pb opening %s\n", inFileName);
-            return 11;
-        }
+        if (inFile==NULL) { DISPLAY( "Pb opening %s\n", inFileName); return 11; }
 
         /* Memory allocation & restrictions */
         inFileSize = BMK_GetFileSize(inFileName);
-        benchedSize = (size_t) BMK_findMaxMem(inFileSize*3) / 3;
+        benchedSize = BMK_findMaxMem(inFileSize*3) / 3;
         if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;
         if (benchedSize < inFileSize)
-        {
-            DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", inFileName, (int)(benchedSize>>20));
-        }
+            DISPLAY("Not enough memory for '%s' full size; testing %u MB only...\n", inFileName, (U32)(benchedSize>>20));
 
         /* Alloc */
-        origBuff = (char*) malloc((size_t)benchedSize);
-        if(!origBuff)
-        {
-            DISPLAY("\nError: not enough memory!\n");
-            fclose(inFile);
-            return 12;
-        }
+        origBuff = malloc(benchedSize);
+        if (!origBuff) { DISPLAY("\nError: not enough memory!\n"); fclose(inFile); return 12; }
 
         /* Fill input buffer */
         DISPLAY("Loading %s...       \r", inFileName);
-        readSize = fread(origBuff, 1, benchedSize, inFile);
-        fclose(inFile);
-
-        if(readSize != benchedSize)
         {
-            DISPLAY("\nError: problem reading file '%s' !!    \n", inFileName);
-            free(origBuff);
-            return 13;
-        }
+            size_t readSize = fread(origBuff, 1, benchedSize, inFile);
+            fclose(inFile);
+            if (readSize != benchedSize) {
+                DISPLAY("\nError: problem reading file '%s' !!    \n", inFileName);
+                free(origBuff);
+                return 13;
+        }   }
 
         /* bench */
         DISPLAY("\r%79s\r", "");
@@ -465,13 +474,15 @@ int benchFiles(char** fileNamesTable, int nbFiles, U32 benchNb)
             benchMem(origBuff, benchedSize, benchNb);
         else
             for (benchNb=0; benchNb<100; benchNb++) benchMem(origBuff, benchedSize, benchNb);
+
+        free(origBuff);
     }
 
     return 0;
 }
 
 
-int usage(char* exename)
+static int usage(const char* exename)
 {
     DISPLAY( "Usage :\n");
     DISPLAY( "      %s [arg] file1 file2 ... fileX\n", exename);
@@ -480,8 +491,9 @@ int usage(char* exename)
     return 0;
 }
 
-int usage_advanced(void)
+static int usage_advanced(const char* exename)
 {
+    usage(exename);
     DISPLAY( "\nAdvanced options :\n");
     DISPLAY( " -b#    : test only function # \n");
     DISPLAY( " -i#    : iteration loops [1-9](default : %i)\n", NBLOOPS);
@@ -489,45 +501,38 @@ int usage_advanced(void)
     return 0;
 }
 
-int badusage(char* exename)
+static int badusage(const char* exename)
 {
     DISPLAY("Wrong parameters\n");
     usage(exename);
-    return 0;
+    return 1;
 }
 
-int main(int argc, char** argv)
+int main(int argc, const char** argv)
 {
-    int i,
-        filenamesStart=0,
-        result;
-    char* exename=argv[0];
-    char* input_filename=0;
+    int i, filenamesStart=0, result;
+    const char* exename = argv[0];
+    const char* input_filename = NULL;
     U32 benchNb = 0, main_pause = 0;
 
-    // Welcome message
     DISPLAY(WELCOME_MESSAGE);
+    if (argc<1) return badusage(exename);
 
-    if (argc<1) { badusage(exename); return 1; }
+    for(i=1; i<argc; i++) {
+        const char* argument = argv[i];
+        if(!argument) continue;   /* Protection if argument empty */
 
-    for(i=1; i<argc; i++)
-    {
-        char* argument = argv[i];
+        /* Commands (note : aggregated commands are allowed) */
+        if (argument[0]=='-') {
 
-        if(!argument) continue;   // Protection if argument empty
-
-        // Decode command (note : aggregated commands are allowed)
-        if (argument[0]=='-')
-        {
-            while (argument[1]!=0)
-            {
-                argument ++;
+            while (argument[1]!=0) {
+                argument++;
 
                 switch(argument[0])
                 {
                     /* Display help on usage */
                 case 'h' :
-                case 'H': usage(exename); usage_advanced(); return 0;
+                case 'H': return usage_advanced(exename);
 
                     /* Pause at the end (hidden option) */
                 case 'p': main_pause = 1; break;
@@ -535,8 +540,7 @@ int main(int argc, char** argv)
                     /* Select specific algorithm to bench */
                 case 'b':
                     benchNb = 0;
-                    while ((argument[1]>= '0') && (argument[1]<= '9'))
-                    {
+                    while ((argument[1]>= '0') && (argument[1]<= '9')) {
                         benchNb *= 10;
                         benchNb += argument[1] - '0';
                         argument++;
@@ -545,20 +549,17 @@ int main(int argc, char** argv)
 
                     /* Modify Nb Iterations */
                 case 'i':
-                    if ((argument[1] >='0') && (argument[1] <='9'))
-                    {
+                    if ((argument[1] >='0') && (argument[1] <='9')) {
                         int iters = argument[1] - '0';
                         BMK_SetNbIterations(iters);
                         argument++;
                     }
                     break;
 
-                    /* Select specific algorithm to bench */
+                    /* Select compressibility of synthetic sample */
                 case 'P':
-                    {
-                        U32 proba32 = 0;
-                        while ((argument[1]>= '0') && (argument[1]<= '9'))
-                        {
+                    {   U32 proba32 = 0;
+                        while ((argument[1]>= '0') && (argument[1]<= '9')) {
                             proba32 *= 10;
                             proba32 += argument[1] - '0';
                             argument++;
@@ -568,7 +569,7 @@ int main(int argc, char** argv)
                     break;
 
                     /* Unknown command */
-                default : badusage(exename); return 1;
+                default : return badusage(exename);
                 }
             }
             continue;
@@ -578,9 +579,10 @@ int main(int argc, char** argv)
         if (!input_filename) { input_filename=argument; filenamesStart=i; continue; }
     }
 
-    if (filenamesStart==0)
+    if (filenamesStart==0)   /* no input file */
         result = benchSample(benchNb);
-    else result = benchFiles(argv+filenamesStart, argc-filenamesStart, benchNb);
+    else
+        result = benchFiles(argv+filenamesStart, argc-filenamesStart, benchNb);
 
     if (main_pause) { int unused; printf("press enter...\n"); unused = getchar(); (void)unused; }
 
diff --git a/programs/fuzzer.c b/programs/fuzzer.c
index f09cf06f..5bf532e1 100644
--- a/programs/fuzzer.c
+++ b/programs/fuzzer.c
@@ -1,6 +1,6 @@
 /*
     Fuzzer test tool for zstd
-    Copyright (C) Yann Collet 2014-2105
+    Copyright (C) Yann Collet 2014-2016
 
     GPL v2 License
 
@@ -19,11 +19,10 @@
     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 
     You can contact the author at :
-    - ZSTD source repository : https://github.com/Cyan4973/zstd
-    - ZSTD public forum : https://groups.google.com/forum/#!forum/lz4c
+    - ZSTD homepage : http://www.zstd.net
 */
 
-/**************************************
+/*-************************************
 *  Compiler specific
 **************************************/
 #ifdef _MSC_VER    /* Visual Studio */
@@ -32,28 +31,23 @@
 #  pragma warning(disable : 4146)     /* disable: C4146: minus unsigned expression */
 #endif
 
-#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
-#ifdef __GNUC__
-#  pragma GCC diagnostic ignored "-Wmissing-braces"   /* GCC bug 53119 : doesn't accept { 0 } as initializer (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119) */
-#  pragma GCC diagnostic ignored "-Wmissing-field-initializers"   /* GCC bug 53119 : doesn't accept { 0 } as initializer (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119) */
-#endif
 
-
-/**************************************
+/*-************************************
 *  Includes
 **************************************/
 #include <stdlib.h>      /* free */
 #include <stdio.h>       /* fgets, sscanf */
 #include <sys/timeb.h>   /* timeb */
 #include <string.h>      /* strcmp */
+#include <time.h>        /* clock_t */
 #include "zstd_static.h"
 #include "datagen.h"     /* RDG_genBuffer */
 #include "xxhash.h"      /* XXH64 */
 #include "mem.h"
 
 
-/**************************************
- Constants
+/*-************************************
+*  Constants
 **************************************/
 #ifndef ZSTD_VERSION
 #  define ZSTD_VERSION ""
@@ -63,15 +57,12 @@
 #define MB *(1U<<20)
 #define GB *(1U<<30)
 
+static const size_t COMPRESSIBLE_NOISE_LENGTH = 10 MB;   /* capital, used to be a macro */
+static const U32 FUZ_compressibility_default = 50;
 static const U32 nbTestsDefault = 30000;
-#define COMPRESSIBLE_NOISE_LENGTH (10 MB)
-#define FUZ_COMPRESSIBILITY_DEFAULT 50
-static const U32 prime1 = 2654435761U;
-static const U32 prime2 = 2246822519U;
 
 
-
-/**************************************
+/*-************************************
 *  Display Macros
 **************************************/
 #define DISPLAY(...)          fprintf(stderr, __VA_ARGS__)
@@ -79,44 +70,30 @@ static const U32 prime2 = 2246822519U;
 static U32 g_displayLevel = 2;
 
 #define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
-            if ((FUZ_GetMilliSpan(g_displayTime) > g_refreshRate) || (g_displayLevel>=4)) \
-            { g_displayTime = FUZ_GetMilliStart(); DISPLAY(__VA_ARGS__); \
+            if ((FUZ_clockSpan(g_displayClock) > g_refreshRate) || (g_displayLevel>=4)) \
+            { g_displayClock = clock(); DISPLAY(__VA_ARGS__); \
             if (g_displayLevel>=4) fflush(stdout); } }
-static const U32 g_refreshRate = 150;
-static U32 g_displayTime = 0;
-
-static U32 g_testTime = 0;
+static const clock_t g_refreshRate = CLOCKS_PER_SEC * 150 / 1000;
+static clock_t g_displayClock = 0;
 
 
-/*********************************************************
+/*-*******************************************************
 *  Fuzzer functions
 *********************************************************/
 #define MIN(a,b) ((a)<(b)?(a):(b))
 #define MAX(a,b) ((a)>(b)?(a):(b))
 
-static U32 FUZ_GetMilliStart(void)
+static clock_t FUZ_clockSpan(clock_t cStart)
 {
-    struct timeb tb;
-    U32 nCount;
-    ftime( &tb );
-    nCount = (U32) (((tb.time & 0xFFFFF) * 1000) +  tb.millitm);
-    return nCount;
-}
-
-
-static U32 FUZ_GetMilliSpan(U32 nTimeStart)
-{
-    U32 nCurrent = FUZ_GetMilliStart();
-    U32 nSpan = nCurrent - nTimeStart;
-    if (nTimeStart > nCurrent)
-        nSpan += 0x100000 * 1000;
-    return nSpan;
+    return clock() - cStart;   /* works even when overflow; max span ~ 30mn */
 }
 
 
 #  define FUZ_rotl32(x,r) ((x << r) | (x >> (32 - r)))
 unsigned int FUZ_rand(unsigned int* src)
 {
+    static const U32 prime1 = 2654435761U;
+    static const U32 prime2 = 2246822519U;
     U32 rand32 = *src;
     rand32 *= prime1;
     rand32 += prime2;
@@ -130,8 +107,7 @@ static unsigned FUZ_highbit32(U32 v32)
 {
     unsigned nbBits = 0;
     if (v32==0) return 0;
-    while (v32)
-    {
+    while (v32) {
         v32 >>= 1;
         nbBits ++;
     }
@@ -153,8 +129,7 @@ static int basicUnitTests(U32 seed, double compressibility)
     CNBuffer = malloc(COMPRESSIBLE_NOISE_LENGTH);
     compressedBuffer = malloc(ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH));
     decodedBuffer = malloc(COMPRESSIBLE_NOISE_LENGTH);
-    if (!CNBuffer || !compressedBuffer || !decodedBuffer)
-    {
+    if (!CNBuffer || !compressedBuffer || !decodedBuffer) {
         DISPLAY("Not enough memory, aborting\n");
         testResult = 1;
         goto _end;
@@ -162,22 +137,21 @@ static int basicUnitTests(U32 seed, double compressibility)
     RDG_genBuffer(CNBuffer, COMPRESSIBLE_NOISE_LENGTH, compressibility, 0., randState);
 
     /* Basic tests */
-    DISPLAYLEVEL(4, "test%3i : compress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH);
+    DISPLAYLEVEL(4, "test%3i : compress %u bytes : ", testNb++, (U32)COMPRESSIBLE_NOISE_LENGTH);
     result = ZSTD_compress(compressedBuffer, ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH), CNBuffer, COMPRESSIBLE_NOISE_LENGTH, 1);
     if (ZSTD_isError(result)) goto _output_error;
     cSize = result;
     DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/COMPRESSIBLE_NOISE_LENGTH*100);
 
-    DISPLAYLEVEL(4, "test%3i : decompress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH);
+    DISPLAYLEVEL(4, "test%3i : decompress %u bytes : ", testNb++, (U32)COMPRESSIBLE_NOISE_LENGTH);
     result = ZSTD_decompress(decodedBuffer, COMPRESSIBLE_NOISE_LENGTH, compressedBuffer, cSize);
     if (ZSTD_isError(result)) goto _output_error;
+    if (result != COMPRESSIBLE_NOISE_LENGTH) goto _output_error;
     DISPLAYLEVEL(4, "OK \n");
 
-    {
-        size_t i;
+    {   size_t i;
         DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++);
-        for (i=0; i<COMPRESSIBLE_NOISE_LENGTH; i++)
-        {
+        for (i=0; i<COMPRESSIBLE_NOISE_LENGTH; i++) {
             if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error;;
         }
         DISPLAYLEVEL(4, "OK \n");
@@ -195,19 +169,22 @@ static int basicUnitTests(U32 seed, double compressibility)
     if (result != (size_t)-ZSTD_error_srcSize_wrong) goto _output_error;
     DISPLAYLEVEL(4, "OK \n");
 
-    /* Dictionary and Duplication tests */
-    {
-        ZSTD_CCtx* ctxOrig = ZSTD_createCCtx();
+    /* Dictionary and CCtx Duplication tests */
+    {   ZSTD_CCtx* ctxOrig = ZSTD_createCCtx();
         ZSTD_CCtx* ctxDuplicated = ZSTD_createCCtx();
         ZSTD_DCtx* dctx = ZSTD_createDCtx();
-        const size_t dictSize = 500;
-        size_t cSizeOrig;
+        size_t const dictSize = 500;
+
+        DISPLAYLEVEL(4, "test%3i : copy context too soon : ", testNb++);
+        { size_t const copyResult = ZSTD_copyCCtx(ctxDuplicated, ctxOrig);
+          if (!ZSTD_isError(copyResult)) goto _output_error; }  /* error should be detected */
+        DISPLAYLEVEL(4, "OK \n");
 
         DISPLAYLEVEL(4, "test%3i : load dictionary into context : ", testNb++);
-        result = ZSTD_compressBegin_usingDict(ctxOrig, CNBuffer, dictSize, 2);
-        if (ZSTD_isError(result)) goto _output_error;
-        result = ZSTD_copyCCtx(ctxDuplicated, ctxOrig);
-        if (ZSTD_isError(result)) goto _output_error;
+        { size_t const initResult = ZSTD_compressBegin_usingDict(ctxOrig, CNBuffer, dictSize, 2);
+          if (ZSTD_isError(initResult)) goto _output_error; }
+        { size_t const copyResult = ZSTD_copyCCtx(ctxDuplicated, ctxOrig);
+          if (ZSTD_isError(copyResult)) goto _output_error; }
         DISPLAYLEVEL(4, "OK \n");
 
         DISPLAYLEVEL(4, "test%3i : compress with dictionary : ", testNb++);
@@ -227,20 +204,19 @@ static int basicUnitTests(U32 seed, double compressibility)
                                            CNBuffer, dictSize);
         if (ZSTD_isError(result)) goto _output_error;
         if (result != COMPRESSIBLE_NOISE_LENGTH - dictSize) goto _output_error;
-        ZSTD_freeCCtx(ctxOrig);   /* if ctxOrig is read, will produce segfault */
         DISPLAYLEVEL(4, "OK \n");
 
         DISPLAYLEVEL(4, "test%3i : compress with duplicated context : ", testNb++);
-        cSizeOrig = cSize;
-        cSize = 0;
-        result = ZSTD_compressContinue(ctxDuplicated, compressedBuffer, ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH), (const char*)CNBuffer + dictSize, COMPRESSIBLE_NOISE_LENGTH - dictSize);
-        if (ZSTD_isError(result)) goto _output_error;
-        cSize += result;
-        result = ZSTD_compressEnd(ctxDuplicated, (char*)compressedBuffer+cSize, ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH)-cSize);
-        if (ZSTD_isError(result)) goto _output_error;
-        cSize += result;
-        if (cSize != cSizeOrig) goto _output_error;   /* should be identical == have same size */
-        ZSTD_freeCCtx(ctxDuplicated);
+        {   size_t const cSizeOrig = cSize;
+            cSize = 0;
+            result = ZSTD_compressContinue(ctxDuplicated, compressedBuffer, ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH), (const char*)CNBuffer + dictSize, COMPRESSIBLE_NOISE_LENGTH - dictSize);
+            if (ZSTD_isError(result)) goto _output_error;
+            cSize += result;
+            result = ZSTD_compressEnd(ctxDuplicated, (char*)compressedBuffer+cSize, ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH)-cSize);
+            if (ZSTD_isError(result)) goto _output_error;
+            cSize += result;
+            if (cSize != cSizeOrig) goto _output_error;   /* should be identical == have same size */
+        }
         DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/COMPRESSIBLE_NOISE_LENGTH*100);
 
         DISPLAYLEVEL(4, "test%3i : frame built with duplicated context should be decompressible : ", testNb++);
@@ -250,8 +226,30 @@ static int basicUnitTests(U32 seed, double compressibility)
                                            CNBuffer, dictSize);
         if (ZSTD_isError(result)) goto _output_error;
         if (result != COMPRESSIBLE_NOISE_LENGTH - dictSize) goto _output_error;
-        ZSTD_freeDCtx(dctx);
         DISPLAYLEVEL(4, "OK \n");
+
+        DISPLAYLEVEL(4, "test%3i : check content size on duplicated context : ", testNb++);
+        {   size_t const testSize = COMPRESSIBLE_NOISE_LENGTH / 3;
+            {   ZSTD_parameters p;
+                p.cParams = ZSTD_getCParams(2, testSize, dictSize);
+                p.fParams.contentSizeFlag = 1;
+                {   size_t const initResult = ZSTD_compressBegin_advanced(ctxOrig, CNBuffer, dictSize, p, testSize-1);
+                    if (ZSTD_isError(initResult)) goto _output_error;
+            }   }
+            { size_t const copyResult = ZSTD_copyCCtx(ctxDuplicated, ctxOrig);
+              if (ZSTD_isError(copyResult)) goto _output_error;  }
+            cSize = ZSTD_compressContinue(ctxDuplicated, compressedBuffer, ZSTD_compressBound(testSize), (const char*)CNBuffer + dictSize, COMPRESSIBLE_NOISE_LENGTH - dictSize);
+            if (ZSTD_isError(cSize)) goto _output_error;
+            {   ZSTD_frameParams fp;
+                size_t const gfpResult = ZSTD_getFrameParams(&fp, compressedBuffer, cSize);
+                if (gfpResult!=0) goto _output_error;
+                if ((fp.frameContentSize != testSize) && (fp.frameContentSize != 0)) goto _output_error;
+        }   }
+        DISPLAYLEVEL(4, "OK \n");
+
+        ZSTD_freeCCtx(ctxOrig);
+        ZSTD_freeCCtx(ctxDuplicated);
+        ZSTD_freeDCtx(dctx);
     }
 
     /* Decompression defense tests */
@@ -268,8 +266,7 @@ static int basicUnitTests(U32 seed, double compressibility)
     DISPLAYLEVEL(4, "OK \n");
 
     /* block API tests */
-    {
-        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+    {   ZSTD_CCtx* const cctx = ZSTD_createCCtx();
         ZSTD_DCtx* const dctx = ZSTD_createDCtx();
         const size_t blockSize = 100 KB;
         const size_t dictSize = 16 KB;
@@ -311,8 +308,7 @@ static int basicUnitTests(U32 seed, double compressibility)
     }
 
     /* long rle test */
-    {
-        size_t sampleSize = 0;
+    {   size_t sampleSize = 0;
         DISPLAYLEVEL(4, "test%3i : Long RLE test : ", testNb++);
         RDG_genBuffer(CNBuffer, sampleSize, compressibility, 0., randState);
         memset((char*)CNBuffer+sampleSize, 'B', 256 KB - 1);
@@ -327,6 +323,57 @@ static int basicUnitTests(U32 seed, double compressibility)
         DISPLAYLEVEL(4, "OK \n");
     }
 
+    /* All zeroes test (#137 verif) */
+    #define ZEROESLENGTH 100
+    DISPLAYLEVEL(4, "test%3i : compress %u zeroes : ", testNb++, ZEROESLENGTH);
+    memset(CNBuffer, 0, ZEROESLENGTH);
+    result = ZSTD_compress(compressedBuffer, ZSTD_compressBound(ZEROESLENGTH), CNBuffer, ZEROESLENGTH, 1);
+    if (ZSTD_isError(result)) goto _output_error;
+    cSize = result;
+    DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/ZEROESLENGTH*100);
+
+    DISPLAYLEVEL(4, "test%3i : decompress %u zeroes : ", testNb++, ZEROESLENGTH);
+    result = ZSTD_decompress(decodedBuffer, ZEROESLENGTH, compressedBuffer, cSize);
+    if (ZSTD_isError(result)) goto _output_error;
+    if (result != ZEROESLENGTH) goto _output_error;
+    DISPLAYLEVEL(4, "OK \n");
+
+    /* nbSeq limit test */
+    #define _3BYTESTESTLENGTH 131000
+    #define NB3BYTESSEQLOG   9
+    #define NB3BYTESSEQ     (1 << NB3BYTESSEQLOG)
+    #define NB3BYTESSEQMASK (NB3BYTESSEQ-1)
+    /* creates a buffer full of 3-bytes sequences */
+    {   BYTE _3BytesSeqs[NB3BYTESSEQ][3];
+        U32 rSeed = 1;
+
+        /* create batch of 3-bytes sequences */
+        { int i; for (i=0; i < NB3BYTESSEQ; i++) {
+            _3BytesSeqs[i][0] = (BYTE)(FUZ_rand(&rSeed) & 255);
+            _3BytesSeqs[i][1] = (BYTE)(FUZ_rand(&rSeed) & 255);
+            _3BytesSeqs[i][2] = (BYTE)(FUZ_rand(&rSeed) & 255);
+        }}
+
+        /* randomly fills CNBuffer with prepared 3-bytes sequences */
+        { int i; for (i=0; i < _3BYTESTESTLENGTH; ) {   /* note : CNBuffer size > _3BYTESTESTLENGTH+3 */
+            U32 id = FUZ_rand(&rSeed) & NB3BYTESSEQMASK;
+            ((BYTE*)CNBuffer)[i+0] = _3BytesSeqs[id][0];
+            ((BYTE*)CNBuffer)[i+1] = _3BytesSeqs[id][1];
+            ((BYTE*)CNBuffer)[i+2] = _3BytesSeqs[id][2];
+            i += 3;
+    }   }}
+    DISPLAYLEVEL(4, "test%3i : compress lots 3-bytes sequences : ", testNb++);
+    result = ZSTD_compress(compressedBuffer, ZSTD_compressBound(_3BYTESTESTLENGTH), CNBuffer, _3BYTESTESTLENGTH, 19);
+    if (ZSTD_isError(result)) goto _output_error;
+    cSize = result;
+    DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/_3BYTESTESTLENGTH*100);
+
+    DISPLAYLEVEL(4, "test%3i : decompress lots 3-bytes sequence : ", testNb++);
+    result = ZSTD_decompress(decodedBuffer, _3BYTESTESTLENGTH, compressedBuffer, cSize);
+    if (ZSTD_isError(result)) goto _output_error;
+    if (result != _3BYTESTESTLENGTH) goto _output_error;
+    DISPLAYLEVEL(4, "OK \n");
+
 _end:
     free(CNBuffer);
     free(compressedBuffer);
@@ -345,21 +392,32 @@ static size_t findDiff(const void* buf1, const void* buf2, size_t max)
     const BYTE* b1 = (const BYTE*)buf1;
     const BYTE* b2 = (const BYTE*)buf2;
     size_t i;
-    for (i=0; i<max; i++)
-    {
+    for (i=0; i<max; i++) {
         if (b1[i] != b2[i]) break;
     }
     return i;
 }
 
-#   define CHECK(cond, ...) if (cond) { DISPLAY("Error => "); DISPLAY(__VA_ARGS__); \
-                            DISPLAY(" (seed %u, test nb %u)  \n", seed, testNb); goto _output_error; }
 
-static const U32 maxSrcLog = 23;
-static const U32 maxSampleLog = 22;
-
-int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibility)
+static size_t FUZ_rLogLength(U32* seed, U32 logLength)
 {
+    size_t const lengthMask = ((size_t)1 << logLength) - 1;
+    return (lengthMask+1) + (FUZ_rand(seed) & lengthMask);
+}
+
+static size_t FUZ_randomLength(U32* seed, U32 maxLog)
+{
+    U32 const logLength = FUZ_rand(seed) % maxLog;
+    return FUZ_rLogLength(seed, logLength);
+}
+
+#define CHECK(cond, ...) if (cond) { DISPLAY("Error => "); DISPLAY(__VA_ARGS__); \
+                         DISPLAY(" (seed %u, test nb %u)  \n", seed, testNb); goto _output_error; }
+
+static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxDurationS, double compressibility)
+{
+    static const U32 maxSrcLog = 23;
+    static const U32 maxSampleLog = 22;
     BYTE* cNoiseBuffer[5];
     BYTE* srcBuffer;
     BYTE* cBuffer;
@@ -374,7 +432,8 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
     ZSTD_CCtx* refCtx;
     ZSTD_CCtx* ctx;
     ZSTD_DCtx* dctx;
-    U32 startTime = FUZ_GetMilliStart();
+    clock_t startClock = clock();
+    clock_t const maxClockSpan = maxDurationS * CLOCKS_PER_SEC;
 
     /* allocation */
     refCtx = ZSTD_createCCtx();
@@ -401,49 +460,44 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
     srcBuffer = cNoiseBuffer[2];
 
     /* catch up testNb */
-    for (testNb=1; testNb < startTest; testNb++)
-        FUZ_rand(&coreSeed);
+    for (testNb=1; testNb < startTest; testNb++) FUZ_rand(&coreSeed);
 
-    /* test loop */
-    for ( ; (testNb <= nbTests) || (FUZ_GetMilliSpan(startTime) < g_testTime); testNb++ )
-    {
+    /* main test loop */
+    for ( ; (testNb <= nbTests) || (FUZ_clockSpan(startClock) < maxClockSpan); testNb++ ) {
         size_t sampleSize, sampleStart, maxTestSize, totalTestSize;
-        size_t cSize, dSize, dSupSize, errorCode, totalCSize, totalGenSize;
-        U32 sampleSizeLog, buffNb, cLevelMod, nbChunks, n;
+        size_t cSize, dSize, totalCSize, totalGenSize;
+        U32 sampleSizeLog, nbChunks, n;
         XXH64_CREATESTATE_STATIC(xxh64);
-        U64 crcOrig, crcDest;
-        int cLevel;
+        U64 crcOrig;
         BYTE* sampleBuffer;
         const BYTE* dict;
         size_t dictSize;
 
-        /* init */
-        if (nbTests >= testNb)
-             { DISPLAYUPDATE(2, "\r%6u/%6u    ", testNb, nbTests); }
+        /* notification */
+        if (nbTests >= testNb) { DISPLAYUPDATE(2, "\r%6u/%6u    ", testNb, nbTests); }
         else { DISPLAYUPDATE(2, "\r%6u      ", testNb); }
 
         FUZ_rand(&coreSeed);
-        lseed = coreSeed ^ prime1;
-        buffNb = FUZ_rand(&lseed) & 127;
-        if (buffNb & 7) buffNb=2;
-        else
-        {
-            buffNb >>= 3;
-            if (buffNb & 7)
-            {
-                const U32 tnb[2] = { 1, 3 };
-                buffNb = tnb[buffNb >> 3];
-            }
-            else
-            {
-                const U32 tnb[2] = { 0, 4 };
-                buffNb = tnb[buffNb >> 3];
-            }
+        { U32 const prime1 = 2654435761U; lseed = coreSeed ^ prime1; }
+
+        /* srcBuffer selection [0-4] */
+        {   U32 buffNb = FUZ_rand(&lseed) & 0x7F;
+            if (buffNb & 7) buffNb=2;   /* most common : compressible (P) */
+            else {
+                buffNb >>= 3;
+                if (buffNb & 7) {
+                    const U32 tnb[2] = { 1, 3 };   /* barely/highly compressible */
+                    buffNb = tnb[buffNb >> 3];
+                } else {
+                    const U32 tnb[2] = { 0, 4 };   /* not compressible / sparse */
+                    buffNb = tnb[buffNb >> 3];
+            }   }
+            srcBuffer = cNoiseBuffer[buffNb];
         }
-        srcBuffer = cNoiseBuffer[buffNb];
+
+        /* select src segment */
         sampleSizeLog = FUZ_rand(&lseed) % maxSampleLog;
-        sampleSize = (size_t)1 << sampleSizeLog;
-        sampleSize += FUZ_rand(&lseed) & (sampleSize-1);
+        sampleSize  = FUZ_rLogLength(&lseed, sampleSizeLog);
         sampleStart = FUZ_rand(&lseed) % (srcBufferSize - sampleSize);
 
         /* create sample buffer (to catch read error with valgrind & sanitizers)  */
@@ -452,160 +506,151 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
         memcpy(sampleBuffer, srcBuffer + sampleStart, sampleSize);
         crcOrig = XXH64(sampleBuffer, sampleSize, 0);
 
-        /* compression test */
-        //cLevelMod = MAX(1, 38 - (int)(MAX(9, sampleSizeLog) * 2));   /* high levels only for small samples, for manageable speed */
-        cLevelMod = MIN( ZSTD_maxCLevel(), (U32)MAX(1,  55 - 3*(int)sampleSizeLog) );   /* high levels only for small samples, for manageable speed */
-        cLevel = (FUZ_rand(&lseed) % cLevelMod) +1;
-        cSize = ZSTD_compressCCtx(ctx, cBuffer, cBufferSize, sampleBuffer, sampleSize, cLevel);
-        CHECK(ZSTD_isError(cSize), "ZSTD_compressCCtx failed");
+        /* compression tests */
+        {   int const cLevel = (FUZ_rand(&lseed) % (ZSTD_maxCLevel() - (sampleSizeLog/3))) + 1;
+            cSize = ZSTD_compressCCtx(ctx, cBuffer, cBufferSize, sampleBuffer, sampleSize, cLevel);
+            CHECK(ZSTD_isError(cSize), "ZSTD_compressCCtx failed");
 
-        /* compression failure test : too small dest buffer */
-        if (cSize > 3)
-        {
-            const size_t missing = (FUZ_rand(&lseed) % (cSize-2)) + 1;   /* no problem, as cSize > 4 (frameHeaderSizer) */
-            const size_t tooSmallSize = cSize - missing;
-            static const U32 endMark = 0x4DC2B1A9;
-            U32 endCheck;
-            memcpy(dstBuffer+tooSmallSize, &endMark, 4);
-            errorCode = ZSTD_compressCCtx(ctx, dstBuffer, tooSmallSize, sampleBuffer, sampleSize, cLevel);
-            CHECK(!ZSTD_isError(errorCode), "ZSTD_compressCCtx should have failed ! (buffer too small : %u < %u)", (U32)tooSmallSize, (U32)cSize);
-            memcpy(&endCheck, dstBuffer+tooSmallSize, 4);
-            CHECK(endCheck != endMark, "ZSTD_compressCCtx : dst buffer overflow");
+            /* compression failure test : too small dest buffer */
+            if (cSize > 3) {
+                const size_t missing = (FUZ_rand(&lseed) % (cSize-2)) + 1;   /* no problem, as cSize > 4 (frameHeaderSizer) */
+                const size_t tooSmallSize = cSize - missing;
+                const U32 endMark = 0x4DC2B1A9;
+                memcpy(dstBuffer+tooSmallSize, &endMark, 4);
+                { size_t const errorCode = ZSTD_compressCCtx(ctx, dstBuffer, tooSmallSize, sampleBuffer, sampleSize, cLevel);
+                  CHECK(!ZSTD_isError(errorCode), "ZSTD_compressCCtx should have failed ! (buffer too small : %u < %u)", (U32)tooSmallSize, (U32)cSize); }
+                { U32 endCheck; memcpy(&endCheck, dstBuffer+tooSmallSize, 4);
+                  CHECK(endCheck != endMark, "ZSTD_compressCCtx : dst buffer overflow"); }
+            }
         }
 
-        /* successfull decompression tests*/
-        dSupSize = (FUZ_rand(&lseed) & 1) ? 0 : (FUZ_rand(&lseed) & 31) + 1;
-        dSize = ZSTD_decompress(dstBuffer, sampleSize + dSupSize, cBuffer, cSize);
-        CHECK(dSize != sampleSize, "ZSTD_decompress failed (%s) (srcSize : %u ; cSize : %u)", ZSTD_getErrorName(dSize), (U32)sampleSize, (U32)cSize);
-        crcDest = XXH64(dstBuffer, sampleSize, 0);
-        CHECK(crcOrig != crcDest, "decompression result corrupted (pos %u / %u)", (U32)findDiff(sampleBuffer, dstBuffer, sampleSize), (U32)sampleSize);
+
+        /* frame header decompression test */
+        {   ZSTD_frameParams dParams;
+            size_t const check = ZSTD_getFrameParams(&dParams, cBuffer, cSize);
+            CHECK(ZSTD_isError(check), "Frame Parameters extraction failed");
+            CHECK(dParams.frameContentSize != sampleSize, "Frame content size incorrect");
+        }
+
+        /* successful decompression test */
+        {   size_t const margin = (FUZ_rand(&lseed) & 1) ? 0 : (FUZ_rand(&lseed) & 31) + 1;
+            dSize = ZSTD_decompress(dstBuffer, sampleSize + margin, cBuffer, cSize);
+            CHECK(dSize != sampleSize, "ZSTD_decompress failed (%s) (srcSize : %u ; cSize : %u)", ZSTD_getErrorName(dSize), (U32)sampleSize, (U32)cSize);
+            {   U64 const crcDest = XXH64(dstBuffer, sampleSize, 0);
+                CHECK(crcOrig != crcDest, "decompression result corrupted (pos %u / %u)", (U32)findDiff(sampleBuffer, dstBuffer, sampleSize), (U32)sampleSize);
+        }   }
 
         free(sampleBuffer);   /* no longer useful after this point */
 
         /* truncated src decompression test */
-        {
-            const size_t missing = (FUZ_rand(&lseed) % (cSize-2)) + 1;   /* no problem, as cSize > 4 (frameHeaderSizer) */
-            const size_t tooSmallSize = cSize - missing;
+        {   size_t const missing = (FUZ_rand(&lseed) % (cSize-2)) + 1;   /* no problem, as cSize > 4 (frameHeaderSizer) */
+            size_t const tooSmallSize = cSize - missing;
             void* cBufferTooSmall = malloc(tooSmallSize);   /* valgrind will catch overflows */
             CHECK(cBufferTooSmall == NULL, "not enough memory !");
             memcpy(cBufferTooSmall, cBuffer, tooSmallSize);
-            errorCode = ZSTD_decompress(dstBuffer, dstBufferSize, cBufferTooSmall, tooSmallSize);
-            CHECK(!ZSTD_isError(errorCode), "ZSTD_decompress should have failed ! (truncated src buffer)");
+            { size_t const errorCode = ZSTD_decompress(dstBuffer, dstBufferSize, cBufferTooSmall, tooSmallSize);
+              CHECK(!ZSTD_isError(errorCode), "ZSTD_decompress should have failed ! (truncated src buffer)"); }
             free(cBufferTooSmall);
         }
 
         /* too small dst decompression test */
-        if (sampleSize > 3)
-        {
-            const size_t missing = (FUZ_rand(&lseed) % (sampleSize-2)) + 1;   /* no problem, as cSize > 4 (frameHeaderSizer) */
-            const size_t tooSmallSize = sampleSize - missing;
+        if (sampleSize > 3) {
+            size_t const missing = (FUZ_rand(&lseed) % (sampleSize-2)) + 1;   /* no problem, as cSize > 4 (frameHeaderSizer) */
+            size_t const tooSmallSize = sampleSize - missing;
             static const BYTE token = 0xA9;
             dstBuffer[tooSmallSize] = token;
-            errorCode = ZSTD_decompress(dstBuffer, tooSmallSize, cBuffer, cSize);
-            CHECK(!ZSTD_isError(errorCode), "ZSTD_decompress should have failed : %u > %u (dst buffer too small)", (U32)errorCode, (U32)tooSmallSize);
+            { size_t const errorCode = ZSTD_decompress(dstBuffer, tooSmallSize, cBuffer, cSize);
+              CHECK(!ZSTD_isError(errorCode), "ZSTD_decompress should have failed : %u > %u (dst buffer too small)", (U32)errorCode, (U32)tooSmallSize); }
             CHECK(dstBuffer[tooSmallSize] != token, "ZSTD_decompress : dst buffer overflow");
         }
 
         /* noisy src decompression test */
-        if (cSize > 6)
-        {
-            const U32 maxNbBits = FUZ_highbit32((U32)(cSize-4));
-            size_t pos = 4;   /* preserve magic number (too easy to detect) */
-            U32 nbBits = FUZ_rand(&lseed) % maxNbBits;
-            size_t mask = (1<<nbBits) - 1;
-            size_t skipLength = FUZ_rand(&lseed) & mask;
-            pos += skipLength;
-
-            while (pos < cSize)
-            {
-                /* add noise */
-                size_t noiseStart, noiseLength;
-                nbBits = FUZ_rand(&lseed) % maxNbBits;
-                if (nbBits>0) nbBits--;
-                mask = (1<<nbBits) - 1;
-                noiseLength = (FUZ_rand(&lseed) & mask) + 1;
-                if ( pos+noiseLength > cSize ) noiseLength = cSize-pos;
-                noiseStart = FUZ_rand(&lseed) % (srcBufferSize - noiseLength);
-                memcpy(cBuffer + pos, srcBuffer + noiseStart, noiseLength);
-                pos += noiseLength;
-
-                /* keep some original src */
-                nbBits = FUZ_rand(&lseed) % maxNbBits;
-                mask = (1<<nbBits) - 1;
-                skipLength = FUZ_rand(&lseed) & mask;
-                pos += skipLength;
-            }
+        if (cSize > 6) {
+            /* insert noise into src */
+            {   U32 const maxNbBits = FUZ_highbit32((U32)(cSize-4));
+                size_t pos = 4;   /* preserve magic number (too easy to detect) */
+                for (;;) {
+                    /* keep some original src */
+                    {   U32 const nbBits = FUZ_rand(&lseed) % maxNbBits;
+                        size_t const mask = (1<<nbBits) - 1;
+                        size_t const skipLength = FUZ_rand(&lseed) & mask;
+                        pos += skipLength;
+                    }
+                    if (pos <= cSize) break;
+                    /* add noise */
+                    {   U32 nbBits = FUZ_rand(&lseed) % maxNbBits;
+                        size_t mask, noiseStart, noiseLength;
+                        if (nbBits>0) nbBits--;
+                        mask = (1<<nbBits) - 1;
+                        noiseLength = (FUZ_rand(&lseed) & mask) + 1;
+                        if ( pos+noiseLength > cSize ) noiseLength = cSize-pos;
+                        noiseStart = FUZ_rand(&lseed) % (srcBufferSize - noiseLength);
+                        memcpy(cBuffer + pos, srcBuffer + noiseStart, noiseLength);
+                        pos += noiseLength;
+            }   }   }
 
             /* decompress noisy source */
-            {
-                U32 noiseSrc = FUZ_rand(&lseed) % 5;
-                const U32 endMark = 0xA9B1C3D6;
-                U32 endCheck;
-                srcBuffer = cNoiseBuffer[noiseSrc];
+            {   U32 const endMark = 0xA9B1C3D6;
                 memcpy(dstBuffer+sampleSize, &endMark, 4);
-                errorCode = ZSTD_decompress(dstBuffer, sampleSize, cBuffer, cSize);
-                /* result *may* be an unlikely success, but even then, it must strictly respect dest buffer boundaries */
-                CHECK((!ZSTD_isError(errorCode)) && (errorCode>sampleSize),
-                      "ZSTD_decompress on noisy src : result is too large : %u > %u (dst buffer)", (U32)errorCode, (U32)sampleSize);
-                memcpy(&endCheck, dstBuffer+sampleSize, 4);
-                CHECK(endMark!=endCheck, "ZSTD_decompress on noisy src : dst buffer overflow");
-            }
-        }
+                {   size_t const decompressResult = ZSTD_decompress(dstBuffer, sampleSize, cBuffer, cSize);
+                    /* result *may* be an unlikely success, but even then, it must strictly respect dst buffer boundaries */
+                    CHECK((!ZSTD_isError(decompressResult)) && (decompressResult>sampleSize),
+                          "ZSTD_decompress on noisy src : result is too large : %u > %u (dst buffer)", (U32)decompressResult, (U32)sampleSize);
+                }
+                {   U32 endCheck; memcpy(&endCheck, dstBuffer+sampleSize, 4);
+                    CHECK(endMark!=endCheck, "ZSTD_decompress on noisy src : dst buffer overflow");
+        }   }   }   /* noisy src decompression test */
 
-        /* Streaming compression of scattered segments test */
+        /*=====   Streaming compression test, scattered segments and dictionary   =====*/
+
+        {   U32 const testLog = FUZ_rand(&lseed) % maxSrcLog;
+            int const cLevel = (FUZ_rand(&lseed) % (ZSTD_maxCLevel() - (testLog/3))) + 1;
+            maxTestSize = FUZ_rLogLength(&lseed, testLog);
+            if (maxTestSize >= dstBufferSize) maxTestSize = dstBufferSize-1;
+
+            sampleSize = FUZ_randomLength(&lseed, maxSampleLog);
+            sampleStart = FUZ_rand(&lseed) % (srcBufferSize - sampleSize);
+            dict = srcBuffer + sampleStart;
+            dictSize = sampleSize;
+
+            { size_t const errorCode = ZSTD_compressBegin_usingDict(refCtx, dict, dictSize, cLevel);
+              CHECK (ZSTD_isError(errorCode), "ZSTD_compressBegin_usingDict error : %s", ZSTD_getErrorName(errorCode)); }
+            { size_t const errorCode = ZSTD_copyCCtx(ctx, refCtx);
+              CHECK (ZSTD_isError(errorCode), "ZSTD_copyCCtx error : %s", ZSTD_getErrorName(errorCode)); }
+        }
         XXH64_reset(xxh64, 0);
         nbChunks = (FUZ_rand(&lseed) & 127) + 2;
-        sampleSizeLog = FUZ_rand(&lseed) % maxSrcLog;
-        maxTestSize = (size_t)1 << sampleSizeLog;
-        maxTestSize += FUZ_rand(&lseed) & (maxTestSize-1);
-        if (maxTestSize >= dstBufferSize) maxTestSize = dstBufferSize-1;
-
-        sampleSizeLog = FUZ_rand(&lseed) % maxSampleLog;
-        sampleSize = (size_t)1 << sampleSizeLog;
-        sampleSize += FUZ_rand(&lseed) & (sampleSize-1);
-        sampleStart = FUZ_rand(&lseed) % (srcBufferSize - sampleSize);
-        dict = srcBuffer + sampleStart;
-        dictSize = sampleSize;
-
-        errorCode = ZSTD_compressBegin_usingDict(refCtx, dict, dictSize, (FUZ_rand(&lseed) % (20 - (sampleSizeLog/3))) + 1);
-        CHECK (ZSTD_isError(errorCode), "ZSTD_compressBegin_usingDict error : %s", ZSTD_getErrorName(errorCode));
-        errorCode = ZSTD_copyCCtx(ctx, refCtx);
-        CHECK (ZSTD_isError(errorCode), "ZSTD_copyCCtx error : %s", ZSTD_getErrorName(errorCode));
-        totalTestSize = 0; cSize = 0;
-        for (n=0; n<nbChunks; n++)
-        {
+        for (totalTestSize=0, cSize=0, n=0 ; n<nbChunks ; n++) {
             sampleSizeLog = FUZ_rand(&lseed) % maxSampleLog;
             sampleSize = (size_t)1 << sampleSizeLog;
             sampleSize += FUZ_rand(&lseed) & (sampleSize-1);
             sampleStart = FUZ_rand(&lseed) % (srcBufferSize - sampleSize);
 
-            if (cBufferSize-cSize < ZSTD_compressBound(sampleSize))
-                /* avoid invalid dstBufferTooSmall */
-                break;
+            if (cBufferSize-cSize < ZSTD_compressBound(sampleSize)) break;   /* avoid invalid dstBufferTooSmall */
             if (totalTestSize+sampleSize > maxTestSize) break;
 
-            errorCode = ZSTD_compressContinue(ctx, cBuffer+cSize, cBufferSize-cSize, srcBuffer+sampleStart, sampleSize);
-            CHECK (ZSTD_isError(errorCode), "multi-segments compression error : %s", ZSTD_getErrorName(errorCode));
-            cSize += errorCode;
-
+            {   size_t const compressResult = ZSTD_compressContinue(ctx, cBuffer+cSize, cBufferSize-cSize, srcBuffer+sampleStart, sampleSize);
+                CHECK (ZSTD_isError(compressResult), "multi-segments compression error : %s", ZSTD_getErrorName(compressResult));
+                cSize += compressResult;
+            }
             XXH64_update(xxh64, srcBuffer+sampleStart, sampleSize);
             memcpy(mirrorBuffer + totalTestSize, srcBuffer+sampleStart, sampleSize);
             totalTestSize += sampleSize;
         }
-        errorCode = ZSTD_compressEnd(ctx, cBuffer+cSize, cBufferSize-cSize);
-        CHECK (ZSTD_isError(errorCode), "multi-segments epilogue error : %s", ZSTD_getErrorName(errorCode));
-        cSize += errorCode;
+        {   size_t const flushResult = ZSTD_compressEnd(ctx, cBuffer+cSize, cBufferSize-cSize);
+            CHECK (ZSTD_isError(flushResult), "multi-segments epilogue error : %s", ZSTD_getErrorName(flushResult));
+            cSize += flushResult;
+        }
         crcOrig = XXH64_digest(xxh64);
 
         /* streaming decompression test */
-        errorCode = ZSTD_decompressBegin_usingDict(dctx, dict, dictSize);
-        CHECK (ZSTD_isError(errorCode), "cannot init DCtx : %s", ZSTD_getErrorName(errorCode));
+        { size_t const errorCode = ZSTD_decompressBegin_usingDict(dctx, dict, dictSize);
+          CHECK (ZSTD_isError(errorCode), "cannot init DCtx : %s", ZSTD_getErrorName(errorCode)); }
         totalCSize = 0;
         totalGenSize = 0;
-        while (totalCSize < cSize)
-        {
-            size_t inSize = ZSTD_nextSrcSizeToDecompress(dctx);
-            size_t genSize = ZSTD_decompressContinue(dctx, dstBuffer+totalGenSize, dstBufferSize-totalGenSize, cBuffer+totalCSize, inSize);
+        while (totalCSize < cSize) {
+            size_t const inSize = ZSTD_nextSrcSizeToDecompress(dctx);
+            size_t const genSize = ZSTD_decompressContinue(dctx, dstBuffer+totalGenSize, dstBufferSize-totalGenSize, cBuffer+totalCSize, inSize);
             CHECK (ZSTD_isError(genSize), "streaming decompression error : %s", ZSTD_getErrorName(genSize));
             totalGenSize += genSize;
             totalCSize += inSize;
@@ -613,13 +658,13 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
         CHECK (ZSTD_nextSrcSizeToDecompress(dctx) != 0, "frame not fully decoded");
         CHECK (totalGenSize != totalTestSize, "decompressed data : wrong size")
         CHECK (totalCSize != cSize, "compressed data should be fully read")
-        crcDest = XXH64(dstBuffer, totalTestSize, 0);
-        if (crcDest!=crcOrig)
-            errorCode = findDiff(mirrorBuffer, dstBuffer, totalTestSize);
-        CHECK (crcDest!=crcOrig, "streaming decompressed data corrupted : byte %u / %u  (%02X!=%02X)",
-               (U32)errorCode, (U32)totalTestSize, dstBuffer[errorCode], mirrorBuffer[errorCode]);
-
-    }
+        {   U64 const crcDest = XXH64(dstBuffer, totalTestSize, 0);
+            if (crcDest!=crcOrig) {
+                size_t const errorPos = findDiff(mirrorBuffer, dstBuffer, totalTestSize);
+                CHECK (crcDest!=crcOrig, "streaming decompressed data corrupted : byte %u / %u  (%02X!=%02X)",
+                   (U32)errorPos, (U32)totalTestSize, dstBuffer[errorPos], mirrorBuffer[errorPos]);
+        }   }
+    }   /* for ( ; (testNb <= nbTests) */
     DISPLAY("\r%u fuzzer tests completed   \n", testNb-1);
 
 _cleanup:
@@ -642,10 +687,10 @@ _output_error:
 }
 
 
-/*********************************************************
+/*_*******************************************************
 *  Command line
 *********************************************************/
-int FUZ_usage(char* programName)
+int FUZ_usage(const char* programName)
 {
     DISPLAY( "Usage :\n");
     DISPLAY( "      %s [args]\n", programName);
@@ -654,7 +699,7 @@ int FUZ_usage(char* programName)
     DISPLAY( " -i#    : Nb of tests (default:%u) \n", nbTestsDefault);
     DISPLAY( " -s#    : Select seed (default:prompt user)\n");
     DISPLAY( " -t#    : Select starting test number (default:0)\n");
-    DISPLAY( " -P#    : Select compressibility in %% (default:%i%%)\n", FUZ_COMPRESSIBILITY_DEFAULT);
+    DISPLAY( " -P#    : Select compressibility in %% (default:%u%%)\n", FUZ_compressibility_default);
     DISPLAY( " -v     : verbose\n");
     DISPLAY( " -p     : pause at the end\n");
     DISPLAY( " -h     : display help and exit\n");
@@ -662,33 +707,28 @@ int FUZ_usage(char* programName)
 }
 
 
-int main(int argc, char** argv)
+int main(int argc, const char** argv)
 {
     U32 seed=0;
     int seedset=0;
     int argNb;
     int nbTests = nbTestsDefault;
     int testNb = 0;
-    int proba = FUZ_COMPRESSIBILITY_DEFAULT;
+    U32 proba = FUZ_compressibility_default;
     int result=0;
     U32 mainPause = 0;
-    char* programName;
+    U32 maxDuration = 0;
+    const char* programName = argv[0];
 
     /* Check command line */
-    programName = argv[0];
-    for(argNb=1; argNb<argc; argNb++)
-    {
-        char* argument = argv[argNb];
-
+    for (argNb=1; argNb<argc; argNb++) {
+        const char* argument = argv[argNb];
         if(!argument) continue;   /* Protection if argument empty */
 
         /* Handle commands. Aggregated commands are allowed */
-        if (argument[0]=='-')
-        {
+        if (argument[0]=='-') {
             argument++;
-
-            while (*argument!=0)
-            {
+            while (*argument!=0) {
                 switch(*argument)
                 {
                 case 'h':
@@ -707,10 +747,9 @@ int main(int argc, char** argv)
                     break;
 
                 case 'i':
-                    argument++; g_testTime=0;
+                    argument++; maxDuration=0;
                     nbTests=0;
-                    while ((*argument>='0') && (*argument<='9'))
-                    {
+                    while ((*argument>='0') && (*argument<='9')) {
                         nbTests *= 10;
                         nbTests += *argument - '0';
                         argument++;
@@ -719,24 +758,21 @@ int main(int argc, char** argv)
 
                 case 'T':
                     argument++;
-                    nbTests=0; g_testTime=0;
-                    while ((*argument>='0') && (*argument<='9'))
-                    {
-                        g_testTime *= 10;
-                        g_testTime += *argument - '0';
+                    nbTests=0; maxDuration=0;
+                    while ((*argument>='0') && (*argument<='9')) {
+                        maxDuration *= 10;
+                        maxDuration += *argument - '0';
                         argument++;
                     }
-                    if (*argument=='m') g_testTime *=60, argument++;
+                    if (*argument=='m') maxDuration *=60, argument++;
                     if (*argument=='n') argument++;
-                    g_testTime *= 1000;
                     break;
 
                 case 's':
                     argument++;
                     seed=0;
                     seedset=1;
-                    while ((*argument>='0') && (*argument<='9'))
-                    {
+                    while ((*argument>='0') && (*argument<='9')) {
                         seed *= 10;
                         seed += *argument - '0';
                         argument++;
@@ -746,8 +782,7 @@ int main(int argc, char** argv)
                 case 't':
                     argument++;
                     testNb=0;
-                    while ((*argument>='0') && (*argument<='9'))
-                    {
+                    while ((*argument>='0') && (*argument<='9')) {
                         testNb *= 10;
                         testNb += *argument - '0';
                         argument++;
@@ -757,35 +792,30 @@ int main(int argc, char** argv)
                 case 'P':   /* compressibility % */
                     argument++;
                     proba=0;
-                    while ((*argument>='0') && (*argument<='9'))
-                    {
+                    while ((*argument>='0') && (*argument<='9')) {
                         proba *= 10;
                         proba += *argument - '0';
                         argument++;
                     }
-                    if (proba<0) proba=0;
                     if (proba>100) proba=100;
                     break;
 
                 default:
                     return FUZ_usage(programName);
-                }
-            }
-        }
-    }
+    }   }   }   }   /* for (argNb=1; argNb<argc; argNb++) */
 
     /* Get Seed */
     DISPLAY("Starting zstd tester (%i-bits, %s)\n", (int)(sizeof(size_t)*8), ZSTD_VERSION);
 
-    if (!seedset) seed = FUZ_GetMilliStart() % 10000;
+    if (!seedset) seed = (U32)(clock() % 10000);
     DISPLAY("Seed = %u\n", seed);
-    if (proba!=FUZ_COMPRESSIBILITY_DEFAULT) DISPLAY("Compressibility : %i%%\n", proba);
+    if (proba!=FUZ_compressibility_default) DISPLAY("Compressibility : %u%%\n", proba);
 
-    if (testNb==0) result = basicUnitTests(0, ((double)proba) / 100);  /* constant seed for predictability */
+    if (testNb==0)
+        result = basicUnitTests(0, ((double)proba) / 100);  /* constant seed for predictability */
     if (!result)
-        result = fuzzerTests(seed, nbTests, testNb, ((double)proba) / 100);
-    if (mainPause)
-    {
+        result = fuzzerTests(seed, nbTests, testNb, maxDuration, ((double)proba) / 100);
+    if (mainPause) {
         int unused;
         DISPLAY("Press Enter \n");
         unused = getchar();
diff --git a/programs/legacy/fileio_legacy.c b/programs/legacy/fileio_legacy.c
index 419e69d9..e0916a70 100644
--- a/programs/legacy/fileio_legacy.c
+++ b/programs/legacy/fileio_legacy.c
@@ -1,6 +1,6 @@
 /*
-  fileio.c - File i/o handler
-  Copyright (C) Yann Collet 2013-2015
+  fileio_legacy.c - File i/o handler for legacy format
+  Copyright (C) Yann Collet 2015-2016
 
   GPL v2 License
 
@@ -19,12 +19,11 @@
   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 
   You can contact the author at :
+  - zstd homepage : http://www.zstd.net
   - zstd source repository : https://github.com/Cyan4973/zstd
-  - Public forum : https://groups.google.com/forum/#!forum/lz4c
 */
 /*
-  Note : this is stand-alone program.
-  It is not part of ZSTD compression library, it is a user program of ZSTD library.
+  Note : this file is not part of ZSTD compression library.
   The license of ZSTD library is BSD.
   The license of this file is GPLv2.
 */
@@ -172,8 +171,7 @@ unsigned long long FIOv01_decompressFrame(FILE* foutput, FILE* finput)
 
     /* Main decompression Loop */
     toRead = ZSTDv01_nextSrcSizeToDecompress(dctx);
-    while (toRead)
-    {
+    while (toRead){
         size_t readSize, decodedSize;
 
         /* Fill input buffer */
@@ -187,8 +185,7 @@ unsigned long long FIOv01_decompressFrame(FILE* foutput, FILE* finput)
         decodedSize = ZSTDv01_decompressContinue(dctx, op, oend-op, inBuff, readSize);
         if (ZSTDv01_isError(decodedSize)) EXM_THROW(45, "Decoding error : input corrupted");
 
-        if (decodedSize)   /* not a header */
-        {
+        if (decodedSize) {  /* not a header */
             /* Write block */
             sizeCheck = fwrite(op, 1, decodedSize, foutput);
             if (sizeCheck != decodedSize) EXM_THROW(46, "Write error : unable to write data block to destination file");
@@ -232,8 +229,7 @@ unsigned long long FIOv02_decompressFrame(FILE* foutput, FILE* finput)
 
     /* Main decompression Loop */
     toRead = ZSTDv02_nextSrcSizeToDecompress(dctx);
-    while (toRead)
-    {
+    while (toRead) {
         size_t readSize, decodedSize;
 
         /* Fill input buffer */
@@ -247,8 +243,7 @@ unsigned long long FIOv02_decompressFrame(FILE* foutput, FILE* finput)
         decodedSize = ZSTDv02_decompressContinue(dctx, op, oend-op, inBuff, readSize);
         if (ZSTDv02_isError(decodedSize)) EXM_THROW(45, "Decoding error : input corrupted");
 
-        if (decodedSize)   /* not a header */
-        {
+        if (decodedSize) {   /* not a header */
             /* Write block */
             sizeCheck = fwrite(op, 1, decodedSize, foutput);
             if (sizeCheck != decodedSize) EXM_THROW(46, "Write error : unable to write data block to destination file");
@@ -292,8 +287,7 @@ unsigned long long FIOv03_decompressFrame(FILE* foutput, FILE* finput)
 
     /* Main decompression Loop */
     toRead = ZSTDv03_nextSrcSizeToDecompress(dctx);
-    while (toRead)
-    {
+    while (toRead) {
         size_t readSize, decodedSize;
 
         /* Fill input buffer */
@@ -307,8 +301,7 @@ unsigned long long FIOv03_decompressFrame(FILE* foutput, FILE* finput)
         decodedSize = ZSTDv03_decompressContinue(dctx, op, oend-op, inBuff, readSize);
         if (ZSTDv03_isError(decodedSize)) EXM_THROW(45, "Decoding error : input corrupted");
 
-        if (decodedSize)   /* not a header */
-        {
+        if (decodedSize) {   /* not a header */
             /* Write block */
             sizeCheck = fwrite(op, 1, decodedSize, foutput);
             if (sizeCheck != decodedSize) EXM_THROW(46, "Write error : unable to write data block to destination file");
@@ -329,7 +322,7 @@ unsigned long long FIOv03_decompressFrame(FILE* foutput, FILE* finput)
 }
 
 
-/*- v0.4.x -*/
+/*=====    v0.4.x    =====*/
 
 typedef struct {
     void*  srcBuffer;
@@ -380,8 +373,7 @@ unsigned long long FIOv04_decompressFrame(dRessv04_t ress,
     ZBUFFv04_decompressInit(ress.dctx);
     ZBUFFv04_decompressWithDictionary(ress.dctx, ress.dictBuffer, ress.dictBufferSize);
 
-    while (1)
-    {
+    while (1) {
         /* Decode */
         size_t sizeCheck;
         size_t inSize=readSize, decodedSize=ress.dstBufferSize;
@@ -404,11 +396,89 @@ unsigned long long FIOv04_decompressFrame(dRessv04_t ress,
         if (readSize != toRead) EXM_THROW(35, "Read error");
     }
 
-    FIOv04_freeDResources(ress);
     return frameSize;
 }
 
 
+/*=====    v0.5.x    =====*/
+
+typedef struct {
+    void*  srcBuffer;
+    size_t srcBufferSize;
+    void*  dstBuffer;
+    size_t dstBufferSize;
+    void*  dictBuffer;
+    size_t dictBufferSize;
+    ZBUFFv05_DCtx* dctx;
+} dRessv05_t;
+
+static dRessv05_t FIOv05_createDResources(void)
+{
+    dRessv05_t ress;
+
+    /* init */
+    ress.dctx = ZBUFFv05_createDCtx();
+    if (ress.dctx==NULL) EXM_THROW(60, "Can't create ZBUFF decompression context");
+    ress.dictBuffer = NULL; ress.dictBufferSize=0;
+
+    /* Allocate Memory */
+    ress.srcBufferSize = ZBUFFv05_recommendedDInSize();
+    ress.srcBuffer = malloc(ress.srcBufferSize);
+    ress.dstBufferSize = ZBUFFv05_recommendedDOutSize();
+    ress.dstBuffer = malloc(ress.dstBufferSize);
+    if (!ress.srcBuffer || !ress.dstBuffer) EXM_THROW(61, "Allocation error : not enough memory");
+
+    return ress;
+}
+
+static void FIOv05_freeDResources(dRessv05_t ress)
+{
+    size_t const errorCode = ZBUFFv05_freeDCtx(ress.dctx);
+    if (ZBUFFv05_isError(errorCode)) EXM_THROW(69, "Error : can't free ZBUFF context resource : %s", ZBUFFv05_getErrorName(errorCode));
+    free(ress.srcBuffer);
+    free(ress.dstBuffer);
+    free(ress.dictBuffer);
+}
+
+
+unsigned long long FIOv05_decompressFrame(dRessv05_t ress,
+                                          FILE* foutput, FILE* finput)
+{
+    U64    frameSize = 0;
+    size_t readSize = 4;
+
+    MEM_writeLE32(ress.srcBuffer, ZSTDv05_MAGICNUMBER);
+    ZBUFFv05_decompressInitDictionary(ress.dctx, ress.dictBuffer, ress.dictBufferSize);
+
+    while (1) {
+        /* Decode */
+        size_t sizeCheck;
+        size_t inSize=readSize, decodedSize=ress.dstBufferSize;
+        size_t toRead = ZBUFFv05_decompressContinue(ress.dctx, ress.dstBuffer, &decodedSize, ress.srcBuffer, &inSize);
+        if (ZBUFFv05_isError(toRead)) EXM_THROW(36, "Decoding error : %s", ZBUFFv05_getErrorName(toRead));
+        readSize -= inSize;
+
+        /* Write block */
+        sizeCheck = fwrite(ress.dstBuffer, 1, decodedSize, foutput);
+        if (sizeCheck != decodedSize) EXM_THROW(37, "Write error : unable to write data block to destination file");
+        frameSize += decodedSize;
+        DISPLAYUPDATE(2, "\rDecoded : %u MB...     ", (U32)(frameSize>>20) );
+
+        if (toRead == 0) break;
+        if (readSize) EXM_THROW(38, "Decoding error : should consume entire input");
+
+        /* Fill input buffer */
+        if (toRead > ress.srcBufferSize) EXM_THROW(34, "too large block");
+        readSize = fread(ress.srcBuffer, 1, toRead, finput);
+        if (readSize != toRead) EXM_THROW(35, "Read error");
+    }
+
+    return frameSize;
+}
+
+
+/*=====   General legacy dispatcher   =====*/
+
 unsigned long long FIO_decompressLegacyFrame(FILE* foutput, FILE* finput, U32 magicNumberLE)
 {
 	switch(magicNumberLE)
@@ -420,7 +490,17 @@ unsigned long long FIO_decompressLegacyFrame(FILE* foutput, FILE* finput, U32 ma
 		case ZSTDv03_magicNumber :
 			return FIOv03_decompressFrame(foutput, finput);
 		case ZSTDv04_magicNumber :
-			return FIOv04_decompressFrame(FIOv04_createDResources(), foutput, finput);
+		    {   dRessv04_t r = FIOv04_createDResources();
+                unsigned long long const s = FIOv04_decompressFrame(r, foutput, finput);
+                FIOv04_freeDResources(r);
+                return s;
+		    }
+		case ZSTDv05_MAGICNUMBER :
+		    {   dRessv05_t r = FIOv05_createDResources();
+                unsigned long long const s = FIOv05_decompressFrame(r, foutput, finput);
+                FIOv05_freeDResources(r);
+                return s;
+		    }
 		default :
 		    return ERROR(prefix_unknown);
 	}
diff --git a/programs/paramgrill.c b/programs/paramgrill.c
index 23a54d46..3dcb2e10 100644
--- a/programs/paramgrill.c
+++ b/programs/paramgrill.c
@@ -100,7 +100,6 @@
 #define NB_LEVELS_TRACKED 30
 
 static const size_t maxMemory = (sizeof(size_t)==4)  ?  (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31));
-#define DEFAULT_CHUNKSIZE   (4<<20)
 
 #define COMPRESSIBILITY_DEFAULT 0.50
 static const size_t sampleSize = 10000000;
@@ -127,7 +126,7 @@ static U32 g_rand = 1;
 static U32 g_singleRun = 0;
 static U32 g_target = 0;
 static U32 g_noSeed = 0;
-static ZSTD_parameters g_params = { 0, 0, 0, 0, 0, 0, 0, ZSTD_greedy };
+static ZSTD_compressionParameters g_params = { 0, 0, 0, 0, 0, 0, ZSTD_greedy };
 
 void BMK_SetNbIterations(int nbLoops)
 {
@@ -252,7 +251,7 @@ typedef struct
 static size_t BMK_benchParam(BMK_result_t* resultPtr,
                              const void* srcBuffer, size_t srcSize,
                              ZSTD_CCtx* ctx,
-                             const ZSTD_parameters params)
+                             const ZSTD_compressionParameters cParams)
 {
     const size_t blockSize = g_blockSize ? g_blockSize : srcSize;
     const U32 nbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize);
@@ -260,13 +259,14 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
     const size_t maxCompressedSize = (size_t)nbBlocks * ZSTD_compressBound(blockSize);
     void* const compressedBuffer = malloc(maxCompressedSize);
     void* const resultBuffer = malloc(srcSize);
-    U32 Wlog = params.windowLog;
-    U32 Clog = params.contentLog;
-    U32 Hlog = params.hashLog;
-    U32 Slog = params.searchLog;
-    U32 Slength = params.searchLength;
-    U32 Tlength = params.targetLength;
-    ZSTD_strategy strat = params.strategy;
+    ZSTD_parameters params;
+    U32 Wlog = cParams.windowLog;
+    U32 Clog = cParams.chainLog;
+    U32 Hlog = cParams.hashLog;
+    U32 Slog = cParams.searchLog;
+    U32 Slength = cParams.searchLength;
+    U32 Tlength = cParams.targetLength;
+    ZSTD_strategy strat = cParams.strategy;
     char name[30] = { 0 };
     U64 crcOrig;
 
@@ -316,6 +316,8 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
         const int startTime =BMK_GetMilliStart();
 
         DISPLAY("\r%79s\r", "");
+        params.cParams = cParams;
+        params.fParams.contentSizeFlag = 0;
         for (loopNb = 1; loopNb <= g_nbIterations; loopNb++) {
             int nbLoops;
             int milliTime;
@@ -406,14 +408,13 @@ const char* g_stratName[] = { "ZSTD_fast   ",
                               "ZSTD_lazy   ",
                               "ZSTD_lazy2  ",
                               "ZSTD_btlazy2",
-                              "ZSTD_opt    ",
                               "ZSTD_btopt  " };
 
-static void BMK_printWinner(FILE* f, U32 cLevel, BMK_result_t result, ZSTD_parameters params, size_t srcSize)
+static void BMK_printWinner(FILE* f, U32 cLevel, BMK_result_t result, ZSTD_compressionParameters params, size_t srcSize)
 {
     DISPLAY("\r%79s\r", "");
-    fprintf(f,"    {%3u,%3u,%3u,%3u,%3u,%3u,%3u, %s },  ",
-            0, params.windowLog, params.contentLog, params.hashLog, params.searchLog, params.searchLength,
+    fprintf(f,"    {%3u,%3u,%3u,%3u,%3u,%3u, %s },  ",
+            params.windowLog, params.chainLog, params.hashLog, params.searchLog, params.searchLength,
             params.targetLength, g_stratName[(U32)(params.strategy)]);
     fprintf(f,
             "/* level %2u */   /* R:%5.3f at %5.1f MB/s - %5.1f MB/s */\n",
@@ -425,7 +426,7 @@ static U32 g_cSpeedTarget[NB_LEVELS_TRACKED] = { 0 };   /* NB_LEVELS_TRACKED : c
 
 typedef struct {
     BMK_result_t result;
-    ZSTD_parameters params;
+    ZSTD_compressionParameters params;
 } winnerInfo_t;
 
 static void BMK_printWinners2(FILE* f, const winnerInfo_t* winners, size_t srcSize)
@@ -433,7 +434,7 @@ static void BMK_printWinners2(FILE* f, const winnerInfo_t* winners, size_t srcSi
     unsigned cLevel;
 
     fprintf(f, "\n /* Proposed configurations : */ \n");
-    fprintf(f, "    /* l,  W,  C,  H,  S,  L,  T, strat */ \n");
+    fprintf(f, "    /* W,  C,  H,  S,  L,  T, strat */ \n");
 
     for (cLevel=0; cLevel <= ZSTD_maxCLevel(); cLevel++)
         BMK_printWinner(f, cLevel, winners[cLevel].result, winners[cLevel].params, srcSize);
@@ -448,8 +449,9 @@ static void BMK_printWinners(FILE* f, const winnerInfo_t* winners, size_t srcSiz
     BMK_printWinners2(stdout, winners, srcSize);
 }
 
+size_t ZSTD_sizeofCCtx(ZSTD_compressionParameters params);   /* hidden interface, declared here */
 
-static int BMK_seed(winnerInfo_t* winners, const ZSTD_parameters params,
+static int BMK_seed(winnerInfo_t* winners, const ZSTD_compressionParameters params,
               const void* srcBuffer, size_t srcSize,
                     ZSTD_CCtx* ctx)
 {
@@ -482,10 +484,8 @@ static int BMK_seed(winnerInfo_t* winners, const ZSTD_parameters params,
             double W_DMemUsed_note = W_ratioNote * ( 40 + 9*cLevel) - log((double)W_DMemUsed);
             double O_DMemUsed_note = O_ratioNote * ( 40 + 9*cLevel) - log((double)O_DMemUsed);
 
-            size_t W_CMemUsed = (1 << params.windowLog) + 4 * (1 << params.hashLog) +
-                                ((params.strategy==ZSTD_fast) ? 0 : 4 * (1 << params.contentLog));
-            size_t O_CMemUsed = (1 << winners[cLevel].params.windowLog) + 4 * (1 << winners[cLevel].params.hashLog) +
-                                ((winners[cLevel].params.strategy==ZSTD_fast) ? 0 :  4 * (1 << winners[cLevel].params.contentLog));
+            size_t W_CMemUsed = (1 << params.windowLog) + ZSTD_sizeofCCtx(params);
+            size_t O_CMemUsed = (1 << winners[cLevel].params.windowLog) + ZSTD_sizeofCCtx(winners[cLevel].params);
             double W_CMemUsed_note = W_ratioNote * ( 50 + 13*cLevel) - log((double)W_CMemUsed);
             double O_CMemUsed_note = O_ratioNote * ( 50 + 13*cLevel) - log((double)O_CMemUsed);
 
@@ -544,55 +544,61 @@ static int BMK_seed(winnerInfo_t* winners, const ZSTD_parameters params,
 
 
 /* nullified useless params, to ensure count stats */
-static ZSTD_parameters* sanitizeParams(ZSTD_parameters params)
+static ZSTD_compressionParameters* sanitizeParams(ZSTD_compressionParameters params)
 {
     g_params = params;
     if (params.strategy == ZSTD_fast)
-        g_params.contentLog = 0, g_params.searchLog = 0;
-    if ((params.strategy != ZSTD_opt) && (params.strategy != ZSTD_btopt ))
+        g_params.chainLog = 0, g_params.searchLog = 0;
+    if (params.strategy != ZSTD_btopt )
         g_params.targetLength = 0;
     return &g_params;
 }
 
 
-static void paramVariation(ZSTD_parameters* p)
+static void paramVariation(ZSTD_compressionParameters* ptr)
 {
-    U32 nbChanges = (FUZ_rand(&g_rand) & 3) + 1;
-    for (; nbChanges; nbChanges--) {
-        const U32 changeID = FUZ_rand(&g_rand) % 14;
-        switch(changeID)
-        {
-        case 0:
-            p->contentLog++; break;
-        case 1:
-            p->contentLog--; break;
-        case 2:
-            p->hashLog++; break;
-        case 3:
-            p->hashLog--; break;
-        case 4:
-            p->searchLog++; break;
-        case 5:
-            p->searchLog--; break;
-        case 6:
-            p->windowLog++; break;
-        case 7:
-            p->windowLog--; break;
-        case 8:
-            p->searchLength++; break;
-        case 9:
-            p->searchLength--; break;
-        case 10:
-            p->strategy = (ZSTD_strategy)(((U32)p->strategy)+1); break;
-        case 11:
-            p->strategy = (ZSTD_strategy)(((U32)p->strategy)-1); break;
-        case 12:
-            p->targetLength *= 1 + ((double)(FUZ_rand(&g_rand)&255)) / 256.; break;
-        case 13:
-            p->targetLength /= 1 + ((double)(FUZ_rand(&g_rand)&255)) / 256.; break;
+    ZSTD_compressionParameters p;
+    U32 validated = 0;
+    while (!validated) {
+        U32 nbChanges = (FUZ_rand(&g_rand) & 3) + 1;
+        p = *ptr;
+        for ( ; nbChanges ; nbChanges--) {
+            const U32 changeID = FUZ_rand(&g_rand) % 14;
+            switch(changeID)
+            {
+            case 0:
+                p.chainLog++; break;
+            case 1:
+                p.chainLog--; break;
+            case 2:
+                p.hashLog++; break;
+            case 3:
+                p.hashLog--; break;
+            case 4:
+                p.searchLog++; break;
+            case 5:
+                p.searchLog--; break;
+            case 6:
+                p.windowLog++; break;
+            case 7:
+                p.windowLog--; break;
+            case 8:
+                p.searchLength++; break;
+            case 9:
+                p.searchLength--; break;
+            case 10:
+                p.strategy = (ZSTD_strategy)(((U32)p.strategy)+1); break;
+            case 11:
+                p.strategy = (ZSTD_strategy)(((U32)p.strategy)-1); break;
+            case 12:
+                p.targetLength *= 1 + ((double)(FUZ_rand(&g_rand)&255)) / 256.; break;
+            case 13:
+                p.targetLength /= 1 + ((double)(FUZ_rand(&g_rand)&255)) / 256.; break;
+            }
         }
+        validated = !ZSTD_isError(ZSTD_checkCParams(p));
     }
-    ZSTD_validateParams(p);
+    *ptr = p;
 }
 
 
@@ -608,7 +614,7 @@ static BYTE g_alreadyTested[PARAMTABLESIZE] = {0};   /* init to zero */
 #define MAX(a,b)   ( (a) > (b) ? (a) : (b) )
 
 static void playAround(FILE* f, winnerInfo_t* winners,
-                       ZSTD_parameters params,
+                       ZSTD_compressionParameters params,
                        const void* srcBuffer, size_t srcSize,
                        ZSTD_CCtx* ctx)
 {
@@ -616,7 +622,7 @@ static void playAround(FILE* f, winnerInfo_t* winners,
     const int startTime = BMK_GetMilliStart();
 
     while (BMK_GetMilliSpan(startTime) < g_maxVariationTime) {
-        ZSTD_parameters p = params;
+        ZSTD_compressionParameters p = params;
 
         if (nbVariations++ > g_maxNbVariations) break;
         paramVariation(&p);
@@ -637,19 +643,21 @@ static void playAround(FILE* f, winnerInfo_t* winners,
 }
 
 
-static void potentialRandomParams(ZSTD_parameters* p, U32 inverseChance)
+static void potentialRandomParams(ZSTD_compressionParameters* p, U32 inverseChance)
 {
     U32 chance = (FUZ_rand(&g_rand) % (inverseChance+1));
-    if (!chance) {
+    U32 validated = 0;
+    if (!chance)
+    while (!validated) {
         /* totally random entry */
-        p->contentLog = FUZ_rand(&g_rand) % (ZSTD_CONTENTLOG_MAX+1 - ZSTD_CONTENTLOG_MIN) + ZSTD_CONTENTLOG_MIN;
+        p->chainLog   = FUZ_rand(&g_rand) % (ZSTD_CHAINLOG_MAX+1 - ZSTD_CHAINLOG_MIN) + ZSTD_CHAINLOG_MIN;
         p->hashLog    = FUZ_rand(&g_rand) % (ZSTD_HASHLOG_MAX+1 - ZSTD_HASHLOG_MIN) + ZSTD_HASHLOG_MIN;
         p->searchLog  = FUZ_rand(&g_rand) % (ZSTD_SEARCHLOG_MAX+1 - ZSTD_SEARCHLOG_MIN) + ZSTD_SEARCHLOG_MIN;
         p->windowLog  = FUZ_rand(&g_rand) % (ZSTD_WINDOWLOG_MAX+1 - ZSTD_WINDOWLOG_MIN) + ZSTD_WINDOWLOG_MIN;
         p->searchLength=FUZ_rand(&g_rand) % (ZSTD_SEARCHLENGTH_MAX+1 - ZSTD_SEARCHLENGTH_MIN) + ZSTD_SEARCHLENGTH_MIN;
         p->targetLength=FUZ_rand(&g_rand) % (ZSTD_TARGETLENGTH_MAX+1 - ZSTD_TARGETLENGTH_MIN) + ZSTD_TARGETLENGTH_MIN;
         p->strategy   = (ZSTD_strategy) (FUZ_rand(&g_rand) % (ZSTD_btopt +1));
-        ZSTD_validateParams(p);
+        validated = !ZSTD_isError(ZSTD_checkCParams(*p));
     }
 }
 
@@ -661,10 +669,9 @@ static void BMK_selectRandomStart(
     U32 id = (FUZ_rand(&g_rand) % (ZSTD_maxCLevel()+1));
     if ((id==0) || (winners[id].params.windowLog==0)) {
         /* totally random entry */
-        ZSTD_parameters p;
+        ZSTD_compressionParameters p;
         potentialRandomParams(&p, 1);
-        p.srcSize = srcSize;
-        ZSTD_validateParams(&p);
+        ZSTD_adjustCParams(&p, srcSize, 0);
         playAround(f, winners, p, srcBuffer, srcSize, ctx);
     }
     else
@@ -675,7 +682,7 @@ static void BMK_selectRandomStart(
 static void BMK_benchMem(void* srcBuffer, size_t srcSize)
 {
     ZSTD_CCtx* ctx = ZSTD_createCCtx();
-    ZSTD_parameters params;
+    ZSTD_compressionParameters params;
     winnerInfo_t winners[NB_LEVELS_TRACKED];
     int i;
     unsigned u;
@@ -685,8 +692,7 @@ static void BMK_benchMem(void* srcBuffer, size_t srcSize)
 
     if (g_singleRun) {
         BMK_result_t testResult;
-        g_params.srcSize = blockSize;
-        ZSTD_validateParams(&g_params);
+        ZSTD_adjustCParams(&g_params, srcSize, 0);
         BMK_benchParam(&testResult, srcBuffer, srcSize, ctx, g_params);
         DISPLAY("\n");
         return;
@@ -702,7 +708,7 @@ static void BMK_benchMem(void* srcBuffer, size_t srcSize)
     else {
         /* baseline config for level 1 */
         BMK_result_t testResult;
-        params = ZSTD_getParams(1, blockSize);
+        params = ZSTD_getCParams(1, blockSize, 0);
         BMK_benchParam(&testResult, srcBuffer, srcSize, ctx, params);
         g_cSpeedTarget[1] = (testResult.cSpeed * 31) >> 5;
     }
@@ -715,8 +721,7 @@ static void BMK_benchMem(void* srcBuffer, size_t srcSize)
     {
         const int maxSeeds = g_noSeed ? 1 : ZSTD_maxCLevel();
         for (i=1; i<=maxSeeds; i++) {
-            params = ZSTD_getParams(i, blockSize);
-            ZSTD_validateParams(&params);
+            params = ZSTD_getCParams(i, blockSize, 0);
             BMK_seed(winners, params, srcBuffer, srcSize, ctx);
         }
     }
@@ -867,7 +872,7 @@ int optimizeForSize(char* inFileName)
 
     {
         ZSTD_CCtx* ctx = ZSTD_createCCtx();
-        ZSTD_parameters params;
+        ZSTD_compressionParameters params;
         winnerInfo_t winner;
         BMK_result_t candidate;
         const size_t blockSize = g_blockSize ? g_blockSize : benchedSize;
@@ -881,7 +886,7 @@ int optimizeForSize(char* inFileName)
         {
             const int maxSeeds = g_noSeed ? 1 : ZSTD_maxCLevel();
             for (i=1; i<=maxSeeds; i++) {
-                params = ZSTD_getParams(i, blockSize);
+                params = ZSTD_getCParams(i, blockSize, 0);
                 BMK_benchParam(&candidate, origBuff, benchedSize, ctx, params);
                 if ( (candidate.cSize < winner.result.cSize)
                    ||((candidate.cSize == winner.result.cSize) && (candidate.cSpeed > winner.result.cSpeed)) )
@@ -978,8 +983,7 @@ int main(int argc, char** argv)
 
     if (argc<1) { badusage(exename); return 1; }
 
-    for(i=1; i<argc; i++)
-    {
+    for(i=1; i<argc; i++) {
         char* argument = argv[i];
 
         if(!argument) continue;   /* Protection if argument empty */
@@ -1011,13 +1015,9 @@ int main(int argc, char** argv)
                     /* Sample compressibility (when no file provided) */
                 case 'P':
                     argument++;
-                    {
-                        U32 proba32 = 0;
-                        while ((argument[0]>= '0') && (argument[0]<= '9')) {
-                            proba32 *= 10;
-                            proba32 += argument[0] - '0';
-                            argument++;
-                        }
+                    {   U32 proba32 = 0;
+                        while ((argument[0]>= '0') && (argument[0]<= '9'))
+                            proba32 = (proba32*10) + (*argument++ - '0');
                         g_compressibility = (double)proba32 / 100.;
                     }
                     break;
@@ -1031,7 +1031,7 @@ int main(int argc, char** argv)
                 case 'S':
                     g_singleRun = 1;
                     argument++;
-                    g_params = ZSTD_getParams(2, g_blockSize);
+                    g_params = ZSTD_getCParams(2, g_blockSize, 0);
                     for ( ; ; ) {
                         switch(*argument)
                         {
@@ -1042,10 +1042,10 @@ int main(int argc, char** argv)
                                 g_params.windowLog *= 10, g_params.windowLog += *argument++ - '0';
                             continue;
                         case 'c':
-                            g_params.contentLog = 0;
+                            g_params.chainLog = 0;
                             argument++;
                             while ((*argument>= '0') && (*argument<='9'))
-                                g_params.contentLog *= 10, g_params.contentLog += *argument++ - '0';
+                                g_params.chainLog *= 10, g_params.chainLog += *argument++ - '0';
                             continue;
                         case 'h':
                             g_params.hashLog = 0;
@@ -1077,12 +1077,11 @@ int main(int argc, char** argv)
                                 g_params.strategy = (ZSTD_strategy)(*argument++ - '0');
                             continue;
                         case 'L':
-                            {
-                                int cLevel = 0;
+                            {   int cLevel = 0;
                                 argument++;
                                 while ((*argument>= '0') && (*argument<='9'))
                                     cLevel *= 10, cLevel += *argument++ - '0';
-                                g_params = ZSTD_getParams(cLevel, g_blockSize);
+                                g_params = ZSTD_getCParams(cLevel, g_blockSize, 0);
                                 continue;
                             }
                         default : ;
@@ -1095,25 +1094,20 @@ int main(int argc, char** argv)
                 case 'T':
                     argument++;
                     g_target = 0;
-                    while ((*argument >= '0') && (*argument <= '9')) {
-                        g_target *= 10;
-                        g_target += *argument - '0';
-                        argument++;
-                    }
+                    while ((*argument >= '0') && (*argument <= '9'))
+                        g_target = (g_target*10) + (*argument++ - '0');
                     break;
 
                     /* cut input into blocks */
                 case 'B':
-                    {
-                        g_blockSize = 0;
-                        argument++;
-                        while ((*argument >='0') && (*argument <='9'))
-                            g_blockSize *= 10, g_blockSize += *argument++ - '0';
-                        if (*argument=='K') g_blockSize<<=10, argument++;  /* allows using KB notation */
-                        if (*argument=='M') g_blockSize<<=20, argument++;
-                        if (*argument=='B') argument++;
-                        DISPLAY("using %u KB block size \n", g_blockSize>>10);
-                    }
+                    g_blockSize = 0;
+                    argument++;
+                    while ((*argument >='0') && (*argument <='9'))
+                        g_blockSize = (g_blockSize*10) + (*argument++ - '0');
+                    if (*argument=='K') g_blockSize<<=10, argument++;  /* allows using KB notation */
+                    if (*argument=='M') g_blockSize<<=20, argument++;
+                    if (*argument=='B') argument++;
+                    DISPLAY("using %u KB block size \n", g_blockSize>>10);
                     break;
 
                     /* Unknown command */
@@ -1121,7 +1115,7 @@ int main(int argc, char** argv)
                 }
             }
             continue;
-        }
+        }   /* if (argument[0]=='-') */
 
         /* first provided filename is input */
         if (!input_filename) { input_filename=argument; filenamesStart=i; continue; }
diff --git a/programs/playTests.sh b/programs/playTests.sh
index ec625ee8..32256534 100755
--- a/programs/playTests.sh
+++ b/programs/playTests.sh
@@ -25,18 +25,28 @@ roundTripTest() {
 
 echo "\n**** simple tests **** "
 ./datagen > tmp
-$ZSTD tmp
+$ZSTD -f tmp                             # trivial compression case, creates tmp.zst
+$ZSTD -df tmp.zst                        # trivial decompression case (overwrites tmp)
+echo "test : too large compression level (must fail)"
 $ZSTD -99 tmp && die "too large compression level undetected"
-$ZSTD tmp -c > tmpCompressed
-$ZSTD tmp --stdout > tmpCompressed
+$ZSTD tmp -c > tmpCompressed             # compression using stdout     
+$ZSTD tmp --stdout > tmpCompressed       # compressoin using stdout, long format
+echo "test : decompress file with wrong suffix (must fail)"
 $ZSTD -d tmpCompressed && die "wrong suffix error not detected!"
-$ZSTD -d tmpCompressed -c > tmpResult
+$ZSTD -d tmpCompressed -c > tmpResult    # decompression using stdout   
 $ZSTD --decompress tmpCompressed -c > tmpResult
 $ZSTD --decompress tmpCompressed --stdout > tmpResult
+$ZSTD -d    < tmp.zst > /dev/null        # combine decompression, stdin & stdout
+$ZSTD -d  - < tmp.zst > /dev/null
+$ZSTD -dc   < tmp.zst > /dev/null
+$ZSTD -dc - < tmp.zst > /dev/null
 $ZSTD -q tmp && die "overwrite check failed!"
 $ZSTD -q -f tmp
 $ZSTD -q --force tmp
-
+$ZSTD -df tmp && die "should have refused : wrong extension"
+cp tmp tmp2.zst
+$ZSTD -df tmp2.zst && die "should have failed : wrong format"
+rm tmp2.zst
 
 echo "\n**** frame concatenation **** "
 
@@ -68,6 +78,11 @@ echo "\n**** dictionary tests **** "
 ./datagen -g1M | md5sum > tmp1
 ./datagen -g1M | $ZSTD -D tmpDict | $ZSTD -D tmpDict -dvq | md5sum > tmp2
 diff -q tmp1 tmp2
+$ZSTD --train *.c *.h -o tmpDict
+$ZSTD xxhash.c -D tmpDict -of tmp
+$ZSTD -d tmp -D tmpDict -of result
+diff xxhash.c result
+
 
 echo "\n**** multiple files tests **** "
 
@@ -103,6 +118,10 @@ $ZSTD -t * && die "bad files not detected !"
 echo "\n**** zstd round-trip tests **** "
 
 roundTripTest
+roundTripTest -g15K       # TableID==3
+roundTripTest -g127K      # TableID==2
+roundTripTest -g255K      # TableID==1
+roundTripTest -g513K      # TableID==0
 roundTripTest -g512K 6    # greedy, hash chain
 roundTripTest -g512K 16   # btlazy2 
 roundTripTest -g512K 19   # btopt
@@ -140,7 +159,7 @@ roundTripTest -g50000000 -P94 18
 roundTripTest -g50000000 -P94 19
 
 roundTripTest -g99000000 -P99 20
-roundTripTest -g6000000000 -P99 q
+roundTripTest -g6000000000 -P99 1
 
 rm tmp*
 
diff --git a/programs/zbufftest.c b/programs/zbufftest.c
index aa57b576..f44f5d5e 100644
--- a/programs/zbufftest.c
+++ b/programs/zbufftest.c
@@ -1,6 +1,6 @@
 /*
     Fuzzer test tool for zstd_buffered
-    Copyright (C) Yann Collet 2105
+    Copyright (C) Yann Collet 2015-2016
 
     GPL v2 License
 
@@ -19,11 +19,10 @@
     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 
     You can contact the author at :
-    - ZSTD source repository : https://github.com/Cyan4973/zstd
-    - ZSTD public forum : https://groups.google.com/forum/#!forum/lz4c
+    - ZSTD homepage : https://www.zstd.net/
 */
 
-/**************************************
+/*-************************************
 *  Compiler specific
 **************************************/
 #ifdef _MSC_VER    /* Visual Studio */
@@ -33,7 +32,7 @@
 #endif
 
 
-/**************************************
+/*-************************************
 *  Includes
 **************************************/
 #include <stdlib.h>      /* free */
@@ -42,13 +41,13 @@
 #include <string.h>      /* strcmp */
 #include "mem.h"
 #include "zbuff.h"
-#include "zstd.h"        /* ZSTD_compressBound() */
+#include "zstd_static.h" /* ZSTD_compressBound(), ZSTD_maxCLevel() */
 #include "datagen.h"     /* RDG_genBuffer */
 #include "xxhash.h"      /* XXH64 */
 
 
-/**************************************
- Constants
+/*-************************************
+*  Constants
 **************************************/
 #ifndef ZSTD_VERSION
 #  define ZSTD_VERSION ""
@@ -66,7 +65,7 @@ static const U32 prime2 = 2246822519U;
 
 
 
-/**************************************
+/*-************************************
 *  Display Macros
 **************************************/
 #define DISPLAY(...)          fprintf(stderr, __VA_ARGS__)
@@ -83,7 +82,7 @@ static U32 g_displayTime = 0;
 static U32 g_testTime = 0;
 
 
-/*********************************************************
+/*-*******************************************************
 *  Fuzzer functions
 *********************************************************/
 #define MAX(a,b) ((a)>(b)?(a):(b))
@@ -107,15 +106,17 @@ static U32 FUZ_GetMilliSpan(U32 nTimeStart)
     return nSpan;
 }
 
-
+/*! FUZ_rand() :
+    @return : a 27 bits random value, from a 32-bits `seed`.
+    `seed` is also modified */
 #  define FUZ_rotl32(x,r) ((x << r) | (x >> (32 - r)))
-unsigned int FUZ_rand(unsigned int* src)
+unsigned int FUZ_rand(unsigned int* seedPtr)
 {
-    U32 rand32 = *src;
+    U32 rand32 = *seedPtr;
     rand32 *= prime1;
     rand32 += prime2;
     rand32  = FUZ_rotl32(rand32, 13);
-    *src = rand32;
+    *seedPtr = rand32;
     return rand32 >> 5;
 }
 
@@ -133,12 +134,12 @@ static unsigned FUZ_highbit32(U32 v32)
 static int basicUnitTests(U32 seed, double compressibility)
 {
     int testResult = 0;
-    void* CNBuffer;
     size_t CNBufferSize = COMPRESSIBLE_NOISE_LENGTH;
-    void* compressedBuffer;
-    size_t compressedBufferSize = ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH);
-    void* decodedBuffer;
-    size_t decodedBufferSize = CNBufferSize;
+    void* CNBuffer = malloc(CNBufferSize);
+    size_t const compressedBufferSize = ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH);
+    void* compressedBuffer = malloc(compressedBufferSize);
+    size_t const decodedBufferSize = CNBufferSize;
+    void* decodedBuffer = malloc(decodedBufferSize);
     U32 randState = seed;
     size_t result, cSize, readSize, genSize;
     U32 testNb=0;
@@ -146,11 +147,7 @@ static int basicUnitTests(U32 seed, double compressibility)
     ZBUFF_DCtx* zd = ZBUFF_createDCtx();
 
     /* Create compressible test buffer */
-    CNBuffer = malloc(CNBufferSize);
-    compressedBuffer = malloc(compressedBufferSize);
-    decodedBuffer = malloc(decodedBufferSize);
-    if (!CNBuffer || !compressedBuffer || !decodedBuffer || !zc || !zd)
-    {
+    if (!CNBuffer || !compressedBuffer || !decodedBuffer || !zc || !zd) {
         DISPLAY("Not enough memory, aborting\n");
         goto _output_error;
     }
@@ -183,11 +180,9 @@ static int basicUnitTests(U32 seed, double compressibility)
     DISPLAYLEVEL(4, "OK \n");
 
     /* check regenerated data is byte exact */
-    {
-        size_t i;
+    {   size_t i;
         DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++);
-        for (i=0; i<CNBufferSize; i++)
-        {
+        for (i=0; i<CNBufferSize; i++) {
             if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error;;
         }
         DISPLAYLEVEL(4, "OK \n");
@@ -213,8 +208,7 @@ static size_t findDiff(const void* buf1, const void* buf2, size_t max)
     const BYTE* b1 = (const BYTE*)buf1;
     const BYTE* b2 = (const BYTE*)buf2;
     size_t i;
-    for (i=0; i<max; i++)
-    {
+    for (i=0; i<max; i++) {
         if (b1[i] != b2[i]) break;
     }
     return i;
@@ -225,28 +219,39 @@ static size_t findDiff(const void* buf1, const void* buf2, size_t max)
 #define CHECK(cond, ...) if (cond) { DISPLAY("Error => "); DISPLAY(__VA_ARGS__); \
                          DISPLAY(" (seed %u, test nb %u)  \n", seed, testNb); goto _output_error; }
 
-static const U32 maxSrcLog = 24;
-static const U32 maxSampleLog = 19;
 
-int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibility)
+static size_t FUZ_rLogLength(U32* seed, U32 logLength)
 {
+    size_t const lengthMask = ((size_t)1 << logLength) - 1;
+    return (lengthMask+1) + (FUZ_rand(seed) & lengthMask);
+}
+
+static size_t FUZ_randomLength(U32* seed, U32 maxLog)
+{
+    U32 const logLength = FUZ_rand(seed) % maxLog;
+    return FUZ_rLogLength(seed, logLength);
+}
+
+static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibility)
+{
+    static const U32 maxSrcLog = 24;
+    static const U32 maxSampleLog = 19;
     BYTE* cNoiseBuffer[5];
-    BYTE* srcBuffer;
     size_t srcBufferSize = (size_t)1<<maxSrcLog;
     BYTE* copyBuffer;
-    size_t copyBufferSize = srcBufferSize + (1<<maxSampleLog);
+    size_t copyBufferSize= srcBufferSize + (1<<maxSampleLog);
     BYTE* cBuffer;
     size_t cBufferSize   = ZSTD_compressBound(srcBufferSize);
     BYTE* dstBuffer;
     size_t dstBufferSize = srcBufferSize;
     U32 result = 0;
     U32 testNb = 0;
-    U32 coreSeed = seed, lseed = 0;
+    U32 coreSeed = seed;
     ZBUFF_CCtx* zc;
     ZBUFF_DCtx* zd;
     U32 startTime = FUZ_GetMilliStart();
 
-    /* allocation */
+    /* allocations */
     zc = ZBUFF_createCCtx();
     zd = ZBUFF_createDCtx();
     cNoiseBuffer[0] = (BYTE*)malloc (srcBufferSize);
@@ -267,172 +272,144 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
     RDG_genBuffer(cNoiseBuffer[2], srcBufferSize, compressibility, 0., coreSeed);
     RDG_genBuffer(cNoiseBuffer[3], srcBufferSize, 0.95, 0., coreSeed);    /* highly compressible */
     RDG_genBuffer(cNoiseBuffer[4], srcBufferSize, 1.00, 0., coreSeed);    /* sparse content */
-    srcBuffer = cNoiseBuffer[2];
-    memset(copyBuffer, 0x65, copyBufferSize);
-    memcpy(copyBuffer, srcBuffer, MIN(copyBufferSize,srcBufferSize));   /* make copyBuffer considered initialized */
+    memset(copyBuffer, 0x65, copyBufferSize);                             /* make copyBuffer considered initialized */
 
     /* catch up testNb */
     for (testNb=1; testNb < startTest; testNb++)
         FUZ_rand(&coreSeed);
 
     /* test loop */
-    for ( ; (testNb <= nbTests) || (FUZ_GetMilliSpan(startTime) < g_testTime); testNb++ )
-    {
-        size_t sampleSize, sampleStart;
+    for ( ; (testNb <= nbTests) || (FUZ_GetMilliSpan(startTime) < g_testTime) ; testNb++ ) {
+        U32 lseed;
+        const BYTE* srcBuffer;
         const BYTE* dict;
-        size_t cSize, dictSize;
-        size_t maxTestSize, totalTestSize, readSize, totalCSize, genSize, totalGenSize;
+        size_t maxTestSize, dictSize;
+        size_t cSize, totalTestSize, totalCSize, totalGenSize;
         size_t errorCode;
-        U32 sampleSizeLog, buffNb, n, nbChunks;
+        U32 n, nbChunks;
         XXH64_CREATESTATE_STATIC(xxh64);
-        U64 crcOrig, crcDest;
+        U64 crcOrig;
 
         /* init */
         DISPLAYUPDATE(2, "\r%6u", testNb);
         if (nbTests >= testNb) DISPLAYUPDATE(2, "/%6u   ", nbTests);
         FUZ_rand(&coreSeed);
         lseed = coreSeed ^ prime1;
-        buffNb = FUZ_rand(&lseed) & 127;
-        if (buffNb & 7) buffNb=2;   /* select buffer */
-        else
-        {
-            buffNb >>= 3;
-            if (buffNb & 7)
-            {
-                const U32 tnb[2] = { 1, 3 };
-                buffNb = tnb[buffNb >> 3];
-            }
-            else
-            {
-                const U32 tnb[2] = { 0, 4 };
-                buffNb = tnb[buffNb >> 3];
-            }
-        }
-        srcBuffer = cNoiseBuffer[buffNb];
 
-        /* Multi - segments compression test */
+        /* state total reset */
+        /* some problems only happen when states are re-used in a specific order */
+        if ((FUZ_rand(&lseed) & 0xFF) == 131) { ZBUFF_freeCCtx(zc); zc = ZBUFF_createCCtx(); }
+        if ((FUZ_rand(&lseed) & 0xFF) == 132) { ZBUFF_freeDCtx(zd); zd = ZBUFF_createDCtx(); }
+
+        /* srcBuffer selection [0-4] */
+        {   U32 buffNb = FUZ_rand(&lseed) & 0x7F;
+            if (buffNb & 7) buffNb=2;   /* most common : compressible (P) */
+            else {
+                buffNb >>= 3;
+                if (buffNb & 7) {
+                    const U32 tnb[2] = { 1, 3 };   /* barely/highly compressible */
+                    buffNb = tnb[buffNb >> 3];
+                } else {
+                    const U32 tnb[2] = { 0, 4 };   /* not compressible / sparse */
+                    buffNb = tnb[buffNb >> 3];
+            }   }
+            srcBuffer = cNoiseBuffer[buffNb];
+        }
+
+        /* compression init */
+        {   U32 const testLog = FUZ_rand(&lseed) % maxSrcLog;
+            U32 const cLevel = (FUZ_rand(&lseed) % (ZSTD_maxCLevel() - (testLog/3))) + 1;
+            maxTestSize = FUZ_rLogLength(&lseed, testLog);
+            /* random dictionary selection */
+            {   size_t dictStart;
+                dictSize  = (FUZ_rand(&lseed)==1) ? FUZ_randomLength(&lseed, maxSampleLog) : 0;
+                dictStart = FUZ_rand(&lseed) % (srcBufferSize - dictSize);
+                dict      = srcBuffer + dictStart;
+            }
+            {   size_t const initError = ZBUFF_compressInitDictionary(zc, dict, dictSize, cLevel);
+                CHECK (ZBUFF_isError(initError),"init error : %s", ZBUFF_getErrorName(initError));
+        }   }
+
+        /* multi-segments compression test */
         XXH64_reset(xxh64, 0);
-        nbChunks = (FUZ_rand(&lseed) & 127) + 2;
-        sampleSizeLog = FUZ_rand(&lseed) % maxSrcLog;
-        maxTestSize = (size_t)1 << sampleSizeLog;
-        maxTestSize += FUZ_rand(&lseed) & (maxTestSize-1);
+        nbChunks    = (FUZ_rand(&lseed) & 127) + 2;
+        for (n=0, cSize=0, totalTestSize=0 ; (n<nbChunks) && (totalTestSize < maxTestSize) ; n++) {
+            /* compress random chunk into random size dst buffer */
+            {   size_t readChunkSize = FUZ_randomLength(&lseed, maxSampleLog);
+                size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog);
+                size_t dstBuffSize = MIN(cBufferSize - cSize, randomDstSize);
+                size_t const srcStart = FUZ_rand(&lseed) % (srcBufferSize - readChunkSize);
 
-        sampleSizeLog = FUZ_rand(&lseed) % maxSampleLog;
-        sampleSize = (size_t)1 << sampleSizeLog;
-        sampleSize += FUZ_rand(&lseed) & (sampleSize-1);
-        sampleStart = FUZ_rand(&lseed) % (srcBufferSize - sampleSize);
-        dict = srcBuffer + sampleStart;
-        dictSize = sampleSize;
-        ZBUFF_compressInitDictionary(zc, dict, dictSize, (FUZ_rand(&lseed) % (20 - (sampleSizeLog/3))) + 1);
+                size_t const compressionError = ZBUFF_compressContinue(zc, cBuffer+cSize, &dstBuffSize, srcBuffer+srcStart, &readChunkSize);
+                CHECK (ZBUFF_isError(compressionError), "compression error : %s", ZBUFF_getErrorName(compressionError));
 
-        totalTestSize = 0;
-        cSize = 0;
-        for (n=0; n<nbChunks; n++)
-        {
-            sampleSizeLog = FUZ_rand(&lseed) % maxSampleLog;
-            sampleSize = (size_t)1 << sampleSizeLog;
-            sampleSize += FUZ_rand(&lseed) & (sampleSize-1);
-            sampleStart = FUZ_rand(&lseed) % (srcBufferSize - sampleSize);
-            readSize = sampleSize;
-
-            /* random size output buffer */
-            sampleSizeLog = FUZ_rand(&lseed) % maxSampleLog;
-            sampleSize = (size_t)1 << sampleSizeLog;
-            sampleSize += FUZ_rand(&lseed) & (sampleSize-1);
-            genSize = MIN (cBufferSize - cSize, sampleSize);
-
-            errorCode = ZBUFF_compressContinue(zc, cBuffer+cSize, &genSize, srcBuffer+sampleStart, &readSize);
-            CHECK (ZBUFF_isError(errorCode), "compression error : %s", ZBUFF_getErrorName(errorCode));
-
-            XXH64_update(xxh64, srcBuffer+sampleStart, readSize);
-            memcpy(copyBuffer+totalTestSize, srcBuffer+sampleStart, readSize);
-            cSize += genSize;
-            totalTestSize += readSize;
-
-            if ((FUZ_rand(&lseed) & 15) == 0)
-            {
-                /* add a few random flushes operations, to mess around */
-                sampleSizeLog = FUZ_rand(&lseed) % maxSampleLog;
-                sampleSize = (size_t)1 << sampleSizeLog;
-                sampleSize += FUZ_rand(&lseed) & (sampleSize-1);
-                genSize = MIN (cBufferSize - cSize, sampleSize);
-                errorCode = ZBUFF_compressFlush(zc, cBuffer+cSize, &genSize);
-                CHECK (ZBUFF_isError(errorCode), "flush error : %s", ZBUFF_getErrorName(errorCode));
-                cSize += genSize;
+                XXH64_update(xxh64, srcBuffer+srcStart, readChunkSize);
+                memcpy(copyBuffer+totalTestSize, srcBuffer+srcStart, readChunkSize);
+                cSize += dstBuffSize;
+                totalTestSize += readChunkSize;
             }
 
-            if (totalTestSize > maxTestSize) break;
+            /* random flush operation, to mess around */
+            if ((FUZ_rand(&lseed) & 15) == 0) {
+                size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog);
+                size_t dstBuffSize = MIN(cBufferSize - cSize, randomDstSize);
+                size_t const flushError = ZBUFF_compressFlush(zc, cBuffer+cSize, &dstBuffSize);
+                CHECK (ZBUFF_isError(flushError), "flush error : %s", ZBUFF_getErrorName(flushError));
+                cSize += dstBuffSize;
+        }   }
+
+        /* final frame epilogue */
+        {   size_t dstBuffSize = cBufferSize - cSize;
+            size_t const flushError = ZBUFF_compressEnd(zc, cBuffer+cSize, &dstBuffSize);
+            CHECK (ZBUFF_isError(flushError), "flush error : %s", ZBUFF_getErrorName(flushError));
+            cSize += dstBuffSize;
         }
-        genSize = cBufferSize - cSize;
-        errorCode = ZBUFF_compressEnd(zc, cBuffer+cSize, &genSize);
-        CHECK (ZBUFF_isError(errorCode), "compression error : %s", ZBUFF_getErrorName(errorCode));
-        CHECK (errorCode != 0, "frame epilogue not fully consumed");
-        cSize += genSize;
         crcOrig = XXH64_digest(xxh64);
 
         /* multi - fragments decompression test */
         ZBUFF_decompressInitDictionary(zd, dict, dictSize);
-        totalCSize = 0;
-        totalGenSize = 0;
-        while (totalCSize < cSize)
-        {
-            sampleSizeLog  = FUZ_rand(&lseed) % maxSampleLog;
-            sampleSize  = (size_t)1 << sampleSizeLog;
-            sampleSize += FUZ_rand(&lseed) & (sampleSize-1);
-            readSize = sampleSize;
-            sampleSizeLog  = FUZ_rand(&lseed) % maxSampleLog;
-            sampleSize  = (size_t)1 << sampleSizeLog;
-            sampleSize += FUZ_rand(&lseed) & (sampleSize-1);
-            genSize = MIN(sampleSize, dstBufferSize - totalGenSize);
-            errorCode = ZBUFF_decompressContinue(zd, dstBuffer+totalGenSize, &genSize, cBuffer+totalCSize, &readSize);
-            CHECK (ZBUFF_isError(errorCode), "decompression error : %s", ZBUFF_getErrorName(errorCode));
-            totalGenSize += genSize;
-            totalCSize += readSize;
+        for (totalCSize = 0, totalGenSize = 0 ; totalCSize < cSize ; ) {
+            size_t readCSrcSize = FUZ_randomLength(&lseed, maxSampleLog);
+            size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog);
+            size_t dstBuffSize = MIN(dstBufferSize - totalGenSize, randomDstSize);
+            size_t const decompressError = ZBUFF_decompressContinue(zd, dstBuffer+totalGenSize, &dstBuffSize, cBuffer+totalCSize, &readCSrcSize);
+            CHECK (ZBUFF_isError(decompressError), "decompression error : %s", ZBUFF_getErrorName(decompressError));
+            totalGenSize += dstBuffSize;
+            totalCSize += readCSrcSize;
+            errorCode = decompressError;   /* needed for != 0 last test */
         }
         CHECK (errorCode != 0, "frame not fully decoded");
         CHECK (totalGenSize != totalTestSize, "decompressed data : wrong size")
         CHECK (totalCSize != cSize, "compressed data should be fully read")
-        crcDest = XXH64(dstBuffer, totalTestSize, 0);
-        if (crcDest!=crcOrig) findDiff(copyBuffer, dstBuffer, totalTestSize);
-        CHECK (crcDest!=crcOrig, "decompressed data corrupted");
+        { U64 const crcDest = XXH64(dstBuffer, totalTestSize, 0);
+          if (crcDest!=crcOrig) findDiff(copyBuffer, dstBuffer, totalTestSize);
+          CHECK (crcDest!=crcOrig, "decompressed data corrupted"); }
+
+        /*=====   noisy/erroneous src decompression test   =====*/
 
-        /* noisy/erroneous src decompression test */
         /* add some noise */
-        nbChunks = (FUZ_rand(&lseed) & 7) + 2;
-        for (n=0; n<nbChunks; n++)
-        {
-            size_t cStart;
-
-            sampleSizeLog = FUZ_rand(&lseed) % maxSampleLog;
-            sampleSize = (size_t)1 << sampleSizeLog;
-            sampleSize += FUZ_rand(&lseed) & (sampleSize-1);
-            if (sampleSize > cSize/3) sampleSize = cSize/3;
-            sampleStart = FUZ_rand(&lseed) % (srcBufferSize - sampleSize);
-            cStart = FUZ_rand(&lseed) % (cSize - sampleSize);
-
-            memcpy(cBuffer+cStart, srcBuffer+sampleStart, sampleSize);
-        }
+        {   U32 const nbNoiseChunks = (FUZ_rand(&lseed) & 7) + 2;
+            U32 nn; for (nn=0; nn<nbNoiseChunks; nn++) {
+                size_t const randomNoiseSize = FUZ_randomLength(&lseed, maxSampleLog);
+                size_t const noiseSize  = MIN((cSize/3) , randomNoiseSize);
+                size_t const noiseStart = FUZ_rand(&lseed) % (srcBufferSize - noiseSize);
+                size_t const cStart = FUZ_rand(&lseed) % (cSize - noiseSize);
+                memcpy(cBuffer+cStart, srcBuffer+noiseStart, noiseSize);
+        }   }
 
         /* try decompression on noisy data */
         ZBUFF_decompressInit(zd);
         totalCSize = 0;
         totalGenSize = 0;
-        while ( (totalCSize < cSize) && (totalGenSize < dstBufferSize) )
-        {
-            sampleSizeLog  = FUZ_rand(&lseed) % maxSampleLog;
-            sampleSize  = (size_t)1 << sampleSizeLog;
-            sampleSize += FUZ_rand(&lseed) & (sampleSize-1);
-            readSize = sampleSize;
-            sampleSizeLog  = FUZ_rand(&lseed) % maxSampleLog;
-            sampleSize  = (size_t)1 << sampleSizeLog;
-            sampleSize += FUZ_rand(&lseed) & (sampleSize-1);
-            genSize = MIN(sampleSize, dstBufferSize - totalGenSize);
-            errorCode = ZBUFF_decompressContinue(zd, dstBuffer+totalGenSize, &genSize, cBuffer+totalCSize, &readSize);
-            if (ZBUFF_isError(errorCode)) break;   /* error correctly detected */
-            totalGenSize += genSize;
-            totalCSize += readSize;
-        }
-    }
+        while ( (totalCSize < cSize) && (totalGenSize < dstBufferSize) ) {
+            size_t readCSrcSize = FUZ_randomLength(&lseed, maxSampleLog);
+            size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog);
+            size_t dstBuffSize = MIN(dstBufferSize - totalGenSize, randomDstSize);
+            size_t const decompressError = ZBUFF_decompressContinue(zd, dstBuffer+totalGenSize, &dstBuffSize, cBuffer+totalCSize, &readCSrcSize);
+            if (ZBUFF_isError(decompressError)) break;   /* error correctly detected */
+            totalGenSize += dstBuffSize;
+            totalCSize += readCSrcSize;
+    }   }
     DISPLAY("\r%u fuzzer tests completed   \n", testNb);
 
 _cleanup:
@@ -454,10 +431,10 @@ _output_error:
 }
 
 
-/*********************************************************
+/*-*******************************************************
 *  Command line
 *********************************************************/
-int FUZ_usage(char* programName)
+int FUZ_usage(const char* programName)
 {
     DISPLAY( "Usage :\n");
     DISPLAY( "      %s [args]\n", programName);
@@ -474,7 +451,7 @@ int FUZ_usage(char* programName)
 }
 
 
-int main(int argc, char** argv)
+int main(int argc, const char** argv)
 {
     U32 seed=0;
     int seedset=0;
@@ -484,23 +461,18 @@ int main(int argc, char** argv)
     int proba = FUZ_COMPRESSIBILITY_DEFAULT;
     int result=0;
     U32 mainPause = 0;
-    char* programName;
+    const char* programName = argv[0];
 
     /* Check command line */
-    programName = argv[0];
-    for(argNb=1; argNb<argc; argNb++)
-    {
-        char* argument = argv[argNb];
-
+    for(argNb=1; argNb<argc; argNb++) {
+        const char* argument = argv[argNb];
         if(!argument) continue;   /* Protection if argument empty */
 
-        /* Handle commands. Aggregated commands are allowed */
-        if (argument[0]=='-')
-        {
+        /* Parsing commands. Aggregated commands are allowed */
+        if (argument[0]=='-') {
             argument++;
 
-            while (*argument!=0)
-            {
+            while (*argument!=0) {
                 switch(*argument)
                 {
                 case 'h':
@@ -521,8 +493,7 @@ int main(int argc, char** argv)
                 case 'i':
                     argument++;
                     nbTests=0; g_testTime=0;
-                    while ((*argument>='0') && (*argument<='9'))
-                    {
+                    while ((*argument>='0') && (*argument<='9')) {
                         nbTests *= 10;
                         nbTests += *argument - '0';
                         argument++;
@@ -532,8 +503,7 @@ int main(int argc, char** argv)
                 case 'T':
                     argument++;
                     nbTests=0; g_testTime=0;
-                    while ((*argument>='0') && (*argument<='9'))
-                    {
+                    while ((*argument>='0') && (*argument<='9')) {
                         g_testTime *= 10;
                         g_testTime += *argument - '0';
                         argument++;
@@ -547,8 +517,7 @@ int main(int argc, char** argv)
                     argument++;
                     seed=0;
                     seedset=1;
-                    while ((*argument>='0') && (*argument<='9'))
-                    {
+                    while ((*argument>='0') && (*argument<='9')) {
                         seed *= 10;
                         seed += *argument - '0';
                         argument++;
@@ -558,8 +527,7 @@ int main(int argc, char** argv)
                 case 't':
                     argument++;
                     testNb=0;
-                    while ((*argument>='0') && (*argument<='9'))
-                    {
+                    while ((*argument>='0') && (*argument<='9')) {
                         testNb *= 10;
                         testNb += *argument - '0';
                         argument++;
@@ -569,8 +537,7 @@ int main(int argc, char** argv)
                 case 'P':   /* compressibility % */
                     argument++;
                     proba=0;
-                    while ((*argument>='0') && (*argument<='9'))
-                    {
+                    while ((*argument>='0') && (*argument<='9')) {
                         proba *= 10;
                         proba += *argument - '0';
                         argument++;
@@ -582,9 +549,7 @@ int main(int argc, char** argv)
                 default:
                     return FUZ_usage(programName);
                 }
-            }
-        }
-    }
+    }   }   }   /* for(argNb=1; argNb<argc; argNb++) */
 
     /* Get Seed */
     DISPLAY("Starting zstd_buffered tester (%i-bits, %s)\n", (int)(sizeof(size_t)*8), ZSTD_VERSION);
@@ -598,8 +563,8 @@ int main(int argc, char** argv)
     if (testNb==0) result = basicUnitTests(0, ((double)proba) / 100);  /* constant seed for predictability */
     if (!result)
         result = fuzzerTests(seed, nbTests, testNb, ((double)proba) / 100);
-    if (mainPause)
-    {
+
+    if (mainPause) {
         int unused;
         DISPLAY("Press Enter \n");
         unused = getchar();
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index abe13013..3019edcf 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -22,9 +22,9 @@
   - zstd homepage : http://www.zstd.net/
 */
 /*
-  Note : this is user program, not part of libzstd.
-  The license of this command line program is GPLv2.
+  Note : this is a user program, not part of libzstd.
   The license of libzstd is BSD.
+  The license of this command line program is GPLv2.
 */
 
 
@@ -71,9 +71,10 @@
 **************************************/
 #define COMPRESSOR_NAME "zstd command line interface"
 #ifndef ZSTD_VERSION
+#  define LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
 #  define QUOTE(str) #str
 #  define EXPAND_AND_QUOTE(str) QUOTE(str)
-#  define ZSTD_VERSION "v" EXPAND_AND_QUOTE(ZSTD_VERSION_MAJOR) "." EXPAND_AND_QUOTE(ZSTD_VERSION_MINOR) "." EXPAND_AND_QUOTE(ZSTD_VERSION_RELEASE)
+#  define ZSTD_VERSION "v" EXPAND_AND_QUOTE(LIB_VERSION)
 #endif
 #define AUTHOR "Yann Collet"
 #define WELCOME_MESSAGE "*** %s %i-bits %s, by %s ***\n", COMPRESSOR_NAME, (int)(sizeof(void*)*8), ZSTD_VERSION, AUTHOR
@@ -132,6 +133,7 @@ static int usage_advanced(const char* programName)
     DISPLAY( " -v     : verbose mode\n");
     DISPLAY( " -q     : suppress warnings; specify twice to suppress errors too\n");
     DISPLAY( " -c     : force write to standard output, even if it is the console\n");
+    DISPLAY( "--ultra : enable ultra modes (requires more memory to decompress)\n");
 #ifndef ZSTD_NODICT
     DISPLAY( "Dictionary builder :\n");
     DISPLAY( "--train : create a dictionary from a training set of files \n");
@@ -142,9 +144,9 @@ static int usage_advanced(const char* programName)
 #ifndef ZSTD_NOBENCH
     DISPLAY( "Benchmark arguments :\n");
     DISPLAY( " -b#    : benchmark file(s), using # compression level (default : 1) \n");
+    DISPLAY( " -r#    : test all compression levels from -bX to # (default: 1)\n");
     DISPLAY( " -i#    : iteration loops [1-9](default : 3)\n");
     DISPLAY( " -B#    : cut file into independent blocks of size # (default: no block)\n");
-    DISPLAY( " -r#    : test all compression levels from 1 to # (default: disabled)\n");
 #endif
     return 0;
 }
@@ -166,6 +168,8 @@ static void waitEnter(void)
 }
 
 
+#define CLEAN_RETURN(i) { operationResult = (i); goto _end; }
+
 int main(int argCount, const char** argv)
 {
     int i,
@@ -179,19 +183,19 @@ int main(int argCount, const char** argv)
         nextArgumentIsOutFileName=0,
         nextArgumentIsMaxDict=0;
     unsigned cLevel = 1;
+    unsigned cLevelLast = 1;
     const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*));   /* argCount >= 1 */
     unsigned filenameIdx = 0;
     const char* programName = argv[0];
     const char* outFileName = NULL;
     const char* dictFileName = NULL;
     char* dynNameSpace = NULL;
-    int rangeBench = 1;
     unsigned maxDictSize = g_defaultMaxDictSize;
     unsigned dictCLevel = g_defaultDictCLevel;
     unsigned dictSelect = g_defaultSelectivityLevel;
 
     /* init */
-    (void)rangeBench; (void)dictCLevel;   /* not used when ZSTD_NOBENCH / ZSTD_NODICT set */
+    (void)cLevelLast; (void)dictCLevel;   /* not used when ZSTD_NOBENCH / ZSTD_NODICT set */
     if (filenameTable==NULL) { DISPLAY("not enough memory\n"); exit(1); }
     displayOut = stderr;
     /* Pick out program name from path. Don't rely on stdlib because of conflicting behavior */
@@ -210,8 +214,8 @@ int main(int argCount, const char** argv)
         /* long commands (--long-word) */
         if (!strcmp(argument, "--decompress")) { decode=1; continue; }
         if (!strcmp(argument, "--force")) {  FIO_overwriteMode(); continue; }
-        if (!strcmp(argument, "--version")) { displayOut=stdout; DISPLAY(WELCOME_MESSAGE); return 0; }
-        if (!strcmp(argument, "--help")) { displayOut=stdout; return usage_advanced(programName); }
+        if (!strcmp(argument, "--version")) { displayOut=stdout; DISPLAY(WELCOME_MESSAGE); CLEAN_RETURN(0); }
+        if (!strcmp(argument, "--help")) { displayOut=stdout; CLEAN_RETURN(usage_advanced(programName)); }
         if (!strcmp(argument, "--verbose")) { displayLevel=4; continue; }
         if (!strcmp(argument, "--quiet")) { displayLevel--; continue; }
         if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; displayLevel=1; continue; }
@@ -219,11 +223,11 @@ int main(int argCount, const char** argv)
         if (!strcmp(argument, "--train")) { dictBuild=1; outFileName=g_defaultDictName; continue; }
         if (!strcmp(argument, "--maxdict")) { nextArgumentIsMaxDict=1; continue; }
         if (!strcmp(argument, "--keep")) { continue; }   /* does nothing, since preserving input is default; for gzip/xz compatibility */
+        if (!strcmp(argument, "--ultra")) { FIO_setMaxWLog(0); continue; }
 
         /* '-' means stdin/stdout */
         if (!strcmp(argument, "-")){
-            if (!filenameIdx) { filenameIdx=1, filenameTable[0]=stdinmark; continue; }
-            outFileName=stdoutmark; continue;
+            if (!filenameIdx) { filenameIdx=1, filenameTable[0]=stdinmark; outFileName=stdoutmark; continue; }
         }
 
         /* Decode commands (note : aggregated commands are allowed) */
@@ -231,7 +235,6 @@ int main(int argCount, const char** argv)
             argument++;
 
             while (argument[0]!=0) {
-
                 /* compression Level */
                 if ((*argument>='0') && (*argument<='9')) {
                     cLevel = 0;
@@ -242,16 +245,16 @@ int main(int argCount, const char** argv)
                     }
                     dictCLevel = cLevel;
                     if (dictCLevel > ZSTD_maxCLevel())
-                        return badusage(programName);
+                        CLEAN_RETURN(badusage(programName));
                     continue;
                 }
 
                 switch(argument[0])
                 {
                     /* Display help */
-                case 'V': displayOut=stdout; DISPLAY(WELCOME_MESSAGE); return 0;   /* Version Only */
+                case 'V': displayOut=stdout; DISPLAY(WELCOME_MESSAGE); CLEAN_RETURN(0);   /* Version Only */
                 case 'H':
-                case 'h': displayOut=stdout; return usage_advanced(programName);
+                case 'h': displayOut=stdout; CLEAN_RETURN(usage_advanced(programName));
 
                      /* Decoding */
                 case 'd': decode=1; argument++; break;
@@ -286,33 +289,38 @@ int main(int argCount, const char** argv)
 
                     /* Modify Nb Iterations (benchmark only) */
                 case 'i':
-                    {
-                        int iters= 0;
+                    {   U32 iters= 0;
                         argument++;
                         while ((*argument >='0') && (*argument <='9'))
                             iters *= 10, iters += *argument++ - '0';
+                        BMK_setNotificationLevel(displayLevel);
                         BMK_SetNbIterations(iters);
                     }
                     break;
 
                     /* cut input into blocks (benchmark only) */
                 case 'B':
-                    {
-                        size_t bSize = 0;
+                    {   size_t bSize = 0;
                         argument++;
                         while ((*argument >='0') && (*argument <='9'))
                             bSize *= 10, bSize += *argument++ - '0';
                         if (*argument=='K') bSize<<=10, argument++;  /* allows using KB notation */
                         if (*argument=='M') bSize<<=20, argument++;
                         if (*argument=='B') argument++;
+                        BMK_setNotificationLevel(displayLevel);
                         BMK_SetBlockSize(bSize);
                     }
                     break;
 
                     /* range bench (benchmark only) */
                 case 'r':
-                        rangeBench = -1;
+                        /* compression Level */
                         argument++;
+                        if ((*argument>='0') && (*argument<='9')) {
+                            cLevelLast = 0;
+                            while ((*argument >= '0') && (*argument <= '9'))
+                                cLevelLast *= 10, cLevelLast += *argument++ - '0';
+                        }
                         break;
 #endif   /* ZSTD_NOBENCH */
 
@@ -323,15 +331,24 @@ int main(int argCount, const char** argv)
                         dictSelect *= 10, dictSelect += *argument++ - '0';
                     break;
 
-                    /* Pause at the end (hidden option) */
-                case 'p': main_pause=1; argument++; break;
-
+                    /* Pause at the end (-p) or set an additional param (-p#) (hidden option) */
+                case 'p': argument++;
+#ifndef ZSTD_NOBENCH
+                    if ((*argument>='0') && (*argument<='9')) {
+                        int additionalParam = 0;
+                        while ((*argument >= '0') && (*argument <= '9'))
+                            additionalParam *= 10, additionalParam += *argument++ - '0';
+                        BMK_setAdditionalParam(additionalParam);
+                    } else
+#endif
+                        main_pause=1;
+                    break;
                     /* unknown command */
-                default : return badusage(programName);
+                default : CLEAN_RETURN(badusage(programName));
                 }
             }
             continue;
-        }
+        }   /* if (argument[0]=='-') */
 
         if (nextEntryIsDictionary) {
             nextEntryIsDictionary = 0;
@@ -366,7 +383,8 @@ int main(int argCount, const char** argv)
     /* Check if benchmark is selected */
     if (bench) {
 #ifndef ZSTD_NOBENCH
-        BMK_benchFiles(filenameTable, filenameIdx, dictFileName, cLevel*rangeBench);
+        BMK_setNotificationLevel(displayLevel);
+        BMK_benchFiles(filenameTable, filenameIdx, dictFileName, cLevel, cLevelLast);
 #endif
         goto _end;
     }
@@ -387,17 +405,17 @@ int main(int argCount, const char** argv)
     if(!filenameIdx) filenameIdx=1, filenameTable[0]=stdinmark, outFileName=stdoutmark;
 
     /* Check if input/output defined as console; trigger an error in this case */
-    if (!strcmp(filenameTable[0], stdinmark) && IS_CONSOLE(stdin) ) return badusage(programName);
-    if (outFileName && !strcmp(outFileName, stdoutmark) && IS_CONSOLE(stdout) && !forceStdout) return badusage(programName);
+    if (!strcmp(filenameTable[0], stdinmark) && IS_CONSOLE(stdin) ) CLEAN_RETURN(badusage(programName));
+    if (outFileName && !strcmp(outFileName, stdoutmark) && IS_CONSOLE(stdout) && !forceStdout) CLEAN_RETURN(badusage(programName));
 
     /* user-selected output filename, only possible with a single file */
     if (outFileName && strcmp(outFileName,stdoutmark) && strcmp(outFileName,nulmark) && (filenameIdx>1)) {
         DISPLAY("Too many files (%u) on the command line. \n", filenameIdx);
-        return filenameIdx;
+        CLEAN_RETURN(filenameIdx);
     }
 
     /* No warning message in pipe mode (stdin + stdout) or multiple mode */
-    if (!strcmp(filenameTable[0], stdinmark) && !strcmp(outFileName,stdoutmark) && (displayLevel==2)) displayLevel=1;
+    if (!strcmp(filenameTable[0], stdinmark) && outFileName && !strcmp(outFileName,stdoutmark) && (displayLevel==2)) displayLevel=1;
     if ((filenameIdx>1) && (displayLevel==2)) displayLevel=1;
 
     /* IO Stream/File */
diff --git a/visual/2013/fullbench/fullbench.vcxproj b/visual/2013/fullbench/fullbench.vcxproj
index 3797960f..8252d03d 100644
--- a/visual/2013/fullbench/fullbench.vcxproj
+++ b/visual/2013/fullbench/fullbench.vcxproj
@@ -22,6 +22,7 @@
     <ProjectGuid>{61ABD629-1CC8-4FD7-9281-6B8DBB9D3DF8}</ProjectGuid>
     <Keyword>Win32Proj</Keyword>
     <RootNamespace>fullbench</RootNamespace>
+    <OutDir>$(SolutionDir)bin\$(Platform)\$(Configuration)\</OutDir>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
@@ -161,6 +162,7 @@
   <ItemGroup>
     <ClCompile Include="..\..\..\lib\fse.c" />
     <ClCompile Include="..\..\..\lib\huff0.c" />
+    <ClCompile Include="..\..\..\lib\zbuff.c" />
     <ClCompile Include="..\..\..\lib\zstd_compress.c" />
     <ClCompile Include="..\..\..\lib\zstd_decompress.c" />
     <ClCompile Include="..\..\..\programs\datagen.c" />
@@ -175,6 +177,8 @@
     <ClInclude Include="..\..\..\lib\legacy\zstd_v01.h" />
     <ClInclude Include="..\..\..\lib\legacy\zstd_v02.h" />
     <ClInclude Include="..\..\..\lib\legacy\zstd_v03.h" />
+    <ClInclude Include="..\..\..\lib\zbuff.h" />
+    <ClInclude Include="..\..\..\lib\zbuff_static.h" />
     <ClInclude Include="..\..\..\lib\zstd.h" />
     <ClInclude Include="..\..\..\lib\zstd_static.h" />
     <ClInclude Include="..\..\..\programs\datagen.h" />
diff --git a/visual/2013/fullbench/fullbench.vcxproj.filters b/visual/2013/fullbench/fullbench.vcxproj.filters
index 3a82000f..1d5f9c59 100644
--- a/visual/2013/fullbench/fullbench.vcxproj.filters
+++ b/visual/2013/fullbench/fullbench.vcxproj.filters
@@ -33,6 +33,9 @@
     <ClCompile Include="..\..\..\lib\zstd_decompress.c">
       <Filter>Fichiers sources</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\..\lib\zbuff.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="..\..\..\lib\fse.h">
@@ -68,5 +71,11 @@
     <ClInclude Include="..\..\..\lib\legacy\zstd_v03.h">
       <Filter>Fichiers d%27en-tête</Filter>
     </ClInclude>
+    <ClInclude Include="..\..\..\lib\zbuff.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\zbuff_static.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
   </ItemGroup>
 </Project>
\ No newline at end of file
diff --git a/visual/2013/fuzzer/fuzzer.vcxproj b/visual/2013/fuzzer/fuzzer.vcxproj
index 0844efe7..49738e0c 100644
--- a/visual/2013/fuzzer/fuzzer.vcxproj
+++ b/visual/2013/fuzzer/fuzzer.vcxproj
@@ -22,6 +22,7 @@
     <ProjectGuid>{6FD4352B-346C-4703-96EA-D4A8B9A6976E}</ProjectGuid>
     <Keyword>Win32Proj</Keyword>
     <RootNamespace>fuzzer</RootNamespace>
+    <OutDir>$(SolutionDir)bin\$(Platform)\$(Configuration)\</OutDir>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
diff --git a/visual/2013/zstd/zstd.vcxproj b/visual/2013/zstd/zstd.vcxproj
old mode 100644
new mode 100755
index 45319769..cced867e
--- a/visual/2013/zstd/zstd.vcxproj
+++ b/visual/2013/zstd/zstd.vcxproj
@@ -26,6 +26,7 @@
     <ClCompile Include="..\..\..\lib\legacy\zstd_v02.c" />
     <ClCompile Include="..\..\..\lib\legacy\zstd_v03.c" />
     <ClCompile Include="..\..\..\lib\legacy\zstd_v04.c" />
+    <ClCompile Include="..\..\..\lib\legacy\zstd_v05.c" />
     <ClCompile Include="..\..\..\lib\zbuff.c" />
     <ClCompile Include="..\..\..\lib\zdict.c" />
     <ClCompile Include="..\..\..\lib\zstd_compress.c" />
@@ -49,6 +50,7 @@
     <ClInclude Include="..\..\..\lib\legacy\zstd_v02.h" />
     <ClInclude Include="..\..\..\lib\legacy\zstd_v03.h" />
     <ClInclude Include="..\..\..\lib\legacy\zstd_v04.h" />
+    <ClInclude Include="..\..\..\lib\legacy\zstd_v05.h" />
     <ClInclude Include="..\..\..\lib\zbuff.h" />
     <ClInclude Include="..\..\..\lib\zbuff_static.h" />
     <ClInclude Include="..\..\..\lib\zdict.h" />
@@ -69,6 +71,7 @@
     <ProjectGuid>{4E52A41A-F33B-4C7A-8C36-A1A6B4F4277C}</ProjectGuid>
     <Keyword>Win32Proj</Keyword>
     <RootNamespace>zstd</RootNamespace>
+    <OutDir>$(SolutionDir)bin\$(Platform)\$(Configuration)\</OutDir>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
@@ -87,7 +90,7 @@
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
     <WholeProgramOptimization>true</WholeProgramOptimization>
-    <PlatformToolset>v120</PlatformToolset>
+    <PlatformToolset>v120_xp</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
@@ -126,7 +129,7 @@
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <LinkIncremental>false</LinkIncremental>
     <IncludePath>$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
-    <RunCodeAnalysis>true</RunCodeAnalysis>
+    <RunCodeAnalysis>false</RunCodeAnalysis>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <LinkIncremental>false</LinkIncremental>
@@ -170,19 +173,20 @@
       <WarningLevel>Level4</WarningLevel>
       <PrecompiledHeader>
       </PrecompiledHeader>
-      <Optimization>MaxSpeed</Optimization>
+      <Optimization>Full</Optimization>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <IntrinsicFunctions>true</IntrinsicFunctions>
       <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <EnablePREfast>true</EnablePREfast>
+      <EnablePREfast>false</EnablePREfast>
       <TreatWarningAsError>true</TreatWarningAsError>
-      <AdditionalOptions>/analyze:stacksize19000 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <EnableCOMDATFolding>true</EnableCOMDATFolding>
       <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalDependencies>setargv.obj; kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
     </Link>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
@@ -203,6 +207,7 @@
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <EnableCOMDATFolding>true</EnableCOMDATFolding>
       <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalDependencies>setargv.obj;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
     </Link>
   </ItemDefinitionGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
diff --git a/visual/2013/zstd/zstd.vcxproj.filters b/visual/2013/zstd/zstd.vcxproj.filters
old mode 100644
new mode 100755
index 31a97801..eb894342
--- a/visual/2013/zstd/zstd.vcxproj.filters
+++ b/visual/2013/zstd/zstd.vcxproj.filters
@@ -69,6 +69,9 @@
     <ClCompile Include="..\..\..\programs\dibio.c">
       <Filter>Fichiers sources</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\..\lib\legacy\zstd_v05.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="..\..\..\lib\fse.h">
@@ -146,5 +149,8 @@
     <ClInclude Include="..\..\..\programs\dibio.h">
       <Filter>Fichiers d%27en-tête</Filter>
     </ClInclude>
+    <ClInclude Include="..\..\..\lib\legacy\zstd_v05.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
   </ItemGroup>
 </Project>
\ No newline at end of file
diff --git a/visual/2013/zstdlib/zstdlib.vcxproj b/visual/2013/zstdlib/zstdlib.vcxproj
index b13bc98f..e44699d4 100644
--- a/visual/2013/zstdlib/zstdlib.vcxproj
+++ b/visual/2013/zstdlib/zstdlib.vcxproj
@@ -24,8 +24,6 @@
     <ClCompile Include="..\..\..\lib\zbuff.c" />
     <ClCompile Include="..\..\..\lib\zstd_compress.c" />
     <ClCompile Include="..\..\..\lib\zstd_decompress.c" />
-  </ItemGroup>
-  <ItemGroup>
     <ClInclude Include="..\..\..\lib\bitstream.h" />
     <ClInclude Include="..\..\..\lib\error_private.h" />
     <ClInclude Include="..\..\..\lib\error_public.h" />
@@ -48,6 +46,7 @@
     <ProjectGuid>{8BFD8150-94D5-4BF9-8A50-7BD9929A0850}</ProjectGuid>
     <Keyword>Win32Proj</Keyword>
     <RootNamespace>zstdlib</RootNamespace>
+    <OutDir>$(SolutionDir)bin\$(Platform)\$(Configuration)\</OutDir>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
@@ -96,7 +95,6 @@
     <LinkIncremental>true</LinkIncremental>
     <TargetName>zstdlib_x86</TargetName>
     <IntDir>$(Platform)\$(Configuration)\</IntDir>
-    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
     <IncludePath>$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
     <RunCodeAnalysis>true</RunCodeAnalysis>
   </PropertyGroup>
@@ -104,14 +102,12 @@
     <LinkIncremental>true</LinkIncremental>
     <TargetName>zstdlib_x64</TargetName>
     <IntDir>$(Platform)\$(Configuration)\</IntDir>
-    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
     <IncludePath>$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
     <RunCodeAnalysis>true</RunCodeAnalysis>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <LinkIncremental>false</LinkIncremental>
     <TargetName>zstdlib_x86</TargetName>
-    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
     <IntDir>$(Platform)\$(Configuration)\</IntDir>
     <IncludePath>$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
     <RunCodeAnalysis>false</RunCodeAnalysis>
@@ -119,7 +115,6 @@
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <LinkIncremental>false</LinkIncremental>
     <TargetName>zstdlib_x64</TargetName>
-    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
     <IntDir>$(Platform)\$(Configuration)\</IntDir>
     <IncludePath>$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
     <RunCodeAnalysis>false</RunCodeAnalysis>