Merge pull request #268 from Cyan4973/dev

v0.8.0
2016-08-01 09:51:18 +02:00 · 2016-08-01 09:51:18 +02:00 · 908a9f8f36
commit 908a9f8f36
parent 70e725085c 9ba929f1d4
65 changed files with 6897 additions and 2296 deletions
--- a/.gitignore
+++ b/.gitignore
@ -40,3 +40,7 @@ projects/cmake/

 # Test artefacts
 tmp*
+dictionary
+
+# tmp files
+*.swp
--- a/24
+++ b/24
@ -41,20 +41,20 @@ else
 VOID = /dev/null
 endif

-.PHONY: default all zlibwrapper zstdprogram zstd clean install uninstall travis-install test clangtest gpptest armtest usan asan uasan
+.PHONY: default all zlibwrapper zstd clean install uninstall travis-install test clangtest gpptest armtest usan asan uasan

-default: zstdprogram
+default: zstd

 all:
 	$(MAKE) -C $(ZSTDDIR) $@
 	$(MAKE) -C $(PRGDIR) $@
+	@rm -f lib/decompress/*.o
+	$(MAKE) -C $(PRGDIR) all32

-zstdprogram:
+zstd:
 	$(MAKE) -C $(PRGDIR)
 	cp $(PRGDIR)/zstd .

-zstd: zstdprogram
-
 zlibwrapper:
 	$(MAKE) -C $(ZSTDDIR) all
 	$(MAKE) -C $(ZWRAPDIR) all
@ -70,10 +70,10 @@ clean:
 	@echo Cleaning completed


-#------------------------------------------------------------------------
-#make install is validated only for Linux, OSX, kFreeBSD and Hurd targets
-#------------------------------------------------------------------------
-ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU))
+#----------------------------------------------------------------------------------
+#make install is validated only for Linux, OSX, kFreeBSD, Hurd and some BSD targets
+#----------------------------------------------------------------------------------
+ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU FreeBSD DragonFly))
 HOST_OS = POSIX
 install:
 	$(MAKE) -C $(ZSTDDIR) $@
@ -87,7 +87,7 @@ travis-install:
 	$(MAKE) install PREFIX=~/install_test_dir

 gpptest: clean
-	$(MAKE) all CC=g++ CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
+	$(MAKE) -C programs all CC=g++ CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"

 gcc5test: clean
 	gcc-5 -v
@ -107,11 +107,11 @@ armtest: clean

 ppctest: clean
 	$(MAKE) -C $(PRGDIR) datagen   # use native, faster
-	$(MAKE) -C $(PRGDIR) test CC=powerpc-linux-gnu-gcc ZSTDRTTEST= MOREFLAGS="-Werror -static"
+	$(MAKE) -C $(PRGDIR) test CC=powerpc-linux-gnu-gcc ZSTDRTTEST= MOREFLAGS="-Werror -Wno-attributes -static"

 ppc64test: clean
 	$(MAKE) -C $(PRGDIR) datagen   # use native, faster
-	$(MAKE) -C $(PRGDIR) test CC=powerpc-linux-gnu-gcc ZSTDRTTEST= MOREFLAGS="-m64 -Werror -static"
+	$(MAKE) -C $(PRGDIR) test CC=powerpc-linux-gnu-gcc ZSTDRTTEST= MOREFLAGS="-m64 -static"

 usan: clean
 	$(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=undefined"
--- a/22
+++ b/22
@ -1,8 +1,22 @@
-v0.7.4
-Added : new examples
-Fixed : segfault when using small dictionaries, reported by Felix Handte
-Modified : default compression level for CLI is 3
+v0.8.0
+Improved : better speed on clang and gcc -O2, thanks to Eric Biggers
+New : Build on FreeBSD and DragonFly, thanks to JrMarino
+Changed : modified API : ZSTD_compressEnd()
+Fixed : legacy mode with ZSTD_HEAPMODE=0, by Christopher Bergqvist
+Fixed : premature end of frame when zero-sized raw block, reported by Eric Biggers
+Fixed : large dictionaries (> 384 KB), reported by Ilona Papava
+Fixed : checksum correctly checked in single-pass mode
+Fixed : combined --test and --rm, reported by Andreas M. Nilsson
+Modified : minor compression level adaptations
+Updated : compression format specification to v0.2.0
+Changed : zstd.h moved to /lib directory

+v0.7.4
+Added : homebrew for Mac, by Daniel Cade
+Added : more examples
+Fixed : segfault when using small dictionaries, reported by Felix Handte
+Modified : default compression level for CLI is now 3
+Updated : specification, to v0.1.1

 v0.7.3
 New : compression format specification
--- a/README.md
+++ b/README.md
@ -10,7 +10,7 @@ you can consult a list of known ports on [Zstandard homepage](http://www.zstd.ne
 |master      | [![Build Status](https://travis-ci.org/Cyan4973/zstd.svg?branch=master)](https://travis-ci.org/Cyan4973/zstd) |
 |dev         | [![Build Status](https://travis-ci.org/Cyan4973/zstd.svg?branch=dev)](https://travis-ci.org/Cyan4973/zstd) |

-As a reference, several fast compression algorithms were tested and compared on a Core i7-3930K CPU @ 4.5GHz, using [lzbench], an open-source in-memory benchmark by @inikep compiled with gcc 5.2.1, with the [Silesia compression corpus].
+As a reference, several fast compression algorithms were tested and compared on a Core i7-3930K CPU @ 4.5GHz, using [lzbench], an open-source in-memory benchmark by @inikep compiled with gcc 5.4.0, with the [Silesia compression corpus].

 [lzbench]: https://github.com/inikep/lzbench
 [Silesia compression corpus]: http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia
@ -19,9 +19,9 @@ As a reference, several fast compression algorithms were tested and compared on
 |Name             | Ratio | C.speed | D.speed |
 |-----------------|-------|--------:|--------:|
 |                 |       |   MB/s  |  MB/s   |
-|**zstd 0.7.0 -1**|**2.877**|**325**| **930** |
+|**zstd 0.8.0 -1**|**2.877**|**330**| **930** |
 | [zlib] 1.2.8 -1 | 2.730 |    95   |   360   |
-| brotli -0       | 2.708 |   220   |   430   |
+| brotli 0.4 -0   | 2.708 |   320   |   375   |
 | QuickLZ 1.5     | 2.237 |   510   |   605   |
 | LZO 2.09        | 2.106 |   610   |   870   |
 | [LZ4] r131      | 2.101 |   620   |  3100   |
@ -77,8 +77,8 @@ Hence, deploying one dictionary per type of data will provide the greater benefi

 ### Status

-Zstd compression format has reached "Final status". It means it is planned to become the official stable zstd format and be tagged `v1.0`. The reason it's not yet tagged `v1.0` is that it currently performs its "validation period", making sure the format holds all its promises and nothing was missed.
-Zstd library also offers legacy decoder support. Any data compressed by any version >= `v0.1` (hence including current one) remains decodable now and in the future.
+Zstd compression format has reached "Final status". It means it is planned to become the official stable zstd format tagged `v1.0`. The reason it's not yet tagged `v1.0` is that it is currently undergoing its "validation period", making sure the format holds all its promises and nothing was missed.
+Zstd library also offers legacy decoder support. Any data compressed by any version >= `v0.1` is decodable now and in the future.
 The library has been validated using strong [fuzzer tests](https://en.wikipedia.org/wiki/Fuzz_testing), including both [internal tools](programs/fuzzer.c) and [external ones](http://lcamtuf.coredump.cx/afl). It's able to withstand hazardous situations, including invalid inputs.
 As a consequence, Zstandard is considered safe for, and is currently used in, production environments.

--- a/appveyor.yml
+++ b/appveyor.yml
@ -27,7 +27,8 @@ install:
      SET "CLANG_PARAMS=-C programs zstd fullbench fuzzer zbufftest paramgrill datagen CC=clang MOREFLAGS="--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion"" &&
      SET "PATH_MINGW32=c:\MinGW\bin;c:\MinGW\usr\bin" &&
      SET "PATH_MINGW64=c:\msys64\mingw64\bin;c:\msys64\usr\bin" &&
-      COPY C:\MinGW\bin\mingw32-make.exe C:\MinGW\bin\make.exe
+      COPY C:\MinGW\bin\mingw32-make.exe C:\MinGW\bin\make.exe &&
+      COPY C:\MinGW\bin\gcc.exe C:\MinGW\bin\cc.exe
    ) else (
      IF [%PLATFORM%]==[x64] (SET ADDITIONALPARAM=/p:LibraryPath="C:\Program Files\Microsoft SDKs\Windows\v7.1\lib\x64;c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\lib\amd64;C:\Program Files (x86)\Microsoft Visual Studio 10.0\;C:\Program Files (x86)\Microsoft Visual Studio 10.0\lib\amd64;")
    )
@ -50,6 +51,8 @@ build_script:
      ECHO *** &&
      ECHO *** Building %PLATFORM% &&
      ECHO *** &&
+      make -v &&
+      cc -v &&
      ECHO make %MAKE_PARAMS% &&
      make %MAKE_PARAMS% &&
      make clean
--- a/examples/dictionary_compression.c
+++ b/examples/dictionary_compression.c
@ -1,5 +1,5 @@
 /*
-  Dictionary decompression
+  Dictionary compression
  Educational program using zstd library
  Copyright (C) Yann Collet 2016

--- a/examples/simple_compression.c
+++ b/examples/simple_compression.c
@ -31,7 +31,7 @@
 #include <zstd.h>      // presumes zstd library is installed


-static off_t fsize_X(const char *filename)
+static off_t fsize_orDie(const char *filename)
 {
    struct stat st;
    if (stat(filename, &st) == 0) return st.st_size;
@ -40,7 +40,7 @@ static off_t fsize_X(const char *filename)
    exit(1);
 }

-static FILE* fopen_X(const char *filename, const char *instruction)
+static FILE* fopen_orDie(const char *filename, const char *instruction)
 {
    FILE* const inFile = fopen(filename, instruction);
    if (inFile) return inFile;
@ -49,7 +49,7 @@ static FILE* fopen_X(const char *filename, const char *instruction)
    exit(2);
 }

-static void* malloc_X(size_t size)
+static void* malloc_orDie(size_t size)
 {
    void* const buff = malloc(size);
    if (buff) return buff;
@ -58,11 +58,11 @@ static void* malloc_X(size_t size)
    exit(3);
 }

-static void* loadFile_X(const char* fileName, size_t* size)
+static void* loadFile_orDie(const char* fileName, size_t* size)
 {
-    off_t const buffSize = fsize_X(fileName);
-    FILE* const inFile = fopen_X(fileName, "rb");
-    void* const buffer = malloc_X(buffSize);
+    off_t const buffSize = fsize_orDie(fileName);
+    FILE* const inFile = fopen_orDie(fileName, "rb");
+    void* const buffer = malloc_orDie(buffSize);
    size_t const readSize = fread(buffer, 1, buffSize, inFile);
    if (readSize != (size_t)buffSize) {
        fprintf(stderr, "fread: %s : %s \n", fileName, strerror(errno));
@ -74,9 +74,9 @@ static void* loadFile_X(const char* fileName, size_t* size)
 }


-static void saveFile_X(const char* fileName, const void* buff, size_t buffSize)
+static void saveFile_orDie(const char* fileName, const void* buff, size_t buffSize)
 {
-    FILE* const oFile = fopen_X(fileName, "wb");
+    FILE* const oFile = fopen_orDie(fileName, "wb");
    size_t const wSize = fwrite(buff, 1, buffSize, oFile);
    if (wSize != (size_t)buffSize) {
        fprintf(stderr, "fwrite: %s : %s \n", fileName, strerror(errno));
@ -89,12 +89,12 @@ static void saveFile_X(const char* fileName, const void* buff, size_t buffSize)
 }


-static void compress(const char* fname, const char* oname)
+static void compress_orDie(const char* fname, const char* oname)
 {
    size_t fSize;
-    void* const fBuff = loadFile_X(fname, &fSize);
+    void* const fBuff = loadFile_orDie(fname, &fSize);
    size_t const cBuffSize = ZSTD_compressBound(fSize);
-    void* const cBuff = malloc_X(cBuffSize);
+    void* const cBuff = malloc_orDie(cBuffSize);

    size_t const cSize = ZSTD_compress(cBuff, cBuffSize, fBuff, fSize, 1);
    if (ZSTD_isError(cSize)) {
@ -102,7 +102,7 @@ static void compress(const char* fname, const char* oname)
        exit(7);
    }

-    saveFile_X(oname, cBuff, cSize);
+    saveFile_orDie(oname, cBuff, cSize);

    /* success */
    printf("%25s : %6u -> %7u - %s \n", fname, (unsigned)fSize, (unsigned)cSize, oname);
@ -112,11 +112,11 @@ static void compress(const char* fname, const char* oname)
 }


-static const char* createOutFilename(const char* filename)
+static const char* createOutFilename_orDie(const char* filename)
 {
    size_t const inL = strlen(filename);
    size_t const outL = inL + 5;
-    void* outSpace = malloc_X(outL);
+    void* outSpace = malloc_orDie(outL);
    memset(outSpace, 0, outL);
    strcat(outSpace, filename);
    strcat(outSpace, ".zst");
@ -135,8 +135,8 @@ int main(int argc, const char** argv)
        return 1;
    }

-    const char* const outFilename = createOutFilename(inFilename);
-    compress(inFilename, outFilename);
+    const char* const outFilename = createOutFilename_orDie(inFilename);
+    compress_orDie(inFilename, outFilename);

    return 0;
 }
--- a/images/Cspeed4.png
+++ b/images/Cspeed4.png
--- a/lib/Makefile
+++ b/lib/Makefile
@ -31,9 +31,9 @@
 # ################################################################

 # Version numbers
-LIBVER_MAJOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ./common/zstd.h`
-LIBVER_MINOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ./common/zstd.h`
-LIBVER_PATCH_SCRIPT:=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ./common/zstd.h`
+LIBVER_MAJOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ./zstd.h`
+LIBVER_MINOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ./zstd.h`
+LIBVER_PATCH_SCRIPT:=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ./zstd.h`
 LIBVER_SCRIPT:= $(LIBVER_MAJOR_SCRIPT).$(LIBVER_MINOR_SCRIPT).$(LIBVER_PATCH_SCRIPT)
 LIBVER_MAJOR := $(shell echo $(LIBVER_MAJOR_SCRIPT))
 LIBVER_MINOR := $(shell echo $(LIBVER_MINOR_SCRIPT))
@ -46,9 +46,10 @@ PREFIX ?= /usr/local
 LIBDIR ?= $(PREFIX)/lib
 INCLUDEDIR=$(PREFIX)/include

-CPPFLAGS= -I./common -DXXH_NAMESPACE=ZSTD_
+CPPFLAGS= -I. -I./common -DXXH_NAMESPACE=ZSTD_
 CFLAGS ?= -O3
-CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes -Wundef
+CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 \
+          -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes -Wundef
 FLAGS   = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MOREFLAGS)


@ -95,11 +96,12 @@ libzstd: $(ZSTD_FILES)

 clean:
 	@rm -f core *.o *.a *.gcda *.$(SHARED_EXT) *.$(SHARED_EXT).* libzstd.pc
+	@rm -f decompress/*.o
 	@echo Cleaning library completed

 #------------------------------------------------------------------------
-#make install is validated only for Linux, OSX, kFreeBSD and Hurd targets
-ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU))
+#make install is validated only for Linux, OSX, kFreeBSD, Hurd and some BSD targets
+ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU FreeBSD DragonFly))

 libzstd.pc:
 libzstd.pc: libzstd.pc.in
@ -117,7 +119,7 @@ install: libzstd libzstd.pc
 	@cp -a libzstd.$(SHARED_EXT) $(DESTDIR)$(LIBDIR)
 	@cp -a libzstd.pc $(DESTDIR)$(LIBDIR)/pkgconfig/
 	@install -m 644 libzstd.a $(DESTDIR)$(LIBDIR)/libzstd.a
-	@install -m 644 common/zstd.h $(DESTDIR)$(INCLUDEDIR)/zstd.h
+	@install -m 644 zstd.h $(DESTDIR)$(INCLUDEDIR)/zstd.h
 	@install -m 644 common/zbuff.h $(DESTDIR)$(INCLUDEDIR)/zbuff.h
 	@install -m 644 dictBuilder/zdict.h $(DESTDIR)$(INCLUDEDIR)/zdict.h
 	@echo zstd static and shared library installed
--- a/lib/README.md
+++ b/lib/README.md
@ -1,62 +1,51 @@
 zstd - library files
 ================================

-The __lib__ directory contains several files, but depending on target use case, some of them may not be necessary.
-
-#### Minimal library files
-
-To build the zstd library the following files are required:
-
- [common/bitstream.h](common/bitstream.h)
- [common/error_private.h](common/error_private.h)
- [common/error_public.h](common/error_public.h)
- common/fse.h
- common/fse_decompress.c
- common/huf.h
- [common/mem.h](common/mem.h)
- [common/zstd.h]
- common/zstd_internal.h
- compress/fse_compress.c
- compress/huf_compress.c
- compress/zstd_compress.c
- compress/zstd_opt.h
- decompress/huf_decompress.c
- decompress/zstd_decompress.c
-
-Stable API is exposed in [common/zstd.h].
-Advanced and experimental API can be enabled by defining `ZSTD_STATIC_LINKING_ONLY`.
-Never use them with a dynamic library, as their definition may change in future versions.
-
-[common/zstd.h]: common/zstd.h
+The __lib__ directory contains several directories.
+Depending on target use case, it's enough to include only files from relevant directories.


-#### Separate compressor and decompressor
+#### API

-To build a separate zstd compressor all files from `common/` and `compressor/` directories are required.
-In a similar way to build a separate zstd decompressor all files from `common/` and `decompressor/` directories are needed.
+Zstandard's stable API is exposed within [zstd.h](zstd.h),
+at the root of `lib` directory.


-#### Buffered streaming
+#### Advanced API

-This complementary API makes streaming integration easier.
-It is used by `zstd` command line utility, and [7zip plugin](http://mcmilk.de/projects/7-Zip-ZStd) :
-
- common/zbuff.h
- compress/zbuff_compress.c
- decompress/zbuff_decompress.c
+Some additional API may be useful if you're looking into advanced features :
+- common/error_public.h : transforms `size_t` function results into an `enum`,
+                          for precise error handling.
+- ZSTD_STATIC_LINKING_ONLY : if you define this macro _before_ including `zstd.h`,
+                          it will give access to advanced and experimental API.
+                          These APIs shall ___never be used with dynamic library___ !
+                          They are not "stable", their definition may change in the future.
+                          Only static linking is allowed.


-#### Dictionary builder
+#### Modular build

-In order to create dictionaries from some training sets,
-it's needed to include all files from [dictBuilder directory](dictBuilder/)
+Directory `common/` is required in all circumstances.
+You can select to support compression only, by just adding files from the `compress/` directory,
+In a similar way, you can build a decompressor-only library with the `decompress/` directory.
+
+Other optional functionalities provided are :
+
+- `dictBuilder/`  : source files to create dictionaries.
+                    The API can be consulted in `dictBuilder/zdict.h`.
+                    This module also depends on `common/` and `compress/` .
+
+- `legacy/` : source code to decompress previous versions of zstd, starting from `v0.1`.
+              This module also depends on `common/` and `decompress/` .
+              Note that it's required to compile the library with `ZSTD_LEGACY_SUPPORT = 1` .
+              The main API can be consulted in `legacy/zstd_legacy.h`.
+              Advanced API from each version can be found in its relevant header file.
+              For example, advanced API for version `v0.4` is in `zstd_v04.h` .


-#### Legacy support
+#### Streaming API

-Zstandard can decode previous formats, starting from v0.1.
-Support for these format is provided in [folder legacy](legacy/).
-It's also required to compile the library with `ZSTD_LEGACY_SUPPORT = 1`.
+Streaming is currently provided by `common/zbuff.h`.


 #### Miscellaneous
@ -64,5 +53,5 @@ It's also required to compile the library with `ZSTD_LEGACY_SUPPORT = 1`.
 The other files are not source code. They are :

 - LICENSE : contains the BSD license text
- - Makefile : script to compile or install zstd library (static or dynamic)
- - libzstd.pc.in : for pkg-config (make install)
+ - Makefile : script to compile or install zstd library (static and dynamic)
+ - libzstd.pc.in : for pkg-config (`make install`)
--- a/lib/common/entropy_common.c
+++ b/lib/common/entropy_common.c
@ -38,10 +38,9 @@
 #include "mem.h"
 #include "error_private.h"       /* ERR_*, ERROR */
 #define FSE_STATIC_LINKING_ONLY  /* FSE_MIN_TABLELOG */
-#include "fse.h"   /* FSE_isError, FSE_getErrorName */
+#include "fse.h"
 #define HUF_STATIC_LINKING_ONLY  /* HUF_TABLELOG_ABSOLUTEMAX */
-#include "huf.h"   /* HUF_isError, HUF_getErrorName */
-
+#include "huf.h"


 /*-****************************************
@ -63,7 +62,7 @@ const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
 /*-**************************************************************
 *  FSE NCount encoding-decoding
 ****************************************************************/
-static short FSE_abs(short a) { return a<0 ? -a : a; }
+static short FSE_abs(short a) { return (short)(a<0 ? -a : a); }

 size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
                 const void* headerBuffer, size_t hbSize)
@ -90,22 +89,22 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
    threshold = 1<<nbBits;
    nbBits++;

-    while ((remaining>1) && (charnum<=*maxSVPtr)) {
+    while ((remaining>1) & (charnum<=*maxSVPtr)) {
        if (previous0) {
            unsigned n0 = charnum;
            while ((bitStream & 0xFFFF) == 0xFFFF) {
-                n0+=24;
+                n0 += 24;
                if (ip < iend-5) {
-                    ip+=2;
+                    ip += 2;
                    bitStream = MEM_readLE32(ip) >> bitCount;
                } else {
                    bitStream >>= 16;
-                    bitCount+=16;
+                    bitCount   += 16;
            }   }
            while ((bitStream & 3) == 3) {
-                n0+=3;
-                bitStream>>=2;
-                bitCount+=2;
+                n0 += 3;
+                bitStream >>= 2;
+                bitCount += 2;
            }
            n0 += bitStream & 3;
            bitCount += 2;
@ -115,10 +114,9 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
                ip += bitCount>>3;
                bitCount &= 7;
                bitStream = MEM_readLE32(ip) >> bitCount;
-            }
-            else
+            } else {
                bitStream >>= 2;
-        }
+        }   }
        {   short const max = (short)((2*threshold-1)-remaining);
            short count;

@ -148,12 +146,12 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
                ip = iend - 4;
            }
            bitStream = MEM_readLE32(ip) >> (bitCount & 31);
-    }   }   /* while ((remaining>1) && (charnum<=*maxSVPtr)) */
-    if (remaining != 1) return ERROR(GENERIC);
+    }   }   /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
+    if (remaining != 1) return ERROR(corruption_detected);
+    if (bitCount > 32) return ERROR(corruption_detected);
    *maxSVPtr = charnum-1;

    ip += (bitCount+7)>>3;
-    if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong);
    return ip-istart;
 }

@ -162,7 +160,7 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
    Read compact Huffman tree, saved by HUF_writeCTable().
    `huffWeight` is destination buffer.
    @return : size read from `src` , or an error Code .
-    Note : Needed by HUF_readCTable() and HUF_readDTableXn() .
+    Note : Needed by HUF_readCTable() and HUF_readDTableX?() .
 */
 size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
                     U32* nbSymbolsPtr, U32* tableLogPtr,
@ -173,26 +171,19 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
    size_t iSize = ip[0];
    size_t oSize;

-    //memset(huffWeight, 0, hwSize);   /* is not necessary, even though some analyzer complain ... */
+    /* memset(huffWeight, 0, hwSize);   *//* is not necessary, even though some analyzer complain ... */

-    if (iSize >= 128)  { /* special header */
-        if (iSize >= (242)) {  /* RLE */
-            static U32 l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
-            oSize = l[iSize-242];
-            memset(huffWeight, 1, hwSize);
-            iSize = 0;
-        }
-        else {   /* Incompressible */
-            oSize = iSize - 127;
-            iSize = ((oSize+1)/2);
-            if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
-            if (oSize >= hwSize) return ERROR(corruption_detected);
-            ip += 1;
-            {   U32 n;
-                for (n=0; n<oSize; n+=2) {
-                    huffWeight[n]   = ip[n/2] >> 4;
-                    huffWeight[n+1] = ip[n/2] & 15;
-    }   }   }   }
+    if (iSize >= 128) {  /* special header */
+        oSize = iSize - 127;
+        iSize = ((oSize+1)/2);
+        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+        if (oSize >= hwSize) return ERROR(corruption_detected);
+        ip += 1;
+        {   U32 n;
+            for (n=0; n<oSize; n+=2) {
+                huffWeight[n]   = ip[n/2] >> 4;
+                huffWeight[n+1] = ip[n/2] & 15;
+    }   }   }
    else  {   /* header compressed with FSE (normal case) */
        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
        oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize);   /* max (hwSize-1) values decoded, as last one is implied */
--- a/lib/common/huf.h
+++ b/lib/common/huf.h
@ -100,7 +100,7 @@ size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize
 /* *** Constants *** */
 #define HUF_TABLELOG_ABSOLUTEMAX  16   /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
 #define HUF_TABLELOG_MAX  12           /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
-#define HUF_TABLELOG_DEFAULT  HUF_TABLELOG_MAX   /* tableLog by default, when not specified */
+#define HUF_TABLELOG_DEFAULT  11       /* tableLog by default, when not specified */
 #define HUF_SYMBOLVALUE_MAX 255
 #if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
 #  error "HUF_TABLELOG_MAX is too large !"
--- a/lib/common/mem.h
+++ b/lib/common/mem.h
@ -44,19 +44,17 @@ extern "C" {
 ******************************************/
 #include <stddef.h>     /* size_t, ptrdiff_t */
 #include <string.h>     /* memcpy */
-#if defined(_MSC_VER)   /* Visual Studio */
-#   include <stdlib.h>  /* _byteswap_ulong */
-#endif


 /*-****************************************
 *  Compiler specifics
 ******************************************/
-#if defined(_MSC_VER)
-#   include <intrin.h>   /* _byteswap_ */
+#if defined(_MSC_VER)   /* Visual Studio */
+#   include <stdlib.h>  /* _byteswap_ulong */
+#   include <intrin.h>  /* _byteswap_* */
 #endif
 #if defined(__GNUC__)
-#  define MEM_STATIC static __attribute__((unused))
+#  define MEM_STATIC static __inline __attribute__((unused))
 #elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
 #  define MEM_STATIC static inline
 #elif defined(_MSC_VER)
@ -65,6 +63,10 @@ extern "C" {
 #  define MEM_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
 #endif

+/* code only tested on 32 and 64 bits systems */
+#define MEM_STATIC_ASSERT(c)   { enum { XXH_static_assert = 1/(int)(!!(c)) }; }
+MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
+

 /*-**************************************************************
 *  Basic Types
@ -256,6 +258,17 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
    }
 }

+MEM_STATIC U32 MEM_readLE24(const void* memPtr)
+{
+    return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
+}
+
+MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val)
+{
+    MEM_writeLE16(memPtr, (U16)val);
+    ((BYTE*)memPtr)[2] = (BYTE)(val>>16);
+}
+
 MEM_STATIC U32 MEM_readLE32(const void* memPtr)
 {
    if (MEM_isLittleEndian())
@ -374,4 +387,3 @@ MEM_STATIC U32 MEM_readMINMATCH(const void* memPtr, U32 length)
 #endif

 #endif /* MEM_H_MODULE */
-
--- a/lib/common/zbuff.h
+++ b/lib/common/zbuff.h
@ -56,6 +56,12 @@ extern "C" {
 /* *************************************
 *  Streaming functions
 ***************************************/
+/* This is the easier "buffered" streaming API,
+*  using an internal buffer to lift all restrictions on user-provided buffers
+*  which can be any size, any place, for both input and output.
+*  ZBUFF and ZSTD are 100% interoperable,
+*  frames created by one can be decoded by the other one */
+
 typedef struct ZBUFF_CCtx_s ZBUFF_CCtx;
 ZSTDLIB_API ZBUFF_CCtx* ZBUFF_createCCtx(void);
 ZSTDLIB_API size_t      ZBUFF_freeCCtx(ZBUFF_CCtx* cctx);
@ -133,8 +139,9 @@ ZSTDLIB_API size_t ZBUFF_decompressContinue(ZBUFF_DCtx* dctx,
 *  The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr.
 *  Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again.
 *  The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`.
-*  @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency),
-*            or 0 when a frame is completely decoded,
+*  @return : 0 when a frame is completely decoded and fully flushed,
+*            1 when there is still some data left within internal buffer to flush,
+*            >1 when more data is expected, with value being a suggested next input size (it's just a hint, which helps latency),
 *            or an error code, which can be tested using ZBUFF_isError().
 *
 *  Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedDInSize() and ZBUFF_recommendedDOutSize()
@ -168,11 +175,11 @@ ZSTDLIB_API size_t ZBUFF_recommendedDOutSize(void);
 * ==================================================================================== */

 /*--- Dependency ---*/
-#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_parameters */
+#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_parameters, ZSTD_customMem */
 #include "zstd.h"


-/*--- External memory ---*/
+/*--- Custom memory allocator ---*/
 /*! ZBUFF_createCCtx_advanced() :
 *  Create a ZBUFF compression context using external alloc and free functions */
 ZSTDLIB_API ZBUFF_CCtx* ZBUFF_createCCtx_advanced(ZSTD_customMem customMem);
@ -182,7 +189,7 @@ ZSTDLIB_API ZBUFF_CCtx* ZBUFF_createCCtx_advanced(ZSTD_customMem customMem);
 ZSTDLIB_API ZBUFF_DCtx* ZBUFF_createDCtx_advanced(ZSTD_customMem customMem);


-/*--- Advanced Streaming function ---*/
+/*--- Advanced Streaming Initialization ---*/
 ZSTDLIB_API size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,
                                               const void* dict, size_t dictSize,
                                               ZSTD_parameters params, unsigned long long pledgedSrcSize);
--- a/lib/common/zstd_internal.h
+++ b/lib/common/zstd_internal.h
@ -52,8 +52,9 @@
 *  Common constants
 ***************************************/
 #define ZSTD_OPT_DEBUG 0     /* 3 = compression stats;  5 = check encoded sequences;  9 = full logs */
-#include <stdio.h>
 #if defined(ZSTD_OPT_DEBUG) && ZSTD_OPT_DEBUG>=9
+    #include <stdio.h>
+    #include <stdlib.h>
    #define ZSTD_LOG_PARSER(...) printf(__VA_ARGS__)
    #define ZSTD_LOG_ENCODE(...) printf(__VA_ARGS__)
    #define ZSTD_LOG_BLOCK(...) printf(__VA_ARGS__)
@ -64,10 +65,10 @@
 #endif

 #define ZSTD_OPT_NUM    (1<<12)
-#define ZSTD_DICT_MAGIC  0xEC30A437   /* v0.7 */
+#define ZSTD_DICT_MAGIC  0xEC30A437   /* v0.7+ */

-#define ZSTD_REP_NUM    3
-#define ZSTD_REP_INIT   ZSTD_REP_NUM
+#define ZSTD_REP_NUM    3                 /* number of repcodes */
+#define ZSTD_REP_CHECK  (ZSTD_REP_NUM-0)  /* number of repcodes to check by the optimal parser */
 #define ZSTD_REP_MOVE   (ZSTD_REP_NUM-1)
 static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };

@ -88,13 +89,13 @@ static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };

 #define ZSTD_BLOCKHEADERSIZE 3   /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
 static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
-typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
+typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;

 #define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
 #define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */)   /* for a non-null block */

 #define HufLog 12
-typedef enum { lbt_huffman, lbt_repeat, lbt_raw, lbt_rle } litBlockType_t;
+typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;

 #define LONGNBSEQ 0x7F00

@ -111,11 +112,6 @@ typedef enum { lbt_huffman, lbt_repeat, lbt_raw, lbt_rle } litBlockType_t;
 #define LLFSELog    9
 #define OffFSELog   8

-#define FSE_ENCODING_RAW     0
-#define FSE_ENCODING_RLE     1
-#define FSE_ENCODING_STATIC  2
-#define FSE_ENCODING_DYNAMIC 3
-
 static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                      1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9,10,11,12,
                                     13,14,15,16 };
@ -174,7 +170,7 @@ typedef struct {
    U32 off;
    U32 mlen;
    U32 litlen;
-    U32 rep[ZSTD_REP_INIT];
+    U32 rep[ZSTD_REP_NUM];
 } ZSTD_optimal_t;

 #if ZSTD_OPT_DEBUG == 3
@ -187,19 +183,22 @@ typedef struct {
    MEM_STATIC void ZSTD_statsUpdatePrices(ZSTD_stats_t* stats, size_t litLength, const BYTE* literals, size_t offset, size_t matchLength) { (void)stats; (void)litLength; (void)literals; (void)offset; (void)matchLength; }
 #endif   /* #if ZSTD_OPT_DEBUG == 3 */

+
+typedef struct seqDef_s {   /* one sequence record; seqStore_t points to these through sequencesStart / sequences */
+    U32 offset;
+    U16 litLength;     /* 16-bit field. NOTE(review): presumably longer lengths are flagged through seqStore_t longLengthID / longLengthPos - confirm */
+    U16 matchLength;   /* 16-bit field, same remark as litLength */
+} seqDef;
+
+
 typedef struct {
-    void* buffer;
-    U32*  offsetStart;
-    U32*  offset;
-    BYTE* offCodeStart;
+    seqDef* sequencesStart;
+    seqDef* sequences;
    BYTE* litStart;
    BYTE* lit;
-    U16*  litLengthStart;
-    U16*  litLength;
-    BYTE* llCodeStart;
-    U16*  matchLengthStart;
-    U16*  matchLength;
-    BYTE* mlCodeStart;
+    BYTE* llCode;
+    BYTE* mlCode;
+    BYTE* ofCode;
    U32   longLengthID;   /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
    U32   longLengthPos;
    /* opt */
@ -227,7 +226,7 @@ typedef struct {
 } seqStore_t;

 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);
-void ZSTD_seqToCodes(const seqStore_t* seqStorePtr, size_t const nbSeq);
+void ZSTD_seqToCodes(const seqStore_t* seqStorePtr);
 int ZSTD_isSkipFrame(ZSTD_DCtx* dctx);

 /* custom memory allocation functions */
@ -235,4 +234,29 @@ void* ZSTD_defaultAllocFunction(void* opaque, size_t size);
 void ZSTD_defaultFreeFunction(void* opaque, void* address);
 static const ZSTD_customMem defaultCustomMem = { ZSTD_defaultAllocFunction, ZSTD_defaultFreeFunction, NULL };

+/*======  common function  ======*/
+
+MEM_STATIC U32 ZSTD_highbit32(U32 val)   /* index (0-31) of the highest set bit of val. NOTE(review): val==0 looks unsupported, since the GCC path uses __builtin_clz - confirm callers never pass 0 */
+{
+#   if defined(_MSC_VER)   /* Visual */
+    unsigned long r=0;
+    _BitScanReverse(&r, val);   /* writes the index of the most significant set bit into r */
+    return (unsigned)r;
+#   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* GCC Intrinsic */
+    return 31 - __builtin_clz(val);   /* clz counts leading zero bits, so 31 - clz is the index of the top set bit */
+#   else   /* Software version */
+    static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+    U32 v = val;
+    int r;
+    v |= v >> 1;   /* this line and the four below copy the highest set bit into every lower position */
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27];   /* multiply then keep the top 5 bits : index into the 32-entry table */
+    return r;
+#   endif
+}
+
+
 #endif   /* ZSTD_CCOMMON_H_MODULE */
--- a/lib/compress/fse_compress.c
+++ b/lib/compress/fse_compress.c
@ -190,7 +190,7 @@ size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
    return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND;  /* maxSymbolValue==0 ? use default */
 }

-static short FSE_abs(short a) { return a<0 ? -a : a; }
+static short FSE_abs(short a) { return (short)(a<0 ? -a : a); }   /* absolute value of a; the explicit cast narrows the result of the conditional expression back to short */

 static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
                                       const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
--- a/lib/compress/huf_compress.c
+++ b/lib/compress/huf_compress.c
@ -105,68 +105,39 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize,
                        const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog)
 {
    BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* lookup : code length (nbBits) -> weight */
-    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
-    U32 n;
+    BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
    BYTE* op = (BYTE*)dst;
-    size_t size;
+    U32 n;

     /* check conditions */
-    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX + 1)
-        return ERROR(GENERIC);
+    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(GENERIC);

    /* convert to weight */
    bitsToWeight[0] = 0;   /* nbBits==0 maps to weight 0 */
-    for (n=1; n<=huffLog; n++)
+    for (n=1; n<huffLog+1; n++)
        bitsToWeight[n] = (BYTE)(huffLog + 1 - n);   /* weight = huffLog + 1 - nbBits */
    for (n=0; n<maxSymbolValue; n++)
        huffWeight[n] = bitsToWeight[CTable[n].nbBits];

-    size = FSE_compress(op+1, maxDstSize-1, huffWeight, maxSymbolValue);   /* don't need last symbol stat : implied */
-    if (HUF_isError(size)) return size;
-    if (size >= 128) return ERROR(GENERIC);   /* should never happen, since maxSymbolValue <= 255 */
-    if ((size <= 1) || (size >= maxSymbolValue/2)) {
-        if (size==1) {  /* RLE */
-            /* only possible case : series of 1 (because there are at least 2) */
-            /* can only be 2^n or (2^n-1), otherwise not an huffman tree */
-            BYTE code;
-            switch(maxSymbolValue)
-            {
-            case 1: code = 0; break;
-            case 2: code = 1; break;
-            case 3: code = 2; break;
-            case 4: code = 3; break;
-            case 7: code = 4; break;
-            case 8: code = 5; break;
-            case 15: code = 6; break;
-            case 16: code = 7; break;
-            case 31: code = 8; break;
-            case 32: code = 9; break;
-            case 63: code = 10; break;
-            case 64: code = 11; break;
-            case 127: code = 12; break;
-            case 128: code = 13; break;
-            default : return ERROR(corruption_detected);
-            }
-            op[0] = (BYTE)(255-13 + code);
-            return 1;
-        }
-         /* Not compressible */
-        if (maxSymbolValue > (241-128)) return ERROR(GENERIC);   /* not implemented (not possible with current format) */
-        if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
-        op[0] = (BYTE)(128 /*special case*/ + 0 /* Not Compressible */ + (maxSymbolValue-1));
-        huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause issue in final combination */
-        for (n=0; n<maxSymbolValue; n+=2)
-            op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
-        return ((maxSymbolValue+1)/2) + 1;
-    }
+    {   size_t const size = FSE_compress(op+1, maxDstSize-1, huffWeight, maxSymbolValue);   /* weights are compressed after op[0], which is kept for the header byte */
+        if (FSE_isError(size)) return size;
+        if ((size>1) & (size < maxSymbolValue/2)) {   /* FSE compressed */
+            op[0] = (BYTE)size;
+            return size+1;   /* header byte + FSE-compressed weights */
+    }   }
+
+    /* raw values : header byte, then two 4-bit weights packed per byte */
+    if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen */
+    if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
+    op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
+    huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause issue in final combination */
+    for (n=0; n<maxSymbolValue; n+=2)
+        op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
+    return ((maxSymbolValue+1)/2) + 1;

-    /* normal header case */
-    op[0] = (BYTE)size;
-    return size+1;
 }


-
 size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, size_t srcSize)
 {
    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
@ -174,7 +145,7 @@ size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, si
    U32 tableLog = 0;
    size_t readSize;
    U32 nbSymbols = 0;
-    //memset(huffWeight, 0, sizeof(huffWeight));   /* is not necessary, even though some analyzer complain ... */
+    /*memset(huffWeight, 0, sizeof(huffWeight));*/   /* is not necessary, even though some analyzers complain ... */

    /* get symbol weights */
    readSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize);
@ -193,10 +164,10 @@ size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, si
    }   }

    /* fill nbBits */
-    { U32 n; for (n=0; n<nbSymbols; n++) {
-        const U32 w = huffWeight[n];
-        CTable[n].nbBits = (BYTE)(tableLog + 1 - w);
-    }}
+    {   U32 n; for (n=0; n<nbSymbols; n++) {
+            const U32 w = huffWeight[n];
+            CTable[n].nbBits = (BYTE)(tableLog + 1 - w);
+    }   }

    /* fill val */
    {   U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
@ -535,7 +506,6 @@ static size_t HUF_compress_internal (
    {   size_t const hSize = HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog);
        if (HUF_isError(hSize)) return hSize;
        if (hSize + 12 >= srcSize) return 0;   /* not useful to try compression */
-        //static U64 totalHSize = 0; static U32 nbHSize = 0; totalHSize += hSize; nbHSize++; if ((nbHSize & 63) == 1) printf("average : %6.3f \n", (double)totalHSize / nbHSize);
        op += hSize;
    }

--- a/lib/compress/zbuff_compress.c
+++ b/lib/compress/zbuff_compress.c
@ -46,7 +46,7 @@
 static size_t const ZBUFF_endFrameSize = ZSTD_BLOCKHEADERSIZE;


-/*_**************************************************
+/*-***********************************************************
 *  Streaming compression
 *
 *  A ZBUFF_CCtx object is required to track streaming operation.
@ -77,7 +77,7 @@ static size_t const ZBUFF_endFrameSize = ZSTD_BLOCKHEADERSIZE;
 *  Hint : recommended buffer sizes (not compulsory)
 *  input : ZSTD_BLOCKSIZE_MAX (128 KB), internal unit size, it improves latency to use this value.
 *  output : ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + ZBUFF_endFrameSize : ensures it's always possible to write/flush/end a full block at best speed.
-* **************************************************/
+* ***********************************************************/

 typedef enum { ZBUFFcs_init, ZBUFFcs_load, ZBUFFcs_flush, ZBUFFcs_final } ZBUFF_cStage;

@ -95,6 +95,8 @@ struct ZBUFF_CCtx_s {
    size_t outBuffContentSize;
    size_t outBuffFlushedSize;
    ZBUFF_cStage stage;
+    U32    checksum;
+    U32    frameEnded;
    ZSTD_customMem customMem;
 };   /* typedef'd to ZBUFF_CCtx within "zstd_buffered.h" */

@ -133,7 +135,7 @@ size_t ZBUFF_freeCCtx(ZBUFF_CCtx* zbc)
 }


-/* *** Initialization *** */
+/* ======   Initialization   ====== */

 size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,
                                   const void* dict, size_t dictSize,
@ -147,7 +149,7 @@ size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,
            zbc->inBuff = (char*)zbc->customMem.customAlloc(zbc->customMem.opaque, neededInBuffSize);
            if (zbc->inBuff == NULL) return ERROR(memory_allocation);
        }
-        zbc->blockSize = MIN(ZSTD_BLOCKSIZE_MAX, neededInBuffSize);
+        zbc->blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, neededInBuffSize);
    }
    if (zbc->outBuffSize < ZSTD_compressBound(zbc->blockSize)+1) {
        zbc->outBuffSize = ZSTD_compressBound(zbc->blockSize)+1;
@ -164,6 +166,8 @@ size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,
    zbc->inBuffTarget = zbc->blockSize;
    zbc->outBuffContentSize = zbc->outBuffFlushedSize = 0;
    zbc->stage = ZBUFFcs_load;
+    zbc->checksum = params.fParams.checksumFlag > 0;
+    zbc->frameEnded = 0;
    return 0;   /* ready to go */
 }

@ -189,14 +193,16 @@ MEM_STATIC size_t ZBUFF_limitCopy(void* dst, size_t dstCapacity, const void* src
 }


-/* *** Compression *** */
+/* ======   Compression   ====== */
+
+typedef enum { zbf_gather, zbf_flush, zbf_end } ZBUFF_flush_e;

 static size_t ZBUFF_compressContinue_generic(ZBUFF_CCtx* zbc,
                              void* dst, size_t* dstCapacityPtr,
                        const void* src, size_t* srcSizePtr,
-                              int flush)
+                              ZBUFF_flush_e const flush)
 {
-    U32 notDone = 1;
+    U32 someMoreWork = 1;
    const char* const istart = (const char*)src;
    const char* const iend = istart + *srcSizePtr;
    const char* ip = istart;
@ -204,7 +210,7 @@ static size_t ZBUFF_compressContinue_generic(ZBUFF_CCtx* zbc,
    char* const oend = ostart + *dstCapacityPtr;
    char* op = ostart;

-    while (notDone) {
+    while (someMoreWork) {
        switch(zbc->stage)
        {
        case ZBUFFcs_init: return ERROR(init_missing);   /* call ZBUFF_compressInit() first ! */
@ -216,7 +222,7 @@ static size_t ZBUFF_compressContinue_generic(ZBUFF_CCtx* zbc,
                zbc->inBuffPos += loaded;
                ip += loaded;
                if ( (zbc->inBuffPos==zbc->inToCompress) || (!flush && (toLoad != loaded)) ) {
-                    notDone = 0; break;  /* not enough input to get a full block : stop there, wait for more */
+                    someMoreWork = 0; break;  /* not enough input to get a full block : stop there, wait for more */
            }   }
            /* compress current block (note : this stage cannot be stopped in the middle) */
            {   void* cDst;
@ -227,8 +233,11 @@ static size_t ZBUFF_compressContinue_generic(ZBUFF_CCtx* zbc,
                    cDst = op;   /* compress directly into output buffer (avoid flush stage) */
                else
                    cDst = zbc->outBuff, oSize = zbc->outBuffSize;
-                cSize = ZSTD_compressContinue(zbc->zc, cDst, oSize, zbc->inBuff + zbc->inToCompress, iSize);
+                cSize = (flush == zbf_end) ?
+                        ZSTD_compressEnd(zbc->zc, cDst, oSize, zbc->inBuff + zbc->inToCompress, iSize) :
+                        ZSTD_compressContinue(zbc->zc, cDst, oSize, zbc->inBuff + zbc->inToCompress, iSize);
                if (ZSTD_isError(cSize)) return cSize;
+                if (flush == zbf_end) zbc->frameEnded = 1;
                /* prepare next block */
                zbc->inBuffTarget = zbc->inBuffPos + zbc->blockSize;
                if (zbc->inBuffTarget > zbc->inBuffSize)
@ -245,14 +254,14 @@ static size_t ZBUFF_compressContinue_generic(ZBUFF_CCtx* zbc,
                size_t const flushed = ZBUFF_limitCopy(op, oend-op, zbc->outBuff + zbc->outBuffFlushedSize, toFlush);
                op += flushed;
                zbc->outBuffFlushedSize += flushed;
-                if (toFlush!=flushed) { notDone = 0; break; } /* dst too small to store flushed data : stop there */
+                if (toFlush!=flushed) { someMoreWork = 0; break; } /* dst too small to store flushed data : stop there */
                zbc->outBuffContentSize = zbc->outBuffFlushedSize = 0;
                zbc->stage = ZBUFFcs_load;
                break;
            }

        case ZBUFFcs_final:
-            notDone = 0;   /* do nothing */
+            someMoreWork = 0;   /* do nothing */
            break;

        default:
@ -262,6 +271,7 @@ static size_t ZBUFF_compressContinue_generic(ZBUFF_CCtx* zbc,

    *srcSizePtr = ip - istart;
    *dstCapacityPtr = op - ostart;
+    if (zbc->frameEnded) return 0;
    {   size_t hintInSize = zbc->inBuffTarget - zbc->inBuffPos;
        if (hintInSize==0) hintInSize = zbc->blockSize;
        return hintInSize;
@ -272,17 +282,17 @@ size_t ZBUFF_compressContinue(ZBUFF_CCtx* zbc,
                               void* dst, size_t* dstCapacityPtr,
                         const void* src, size_t* srcSizePtr)
 {
-    return ZBUFF_compressContinue_generic(zbc, dst, dstCapacityPtr, src, srcSizePtr, 0);
+    return ZBUFF_compressContinue_generic(zbc, dst, dstCapacityPtr, src, srcSizePtr, zbf_gather);   /* zbf_gather : the generic routine stops and waits for more input when a full block cannot be loaded */
 }



-/* *** Finalize *** */
+/* ======   Finalize   ====== */

 size_t ZBUFF_compressFlush(ZBUFF_CCtx* zbc, void* dst, size_t* dstCapacityPtr)
 {
    size_t srcSize = 0;
-    ZBUFF_compressContinue_generic(zbc, dst, dstCapacityPtr, &srcSize, &srcSize, 1);  /* use a valid src address instead of NULL */
+    ZBUFF_compressContinue_generic(zbc, dst, dstCapacityPtr, &srcSize, &srcSize, zbf_flush);  /* use a valid src address instead of NULL */ /* NOTE(review): the value returned by this call is discarded, so an error code from it would not reach the caller */
    return zbc->outBuffContentSize - zbc->outBuffFlushedSize;   /* bytes still held in outBuff and not yet copied to dst */
 }

@ -296,15 +306,18 @@ size_t ZBUFF_compressEnd(ZBUFF_CCtx* zbc, void* dst, size_t* dstCapacityPtr)
    if (zbc->stage != ZBUFFcs_final) {
        /* flush whatever remains */
        size_t outSize = *dstCapacityPtr;
-        size_t const remainingToFlush = ZBUFF_compressFlush(zbc, dst, &outSize);
+        size_t srcSize = 0;
+        size_t const notEnded = ZBUFF_compressContinue_generic(zbc, dst, &outSize, &srcSize, &srcSize, zbf_end);  /* use a valid address instead of NULL */
+        size_t const remainingToFlush = zbc->outBuffContentSize - zbc->outBuffFlushedSize;
        op += outSize;
        if (remainingToFlush) {
            *dstCapacityPtr = op-ostart;
-            return remainingToFlush + ZBUFF_endFrameSize;
+            return remainingToFlush + ZBUFF_endFrameSize + (zbc->checksum * 4);
        }
        /* create epilogue */
        zbc->stage = ZBUFFcs_final;
-        zbc->outBuffContentSize = ZSTD_compressEnd(zbc->zc, zbc->outBuff, zbc->outBuffSize); /* epilogue into outBuff */
+        zbc->outBuffContentSize = !notEnded ? 0 :
+            ZSTD_compressEnd(zbc->zc, zbc->outBuff, zbc->outBuffSize, NULL, 0);  /* write epilogue into outBuff */
    }

    /* flush epilogue */
@ -323,5 +336,5 @@ size_t ZBUFF_compressEnd(ZBUFF_CCtx* zbc, void* dst, size_t* dstCapacityPtr)
 /* *************************************
 *  Tool functions
 ***************************************/
-size_t ZBUFF_recommendedCInSize(void)  { return ZSTD_BLOCKSIZE_MAX; }
-size_t ZBUFF_recommendedCOutSize(void) { return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + ZBUFF_endFrameSize; }
+size_t ZBUFF_recommendedCInSize(void)  { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; }   /* suggested input buffer size : ZSTD_BLOCKSIZE_ABSOLUTEMAX bytes */
+size_t ZBUFF_recommendedCOutSize(void) { return ZSTD_compressBound(ZSTD_BLOCKSIZE_ABSOLUTEMAX) + ZSTD_blockHeaderSize + ZBUFF_endFrameSize; }   /* suggested output buffer size : compress bound for that block size, plus ZSTD_blockHeaderSize and ZBUFF_endFrameSize */
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
--- a/lib/compress/zstd_opt.h
+++ b/lib/compress/zstd_opt.h
@ -134,15 +134,7 @@ FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* ssPtr, U32 litLength, const BY
    }

    /* literal Length */
-    {   static const BYTE LL_Code[64] = {  0,  1,  2,  3,  4,  5,  6,  7,
-                                           8,  9, 10, 11, 12, 13, 14, 15,
-                                          16, 16, 17, 17, 18, 18, 19, 19,
-                                          20, 20, 20, 20, 21, 21, 21, 21,
-                                          22, 22, 22, 22, 22, 22, 22, 22,
-                                          23, 23, 23, 23, 23, 23, 23, 23,
-                                          24, 24, 24, 24, 24, 24, 24, 24,
-                                          24, 24, 24, 24, 24, 24, 24, 24 };
-        const BYTE LL_deltaCode = 19;
+    {   const BYTE LL_deltaCode = 19;
        const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
        price += LL_bits[llCode] + ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[llCode]+1);
    }
@ -158,15 +150,7 @@ FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYT
    U32 price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode]+1);

    /* match Length */
-    {   static const BYTE ML_Code[128] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
-                                          16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
-                                          32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
-                                          38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
-                                          40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
-                                          41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
-                                          42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
-                                          42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
-        const BYTE ML_deltaCode = 36;
+    {   const BYTE ML_deltaCode = 36;
        const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength];
        price += ML_bits[mlCode] + seqStorePtr->log2matchLengthSum - ZSTD_highbit32(seqStorePtr->matchLengthFreq[mlCode]+1);
    }
@ -185,15 +169,7 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B
        seqStorePtr->litFreq[literals[u]]++;

    /* literal Length */
-    {   static const BYTE LL_Code[64] = {  0,  1,  2,  3,  4,  5,  6,  7,
-                                           8,  9, 10, 11, 12, 13, 14, 15,
-                                          16, 16, 17, 17, 18, 18, 19, 19,
-                                          20, 20, 20, 20, 21, 21, 21, 21,
-                                          22, 22, 22, 22, 22, 22, 22, 22,
-                                          23, 23, 23, 23, 23, 23, 23, 23,
-                                          24, 24, 24, 24, 24, 24, 24, 24,
-                                          24, 24, 24, 24, 24, 24, 24, 24 };
-        const BYTE LL_deltaCode = 19;
+    {   const BYTE LL_deltaCode = 19;
        const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
        seqStorePtr->litLengthFreq[llCode]++;
        seqStorePtr->litLengthSum++;
@ -206,15 +182,7 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B
 	}

    /* match Length */
-    {   static const BYTE ML_Code[128] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
-                                          16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
-                                          32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
-                                          38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
-                                          40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
-                                          41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
-                                          42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
-                                          42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
-        const BYTE ML_deltaCode = 36;
+    {   const BYTE ML_deltaCode = 36;
        const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength];
        seqStorePtr->matchLengthFreq[mlCode]++;
        seqStorePtr->matchLengthSum++;
@ -464,13 +432,14 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
    ZSTD_optimal_t* opt = seqStorePtr->priceTable;
    ZSTD_match_t* matches = seqStorePtr->matchTable;
    const BYTE* inr;
-    U32 offset, rep[ZSTD_REP_INIT];
+    U32 offset, rep[ZSTD_REP_NUM];

    /* init */
    ctx->nextToUpdate3 = ctx->nextToUpdate;
    ZSTD_rescaleFreqs(seqStorePtr);
    ip += (ip==prefixStart);
-    { U32 i; for (i=0; i<ZSTD_REP_INIT; i++) rep[i]=ctx->rep[i]; }
+    { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; }
+    inr = ip;

    ZSTD_LOG_BLOCK("%d: COMPBLOCK_OPT_GENERIC srcSz=%d maxSrch=%d mls=%d sufLen=%d\n", (int)(ip-base), (int)srcSize, maxSearches, mls, sufficient_len);

@ -484,7 +453,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,

        /* check repCode */
        {   U32 i;
-            for (i=0; i<ZSTD_REP_NUM; i++) {
+            for (i=(ip == anchor); i<ZSTD_REP_CHECK; i++) {
                if ((rep[i]<(U32)(ip-prefixStart))
                    && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(ip - rep[i], minMatch))) {
                    mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-rep[i], iend) + minMatch;
@ -493,7 +462,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
                        best_mlen = mlen; best_off = i; cur = 0; last_pos = 1;
                        goto _storeSequence;
                    }
-                    best_off = (i<=1 && ip == anchor) ? 1-i : i;
+                    best_off = i - (ip == anchor);
                    do {
                        price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH);
                        if (mlen > last_pos || price < opt[mlen].price)
@ -531,7 +500,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
        if (last_pos < minMatch) { ip++; continue; }

        /* initialize opt[0] */
-        { U32 i ; for (i=0; i<ZSTD_REP_INIT; i++) opt[0].rep[i] = rep[i]; }
+        { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
        opt[0].mlen = 1;
        opt[0].litlen = litlen;

@ -575,19 +544,21 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,

           best_mlen = minMatch;
           {   U32 i;
-               for (i=0; i<ZSTD_REP_NUM; i++) {
+               for (i=(opt[cur].mlen != 1); i<ZSTD_REP_CHECK; i++) {  /* check rep */
                   if ((opt[cur].rep[i]<(U32)(inr-prefixStart))
-                       && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - opt[cur].rep[i], minMatch))) {  /* check rep */
+                       && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - opt[cur].rep[i], minMatch))) {
                       mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - opt[cur].rep[i], iend) + minMatch;
                       ZSTD_LOG_PARSER("%d: Found REP %d/%d mlen=%d off=%d rep=%d opt[%d].off=%d\n", (int)(inr-base), i, ZSTD_REP_NUM, mlen, i, opt[cur].rep[i], cur, opt[cur].off);

                       if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
-                            ZSTD_LOG_PARSER("%d: REP sufficient_len=%d best_mlen=%d best_off=%d last_pos=%d\n", (int)(inr-base), sufficient_len, best_mlen, best_off, last_pos);
                            best_mlen = mlen; best_off = i; last_pos = cur + 1;
+                            ZSTD_LOG_PARSER("%d: REP sufficient_len=%d best_mlen=%d best_off=%d last_pos=%d\n", (int)(inr-base), sufficient_len, best_mlen, best_off, last_pos);
                            goto _storeSequence;
                       }

-                       best_off = (i<=1 && opt[cur].mlen != 1) ? 1-i : i;
+                       //best_off = ((i<=1) & (opt[cur].mlen != 1)) ? 1-i : i;
+                       best_off = i - (opt[cur].mlen != 1);
+
                       if (opt[cur].mlen == 1) {
                            litlen = opt[cur].litlen;
                            if (cur > litlen) {
@ -692,7 +663,8 @@ _storeSequence:   /* cur, last_pos, best_mlen, best_off have to be set */
                    rep[1] = rep[0];
                    rep[0] = best_off;
                }
-                if (litLength == 0 && offset<=1) offset = 1-offset;
+                if ((litLength == 0) & (offset==0)) offset = rep[1];  /* protection, but should never happen */
+                if ((litLength == 0) & (offset<=2)) offset--;
            }

            ZSTD_LOG_ENCODE("%d/%d: ENCODE literals=%d mlen=%d off=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[0], (int)rep[1]);
@ -755,12 +727,13 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
    const BYTE* inr;

    /* init */
-    U32 offset, rep[ZSTD_REP_INIT];
-    { U32 i; for (i=0; i<ZSTD_REP_INIT; i++) rep[i]=ctx->rep[i]; }
+    U32 offset, rep[ZSTD_REP_NUM];
+    { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; }

    ctx->nextToUpdate3 = ctx->nextToUpdate;
    ZSTD_rescaleFreqs(seqStorePtr);
    ip += (ip==prefixStart);
+    inr = ip;

    ZSTD_LOG_BLOCK("%d: COMPBLOCK_OPT_EXTDICT srcSz=%d maxSrch=%d mls=%d sufLen=%d\n", (int)(ip-base), (int)srcSize, maxSearches, mls, sufficient_len);

@ -776,11 +749,12 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,

        /* check repCode */
        {   U32 i;
-            for (i=0; i<ZSTD_REP_NUM; i++) {
+            for (i = (ip==anchor); i<ZSTD_REP_CHECK; i++) {
                const U32 repIndex = (U32)(current - rep[i]);
                const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                const BYTE* const repMatch = repBase + repIndex;
-                if ( (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex))  /* intentional overflow */
+                if ( (rep[i] <= current)
+                   && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex))  /* intentional overflow */
                   && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) {
                    /* repcode detected we should take it */
                    const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@ -792,7 +766,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
                        goto _storeSequence;
                    }

-                    best_off = (i<=1 && ip == anchor) ? 1-i : i;
+                    best_off = i - (ip==anchor);
                    litlen = opt[0].litlen;
                    do {
                        price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH);
@ -807,7 +781,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
        ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos);
        if (!last_pos && !match_num) { ip++; continue; }

-        { U32 i; for (i=0; i<ZSTD_REP_INIT; i++) opt[0].rep[i] = rep[i]; }
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
        opt[0].mlen = 1;

        if (match_num && (matches[match_num-1].len > sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) {
@ -878,11 +852,12 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
            best_mlen = 0;

            {   U32 i;
-                for (i=0; i<ZSTD_REP_NUM; i++) {
+                for (i = (opt[cur].mlen != 1); i<ZSTD_REP_CHECK; i++) {
                    const U32 repIndex = (U32)(current+cur - opt[cur].rep[i]);
                    const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                    const BYTE* const repMatch = repBase + repIndex;
-                    if ( (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex))  /* intentional overflow */
+                    if ( (opt[cur].rep[i] <= current+cur)
+                      && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex))  /* intentional overflow */
                      && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) {
                        /* repcode detected */
                        const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@ -890,12 +865,12 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
                        ZSTD_LOG_PARSER("%d: Found REP %d/%d mlen=%d off=%d rep=%d opt[%d].off=%d\n", (int)(inr-base), i, ZSTD_REP_NUM, mlen, i, opt[cur].rep[i], cur, opt[cur].off);

                        if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
-                            ZSTD_LOG_PARSER("%d: REP sufficient_len=%d best_mlen=%d best_off=%d last_pos=%d\n", (int)(inr-base), sufficient_len, best_mlen, best_off, last_pos);
                            best_mlen = mlen; best_off = i; last_pos = cur + 1;
+                            ZSTD_LOG_PARSER("%d: REP sufficient_len=%d best_mlen=%d best_off=%d last_pos=%d\n", (int)(inr-base), sufficient_len, best_mlen, best_off, last_pos);
                            goto _storeSequence;
                        }

-                        best_off = (i<=1 && opt[cur].mlen != 1) ? 1-i : i;
+                        best_off = i - (opt[cur].mlen != 1);
                        if (opt[cur].mlen == 1) {
                            litlen = opt[cur].litlen;
                            if (cur > litlen) {
@ -1001,8 +976,9 @@ _storeSequence:   /* cur, last_pos, best_mlen, best_off have to be set */
                    if (offset != 1) rep[2] = rep[1];
                    rep[1] = rep[0];
                    rep[0] = best_off;
-                 }
-                 if (litLength == 0 && offset<=1) offset = 1-offset;
+                }
+                if ((litLength==0) & (offset==0)) offset = rep[1];  /* protection, but should never happen */
+                if ((litLength==0) & (offset<=2)) offset --;
            }

            ZSTD_LOG_ENCODE("%d/%d: ENCODE literals=%d mlen=%d off=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[0], (int)rep[1]);
@ -1016,7 +992,7 @@ _storeSequence:   /* cur, last_pos, best_mlen, best_off have to be set */
                    ml2 = ZSTD_count_2segments(ip, match, iend, dictEnd, prefixStart);
                    ZSTD_LOG_PARSER("%d: ZSTD_count_2segments=%d offset=%d dictBase=%p dictEnd=%p prefixStart=%p ip=%p match=%p\n", (int)current, (int)ml2, (int)best_off, dictBase, dictEnd, prefixStart, ip, match);
                }
-                else ml2 = (U32)ZSTD_count(ip, ip-offset, iend);
+                else ml2 = (U32)ZSTD_count(ip, ip-best_off, iend);
            }
            else ml2 = (U32)ZSTD_count(ip, ip-rep[0], iend);
            if ((offset >= 8) && (ml2 < mlen || ml2 < minMatch)) {
@ -1033,7 +1009,7 @@ _storeSequence:   /* cur, last_pos, best_mlen, best_off have to be set */
    }    }   /* for (cur=0; cur < last_pos; ) */

    /* Save reps for next block */
-    ctx->savedRep[0] = rep[0]; ctx->savedRep[1] = rep[1]; ctx->savedRep[2] = rep[2];
+    { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->savedRep[i] = rep[i]; }

    /* Last Literals */
    {   size_t lastLLSize = iend - anchor;
--- a/lib/decompress/zbuff_decompress.c
+++ b/lib/decompress/zbuff_decompress.c
@ -158,9 +158,9 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbd,
    char* const ostart = (char*)dst;
    char* const oend = ostart + *dstCapacityPtr;
    char* op = ostart;
-    U32 notDone = 1;
+    U32 someMoreWork = 1;

-    while (notDone) {
+    while (someMoreWork) {
        switch(zbd->stage)
        {
        case ZBUFFds_init :
@ -168,9 +168,9 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbd,

        case ZBUFFds_loadHeader :
            {   size_t const hSize = ZSTD_getFrameParams(&(zbd->fParams), zbd->headerBuffer, zbd->lhSize);
-                if (hSize != 0) {
+                if (ZSTD_isError(hSize)) return hSize;
+                if (hSize != 0) {   /* need more input */
                    size_t const toLoad = hSize - zbd->lhSize;   /* if hSize!=0, hSize > zbd->lhSize */
-                    if (ZSTD_isError(hSize)) return hSize;
                    if (toLoad > (size_t)(iend-ip)) {   /* not enough input to load full header */
                        memcpy(zbd->headerBuffer + zbd->lhSize, ip, iend-ip);
                        zbd->lhSize += iend-ip;
@ -184,7 +184,7 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbd,
            /* Consume header */
            {   size_t const h1Size = ZSTD_nextSrcSizeToDecompress(zbd->zd);  /* == ZSTD_frameHeaderSize_min */
                size_t const h1Result = ZSTD_decompressContinue(zbd->zd, NULL, 0, zbd->headerBuffer, h1Size);
-                if (ZSTD_isError(h1Result)) return h1Result;
+                if (ZSTD_isError(h1Result)) return h1Result;   /* should not happen : already checked */
                if (h1Size < zbd->lhSize) {   /* long header */
                    size_t const h2Size = ZSTD_nextSrcSizeToDecompress(zbd->zd);
                    size_t const h2Result = ZSTD_decompressContinue(zbd->zd, NULL, 0, zbd->headerBuffer+h1Size, h2Size);
@ -194,7 +194,8 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbd,
            zbd->fParams.windowSize = MAX(zbd->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);

            /* Frame header instruct buffer sizes */
-            {   size_t const blockSize = MIN(zbd->fParams.windowSize, ZSTD_BLOCKSIZE_MAX);
+            {   size_t const blockSize = MIN(zbd->fParams.windowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX);
+                size_t const neededOutSize = zbd->fParams.windowSize + blockSize;
                zbd->blockSize = blockSize;
                if (zbd->inBuffSize < blockSize) {
                    zbd->customMem.customFree(zbd->customMem.opaque, zbd->inBuff);
@ -202,20 +203,20 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbd,
                    zbd->inBuff = (char*)zbd->customMem.customAlloc(zbd->customMem.opaque, blockSize);
                    if (zbd->inBuff == NULL) return ERROR(memory_allocation);
                }
-                {   size_t const neededOutSize = zbd->fParams.windowSize + blockSize;
-                    if (zbd->outBuffSize < neededOutSize) {
-                        zbd->customMem.customFree(zbd->customMem.opaque, zbd->outBuff);
-                        zbd->outBuffSize = neededOutSize;
-                        zbd->outBuff = (char*)zbd->customMem.customAlloc(zbd->customMem.opaque, neededOutSize);
-                        if (zbd->outBuff == NULL) return ERROR(memory_allocation);
-            }   }   }
+                if (zbd->outBuffSize < neededOutSize) {
+                    zbd->customMem.customFree(zbd->customMem.opaque, zbd->outBuff);
+                    zbd->outBuffSize = neededOutSize;
+                    zbd->outBuff = (char*)zbd->customMem.customAlloc(zbd->customMem.opaque, neededOutSize);
+                    if (zbd->outBuff == NULL) return ERROR(memory_allocation);
+            }   }
            zbd->stage = ZBUFFds_read;
+            /* pass-through */

        case ZBUFFds_read:
            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zbd->zd);
                if (neededInSize==0) {  /* end of frame */
                    zbd->stage = ZBUFFds_init;
-                    notDone = 0;
+                    someMoreWork = 0;
                    break;
                }
                if ((size_t)(iend-ip) >= neededInSize) {  /* decode directly from src */
@ -230,8 +231,9 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbd,
                    zbd->stage = ZBUFFds_flush;
                    break;
                }
-                if (ip==iend) { notDone = 0; break; }   /* no more input */
+                if (ip==iend) { someMoreWork = 0; break; }   /* no more input */
                zbd->stage = ZBUFFds_load;
+                /* pass-through */
            }

        case ZBUFFds_load:
@ -242,7 +244,7 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbd,
                loadedSize = ZBUFF_limitCopy(zbd->inBuff + zbd->inPos, toLoad, ip, iend-ip);
                ip += loadedSize;
                zbd->inPos += loadedSize;
-                if (loadedSize < toLoad) { notDone = 0; break; }   /* not enough input, wait for more */
+                if (loadedSize < toLoad) { someMoreWork = 0; break; }   /* not enough input, wait for more */

                /* decode loaded input */
                {  const int isSkipFrame = ZSTD_isSkipFrame(zbd->zd);
@ -254,7 +256,7 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbd,
                    if (!decodedSize && !isSkipFrame) { zbd->stage = ZBUFFds_read; break; }   /* this was just a header */
                    zbd->outEnd = zbd->outStart +  decodedSize;
                    zbd->stage = ZBUFFds_flush;
-                    // break; /* ZBUFFds_flush follows */
+                    /* pass-through */
            }   }

        case ZBUFFds_flush:
@ -262,14 +264,14 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbd,
                size_t const flushedSize = ZBUFF_limitCopy(op, oend-op, zbd->outBuff + zbd->outStart, toFlushSize);
                op += flushedSize;
                zbd->outStart += flushedSize;
-                if (flushedSize == toFlushSize) {
+                if (flushedSize == toFlushSize) {  /* flush completed */
                    zbd->stage = ZBUFFds_read;
                    if (zbd->outStart + zbd->blockSize > zbd->outBuffSize)
                        zbd->outStart = zbd->outEnd = 0;
                    break;
                }
                /* cannot flush everything */
-                notDone = 0;
+                someMoreWork = 0;
                break;
            }
        default: return ERROR(GENERIC);   /* impossible */
@ -279,16 +281,17 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbd,
    *srcSizePtr = ip-istart;
    *dstCapacityPtr = op-ostart;
    {   size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zbd->zd);
-//        if (nextSrcSizeHint > ZSTD_blockHeaderSize) nextSrcSizeHint+= ZSTD_blockHeaderSize;   /* get following block header too */
+        if (!nextSrcSizeHint) return (zbd->outEnd != zbd->outStart);   /* return 0 only if fully flushed too */
+        nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zbd->zd) == ZSTDnit_block);
+        if (zbd->inPos > nextSrcSizeHint) return ERROR(GENERIC);   /* should never happen */
        nextSrcSizeHint -= zbd->inPos;   /* already loaded*/
        return nextSrcSizeHint;
    }
 }


-
 /* *************************************
 *  Tool functions
 ***************************************/
-size_t ZBUFF_recommendedDInSize(void)  { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize /* block header size*/ ; }
-size_t ZBUFF_recommendedDOutSize(void) { return ZSTD_BLOCKSIZE_MAX; }
+size_t ZBUFF_recommendedDInSize(void)  { return ZSTD_BLOCKSIZE_ABSOLUTEMAX + ZSTD_blockHeaderSize /* block header size */ ; }   /* same cap as applied to blockSize in ZBUFF_decompressContinue(), plus one block header */
+size_t ZBUFF_recommendedDOutSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; }   /* same cap as applied to blockSize in ZBUFF_decompressContinue() */
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@ -105,6 +105,7 @@ static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
 ***************************************************************/
 typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
               ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock,
+               ZSTDds_decompressLastBlock, ZSTDds_checkChecksum,   /* NOTE(review): stages added by this change; presumably entered for a block flagged lastBlock and for the frame checksum - confirm in ZSTD_decompressContinue() */
               ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage;

 struct ZSTD_DCtx_s
@ -118,9 +119,9 @@ struct ZSTD_DCtx_s
    const void* vBase;
    const void* dictEnd;
    size_t expected;
-    U32 rep[3];
+    U32 rep[ZSTD_REP_NUM];
    ZSTD_frameParams fParams;
-    blockType_t bType;   /* used in ZSTD_decompressContinue(), to transfer blockType between header decoding and block decoding stages */
+    blockType_e bType;   /* used in ZSTD_decompressContinue(), to transfer blockType between header decoding and block decoding stages */
    ZSTD_dStage stage;
    U32 litEntropy;
    U32 fseEntropy;
@ -131,7 +132,8 @@ struct ZSTD_DCtx_s
    ZSTD_customMem customMem;
    size_t litBufSize;
    size_t litSize;
-    BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
+    size_t rleSize;
+    BYTE litBuffer[ZSTD_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH];
    BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
 };  /* typedef'd to ZSTD_DCtx within "zstd_static.h" */

@ -186,7 +188,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
 void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
 {
    memcpy(dstDCtx, srcDCtx,
-           sizeof(ZSTD_DCtx) - (ZSTD_BLOCKSIZE_MAX+WILDCOPY_OVERLENGTH + ZSTD_frameHeaderSize_max));  /* no need to copy workspace */
+           sizeof(ZSTD_DCtx) - (ZSTD_BLOCKSIZE_ABSOLUTEMAX+WILDCOPY_OVERLENGTH + ZSTD_frameHeaderSize_max));  /* no need to copy workspace : the subtracted size matches the trailing litBuffer[] (ZSTD_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH) plus headerBuffer[], assuming ZSTD_frameHeaderSize_max == ZSTD_FRAMEHEADERSIZE_MAX - TODO confirm */
 }


@ -194,117 +196,7 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
 *   Decompression section
 ***************************************************************/

-/* Frame format description
-   Frame Header -  [ Block Header - Block ] - Frame End
-   1) Frame Header
-      - 4 bytes - Magic Number : ZSTD_MAGICNUMBER (defined within zstd.h)
-      - 1 byte  - Frame Descriptor
-   2) Block Header
-      - 3 bytes, starting with a 2-bits descriptor
-                 Uncompressed, Compressed, Frame End, unused
-   3) Block
-      See Block Format Description
-   4) Frame End
-      - 3 bytes, compatible with Block Header
-*/
-
-
-/* Frame Header :
-
-   1 byte - FrameHeaderDescription :
-   bit 0-1 : dictID (0, 1, 2 or 4 bytes)
-   bit 2   : checksumFlag
-   bit 3   : reserved (must be zero)
-   bit 4   : reserved (unused, can be any value)
-   bit 5   : Single Segment (if 1, WindowLog byte is not present)
-   bit 6-7 : FrameContentFieldSize (0, 2, 4, or 8)
-             if (SkippedWindowLog && !FrameContentFieldsize) FrameContentFieldsize=1;
-
-   Optional : WindowLog (0 or 1 byte)
-   bit 0-2 : octal Fractional (1/8th)
-   bit 3-7 : Power of 2, with 0 = 1 KB (up to 2 TB)
-
-   Optional : dictID (0, 1, 2 or 4 bytes)
-   Automatic adaptation
-   0 : no dictID
-   1 : 1 - 255
-   2 : 256 - 65535
-   4 : all other values
-
-   Optional : content size (0, 1, 2, 4 or 8 bytes)
-   0 : unknown          (fcfs==0 and swl==0)
-   1 : 0-255 bytes      (fcfs==0 and swl==1)
-   2 : 256 - 65535+256  (fcfs==1)
-   4 : 0 - 4GB-1        (fcfs==2)
-   8 : 0 - 16EB-1       (fcfs==3)
-*/
-
-
-/* Compressed Block, format description
-
-   Block = Literal Section - Sequences Section
-   Prerequisite : size of (compressed) block, maximum size of regenerated data
-
-   1) Literal Section
-
-   1.1) Header : 1-5 bytes
-        flags: 2 bits
-            00 compressed by Huff0
-            01 unused
-            10 is Raw (uncompressed)
-            11 is Rle
-            Note : using 01 => Huff0 with precomputed table ?
-            Note : delta map ? => compressed ?
-
-   1.1.1) Huff0-compressed literal block : 3-5 bytes
-            srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream
-            srcSize < 1 KB => 3 bytes (2-2-10-10)
-            srcSize < 16KB => 4 bytes (2-2-14-14)
-            else           => 5 bytes (2-2-18-18)
-            big endian convention
-
-   1.1.2) Raw (uncompressed) literal block header : 1-3 bytes
-        size :  5 bits: (IS_RAW<<6) + (0<<4) + size
-               12 bits: (IS_RAW<<6) + (2<<4) + (size>>8)
-                        size&255
-               20 bits: (IS_RAW<<6) + (3<<4) + (size>>16)
-                        size>>8&255
-                        size&255
-
-   1.1.3) Rle (repeated single byte) literal block header : 1-3 bytes
-        size :  5 bits: (IS_RLE<<6) + (0<<4) + size
-               12 bits: (IS_RLE<<6) + (2<<4) + (size>>8)
-                        size&255
-               20 bits: (IS_RLE<<6) + (3<<4) + (size>>16)
-                        size>>8&255
-                        size&255
-
-   1.1.4) Huff0-compressed literal block, using precomputed CTables : 3-5 bytes
-            srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream
-            srcSize < 1 KB => 3 bytes (2-2-10-10)
-            srcSize < 16KB => 4 bytes (2-2-14-14)
-            else           => 5 bytes (2-2-18-18)
-            big endian convention
-
-        1- CTable available (stored into workspace ?)
-        2- Small input (fast heuristic ? Full comparison ? depend on clevel ?)
-
-
-   1.2) Literal block content
-
-   1.2.1) Huff0 block, using sizes from header
-        See Huff0 format
-
-   1.2.2) Huff0 block, using prepared table
-
-   1.2.3) Raw content
-
-   1.2.4) single byte
-
-
-   2) Sequences section
-      TO DO
-*/
+/* See compression format details in : zstd_compression_format.md */

 /** ZSTD_frameHeaderSize() :
 *   srcSize must be >= ZSTD_frameHeaderSize_min.
@ -314,10 +206,10 @@ static size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
    if (srcSize < ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong);
    {   BYTE const fhd = ((const BYTE*)src)[4];
        U32 const dictID= fhd & 3;
-        U32 const directMode = (fhd >> 5) & 1;
+        U32 const singleSegment = (fhd >> 5) & 1;
        U32 const fcsId = fhd >> 6;
-        return ZSTD_frameHeaderSize_min + !directMode + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId]
-                + (directMode && !ZSTD_fcs_fieldSize[fcsId]);
+        return ZSTD_frameHeaderSize_min + !singleSegment + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId]
+                + (singleSegment && !ZSTD_fcs_fieldSize[fcsId]);
    }
 }

@ -351,14 +243,14 @@ size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t
        size_t pos = 5;
        U32 const dictIDSizeCode = fhdByte&3;
        U32 const checksumFlag = (fhdByte>>2)&1;
-        U32 const directMode = (fhdByte>>5)&1;
+        U32 const singleSegment = (fhdByte>>5)&1;
        U32 const fcsID = fhdByte>>6;
        U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX;
        U32 windowSize = 0;
        U32 dictID = 0;
        U64 frameContentSize = 0;
        if ((fhdByte & 0x08) != 0) return ERROR(frameParameter_unsupported);   /* reserved bits, which must be zero */
-        if (!directMode) {
+        if (!singleSegment) {
            BYTE const wlByte = ip[pos++];
            U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
            if (windowLog > ZSTD_WINDOWLOG_MAX) return ERROR(frameParameter_unsupported);
@ -377,7 +269,7 @@ size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t
        switch(fcsID)
        {
            default:   /* impossible */
-            case 0 : if (directMode) frameContentSize = ip[pos]; break;
+            case 0 : if (singleSegment) frameContentSize = ip[pos]; break;
            case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break;
            case 2 : frameContentSize = MEM_readLE32(ip+pos); break;
            case 3 : frameContentSize = MEM_readLE64(ip+pos); break;
@ -397,9 +289,9 @@ size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t
 *   compatible with legacy mode
 *   @return : decompressed size if known, 0 otherwise
              note : 0 can mean any of the following :
-                   - decompressed size is not provided within frame header
+                   - decompressed size is not present within frame header
                   - frame header unknown / not supported
-                   - frame header not completely provided (`srcSize` too small) */
+                   - frame header not complete (`srcSize` too small) */
 unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize)
 {
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
@ -427,7 +319,8 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t sr

 typedef struct
 {
-    blockType_t blockType;
+    blockType_e blockType;   /* filled by ZSTD_getcBlockSize() from bits 1-2 of the block header */
+    U32 lastBlock;   /* filled by ZSTD_getcBlockSize() from bit 0 of the block header */
    U32 origSize;
 } blockProperties_t;

@ -435,18 +328,16 @@ typedef struct
 *   Provides the size of compressed block from block header `src` */
 size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
 {
-    const BYTE* const in = (const BYTE* const)src;
-    U32 cSize;
-
    if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
-
-    bpPtr->blockType = (blockType_t)((*in) >> 6);
-    cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
-    bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
-
-    if (bpPtr->blockType == bt_end) return 0;
-    if (bpPtr->blockType == bt_rle) return 1;
-    return cSize;
+    {   U32 const cBlockHeader = MEM_readLE24(src);   /* whole block header as one value, read via MEM_readLE24() (presumably 3 bytes, little-endian - confirm in mem.h) */
+        U32 const cSize = cBlockHeader >> 3;   /* bits 3 and above : size field */
+        bpPtr->lastBlock = cBlockHeader & 1;   /* bit 0 : last-block flag */
+        bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);   /* bits 1-2 : block type */
+        bpPtr->origSize = cSize;   /* only useful for RLE */
+        if (bpPtr->blockType == bt_rle) return 1;   /* RLE : reported block body is a single byte; the size field stays available in origSize */
+        if (bpPtr->blockType == bt_reserved) return ERROR(corruption_detected);   /* reserved type is rejected as corrupted input */
+        return cSize;   /* any other type : the size field is the size of the block body */
+    }
 }


@ -458,136 +349,143 @@ static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, const void* src,
 }


+static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize, size_t regenSize)
+{   /* fills dst with regenSize copies of the single byte found at src; @return : regenSize, or an error code */
+    if (srcSize != 1) return ERROR(srcSize_wrong);   /* the source of an RLE block must be exactly one byte */
+    if (regenSize > dstCapacity) return ERROR(dstSize_tooSmall);   /* checked before writing, so dst is never overrun */
+    memset(dst, *(const BYTE*)src, regenSize);
+    return regenSize;   /* nb of bytes written into dst */
+}
+
 /*! ZSTD_decodeLiteralsBlock() :
    @return : nb of bytes read from src (< srcSize ) */
 size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                          const void* src, size_t srcSize)   /* note : srcSize < BLOCKSIZE */
 {
-    const BYTE* const istart = (const BYTE*) src;
-
    if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);

-    switch((litBlockType_t)(istart[0]>> 6))
-    {
-    case lbt_huffman:
-        {   size_t litSize, litCSize, singleStream=0;
-            U32 lhSize = (istart[0] >> 4) & 3;
-            if (srcSize < 5) return ERROR(corruption_detected);   /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for lhSize, + cSize (+nbSeq) */
-            switch(lhSize)
-            {
-            case 0: case 1: default:   /* note : default is impossible, since lhSize into [0..3] */
-                /* 2 - 2 - 10 - 10 */
-                lhSize=3;
-                singleStream = istart[0] & 16;
-                litSize  = ((istart[0] & 15) << 6) + (istart[1] >> 2);
-                litCSize = ((istart[1] &  3) << 8) + istart[2];
-                break;
-            case 2:
-                /* 2 - 2 - 14 - 14 */
-                lhSize=4;
-                litSize  = ((istart[0] & 15) << 10) + (istart[1] << 2) + (istart[2] >> 6);
-                litCSize = ((istart[2] & 63) <<  8) + istart[3];
-                break;
-            case 3:
-                /* 2 - 2 - 18 - 18 */
-                lhSize=5;
-                litSize  = ((istart[0] & 15) << 14) + (istart[1] << 6) + (istart[2] >> 2);
-                litCSize = ((istart[2] &  3) << 16) + (istart[3] << 8) + istart[4];
-                break;
-            }
-            if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
-            if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
+    {   const BYTE* const istart = (const BYTE*) src;   /* start of the literals section : header first, payload after lhSize bytes */
+        symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);   /* bits 0-1 of the first byte : literals encoding type */

-            if (HUF_isError(singleStream ?
-                            HUF_decompress1X2_DCtx(dctx->hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize) :
-                            HUF_decompress4X_hufOnly (dctx->hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize) ))
-                return ERROR(corruption_detected);
+        switch(litEncType)
+        {
+        case set_repeat:
+            if (dctx->litEntropy==0) return ERROR(dictionary_corrupted);   /* nothing to repeat : litEntropy is still 0 */
+            /* fall-through */
+        case set_compressed:
+            if (srcSize < 5) return ERROR(corruption_detected);   /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */
+            {   size_t lhSize, litSize, litCSize;
+                U32 singleStream=0;
+                U32 const lhlCode = (istart[0] >> 2) & 3;   /* bits 2-3 of the first byte : selects header size (and stream count for codes 0/1) */
+                U32 const lhc = MEM_readLE32(istart);   /* first 4 header bytes as one value; size fields are extracted below by shift and mask */
+                switch(lhlCode)
+                {
+                case 0: case 1: default:   /* note : default is impossible, since lhlCode into [0..3] */
+                    /* 2 - 2 - 10 - 10  (type - lhlCode - litSize - litCSize), 3 bytes */
+                    {   singleStream = !lhlCode;   /* lhlCode 0 : single stream; lhlCode 1 : 4 streams */
+                        lhSize = 3;
+                        litSize  = (lhc >> 4) & 0x3FF;
+                        litCSize = (lhc >> 14) & 0x3FF;
+                        break;
+                    }
+                case 2:
+                    /* 2 - 2 - 14 - 14  (type - lhlCode - litSize - litCSize), 4 bytes */
+                    {   lhSize = 4;
+                        litSize  = (lhc >> 4) & 0x3FFF;
+                        litCSize = lhc >> 18;   /* remaining 14 bits of the 32-bit value */
+                        break;
+                    }
+                case 3:
+                    /* 2 - 2 - 18 - 18  (type - lhlCode - litSize - litCSize), 5 bytes */
+                    {   lhSize = 5;
+                        litSize  = (lhc >> 4) & 0x3FFFF;
+                        litCSize = (lhc >> 22) + (istart[4] << 10);   /* 10 low bits from lhc + 8 high bits from the 5th byte = 18 bits */
+                        break;
+                    }
+                }
+                if (litSize > ZSTD_BLOCKSIZE_ABSOLUTEMAX) return ERROR(corruption_detected);   /* decoded literals must fit into litBuffer */
+                if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);   /* header + compressed literals must fit into src */

-            dctx->litPtr = dctx->litBuffer;
-            dctx->litBufSize = ZSTD_BLOCKSIZE_MAX+8;
-            dctx->litSize = litSize;
-            dctx->litEntropy = 1;
-            return litCSize + lhSize;
-        }
-    case lbt_repeat:
-        {   size_t litSize, litCSize;
-            U32 lhSize = ((istart[0]) >> 4) & 3;
-            if (lhSize != 1)  /* only case supported for now : small litSize, single stream */
-                return ERROR(corruption_detected);
-            if (dctx->litEntropy==0)
-                return ERROR(dictionary_corrupted);
+                if (HUF_isError((litEncType==set_repeat) ?   /* set_repeat : *_usingDTable variants take dctx->hufTable as input; set_compressed : other variants (presumably rebuild the table from the stream - confirm in huf.h) */
+                                    ( singleStream ?
+                                        HUF_decompress1X_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTable) :
+                                        HUF_decompress4X_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTable) ) :
+                                    ( singleStream ?
+                                        HUF_decompress1X2_DCtx(dctx->hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize) :
+                                        HUF_decompress4X_hufOnly (dctx->hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize)) ))
+                    return ERROR(corruption_detected);

-            /* 2 - 2 - 10 - 10 */
-            lhSize=3;
-            litSize  = ((istart[0] & 15) << 6) + (istart[1] >> 2);
-            litCSize = ((istart[1] &  3) << 8) + istart[2];
-            if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
-
-            {   size_t const errorCode = HUF_decompress1X4_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTable);
-                if (HUF_isError(errorCode)) return ERROR(corruption_detected);
-            }
-            dctx->litPtr = dctx->litBuffer;
-            dctx->litBufSize = ZSTD_BLOCKSIZE_MAX+WILDCOPY_OVERLENGTH;
-            dctx->litSize = litSize;
-            return litCSize + lhSize;
-        }
-    case lbt_raw:
-        {   size_t litSize;
-            U32 lhSize = ((istart[0]) >> 4) & 3;
-            switch(lhSize)
-            {
-            case 0: case 1: default:   /* note : default is impossible, since lhSize into [0..3] */
-                lhSize=1;
-                litSize = istart[0] & 31;
-                break;
-            case 2:
-                litSize = ((istart[0] & 15) << 8) + istart[1];
-                break;
-            case 3:
-                litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2];
-                break;
-            }
-
-            if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
-                if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
-                memcpy(dctx->litBuffer, istart+lhSize, litSize);
                dctx->litPtr = dctx->litBuffer;
-                dctx->litBufSize = ZSTD_BLOCKSIZE_MAX+8;
+                dctx->litBufSize = ZSTD_BLOCKSIZE_ABSOLUTEMAX+WILDCOPY_OVERLENGTH;
+                dctx->litSize = litSize;
+                dctx->litEntropy = 1;   /* lets a later set_repeat block pass the litEntropy check above */
+                return litCSize + lhSize;   /* nb of bytes consumed : header + compressed literals */
+            }
+
+        case set_basic:
+            {   size_t litSize, lhSize;
+                U32 const lhlCode = ((istart[0]) >> 2) & 3;   /* bits 2-3 of the first byte : selects header size */
+                switch(lhlCode)
+                {
+                case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
+                    lhSize = 1;
+                    litSize = istart[0] >> 3;   /* 5-bit size */
+                    break;
+                case 1:
+                    lhSize = 2;
+                    litSize = MEM_readLE16(istart) >> 4;   /* 12-bit size */
+                    break;
+                case 3:
+                    lhSize = 3;
+                    litSize = MEM_readLE24(istart) >> 4;   /* 20-bit size */
+                    break;
+                }
+
+                if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
+                    if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
+                    memcpy(dctx->litBuffer, istart+lhSize, litSize);   /* literals are copied into the context buffer in this case */
+                    dctx->litPtr = dctx->litBuffer;
+                    dctx->litBufSize = ZSTD_BLOCKSIZE_ABSOLUTEMAX+8;
+                    dctx->litSize = litSize;
+                    return lhSize+litSize;
+                }
+                /* direct reference into compressed stream */
+                dctx->litPtr = istart+lhSize;
+                dctx->litBufSize = srcSize-lhSize;
                dctx->litSize = litSize;
                return lhSize+litSize;
            }
-            /* direct reference into compressed stream */
-            dctx->litPtr = istart+lhSize;
-            dctx->litBufSize = srcSize-lhSize;
-            dctx->litSize = litSize;
-            return lhSize+litSize;
-        }
-    case lbt_rle:
-        {   size_t litSize;
-            U32 lhSize = ((istart[0]) >> 4) & 3;
-            switch(lhSize)
-            {
-            case 0: case 1: default:   /* note : default is impossible, since lhSize into [0..3] */
-                lhSize = 1;
-                litSize = istart[0] & 31;
-                break;
-            case 2:
-                litSize = ((istart[0] & 15) << 8) + istart[1];
-                break;
-            case 3:
-                litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2];
-                if (srcSize<4) return ERROR(corruption_detected);   /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */
-                break;
+
+        case set_rle:
+            {   U32 const lhlCode = ((istart[0]) >> 2) & 3;   /* bits 2-3 of the first byte : selects header size */
+                size_t litSize, lhSize;
+                switch(lhlCode)
+                {
+                case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
+                    lhSize = 1;
+                    litSize = istart[0] >> 3;   /* 5-bit size */
+                    break;
+                case 1:
+                    lhSize = 2;
+                    litSize = MEM_readLE16(istart) >> 4;   /* 12-bit size */
+                    break;
+                case 3:
+                    lhSize = 3;
+                    litSize = MEM_readLE24(istart) >> 4;   /* 20-bit size */
+                    if (srcSize<4) return ERROR(corruption_detected);   /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */
+                    break;
+                }
+                if (litSize > ZSTD_BLOCKSIZE_ABSOLUTEMAX) return ERROR(corruption_detected);   /* capacity guard for the memset into litBuffer below */
+                memset(dctx->litBuffer, istart[lhSize], litSize);   /* the byte right after the header is repeated litSize times */
+                dctx->litPtr = dctx->litBuffer;
+                dctx->litBufSize = ZSTD_BLOCKSIZE_ABSOLUTEMAX+WILDCOPY_OVERLENGTH;
+                dctx->litSize = litSize;
+                return lhSize+1;   /* nb of bytes consumed : header + the single repeated byte */
            }
-            if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
-            memset(dctx->litBuffer, istart[lhSize], litSize);
-            dctx->litPtr = dctx->litBuffer;
-            dctx->litBufSize = ZSTD_BLOCKSIZE_MAX+WILDCOPY_OVERLENGTH;
-            dctx->litSize = litSize;
-            return lhSize+1;
+        default:
+            return ERROR(corruption_detected);   /* impossible */
        }
-    default:
-        return ERROR(corruption_detected);   /* impossible */
+
    }
 }

@ -596,25 +494,25 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
    @return : nb bytes read from src,
              or an error code if it fails, testable with ZSTD_isError()
 */
-FORCE_INLINE size_t ZSTD_buildSeqTable(FSE_DTable* DTable, U32 type, U32 max, U32 maxLog,
+FORCE_INLINE size_t ZSTD_buildSeqTable(FSE_DTable* DTable, symbolEncodingType_e type, U32 max, U32 maxLog,
                                 const void* src, size_t srcSize,
                                 const S16* defaultNorm, U32 defaultLog, U32 flagRepeatTable)
 {
    switch(type)
    {
-    case FSE_ENCODING_RLE :
+    case set_rle :
        if (!srcSize) return ERROR(srcSize_wrong);
        if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected);
        FSE_buildDTable_rle(DTable, *(const BYTE*)src);   /* if *src > max, data is corrupted */
        return 1;
-    case FSE_ENCODING_RAW :
+    case set_basic :
        FSE_buildDTable(DTable, defaultNorm, max, defaultLog);
        return 0;
-    case FSE_ENCODING_STATIC:
+    case set_repeat:
        if (!flagRepeatTable) return ERROR(corruption_detected);
        return 0;
    default :   /* impossible */
-    case FSE_ENCODING_DYNAMIC :
+    case set_compressed :
        {   U32 tableLog;
            S16 norm[MaxSeq+1];
            size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
@ -650,14 +548,12 @@ size_t ZSTD_decodeSeqHeaders(int* nbSeqPtr,
    }

    /* FSE table descriptors */
-    {   U32 const LLtype  = *ip >> 6;
-        U32 const OFtype = (*ip >> 4) & 3;
-        U32 const MLtype  = (*ip >> 2) & 3;
+    if (ip+4 > iend) return ERROR(srcSize_wrong); /* minimum possible size */
+    {   symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
+        symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
+        symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
        ip++;

-        /* check */
-        if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
-
        /* Build DTables */
        {   size_t const llhSize = ZSTD_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog, flagRepeatTable);
            if (ZSTD_isError(llhSize)) return ERROR(corruption_detected);
@ -687,7 +583,7 @@ typedef struct {
    FSE_DState_t stateLL;
    FSE_DState_t stateOffb;
    FSE_DState_t stateML;
-    size_t prevOffset[ZSTD_REP_INIT];
+    size_t prevOffset[ZSTD_REP_NUM];
 } seqState_t;


@ -731,9 +627,9 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState)
        }

        if (ofCode <= 1) {
-            if ((llCode == 0) & (offset <= 1)) offset = 1-offset;
+            offset += (llCode==0);
            if (offset) {
-                size_t const temp = seqState->prevOffset[offset];
+                size_t const temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
                if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
                seqState->prevOffset[1] = seqState->prevOffset[0];
                seqState->prevOffset[0] = offset = temp;
@ -774,7 +670,7 @@ size_t ZSTD_execSequence(BYTE* op,
    BYTE* const oLitEnd = op + sequence.litLength;
    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
-    BYTE* const oend_w = oend-WILDCOPY_OVERLENGTH;
+    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
    const BYTE* match = oLitEnd - sequence.offset;

@ -867,7 +763,7 @@ static size_t ZSTD_decompressSequences(
    if (nbSeq) {
        seqState_t seqState;
        dctx->fseEntropy = 1;
-        { U32 i; for (i=0; i<ZSTD_REP_INIT; i++) seqState.prevOffset[i] = dctx->rep[i]; }
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->rep[i]; }
        { size_t const errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip);
          if (ERR_isError(errorCode)) return ERROR(corruption_detected); }
        FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
@ -885,12 +781,11 @@ static size_t ZSTD_decompressSequences(
        /* check if reached exact end */
        if (nbSeq) return ERROR(corruption_detected);
        /* save reps for next block */
-        { U32 i; for (i=0; i<ZSTD_REP_INIT; i++) dctx->rep[i] = (U32)(seqState.prevOffset[i]); }
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->rep[i] = (U32)(seqState.prevOffset[i]); }
    }

    /* last literal segment */
    {   size_t const lastLLSize = litEnd - litPtr;
-        //if (litPtr > litEnd) return ERROR(corruption_detected);   /* too many literals already used */
        if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
        memcpy(op, litPtr, lastLLSize);
        op += lastLLSize;
@ -917,7 +812,7 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
 {   /* blockType == blockCompressed */
    const BYTE* ip = (const BYTE*)src;

-    if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);
+    if (srcSize >= ZSTD_BLOCKSIZE_ABSOLUTEMAX) return ERROR(srcSize_wrong);

    /* Decode literals sub-block */
    {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
@ -966,7 +861,6 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
                                 const void* src, size_t srcSize)
 {
    const BYTE* ip = (const BYTE*)src;
-    const BYTE* const iend = ip + srcSize;
    BYTE* const ostart = (BYTE* const)dst;
    BYTE* const oend = ostart + dstCapacity;
    BYTE* op = ostart;
@ -977,9 +871,11 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,

    /* Frame Header */
    {   size_t const frameHeaderSize = ZSTD_frameHeaderSize(src, ZSTD_frameHeaderSize_min);
+        size_t result;
        if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
        if (srcSize < frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
-        if (ZSTD_decodeFrameHeader(dctx, src, frameHeaderSize)) return ERROR(corruption_detected);
+        result = ZSTD_decodeFrameHeader(dctx, src, frameHeaderSize);
+        if (ZSTD_isError(result)) return result;
        ip += frameHeaderSize; remainingSize -= frameHeaderSize;
    }

@ -987,7 +883,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
    while (1) {
        size_t decodedSize;
        blockProperties_t blockProperties;
-        size_t const cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
+        size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
        if (ZSTD_isError(cBlockSize)) return cBlockSize;

        ip += ZSTD_blockHeaderSize;
@ -1005,23 +901,29 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
        case bt_rle :
            decodedSize = ZSTD_generateNxBytes(op, oend-op, *ip, blockProperties.origSize);
            break;
-        case bt_end :
-            /* end of frame */
-            if (remainingSize) return ERROR(srcSize_wrong);
-            decodedSize = 0;
-            break;
+        case bt_reserved :
        default:
-            return ERROR(GENERIC);   /* impossible */
+            return ERROR(corruption_detected);
        }
-        if (cBlockSize == 0) break;   /* bt_end */

        if (ZSTD_isError(decodedSize)) return decodedSize;
        if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, op, decodedSize);
        op += decodedSize;
        ip += cBlockSize;
        remainingSize -= cBlockSize;
+        if (blockProperties.lastBlock) break;
    }

+    if (dctx->fParams.checksumFlag) {   /* Frame content checksum verification */
+        U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState);
+        U32 checkRead;
+        if (remainingSize<4) return ERROR(checksum_wrong);
+        checkRead = MEM_readLE32(ip);
+        if (checkRead != checkCalc) return ERROR(checksum_wrong);
+        remainingSize -= 4;
+    }
+
+    if (remainingSize) return ERROR(srcSize_wrong);
    return op-ostart;
 }

@ -1077,18 +979,33 @@ size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t sr
 }


-/*_******************************
-*  Streaming Decompression API
-********************************/
-size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx)
-{
-    return dctx->expected;
+/*-**********************************
+*   Streaming Decompression API
+************************************/
+size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; }
+
+ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) {   /* reports which kind of input is expected next, derived solely from dctx->stage */
+    switch(dctx->stage)
+    {
+    default:   /* should not happen */
+    case ZSTDds_getFrameHeaderSize:
+    case ZSTDds_decodeFrameHeader:
+        return ZSTDnit_frameHeader;   /* both header stages (and any unknown stage, through default) map to a frame header */
+    case ZSTDds_decodeBlockHeader:
+        return ZSTDnit_blockHeader;
+    case ZSTDds_decompressBlock:
+        return ZSTDnit_block;
+    case ZSTDds_decompressLastBlock:
+        return ZSTDnit_lastBlock;   /* reported separately from a regular block */
+    case ZSTDds_checkChecksum:
+        return ZSTDnit_checksum;
+    case ZSTDds_decodeSkippableHeader:
+    case ZSTDds_skipFrame:
+        return ZSTDnit_skippableFrame;   /* header and payload of a skippable frame share one input type */
+    }
 }

-int ZSTD_isSkipFrame(ZSTD_DCtx* dctx)
-{
-    return dctx->stage == ZSTDds_skipFrame;
-}
+int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; }   /* for zbuff */

 /** ZSTD_decompressContinue() :
 *   @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity)
@ -1132,23 +1049,29 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
        {   blockProperties_t bp;
            size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
            if (ZSTD_isError(cBlockSize)) return cBlockSize;
-            if (bp.blockType == bt_end) {
+            dctx->expected = cBlockSize;
+            dctx->bType = bp.blockType;
+            dctx->rleSize = bp.origSize;
+            if (cBlockSize) {
+                dctx->stage = bp.lastBlock ? ZSTDds_decompressLastBlock : ZSTDds_decompressBlock;
+                return 0;
+            }
+            /* empty block */
+            if (bp.lastBlock) {
                if (dctx->fParams.checksumFlag) {
-                    U64 const h64 = XXH64_digest(&dctx->xxhState);
-                    U32 const h32 = (U32)(h64>>11) & ((1<<22)-1);
-                    const BYTE* const ip = (const BYTE*)src;
-                    U32 const check32 = ip[2] + (ip[1] << 8) + ((ip[0] & 0x3F) << 16);
-                    if (check32 != h32) return ERROR(checksum_wrong);
+                    dctx->expected = 4;
+                    dctx->stage = ZSTDds_checkChecksum;
+                } else {
+                    dctx->expected = 0; /* end of frame */
+                    dctx->stage = ZSTDds_getFrameHeaderSize;
                }
-                dctx->expected = 0;
-                dctx->stage = ZSTDds_getFrameHeaderSize;
            } else {
-                dctx->expected = cBlockSize;
-                dctx->bType = bp.blockType;
-                dctx->stage = ZSTDds_decompressBlock;
+                dctx->expected = 3;  /* go directly to next header */
+                dctx->stage = ZSTDds_decodeBlockHeader;
            }
            return 0;
        }
+    case ZSTDds_decompressLastBlock:
    case ZSTDds_decompressBlock:
        {   size_t rSize;
            switch(dctx->bType)
@ -1160,21 +1083,38 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
                rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize);
                break;
            case bt_rle :
-                return ERROR(GENERIC);   /* not yet handled */
-                break;
-            case bt_end :   /* should never happen (filtered at phase 1) */
-                rSize = 0;
+                rSize = ZSTD_setRleBlock(dst, dstCapacity, src, srcSize, dctx->rleSize);
                break;
+            case bt_reserved :   /* should never happen */
            default:
-                return ERROR(GENERIC);   /* impossible */
+                return ERROR(corruption_detected);
            }
-            dctx->stage = ZSTDds_decodeBlockHeader;
-            dctx->expected = ZSTD_blockHeaderSize;
-            dctx->previousDstEnd = (char*)dst + rSize;
            if (ZSTD_isError(rSize)) return rSize;
            if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize);
+
+            if (dctx->stage == ZSTDds_decompressLastBlock) {   /* end of frame */
+                if (dctx->fParams.checksumFlag) {  /* another round for frame checksum */
+                    dctx->expected = 4;
+                    dctx->stage = ZSTDds_checkChecksum;
+                } else {
+                    dctx->expected = 0;   /* ends here */
+                    dctx->stage = ZSTDds_getFrameHeaderSize;
+                }
+            } else {
+                dctx->stage = ZSTDds_decodeBlockHeader;
+                dctx->expected = ZSTD_blockHeaderSize;
+                dctx->previousDstEnd = (char*)dst + rSize;
+            }
            return rSize;
        }
+    case ZSTDds_checkChecksum:
+        {   U32 const h32 = (U32)XXH64_digest(&dctx->xxhState);
+            U32 const check32 = MEM_readLE32(src);   /* srcSize == 4, guaranteed by dctx->expected */
+            if (check32 != h32) return ERROR(checksum_wrong);
+            dctx->expected = 0;
+            dctx->stage = ZSTDds_getFrameHeaderSize;
+            return 0;
+        }
    case ZSTDds_decodeSkippableHeader:
        {   memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_min, src, dctx->expected);
            dctx->expected = MEM_readLE32(dctx->headerBuffer + 4);
--- a/lib/dictBuilder/zdict.c
+++ b/lib/dictBuilder/zdict.c
@ -85,7 +85,7 @@
 #define PRIME2   2246822519U

 #define MINRATIO 4
-static const U32 g_compressionLevel_default = 5;
+static const int g_compressionLevel_default = 5;
 static const U32 g_selectivity_default = 9;
 static const size_t g_provision_entropySize = 200;
 static const size_t g_min_fast_dictContent = 192;
@ -489,14 +489,13 @@ static U32 ZDICT_dictSize(const dictItem* dictList)
 static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
                            const void* const buffer, size_t bufferSize,   /* buffer must end with noisy guard band */
                            const size_t* fileSizes, unsigned nbFiles,
-                            U32 shiftRatio, unsigned maxDictSize)
+                            U32 minRatio)
 {
    int* const suffix0 = (int*)malloc((bufferSize+2)*sizeof(*suffix0));
    int* const suffix = suffix0+1;
    U32* reverseSuffix = (U32*)malloc((bufferSize)*sizeof(*reverseSuffix));
    BYTE* doneMarks = (BYTE*)malloc((bufferSize+16)*sizeof(*doneMarks));   /* +16 for overflow security */
    U32* filePos = (U32*)malloc(nbFiles * sizeof(*filePos));
-    U32 minRatio = nbFiles >> shiftRatio;
    size_t result = 0;

    /* init */
@ -542,16 +541,6 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
            DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / bufferSize * 100);
    }   }

-    /* limit dictionary size */
-    {   U32 const max = dictList->pos;   /* convention : nb of useful elts within dictList */
-        U32 currentSize = 0;
-        U32 n; for (n=1; n<max; n++) {
-            currentSize += dictList[n].length;
-            if (currentSize > maxDictSize) break;
-        }
-        dictList->pos = n;
-    }
-
 _cleanup:
    free(suffix0);
    free(reverseSuffix);
@ -576,7 +565,7 @@ typedef struct
 {
    ZSTD_CCtx* ref;
    ZSTD_CCtx* zc;
-    void* workPlace;   /* must be ZSTD_BLOCKSIZE_MAX allocated */
+    void* workPlace;   /* must be ZSTD_BLOCKSIZE_ABSOLUTEMAX allocated */
 } EStats_ress_t;

 #define MAXREPOFFSET 1024
@ -585,14 +574,14 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
                            U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets,
                            const void* src, size_t srcSize)
 {
-    size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params.cParams.windowLog);
+    size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_ABSOLUTEMAX, 1 << params.cParams.windowLog);
    size_t cSize;

    if (srcSize > blockSizeMax) srcSize = blockSizeMax;   /* protection vs large samples */
 	{	size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref);
 		if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
 	}
-    cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
+    cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_ABSOLUTEMAX, src, srcSize);
    if (ZSTD_isError(cSize)) { DISPLAYLEVEL(1, "warning : could not compress sample size %u \n", (U32)srcSize); return; }

    if (cSize) {  /* if == 0; block is not compressible */
@ -605,28 +594,28 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
        }

        /* seqStats */
-        {   size_t const nbSeq = (size_t)(seqStorePtr->offset - seqStorePtr->offsetStart);
-            ZSTD_seqToCodes(seqStorePtr, nbSeq);
+        {   U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+            ZSTD_seqToCodes(seqStorePtr);

-            {   const BYTE* codePtr = seqStorePtr->offCodeStart;
-                size_t u;
+            {   const BYTE* codePtr = seqStorePtr->ofCode;
+                U32 u;
                for (u=0; u<nbSeq; u++) offsetcodeCount[codePtr[u]]++;
            }

-            {   const BYTE* codePtr = seqStorePtr->mlCodeStart;
-                size_t u;
+            {   const BYTE* codePtr = seqStorePtr->mlCode;
+                U32 u;
                for (u=0; u<nbSeq; u++) matchlengthCount[codePtr[u]]++;
            }

-            {   const BYTE* codePtr = seqStorePtr->llCodeStart;
-                size_t u;
+            {   const BYTE* codePtr = seqStorePtr->llCode;
+                U32 u;
                for (u=0; u<nbSeq; u++) litlengthCount[codePtr[u]]++;
        }   }

        /* rep offsets */
-        {   const U32* const offsetPtr = seqStorePtr->offsetStart;
-            U32 offset1 = offsetPtr[0] - 3;
-            U32 offset2 = offsetPtr[1] - 3;
+        {   const seqDef* const seq = seqStorePtr->sequencesStart;   /* read from the start of the store : ->sequences is the write cursor, one past the last stored sequence */
+            U32 offset1 = seq[0].offset - 3;   /* stored offsets are biased by 3; a small stored value wraps and is zeroed by the range check below */
+            U32 offset2 = seq[1].offset - 3;   /* NOTE(review): assumes at least 2 sequences were stored for this block - confirm, or guard on nbSeq */
            if (offset1 >= MAXREPOFFSET) offset1 = 0;
            if (offset2 >= MAXREPOFFSET) offset2 = 0;
            repOffsets[offset1] += 3;
@ -671,7 +660,7 @@ static void ZDICT_insertSortCount(offsetCount_t table[ZSTD_REP_NUM+1], U32 val,
 }


-#define OFFCODE_MAX 18  /* only applicable to first block */
+#define OFFCODE_MAX 30  /* only applicable to first block */
 static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
                                 unsigned compressionLevel,
                           const void*  srcBuffer, const size_t* fileSizes, unsigned nbFiles,
@ -681,6 +670,7 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
    HUF_CREATE_STATIC_CTABLE(hufTable, 255);
    U32 offcodeCount[OFFCODE_MAX+1];
    short offcodeNCount[OFFCODE_MAX+1];
+    U32 offcodeMax = ZSTD_highbit32((U32)(dictBufferSize + 128 KB));
    U32 matchLengthCount[MaxML+1];
    short matchLengthNCount[MaxML+1];
    U32 litLengthCount[MaxLL+1];
@ -689,7 +679,7 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
    offsetCount_t bestRepOffset[ZSTD_REP_NUM+1];
    EStats_ress_t esr;
    ZSTD_parameters params;
-    U32 u, huffLog = 12, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total;
+    U32 u, huffLog = 11, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total;
    size_t pos = 0, errorCode;
    size_t eSize = 0;
    size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles);
@ -697,15 +687,16 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
    BYTE* dstPtr = (BYTE*)dstBuffer;

    /* init */
+    if (offcodeMax>OFFCODE_MAX) return ERROR(dictionary_wrong);   /* too large dictionary; return directly : esr.* is only assigned below, so the cleanup path must not run on it yet */
    for (u=0; u<256; u++) countLit[u]=1;   /* any character must be described */
-    for (u=0; u<=OFFCODE_MAX; u++) offcodeCount[u]=1;
+    for (u=0; u<=offcodeMax; u++) offcodeCount[u]=1;
    for (u=0; u<=MaxML; u++) matchLengthCount[u]=1;
    for (u=0; u<=MaxLL; u++) litLengthCount[u]=1;
    repOffset[1] = repOffset[4] = repOffset[8] = 1;
    memset(bestRepOffset, 0, sizeof(bestRepOffset));
    esr.ref = ZSTD_createCCtx();
    esr.zc = ZSTD_createCCtx();
-    esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
+    esr.workPlace = malloc(ZSTD_BLOCKSIZE_ABSOLUTEMAX);
    if (!esr.ref || !esr.zc || !esr.workPlace) {
            eSize = ERROR(memory_allocation);
            DISPLAYLEVEL(1, "Not enough memory");
@ -744,8 +735,8 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
    }
    /* note : the result of this phase should be used to better appreciate the impact on statistics */

-    total=0; for (u=0; u<=OFFCODE_MAX; u++) total+=offcodeCount[u];
-    errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, OFFCODE_MAX);
+    total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u];
+    errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax);
    if (FSE_isError(errorCode)) {
        eSize = ERROR(GENERIC);
        DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount");
@ -845,51 +836,12 @@ _cleanup:
 }


-#define DIB_FASTSEGMENTSIZE 64
-/*! ZDICT_fastSampling()  (based on an idea proposed by Giuseppe Ottaviano) :
-    Fill `dictBuffer` with stripes of size DIB_FASTSEGMENTSIZE from `samplesBuffer`,
-    up to `dictSize`.
-    Filling starts from the end of `dictBuffer`, down to maximum possible.
-    if `dictSize` is not a multiply of DIB_FASTSEGMENTSIZE, some bytes at beginning of `dictBuffer` won't be used.
-    @return : amount of data written into `dictBuffer`,
-              or an error code
-*/
-static size_t ZDICT_fastSampling(void* dictBuffer, size_t dictSize,
-                         const void* samplesBuffer, size_t samplesSize)
-{
-    char* dstPtr = (char*)dictBuffer + dictSize;
-    const char* srcPtr = (const char*)samplesBuffer;
-    size_t const nbSegments = dictSize / DIB_FASTSEGMENTSIZE;
-    size_t segNb, interSize;
-
-    if (nbSegments <= 2) return ERROR(srcSize_wrong);
-    if (samplesSize < dictSize) return ERROR(srcSize_wrong);
-
-    /* first and last segments are part of dictionary, in case they contain interesting header/footer */
-    dstPtr -= DIB_FASTSEGMENTSIZE;
-    memcpy(dstPtr, srcPtr, DIB_FASTSEGMENTSIZE);
-    dstPtr -= DIB_FASTSEGMENTSIZE;
-    memcpy(dstPtr, srcPtr+samplesSize-DIB_FASTSEGMENTSIZE, DIB_FASTSEGMENTSIZE);
-
-    /* regularly copy a segment */
-    interSize = (samplesSize - nbSegments*DIB_FASTSEGMENTSIZE) / (nbSegments-1);
-    srcPtr += DIB_FASTSEGMENTSIZE;
-    for (segNb=2; segNb < nbSegments; segNb++) {
-        srcPtr += interSize;
-        dstPtr -= DIB_FASTSEGMENTSIZE;
-        memcpy(dstPtr, srcPtr, DIB_FASTSEGMENTSIZE);
-        srcPtr += DIB_FASTSEGMENTSIZE;
-    }
-
-    return nbSegments * DIB_FASTSEGMENTSIZE;
-}
-
 size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
                                                 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
                                                 ZDICT_params_t params)
 {
    size_t hSize;
-    unsigned const compressionLevel = (params.compressionLevel == 0) ? g_compressionLevel_default : params.compressionLevel;
+    int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;

    /* dictionary header */
    MEM_writeLE32(dictBuffer, ZSTD_DICT_MAGIC);
@ -914,60 +866,87 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
 }


-#define DIB_MINSAMPLESSIZE (DIB_FASTSEGMENTSIZE*3)
+#define DIB_MINSAMPLESSIZE 512
 /*! ZDICT_trainFromBuffer_unsafe() :
-*   `samplesBuffer` must be followed by noisy guard band.
-*   @return : size of dictionary.
+*   Warning : `samplesBuffer` must be followed by noisy guard band.
+*   @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
 */
 size_t ZDICT_trainFromBuffer_unsafe(
                            void* dictBuffer, size_t maxDictSize,
                            const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
                            ZDICT_params_t params)
 {
-    U32 const dictListSize = MAX( MAX(DICTLISTSIZE, nbSamples), (U32)(maxDictSize/16));
+    U32 const dictListSize = MAX(MAX(DICTLISTSIZE, nbSamples), (U32)(maxDictSize/16));
    dictItem* const dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList));
-    unsigned selectivity = params.selectivityLevel;
+    unsigned const selectivity = params.selectivityLevel == 0 ? g_selectivity_default : params.selectivityLevel;
+    unsigned const minRep = (selectivity > 30) ? MINRATIO : nbSamples >> selectivity;
    size_t const targetDictSize = maxDictSize;
-    size_t sBuffSize;
+    size_t const samplesBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples);
    size_t dictSize = 0;

    /* checks */
    if (!dictList) return ERROR(memory_allocation);
    if (maxDictSize <= g_provision_entropySize + g_min_fast_dictContent) { free(dictList); return ERROR(dstSize_tooSmall); }
+    if (samplesBuffSize < DIB_MINSAMPLESSIZE) { free(dictList); return 0; }   /* not enough source to create dictionary */

    /* init */
-    { unsigned u; for (u=0, sBuffSize=0; u<nbSamples; u++) sBuffSize += samplesSizes[u]; }
-    if (sBuffSize < DIB_MINSAMPLESSIZE) { free(dictList); return 0; }   /* not enough source to create dictionary */
    ZDICT_initDictItem(dictList);
    g_displayLevel = params.notificationLevel;
-    if (selectivity==0) selectivity = g_selectivity_default;

    /* build dictionary */
-    if (selectivity>1) {  /* selectivity == 1 => fast mode */
-        ZDICT_trainBuffer(dictList, dictListSize,
-                        samplesBuffer, sBuffSize,
-                        samplesSizes, nbSamples,
-                        selectivity, (U32)targetDictSize);
+    ZDICT_trainBuffer(dictList, dictListSize,
+                    samplesBuffer, samplesBuffSize,
+                    samplesSizes, nbSamples,
+                    minRep);
+
+    /* display best matches : verbose-only (level >= 3) listing of up to 25 leading segments of dictList */
+    if (g_displayLevel>= 3) {
+        U32 const nb = MIN(25, MAX(dictList[0].pos, 1U) - 1);   /* dictList[0].pos is the end index : only entries [1, pos) are filled, never read beyond them */
+        U32 const dictContentSize = ZDICT_dictSize(dictList);
+        U32 u;
+        DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize);
+        DISPLAYLEVEL(3, "list %u best segments \n", nb);
+        for (u=1; u<=nb; u++) {   /* entry 0 is the list header, segments start at index 1 */
+            U32 pos = dictList[u].pos;
+            U32 length = dictList[u].length;
+            U32 printedLength = MIN(40, length);   /* cap the hex preview at 40 bytes per segment */
+            DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
+                         u, length, pos, dictList[u].savings);
+            ZDICT_printHex(3, (const char*)samplesBuffer+pos, printedLength);
+            DISPLAYLEVEL(3, "| \n");
+    }   }

-        /* display best matches */
-        if (g_displayLevel>= 3) {
-            U32 const nb = 25;
-            U32 const dictContentSize = ZDICT_dictSize(dictList);
-            U32 u;
-            DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize);
-            DISPLAYLEVEL(3, "list %u best segments \n", nb);
-            for (u=1; u<=nb; u++) {
-                U32 p = dictList[u].pos;
-                U32 l = dictList[u].length;
-                U32 d = MIN(40, l);
-                DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
-                             u, l, p, dictList[u].savings);
-                ZDICT_printHex(3, (const char*)samplesBuffer+p, d);
-                DISPLAYLEVEL(3, "| \n");
-    }   }   }

    /* create dictionary */
    {   U32 dictContentSize = ZDICT_dictSize(dictList);
+        if (dictContentSize < targetDictSize/2) {
+            DISPLAYLEVEL(2, "!  warning : created dictionary significantly smaller than requested (%u < %u) \n", dictContentSize, (U32)maxDictSize);
+            if (minRep > MINRATIO) {
+                DISPLAYLEVEL(2, "!  consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1);
+                DISPLAYLEVEL(2, "!  note : larger dictionaries are not necessarily better, test its efficiency on samples \n");
+            }
+            if (samplesBuffSize < 10 * targetDictSize)
+                DISPLAYLEVEL(2, "!  consider increasing the number of samples (total size : %u MB)\n", (U32)(samplesBuffSize>>20));
+        }
+
+        if ((dictContentSize > targetDictSize*2) && (nbSamples > 2*MINRATIO) && (selectivity>1)) {
+            U32 proposedSelectivity = selectivity-1;
+            while ((nbSamples >> proposedSelectivity) <= MINRATIO) { proposedSelectivity--; }
+            DISPLAYLEVEL(2, "!  note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (U32)maxDictSize);
+            DISPLAYLEVEL(2, "!  you may consider decreasing selectivity to produce denser dictionary (-s%u) \n", proposedSelectivity);
+            DISPLAYLEVEL(2, "!  but test its efficiency on samples \n");
+        }
+
+        /* limit dictionary size */
+        {   U32 const max = dictList->pos;   /* convention : nb of useful elts within dictList */
+            U32 currentSize = 0;
+            U32 n; for (n=1; n<max; n++) {
+                currentSize += dictList[n].length;
+                if (currentSize > targetDictSize) { currentSize -= dictList[n].length; break; }
+            }
+            dictList->pos = n;
+            dictContentSize = currentSize;
+        }

        /* build dict content */
        {   U32 u;
@ -979,14 +958,6 @@ size_t ZDICT_trainFromBuffer_unsafe(
                memcpy(ptr, (const char*)samplesBuffer+dictList[u].pos, l);
        }   }

-        /* fast mode dict content */
-        if (selectivity==1) {  /* note could also be used to complete a dictionary, but not necessarily better */
-            DISPLAYLEVEL(3, "\r%70s\r", "");   /* clean display line */
-            DISPLAYLEVEL(3, "Adding %u KB with fast sampling \n", (U32)(targetDictSize>>10));
-            dictContentSize = (U32)ZDICT_fastSampling(dictBuffer, targetDictSize,
-                                                      samplesBuffer, sBuffSize);
-        }
-
        dictSize = ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, maxDictSize,
                                                             samplesBuffer, samplesSizes, nbSamples,
                                                             params);
@ -1004,23 +975,23 @@ size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacit
                                      const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
                                      ZDICT_params_t params)
 {
+    size_t result;
    void* newBuff;
-    size_t sBuffSize;
+    size_t const sBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples);
+    if (sBuffSize < DIB_MINSAMPLESSIZE) return 0;   /* not enough content => no dictionary */

-    { unsigned u; for (u=0, sBuffSize=0; u<nbSamples; u++) sBuffSize += samplesSizes[u]; }
-    if (sBuffSize==0) return 0;   /* empty content => no dictionary */
    newBuff = malloc(sBuffSize + NOISELENGTH);
    if (!newBuff) return ERROR(memory_allocation);

    memcpy(newBuff, samplesBuffer, sBuffSize);
    ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH);   /* guard band, for end of buffer condition */

-    { size_t const result = ZDICT_trainFromBuffer_unsafe(
+    result = ZDICT_trainFromBuffer_unsafe(
                                        dictBuffer, dictBufferCapacity,
                                        newBuff, samplesSizes, nbSamples,
                                        params);
-      free(newBuff);
-      return result; }
+    free(newBuff);
+    return result;
 }


--- a/lib/dictBuilder/zdict.h
+++ b/lib/dictBuilder/zdict.h
@ -38,43 +38,28 @@
 extern "C" {
 #endif

-/*-*************************************
-*  Public functions
-***************************************/
 /*! ZDICT_trainFromBuffer() :
-    Train a dictionary from a memory buffer `samplesBuffer`,
-    where `nbSamples` samples have been stored concatenated.
-    Each sample size is provided into an orderly table `samplesSizes`.
-    Resulting dictionary will be saved into `dictBuffer`.
+    Train a dictionary from an array of samples.
+    Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
+    supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
+    The resulting dictionary will be saved into `dictBuffer`.
    @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
-              or an error code, which can be tested by ZDICT_isError().
+              or an error code, which can be tested with ZDICT_isError().
+    Tips : In general, a reasonable dictionary has a size of ~ 100 KB.
+           It's obviously possible to target smaller or larger ones, just by specifying a different `dictBufferCapacity`.
+           In general, it's recommended to provide a few thousand samples, but this can vary a lot.
+           It's recommended that the total size of all samples be about 100 times the target size of the dictionary.
 */
 size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
-                             const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
-
-/*! ZDICT_addEntropyTablesFromBuffer() :
-
-    Given a content-only dictionary (built for example from common strings in
-    the input), add entropy tables computed from the memory buffer
-    `samplesBuffer`, where `nbSamples` samples have been stored concatenated.
-    Each sample size is provided into an orderly table `samplesSizes`.
-
-    The input dictionary is the last `dictContentSize` bytes of `dictBuffer`. The
-    resulting dictionary with added entropy tables will written back to
-    `dictBuffer`.
-    @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`).
-*/
-size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
-                                        const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
+                       const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);


-/*-*************************************
-*  Helper functions
-***************************************/
+/*======   Helper functions   ======*/
 unsigned ZDICT_isError(size_t errorCode);
 const char* ZDICT_getErrorName(size_t errorCode);


+
 #ifdef ZDICT_STATIC_LINKING_ONLY

 /* ====================================================================================
@ -85,8 +70,8 @@ const char* ZDICT_getErrorName(size_t errorCode);
 * ==================================================================================== */

 typedef struct {
-    unsigned selectivityLevel;   /* 0 means default; larger => bigger selection => larger dictionary */
-    unsigned compressionLevel;   /* 0 means default; target a specific zstd compression level */
+    unsigned selectivityLevel;   /* 0 means default; larger => select more => larger dictionary */
+    int      compressionLevel;   /* 0 means default; target a specific zstd compression level */
    unsigned notificationLevel;  /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
    unsigned dictID;             /* 0 means auto mode (32-bits random value); other : force dictID value */
    unsigned reserved[2];        /* space for future parameters */
@ -96,13 +81,32 @@ typedef struct {
 /*! ZDICT_trainFromBuffer_advanced() :
    Same as ZDICT_trainFromBuffer() with control over more parameters.
    `parameters` is optional and can be provided with values set to 0 to mean "default".
-    @return : size of dictionary stored into `dictBuffer` (<= `dictBufferSize`)
+    @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
              or an error code, which can be tested by ZDICT_isError().
-    note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using ZDICT_setNotificationLevel()
+    note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using notificationLevel>0.
 */
 size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
-                             const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
-                             ZDICT_params_t parameters);
+                                const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
+                                ZDICT_params_t parameters);
+
+
+/*! ZDICT_addEntropyTablesFromBuffer() :
+
+    Given a content-only dictionary (built using any 3rd party algorithm),
+    add entropy tables computed from an array of samples.
+    Samples must be stored concatenated in a flat buffer `samplesBuffer`,
+    supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
+
+    The input dictionary content must be stored *at the end* of `dictBuffer`.
+    Its size is `dictContentSize`.
+    The resulting dictionary with added entropy tables will be *written back to `dictBuffer`*,
+    starting from its beginning.
+    @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`).
+*/
+size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
+                                        const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
+
+

 #endif   /* ZDICT_STATIC_LINKING_ONLY */

--- a/lib/legacy/zstd_legacy.h
+++ b/lib/legacy/zstd_legacy.h
@ -48,6 +48,7 @@ extern "C" {
 #include "zstd_v04.h"
 #include "zstd_v05.h"
 #include "zstd_v06.h"
+#include "zstd_v07.h"


 /** ZSTD_isLegacy() :
@ -67,6 +68,7 @@ MEM_STATIC unsigned ZSTD_isLegacy(const void* src, size_t srcSize)
        case ZSTDv04_magicNumber : return 4;
        case ZSTDv05_MAGICNUMBER : return 5;
        case ZSTDv06_MAGICNUMBER : return 6;
+        case ZSTDv07_MAGICNUMBER : return 7;
        default : return 0;
    }
 }
@ -90,6 +92,12 @@ MEM_STATIC unsigned long long ZSTD_getDecompressedSize_legacy(const void* src, s
            if (frResult != 0) return 0;
            return fParams.frameContentSize;
        }
+        if (version==7) {
+            ZSTDv07_frameParams fParams;
+            size_t const frResult = ZSTDv07_getFrameParams(&fParams, src, srcSize);
+            if (frResult != 0) return 0;
+            return fParams.frameContentSize;
+        }
        return 0;   /* should not be possible */
    }
 }
@ -126,6 +134,14 @@ MEM_STATIC size_t ZSTD_decompressLegacy(
                ZSTDv06_freeDCtx(zd);
                return result;
            }
+        case 7 :
+            {   size_t result;
+                ZSTDv07_DCtx* const zd = ZSTDv07_createDCtx();
+                if (zd==NULL) return ERROR(memory_allocation);
+                result = ZSTDv07_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize);
+                ZSTDv07_freeDCtx(zd);
+                return result;
+            }
        default :
            return ERROR(prefix_unknown);
    }
--- a/lib/legacy/zstd_v02.c
+++ b/lib/legacy/zstd_v02.c
@ -1350,7 +1350,7 @@ static unsigned FSE_isError(size_t code) { return ERR_isError(code); }
 ****************************************************************/
 static short FSE_abs(short a)
 {
-    return a<0 ? -a : a;
+    return (short)(a<0 ? -a : a);
 }

 static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
--- a/lib/legacy/zstd_v04.c
+++ b/lib/legacy/zstd_v04.c
@ -4024,7 +4024,7 @@ size_t ZSTDv04_decompress(void* dst, size_t maxDstSize, const void* src, size_t
    return regenSize;
 #else
    ZSTD_DCtx dctx;
-    return ZSTD_decompressDCtx(&dctx, dst, maxDstSize, src, srcSize);
+    return ZSTDv04_decompressDCtx(&dctx, dst, maxDstSize, src, srcSize);
 #endif
 }

@ -4054,3 +4054,11 @@ size_t ZBUFFv04_decompressContinue(ZBUFFv04_DCtx* dctx, void* dst, size_t* maxDs
 {
    return ZBUFF_decompressContinue(dctx, dst, maxDstSizePtr, src, srcSizePtr);
 }
+
+ZSTD_DCtx* ZSTDv04_createDCtx(void) { return ZSTD_createDCtx(); }
+size_t ZSTDv04_freeDCtx(ZSTD_DCtx* dctx) { return ZSTD_freeDCtx(dctx); }
+
+size_t ZSTDv04_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcSize)
+{
+    return ZSTD_getFrameParams(params, src, srcSize);
+}
--- a/lib/legacy/zstd_v07.c
+++ b/lib/legacy/zstd_v07.c
--- a/lib/legacy/zstd_v07.h
+++ b/lib/legacy/zstd_v07.h
@ -0,0 +1,196 @@
+/*
+    zstd_v07 - decoder for 0.7 format
+    Header File
+    Copyright (C) 2014-2016, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+*/
+#ifndef ZSTDv07_H_235446
+#define ZSTDv07_H_235446
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*======  Dependency  ======*/
+#include <stddef.h>   /* size_t */
+
+
+/*======  Export for Windows  ======*/
+/*!
+*  ZSTDv07_DLL_EXPORT :
+*  Enable exporting of functions when building a Windows DLL
+*/
+#if defined(_WIN32) && defined(ZSTDv07_DLL_EXPORT) && (ZSTDv07_DLL_EXPORT==1)
+#  define ZSTDLIB_API __declspec(dllexport)
+#else
+#  define ZSTDLIB_API
+#endif
+
+
+
+/* *************************************
+*  Simple API
+***************************************/
+/*! ZSTDv07_getDecompressedSize() :
+*   @return : decompressed size if known, 0 otherwise.
+       note 1 : if `0`, follow up with ZSTDv07_getFrameParams() to know precise failure cause.
+       note 2 : decompressed size could be wrong or intentionally modified !
+                always ensure results fit within application's authorized limits */
+unsigned long long ZSTDv07_getDecompressedSize(const void* src, size_t srcSize);
+
+/*! ZSTDv07_decompress() :
+    `compressedSize` : must be _exact_ size of compressed input, otherwise decompression will fail.
+    `dstCapacity` must be equal or larger than originalSize.
+    @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
+              or an errorCode if it fails (which can be tested using ZSTDv07_isError()) */
+ZSTDLIB_API size_t ZSTDv07_decompress( void* dst, size_t dstCapacity,
+                              const void* src, size_t compressedSize);
+
+/*======  Helper functions  ======*/
+ZSTDLIB_API unsigned    ZSTDv07_isError(size_t code);          /*!< tells if a `size_t` function result is an error code */
+ZSTDLIB_API const char* ZSTDv07_getErrorName(size_t code);     /*!< provides readable string from an error code */
+
+
+/*-*************************************
+*  Explicit memory management
+***************************************/
+/** Decompression context */
+typedef struct ZSTDv07_DCtx_s ZSTDv07_DCtx;
+ZSTDLIB_API ZSTDv07_DCtx* ZSTDv07_createDCtx(void);
+ZSTDLIB_API size_t     ZSTDv07_freeDCtx(ZSTDv07_DCtx* dctx);      /*!< @return : errorCode */
+
+/** ZSTDv07_decompressDCtx() :
+*   Same as ZSTDv07_decompress(), requires an allocated ZSTDv07_DCtx (see ZSTDv07_createDCtx()) */
+ZSTDLIB_API size_t ZSTDv07_decompressDCtx(ZSTDv07_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+
+/*-************************
+*  Simple dictionary API
+***************************/
+/*! ZSTDv07_decompress_usingDict() :
+*   Decompression using a pre-defined Dictionary content (see dictBuilder).
+*   Dictionary must be identical to the one used during compression.
+*   Note : This function loads the dictionary, resulting in a significant startup time */
+ZSTDLIB_API size_t ZSTDv07_decompress_usingDict(ZSTDv07_DCtx* dctx,
+                                             void* dst, size_t dstCapacity,
+                                       const void* src, size_t srcSize,
+                                       const void* dict,size_t dictSize);
+
+
+/*-**************************
+*  Advanced Dictionary API
+****************************/
+/*! ZSTDv07_createDDict() :
+*   Create a digested dictionary, ready to start decompression operation without startup delay.
+*   `dict` can be released after creation */
+typedef struct ZSTDv07_DDict_s ZSTDv07_DDict;
+ZSTDLIB_API ZSTDv07_DDict* ZSTDv07_createDDict(const void* dict, size_t dictSize);
+ZSTDLIB_API size_t      ZSTDv07_freeDDict(ZSTDv07_DDict* ddict);
+
+/*! ZSTDv07_decompress_usingDDict() :
+*   Decompression using a pre-digested Dictionary
+*   Faster startup than ZSTDv07_decompress_usingDict(), recommended when same dictionary is used multiple times. */
+ZSTDLIB_API size_t ZSTDv07_decompress_usingDDict(ZSTDv07_DCtx* dctx,
+                                              void* dst, size_t dstCapacity,
+                                        const void* src, size_t srcSize,
+                                        const ZSTDv07_DDict* ddict);
+
+typedef struct {
+    unsigned long long frameContentSize;
+    unsigned windowSize;
+    unsigned dictID;
+    unsigned checksumFlag;
+} ZSTDv07_frameParams;
+
+ZSTDLIB_API size_t ZSTDv07_getFrameParams(ZSTDv07_frameParams* fparamsPtr, const void* src, size_t srcSize);   /**< doesn't consume input */
+
+
+
+
+/* *************************************
+*  Streaming functions
+***************************************/
+typedef struct ZBUFFv07_DCtx_s ZBUFFv07_DCtx;
+ZSTDLIB_API ZBUFFv07_DCtx* ZBUFFv07_createDCtx(void);
+ZSTDLIB_API size_t      ZBUFFv07_freeDCtx(ZBUFFv07_DCtx* dctx);
+
+ZSTDLIB_API size_t ZBUFFv07_decompressInit(ZBUFFv07_DCtx* dctx);
+ZSTDLIB_API size_t ZBUFFv07_decompressInitDictionary(ZBUFFv07_DCtx* dctx, const void* dict, size_t dictSize);
+
+ZSTDLIB_API size_t ZBUFFv07_decompressContinue(ZBUFFv07_DCtx* dctx,
+                                            void* dst, size_t* dstCapacityPtr,
+                                      const void* src, size_t* srcSizePtr);
+
+/*-***************************************************************************
+*  Streaming decompression howto
+*
+*  A ZBUFFv07_DCtx object is required to track streaming operations.
+*  Use ZBUFFv07_createDCtx() and ZBUFFv07_freeDCtx() to create/release resources.
+*  Use ZBUFFv07_decompressInit() to start a new decompression operation,
+*   or ZBUFFv07_decompressInitDictionary() if decompression requires a dictionary.
+*  Note that ZBUFFv07_DCtx objects can be re-init multiple times.
+*
+*  Use ZBUFFv07_decompressContinue() repetitively to consume your input.
+*  *srcSizePtr and *dstCapacityPtr can be any size.
+*  The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr.
+*  Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again.
+*  The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`.
+*  @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency),
+*            or 0 when a frame is completely decoded,
+*            or an error code, which can be tested using ZBUFFv07_isError().
+*
+*  Hint : recommended buffer sizes (not compulsory) : ZBUFFv07_recommendedDInSize() and ZBUFFv07_recommendedDOutSize()
+*  output : ZBUFFv07_recommendedDOutSize == 128 KB. Block size is the internal unit; this ensures it's always possible to write a full block when decoded.
+*  input  : ZBUFFv07_recommendedDInSize == 128KB + 3;
+*           just follow indications from ZBUFFv07_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
+* *******************************************************************************/
+
+
+/* *************************************
+*  Tool functions
+***************************************/
+ZSTDLIB_API unsigned ZBUFFv07_isError(size_t errorCode);
+ZSTDLIB_API const char* ZBUFFv07_getErrorName(size_t errorCode);
+
+/** Functions below provide recommended buffer sizes for Decompression operations.
+*   These sizes are just hints, they tend to offer better latency */
+ZSTDLIB_API size_t ZBUFFv07_recommendedDInSize(void);
+ZSTDLIB_API size_t ZBUFFv07_recommendedDOutSize(void);
+
+
+/*-*************************************
+*  Constants
+***************************************/
+#define ZSTDv07_MAGICNUMBER            0xFD2FB527   /* v0.7 */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* ZSTDv07_H_235446 */
--- a/lib/common/zstd.h
+++ b/lib/common/zstd.h
@ -36,15 +36,11 @@
 extern "C" {
 #endif

-/*-*************************************
-*  Dependencies
-***************************************/
+/*======  Dependency  ======*/
 #include <stddef.h>   /* size_t */


-/*-***************************************************************
-*  Export parameters
-*****************************************************************/
+/*======  Export for Windows  ======*/
 /*!
 *  ZSTD_DLL_EXPORT :
 *  Enable exporting of functions when building a Windows DLL
@ -56,12 +52,10 @@ extern "C" {
 #endif


-/* *************************************
-*  Version
-***************************************/
+/*======  Version  ======*/
 #define ZSTD_VERSION_MAJOR    0
-#define ZSTD_VERSION_MINOR    7
-#define ZSTD_VERSION_RELEASE  4
+#define ZSTD_VERSION_MINOR    8
+#define ZSTD_VERSION_RELEASE  0

 #define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
 #define ZSTD_QUOTE(str) #str
@ -73,61 +67,72 @@ ZSTDLIB_API unsigned ZSTD_versionNumber (void);


 /* *************************************
-*  Simple functions
+*  Simple API
 ***************************************/
 /*! ZSTD_compress() :
-    Compresses `srcSize` bytes from buffer `src` into buffer `dst` of size `dstCapacity`.
-    Destination buffer must be already allocated.
-    Compression runs faster if `dstCapacity` >=  `ZSTD_compressBound(srcSize)`.
-    @return : the number of bytes written into `dst`,
+    Compresses `src` buffer into already allocated `dst`.
+    Hint : compression runs faster if `dstCapacity` >=  `ZSTD_compressBound(srcSize)`.
+    @return : the number of bytes written into `dst` (<= `dstCapacity`),
              or an error code if it fails (which can be tested using ZSTD_isError()) */
-ZSTDLIB_API size_t ZSTD_compress(   void* dst, size_t dstCapacity,
-                              const void* src, size_t srcSize,
-                                     int  compressionLevel);
+ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity,
+                            const void* src, size_t srcSize,
+                                  int compressionLevel);

-/** ZSTD_getDecompressedSize() :
-*   @return : decompressed size if known, 0 otherwise.
-        note : to know precise reason why result is `0`, follow up with ZSTD_getFrameParams() */
+/*! ZSTD_getDecompressedSize() :
+*   @return : decompressed size as a 64-bits value _if known_, 0 otherwise.
+*    note 1 : decompressed size can be very large (64-bits value),
+*             potentially larger than what local system can handle as a single memory segment.
+*             In which case, it's necessary to use streaming mode to decompress data.
+*    note 2 : decompressed size is an optional field, that may not be present.
+*             When `return==0`, consider data to decompress could have any size.
+*             In which case, it's necessary to use streaming mode to decompress data,
+*             or rely on application's implied limits.
+*             (For example, it may know that its own data is necessarily cut into blocks <= 16 KB).
+*    note 3 : decompressed size could be wrong or intentionally modified !
+*             Always ensure result fits within application's authorized limits !
+*             Each application can have its own set of conditions.
+*             If the intention is to decompress public data compressed by zstd command line utility,
+*             it is recommended to support at least 8 MB for extended compatibility.
+*    note 4 : when `return==0`, if precise failure cause is needed, use ZSTD_getFrameParams() to know more. */
 unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);

 /*! ZSTD_decompress() :
-    `compressedSize` : is the _exact_ size of compressed input, otherwise decompression will fail.
-    `dstCapacity` must be equal or larger than originalSize.
+    `compressedSize` : must be the _exact_ size of compressed input, otherwise decompression will fail.
+    `dstCapacity` must be equal or larger than originalSize (see ZSTD_getDecompressedSize() ).
+    If originalSize is unknown, and if there is no implied application-specific limitations,
+    it's necessary to use streaming mode to decompress data.
    @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
              or an errorCode if it fails (which can be tested using ZSTD_isError()) */
 ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity,
                              const void* src, size_t compressedSize);


-/* *************************************
-*  Helper functions
-***************************************/
-ZSTDLIB_API size_t      ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size (worst case scenario) */
-
-/* Error Management */
+/*======  Helper functions  ======*/
+ZSTDLIB_API int         ZSTD_maxCLevel(void);               /*!< maximum compression level available */
+ZSTDLIB_API size_t      ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case scenario */
 ZSTDLIB_API unsigned    ZSTD_isError(size_t code);          /*!< tells if a `size_t` function result is an error code */
-ZSTDLIB_API const char* ZSTD_getErrorName(size_t code);     /*!< provides readable string for an error code */
+ZSTDLIB_API const char* ZSTD_getErrorName(size_t code);     /*!< provides readable string from an error code */


-/* *************************************
+/*-*************************************
 *  Explicit memory management
 ***************************************/
 /** Compression context */
 typedef struct ZSTD_CCtx_s ZSTD_CCtx;                       /*< incomplete type */
 ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
-ZSTDLIB_API size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);      /*!< @return : errorCode */
+ZSTDLIB_API size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);

 /** ZSTD_compressCCtx() :
-    Same as ZSTD_compress(), but requires an already allocated ZSTD_CCtx (see ZSTD_createCCtx()) */
+    Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()) */
 ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel);

 /** Decompression context */
-typedef struct ZSTD_DCtx_s ZSTD_DCtx;
+typedef struct ZSTD_DCtx_s ZSTD_DCtx;                       /*< incomplete type */
 ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void);
-ZSTDLIB_API size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);      /*!< @return : errorCode */
+ZSTDLIB_API size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);

 /** ZSTD_decompressDCtx() :
-*   Same as ZSTD_decompress(), but requires an already allocated ZSTD_DCtx (see ZSTD_createDCtx()) */
+*   Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()) */
 ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);


@ -135,10 +140,8 @@ ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t dstCapa
 *  Simple dictionary API
 ***************************/
 /*! ZSTD_compress_usingDict() :
-*   Compression using a pre-defined Dictionary content (see dictBuilder).
-*   Note 1 : This function load the dictionary, resulting in a significant startup time.
-*   Note 2 : `dict` must remain accessible and unmodified during compression operation.
-*   Note 3 : `dict` can be `NULL`, in which case, it's equivalent to ZSTD_compressCCtx() */
+*   Compression using a predefined Dictionary (see dictBuilder/zdict.h).
+*   Note : This function loads the dictionary, resulting in a significant startup time. */
 ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
                                           void* dst, size_t dstCapacity,
                                     const void* src, size_t srcSize,
@ -146,11 +149,9 @@ ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
                                           int compressionLevel);

 /*! ZSTD_decompress_usingDict() :
-*   Decompression using a pre-defined Dictionary content (see dictBuilder).
+*   Decompression using a predefined Dictionary (see dictBuilder/zdict.h).
 *   Dictionary must be identical to the one used during compression.
-*   Note 1 : This function load the dictionary, resulting in a significant startup time
-*   Note 2 : `dict` must remain accessible and unmodified during compression operation.
-*   Note 3 : `dict` can be `NULL`, in which case, it's equivalent to ZSTD_decompressDCtx() */
+*   Note : This function loads the dictionary, resulting in a significant startup time */
 ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
                                             void* dst, size_t dstCapacity,
                                       const void* src, size_t srcSize,
@ -158,7 +159,7 @@ ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,


 /*-**************************
-*  Advanced Dictionary API
+*  Fast Dictionary API
 ****************************/
 /*! ZSTD_createCDict() :
 *   Create a digested dictionary, ready to start compression operation without startup delay.
@ -168,8 +169,8 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int
 ZSTDLIB_API size_t      ZSTD_freeCDict(ZSTD_CDict* CDict);

 /*! ZSTD_compress_usingCDict() :
-*   Compression using a pre-digested Dictionary.
-*   Much faster than ZSTD_compress_usingDict() when same dictionary is used multiple times.
+*   Compression using a digested Dictionary.
+*   Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
 *   Note that compression level is decided during dictionary creation */
 ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
                                            void* dst, size_t dstCapacity,
@ -184,15 +185,14 @@ ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize);
 ZSTDLIB_API size_t      ZSTD_freeDDict(ZSTD_DDict* ddict);

 /*! ZSTD_decompress_usingDDict() :
-*   Decompression using a pre-digested Dictionary
-*   Much faster than ZSTD_decompress_usingDict() when same dictionary is used multiple times. */
+*   Decompression using a digested Dictionary
+*   Faster startup than ZSTD_decompress_usingDict(), recommended when same dictionary is used multiple times. */
 ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
                                              void* dst, size_t dstCapacity,
                                        const void* src, size_t srcSize,
                                        const ZSTD_DDict* ddict);


-
 #ifdef ZSTD_STATIC_LINKING_ONLY

 /* ====================================================================================
@ -203,7 +203,7 @@ ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
 * ==================================================================================== */

 /*--- Constants ---*/
-#define ZSTD_MAGICNUMBER            0xFD2FB527   /* v0.7 */
+#define ZSTD_MAGICNUMBER            0xFD2FB528   /* v0.8 */
 #define ZSTD_MAGIC_SKIPPABLE_START  0x184D2A50U

 #define ZSTD_WINDOWLOG_MAX_32  25
@ -215,7 +215,6 @@ ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
 #define ZSTD_HASHLOG_MAX       ZSTD_WINDOWLOG_MAX
 #define ZSTD_HASHLOG_MIN       12
 #define ZSTD_HASHLOG3_MAX      17
-//#define ZSTD_HASHLOG3_MIN      15
 #define ZSTD_SEARCHLOG_MAX    (ZSTD_WINDOWLOG_MAX-1)
 #define ZSTD_SEARCHLOG_MIN      1
 #define ZSTD_SEARCHLENGTH_MAX   7
@ -280,8 +279,6 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictS
 *  Gives the amount of memory used by a given ZSTD_CCtx */
 ZSTDLIB_API size_t ZSTD_sizeofCCtx(const ZSTD_CCtx* cctx);

-ZSTDLIB_API unsigned ZSTD_maxCLevel (void);
-
 /*! ZSTD_getParams() :
 *   same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of a `ZSTD_compressionParameters`.
 *   All fields of `ZSTD_frameParameters` are set to default (0) */
@ -326,15 +323,20 @@ ZSTDLIB_API size_t ZSTD_sizeofDCtx(const ZSTD_DCtx* dctx);


 /* ******************************************************************
-*  Streaming functions (direct mode - synchronous and buffer-less)
+*  Buffer-less streaming functions (synchronous mode)
 ********************************************************************/
+/* This is an advanced API, giving full control over buffer management, for users who need direct control over memory.
+*  But it's also a complex one, with a lot of restrictions (documented below).
+*  For an easier streaming API, look into common/zbuff.h
+*  which removes all restrictions by allocating and managing its own internal buffer */
+
 ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
 ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
 ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize);
 ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx);

 ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity);
+ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);

 /*
  A ZSTD_CCtx object is required to track streaming operations.
@ -349,7 +351,7 @@ ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapaci
  Then, consume your input using ZSTD_compressContinue().
  There are some important considerations to keep in mind when using this advanced function :
  - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffer only.
-  - Interface is synchronous : input is consumed entirely and produce 1 (or more) compressed blocks.
+  - Interface is synchronous : input is consumed entirely and produces 1 or more compressed blocks.
  - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario.
    Worst case evaluation is provided by ZSTD_compressBound().
    ZSTD_compressContinue() doesn't guarantee recover after a failed compression.
@ -358,9 +360,9 @@ ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapaci
  - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps.
    In which case, it will "discard" the relevant memory section from its history.

-
-  Finish a frame with ZSTD_compressEnd(), which will write the epilogue.
-  Without epilogue, frames will be considered unfinished (broken) by decoders.
+  Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum.
+  It's possible to use a NULL,0 src content, in which case, it will write a final empty block to end the frame.
+  Without last block mark, frames will be considered unfinished (broken) by decoders.

  You can then reuse `ZSTD_CCtx` (ZSTD_compressBegin()) to compress some new frame.
 */
@ -372,7 +374,7 @@ typedef struct {
    unsigned checksumFlag;
 } ZSTD_frameParams;

-ZSTDLIB_API size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize);   /**< doesn't consume input */
+ZSTDLIB_API size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize);   /**< doesn't consume input, see details below */

 ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
 ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
@ -381,49 +383,58 @@ ZSTDLIB_API void   ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx)
 ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
 ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);

+typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
+ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
+
 /*
-  Streaming decompression, direct mode (bufferless)
+  Buffer-less streaming decompression (synchronous mode)

  A ZSTD_DCtx object is required to track streaming operations.
  Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
  A ZSTD_DCtx object can be re-used multiple times.

-  First optional operation is to retrieve frame parameters, using ZSTD_getFrameParams(), which doesn't consume the input.
-  It can provide the minimum size of rolling buffer required to properly decompress data (`windowSize`),
-  and optionally the final size of uncompressed content.
-  (Note : content size is an optional info that may not be present. 0 means : content size unknown)
-  Frame parameters are extracted from the beginning of compressed frame.
-  The amount of data to read is variable, from ZSTD_frameHeaderSize_min to ZSTD_frameHeaderSize_max (so if `srcSize` >= ZSTD_frameHeaderSize_max, it will always work)
-  If `srcSize` is too small for operation to succeed, function will return the minimum size it requires to produce a result.
-  Result : 0 when successful, it means the ZSTD_frameParams structure has been filled.
-          >0 : means there is not enough data into `src`. Provides the expected size to successfully decode header.
-           errorCode, which can be tested using ZSTD_isError()
+  First typical operation is to retrieve frame parameters, using ZSTD_getFrameParams().
+  It fills a ZSTD_frameParams structure which provides important information to correctly decode the frame,
+  such as the minimum rolling buffer size to allocate to decompress data (`windowSize`),
+  and the dictionary ID used.
+  (Note : content size is optional, it may not be present. 0 means : content size unknown).
+  Note that these values could be wrong, either because of data malformation, or because an attacker is deliberately spoofing false information.
+  As a consequence, check that values remain within valid application range, especially `windowSize`, before allocation.
+  Each application can set its own limit, depending on local restrictions. For extended interoperability, it is recommended to support at least 8 MB.
+  Frame parameters are extracted from the beginning of the compressed frame.
+  Data fragment must be large enough to ensure successful decoding, typically `ZSTD_frameHeaderSize_max` bytes.
+  @result : 0 : successful decoding, the `ZSTD_frameParams` structure is correctly filled.
+           >0 : `srcSize` is too small, please provide at least @result bytes on next attempt.
+           errorCode, which can be tested using ZSTD_isError().

  Start decompression, with ZSTD_decompressBegin() or ZSTD_decompressBegin_usingDict().
  Alternatively, you can copy a prepared context, using ZSTD_copyDCtx().

  Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively.
-  ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue().
-  ZSTD_decompressContinue() requires this exact amount of bytes, or it will fail.
+  ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+  ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail.

  @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity).
-  It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
+  It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some metadata item.
+  It can also be an error code, which can be tested with ZSTD_isError().

  ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize`.
  They should preferably be located contiguously, prior to current block.
  Alternatively, a round buffer of sufficient size is also possible. Sufficient size is determined by frame parameters.
  ZSTD_decompressContinue() is very sensitive to contiguity,
  if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place,
-    or that previous contiguous segment is large enough to properly handle maximum back-reference.
+  or that previous contiguous segment is large enough to properly handle maximum back-reference.

  A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero.
  Context can then be reset to start a new decompression.

+  Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType().
+  This information is not required to properly decode a frame.

  == Special case : skippable frames ==

-  Skippable frames allow the integration of user-defined data into a flow of concatenated frames.
-  Skippable frames will be ignored (skipped) by a decompressor. The format of skippable frame is following:
+  Skippable frames allow integration of user-defined data into a flow of concatenated frames.
+  Skippable frames will be ignored (skipped) by a decompressor. The format of skippable frames is as follows :
  a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F
  b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits
  c) Frame Content - any content (User Data) of length equal to Frame Size
@ -437,13 +448,10 @@ ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t ds
 *  Block functions
 ****************************************/
 /*! Block functions produce and decode raw zstd blocks, without frame metadata.
-    Frame metadata cost is typically ~18 bytes, which is non-negligible on very small blocks.
+    Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes).
    User will have to take in charge required information to regenerate data, such as compressed and content sizes.

    A few rules to respect :
-    - Uncompressed block size must be <= MIN (128 KB, 1 << windowLog)
-      + If you need to compress more, cut data into multiple blocks
-      + Consider using the regular ZSTD_compress() instead, as frame metadata costs become negligible when source size is large.
    - Compressing and decompressing require a context structure
      + Use ZSTD_createCCtx() and ZSTD_createDCtx()
    - It is necessary to init context before starting
@ -451,19 +459,22 @@ ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t ds
      + decompression : ZSTD_decompressBegin()
      + variants _usingDict() are also allowed
      + copyCCtx() and copyDCtx() work too
+    - Block size is limited, it must be <= ZSTD_getBlockSizeMax()
+      + If you need to compress more, cut data into multiple blocks
+      + Consider using the regular ZSTD_compress() instead, as frame metadata costs become negligible when source size is large.
    - When a block is considered not compressible enough, ZSTD_compressBlock() result will be zero.
      In which case, nothing is produced into `dst`.
      + User must test for such outcome and deal directly with uncompressed data
      + ZSTD_decompressBlock() doesn't accept uncompressed data as input !!!
      + In case of multiple successive blocks, decoder must be informed of uncompressed block existence to follow proper history.
        Use ZSTD_insertBlock() in such a case.
-        Insert block once it's copied into its final position.
 */

-#define ZSTD_BLOCKSIZE_MAX (128 * 1024)   /* define, for static allocation */
+#define ZSTD_BLOCKSIZE_ABSOLUTEMAX (128 * 1024)   /* define, for static allocation */
+ZSTDLIB_API size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx);
 ZSTDLIB_API size_t ZSTD_compressBlock  (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-ZSTDLIB_API size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize);  /**< insert block into `dctx` history. Useful to track uncompressed blocks */
+ZSTDLIB_API size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize);  /**< insert block into `dctx` history. Useful for uncompressed blocks */


 #endif   /* ZSTD_STATIC_LINKING_ONLY */
--- a/programs/.gitignore
+++ b/programs/.gitignore
@ -11,6 +11,7 @@ zbufftest
 zbufftest32
 datagen
 paramgrill
+paramgrill32
 roundTripCrash

 # Object files
--- a/programs/Makefile
+++ b/programs/Makefile
@ -38,15 +38,23 @@ MANDIR  = $(PREFIX)/share/man/man1

 ZSTDDIR = ../lib

-CPPFLAGS= -I$(ZSTDDIR)/common -I$(ZSTDDIR)/dictBuilder -DXXH_NAMESPACE=ZSTD_
-CFLAGS ?= -O3  # -falign-loops=32   # not always beneficial
-CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes -Wundef
-FLAGS   = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MOREFLAGS)
+ifeq ($(shell $(CC) -v 2>&1 | grep -c "gcc version "), 1)
+ALIGN_LOOP = -falign-loops=32
+else
+ALIGN_LOOP =
+endif
+
+CPPFLAGS= -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/dictBuilder -DXXH_NAMESPACE=ZSTD_
+CFLAGS ?= -O3
+CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 \
+          -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes -Wundef
+CFLAGS += $(MOREFLAGS)
+FLAGS   = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)


 ZSTDCOMMON_FILES := $(ZSTDDIR)/common/*.c
 ZSTDCOMP_FILES := $(ZSTDDIR)/compress/zstd_compress.c $(ZSTDDIR)/compress/fse_compress.c $(ZSTDDIR)/compress/huf_compress.c
-ZSTDDECOMP_FILES := $(ZSTDDIR)/decompress/huf_decompress.c $(ZSTDDIR)/decompress/zstd_decompress.c
+ZSTDDECOMP_FILES := $(ZSTDDIR)/decompress/zstd_decompress.o $(ZSTDDIR)/decompress/huf_decompress.c
 ZSTD_FILES := $(ZSTDDECOMP_FILES) $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES)
 ZBUFF_FILES := $(ZSTDDIR)/compress/zbuff_compress.c $(ZSTDDIR)/decompress/zbuff_decompress.c
 ZDICT_FILES := $(ZSTDDIR)/dictBuilder/*.c
@ -74,19 +82,25 @@ ZBUFFTEST = -T2mn
 FUZZERTEST= -T5mn
 ZSTDRTTEST= --test-large-data

-.PHONY: default all clean install uninstall test test32 test-all
+.PHONY: default all all32 clean cleano32 install uninstall test test32 test-all   # cleano32 only removes objects; it never creates a file of that name

 default: zstd

-all: zstd fullbench fuzzer zbufftest paramgrill datagen zstd32 fullbench32 fuzzer32 zbufftest32
+all: zstd fullbench fuzzer zbufftest paramgrill datagen
+
+all32: CFLAGS += -m32
+all32: EXT := 32$(EXT)
+all32: cleano32 all
+
+$(ZSTDDIR)/decompress/zstd_decompress.o: CFLAGS += $(ALIGN_LOOP)

 zstd  : $(ZSTD_FILES) $(ZSTDLEGACY_FILES) $(ZBUFF_FILES) $(ZDICT_FILES) \
        zstdcli.c fileio.c bench.c datagen.c dibio.c
 	$(CC)      $(FLAGS) -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) $^ -o $@$(EXT)

-zstd32: $(ZSTD_FILES) $(ZSTDLEGACY_FILES) $(ZBUFF_FILES) $(ZDICT_FILES) \
-        zstdcli.c fileio.c bench.c datagen.c dibio.c
-	$(CC) -m32 $(FLAGS) -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) $^ -o $@$(EXT)
+zstd32: CFLAGS += -m32
+zstd32: EXT := 32$(EXT)
+zstd32: zstd

 zstd_nolegacy :
 	$(MAKE) zstd ZSTD_LEGACY_SUPPORT=0
@ -119,22 +133,24 @@ zstd-small: clean
 fullbench  : $(ZSTD_FILES) $(ZBUFF_FILES) datagen.c fullbench.c
 	$(CC)      $(FLAGS) $^ -o $@$(EXT)

-fullbench32: $(ZSTD_FILES) $(ZBUFF_FILES) datagen.c fullbench.c
-	$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
+fullbench32 : CFLAGS += -m32
+fullbench32 : EXT := 32$(EXT)
+fullbench32 : fullbench

 fuzzer  : CPPFLAGS += -I$(ZSTDDIR)/dictBuilder
 fuzzer  : $(ZSTD_FILES) $(ZDICT_FILES) datagen.c fuzzer.c
 	$(CC)      $(FLAGS) $^ -o $@$(EXT)

-fuzzer32 : CPPFLAGS += -I$(ZSTDDIR)/dictBuilder
-fuzzer32: $(ZSTD_FILES) $(ZDICT_FILES) datagen.c fuzzer.c
-	$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
+fuzzer32 : CFLAGS += -m32
+fuzzer32 : EXT := 32$(EXT)
+fuzzer32 : fuzzer

 zbufftest  : $(ZSTD_FILES) $(ZBUFF_FILES) datagen.c zbufftest.c
 	$(CC)      $(FLAGS) $^ -o $@$(EXT)

-zbufftest32: $(ZSTD_FILES) $(ZBUFF_FILES) datagen.c zbufftest.c
-	$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
+zbufftest32 : CFLAGS += -m32
+zbufftest32 : EXT := 32$(EXT)
+zbufftest32 : zbufftest

 paramgrill : $(ZSTD_FILES) datagen.c paramgrill.c
 	$(CC)      $(FLAGS) $^ -lm -o $@$(EXT)
@ -146,6 +162,7 @@ roundTripCrash : $(ZSTD_FILES) roundTripCrash.c
 	$(CC)      $(FLAGS) $^ -o $@$(EXT)

 clean:
+	$(MAKE) -C ../lib clean
 	@rm -f core *.o tmp* result* *.gcda dictionary *.zst \
        zstd$(EXT) zstd32$(EXT) zstd-compress$(EXT) zstd-decompress$(EXT) \
        fullbench$(EXT) fullbench32$(EXT) \
@ -153,11 +170,13 @@ clean:
        datagen$(EXT) paramgrill$(EXT) roundTripCrash$(EXT)
 	@echo Cleaning completed

+cleano32:
+	@rm -f ../lib/decompress/*.o

-#---------------------------------------------------------------------------------
-#make install is validated only for Linux, OSX, kFreeBSD, Hurd and OpenBSD targets
-#---------------------------------------------------------------------------------
-ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD))
+#----------------------------------------------------------------------------------
+#make install is validated only for Linux, OSX, kFreeBSD, Hurd and some BSD targets
+#----------------------------------------------------------------------------------
+ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD DragonFly))
 HOST_OS = POSIX
 install: zstd
 	@echo Installing binaries
--- a/programs/bench.c
+++ b/programs/bench.c
@ -202,7 +202,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,

            /* overheat protection */
            if (UTIL_clockSpanMicro(coolTime, ticksPerSecond) > ACTIVEPERIOD_MICROSEC) {
-                DISPLAY("\rcooling down ...    \r");
+                DISPLAYLEVEL(2, "\rcooling down ...    \r");
                UTIL_sleep(COOLPERIOD_SEC);
                UTIL_getTime(&coolTime);
            }
@ -352,7 +352,7 @@ static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize,
                            const size_t* fileSizes, unsigned nbFiles,
                            const void* dictBuffer, size_t dictBufferSize)
 {
-    benchResult_t result, total;
+    benchResult_t result;
    int l;

    const char* pch = strrchr(displayName, '\\'); /* Windows */
@ -362,7 +362,6 @@ static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize,
    SET_HIGH_PRIORITY;

    memset(&result, 0, sizeof(result));
-    memset(&total, 0, sizeof(total));

    if (g_displayLevel == 1 && !g_additionalParam)
        DISPLAY("bench %s %s: input %u bytes, %i iterations, %u KB blocks\n", ZSTD_VERSION_STRING, ZSTD_GIT_COMMIT_STRING, (U32)benchedSize, g_nbIterations, (U32)(g_blockSize>>10));
@ -379,18 +378,7 @@ static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize,
                DISPLAY("%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s (param=%d)\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName, g_additionalParam);
            else
                DISPLAY("%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName);
-            total.cSize += result.cSize;
-            total.cSpeed += result.cSpeed;
-            total.dSpeed += result.dSpeed;
-            total.ratio += result.ratio;
    }   }
-    if (g_displayLevel == 1 && cLevelLast > cLevel) {
-        total.cSize /= 1+cLevelLast-cLevel;
-        total.cSpeed /= 1+cLevelLast-cLevel;
-        total.dSpeed /= 1+cLevelLast-cLevel;
-        total.ratio /= 1+cLevelLast-cLevel;
-        DISPLAY("avg%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s\n", (int)total.cSize, total.ratio, total.cSpeed, total.dSpeed, displayName);
-    }
 }


--- a/programs/datagencli.c
+++ b/programs/datagencli.c
@ -39,7 +39,7 @@
 #define MB *(1 <<20)
 #define GB *(1U<<30)

-#define SIZE_DEFAULT (64 KB)
+#define SIZE_DEFAULT ((64 KB) + 1)
 #define SEED_DEFAULT 0
 #define COMPRESSIBILITY_DEFAULT 50

@ -72,15 +72,13 @@ static int usage(const char* programName)

 int main(int argc, const char** argv)
 {
-    int argNb;
    double proba = (double)COMPRESSIBILITY_DEFAULT / 100;
    double litProba = 0.0;
    U64 size = SIZE_DEFAULT;
    U32 seed = SEED_DEFAULT;
-    const char* programName;
+    const char* const programName = argv[0];

-    /* Check command line */
-    programName = argv[0];
+    int argNb;
    for(argNb=1; argNb<argc; argNb++) {
        const char* argument = argv[argNb];

--- a/programs/dibio.c
+++ b/programs/dibio.c
@ -202,9 +202,16 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,

    /* Checks */
    if ((!fileSizes) || (!srcBuffer) || (!dictBuffer)) EXM_THROW(12, "not enough memory for DiB_trainFiles");   /* should not happen */
+    g_displayLevel = params.notificationLevel;
+    if (nbFiles < 5) {
+        DISPLAYLEVEL(2, "!  Warning : nb of samples too low for proper processing \n");
+        DISPLAYLEVEL(2, "!  Please provide one file per sample \n");
+        DISPLAYLEVEL(2, "!  Avoid concatenating multiple samples into a single file \n");
+        DISPLAYLEVEL(2, "!  otherwise, dictBuilder will be unable to find the beginning of each sample \n");
+        DISPLAYLEVEL(2, "!  resulting in distorted statistics \n");
+    }

    /* init */
-    g_displayLevel = params.notificationLevel;
    if (benchedSize < totalSizeToLoad)
        DISPLAYLEVEL(1, "Not enough memory; training on %u MB only...\n", (unsigned)(benchedSize >> 20));

--- a/programs/fileio.c
+++ b/programs/fileio.c
@ -180,7 +180,7 @@ static FILE* FIO_openSrcFile(const char* srcFileName)
    return f;
 }

-
+/* `dstFileName` must be non-NULL */
 static FILE* FIO_openDstFile(const char* dstFileName)
 {
    FILE* f;
@ -636,13 +636,12 @@ unsigned long long FIO_decompressFrame(dRess_t ress,
        DISPLAYUPDATE(2, "\rDecoded : %u MB...     ", (U32)(frameSize>>20) );

        if (toRead == 0) break;   /* end of frame */
-        if (readSize) EXM_THROW(38, "Decoding error : should consume entire input");
+        if (readSize) EXM_THROW(37, "Decoding error : should consume entire input");

        /* Fill input buffer */
-        if (toRead > ress.srcBufferSize) EXM_THROW(34, "too large block");
+        if (toRead > ress.srcBufferSize) EXM_THROW(38, "too large block");
        readSize = fread(ress.srcBuffer, 1, toRead, finput);
-        if (readSize != toRead)
-            EXM_THROW(35, "Read error");
+        if (readSize == 0) EXM_THROW(39, "Read error : premature end");
    }

    FIO_fwriteSparseEnd(foutput, storedSkips);
@ -683,6 +682,7 @@ static int FIO_decompressSrcFile(dRess_t ress, const char* srcFileName)
    unsigned long long filesize = 0;
    FILE* const dstFile = ress.dstFile;
    FILE* srcFile;
+    unsigned readSomething = 0;

    if (UTIL_isDirectory(srcFileName)) {
        DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName);
@ -696,8 +696,12 @@ static int FIO_decompressSrcFile(dRess_t ress, const char* srcFileName)
        /* check magic number -> version */
        size_t const toRead = 4;
        size_t const sizeCheck = fread(ress.srcBuffer, (size_t)1, toRead, srcFile);
-        if (sizeCheck==0) break;   /* no more input */
-        if (sizeCheck != toRead) EXM_THROW(31, "zstd: %s read error : cannot read header", srcFileName);
+        if (sizeCheck==0) {
+            if (readSomething==0) { DISPLAY("zstd: %s: unexpected end of file \n", srcFileName); fclose(srcFile); return 1; }  /* nothing was ever read : srcFileName is empty */
+            break;   /* no more input */
+        }
+        readSomething = 1;
+        if (sizeCheck != toRead) { DISPLAY("zstd: %s: unknown header \n", srcFileName); fclose(srcFile); return 1; }  /* short read : fewer than `toRead` bytes were available, too few for the magic number check below */
        {   U32 const magic = MEM_readLE32(ress.srcBuffer);
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
            if (ZSTD_isLegacy(ress.srcBuffer, 4)) {
@ -705,8 +709,8 @@ static int FIO_decompressSrcFile(dRess_t ress, const char* srcFileName)
                continue;
            }
 #endif
-            if (((magic & 0xFFFFFFF0U) != ZSTD_MAGIC_SKIPPABLE_START) && (magic != ZSTD_MAGICNUMBER)) {
-                if (g_overwrite) {  /* -df : pass-through mode */
+            if (((magic & 0xFFFFFFF0U) != ZSTD_MAGIC_SKIPPABLE_START) & (magic != ZSTD_MAGICNUMBER)) {
+                if ((g_overwrite) && !strcmp (srcFileName, stdinmark)) {  /* pass-through mode */
                    unsigned const result = FIO_passThrough(dstFile, srcFile, ress.srcBuffer, ress.srcBufferSize);
                    if (fclose(srcFile)) EXM_THROW(32, "zstd: %s close error", srcFileName);  /* error should never happen */
                    return result;
@ -744,7 +748,7 @@ static int FIO_decompressDstFile(dRess_t ress,
    result = FIO_decompressSrcFile(ress, srcFileName);

    if (fclose(ress.dstFile)) EXM_THROW(38, "Write error : cannot properly close %s", dstFileName);
-    if (result != 0) if (remove(dstFileName)) EXM_THROW(39, "remove %s error : %s", dstFileName, strerror(errno));
+    if (result != 0) if (remove(dstFileName)) result=1;   /* don't do anything if remove fails */
    return result;
 }

--- a/programs/fileio.h
+++ b/programs/fileio.h
@ -31,7 +31,6 @@ extern "C" {
 /* *************************************
 *  Special i/o constants
 **************************************/
-#define nullString "null"
 #define stdinmark "stdin"
 #define stdoutmark "stdout"
 #ifdef _WIN32
--- a/programs/fullbench.c
+++ b/programs/fullbench.c
@ -31,8 +31,9 @@
 #include <time.h>        /* clock_t, clock, CLOCKS_PER_SEC */

 #include "mem.h"
+#include "zstd_internal.h"   /* ZSTD_blockHeaderSize, blockType_e, KB, MB */
 #define ZSTD_STATIC_LINKING_ONLY  /* ZSTD_compressBegin, ZSTD_compressContinue, etc. */
-#include "zstd.h"        /* ZSTD_VERSION_STRING */
+#include "zstd.h"            /* ZSTD_VERSION_STRING */
 #define FSE_STATIC_LINKING_ONLY   /* FSE_DTABLE_SIZE_U32 */
 #include "fse.h"
 #include "zbuff.h"
@ -46,10 +47,6 @@
 #define AUTHOR "Yann Collet"
 #define WELCOME_MESSAGE "*** %s %s %i-bits, by %s (%s) ***\n", PROGRAM_DESCRIPTION, ZSTD_VERSION_STRING, (int)(sizeof(void*)*8), AUTHOR, __DATE__

-
-#define KB *(1<<10)
-#define MB *(1<<20)
-
 #define NBLOOPS    6
 #define TIMELOOP_S 2

@ -110,9 +107,8 @@ static size_t BMK_findMaxMem(U64 requiredMem)
 /*_*******************************************************
 *  Benchmark wrappers
 *********************************************************/
-typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
 typedef struct {
-    blockType_t blockType;
+    blockType_e blockType;
    U32 unusedBits;
    U32 origSize;
 } blockProperties_t;
@ -177,12 +173,9 @@ static size_t local_ZBUFF_decompress(void* dst, size_t dstCapacity, void* buff2,
 static ZSTD_CCtx* g_zcc = NULL;
 size_t local_ZSTD_compressContinue(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize)
 {
-    size_t compressedSize;
    (void)buff2;
    ZSTD_compressBegin(g_zcc, 1);
-    compressedSize = ZSTD_compressContinue(g_zcc, dst, dstCapacity, src, srcSize);
-    compressedSize += ZSTD_compressEnd(g_zcc, ((char*)dst)+compressedSize, dstCapacity-compressedSize);
-    return compressedSize;
+    return ZSTD_compressEnd(g_zcc, dst, dstCapacity, src, srcSize);
 }

 size_t local_ZSTD_decompressContinue(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize)
@ -214,8 +207,8 @@ size_t local_ZSTD_decompressContinue(void* dst, size_t dstCapacity, void* buff2,
 static size_t benchMem(const void* src, size_t srcSize, U32 benchNb)
 {
    BYTE*  dstBuff;
-    size_t dstBuffSize;
-    BYTE*  buff2;
+    size_t const dstBuffSize = ZSTD_compressBound(srcSize);
+    void*  buff2;
    const char* benchName;
    size_t (*benchFunction)(void* dst, size_t dstSize, void* verifBuff, const void* src, size_t srcSize);
    double bestTime = 100000000.;
@ -252,9 +245,8 @@ static size_t benchMem(const void* src, size_t srcSize, U32 benchNb)
    }

    /* Allocation */
-    dstBuffSize = ZSTD_compressBound(srcSize);
    dstBuff = (BYTE*)malloc(dstBuffSize);
-    buff2 = (BYTE*)malloc(dstBuffSize);
+    buff2 = malloc(dstBuffSize);
    if ((!dstBuff) || (!buff2)) {
        DISPLAY("\nError: not enough memory!\n");
        free(dstBuff); free(buff2);
@ -287,7 +279,7 @@ static size_t benchMem(const void* src, size_t srcSize, U32 benchNb)
                DISPLAY("ZSTD_decodeLiteralsBlock : impossible to test on this sample (not compressible)\n");
                goto _cleanOut;
            }
-            skippedSize = frameHeaderSize + 3 /* ZSTD_blockHeaderSize */;
+            skippedSize = frameHeaderSize + ZSTD_blockHeaderSize;
            memcpy(buff2, dstBuff+skippedSize, g_cSize-skippedSize);
            srcSize = srcSize > 128 KB ? 128 KB : srcSize;    /* speed relative to block */
            break;
@ -309,9 +301,9 @@ static size_t benchMem(const void* src, size_t srcSize, U32 benchNb)
                DISPLAY("ZSTD_decodeSeqHeaders : impossible to test on this sample (not compressible)\n");
                goto _cleanOut;
            }
-            iend = ip + 3 /* ZSTD_blockHeaderSize */ + cBlockSize;   /* End of first block */
-            ip += 3 /* ZSTD_blockHeaderSize */;                     /* skip block header */
-            ip += ZSTD_decodeLiteralsBlock(g_zdc, ip, iend-ip);  /* skip literal segment */
+            iend = ip + ZSTD_blockHeaderSize + cBlockSize;   /* End of first block */
+            ip += ZSTD_blockHeaderSize;                      /* skip block header */
+            ip += ZSTD_decodeLiteralsBlock(g_zdc, ip, iend-ip);   /* skip literal segment */
            g_cSize = iend-ip;
            memcpy(buff2, ip, g_cSize);   /* copy rest of block (it starts by SeqHeader) */
            srcSize = srcSize > 128 KB ? 128 KB : srcSize;   /* speed relative to block */
--- a/programs/fuzzer.c
+++ b/programs/fuzzer.c
@ -35,19 +35,19 @@
 /*-************************************
 *  Includes
 **************************************/
-#include <stdlib.h>      /* free */
-#include <stdio.h>       /* fgets, sscanf */
-#include <sys/timeb.h>   /* timeb */
-#include <string.h>      /* strcmp */
-#include <time.h>        /* clock_t */
+#include <stdlib.h>       /* free */
+#include <stdio.h>        /* fgets, sscanf */
+#include <sys/timeb.h>    /* timeb */
+#include <string.h>       /* strcmp */
+#include <time.h>         /* clock_t */
 #define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_compressContinue, ZSTD_compressBlock */
-#include "zstd.h"        /* ZSTD_VERSION_STRING */
+#include "zstd.h"         /* ZSTD_VERSION_STRING */
 #include "error_public.h" /* ZSTD_getErrorCode */
-#include "zdict.h"       /* ZDICT_trainFromBuffer */
-#include "datagen.h"     /* RDG_genBuffer */
+#include "zdict.h"        /* ZDICT_trainFromBuffer */
+#include "datagen.h"      /* RDG_genBuffer */
 #include "mem.h"
 #define XXH_STATIC_LINKING_ONLY
-#include "xxhash.h"      /* XXH64 */
+#include "xxhash.h"       /* XXH64 */


 /*-************************************
@ -145,8 +145,8 @@ static int basicUnitTests(U32 seed, double compressibility)
    DISPLAYLEVEL(4, "OK \n");

    DISPLAYLEVEL(4, "test%3i : decompress %u bytes : ", testNb++, (U32)CNBuffSize);
-    CHECKPLUS( r , ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize),
-               if (r != CNBuffSize) goto _output_error);
+    { size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize);
+      if (r != CNBuffSize) goto _output_error; }
    DISPLAYLEVEL(4, "OK \n");

    DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++);
@ -186,11 +186,9 @@ static int basicUnitTests(U32 seed, double compressibility)

        DISPLAYLEVEL(4, "test%3i : compress with flat dictionary : ", testNb++);
        cSize = 0;
-        CHECKPLUS(r, ZSTD_compressContinue(ctxOrig, compressedBuffer, ZSTD_compressBound(CNBuffSize),
+        CHECKPLUS(r, ZSTD_compressEnd(ctxOrig, compressedBuffer, ZSTD_compressBound(CNBuffSize),
                                           (const char*)CNBuffer + dictSize, CNBuffSize - dictSize),
                  cSize += r);
-        CHECKPLUS(r, ZSTD_compressEnd(ctxOrig, (char*)compressedBuffer+cSize, ZSTD_compressBound(CNBuffSize)-cSize),
-                  cSize += r);
        DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/CNBuffSize*100);

        DISPLAYLEVEL(4, "test%3i : frame built with flat dictionary should be decompressible : ", testNb++);
@ -204,11 +202,9 @@ static int basicUnitTests(U32 seed, double compressibility)
        DISPLAYLEVEL(4, "test%3i : compress with duplicated context : ", testNb++);
        {   size_t const cSizeOrig = cSize;
            cSize = 0;
-            CHECKPLUS(r, ZSTD_compressContinue(ctxDuplicated, compressedBuffer, ZSTD_compressBound(CNBuffSize),
+            CHECKPLUS(r, ZSTD_compressEnd(ctxDuplicated, compressedBuffer, ZSTD_compressBound(CNBuffSize),
                                               (const char*)CNBuffer + dictSize, CNBuffSize - dictSize),
                      cSize += r);
-            CHECKPLUS(r, ZSTD_compressEnd(ctxDuplicated, (char*)compressedBuffer+cSize, ZSTD_compressBound(CNBuffSize)-cSize),
-                      cSize += r);
            if (cSize != cSizeOrig) goto _output_error;   /* should be identical ==> same size */
        }
        DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/CNBuffSize*100);
@ -696,7 +692,7 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD
                totalTestSize += segmentSize;
        }   }

-        {   size_t const flushResult = ZSTD_compressEnd(ctx, cBuffer+cSize, cBufferSize-cSize);
+        {   size_t const flushResult = ZSTD_compressEnd(ctx, cBuffer+cSize, cBufferSize-cSize, NULL, 0);
            CHECK (ZSTD_isError(flushResult), "multi-segments epilogue error : %s", ZSTD_getErrorName(flushResult));
            cSize += flushResult;
        }
--- a/programs/legacy/fileio_legacy.c
+++ b/programs/legacy/fileio_legacy.c
@ -548,6 +548,81 @@ unsigned long long FIOv06_decompressFrame(dRessv06_t ress,
 }


+/*=====    v0.7.x    =====*/
+/* dRessv07_t : resources used to decode a v0.7.x legacy frame : I/O buffers, optional dictionary, and the ZBUFFv07 streaming context */
+typedef struct {
+    void*  srcBuffer;         /* input staging buffer (allocated by FIOv07_createDResources()) */
+    size_t srcBufferSize;     /* capacity of srcBuffer, in bytes */
+    void*  dstBuffer;         /* output staging buffer (allocated by FIOv07_createDResources()) */
+    size_t dstBufferSize;     /* capacity of dstBuffer, in bytes */
+    const void*  dictBuffer;  /* dictionary content; NULL until a caller sets it */
+    size_t dictBufferSize;    /* size of dictBuffer, in bytes; 0 until a caller sets it */
+    ZBUFFv07_DCtx* dctx;      /* ZBUFFv07 streaming decompression context */
+} dRessv07_t;
+/* FIOv07_createDResources() : creates the ZBUFFv07 context and allocates both I/O buffers; failures are reported through EXM_THROW() */
+static dRessv07_t FIOv07_createDResources(void)
+{
+    dRessv07_t ress;
+
+    /* init : create the streaming context; the dictionary starts unset (NULL, 0) */
+    ress.dctx = ZBUFFv07_createDCtx();
+    if (ress.dctx==NULL) EXM_THROW(60, "Can't create ZBUFF decompression context");
+    ress.dictBuffer = NULL; ress.dictBufferSize=0;
+
+    /* Allocate Memory : buffer sizes are the ones recommended by ZBUFFv07; both results are checked together below */
+    ress.srcBufferSize = ZBUFFv07_recommendedDInSize();
+    ress.srcBuffer = malloc(ress.srcBufferSize);
+    ress.dstBufferSize = ZBUFFv07_recommendedDOutSize();
+    ress.dstBuffer = malloc(ress.dstBufferSize);
+    if (!ress.srcBuffer || !ress.dstBuffer) EXM_THROW(61, "Allocation error : not enough memory");
+
+    return ress;   /* returned by value; release with FIOv07_freeDResources() */
+}
+/* FIOv07_freeDResources() : releases the ZBUFFv07 context and both I/O buffers; ress.dictBuffer is not freed here */
+static void FIOv07_freeDResources(dRessv07_t ress)
+{
+    size_t const errorCode = ZBUFFv07_freeDCtx(ress.dctx);   /* context is released first, and its result is checked */
+    if (ZBUFFv07_isError(errorCode)) EXM_THROW(69, "Error : can't free ZBUFF context resource : %s", ZBUFFv07_getErrorName(errorCode));
+    free(ress.srcBuffer);   /* buffers obtained with malloc() in FIOv07_createDResources() */
+    free(ress.dstBuffer);
+}
+
+/* FIOv07_decompressFrame() : decodes one v0.7.x frame from finput and writes it to foutput, using the buffers, dictionary and context held in ress. @return : total number of decoded bytes written */
+unsigned long long FIOv07_decompressFrame(dRessv07_t ress,
+                                          FILE* foutput, FILE* finput)
+{
+    U64    frameSize = 0;   /* running total of decoded bytes */
+    size_t readSize  = 4;   /* srcBuffer initially holds only the 4-byte magic number written below */
+    /* NOTE(review): the magic number is rebuilt in srcBuffer rather than read from finput - presumably the caller already consumed it; confirm */
+    MEM_writeLE32(ress.srcBuffer, ZSTDv07_MAGICNUMBER);
+    ZBUFFv07_decompressInitDictionary(ress.dctx, ress.dictBuffer, ress.dictBufferSize);   /* return value is not checked here */
+
+    while (1) {
+        /* Decode : inSize and decodedSize are passed by address; after the call they are used as bytes consumed / bytes produced */
+        size_t inSize=readSize, decodedSize=ress.dstBufferSize;
+        size_t toRead = ZBUFFv07_decompressContinue(ress.dctx, ress.dstBuffer, &decodedSize, ress.srcBuffer, &inSize);
+        if (ZBUFFv07_isError(toRead)) EXM_THROW(36, "Decoding error : %s", ZBUFFv07_getErrorName(toRead));
+        readSize -= inSize;   /* what remains is input left unconsumed in srcBuffer */
+
+        /* Write block : a short write is reported as an error */
+        { size_t const sizeCheck = fwrite(ress.dstBuffer, 1, decodedSize, foutput);
+          if (sizeCheck != decodedSize) EXM_THROW(37, "Write error : unable to write data block to destination file"); }
+        frameSize += decodedSize;
+        DISPLAYUPDATE(2, "\rDecoded : %u MB...     ", (U32)(frameSize>>20) );
+
+        if (toRead == 0) break;   /* the loop treats a 0 result as end of frame */
+        if (readSize) EXM_THROW(38, "Decoding error : should consume entire input");   /* srcBuffer is refilled from its start, so leftovers cannot be kept */
+
+        /* Fill input buffer : read exactly the amount requested by the decoder; a short read (including end of file) is an error */
+        if (toRead > ress.srcBufferSize) EXM_THROW(34, "too large block");
+        readSize = fread(ress.srcBuffer, 1, toRead, finput);
+        if (readSize != toRead) EXM_THROW(35, "Read error");
+    }
+
+    return frameSize;
+}
+
+
 /*=====   General legacy dispatcher   =====*/

 unsigned long long FIO_decompressLegacyFrame(FILE* foutput, FILE* finput,
@ -584,6 +659,14 @@ unsigned long long FIO_decompressLegacyFrame(FILE* foutput, FILE* finput,
                    FIOv06_freeDResources(r);
                    return s;
            }   }
+        case ZSTDv07_MAGICNUMBER :
+            {   dRessv07_t r = FIOv07_createDResources();
+                r.dictBuffer = dictBuffer;
+                r.dictBufferSize = dictSize;
+                {   unsigned long long const s = FIOv07_decompressFrame(r, foutput, finput);
+                    FIOv07_freeDResources(r);
+                    return s;
+            }   }
        default :
            return ERROR(prefix_unknown);
    }
--- a/programs/paramgrill.c
+++ b/programs/paramgrill.c
@ -340,7 +340,7 @@ typedef struct {

 static void BMK_printWinners2(FILE* f, const winnerInfo_t* winners, size_t srcSize)
 {
-    unsigned cLevel;
+    int cLevel;

    fprintf(f, "\n /* Proposed configurations : */ \n");
    fprintf(f, "    /* W,  C,  H,  S,  L,  T, strat */ \n");
@ -364,7 +364,7 @@ static int BMK_seed(winnerInfo_t* winners, const ZSTD_compressionParameters para
 {
    BMK_result_t testResult;
    int better = 0;
-    unsigned cLevel;
+    int cLevel;

    BMK_benchParam(&testResult, srcBuffer, srcSize, ctx, params);

@ -618,9 +618,9 @@ static void BMK_benchMem(void* srcBuffer, size_t srcSize)
    }

    /* establish speed objectives (relative to level 1) */
-    {   unsigned u;
-        for (u=2; u<=ZSTD_maxCLevel(); u++)
-            g_cSpeedTarget[u] = (g_cSpeedTarget[u-1] * 25) / 32;
+    {   int i;
+        for (i=2; i<=ZSTD_maxCLevel(); i++)
+            g_cSpeedTarget[i] = (g_cSpeedTarget[i-1] * 25) / 32;
    }

    /* populate initial solution */
--- a/programs/playTests.sh
+++ b/programs/playTests.sh
@ -16,7 +16,7 @@ roundTripTest() {
    rm -f tmp1 tmp2
    $ECHO "roundTripTest: ./datagen $1 $p | $ZSTD -v$c | $ZSTD -d"
    ./datagen $1 $p | $MD5SUM > tmp1
-    ./datagen $1 $p | $ZSTD -vq$c | $ZSTD -d  | $MD5SUM > tmp2
+    ./datagen $1 $p | $ZSTD -v$c | $ZSTD -d  | $MD5SUM > tmp2
    diff -q tmp1 tmp2
 }

@ -96,6 +96,13 @@ cat hello.zstd world.zstd > helloworld.zstd
 $ZSTD -dc helloworld.zstd > result.tmp
 cat result.tmp
 sdiff helloworld.tmp result.tmp
+$ECHO "frame concatenation without checksum"
+$ZSTD -c hello.tmp > hello.zstd --no-check
+$ZSTD -c world.tmp > world.zstd --no-check
+cat hello.zstd world.zstd > helloworld.zstd
+$ZSTD -dc helloworld.zstd > result.tmp
+cat result.tmp
+sdiff helloworld.tmp result.tmp
 rm ./*.tmp ./*.zstd
 $ECHO "frame concatenation tests completed"

@ -142,8 +149,8 @@ $ECHO "\n**** multiple files tests **** "
 ./datagen -s1        > tmp1 2> $INTOVOID
 ./datagen -s2 -g100K > tmp2 2> $INTOVOID
 ./datagen -s3 -g1M   > tmp3 2> $INTOVOID
-$ZSTD -f tmp*
 $ECHO "compress tmp* : "
+$ZSTD -f tmp*
 ls -ls tmp*
 rm tmp1 tmp2 tmp3
 $ECHO "decompress tmp* : "
@ -204,8 +211,16 @@ $ZSTD -t tmp1.zst
 $ZSTD --test tmp1.zst
 $ECHO "test multiple files (*.zst) "
 $ZSTD -t *.zst
-$ECHO "test good and bad files (*) "
+$ECHO "test bad files (*) "
 $ZSTD -t * && die "bad files not detected !"
+$ZSTD -t tmp1 && die "bad file not detected !"
+cp tmp1 tmp2.zst
+$ZSTD -t tmp2.zst && die "bad file not detected !"
+./datagen -g0 > tmp3
+$ZSTD -t tmp3 && die "bad file not detected !"   # detects 0-sized files as bad
+$ECHO "test --rm and --test combined "
+$ZSTD -t --rm tmp1.zst
+ls -ls tmp1.zst  # check file is still present


 $ECHO "\n**** zstd round-trip tests **** "
--- a/programs/util.h
+++ b/programs/util.h
@ -284,6 +284,7 @@ UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_
        return 0;
    }

+    errno = 0;
    while ((entry = readdir(dir)) != NULL) {
        if (strcmp (entry->d_name, "..") == 0 ||
            strcmp (entry->d_name, ".") == 0) continue;
@ -310,8 +311,14 @@ UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_
            }
         //   printf ("%s/%s nbFiles=%d left=%d\n", dirName, entry->d_name, nbFiles, (int)(bufEnd - *bufStart));
        }
+        errno = 0; // clear errno after UTIL_isDirectory, UTIL_prepareFileList
    }

+    if (errno != 0) {
+        fprintf(stderr, "readdir(%s) error: %s\n", dirName, strerror(errno));
+        free(*bufStart);
+        *bufStart = NULL;
+    }
    closedir(dir);
    return nbFiles;
 }
--- a/programs/zbufftest.c
+++ b/programs/zbufftest.c
@ -424,23 +424,22 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compres
                U32 const enoughDstSize = dstBuffSize >= remainingToFlush;
                remainingToFlush = ZBUFF_compressEnd(zc, cBuffer+cSize, &dstBuffSize);
                CHECK (ZBUFF_isError(remainingToFlush), "flush error : %s", ZBUFF_getErrorName(remainingToFlush));
-                //DISPLAY("flush %u bytes : still within context : %i \n", (U32)dstBuffSize, (int)remainingToFlush);
-                CHECK (enoughDstSize && remainingToFlush, "ZBUFF_compressEnd() not fully flushed, but enough space available");
+                CHECK (enoughDstSize && remainingToFlush, "ZBUFF_compressEnd() not fully flushed (%u remaining), but enough space available", (U32)remainingToFlush);
                cSize += dstBuffSize;
        }   }
        crcOrig = XXH64_digest(&xxhState);

        /* multi - fragments decompression test */
        ZBUFF_decompressInitDictionary(zd, dict, dictSize);
-        for (totalCSize = 0, totalGenSize = 0 ; totalCSize < cSize ; ) {
+        errorCode = 1;
+        for (totalCSize = 0, totalGenSize = 0 ; errorCode ; ) {
            size_t readCSrcSize = FUZ_randomLength(&lseed, maxSampleLog);
            size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog);
            size_t dstBuffSize = MIN(dstBufferSize - totalGenSize, randomDstSize);
-            size_t const decompressError = ZBUFF_decompressContinue(zd, dstBuffer+totalGenSize, &dstBuffSize, cBuffer+totalCSize, &readCSrcSize);
-            CHECK (ZBUFF_isError(decompressError), "decompression error : %s", ZBUFF_getErrorName(decompressError));
+            errorCode = ZBUFF_decompressContinue(zd, dstBuffer+totalGenSize, &dstBuffSize, cBuffer+totalCSize, &readCSrcSize);
+            CHECK (ZBUFF_isError(errorCode), "decompression error : %s", ZBUFF_getErrorName(errorCode));
            totalGenSize += dstBuffSize;
            totalCSize += readCSrcSize;
-            errorCode = decompressError;   /* needed for != 0 last test */
        }
        CHECK (errorCode != 0, "frame not fully decoded");
        CHECK (totalGenSize != totalTestSize, "decompressed data : wrong size")
--- a/programs/zstd.1
+++ b/programs/zstd.1
@ -43,7 +43,7 @@ It also features a very fast decoder, with speed > 500 MB/s per core.
 .SH OPTIONS
 .TP
 .B \-#
- # compression level [1-22] (default:1)
+ # compression level [1-22] (default:3)
 .TP
 .BR \-d ", " --decompress
 decompression
@ -80,7 +80,8 @@ It also features a very fast decoder, with speed > 500 MB/s per core.
 verbose mode
 .TP
 .BR \-q ", " --quiet
- suppress warnings and notifications; specify twice to suppress errors too
+ suppress warnings, interactivity and notifications.
+ Specify twice to suppress errors too.
 .TP
 .BR \-C ", " --check
 add integrity check computed from uncompressed data
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@ -90,7 +90,7 @@

 static const char*    g_defaultDictName = "dictionary";
 static const unsigned g_defaultMaxDictSize = 110 KB;
-static const unsigned g_defaultDictCLevel = 5;
+static const int      g_defaultDictCLevel = 5;
 static const unsigned g_defaultSelectivityLevel = 9;


@ -115,7 +115,7 @@ static int usage(const char* programName)
    DISPLAY( "          with no FILE, or when FILE is - , read standard input\n");
    DISPLAY( "Arguments :\n");
 #ifndef ZSTD_NOCOMPRESS
-    DISPLAY( " -#     : # compression level (1-%u, default:1) \n", ZSTD_maxCLevel());
+    DISPLAY( " -#     : # compression level (1-%u, default:%u) \n", ZSTD_maxCLevel(), ZSTDCLI_CLEVEL_DEFAULT);
 #endif
 #ifndef ZSTD_NODECOMPRESS
    DISPLAY( " -d     : decompression \n");
@ -206,6 +206,7 @@ int main(int argCount, const char** argv)
    int argNb,
        bench=0,
        decode=0,
+        testmode=0,
        forceStdout=0,
        main_pause=0,
        nextEntryIsDictionary=0,
@ -215,8 +216,8 @@ int main(int argCount, const char** argv)
        nextArgumentIsMaxDict=0,
        nextArgumentIsDictID=0,
        nextArgumentIsFile=0;
-    unsigned cLevel = ZSTDCLI_CLEVEL_DEFAULT;
-    unsigned cLevelLast = 1;
+    int cLevel = ZSTDCLI_CLEVEL_DEFAULT;
+    int cLevelLast = 1;
    unsigned recursive = 0;
    const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*));   /* argCount >= 1 */
    unsigned filenameIdx = 0;
@ -226,7 +227,7 @@ int main(int argCount, const char** argv)
    char* dynNameSpace = NULL;
    unsigned maxDictSize = g_defaultMaxDictSize;
    unsigned dictID = 0;
-    unsigned dictCLevel = g_defaultDictCLevel;
+    int dictCLevel = g_defaultDictCLevel;
    unsigned dictSelect = g_defaultSelectivityLevel;
 #ifdef UTIL_HAS_CREATEFILELIST
    const char** fileNamesTable = NULL;
@ -273,7 +274,7 @@ int main(int argCount, const char** argv)
            if (!strcmp(argument, "--no-dictID")) { FIO_setDictIDFlag(0); continue; }
            if (!strcmp(argument, "--sparse")) { FIO_setSparseWrite(2); continue; }
            if (!strcmp(argument, "--no-sparse")) { FIO_setSparseWrite(0); continue; }
-            if (!strcmp(argument, "--test")) { decode=1; outFileName=nulmark; FIO_overwriteMode(); continue; }
+            if (!strcmp(argument, "--test")) { testmode=1; decode=1; continue; }
            if (!strcmp(argument, "--train")) { dictBuild=1; outFileName=g_defaultDictName; continue; }
            if (!strcmp(argument, "--maxdict")) { nextArgumentIsMaxDict=1; continue; }
            if (!strcmp(argument, "--dictID")) { nextArgumentIsDictID=1; continue; }
@ -337,7 +338,7 @@ int main(int argCount, const char** argv)
                    case 'C': argument++; FIO_setChecksumFlag(2); break;

                        /* test compressed file */
-                    case 't': decode=1; outFileName=nulmark; argument++; break;
+                    case 't': testmode=1; decode=1; argument++; break;

                        /* destination file name */
                    case 'o': nextArgumentIsOutFileName=1; argument++; break;
@ -441,7 +442,7 @@ int main(int argCount, const char** argv)
        fileNamesTable = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf, &fileNamesNb);
        if (fileNamesTable) {
            unsigned i;
-            for (i=0; i<fileNamesNb; i++) DISPLAYLEVEL(3, "%d %s\n", i, fileNamesTable[i]);
+            for (i=0; i<fileNamesNb; i++) DISPLAYLEVEL(4, "%d %s\n", i, fileNamesTable[i]);
            free((void*)filenameTable);
            filenameTable = fileNamesTable;
            filenameIdx = fileNamesNb;
@ -474,7 +475,7 @@ int main(int argCount, const char** argv)

    /* No input filename ==> use stdin and stdout */
    filenameIdx += !filenameIdx;   /*< default input is stdin */
-    if (!strcmp(filenameTable[0], stdinmark) && !outFileName ) outFileName = stdoutmark;   /*< when input is stdin, default output is stdout */
+    if (!strcmp(filenameTable[0], stdinmark) && !outFileName) outFileName = stdoutmark;   /*< when input is stdin, default output is stdout */

    /* Check if input/output defined as console; trigger an error in this case */
    if (!strcmp(filenameTable[0], stdinmark) && IS_CONSOLE(stdin) ) CLEAN_RETURN(badusage(programName));
@ -489,7 +490,7 @@ int main(int argCount, const char** argv)

    /* No warning message in pipe mode (stdin + stdout) or multiple mode */
    if (!strcmp(filenameTable[0], stdinmark) && outFileName && !strcmp(outFileName,stdoutmark) && (displayLevel==2)) displayLevel=1;
-    if ((filenameIdx>1) && (displayLevel==2)) displayLevel=1;
+    if ((filenameIdx>1) & (displayLevel==2)) displayLevel=1;

    /* IO Stream/File */
    FIO_setNotificationLevel(displayLevel);
@ -503,6 +504,7 @@ int main(int argCount, const char** argv)
 #endif
    {  /* decompression */
 #ifndef ZSTD_NODECOMPRESS
+        if (testmode) { outFileName=nulmark; FIO_setRemoveSrcFile(0); } /* test mode */
        if (filenameIdx==1 && outFileName)
            operationResult = FIO_decompressFilename(outFileName, filenameTable[0], dictFileName);
        else
--- a/projects/VS2008/fullbench/fullbench.vcproj
+++ b/projects/VS2008/fullbench/fullbench.vcproj
@ -44,7 +44,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy"
 				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
@ -120,7 +120,7 @@
 				Optimization="2"
 				EnableIntrinsicFunctions="true"
 				OmitFramePointers="true"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy"
 				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
 				RuntimeLibrary="0"
 				EnableFunctionLevelLinking="true"
@ -194,7 +194,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy"
 				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
@ -271,7 +271,7 @@
 				Optimization="2"
 				EnableIntrinsicFunctions="true"
 				OmitFramePointers="true"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy"
 				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
 				RuntimeLibrary="0"
 				EnableFunctionLevelLinking="true"
@ -427,7 +427,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\zstd.h"
+				RelativePath="..\..\..\lib\zstd.h"
 				>
 			</File>
 			<File
--- a/projects/VS2008/fuzzer/fuzzer.vcproj
+++ b/projects/VS2008/fuzzer/fuzzer.vcproj
@ -44,7 +44,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy"
 				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
@ -120,7 +120,7 @@
 				Optimization="2"
 				EnableIntrinsicFunctions="true"
 				OmitFramePointers="true"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy"
 				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
 				RuntimeLibrary="0"
 				EnableFunctionLevelLinking="true"
@ -194,7 +194,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy"
 				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
@ -271,7 +271,7 @@
 				Optimization="2"
 				EnableIntrinsicFunctions="true"
 				OmitFramePointers="true"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy"
 				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
 				RuntimeLibrary="0"
 				EnableFunctionLevelLinking="true"
@ -439,7 +439,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\zstd.h"
+				RelativePath="..\..\..\lib\zstd.h"
 				>
 			</File>
 			<File
--- a/projects/VS2008/zstd/zstd.vcproj
+++ b/projects/VS2008/zstd/zstd.vcproj
@ -44,7 +44,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
 				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
@ -121,7 +121,7 @@
 				Optimization="2"
 				EnableIntrinsicFunctions="true"
 				OmitFramePointers="true"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
 				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
 				RuntimeLibrary="0"
 				EnableFunctionLevelLinking="true"
@ -196,7 +196,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
 				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
@ -274,7 +274,7 @@
 				Optimization="2"
 				EnableIntrinsicFunctions="true"
 				OmitFramePointers="true"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
 				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
 				RuntimeLibrary="0"
 				EnableFunctionLevelLinking="true"
@ -428,6 +428,10 @@
 				RelativePath="..\..\..\lib\legacy\zstd_v06.c"
 				>
 			</File>
+			<File
+				RelativePath="..\..\..\lib\legacy\zstd_v07.c"
+				>
+			</File>
 			<File
 				RelativePath="..\..\..\programs\zstdcli.c"
 				>
@ -495,7 +499,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\zstd.h"
+				RelativePath="..\..\..\lib\zstd.h"
 				>
 			</File>
 			<File
@ -538,6 +542,10 @@
 				RelativePath="..\..\..\lib\legacy\zstd_v06.h"
 				>
 			</File>
+			<File
+				RelativePath="..\..\..\lib\legacy\zstd_v07.h"
+				>
+			</File>
 		</Filter>
 	</Files>
 	<Globals>
--- a/projects/VS2008/zstdlib/zstdlib.vcproj
+++ b/projects/VS2008/zstdlib/zstdlib.vcproj
@ -44,7 +44,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
 				PreprocessorDefinitions="ZSTD_DLL_EXPORT=1;ZSTD_HEAPMODE=0;ZSTD_LEGACY_SUPPORT=0;WIN32;_DEBUG;_CONSOLE"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
@ -120,7 +120,7 @@
 				Optimization="2"
 				EnableIntrinsicFunctions="true"
 				OmitFramePointers="true"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
 				PreprocessorDefinitions="ZSTD_DLL_EXPORT=1;ZSTD_HEAPMODE=0;ZSTD_LEGACY_SUPPORT=0;WIN32;NDEBUG;_CONSOLE"
 				RuntimeLibrary="0"
 				EnableFunctionLevelLinking="true"
@ -194,7 +194,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
 				PreprocessorDefinitions="ZSTD_DLL_EXPORT=1;ZSTD_HEAPMODE=0;ZSTD_LEGACY_SUPPORT=0;WIN32;_DEBUG;_CONSOLE"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
@ -271,7 +271,7 @@
 				Optimization="2"
 				EnableIntrinsicFunctions="true"
 				OmitFramePointers="true"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
 				PreprocessorDefinitions="ZSTD_DLL_EXPORT=1;ZSTD_HEAPMODE=0;ZSTD_LEGACY_SUPPORT=0;WIN32;NDEBUG;_CONSOLE"
 				RuntimeLibrary="0"
 				EnableFunctionLevelLinking="true"
@ -443,7 +443,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\zstd.h"
+				RelativePath="..\..\..\lib\zstd.h"
 				>
 			</File>
 			<File
--- a/projects/VS2010/datagen/datagen.vcxproj.filters
+++ b/projects/VS2010/datagen/datagen.vcxproj.filters
@ -1,26 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup>
-    <Filter Include="Header Files">
-      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
-      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
-    </Filter>
-    <Filter Include="Source Files">
-      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
-      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
-    </Filter>
-  </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="..\..\..\programs\datagen.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\programs\datagencli.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-  </ItemGroup>
-  <ItemGroup>
-    <ClInclude Include="..\..\..\programs\datagen.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-  </ItemGroup>
-</Project>
--- a/projects/VS2010/fullbench/fullbench.vcxproj
+++ b/projects/VS2010/fullbench/fullbench.vcxproj
@ -65,24 +65,24 @@
  <PropertyGroup Label="UserMacros" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <LinkIncremental>true</LinkIncremental>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(UniversalCRT_IncludePath);</IncludePath>
    <RunCodeAnalysis>false</RunCodeAnalysis>
    <IntDir>$(Platform)\$(Configuration)\</IntDir>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <LinkIncremental>true</LinkIncremental>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(UniversalCRT_IncludePath);</IncludePath>
    <RunCodeAnalysis>false</RunCodeAnalysis>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
    <LinkIncremental>false</LinkIncremental>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(UniversalCRT_IncludePath);</IncludePath>
    <RunCodeAnalysis>false</RunCodeAnalysis>
    <IntDir>$(Platform)\$(Configuration)\</IntDir>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <LinkIncremental>false</LinkIncremental>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(UniversalCRT_IncludePath);</IncludePath>
    <RunCodeAnalysis>false</RunCodeAnalysis>
  </PropertyGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
@ -175,7 +175,7 @@
    <ClInclude Include="..\..\..\lib\common\huf.h" />
    <ClInclude Include="..\..\..\lib\common\xxhash.h" />
    <ClInclude Include="..\..\..\lib\common\zbuff.h" />
-    <ClInclude Include="..\..\..\lib\common\zstd.h" />
+    <ClInclude Include="..\..\..\lib\zstd.h" />
    <ClInclude Include="..\..\..\lib\common\zstd_internal.h" />
    <ClInclude Include="..\..\..\lib\compress\zstd_opt.h" />
    <ClInclude Include="..\..\..\lib\legacy\zstd_legacy.h" />
@ -185,4 +185,4 @@
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">
  </ImportGroup>
-</Project>
+</Project>
--- a/projects/VS2010/fullbench/fullbench.vcxproj.filters
+++ b/projects/VS2010/fullbench/fullbench.vcxproj.filters
@ -1,86 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup>
-    <Filter Include="Header Files">
-      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
-      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
-    </Filter>
-    <Filter Include="Source Files">
-      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
-      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
-    </Filter>
-  </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="..\..\..\lib\common\zstd_common.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\compress\fse_compress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\programs\fullbench.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\programs\datagen.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\compress\huf_compress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\decompress\huf_decompress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\compress\zstd_compress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\decompress\zstd_decompress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\compress\zbuff_compress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\decompress\zbuff_decompress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\common\fse_decompress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\common\entropy_common.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\common\xxhash.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-  </ItemGroup>
-  <ItemGroup>
-    <ClInclude Include="..\..\..\lib\common\fse.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\zstd.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\programs\datagen.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\huf.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\legacy\zstd_legacy.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\zbuff.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\compress\zstd_opt.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\zstd_internal.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\programs\util.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\xxhash.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-  </ItemGroup>
-</Project>
--- a/projects/VS2010/fuzzer/fuzzer.vcxproj
+++ b/projects/VS2010/fuzzer/fuzzer.vcxproj
@ -66,24 +66,24 @@
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <LinkIncremental>true</LinkIncremental>
    <RunCodeAnalysis>false</RunCodeAnalysis>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
    <IntDir>$(Platform)\$(Configuration)\</IntDir>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <LinkIncremental>true</LinkIncremental>
    <RunCodeAnalysis>false</RunCodeAnalysis>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
    <LinkIncremental>false</LinkIncremental>
    <RunCodeAnalysis>false</RunCodeAnalysis>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
    <IntDir>$(Platform)\$(Configuration)\</IntDir>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <LinkIncremental>false</LinkIncremental>
    <RunCodeAnalysis>false</RunCodeAnalysis>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
  </PropertyGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <ClCompile>
@ -176,7 +176,7 @@
    <ClInclude Include="..\..\..\lib\common\xxhash.h" />
    <ClInclude Include="..\..\..\lib\common\zbuff.h" />
    <ClInclude Include="..\..\..\lib\common\zstd_internal.h" />
-    <ClInclude Include="..\..\..\lib\common\zstd.h" />
+    <ClInclude Include="..\..\..\lib\zstd.h" />
    <ClInclude Include="..\..\..\lib\compress\zstd_opt.h" />
    <ClInclude Include="..\..\..\lib\dictBuilder\divsufsort.h" />
    <ClInclude Include="..\..\..\lib\dictBuilder\zdict.h" />
@ -187,4 +187,4 @@
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">
  </ImportGroup>
-</Project>
+</Project>
--- a/projects/VS2010/fuzzer/fuzzer.vcxproj.filters
+++ b/projects/VS2010/fuzzer/fuzzer.vcxproj.filters
@ -1,92 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup>
-    <Filter Include="Header Files">
-      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
-      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
-    </Filter>
-    <Filter Include="Source Files">
-      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
-      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
-    </Filter>
-  </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="..\..\..\programs\fuzzer.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\programs\datagen.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\common\zstd_common.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\compress\fse_compress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\compress\huf_compress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\decompress\huf_decompress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\compress\zstd_compress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\decompress\zstd_decompress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\common\fse_decompress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\common\entropy_common.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\common\xxhash.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\dictBuilder\divsufsort.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\dictBuilder\zdict.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-  </ItemGroup>
-  <ItemGroup>
-    <ClInclude Include="..\..\..\programs\datagen.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\legacy\zstd_legacy.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\fse.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\huf.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\zbuff.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\zstd_internal.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\zstd.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\compress\zstd_opt.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\programs\util.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\xxhash.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\dictBuilder\divsufsort.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\dictBuilder\zdict.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-  </ItemGroup>
-</Project>
--- a/projects/VS2010/zstd/zstd.vcxproj
+++ b/projects/VS2010/zstd/zstd.vcxproj
@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="utf-8"?>
+<?xml version="1.0" encoding="utf-8"?>
 <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <ItemGroup Label="ProjectConfigurations">
    <ProjectConfiguration Include="Debug|Win32">
@ -38,6 +38,7 @@
    <ClCompile Include="..\..\..\lib\legacy\zstd_v04.c" />
    <ClCompile Include="..\..\..\lib\legacy\zstd_v05.c" />
    <ClCompile Include="..\..\..\lib\legacy\zstd_v06.c" />
+    <ClCompile Include="..\..\..\lib\legacy\zstd_v07.c" />
    <ClCompile Include="..\..\..\programs\bench.c" />
    <ClCompile Include="..\..\..\programs\datagen.c" />
    <ClCompile Include="..\..\..\programs\dibio.c" />
@ -52,7 +53,7 @@
    <ClInclude Include="..\..\..\lib\common\fse.h" />
    <ClInclude Include="..\..\..\lib\common\huf.h" />
    <ClInclude Include="..\..\..\lib\common\zbuff.h" />
-    <ClInclude Include="..\..\..\lib\common\zstd.h" />
+    <ClInclude Include="..\..\..\lib\zstd.h" />
    <ClInclude Include="..\..\..\lib\common\zstd_internal.h" />
    <ClInclude Include="..\..\..\lib\compress\zstd_opt.h" />
    <ClInclude Include="..\..\..\lib\legacy\zstd_legacy.h" />
@ -62,6 +63,7 @@
    <ClInclude Include="..\..\..\lib\legacy\zstd_v04.h" />
    <ClInclude Include="..\..\..\lib\legacy\zstd_v05.h" />
    <ClInclude Include="..\..\..\lib\legacy\zstd_v06.h" />
+    <ClInclude Include="..\..\..\lib\legacy\zstd_v07.h" />
    <ClInclude Include="..\..\..\programs\bench.h" />
    <ClInclude Include="..\..\..\programs\datagen.h" />
    <ClInclude Include="..\..\..\programs\dibio.h" />
@ -116,27 +118,27 @@
  <PropertyGroup Label="UserMacros" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <LinkIncremental>true</LinkIncremental>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
    <RunCodeAnalysis>false</RunCodeAnalysis>
    <LibraryPath>$(LibraryPath)</LibraryPath>
    <IntDir>$(Platform)\$(Configuration)\</IntDir>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <LinkIncremental>true</LinkIncremental>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
    <RunCodeAnalysis>false</RunCodeAnalysis>
    <LibraryPath>$(LibraryPath);</LibraryPath>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
    <LinkIncremental>false</LinkIncremental>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
    <RunCodeAnalysis>false</RunCodeAnalysis>
    <LibraryPath>$(LibraryPath)</LibraryPath>
    <IntDir>$(Platform)\$(Configuration)\</IntDir>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <LinkIncremental>false</LinkIncremental>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
    <RunCodeAnalysis>false</RunCodeAnalysis>
    <LibraryPath>$(LibraryPath);</LibraryPath>
  </PropertyGroup>
@ -217,4 +219,4 @@
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">
  </ImportGroup>
-</Project>
+</Project>
--- a/projects/VS2010/zstd/zstd.vcxproj.filters
+++ b/projects/VS2010/zstd/zstd.vcxproj.filters
@ -1,158 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup>
-    <Filter Include="Header Files">
-      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
-      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
-    </Filter>
-    <Filter Include="Source Files">
-      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
-      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
-    </Filter>
-  </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="..\..\..\programs\bench.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\programs\fileio.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\programs\zstdcli.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\programs\dibio.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\programs\datagen.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\programs\legacy\fileio_legacy.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\legacy\zstd_v01.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\legacy\zstd_v02.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\legacy\zstd_v03.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\legacy\zstd_v04.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\legacy\zstd_v05.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\legacy\zstd_v06.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\common\zstd_common.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\compress\fse_compress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\compress\huf_compress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\compress\zbuff_compress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\compress\zstd_compress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\decompress\huf_decompress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\decompress\zbuff_decompress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\decompress\zstd_decompress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\dictBuilder\divsufsort.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\dictBuilder\zdict.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\common\fse_decompress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\common\entropy_common.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\common\xxhash.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-  </ItemGroup>
-  <ItemGroup>
-    <ClInclude Include="..\..\..\programs\bench.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\programs\fileio.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\programs\datagen.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\programs\legacy\fileio_legacy.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\legacy\zstd_legacy.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\legacy\zstd_v01.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\legacy\zstd_v02.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\legacy\zstd_v03.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\legacy\zstd_v04.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\legacy\zstd_v05.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\legacy\zstd_v06.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\programs\dibio.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\dictBuilder\zdict.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\dictBuilder\divsufsort.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\fse.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\huf.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\zbuff.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\zstd.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\zstd_internal.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\compress\zstd_opt.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\programs\util.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\xxhash.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-  </ItemGroup>
-</Project>
--- a/projects/VS2010/zstdlib/zstdlib.vcxproj
+++ b/projects/VS2010/zstdlib/zstdlib.vcxproj
@ -40,7 +40,7 @@
    <ClInclude Include="..\..\..\lib\common\huf.h" />
    <ClInclude Include="..\..\..\lib\common\xxhash.h" />
    <ClInclude Include="..\..\..\lib\common\zbuff.h" />
-    <ClInclude Include="..\..\..\lib\common\zstd.h" />
+    <ClInclude Include="..\..\..\lib\zstd.h" />
    <ClInclude Include="..\..\..\lib\common\zstd_internal.h" />
    <ClInclude Include="..\..\..\lib\compress\zstd_opt.h" />
    <ClInclude Include="..\..\..\programs\util.h" />
@ -97,28 +97,28 @@
    <LinkIncremental>true</LinkIncremental>
    <TargetName>zstdlib_x86</TargetName>
    <IntDir>$(Platform)\$(Configuration)\</IntDir>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
    <RunCodeAnalysis>false</RunCodeAnalysis>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <LinkIncremental>true</LinkIncremental>
    <TargetName>zstdlib_x64</TargetName>
    <IntDir>$(Platform)\$(Configuration)\</IntDir>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
    <RunCodeAnalysis>false</RunCodeAnalysis>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
    <LinkIncremental>false</LinkIncremental>
    <TargetName>zstdlib_x86</TargetName>
    <IntDir>$(Platform)\$(Configuration)\</IntDir>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
    <RunCodeAnalysis>false</RunCodeAnalysis>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <LinkIncremental>false</LinkIncremental>
    <TargetName>zstdlib_x64</TargetName>
    <IntDir>$(Platform)\$(Configuration)\</IntDir>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
    <RunCodeAnalysis>false</RunCodeAnalysis>
  </PropertyGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
@ -208,4 +208,4 @@
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">
  </ImportGroup>
-</Project>
+</Project>
--- a/projects/VS2010/zstdlib/zstdlib.vcxproj.filters
+++ b/projects/VS2010/zstdlib/zstdlib.vcxproj.filters
@ -1,95 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup>
-    <Filter Include="Source Files">
-      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
-      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
-    </Filter>
-    <Filter Include="Header Files">
-      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
-      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
-    </Filter>
-  </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="..\..\..\lib\common\zstd_common.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\compress\fse_compress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\compress\huf_compress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\compress\zbuff_compress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\compress\zstd_compress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\decompress\huf_decompress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\decompress\zbuff_decompress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\decompress\zstd_decompress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\dictBuilder\divsufsort.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\dictBuilder\zdict.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\common\fse_decompress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\common\entropy_common.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\common\xxhash.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-  </ItemGroup>
-  <ItemGroup>
-    <ClInclude Include="..\..\..\lib\common\bitstream.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\error_private.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\error_public.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\mem.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\fse.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\huf.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\zbuff.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\zstd.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\zstd_internal.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\compress\zstd_opt.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\programs\util.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\xxhash.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-  </ItemGroup>
-  <ItemGroup>
-    <ResourceCompile Include="zstdlib.rc" />
-  </ItemGroup>
-</Project>
--- a/projects/cmake/lib/CMakeLists.txt
+++ b/projects/cmake/lib/CMakeLists.txt
@ -47,10 +47,10 @@ SET(ROOT_DIR ../../..)

 # Define library directory, where sources and header files are located
 SET(LIBRARY_DIR ${ROOT_DIR}/lib)
-INCLUDE_DIRECTORIES(${LIBRARY_DIR}/common)
+INCLUDE_DIRECTORIES(${LIBRARY_DIR} ${LIBRARY_DIR}/common)

 # Read file content
-FILE(READ ${LIBRARY_DIR}/common/zstd.h HEADER_CONTENT)
+FILE(READ ${LIBRARY_DIR}/zstd.h HEADER_CONTENT)

 # Parse version
 GetLibraryVersion("${HEADER_CONTENT}" LIBVER_MAJOR LIBVER_MINOR LIBVER_RELEASE)
@ -80,7 +80,7 @@ SET(Headers
        ${LIBRARY_DIR}/common/mem.h
        ${LIBRARY_DIR}/common/zbuff.h
        ${LIBRARY_DIR}/common/zstd_internal.h
-        ${LIBRARY_DIR}/common/zstd.h
+        ${LIBRARY_DIR}/zstd.h
        ${LIBRARY_DIR}/dictBuilder/zdict.h)

 IF (ZSTD_LEGACY_SUPPORT)
@ -93,7 +93,8 @@ IF (ZSTD_LEGACY_SUPPORT)
            ${LIBRARY_LEGACY_DIR}/zstd_v03.c
            ${LIBRARY_LEGACY_DIR}/zstd_v04.c
            ${LIBRARY_LEGACY_DIR}/zstd_v05.c
-            ${LIBRARY_LEGACY_DIR}/zstd_v06.c)
+            ${LIBRARY_LEGACY_DIR}/zstd_v06.c
+            ${LIBRARY_LEGACY_DIR}/zstd_v07.c)

    SET(Headers ${Headers}
            ${LIBRARY_LEGACY_DIR}/zstd_legacy.h
@ -102,7 +103,8 @@ IF (ZSTD_LEGACY_SUPPORT)
            ${LIBRARY_LEGACY_DIR}/zstd_v03.h
            ${LIBRARY_LEGACY_DIR}/zstd_v04.h
            ${LIBRARY_LEGACY_DIR}/zstd_v05.h
-            ${LIBRARY_LEGACY_DIR}/zstd_v06.h)
+            ${LIBRARY_LEGACY_DIR}/zstd_v06.h
+            ${LIBRARY_LEGACY_DIR}/zstd_v07.h)
 ENDIF (ZSTD_LEGACY_SUPPORT)

 IF (MSVC)
@ -162,7 +164,7 @@ IF (UNIX)
    SET(INSTALL_INCLUDE_DIR ${PREFIX}/include)

    # install target
-    INSTALL(FILES ${LIBRARY_DIR}/common/zstd.h ${LIBRARY_DIR}/common/zbuff.h ${LIBRARY_DIR}/dictBuilder/zdict.h DESTINATION ${INSTALL_INCLUDE_DIR})
+    INSTALL(FILES ${LIBRARY_DIR}/zstd.h ${LIBRARY_DIR}/common/zbuff.h ${LIBRARY_DIR}/dictBuilder/zdict.h DESTINATION ${INSTALL_INCLUDE_DIR})
    INSTALL(TARGETS libzstd_static DESTINATION ${INSTALL_LIBRARY_DIR})
    INSTALL(TARGETS libzstd_shared LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR})

--- a/projects/cmake/programs/CMakeLists.txt
+++ b/projects/cmake/programs/CMakeLists.txt
@ -40,7 +40,7 @@ SET(ROOT_DIR ../../..)
 # Define programs directory, where sources and header files are located
 SET(LIBRARY_DIR ${ROOT_DIR}/lib)
 SET(PROGRAMS_DIR ${ROOT_DIR}/programs)
-INCLUDE_DIRECTORIES(${PROGRAMS_DIR} ${LIBRARY_DIR}/common ${LIBRARY_DIR}/dictBuilder)
+INCLUDE_DIRECTORIES(${PROGRAMS_DIR} ${LIBRARY_DIR} ${LIBRARY_DIR}/common ${LIBRARY_DIR}/dictBuilder)

 IF (ZSTD_LEGACY_SUPPORT)
    SET(PROGRAMS_LEGACY_DIR ${PROGRAMS_DIR}/legacy)
--- a/tests/.gitignore
+++ b/tests/.gitignore
@ -2,3 +2,7 @@
 zstdtest
 speedTest
 versionsTest
+
+# Local script
+startSpeedTest
+speedTest.pid
--- a/tests/test-zstd-speed.py
+++ b/tests/test-zstd-speed.py
@ -3,27 +3,29 @@
 import argparse
 import os
 import string
+import subprocess
 import time
 import traceback
-import subprocess
-import signal
- 
+

 default_repo_url = 'https://github.com/Cyan4973/zstd.git'
 working_dir_name = 'speedTest'
-working_path = os.getcwd() + '/' + working_dir_name     # /path/to/zstd/tests/speedTest 
-clone_path = working_path + '/' + 'zstd'                # /path/to/zstd/tests/speedTest/zstd 
+working_path = os.getcwd() + '/' + working_dir_name     # /path/to/zstd/tests/speedTest
+clone_path = working_path + '/' + 'zstd'                # /path/to/zstd/tests/speedTest/zstd
 email_header = '[ZSTD_speedTest]'
 pid = str(os.getpid())
+verbose = False


 def log(text):
    print(time.strftime("%Y/%m/%d %H:%M:%S") + ' - ' + text)


-def execute(command, print_output=False, print_error=True, param_shell=True):
-    log("> " + command)
-    popen = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=param_shell, cwd=execute.cwd)
+def execute(command, print_command=True, print_output=False, print_error=True, param_shell=True):
+    if print_command:
+        log("> " + command)
+    popen = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+                             shell=param_shell, cwd=execute.cwd)
    stdout = popen.communicate()[0]
    stdout_lines = stdout.splitlines()
    if print_output:
@ -38,8 +40,8 @@ execute.cwd = None

 def does_command_exist(command):
    try:
-        execute(command, False, False);
-    except Exception as e:
+        execute(command, verbose, False, False)
+    except Exception:
        return False
    return True

@ -50,33 +52,38 @@ def send_email(emails, topic, text, have_mutt, have_mail):
        myfile.writelines(text)
        myfile.close()
        if have_mutt:
-            execute('mutt -s "' + topic + '" ' + emails + ' < ' + logFileName)
+            execute('mutt -s "' + topic + '" ' + emails + ' < ' + logFileName, verbose)
        elif have_mail:
-            execute('mail -s "' + topic + '" ' + emails + ' < ' + logFileName)
+            execute('mail -s "' + topic + '" ' + emails + ' < ' + logFileName, verbose)
        else:
            log("e-mail cannot be sent (mail or mutt not found)")


-def send_email_with_attachments(branch, commit, last_commit, emails, text, results_files, logFileName, lower_limit, have_mutt, have_mail):
+def send_email_with_attachments(branch, commit, last_commit, args, text, results_files,
+                                logFileName, have_mutt, have_mail):
    with open(logFileName, "w") as myfile:
        myfile.writelines(text)
        myfile.close()
-        email_topic = '%s:%s Warning for %s:%s last_commit=%s speed<%s' % (email_header, pid, branch, commit, last_commit, lower_limit)
+        email_topic = '%s:%s Warning for %s:%s last_commit=%s speed<%s ratio<%s' \
+                      % (email_header, pid, branch, commit, last_commit,
+                         args.lowerLimit, args.ratioLimit)
        if have_mutt:
-            execute('mutt -s "' + email_topic + '" ' + emails + ' -a ' + results_files + ' < ' + logFileName)
+            execute('mutt -s "' + email_topic + '" ' + args.emails + ' -a ' + results_files
+                    + ' < ' + logFileName)
        elif have_mail:
-            execute('mail -s "' + email_topic + '" ' + emails + ' < ' + logFileName)
+            execute('mail -s "' + email_topic + '" ' + args.emails + ' < ' + logFileName)
        else:
            log("e-mail cannot be sent (mail or mutt not found)")


 def git_get_branches():
-    execute('git fetch -p')
-    output = execute('git branch -rl')
-    for line in output:
-        if "HEAD" in line: 
-            output.remove(line)  # remove "origin/HEAD -> origin/dev"
-    return map(lambda l: l.strip(), output)
+    execute('git fetch -p', verbose)
+    branches = execute('git branch -rl', verbose)
+    output = []
+    for line in branches:
+        if ("HEAD" not in line) and ("coverity_scan" not in line) and ("gh-pages" not in line):
+            output.append(line.strip())
+    return output


 def git_get_changes(branch, commit, last_commit):
@ -90,32 +97,38 @@ def git_get_changes(branch, commit, last_commit):

 def get_last_results(resultsFileName):
    if not os.path.isfile(resultsFileName):
-        return None, None, None
+        return None, None, None, None
    commit = None
+    csize = []
    cspeed = []
    dspeed = []
-    with open(resultsFileName,'r') as f:
+    with open(resultsFileName, 'r') as f:
        for line in f:
            words = line.split()
            if len(words) == 2:   # branch + commit
-                commit = words[1];
+                commit = words[1]
+                csize = []
                cspeed = []
                dspeed = []
            if (len(words) == 8):  # results
+                csize.append(int(words[1]))
                cspeed.append(float(words[3]))
                dspeed.append(float(words[5]))
-    return commit, cspeed, dspeed
+    return commit, csize, cspeed, dspeed


-def benchmark_and_compare(branch, commit, resultsFileName, lastCLevel, testFilePath, fileName, last_cspeed, last_dspeed, lower_limit, maxLoadAvg, message):
+def benchmark_and_compare(branch, commit, last_commit, args, executableName, resultsFileName,
+                          testFilePath, fileName, last_csize, last_cspeed, last_dspeed):
    sleepTime = 30
-    while os.getloadavg()[0] > maxLoadAvg:
-        log("WARNING: bench loadavg=%.2f is higher than %s, sleeping for %s seconds" % (os.getloadavg()[0], maxLoadAvg, sleepTime))
+    while os.getloadavg()[0] > args.maxLoadAvg:
+        log("WARNING: bench loadavg=%.2f is higher than %s, sleeping for %s seconds"
+            % (os.getloadavg()[0], args.maxLoadAvg, sleepTime))
        time.sleep(sleepTime)
    start_load = str(os.getloadavg())
-    result = execute('programs/zstd -qi5b1e%s %s' % (lastCLevel, testFilePath), print_output=True)
+    result = execute('programs/%s -qi5b1e%s %s' % (executableName, args.lastCLevel, testFilePath),
+                     print_output=True)
    end_load = str(os.getloadavg())
-    linesExpected = lastCLevel + 2;
+    linesExpected = args.lastCLevel + 1
    if len(result) != linesExpected:
        raise RuntimeError("ERROR: number of result lines=%d is different that expected %d\n%s" % (len(result), linesExpected, '\n'.join(result)))
    with open(resultsFileName, "a") as myfile:
@ -125,16 +138,18 @@ def benchmark_and_compare(branch, commit, resultsFileName, lastCLevel, testFileP
        if (last_cspeed == None):
            log("WARNING: No data for comparison for branch=%s file=%s " % (branch, fileName))
            return ""
-        commit, cspeed, dspeed = get_last_results(resultsFileName)
+        commit, csize, cspeed, dspeed = get_last_results(resultsFileName)
        text = ""
        for i in range(0, min(len(cspeed), len(last_cspeed))):
-            print("%s:%s -%d cspeed=%6.2f clast=%6.2f cdiff=%1.4f dspeed=%6.2f dlast=%6.2f ddiff=%1.4f %s" % (branch, commit, i+1, cspeed[i], last_cspeed[i], cspeed[i]/last_cspeed[i], dspeed[i], last_dspeed[i], dspeed[i]/last_dspeed[i], fileName))
-            if (cspeed[i]/last_cspeed[i] < lower_limit):
-                text += "WARNING: -%d cspeed=%.2f clast=%.2f cdiff=%.4f %s\n" % (i+1, cspeed[i], last_cspeed[i], cspeed[i]/last_cspeed[i], fileName)
-            if (dspeed[i]/last_dspeed[i] < lower_limit):
-                text += "WARNING: -%d dspeed=%.2f dlast=%.2f ddiff=%.4f %s\n" % (i+1, dspeed[i], last_dspeed[i], dspeed[i]/last_dspeed[i], fileName)
+            print("%s:%s -%d cSpeed=%6.2f cLast=%6.2f cDiff=%1.4f dSpeed=%6.2f dLast=%6.2f dDiff=%1.4f ratioDiff=%1.4f %s" % (branch, commit, i+1, cspeed[i], last_cspeed[i], cspeed[i]/last_cspeed[i], dspeed[i], last_dspeed[i], dspeed[i]/last_dspeed[i], float(last_csize[i])/csize[i], fileName))
+            if (cspeed[i]/last_cspeed[i] < args.lowerLimit):
+                text += "WARNING: %s -%d cSpeed=%.2f cLast=%.2f cDiff=%.4f %s\n" % (executableName, i+1, cspeed[i], last_cspeed[i], cspeed[i]/last_cspeed[i], fileName)
+            if (dspeed[i]/last_dspeed[i] < args.lowerLimit):
+                text += "WARNING: %s -%d dSpeed=%.2f dLast=%.2f dDiff=%.4f %s\n" % (executableName, i+1, dspeed[i], last_dspeed[i], dspeed[i]/last_dspeed[i], fileName)
+            if (float(last_csize[i])/csize[i] < args.ratioLimit):
+                text += "WARNING: %s -%d cSize=%d last_cSize=%d diff=%.4f %s\n" % (executableName, i+1, csize[i], last_csize[i], float(last_csize[i])/csize[i], fileName)
        if text:
-            text = message + ("\nmaxLoadAvg=%s  load average at start=%s end=%s\n" % (maxLoadAvg, start_load, end_load)) + text
+            text = args.message + ("\nmaxLoadAvg=%s  load average at start=%s end=%s  last_commit=%s\n" % (args.maxLoadAvg, start_load, end_load, last_commit)) + text
        return text


@ -147,28 +162,38 @@ def update_config_file(branch, commit):
    return last_commit


+def double_check(branch, commit, args, executableName, resultsFileName, filePath, fileName):
+    last_commit, csize, cspeed, dspeed = get_last_results(resultsFileName)
+    if not args.dry_run:
+        text = benchmark_and_compare(branch, commit, last_commit, args, executableName, resultsFileName, filePath, fileName, csize, cspeed, dspeed)
+        if text:
+            log("WARNING: redoing tests for branch %s: commit %s" % (branch, commit))
+            text = benchmark_and_compare(branch, commit, last_commit, args, executableName, resultsFileName, filePath, fileName, csize, cspeed, dspeed)
+    return text
+
+
 def test_commit(branch, commit, last_commit, args, testFilePaths, have_mutt, have_mail):
    local_branch = string.split(branch, '/')[1]
    version = local_branch.rpartition('-')[2] + '_' + commit
    if not args.dry_run:
-        execute('make clean zstdprogram MOREFLAGS="-DZSTD_GIT_COMMIT=%s"' % version)
+        execute('make -C programs clean zstd MOREFLAGS="-DZSTD_GIT_COMMIT=%s" && make -B -C programs zstd32 MOREFLAGS="-DZSTD_GIT_COMMIT=%s"' % (version, version))
    logFileName = working_path + "/log_" + branch.replace("/", "_") + ".txt"
    text_to_send = []
    results_files = ""
    for filePath in testFilePaths:
        fileName = filePath.rpartition('/')[2]
        resultsFileName = working_path + "/results_" + branch.replace("/", "_") + "_" + fileName.replace(".", "_") + ".txt"
-        last_commit, cspeed, dspeed = get_last_results(resultsFileName)
-        if not args.dry_run:
-            text = benchmark_and_compare(branch, commit, resultsFileName, args.lastCLevel, filePath, fileName, cspeed, dspeed, args.lowerLimit, args.maxLoadAvg, args.message)
-            if text:
-                log("WARNING: redoing tests for branch %s: commit %s" % (branch, commit))
-                text = benchmark_and_compare(branch, commit, resultsFileName, args.lastCLevel, filePath, fileName, cspeed, dspeed, args.lowerLimit, args.maxLoadAvg, args.message)
-                if text:
-                    text_to_send.append(text)
-                    results_files += resultsFileName + " "
+        text = double_check(branch, commit, args, 'zstd', resultsFileName, filePath, fileName)
+        if text:
+            text_to_send.append(text)
+            results_files += resultsFileName + " "
+        resultsFileName = working_path + "/results32_" + branch.replace("/", "_") + "_" + fileName.replace(".", "_") + ".txt"
+        text = double_check(branch, commit, args, 'zstd32', resultsFileName, filePath, fileName)
+        if text:
+            text_to_send.append(text)
+            results_files += resultsFileName + " "
    if text_to_send:
-        send_email_with_attachments(branch, commit, last_commit, args.emails, text_to_send, results_files, logFileName, args.lowerLimit, have_mutt, have_mail)
+        send_email_with_attachments(branch, commit, last_commit, args, text_to_send, results_files, logFileName, have_mutt, have_mail)


 if __name__ == '__main__':
@ -178,11 +203,14 @@ if __name__ == '__main__':
    parser.add_argument('--message', help='attach an additional message to e-mail', default="")
    parser.add_argument('--repoURL', help='changes default repository URL', default=default_repo_url)
    parser.add_argument('--lowerLimit', type=float, help='send email if speed is lower than given limit', default=0.98)
+    parser.add_argument('--ratioLimit', type=float, help='send email if ratio is lower than given limit', default=0.999)
    parser.add_argument('--maxLoadAvg', type=float, help='maximum load average to start testing', default=0.75)
    parser.add_argument('--lastCLevel', type=int, help='last compression level for testing', default=5)
    parser.add_argument('--sleepTime', type=int, help='frequency of repository checking in seconds', default=300)
    parser.add_argument('--dry-run', dest='dry_run', action='store_true', help='not build', default=False)
+    parser.add_argument('--verbose', action='store_true', help='more verbose logs', default=False)
    args = parser.parse_args()
+    verbose = args.verbose

    # check if test files are accessible
    testFileNames = args.testFileNames.split()
@ -196,24 +224,27 @@ if __name__ == '__main__':
            exit(1)

    # check availability of e-mail senders
-    have_mutt = does_command_exist("mutt -h");
-    have_mail = does_command_exist("mail -V");
+    have_mutt = does_command_exist("mutt -h")
+    have_mail = does_command_exist("mail -V")
    if not have_mutt and not have_mail:
        log("ERROR: e-mail senders 'mail' or 'mutt' not found")
        exit(1)

-    print("PARAMETERS:\nrepoURL=%s" % args.repoURL)
-    print("working_path=%s" % working_path)
-    print("clone_path=%s" % clone_path)
-    print("testFilePath(%s)=%s" % (len(testFilePaths), testFilePaths))
-    print("message=%s" % args.message)
-    print("emails=%s" % args.emails)
-    print("maxLoadAvg=%s" % args.maxLoadAvg)
-    print("lowerLimit=%s" % args.lowerLimit)
-    print("lastCLevel=%s" % args.lastCLevel)
-    print("sleepTime=%s" % args.sleepTime)
-    print("dry_run=%s" % args.dry_run)
-    print("have_mutt=%s have_mail=%s" % (have_mutt, have_mail))
+    if verbose:
+        print("PARAMETERS:\nrepoURL=%s" % args.repoURL)
+        print("working_path=%s" % working_path)
+        print("clone_path=%s" % clone_path)
+        print("testFilePath(%s)=%s" % (len(testFilePaths), testFilePaths))
+        print("message=%s" % args.message)
+        print("emails=%s" % args.emails)
+        print("maxLoadAvg=%s" % args.maxLoadAvg)
+        print("lowerLimit=%s" % args.lowerLimit)
+        print("ratioLimit=%s" % args.ratioLimit)
+        print("lastCLevel=%s" % args.lastCLevel)
+        print("sleepTime=%s" % args.sleepTime)
+        print("dry_run=%s" % args.dry_run)
+        print("verbose=%s" % args.verbose)
+        print("have_mutt=%s have_mail=%s" % (have_mutt, have_mail))

    # clone ZSTD repo if needed
    if not os.path.isdir(working_path):
@ -241,7 +272,7 @@ if __name__ == '__main__':
            if (loadavg <= args.maxLoadAvg):
                branches = git_get_branches()
                for branch in branches:
-                    commit = execute('git show -s --format=%h ' + branch)[0]
+                    commit = execute('git show -s --format=%h ' + branch, verbose)[0]
                    last_commit = update_config_file(branch, commit)
                    if commit == last_commit:
                        log("skipping branch %s: head %s already processed" % (branch, commit))
@ -252,13 +283,15 @@ if __name__ == '__main__':
                        test_commit(branch, commit, last_commit, args, testFilePaths, have_mutt, have_mail)
            else:
                log("WARNING: main loadavg=%.2f is higher than %s" % (loadavg, args.maxLoadAvg))
-            log("sleep for %s seconds" % args.sleepTime)
+            if verbose:
+                log("sleep for %s seconds" % args.sleepTime)
            time.sleep(args.sleepTime)
        except Exception as e:
            stack = traceback.format_exc()
            email_topic = '%s:%s ERROR in %s:%s' % (email_header, pid, branch, commit)
            send_email(args.emails, email_topic, stack, have_mutt, have_mail)
            print(stack)
+            time.sleep(args.sleepTime)
        except KeyboardInterrupt:
            os.unlink(pidfile)
            send_email(args.emails, email_header + ':%s test-zstd-speed.py has been stopped' % pid, args.message, have_mutt, have_mail)
--- a/zlibWrapper/Makefile
+++ b/zlibWrapper/Makefile
@ -17,8 +17,8 @@ endif

 ZLIBWRAPPER_PATH = .
 EXAMPLE_PATH = examples
-CC = gcc
-CFLAGS = $(LOC) -I../lib/common -I$(ZLIBDIR) -I$(ZLIBWRAPPER_PATH) -O3 -std=gnu90
+CC ?= gcc
+CFLAGS = $(LOC) -I../lib -I../lib/common -I$(ZLIBDIR) -I$(ZLIBWRAPPER_PATH) -O3 -std=gnu90
 CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes -Wundef
 LDFLAGS = $(LOC)
 RM = rm -f
--- a/zstd_compression_format.md
+++ b/zstd_compression_format.md
@ -6,8 +6,8 @@ Zstandard Compression Format
 Copyright (c) 2016 Yann Collet

 Permission is granted to copy and distribute this document
-for any  purpose and without charge,
-including translations into other  languages
+for any purpose and without charge,
+including translations into other languages
 and incorporation into compilations,
 provided that the copyright notice and this notice are preserved,
 and that any substantive changes or deletions from the original
@ -16,7 +16,7 @@ Distribution of this document is unlimited.

 ### Version

-0.1.1 (15/07/16)
+0.2.0 (22/07/16)


 Introduction
@ -58,6 +58,11 @@ it must produce a non-ambiguous error code and associated error message
 explaining which parameter is unsupported.


+Overall conventions
+-----------
+In this document square brackets i.e. `[` and `]` are used to indicate optional fields or parameters.
+
+
 Definitions
 -----------
 A content compressed by Zstandard is transformed into a Zstandard __frame__.
@ -73,49 +78,8 @@ However, each block can be decompressed without waiting for its successor,
 allowing streaming operations.


-General Structure of Zstandard Frame format
-------------------------------------------
-
-| MagicNb |  Frame Header | Block | (More blocks) | EndMark |
-|:-------:|:-------------:| ----- | ------------- | ------- |
-| 4 bytes |  2-14 bytes   |       |               | 3 bytes |
-
-__Magic Number__
-
-4 Bytes, Little endian format.
-Value : 0xFD2FB527
-
-__Frame Header__
-
-2 to 14 Bytes, detailed in [next part](#frame-header).
-
-__Data Blocks__
-
-Detailed in [next chapter](#data-blocks).
-That’s where compressed data is stored.
-
-__EndMark__
-
-The flow of blocks ends when the last block header brings an _end signal_ .
-This last block header may optionally host a __Content Checksum__ .
-
-##### __Content Checksum__
-
-Content Checksum verify that frame content has been regenerated correctly.
-The content checksum is the result
-of [xxh64() hash function](https://www.xxHash.com)
-digesting the original (decoded) data as input, and a seed of zero.
-Bits from 11 to 32 (included) are extracted to form a 22 bits checksum
-stored into the endmark body.
-```
-mask22bits = (1<<22)-1;
-contentChecksum = (XXH64(content, size, 0) >> 11) & mask22bits;
-```
-Content checksum is only present when its associated flag
-is set in the frame descriptor.
-Its usage is optional.
-
-__Frame Concatenation__
+Frame Concatenation
+-------------------

 In some circumstances, it may be required to append multiple frames,
 for example in order to add new data to an existing compressed file
@ -132,50 +96,89 @@ to decode all concatenated frames in their sequential order,
 delivering the final decompressed result as if it was a single content.


-Frame Header
-------------
+General Structure of Zstandard Frame format
+-------------------------------------------
+The structure of a single Zstandard frame is as follows:

-| FHD     | (WD)      | (dictID)  | (Content Size) |
-| ------- | --------- | --------- |:--------------:|
-| 1 byte  | 0-1 byte  | 0-4 bytes |  0 - 8 bytes   |
+| `Magic_Number` | `Frame_Header` |`Data_Block`| [More data blocks] | [`Content_Checksum`] |
+|:--------------:|:--------------:|:----------:| ------------------ |:--------------------:|
+| 4 bytes        |  2-14 bytes    | n bytes    |                    |   0-4 bytes          |

-Frame header has a variable size, which uses a minimum of 2 bytes,
+__`Magic_Number`__
+
+4 Bytes, Little-endian format.
+Value : 0xFD2FB527
+
+__`Frame_Header`__
+
+2 to 14 Bytes, detailed in [next part](#the-structure-of-frame_header).
+
+__`Data_Block`__
+
+Detailed in [next chapter](#the-structure-of-data_block).
+That’s where compressed data is stored.
+
+__`Content_Checksum`__
+
+An optional 32-bit checksum, only present if `Content_Checksum_flag` is set.
+The content checksum is the result
+of [xxh64() hash function](https://www.xxHash.com)
+digesting the original (decoded) data as input, and a seed of zero.
+The low 4 bytes of the checksum are stored in little endian format.
+
+
+The structure of `Frame_Header`
+-------------------------------
+The `Frame_Header` has a variable size, which uses a minimum of 2 bytes,
 and up to 14 bytes depending on optional parameters.
+The structure of `Frame_Header` is as follows:

-__FHD byte__ (Frame Header Descriptor)
+| `Frame_Header_Descriptor` | [`Window_Descriptor`] | [`Dictionary_ID`] | [`Frame_Content_Size`] |
+| ------------------------- | --------------------- | ----------------- | ---------------------- |
+| 1 byte                    | 0-1 byte              | 0-4 bytes         | 0-8 bytes              |

-The first Header's byte is called the Frame Header Descriptor.
+### `Frame_Header_Descriptor`
+
+The first header's byte is called the `Frame_Header_Descriptor`.
 It tells which other fields are present.
-Decoding this byte is enough to tell the size of Frame Header.
+Decoding this byte is enough to tell the size of `Frame_Header`.

-|  BitNb  |   7-6  |    5    |   4    |    3     |    2     |  1-0   |
-| ------- | ------ | ------- | ------ | -------- | -------- | ------ |
-|FieldName| FCSize | Segment | Unused | Reserved | Checksum | dictID |
+| Bit number | Field name                |
+| ---------- | ----------                |
+| 7-6        | `Frame_Content_Size_flag` |
+| 5          | `Single_Segment_flag`     |
+| 4          | `Unused_bit`              |
+| 3          | `Reserved_bit`            |
+| 2          | `Content_Checksum_flag`   |
+| 1-0        | `Dictionary_ID_flag`      |

 In this table, bit 7 is highest bit, while bit 0 is lowest.

-__Frame Content Size flag__
+__`Frame_Content_Size_flag`__

-This is a 2-bits flag (`= FHD >> 6`),
+This is a 2-bits flag (`= Frame_Header_Descriptor >> 6`),
 specifying if decompressed data size is provided within the header.
+The `Flag_Value` can be converted into `Field_Size`,
+which is the number of bytes used by `Frame_Content_Size`
+according to the following table:

-|  Value  |  0  |  1  |  2  |  3  |
-| ------- | --- | --- | --- | --- |
-|FieldSize| 0-1 |  2  |  4  |  8  |
+|`Flag_Value`|  0  |  1  |  2  |  3  |
+| ---------- | --- | --- | --- | --- |
+|`Field_Size`| 0-1 |  2  |  4  |  8  |

-Value 0 meaning depends on _single segment_ mode :
-it either means `0` (size not provided) _if_ the `WD` byte is present,
-or `1` (frame content size <= 255 bytes) otherwise.
+When `Flag_Value` is `0`, `Field_Size` depends on `Single_Segment_flag` :
+if `Single_Segment_flag` is set, `Field_Size` is 1.
+Otherwise, `Field_Size` is 0 (content size not provided).

-__Single Segment__
+__`Single_Segment_flag`__

 If this flag is set,
-data shall be regenerated within a single continuous memory segment.
+data must be regenerated within a single continuous memory segment.

-In which case, `WD` byte __is not present__,
-but `Frame Content Size` field necessarily is.
+In this case, `Frame_Content_Size` is necessarily present,
+but `Window_Descriptor` byte is skipped.
 As a consequence, the decoder must allocate a memory segment
-of size `>= Frame Content Size`.
+of size equal to or greater than `Frame_Content_Size`.

 In order to preserve the decoder from unreasonable memory requirement,
 a decoder can reject a compressed frame
@ -187,50 +190,49 @@ This is just a recommendation,
 each decoder is free to support higher or lower limits,
 depending on local limitations.

-__Unused bit__
+__`Unused_bit`__

 The value of this bit should be set to zero.
-A decoder compliant with this specification version should not interpret it.
+A decoder compliant with this specification version shall not interpret it.
 It might be used in a future version,
 to signal a property which is not mandatory to properly decode the frame.

-__Reserved bit__
+__`Reserved_bit`__

 This bit is reserved for some future feature.
 Its value _must be zero_.
 A decoder compliant with this specification version must ensure it is not set.
 This bit may be used in a future revision,
-to signal a feature that must be interpreted in order to decode the frame.
+to signal a feature that must be interpreted to decode the frame correctly.

-__Content checksum flag__
+__`Content_Checksum_flag`__

-If this flag is set, a content checksum will be present into the EndMark.
-The checksum is a 22 bits value extracted from the XXH64() of data,
-and stored into endMark. See [__Content Checksum__](#content-checksum) .
+If this flag is set, a 32-bit `Content_Checksum` will be present at the frame's end.
+See `Content_Checksum` paragraph.

-__Dictionary ID flag__
+__`Dictionary_ID_flag`__

 This is a 2-bits flag (`= FHD & 3`),
 telling if a dictionary ID is provided within the header.
 It also specifies the size of this field.

-|  Value  |  0  |  1  |  2  |  3  |
-| ------- | --- | --- | --- | --- |
-|FieldSize|  0  |  1  |  2  |  4  |
+|  Value   |  0  |  1  |  2  |  3  |
+| -------- | --- | --- | --- | --- |
+|Field size|  0  |  1  |  2  |  4  |

-__WD byte__ (Window Descriptor)
+### `Window_Descriptor`

 Provides guarantees on maximum back-reference distance
-that will be present within compressed data.
-This information is useful for decoders to allocate enough memory.
+that will be used within compressed data.
+This information is important for decoders to allocate enough memory.

-`WD` byte is optional. It's not present in `single segment` mode.
-In which case, the maximum back-reference distance is the content size itself,
+The `Window_Descriptor` byte is optional. It is absent when `Single_Segment_flag` is set.
+In this case, the maximum back-reference distance is the content size itself,
 which can be any value from 1 to 2^64-1 bytes (16 EB).

-|   BitNb   |    7-3   |    0-2   |
-| --------- | -------- | -------- |
-| FieldName | Exponent | Mantissa |
+| Bit numbers |    7-3   |    0-2   |
+| ----------- | -------- | -------- |
+| Field name  | Exponent | Mantissa |

 Maximum distance is given by the following formulae :
 ```
@ -250,20 +252,20 @@ a decoder can refuse a compressed frame
 which requests a memory size beyond decoder's authorized range.

 For improved interoperability,
-decoders are recommended to be compatible with window sizes of 8 MB.
-Encoders are recommended to not request more than 8 MB.
+decoders are recommended to be compatible with window sizes of 8 MB,
+and encoders are recommended to not request more than 8 MB.
 It's merely a recommendation though,
 decoders are free to support larger or lower limits,
 depending on local limitations.

-__Dictionary ID__
+### `Dictionary_ID`

 This is a variable size field, which contains
 the ID of the dictionary required to properly decode the frame.
 Note that this field is optional. When it's not present,
 it's up to the caller to make sure it uses the correct dictionary.

-Field size depends on __Dictionary ID flag__.
+Field size depends on `Dictionary_ID_flag`.
 1 byte can represent an ID 0-255.
 2 bytes can represent an ID 0-65535.
 4 bytes can represent an ID 0-4294967295.
@ -275,72 +277,70 @@ _Reserved ranges :_
 If the frame is going to be distributed in a private environment,
 any dictionary ID can be used.
 However, for public distribution of compressed frames using a dictionary,
-some ranges are reserved for future use :
- low : 1 - 32767 : reserved
- high : >= (2^31) : reserved
+the following ranges are reserved for future use and should not be used :
+- low range : 1 - 32767
+- high range : >= (2^31)


-__Frame Content Size__
+### `Frame_Content_Size`

-This is the original (uncompressed) size.
-This information is optional, and only present if associated flag is set.
-Content size is provided using 1, 2, 4 or 8 Bytes.
-Format is Little endian.
+This is the original (uncompressed) size. This information is optional.
+The `Field_Size` is provided according to the value of `Frame_Content_Size_flag`.
+The `Field_Size` can be equal to 0 (not present), 1, 2, 4 or 8 bytes.
+Format is Little-endian.

-| Field Size |    Range   |
-| ---------- | ---------- |
-|     0      |      0     |
-|     1      |   0 - 255  |
-|     2      | 256 - 65791|
-|     4      | 0 - 2^32-1 |
-|     8      | 0 - 2^64-1 |
+| `Field_Size` |    Range   |
+| ------------ | ---------- |
+|      1       |   0 - 255  |
+|      2       | 256 - 65791|
+|      4       | 0 - 2^32-1 |
+|      8       | 0 - 2^64-1 |

-When field size is 1, 4 or 8 bytes, the value is read directly.
-When field size is 2, _an offset of 256 is added_.
-It's allowed to represent a small size (ex: `18`) using any compatible variant.
-A size of `0` means `content size is unknown`.
-In which case, the `WD` byte will necessarily be present,
-and becomes the only hint to guide memory allocation.
-
-In order to preserve decoder from unreasonable memory requirement,
-a decoder can refuse a compressed frame
-which requests a memory size beyond decoder's authorized range.
+When `Field_Size` is 1, 4 or 8 bytes, the value is read directly.
+When `Field_Size` is 2, _an offset of 256 is added_.
+It's allowed to represent a small size (for example `18`) using any compatible variant.


-Data Blocks
-----------
+The structure of `Data_Block`
+-----------------------------
+The structure of `Data_Block` is as follows:

-| B. Header |  data  |
-|:---------:| ------ |
-|  3 bytes  |        |
+| `Last_Block` | `Block_Type` | `Block_Size` | `Block_Content` |
+|:------------:|:------------:|:------------:|:---------------:|
+|   1 bit      |  2 bits      |  21 bits     |  n bytes        |

+The block header uses 3 bytes.

-__Block Header__
+__`Last_Block`__

-This field uses 3-bytes, format is __big-endian__.
+The lowest bit signals if this block is the last one.
+Frame ends right after this block.
+It may be followed by an optional `Content_Checksum` .

-The 2 highest bits represent the `block type`,
-while the remaining 22 bits represent the (compressed) block size.
+__`Block_Type` and `Block_Size`__
+
+The next 2 bits represent the `Block_Type`,
+while the remaining 21 bits represent the `Block_Size`.
+Format is __little-endian__.

 There are 4 block types :

-|    Value   |      0     |  1  |  2  |    3    |
-| ---------- | ---------- | --- | --- | ------- |
-| Block Type | Compressed | Raw | RLE | EndMark |
+|    Value     |      0      |     1       |  2                 |    3      |
+| ------------ | ----------- | ----------- | ------------------ | --------- |
+| `Block_Type` | `Raw_Block` | `RLE_Block` | `Compressed_Block` | `Reserved`|

- Compressed : this is a [Zstandard compressed block](#compressed-block-format),
+- `Raw_Block` - this is an uncompressed block.
+  `Block_Size` is the number of bytes to read and copy.
+- `RLE_Block` - this is a single byte, repeated N times.
+  In which case, `Block_Size` is the size to regenerate,
+  while the "compressed" block is just 1 byte (the byte to repeat).
+- `Compressed_Block` - this is a [Zstandard compressed block](#the-format-of-compressed_block),
  detailed in another section of this specification.
-  "block size" is the compressed size.
+  `Block_Size` is the compressed size.
  Decompressed size is unknown,
  but its maximum possible value is guaranteed (see below)
- Raw : this is an uncompressed block.
-  "block size" is the number of bytes to read and copy.
- RLE : this is a single byte, repeated N times.
-  In which case, "block size" is the size to regenerate,
-  while the "compressed" block is just 1 byte (the byte to repeat).
- EndMark : this is not a block. Signal the end of the frame.
-  The rest of the field may be optionally filled by a checksum
-  (see [Content Checksum](#content-checksum)).
+- `Reserved` - this is not a block.
+  This value cannot be used with the current version of this specification.

 Block sizes must respect a few rules :
 - In compressed mode, compressed size if always strictly `< decompressed size`.
@ -348,14 +348,14 @@ Block sizes must respect a few rules :
 - Block decompressed size is always <= 128 KB


-__Data__
+__`Block_Content`__

-Where the actual data to decode stands.
+The `Block_Content` is where the actual data to decode stands.
 It might be compressed or not, depending on previous field indications.
 A data block is not necessarily "full" :
 since an arbitrary “flush” may happen anytime,
 block decompressed content can be any size,
-up to Block Maximum Decompressed Size, which is the smallest of :
+up to `Block_Maximum_Decompressed_Size`, which is the smallest of :
 - Maximum back-reference distance
 - 128 KB

@ -363,9 +363,9 @@ up to Block Maximum Decompressed Size, which is the smallest of :
 Skippable Frames
 ----------------

-| Magic Number | Frame Size | User Data |
-|:------------:|:----------:| --------- |
-|   4 bytes    |  4 bytes   |           |
+| `Magic_Number` | `Frame_Size` | `User_Data` |
+|:--------------:|:------------:|:-----------:|
+|   4 bytes      |  4 bytes     |   n bytes   |

 Skippable frames allow the insertion of user-defined data
 into a flow of concatenated frames.
@ -377,31 +377,30 @@ Skippable frames defined in this specification are compatible with [LZ4] ones.

 [LZ4]:http://www.lz4.org

-__Magic Number__ :
+__`Magic_Number`__

-4 Bytes, Little endian format.
+4 Bytes, Little-endian format.
 Value : 0x184D2A5X, which means any value from 0x184D2A50 to 0x184D2A5F.
 All 16 values are valid to identify a skippable frame.

-__Frame Size__ :
+__`Frame_Size`__

-This is the size, in bytes, of the following User Data
+This is the size, in bytes, of the following `User_Data`
 (without including the magic number nor the size field itself).
-4 Bytes, Little endian format, unsigned 32-bits.
-This means User Data can’t be bigger than (2^32-1) Bytes.
+This field is represented using 4 Bytes, Little-endian format, unsigned 32-bits.
+This means `User_Data` can’t be bigger than (2^32-1) bytes.

-__User Data__ :
+__`User_Data`__

-User Data can be anything. Data will just be skipped by the decoder.
+The `User_Data` can be anything. Data will just be skipped by the decoder.


-Compressed block format
-----------------------
-This specification details the content of a _compressed block_.
-A compressed block has a size, which must be known.
-It also has a guaranteed maximum regenerated size,
+The format of `Compressed_Block`
+--------------------------------
+The size of `Compressed_Block` must be provided using `Block_Size` field from `Data_Block`.
+The `Compressed_Block` has a guaranteed maximum regenerated size,
 in order to properly allocate destination buffer.
-See [Data Blocks](#data-blocks) for more details.
+See [`Data_Block`](#the-structure-of-data_block) for more details.

 A compressed block consists of 2 sections :
 - [Literals section](#literals-section)
@ -410,7 +409,7 @@ A compressed block consists of 2 sections :
 ### Prerequisites
 To decode a compressed block, the following elements are necessary :
 - Previous decoded blocks, up to a distance of `windowSize`,
-  or all previous blocks in "single segment" mode.
+  or all previous blocks when `Single_Segment_flag` is set.
 - List of "recent offsets" from previous compressed block.
 - Decoding tables of previous compressed block for each symbol type
  (literals, litLength, matchLength, offset).
@ -418,45 +417,47 @@ To decode a compressed block, the following elements are necessary :

 ### Literals section

-Literals are compressed using Huffman prefix codes.
 During sequence phase, literals will be entangled with match copy operations.
 All literals are regrouped in the first part of the block.
 They can be decoded first, and then copied during sequence operations,
 or they can be decoded on the fly, as needed by sequence commands.

-| Header | (Tree Description) | Stream1 | (Stream2) | (Stream3) | (Stream4) |
-| ------ | ------------------ | ------- | --------- | --------- | --------- |
+| Literals section header | [Huffman Tree Description] | Stream1 | [Stream2] | [Stream3] | [Stream4] |
+| ----------------------- | -------------------------- | ------- | --------- | --------- | --------- |

-Literals can be compressed, or uncompressed.
+Literals can be stored uncompressed or compressed using Huffman prefix codes.
 When compressed, an optional tree description can be present,
 followed by 1 or 4 streams.

+
 #### Literals section header

 Header is in charge of describing how literals are packed.
 It's a byte-aligned variable-size bitfield, ranging from 1 to 5 bytes,
-using big-endian convention.
+using little-endian convention.

-| BlockType | sizes format | (compressed size) | regenerated size |
-| --------- | ------------ | ----------------- | ---------------- |
-|   2 bits  |  1 - 2 bits  |    0 - 18 bits    |    5 - 20 bits   |
+| Literals Block Type | sizes format | regenerated size | [compressed size] |
+| ------------------- | ------------ | ---------------- | ----------------- |
+|   2 bits            |  1 - 2 bits  |    5 - 20 bits   |    0 - 18 bits    |

-__Block Type__ :
+In this representation, bits on the left are the least significant bits.

-This is a 2-bits field, describing 4 different block types :
+__Literals Block Type__ :

-|    Value   |      0     |    1   |  2  |    3    |
-| ---------- | ---------- | ------ | --- | ------- |
-| Block Type | Compressed | Repeat | Raw |   RLE   |
+This field uses 2 lowest bits of first byte, describing 4 different block types :

- Compressed : This is a standard huffman-compressed block,
+|       Value         |  0  |  1  |      2     |      3      |
+| ------------------- | --- | --- | ---------- | ----------- |
+| Literals Block Type | Raw | RLE | Compressed | RepeatStats |    
+
+- Raw literals block - Literals are stored uncompressed.
+- RLE literals block - Literals consist of a single byte value repeated N times.
+- Compressed literals block - This is a standard huffman-compressed block,
        starting with a huffman tree description.
        See details below.
- Repeat Stats : This is a huffman-compressed block,
+- Repeat Stats literals block - This is a huffman-compressed block,
        using huffman tree _from previous huffman-compressed literals block_.
        Huffman tree description will be skipped.
- Raw : Literals are stored uncompressed.
- RLE : Literals consist of a single byte value repeated N times.

 __Sizes format__ :

@ -466,40 +467,39 @@ Sizes format are divided into 2 families :
  and the decompressed size. It will also decode the number of streams.
 - For Raw or RLE blocks, it's enough to decode the size to regenerate.

-For values spanning several bytes, convention is Big-endian.
+For values spanning several bytes, convention is Little-endian.

-__Sizes format for Raw or RLE literals block__ :
+__Sizes format for Raw and RLE literals block__ :

- Value : 0x : Regenerated size uses 5 bits (0-31).
+- Value : x0 : Regenerated size uses 5 bits (0-31).
               Total literal header size is 1 byte.
-               `size = h[0] & 31;`
- Value : 10 : Regenerated size uses 12 bits (0-4095).
+               `size = h[0]>>3;`
+- Value : 01 : Regenerated size uses 12 bits (0-4095).
               Total literal header size is 2 bytes.
-               `size = ((h[0] & 15) << 8) + h[1];`
+               `size = (h[0]>>4) + (h[1]<<4);`
 - Value : 11 : Regenerated size uses 20 bits (0-1048575).
               Total literal header size is 3 bytes.
-               `size = ((h[0] & 15) << 16) + (h[1]<<8) + h[2];`
+               `size = (h[0]>>4) + (h[1]<<4) + (h[2]<<12);`

 Note : it's allowed to represent a short value (ex : `13`)
 using a long format, accepting the reduced compactness.

-__Sizes format for Compressed literals block__ :
+__Sizes format for Compressed literals block and Repeat Stats literals block__ :

-Note : also applicable to "repeat-stats" blocks.
- Value : 00 : 4 streams.
+- Value : 00 : _Single stream_.
               Compressed and regenerated sizes use 10 bits (0-1023).
               Total literal header size is 3 bytes.
- Value : 01 : _Single stream_.
+- Value : 01 : 4 streams.
               Compressed and regenerated sizes use 10 bits (0-1023).
               Total literal header size is 3 bytes.
 - Value : 10 : 4 streams.
               Compressed and regenerated sizes use 14 bits (0-16383).
               Total literal header size is 4 bytes.
- Value : 10 : 4 streams.
+- Value : 11 : 4 streams.
               Compressed and regenerated sizes use 18 bits (0-262143).
               Total literal header size is 5 bytes.

-Compressed and regenerated size fields follow big endian convention.
+Compressed and regenerated size fields follow little-endian convention.

 #### Huffman Tree description

@ -518,11 +518,8 @@ using the fewest bits of any possible prefix codes for that alphabet.

 Prefix code must not exceed a maximum code length.
 More bits improve accuracy but cost more header size,
-and require more memory for decoding operations.
-
-The current format limits the maximum depth to 15 bits.
-The reference decoder goes further, by limiting it to 12 bits.
-It is recommended to remain compatible with reference decoder.
+and require more memory or more complex decoding operations.
+This specification limits maximum code length to 11 bits.


 ##### Representation
@ -566,21 +563,12 @@ Therefore, `maxBits = 4` and `weight[5] = 1`.
 This is a single byte value (0-255),
 which tells how to decode the list of weights.

- if headerByte >= 242 : this is one of 14 pre-defined weight distributions :
-
-| value    |242|243|244|245|246|247|248|249|250|251|252|253|254|255|
-| -------- |---|---|---|---|---|---|---|---|---|---|---|---|---|---|
-| Nb of 1s | 1 | 2 | 3 | 4 | 7 | 8 | 15| 16| 31| 32| 63| 64|127|128|
-|Complement| 1 | 2 | 1 | 4 | 1 | 8 | 1 | 16| 1 | 32| 1 | 64| 1 |128|
-
-_Note_ : complement is found by using "join to nearest power of 2" rule.
-
 - if headerByte >= 128 : this is a direct representation,
  where each weight is written directly as a 4 bits field (0-15).
  The full representation occupies `((nbSymbols+1)/2)` bytes,
  meaning it uses a last full byte even if nbSymbols is odd.
  `nbSymbols = headerByte - 127;`.
-  Note that maximum nbSymbols is 241-127 = 114.
+  Note that maximum nbSymbols is 255-127 = 128.
  A larger series must necessarily use FSE compression.

 - if headerByte < 128 :
@ -595,20 +583,20 @@ sharing a single distribution table.

 To decode an FSE bitstream, it is necessary to know its compressed size.
 Compressed size is provided by `headerByte`.
-It's also necessary to know its maximum decompressed size,
+It's also necessary to know its _maximum possible_ decompressed size,
 which is `255`, since literal values span from `0` to `255`,
 and last symbol value is not represented.

 An FSE bitstream starts with a header, describing the probability distribution.
 It will create a Decoding Table.
 Table must be pre-allocated, which requires to support a maximum accuracy.
-For a list of huffman weights, recommended maximum is 7 bits.
+For a list of huffman weights, maximum accuracy is 7 bits.

 FSE header is [described in relevant chapter](#fse-distribution-table--condensed-format),
 and so is [FSE bitstream](#bitstream).
 The main difference is that Huffman header compression uses 2 states,
 which share the same FSE distribution table.
-Bitstream contains only FSE symbols, there are no interleaved "raw bitfields".
+Bitstream contains only FSE symbols (no interleaved "raw bitfields").
 The number of symbols to decode is discovered
 by tracking bitStream overflow condition.
 When both states have overflowed the bitstream, end is reached.
@ -617,16 +605,12 @@ When both states have overflowed the bitstream, end is reached.
 ##### Conversion from weights to huffman prefix codes

 All present symbols shall now have a `weight` value.
-Symbols are sorted by weight.
-Symbols with a weight of zero are removed.
-Within same weight, symbols keep natural order.
-Starting from lowest weight,
-symbols are being allocated to a `range`.
-A `weight` directly represents a `range`,
-following the formulae : `range = weight ? 1 << (weight-1) : 0 ;`
-Similarly, it is possible to transform weights into nbBits :
+It is possible to transform weights into nbBits, using this formula :
 `nbBits = weight ? maxBits + 1 - weight : 0;` .

+Symbols are sorted by weight. Within same weight, symbols keep natural order.
+Symbols with a weight of zero are removed.
+Then, starting from lowest weight, prefix codes are distributed in order.

 __Example__ :
 Let's presume the following list of weights has been decoded :
@ -641,8 +625,6 @@ it gives the following distribution :
 | Literal      |  3  |  4  |  5  |  2  |  1  |   0  |
 | ------------ | --- | --- | --- | --- | --- | ---- |
 | weight       |  0  |  1  |  1  |  2  |  3  |   4  |
-| range        |  0  |  1  |  1  |  2  |  4  |   8  |
-| table entries| N/A |  0  |  1  | 2-3 | 4-7 | 8-15 |
 | nb bits      |  0  |  4  |  4  |  3  |  2  |   1  |
 | prefix codes | N/A | 0000| 0001| 001 | 01  |   1  |

@ -666,15 +648,14 @@ header only provides compressed and regenerated size of all 4 streams combined.
 In order to properly decode the 4 streams,
 it's necessary to know the compressed and regenerated size of each stream.

-Regenerated size is easiest :
-each stream has a size of `(totalSize+3)/4`,
-except the last one, which is up to 3 bytes smaller, to reach `totalSize`.
+Regenerated size of each stream can be calculated by `(totalSize+3)/4`,
+except for the last one, which can be up to 3 bytes smaller, to reach `totalSize`.

-Compressed size must be provided explicitly : in the 4-streams variant,
-bitstreams are preceded by 3 unsigned Little Endian 16-bits values.
+Compressed size is provided explicitly : in the 4-streams variant,
+bitstreams are preceded by 3 unsigned Little-Endian 16-bits values.
 Each value represents the compressed size of one stream, in order.
 The last stream size is deduced from total compressed size
-and from already known stream sizes :
+and from previously decoded stream sizes :
 `stream4CSize = totalCSize - 6 - stream1CSize - stream2CSize - stream3CSize;`

 ##### Bitstreams read and decode
@ -688,7 +669,7 @@ This is detected by a final bit flag :
 the highest bit set in the last byte is a final-bit-flag.
 Consequently, a last byte of `0` is not possible.
 And the final-bit-flag itself is not part of the useful bitstream.
-Hence, the last byte contain between 0 and 7 useful bits.
+Hence, the last byte contains between 0 and 7 useful bits.

 Starting from the end,
 it's possible to read the bitstream in a little-endian fashion,
@ -726,7 +707,7 @@ The Sequences section starts by a header,
 followed by optional Probability tables for each symbol type,
 followed by the bitstream.

-| Header | (LitLengthTable) | (OffsetTable) | (MatchLengthTable) | bitStream |
+| Header | [LitLengthTable] | [OffsetTable] | [MatchLengthTable] | bitStream |
 | ------ | ---------------- | ------------- | ------------------ | --------- |

 To decode the Sequence section, it's required to know its size.
@ -750,29 +731,29 @@ Let's call its first byte `byte0`.
 - `if (byte0 < 255)` : `nbSeqs = ((byte0-128) << 8) + byte1;` . Uses 2 bytes.
 - `if (byte0 == 255)`: `nbSeqs = byte1 + (byte2<<8) + 0x7F00;` . Uses 3 bytes.

-__Symbol compression modes__
+__Symbol encoding modes__

 This is a single byte, defining the compression mode of each symbol type.

 |  BitNb  |   7-6  |   5-4  |   3-2  |    1-0   |
 | ------- | ------ | ------ | ------ | -------- |
-|FieldName| LLtype | OFType | MLType | Reserved |
+|FieldName| LLType | OFType | MLType | Reserved |

 The last field, `Reserved`, must be all-zeroes.

-`LLtype`, `OFType` and `MLType` define the compression mode of
+`LLType`, `OFType` and `MLType` define the compression mode of
 Literal Lengths, Offsets and Match Lengths respectively.

 They follow the same enumeration :

-|       Value      |    0   |  1  |    2   |  3  |
-| ---------------- | ------ | --- | ------ | --- |
-| Compression Mode | predef | RLE | Repeat | FSE |
+|       Value      |    0   |  1  |      2     |    3   |
+| ---------------- | ------ | --- | ---------- | ------ |
+| Compression Mode | predef | RLE | Compressed | Repeat |

 - "predef" : uses a pre-defined distribution table.
 - "RLE" : it's a single code, repeated `nbSeqs` times.
 - "Repeat" : re-use distribution table from previous compressed block.
- "FSE" : standard FSE compression.
+- "Compressed" : standard FSE compression.
          A distribution table will be present.
          It will be described in [next part](#distribution-tables).

@ -899,16 +880,16 @@ short offsetCodes_defaultDistribution[53] =
 #### Distribution tables

 Following the header, up to 3 distribution tables can be described.
-They are, in order :
+When present, they are in this order :
 - Literal lengths
 - Offsets
 - Match Lengths

-The content to decode depends on their respective compression mode :
- Repeat mode : no content. Re-use distribution from previous compressed block.
+The content to decode depends on their respective encoding mode :
 - Predef : no content. Use pre-defined distribution table.
 - RLE : 1 byte. This is the only code to use across the whole compressed block.
 - Compressed : An FSE distribution table is present.
+- Repeat mode : no content. Re-use distribution from previous compressed block.

 ##### FSE distribution table : condensed format

@ -922,10 +903,8 @@ since it will be discovered and reported by the decoding process.

 The bitstream starts by reporting on which scale it operates.
 `AccuracyLog = low4bits + 5;`
-In theory, it can define a scale from 5 to 20.
-In practice, decoders are allowed to limit the maximum supported `AccuracyLog`.
-Recommended maximum are `9` for literal and match lengthes, and `8` for offsets.
-The reference decoder uses these limits.
+Note that maximum `AccuracyLog` for literal and match lengths is `9`,
+and for offsets it is `8`. Higher values are considered errors.

 Then follow each symbol value, from `0` to last present one.
 The nb of bits used by each field is variable.
@ -974,15 +953,14 @@ If it is a 3, another 2-bits repeat flag follows, and so on.

 When last symbol reaches cumulated total of `1 << AccuracyLog`,
 decoding is complete.
-Then the decoder can tell how many bytes were used in this process,
-and how many symbols are present.
-
-The bitstream consumes a round number of bytes.
-Any remaining bit within the last byte is just unused.
-
 If the last symbol makes cumulated total go above `1 << AccuracyLog`,
 distribution is considered corrupted.

+Then the decoder can tell how many bytes were used in this process,
+and how many symbols are present.
+The bitstream consumes a round number of bytes.
+Any remaining bit within the last byte is just unused.
+
 ##### FSE decoding : from normalized distribution to decoding tables

 The distribution of normalized probabilities is enough
@ -1103,11 +1081,11 @@ As seen in [Offset Codes], the first 3 values define a repeated offset.
 They are sorted in recency order, with 1 meaning "most recent one".

 There is an exception though, when current sequence's literal length is `0`.
-In which case, 1 would just make previous match longer.
-Therefore, in such case, 1 means in fact 2, and 2 is impossible.
-Meaning of 3 is unmodified.
+In which case, repcodes are "pushed by one",
+so 1 becomes 2, 2 becomes 3,
+and 3 becomes "offset_1 - 1_byte".

-Repeat offsets start with the following values : 1, 4 and 8 (in order).
+On first block, offset history is populated by the following values : 1, 4 and 8 (in order).

 Then each block receives its start value from previous compressed block.
 Note that non-compressed blocks are skipped,
@ -1117,14 +1095,11 @@ they do not contribute to offset history.

 ###### Offset updates rules

-When the new offset is a normal one,
-offset history is simply translated by one position,
-with the new offset taking first spot.
+A new offset takes the lead in offset history,
+up to its previous place if it was already present.

- When repeat offset 1 (most recent) is used, history is unmodified.
- When repeat offset 2 is used, it's swapped with offset 1.
- When repeat offset 3 is used, it takes first spot,
-  pushing the other ones by one position.
+It means that when repeat offset 1 (most recent) is used, history is unmodified.
+When repeat offset 2 is used, it's swapped with offset 1.


 Dictionary format
@ -1139,9 +1114,9 @@ __Pre-requisites__ : a dictionary has a known length,
 | Header | DictID | Stats | Content |
 | ------ | ------ | ----- | ------- |

-__Header__ : 4 bytes ID, value 0xEC30A437, Little Endian format
+__Header__ : 4 bytes ID, value 0xEC30A437, Little-Endian format

-__Dict_ID__ : 4 bytes, stored in Little Endian format.
+__Dict_ID__ : 4 bytes, stored in Little-Endian format.
              DictID can be any value, except 0 (which means no DictID).
              It's used by decoders to check if they use the correct dictionary.
              _Reserved ranges :_
@ -1158,15 +1133,17 @@ __Stats__ : Entropy tables, following the same format as a [compressed blocks].
            Huffman tables for literals, FSE table for offset,
            FSE table for matchLength, and FSE table for litLength.
            It's finally followed by 3 offset values, populating recent offsets,
-            stored in order, 4-bytes little endian each, for a total of 12 bytes.
+            stored in order, 4-bytes little-endian each, for a total of 12 bytes.

 __Content__ : Where the actual dictionary content is.
              Content size depends on Dictionary size.

-[compressed blocks]: #compressed-block-format
+[compressed blocks]: #the-format-of-compressed_block


 Version changes
 ---------------
- 0.1.1 reserved dictID ranges
- 0.1.0 initial release
+- 0.2.0 : numerous format adjustments for zstd v0.8
+- 0.1.2 : limit huffman tree depth to 11 bits
+- 0.1.1 : reserved dictID ranges
+- 0.1.0 : initial release