merge conflict

2019-10-15 12:58:50 -04:00 · 2019-10-15 12:58:50 -04:00 · a06b51879c
commit a06b51879c
parent 23dac23a49 8b6d96827c
84 changed files with 2169 additions and 1009 deletions
--- a/.gitignore
+++ b/.gitignore
@ -23,6 +23,7 @@ zstdmt
 # Test artefacts
 tmp*
 dictionary.
+dictionary
 NUL

 # Build artefacts
--- a/1
+++ b/1
@ -69,6 +69,7 @@ test: MOREFLAGS += -g -DDEBUGLEVEL=$(DEBUGLEVEL) -Werror
 test:
 	MOREFLAGS="$(MOREFLAGS)" $(MAKE) -j -C $(PRGDIR) allVariants
 	$(MAKE) -C $(TESTDIR) $@
+	ZSTD=../../programs/zstd $(MAKE) -C doc/educational_decoder test

 ## shortest: same as `make check`
 .PHONY: shortest
--- a/README.md
+++ b/README.md
@ -15,6 +15,7 @@ a list of known ports and bindings is provided on [Zstandard homepage](http://ww
 [![Build status][AppveyorDevBadge]][AppveyorLink]
 [![Build status][CircleDevBadge]][CircleLink]
 [![Build status][CirrusDevBadge]][CirrusLink]
+[![Fuzzing Status][OSSFuzzBadge]][OSSFuzzLink]

 [travisDevBadge]: https://travis-ci.org/facebook/zstd.svg?branch=dev "Continuous Integration test suite"
 [travisLink]: https://travis-ci.org/facebook/zstd
@ -24,6 +25,8 @@ a list of known ports and bindings is provided on [Zstandard homepage](http://ww
 [CircleLink]: https://circleci.com/gh/facebook/zstd
 [CirrusDevBadge]: https://api.cirrus-ci.com/github/facebook/zstd.svg?branch=dev
 [CirrusLink]: https://cirrus-ci.com/github/facebook/zstd
+[OSSFuzzBadge]: https://oss-fuzz-build-logs.storage.googleapis.com/badges/zstd.svg
+[OSSFuzzLink]: https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:zstd

 ## Benchmarks

--- a/build/LICENSE
+++ b/build/LICENSE
--- a/build/VS2008/fullbench/fullbench.vcproj
+++ b/build/VS2008/fullbench/fullbench.vcproj
@ -510,6 +510,10 @@
 				RelativePath="..\..\..\lib\compress\zstd_compress_sequences.h"
 				>
 			</File>
+			<File
+				RelativePath="..\..\..\lib\compress\zstd_cwksp.h"
+				>
+			</File>
 			<File
 				RelativePath="..\..\..\lib\compress\zstd_fast.h"
 				>
--- a/build/VS2008/fuzzer/fuzzer.vcproj
+++ b/build/VS2008/fuzzer/fuzzer.vcproj
@ -546,6 +546,10 @@
 				RelativePath="..\..\..\lib\compress\zstd_compress_sequences.h"
 				>
 			</File>
+			<File
+				RelativePath="..\..\..\lib\compress\zstd_cwksp.h"
+				>
+			</File>
 			<File
 				RelativePath="..\..\..\lib\compress\zstd_fast.h"
 				>
--- a/build/VS2008/zstd/zstd.vcproj
+++ b/build/VS2008/zstd/zstd.vcproj
@ -626,6 +626,10 @@
 				RelativePath="..\..\..\lib\compress\zstd_compress_sequences.h"
 				>
 			</File>
+			<File
+				RelativePath="..\..\..\lib\compress\zstd_cwksp.h"
+				>
+			</File>
 			<File
 				RelativePath="..\..\..\lib\compress\zstd_fast.h"
 				>
--- a/build/VS2008/zstdlib/zstdlib.vcproj
+++ b/build/VS2008/zstdlib/zstdlib.vcproj
@ -558,6 +558,10 @@
 				RelativePath="..\..\..\lib\compress\zstd_compress_sequences.h"
 				>
 			</File>
+			<File
+				RelativePath="..\..\..\lib\compress\zstd_cwksp.h"
+				>
+			</File>
 			<File
 				RelativePath="..\..\..\lib\compress\zstd_fast.h"
 				>
--- a/build/VS2010/fullbench/fullbench.vcxproj
+++ b/build/VS2010/fullbench/fullbench.vcxproj
@ -197,6 +197,7 @@
    <ClInclude Include="..\..\..\lib\compress\zstd_compress.h" />
    <ClInclude Include="..\..\..\lib\compress\zstd_compress_literals.h" />
    <ClInclude Include="..\..\..\lib\compress\zstd_compress_sequences.h" />
+    <ClInclude Include="..\..\..\lib\compress\zstd_cwksp.h" />
    <ClInclude Include="..\..\..\lib\compress\zstd_fast.h" />
    <ClInclude Include="..\..\..\lib\compress\zstd_double_fast.h" />
    <ClInclude Include="..\..\..\lib\compress\zstd_lazy.h" />
--- a/build/VS2010/fuzzer/fuzzer.vcxproj
+++ b/build/VS2010/fuzzer/fuzzer.vcxproj
@ -200,6 +200,7 @@
    <ClInclude Include="..\..\..\lib\compress\zstd_compress.h" />
    <ClInclude Include="..\..\..\lib\compress\zstd_compress_literals.h" />
    <ClInclude Include="..\..\..\lib\compress\zstd_compress_sequences.h" />
+    <ClInclude Include="..\..\..\lib\compress\zstd_cwksp.h" />
    <ClInclude Include="..\..\..\lib\compress\zstd_fast.h" />
    <ClInclude Include="..\..\..\lib\compress\zstd_double_fast.h" />
    <ClInclude Include="..\..\..\lib\compress\zstd_lazy.h" />
--- a/build/VS2010/libzstd-dll/libzstd-dll.vcxproj
+++ b/build/VS2010/libzstd-dll/libzstd-dll.vcxproj
@ -82,6 +82,7 @@
    <ClInclude Include="..\..\..\lib\compress\zstd_compress.h" />
    <ClInclude Include="..\..\..\lib\compress\zstd_compress_literals.h" />
    <ClInclude Include="..\..\..\lib\compress\zstd_compress_sequences.h" />
+    <ClInclude Include="..\..\..\lib\compress\zstd_cwksp.h" />
    <ClInclude Include="..\..\..\lib\compress\zstd_fast.h" />
    <ClInclude Include="..\..\..\lib\compress\zstd_double_fast.h" />
    <ClInclude Include="..\..\..\lib\compress\zstd_lazy.h" />
--- a/build/VS2010/libzstd/libzstd.vcxproj
+++ b/build/VS2010/libzstd/libzstd.vcxproj
@ -82,6 +82,7 @@
    <ClInclude Include="..\..\..\lib\compress\zstd_compress.h" />
    <ClInclude Include="..\..\..\lib\compress\zstd_compress_literals.h" />
    <ClInclude Include="..\..\..\lib\compress\zstd_compress_sequences.h" />
+    <ClInclude Include="..\..\..\lib\compress\zstd_cwksp.h" />
    <ClInclude Include="..\..\..\lib\compress\zstd_fast.h" />
    <ClInclude Include="..\..\..\lib\compress\zstd_double_fast.h" />
    <ClInclude Include="..\..\..\lib\compress\zstd_lazy.h" />
--- a/build/VS2010/zstd/zstd.vcxproj
+++ b/build/VS2010/zstd/zstd.vcxproj
@ -79,6 +79,7 @@
    <ClInclude Include="..\..\..\lib\compress\zstd_compress.h" />
    <ClInclude Include="..\..\..\lib\compress\zstd_compress_literals.h" />
    <ClInclude Include="..\..\..\lib\compress\zstd_compress_sequences.h" />
+    <ClInclude Include="..\..\..\lib\compress\zstd_cwksp.h" />
    <ClInclude Include="..\..\..\lib\compress\zstd_fast.h" />
    <ClInclude Include="..\..\..\lib\compress\zstd_double_fast.h" />
    <ClInclude Include="..\..\..\lib\compress\zstd_lazy.h" />
--- a/build/cmake/lib/CMakeLists.txt
+++ b/build/cmake/lib/CMakeLists.txt
@ -133,8 +133,8 @@ endif ()
 if (UNIX)
    # pkg-config
    set(PREFIX "${CMAKE_INSTALL_PREFIX}")
-    set(LIBDIR "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
-    set(INCLUDEDIR "${CMAKE_INSTALL_PREFIX}/include")
+    set(LIBDIR "${CMAKE_INSTALL_FULL_LIBDIR}")
+    set(INCLUDEDIR "${CMAKE_INSTALL_FULL_INCLUDEDIR}")
    set(VERSION "${zstd_VERSION_MAJOR}.${zstd_VERSION_MINOR}.${zstd_VERSION_PATCH}")
    add_custom_target(libzstd.pc ALL
            ${CMAKE_COMMAND} -DIN="${LIBRARY_DIR}/libzstd.pc.in" -DOUT="libzstd.pc"
@ -152,10 +152,10 @@ install(FILES
    ${LIBRARY_DIR}/dictBuilder/zdict.h
    ${LIBRARY_DIR}/dictBuilder/cover.h
    ${LIBRARY_DIR}/common/zstd_errors.h
-    DESTINATION "include")
+    DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")

 if (ZSTD_BUILD_SHARED)
-    install(TARGETS libzstd_shared RUNTIME DESTINATION "bin"
+    install(TARGETS libzstd_shared RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}"
                                   LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
                                   ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}")
 endif()
--- a/build/cmake/programs/CMakeLists.txt
+++ b/build/cmake/programs/CMakeLists.txt
@ -31,15 +31,15 @@ target_link_libraries(zstd libzstd_static)
 if (CMAKE_SYSTEM_NAME MATCHES "(Solaris|SunOS)")
    target_link_libraries(zstd rt)
 endif ()
-install(TARGETS zstd RUNTIME DESTINATION "bin")
+install(TARGETS zstd RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}")

 if (UNIX)
    add_custom_target(zstdcat ALL ${CMAKE_COMMAND} -E create_symlink zstd zstdcat DEPENDS zstd COMMENT "Creating zstdcat symlink")
    add_custom_target(unzstd ALL ${CMAKE_COMMAND} -E create_symlink zstd unzstd DEPENDS zstd COMMENT "Creating unzstd symlink")
-    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/zstdcat DESTINATION "bin")
-    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/unzstd DESTINATION "bin")
-    install(PROGRAMS ${PROGRAMS_DIR}/zstdgrep DESTINATION "bin")
-    install(PROGRAMS ${PROGRAMS_DIR}/zstdless DESTINATION "bin")
+    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/zstdcat DESTINATION "${CMAKE_INSTALL_BINDIR}")
+    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/unzstd DESTINATION "${CMAKE_INSTALL_BINDIR}")
+    install(PROGRAMS ${PROGRAMS_DIR}/zstdgrep DESTINATION "${CMAKE_INSTALL_BINDIR}")
+    install(PROGRAMS ${PROGRAMS_DIR}/zstdless DESTINATION "${CMAKE_INSTALL_BINDIR}")

    add_custom_target(zstd.1 ALL
        ${CMAKE_COMMAND} -E copy ${PROGRAMS_DIR}/zstd.1 .
@ -56,14 +56,16 @@ if (UNIX)
    # Define MAN_INSTALL_DIR if necessary
    if (MAN_INSTALL_DIR)
    else ()
-      set(MAN_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}/share/man/man1)
+        set(MAN_INSTALL_DIR ${CMAKE_INSTALL_MANDIR}/man1)
    endif ()

-    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/zstd.1 DESTINATION "${MAN_INSTALL_DIR}")
-    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/zstdcat.1 DESTINATION "${MAN_INSTALL_DIR}")
-    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/unzstd.1 DESTINATION "${MAN_INSTALL_DIR}")
-    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/zstdgrep.1 DESTINATION "${MAN_INSTALL_DIR}")
-    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/zstdless.1 DESTINATION "${MAN_INSTALL_DIR}")
+    install(FILES
+        ${CMAKE_CURRENT_BINARY_DIR}/zstd.1
+        ${CMAKE_CURRENT_BINARY_DIR}/zstdcat.1
+        ${CMAKE_CURRENT_BINARY_DIR}/unzstd.1
+        ${CMAKE_CURRENT_BINARY_DIR}/zstdgrep.1
+        ${CMAKE_CURRENT_BINARY_DIR}/zstdless.1
+        DESTINATION "${MAN_INSTALL_DIR}")

    add_executable(zstd-frugal ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${PROGRAMS_DIR}/fileio.c)
    target_link_libraries(zstd-frugal libzstd_static)
@ -79,7 +81,7 @@ if (ZSTD_MULTITHREAD_SUPPORT)
        target_link_libraries(zstd ${THREADS_LIBS})

        add_custom_target(zstdmt ALL ${CMAKE_COMMAND} -E create_symlink zstd zstdmt DEPENDS zstd COMMENT "Creating zstdmt symlink")
-        install(FILES ${CMAKE_CURRENT_BINARY_DIR}/zstdmt DESTINATION "bin")
+        install(FILES ${CMAKE_CURRENT_BINARY_DIR}/zstdmt DESTINATION "${CMAKE_INSTALL_BINDIR}")
    endif ()
 endif ()

--- a/contrib/linux-kernel/lib/zstd/fse.h
+++ b/contrib/linux-kernel/lib/zstd/fse.h
@ -232,7 +232,7 @@ If there is an error, the function will return an error code, which can be teste
 *******************************************/
 /* FSE buffer bounds */
 #define FSE_NCOUNTBOUND 512
-#define FSE_BLOCKBOUND(size) (size + (size >> 7))
+#define FSE_BLOCKBOUND(size) (size + (size >> 7) + 4 /* constant for initial fse states */ )
 #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */

 /* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
--- a/doc/educational_decoder/Makefile
+++ b/doc/educational_decoder/Makefile
@ -1,15 +1,26 @@
+# ################################################################
+# Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+# All rights reserved.
+#
+# This source code is licensed under both the BSD-style license (found in the
+# LICENSE file in the root directory of this source tree) and the GPLv2 (found
+# in the COPYING file in the root directory of this source tree).
+# ################################################################
+
+ZSTD ?= zstd   # requires zstd installation on local system
+DIFF ?= diff
 HARNESS_FILES=*.c

 MULTITHREAD_LDFLAGS = -pthread
 DEBUGFLAGS= -g -DZSTD_DEBUG=1
 CPPFLAGS += -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
            -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR)
-CFLAGS   ?= -O3
+CFLAGS   ?= -O2
 CFLAGS   += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow                 \
-            -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
-            -Wstrict-prototypes -Wundef                                     \
+            -Wstrict-aliasing=1 -Wswitch-enum                               \
+            -Wredundant-decls -Wstrict-prototypes -Wundef                   \
            -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings      \
-            -Wredundant-decls
+            -std=c99
 CFLAGS   += $(DEBUGFLAGS)
 CFLAGS   += $(MOREFLAGS)
 FLAGS     = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MULTITHREAD_LDFLAGS)
@ -22,13 +33,22 @@ clean:
 	@$(RM) -rf harness.dSYM

 test: harness
-	@zstd README.md -o tmp.zst
+	#
+	# Testing single-file decompression with educational decoder
+	#
+	@$(ZSTD) README.md -o tmp.zst
 	@./harness tmp.zst tmp
-	@diff -s tmp README.md
+	@$(DIFF) -s tmp README.md
 	@$(RM) -f tmp*
-	@zstd --train harness.c zstd_decompress.c zstd_decompress.h README.md
-	@zstd -D dictionary README.md -o tmp.zst
+	#
+	# Testing dictionary decompression with educational decoder
+	#
+	# note : files are presented multiple times for training, to reach minimum threshold
+	@$(ZSTD) --train harness.c zstd_decompress.c zstd_decompress.h README.md \
+                  harness.c zstd_decompress.c zstd_decompress.h README.md \
+                  harness.c zstd_decompress.c zstd_decompress.h README.md
+	@$(ZSTD) -D dictionary README.md -o tmp.zst
 	@./harness tmp.zst tmp dictionary
-	@diff -s tmp README.md
+	@$(DIFF) -s tmp README.md
 	@$(RM) -f tmp* dictionary
-	@make clean
+	@$(MAKE) clean
--- a/doc/educational_decoder/harness.c
+++ b/doc/educational_decoder/harness.c
@ -33,7 +33,7 @@ size_t read_file(const char *path, u8 **ptr) {
    }

    fseek(f, 0L, SEEK_END);
-    size_t size = ftell(f);
+    size_t size = (size_t)ftell(f);
    rewind(f);

    *ptr = malloc(size);
--- a/doc/educational_decoder/zstd_decompress.c
+++ b/doc/educational_decoder/zstd_decompress.c
@ -395,7 +395,7 @@ size_t ZSTD_decompress_with_dict(void *const dst, const size_t dst_len,
    /* this decoder assumes decompression of a single frame */
    decode_frame(&out, &in, parsed_dict);

-    return out.ptr - (u8 *)dst;
+    return (size_t)(out.ptr - (u8 *)dst);
 }

 /******* FRAME DECODING ******************************************************/
@ -416,7 +416,7 @@ static void decompress_data(frame_context_t *const ctx, ostream_t *const out,

 static void decode_frame(ostream_t *const out, istream_t *const in,
                         const dictionary_t *const dict) {
-    const u32 magic_number = IO_read_bits(in, 32);
+    const u32 magic_number = (u32)IO_read_bits(in, 32);
    // Zstandard frame
    //
    // "Magic_Number
@ -497,7 +497,7 @@ static void parse_frame_header(frame_header_t *const header,
    // 3    Reserved_bit
    // 2    Content_Checksum_flag
    // 1-0  Dictionary_ID_flag"
-    const u8 descriptor = IO_read_bits(in, 8);
+    const u8 descriptor = (u8)IO_read_bits(in, 8);

    // decode frame header descriptor into flags
    const u8 frame_content_size_flag = descriptor >> 6;
@ -521,7 +521,7 @@ static void parse_frame_header(frame_header_t *const header,
        //
        // Bit numbers  7-3         2-0
        // Field name   Exponent    Mantissa"
-        u8 window_descriptor = IO_read_bits(in, 8);
+        u8 window_descriptor = (u8)IO_read_bits(in, 8);
        u8 exponent = window_descriptor >> 3;
        u8 mantissa = window_descriptor & 7;

@ -541,7 +541,7 @@ static void parse_frame_header(frame_header_t *const header,
        const int bytes_array[] = {0, 1, 2, 4};
        const int bytes = bytes_array[dictionary_id_flag];

-        header->dictionary_id = IO_read_bits(in, bytes * 8);
+        header->dictionary_id = (u32)IO_read_bits(in, bytes * 8);
    } else {
        header->dictionary_id = 0;
    }
@ -633,8 +633,8 @@ static void decompress_data(frame_context_t *const ctx, ostream_t *const out,
        //
        // The next 2 bits represent the Block_Type, while the remaining 21 bits
        // represent the Block_Size. Format is little-endian."
-        last_block = IO_read_bits(in, 1);
-        const int block_type = IO_read_bits(in, 2);
+        last_block = (int)IO_read_bits(in, 1);
+        const int block_type = (int)IO_read_bits(in, 2);
        const size_t block_len = IO_read_bits(in, 21);

        switch (block_type) {
@ -748,8 +748,8 @@ static size_t decode_literals(frame_context_t *const ctx, istream_t *const in,
    // types"
    //
    // size_format takes between 1 and 2 bits
-    int block_type = IO_read_bits(in, 2);
-    int size_format = IO_read_bits(in, 2);
+    int block_type = (int)IO_read_bits(in, 2);
+    int size_format = (int)IO_read_bits(in, 2);

    if (block_type <= 1) {
        // Raw or RLE literals block
@ -833,6 +833,7 @@ static size_t decode_literals_compressed(frame_context_t *const ctx,
        // bits (0-1023)."
        num_streams = 1;
    // Fall through as it has the same size format
+        /* fallthrough */
    case 1:
        // "4 streams. Both Compressed_Size and Regenerated_Size use 10 bits
        // (0-1023)."
@ -1005,7 +1006,7 @@ static const i16 SEQ_MATCH_LENGTH_DEFAULT_DIST[53] = {
 static const u32 SEQ_LITERAL_LENGTH_BASELINES[36] = {
    0,  1,  2,   3,   4,   5,    6,    7,    8,    9,     10,    11,
    12, 13, 14,  15,  16,  18,   20,   22,   24,   28,    32,    40,
-    48, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65538};
+    48, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536};
 static const u8 SEQ_LITERAL_LENGTH_EXTRA_BITS[36] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  1,  1,
    1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
@ -1021,7 +1022,7 @@ static const u8 SEQ_MATCH_LENGTH_EXTRA_BITS[53] = {
    2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};

 /// Offset decoding is simpler so we just need a maximum code value
-static const u8 SEQ_MAX_CODES[3] = {35, -1, 52};
+static const u8 SEQ_MAX_CODES[3] = {35, (u8)-1, 52};

 static void decompress_sequences(frame_context_t *const ctx,
                                 istream_t *const in,
@ -1132,7 +1133,7 @@ static void decompress_sequences(frame_context_t *const ctx, istream_t *in,
    // a single 1-bit and then fills the byte with 0-7 0 bits of padding."
    const int padding = 8 - highest_set_bit(src[len - 1]);
    // The offset starts at the end because FSE streams are read backwards
-    i64 bit_offset = len * 8 - padding;
+    i64 bit_offset = (i64)(len * 8 - (size_t)padding);

    // "The bitstream starts with initial state values, each using the required
    // number of bits in their respective accuracy, decoded previously from
@ -1409,7 +1410,7 @@ size_t ZSTD_get_decompressed_size(const void *src, const size_t src_len) {

    // get decompressed size from ZSTD frame header
    {
-        const u32 magic_number = IO_read_bits(&in, 32);
+        const u32 magic_number = (u32)IO_read_bits(&in, 32);

        if (magic_number == 0xFD2FB528U) {
            // ZSTD frame
@ -1418,7 +1419,7 @@ size_t ZSTD_get_decompressed_size(const void *src, const size_t src_len) {

            if (header.frame_content_size == 0 && !header.single_segment_flag) {
                // Content size not provided, we can't tell
-                return -1;
+                return (size_t)-1;
            }

            return header.frame_content_size;
--- a/doc/educational_decoder/zstd_decompress.h
+++ b/doc/educational_decoder/zstd_decompress.h
@ -7,6 +7,8 @@
 * in the COPYING file in the root directory of this source tree).
 */

+#include <stddef.h>   /* size_t */
+
 /******* EXPOSED TYPES ********************************************************/
 /*
 * Contains the parsed contents of a dictionary
@ -39,7 +41,7 @@ size_t ZSTD_get_decompressed_size(const void *const src, const size_t src_len);
 * Return a valid dictionary_t pointer for use with dictionary initialization
 * or decompression
 */
-dictionary_t* create_dictionary();
+dictionary_t* create_dictionary(void);

 /*
 * Parse a provided dictionary blob for use in decompression
--- a/doc/zstd_manual.html
+++ b/doc/zstd_manual.html
@ -1,10 +1,10 @@
 <html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
-<title>zstd 1.4.3 Manual</title>
+<title>zstd 1.4.4 Manual</title>
 </head>
 <body>
-<h1>zstd 1.4.3 Manual</h1>
+<h1>zstd 1.4.4 Manual</h1>
 <hr>
 <a name="Contents"></a><h2>Contents</h2>
 <ol>
@ -324,6 +324,7 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
     * ZSTD_c_forceAttachDict
     * ZSTD_c_literalCompressionMode
     * ZSTD_c_targetCBlockSize
+     * ZSTD_c_srcSizeHint
     * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
     * note : never ever use experimentalParam? names directly;
     *        also, the enums values themselves are unstable and can still change.
@ -334,6 +335,7 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
     ZSTD_c_experimentalParam4=1001,
     ZSTD_c_experimentalParam5=1002,
     ZSTD_c_experimentalParam6=1003,
+     ZSTD_c_experimentalParam7=1004,
 } ZSTD_cParameter;
 </b></pre><BR>
 <pre><b>typedef struct {
@ -1005,14 +1007,23 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
 size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
 size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
 size_t ZSTD_estimateDCtxSize(void);
-</b><p>  These functions make it possible to estimate memory usage
-  of a future {D,C}Ctx, before its creation.
-  ZSTD_estimateCCtxSize() will provide a budget large enough for any compression level up to selected one.
-  It will also consider src size to be arbitrarily "large", which is worst case.
-  If srcSize is known to always be small, ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation.
-  ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
-  ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1.
-  Note : CCtx size estimation is only correct for single-threaded compression. 
+</b><p>  These functions make it possible to estimate memory usage of a future
+  {D,C}Ctx, before its creation.
+
+  ZSTD_estimateCCtxSize() will provide a budget large enough for any
+  compression level up to selected one. Unlike ZSTD_estimateCStreamSize*(),
+  this estimate does not include space for a window buffer, so this estimate
+  is guaranteed to be enough for single-shot compressions, but not streaming
+  compressions. It will however assume the input may be arbitrarily large,
+  which is the worst case. If srcSize is known to always be small,
+  ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation.
+  ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with
+  ZSTD_getCParams() to create cParams from compressionLevel.
+  ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with
+  ZSTD_CCtxParams_setParameter().
+
+  Note: only single-threaded compression is supported. This function will
+  return an error code if ZSTD_c_nbWorkers is >= 1. 
 </p></pre><BR>

 <pre><b>size_t ZSTD_estimateCStreamSize(int compressionLevel);
@ -1318,7 +1329,10 @@ size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t di
 </b>/**! ZSTD_initCStream_advanced() :<b>
 * This function is deprecated, and is approximately equivalent to:
 *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
- *     ZSTD_CCtx_setZstdParams(zcs, params); // Set the zstd params and leave the rest as-is
+ *     // Pseudocode: Set each zstd parameter and leave the rest as-is.
+ *     for ((param, value) : params) {
+ *         ZSTD_CCtx_setParameter(zcs, param, value);
+ *     }
 *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
 *     ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
 *
@ -1338,7 +1352,10 @@ size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
 </b>/**! ZSTD_initCStream_usingCDict_advanced() :<b>
 * This function is deprecated, and is approximately equivalent to:
 *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
- *     ZSTD_CCtx_setZstdFrameParams(zcs, fParams); // Set the zstd frame params and leave the rest as-is
+ *     // Pseudocode: Set each zstd frame parameter and leave the rest as-is.
+ *     for ((fParam, value) : fParams) {
+ *         ZSTD_CCtx_setParameter(zcs, fParam, value);
+ *     }
 *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
 *     ZSTD_CCtx_refCDict(zcs, cdict);
 *
--- a/lib/common/bitstream.h
+++ b/lib/common/bitstream.h
@ -164,7 +164,7 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
        _BitScanReverse ( &r, val );
        return (unsigned) r;
 #   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
-        return 31 - __builtin_clz (val);
+        return __builtin_clz (val) ^ 31;
 #   elif defined(__ICCARM__)    /* IAR Intrinsic */
        return 31 - __CLZ(val);
 #   else   /* Software version */
@ -244,9 +244,9 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
 {
    size_t const nbBytes = bitC->bitPos >> 3;
    assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+    assert(bitC->ptr <= bitC->endPtr);
    MEM_writeLEST(bitC->ptr, bitC->bitContainer);
    bitC->ptr += nbBytes;
-    assert(bitC->ptr <= bitC->endPtr);
    bitC->bitPos &= 7;
    bitC->bitContainer >>= nbBytes*8;
 }
@ -260,6 +260,7 @@ MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
 {
    size_t const nbBytes = bitC->bitPos >> 3;
    assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+    assert(bitC->ptr <= bitC->endPtr);
    MEM_writeLEST(bitC->ptr, bitC->bitContainer);
    bitC->ptr += nbBytes;
    if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
--- a/lib/common/compiler.h
+++ b/lib/common/compiler.h
@ -61,6 +61,13 @@
 #  define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
 #endif

+/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
+#if defined(__GNUC__)
+#  define UNUSED_ATTR __attribute__((unused))
+#else
+#  define UNUSED_ATTR
+#endif
+
 /* force no inlining */
 #ifdef _MSC_VER
 #  define FORCE_NOINLINE static __declspec(noinline)
--- a/lib/common/fse.h
+++ b/lib/common/fse.h
@ -308,7 +308,7 @@ If there is an error, the function will return an error code, which can be teste
 *******************************************/
 /* FSE buffer bounds */
 #define FSE_NCOUNTBOUND 512
-#define FSE_BLOCKBOUND(size) (size + (size>>7))
+#define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
 #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))   /* Macro version, useful for static allocation */

 /* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
--- a/lib/common/mem.h
+++ b/lib/common/mem.h
@ -47,6 +47,39 @@ extern "C" {
 #define MEM_STATIC_ASSERT(c)   { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
 MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }

+/* detects whether we are being compiled under msan */
+#if defined (__has_feature)
+#  if __has_feature(memory_sanitizer)
+#    define MEMORY_SANITIZER 1
+#  endif
+#endif
+
+#if defined (MEMORY_SANITIZER)
+/* Not all platforms that support msan provide sanitizers/msan_interface.h.
+ * We therefore declare the functions we need ourselves, rather than trying to
+ * include the header file... */
+
+#include <stdint.h> /* intptr_t */
+
+/* Make memory region fully initialized (without changing its contents). */
+void __msan_unpoison(const volatile void *a, size_t size);
+
+/* Make memory region fully uninitialized (without changing its contents).
+   This is a legacy interface that does not update origin information. Use
+   __msan_allocated_memory() instead. */
+void __msan_poison(const volatile void *a, size_t size);
+
+/* Returns the offset of the first (at least partially) poisoned byte in the
+   memory range, or -1 if the whole range is good. */
+intptr_t __msan_test_shadow(const volatile void *x, size_t size);
+#endif
+
+#if defined (MEMORY_SANITIZER)
+#  define MEM_SKIP_MSAN __attribute__((no_sanitize("memory")))
+#else
+#  define MEM_SKIP_MSAN
+#endif
+

 /*-**************************************************************
 *  Basic Types
--- a/lib/common/zstd_internal.h
+++ b/lib/common/zstd_internal.h
@ -197,8 +197,8 @@ static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
 static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); }
 #define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }

-#define WILDCOPY_OVERLENGTH 8
-#define VECLEN 16
+#define WILDCOPY_OVERLENGTH 32
+#define WILDCOPY_VECLEN 16

 typedef enum {
    ZSTD_no_overlap,
@ -207,67 +207,58 @@ typedef enum {
 } ZSTD_overlap_e;

 /*! ZSTD_wildcopy() :
- *  custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */
+ *  Custom version of memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
+ *  @param ovtype controls the overlap detection
+ *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
+ *         - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.
+ *           The src buffer must be before the dst buffer.
+ */
 MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
-void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
+void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
 {
    ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
    const BYTE* ip = (const BYTE*)src;
    BYTE* op = (BYTE*)dst;
    BYTE* const oend = op + length;

-    assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
-    if (length < VECLEN || (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN)) {
-      do
-          COPY8(op, ip)
-      while (op < oend);
-    }
-    else {
-      if ((length & 8) == 0)
-        COPY8(op, ip);
-      do {
+    assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN));
+
+    if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
+        /* Handle short offset copies. */
+        do {
+            COPY8(op, ip)
+        } while (op < oend);
+    } else {
+        assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
+        /* Separate out the first two COPY16() calls because the copy length is
+         * almost certain to be short, so the branches have different
+         * probabilities.
+         * On gcc-9 unrolling once is +1.6%, twice is +2%, thrice is +1.8%.
+         * On clang-8 unrolling once is +1.4%, twice is +3.3%, thrice is +3%.
+         */
        COPY16(op, ip);
-      }
-      while (op < oend);
+        COPY16(op, ip);
+        if (op >= oend) return;
+        do {
+            COPY16(op, ip);
+            COPY16(op, ip);
+        }
+        while (op < oend);
    }
 }

-/*! ZSTD_wildcopy_16min() :
- *  same semantics as ZSTD_wilcopy() except guaranteed to be able to copy 16 bytes at the start */
-MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
-void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
+/*! ZSTD_wildcopy8() :
+ *  The same as ZSTD_wildcopy(), but it can only overwrite 8 bytes, and works for
+ *  overlapping buffers that are at least 8 bytes apart.
+ */
+MEM_STATIC void ZSTD_wildcopy8(void* dst, const void* src, ptrdiff_t length)
 {
-    ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
    const BYTE* ip = (const BYTE*)src;
    BYTE* op = (BYTE*)dst;
-    BYTE* const oend = op + length;
-
-    assert(length >= 8);
-    assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
-
-    if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) {
-      do
-          COPY8(op, ip)
-      while (op < oend);
-    }
-    else {
-      if ((length & 8) == 0)
+    BYTE* const oend = (BYTE*)op + length;
+    do {
        COPY8(op, ip);
-      do {
-        COPY16(op, ip);
-      }
-      while (op < oend);
-    }
-}
-
-MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd)   /* should be faster for decoding, but strangely, not verified on all platform */
-{
-    const BYTE* ip = (const BYTE*)src;
-    BYTE* op = (BYTE*)dst;
-    BYTE* const oend = (BYTE*)dstEnd;
-    do
-        COPY8(op, ip)
-    while (op < oend);
+    } while (op < oend);
 }


@ -323,7 +314,7 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val)   /* compress, dictBuilder, decodeCorpus
        _BitScanReverse(&r, val);
        return (unsigned)r;
 #   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* GCC Intrinsic */
-        return 31 - __builtin_clz(val);
+        return __builtin_clz (val) ^ 31;
 #   elif defined(__ICCARM__)    /* IAR Intrinsic */
        return 31 - __CLZ(val);
 #   else   /* Software version */
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@ -19,6 +19,7 @@
 *  Dependencies
 ***************************************/
 #include "zstd_internal.h"
+#include "zstd_cwksp.h"
 #ifdef ZSTD_MULTITHREAD
 #  include "zstdmt_compress.h"
 #endif
@ -192,6 +193,13 @@ typedef struct {
  size_t capacity; /* The capacity starting from `seq` pointer */
 } rawSeqStore_t;

+typedef struct {
+    int collectSequences;     /* NOTE(review): presumably a boolean switch enabling sequence collection -- confirm against users */
+    ZSTD_Sequence* seqStart;  /* NOTE(review): presumably the first element of the output sequence array -- confirm */
+    size_t seqIndex;          /* NOTE(review): presumably the next write position within seqStart -- confirm */
+    size_t maxSequences;      /* NOTE(review): presumably the capacity of seqStart, counted in sequences -- confirm */
+} SeqCollector;
+
 struct ZSTD_CCtx_params_s {
    ZSTD_format_e format;
    ZSTD_compressionParameters cParams;
@ -231,9 +239,7 @@ struct ZSTD_CCtx_s {
    ZSTD_CCtx_params appliedParams;
    U32   dictID;

-    int workSpaceOversizedDuration;
-    void* workSpace;
-    size_t workSpaceSize;
+    ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
    size_t blockSize;
    unsigned long long pledgedSrcSizePlusOne;  /* this way, 0 (default) == unknown */
    unsigned long long consumedSrcSize;
@ -241,6 +247,7 @@ struct ZSTD_CCtx_s {
    XXH64_state_t xxhState;
    ZSTD_customMem customMem;
    size_t staticSize;
+    SeqCollector seqCollector;
    int isFirstBlock;

    seqStore_t seqStore;      /* sequences storage ptrs */
@ -341,26 +348,57 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
    return (srcSize >> minlog) + 2;
 }

+/*! ZSTD_safecopyLiterals() :
+ *  Literal copy that never reads more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
+ *  It is only called when the literals end past ilimit_w, so it only needs to be
+ *  efficient for a single large copy.
+ */
+static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
+    assert(iend > ilimit_w);
+    if (ip <= ilimit_w) {
+        /* bulk part: wildcopy everything located before ilimit_w */
+        ptrdiff_t const wildLength = ilimit_w - ip;
+        ZSTD_wildcopy(op, ip, wildLength, ZSTD_no_overlap);
+        op += wildLength;
+        ip = ilimit_w;
+    }
+    /* tail part: one byte at a time, so nothing at or past iend is read */
+    for ( ; ip < iend; ++ip, ++op) {
+        *op = *ip;
+    }
+}
+
 /*! ZSTD_storeSeq() :
- *  Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
- *  `offsetCode` : distance to match + 3 (values 1-3 are repCodes).
+ *  Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
+ *  `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
 *  `mlBase` : matchLength - MINMATCH
+ *  Allowed to overread literals up to litLimit.
 */
-MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t mlBase)
+HINT_INLINE UNUSED_ATTR
+void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
 {
+    BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
+    BYTE const* const litEnd = literals + litLength;
 #if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
    static const BYTE* g_start = NULL;
    if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
    {   U32 const pos = (U32)((const BYTE*)literals - g_start);
        DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
-               pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offsetCode);
+               pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);
    }
 #endif
    assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
    /* copy Literals */
    assert(seqStorePtr->maxNbLit <= 128 KB);
    assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
-    ZSTD_wildcopy(seqStorePtr->lit, literals, (ptrdiff_t)litLength, ZSTD_no_overlap);
+    assert(literals + litLength <= litLimit);
+    if (litEnd <= litLimit_w) {
+        /* Common case we can use wildcopy.
+	 * First copy 16 bytes, because literals are likely short.
+	 */
+        assert(WILDCOPY_OVERLENGTH >= 16);
+        ZSTD_copy16(seqStorePtr->lit, literals);
+        if (litLength > 16) {
+            ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
+        }
+    } else {
+        ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
+    }
    seqStorePtr->lit += litLength;

    /* literal Length */
@ -372,7 +410,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v
    seqStorePtr->sequences[0].litLength = (U16)litLength;

    /* match offset */
-    seqStorePtr->sequences[0].offset = offsetCode + 1;
+    seqStorePtr->sequences[0].offset = offCode + 1;

    /* match Length */
    if (mlBase>0xFFFF) {
@ -914,7 +952,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
 size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
                     const void* dict, size_t dictSize,
                     const ZSTD_CDict* cdict,
-                     ZSTD_CCtx_params  params, unsigned long long pledgedSrcSize);
+                     const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);

 void ZSTD_resetSeqStore(seqStore_t* ssPtr);

@ -929,7 +967,7 @@ size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
                                    ZSTD_dictContentType_e dictContentType,
                                    ZSTD_dictTableLoadMethod_e dtlm,
                                    const ZSTD_CDict* cdict,
-                                    ZSTD_CCtx_params params,
+                                    const ZSTD_CCtx_params* params,
                                    unsigned long long pledgedSrcSize);

 /* ZSTD_compress_advanced_internal() :
@ -938,7 +976,7 @@ size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
                                       void* dst, size_t dstCapacity,
                                 const void* src, size_t srcSize,
                                 const void* dict,size_t dictSize,
-                                 ZSTD_CCtx_params params);
+                                 const ZSTD_CCtx_params* params);


 /* ZSTD_writeLastEmptyBlock() :
--- a/lib/compress/zstd_compress_literals.c
+++ b/lib/compress/zstd_compress_literals.c
@ -70,7 +70,7 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
                              ZSTD_strategy strategy, int disableLiteralCompression,
                              void* dst, size_t dstCapacity,
                        const void* src, size_t srcSize,
-                              void* workspace, size_t wkspSize,
+                              void* entropyWorkspace, size_t entropyWorkspaceSize,
                        const int bmi2)
 {
    size_t const minGain = ZSTD_minGain(srcSize, strategy);
@ -99,10 +99,15 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
    {   HUF_repeat repeat = prevHuf->repeatMode;
        int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
        if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
-        cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
-                                      workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2)
-                                : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
-                                      workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
+        cLitSize = singleStream ?
+            HUF_compress1X_repeat(
+                ostart+lhSize, dstCapacity-lhSize, src, srcSize,
+                255, 11, entropyWorkspace, entropyWorkspaceSize,
+                (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) :
+            HUF_compress4X_repeat(
+                ostart+lhSize, dstCapacity-lhSize, src, srcSize,
+                255, 11, entropyWorkspace, entropyWorkspaceSize,
+                (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
        if (repeat != HUF_repeat_none) {
            /* reused the existing table */
            hType = set_repeat;
--- a/lib/compress/zstd_compress_literals.h
+++ b/lib/compress/zstd_compress_literals.h
@ -23,7 +23,7 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
                              ZSTD_strategy strategy, int disableLiteralCompression,
                              void* dst, size_t dstCapacity,
                        const void* src, size_t srcSize,
-                              void* workspace, size_t wkspSize,
+                              void* entropyWorkspace, size_t entropyWorkspaceSize,
                        const int bmi2);

 #endif /* ZSTD_COMPRESS_LITERALS_H */
--- a/lib/compress/zstd_compress_sequences.c
+++ b/lib/compress/zstd_compress_sequences.c
@ -222,7 +222,7 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
                const BYTE* codeTable, size_t nbSeq,
                const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
                const FSE_CTable* prevCTable, size_t prevCTableSize,
-                void* workspace, size_t workspaceSize)
+                void* entropyWorkspace, size_t entropyWorkspaceSize)
 {
    BYTE* op = (BYTE*)dst;
    const BYTE* const oend = op + dstCapacity;
@ -238,7 +238,7 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
        memcpy(nextCTable, prevCTable, prevCTableSize);
        return 0;
    case set_basic:
-        FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize));  /* note : could be pre-calculated */
+        FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize));  /* note : could be pre-calculated */
        return 0;
    case set_compressed: {
        S16 norm[MaxSeq + 1];
@ -252,7 +252,7 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
        FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max));
        {   size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog);   /* overflow protected */
            FORWARD_IF_ERROR(NCountSize);
-            FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, workspace, workspaceSize));
+            FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize));
            return NCountSize;
        }
    }
--- a/lib/compress/zstd_compress_sequences.h
+++ b/lib/compress/zstd_compress_sequences.h
@ -35,7 +35,7 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
                const BYTE* codeTable, size_t nbSeq,
                const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
                const FSE_CTable* prevCTable, size_t prevCTableSize,
-                void* workspace, size_t workspaceSize);
+                void* entropyWorkspace, size_t entropyWorkspaceSize);

 size_t ZSTD_encodeSequences(
            void* dst, size_t dstCapacity,
--- a/lib/compress/zstd_cwksp.h
+++ b/lib/compress/zstd_cwksp.h
@ -0,0 +1,449 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_CWKSP_H
+#define ZSTD_CWKSP_H
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include "zstd_internal.h"
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*-*************************************
+*  Constants
+***************************************/
+
+/* define "workspace is too large" as this number of times larger than needed */
+#define ZSTD_WORKSPACETOOLARGE_FACTOR 3
+
+/* when workspace is continuously too large
+ * during at least this number of times,
+ * context's memory usage is considered wasteful,
+ * because it's sized to handle a worst case scenario which rarely happens.
+ * In which case, resize it down to free some memory */
+#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128
+
+/*-*************************************
+*  Structures
+***************************************/
+typedef enum {   /* phases are ordered: ZSTD_cwksp_internal_advance_phase() asserts they never go backwards */
+    ZSTD_cwksp_alloc_objects,   /* initial phase; ZSTD_cwksp_reserve_object() only succeeds in this phase */
+    ZSTD_cwksp_alloc_buffers,   /* phase requested by ZSTD_cwksp_reserve_buffer() (unaligned memory) */
+    ZSTD_cwksp_alloc_aligned    /* phase requested by ZSTD_cwksp_reserve_aligned() and ZSTD_cwksp_reserve_table() */
+} ZSTD_cwksp_alloc_phase_e;
+
+/**
+ * Zstd fits all its internal datastructures into a single continuous buffer,
+ * so that it only needs to perform a single OS allocation (or so that a buffer
+ * can be provided to it and it can perform no allocations at all). This buffer
+ * is called the workspace.
+ *
+ * Several optimizations complicate that process of allocating memory ranges
+ * from this workspace for each internal datastructure:
+ *
+ * - These different internal datastructures have different setup requirements:
+ *
+ *   - The static objects need to be cleared once and can then be trivially
+ *     reused for each compression.
+ *
+ *   - Various buffers don't need to be initialized at all--they are always
+ *     written into before they're read.
+ *
+ *   - The matchstate tables have a unique requirement that they don't need
+ *     their memory to be totally cleared, but they do need the memory to have
+ *     some bound, i.e., a guarantee that all values in the memory allocated to
+ *     them are less than some maximum value (which is the starting value
+ *     for the indices that they will then use for compression). When this
+ *     guarantee is provided to them, they can use the memory without any setup
+ *     work. When it can't be provided, they have to clear the area.
+ *
+ * - These buffers also have different alignment requirements.
+ *
+ * - We would like to reuse the objects in the workspace for multiple
+ *   compressions without having to perform any expensive reallocation or
+ *   reinitialization work.
+ *
+ * - We would like to be able to efficiently reuse the workspace across
+ *   multiple compressions **even when the compression parameters change** and
+ *   we need to resize some of the objects (where possible).
+ *
+ * To attempt to manage this buffer, given these constraints, the ZSTD_cwksp
+ * abstraction was created. It works as follows:
+ *
+ * Workspace Layout:
+ *
+ * [                        ... workspace ...                         ]
+ * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers]
+ *
+ * The various objects that live in the workspace are divided into the
+ * following categories, and are allocated separately:
+ *
+ * - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict,
+ *   so that literally everything fits in a single buffer. Note: if present,
+ *   this must be the first object in the workspace, since ZSTD_free{CCtx,
+ *   CDict}() rely on a pointer comparison to see whether one or two frees are
+ *   required.
+ *
+ * - Fixed size objects: these are fixed-size, fixed-count objects that are
+ *   nonetheless "dynamically" allocated in the workspace so that we can
+ *   control how they're initialized separately from the broader ZSTD_CCtx.
+ *   Examples:
+ *   - Entropy Workspace
+ *   - 2 x ZSTD_compressedBlockState_t
+ *   - CDict dictionary contents
+ *
+ * - Tables: these are any of several different datastructures (hash tables,
+ *   chain tables, binary trees) that all respect a common format: they are
+ *   uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
+ *   Their sizes depend on the cparams.
+ *
+ * - Aligned: these buffers are used for various purposes that require 4 byte
+ *   alignment, but don't require any initialization before they're used.
+ *
+ * - Buffers: these buffers are used for various purposes that don't require
+ *   any alignment or initialization before they're used. This means they can
+ *   be moved around at no cost for a new compression.
+ *
+ * Allocating Memory:
+ *
+ * The various types of objects must be allocated in order, so they can be
+ * correctly packed into the workspace buffer. That order is:
+ *
+ * 1. Objects
+ * 2. Buffers
+ * 3. Aligned
+ * 4. Tables
+ *
+ * Attempts to reserve objects of different types out of order will fail.
+ */
+typedef struct {
+    void* workspace;       /* start of the managed buffer (set by ZSTD_cwksp_init()) */
+    void* workspaceEnd;    /* one past the last byte of the managed buffer */
+
+    void* objectEnd;       /* end of the object region, which starts at `workspace` */
+    void* tableEnd;        /* end of the currently reserved tables, which start at `objectEnd` and grow upwards */
+    void* tableValidEnd;   /* table space in [objectEnd, tableValidEnd) is considered clean; ZSTD_cwksp_clean_tables() zeroes [tableValidEnd, tableEnd) */
+    void* allocStart;      /* lowest address handed out to buffer/aligned reservations, which grow downwards from `workspaceEnd` */
+
+    int allocFailed;       /* set to 1 by any failed reservation; reset by ZSTD_cwksp_clear() */
+    int workspaceOversizedDuration;   /* consecutive ZSTD_cwksp_bump_oversized_duration() calls that found the workspace too large */
+    ZSTD_cwksp_alloc_phase_e phase;   /* current allocation phase */
+} ZSTD_cwksp;
+
+/*-*************************************
+*  Functions
+***************************************/
+
+MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws);
+
+/**
+ * Debug-only check of the pointer ordering invariant:
+ * workspace <= objectEnd <= {tableEnd, tableValidEnd} <= allocStart <= workspaceEnd.
+ * Consists solely of assert() statements.
+ */
+MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
+    (void)ws;   /* presumably keeps `ws` referenced when assert() compiles to nothing */
+    assert(ws->workspace <= ws->objectEnd);
+    assert(ws->objectEnd <= ws->tableEnd);
+    assert(ws->objectEnd <= ws->tableValidEnd);
+    assert(ws->tableEnd <= ws->allocStart);
+    assert(ws->tableValidEnd <= ws->allocStart);
+    assert(ws->allocStart <= ws->workspaceEnd);
+}
+
+/**
+ * Rounds `size` up to the next multiple of `align`.
+ * `align` must be a power of 2.
+ */
+MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) {
+    size_t const mask = align - 1;
+    size_t const padded = size + mask;
+    assert((align & mask) == 0);
+    return padded & ~mask;
+}
+
+/**
+ * Moves the workspace forward to `phase` (phases never go backwards) and
+ * performs the pointer fix-ups attached to each transition:
+ * - leaving the objects phase resets tableValidEnd to objectEnd;
+ * - entering the aligned phase rounds allocStart down to a sizeof(U32)
+ *   boundary, lowering tableValidEnd too if it ends up above allocStart.
+ * Does nothing when `phase` is already the current phase.
+ */
+MEM_STATIC void ZSTD_cwksp_internal_advance_phase(
+        ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) {
+    assert(phase >= ws->phase);
+    if (phase > ws->phase) {
+        if (ws->phase < ZSTD_cwksp_alloc_buffers &&
+                phase >= ZSTD_cwksp_alloc_buffers) {
+            ws->tableValidEnd = ws->objectEnd;
+        }
+        if (ws->phase < ZSTD_cwksp_alloc_aligned &&
+                phase >= ZSTD_cwksp_alloc_aligned) {
+            /* If unaligned allocations down from a too-large top have left us
+             * unaligned, we need to realign our alloc ptr. Technically, this
+             * can consume space that is unaccounted for in the neededSpace
+             * calculation. However, I believe this can only happen when the
+             * workspace is too large, and specifically when it is too large
+             * by a larger margin than the space that will be consumed. */
+            /* TODO: cleaner, compiler warning friendly way to do this??? */
+            ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1));
+            if (ws->allocStart < ws->tableValidEnd) {
+                ws->tableValidEnd = ws->allocStart;
+            }
+        }
+        ws->phase = phase;
+    }
+}
+
+/**
+ * Internal function. Do not use directly.
+ * Advances the workspace to `phase`, then carves `bytes` off the low end of
+ * the downward-growing region that currently starts at ws->allocStart.
+ * Returns the new allocStart, or NULL (with ws->allocFailed set) when the
+ * reservation would cross below ws->tableEnd.
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_internal(
+        ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) {
+    void* alloc;
+    void* bottom = ws->tableEnd;
+    ZSTD_cwksp_internal_advance_phase(ws, phase);
+    alloc = (BYTE *)ws->allocStart - bytes;
+    DEBUGLOG(5, "cwksp: reserving %zd bytes, %zd bytes remaining",
+        bytes, ZSTD_cwksp_available_space(ws) - bytes);
+    ZSTD_cwksp_assert_internal_consistency(ws);
+    assert(alloc >= bottom);
+    if (alloc < bottom) {
+        DEBUGLOG(4, "cwksp: alloc failed!");
+        ws->allocFailed = 1;
+        return NULL;
+    }
+    if (alloc < ws->tableValidEnd) {
+        ws->tableValidEnd = alloc;   /* memory handed out here can no longer count as clean table space */
+    }
+    ws->allocStart = alloc;
+    return alloc;
+}
+
+/**
+ * Reserves `bytes` of unaligned memory in the buffers phase.
+ * Returns NULL when the reservation fails.
+ */
+MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) {
+    void* const buffer = ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers);
+    return (BYTE*)buffer;
+}
+
+/**
+ * Reserves memory whose size and address are both multiples of sizeof(unsigned).
+ * `bytes` is expected to already be a multiple of sizeof(U32).
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) {
+    size_t const alignedBytes = ZSTD_cwksp_align(bytes, sizeof(U32));
+    assert((bytes & (sizeof(U32)-1)) == 0);
+    return ZSTD_cwksp_reserve_internal(ws, alignedBytes, ZSTD_cwksp_alloc_aligned);
+}
+
+/**
+ * Reserves `bytes` of table space, growing upwards from ws->tableEnd.
+ * Aligned on sizeof(unsigned). These buffers have the special property that
+ * their values remain constrained, allowing us to re-use them without
+ * memset()-ing them.
+ * Returns the start of the table, or NULL (with ws->allocFailed set) when the
+ * table would run into the region that starts at ws->allocStart.
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
+    const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
+    void* alloc;
+    void* end;
+    void* top;
+    DEBUGLOG(5, "cwksp: reserving table %zd bytes, %zd bytes remaining",
+        bytes, ZSTD_cwksp_available_space(ws) - bytes);
+    assert((bytes & (sizeof(U32)-1)) == 0);
+    ZSTD_cwksp_internal_advance_phase(ws, phase);
+    /* Read the bounds only after advancing the phase: entering the aligned
+     * phase may round ws->allocStart down, and a limit captured earlier would
+     * let the table overlap the realigned allocation area. */
+    alloc = ws->tableEnd;
+    end = (BYTE *)alloc + bytes;
+    top = ws->allocStart;
+    ZSTD_cwksp_assert_internal_consistency(ws);
+    assert(end <= top);
+    if (end > top) {
+        DEBUGLOG(4, "cwksp: table alloc failed!");
+        ws->allocFailed = 1;
+        return NULL;
+    }
+    ws->tableEnd = end;
+    return alloc;
+}
+
+/**
+ * Reserves an object at the front of the workspace. Aligned on sizeof(void*).
+ * Only possible while the workspace is still in the objects phase.
+ * Returns NULL (with ws->allocFailed set) on failure.
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
+    size_t const roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*));
+    void* const start = ws->objectEnd;
+    void* const end = (BYTE*)start + roundedBytes;
+    DEBUGLOG(5,
+        "cwksp: reserving object %zd bytes (rounded to %zd), %zd bytes remaining",
+        bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes);
+    assert(((size_t)start & (sizeof(void*)-1)) == 0);
+    assert((bytes & (sizeof(void*)-1)) == 0);
+    ZSTD_cwksp_assert_internal_consistency(ws);
+    /* we must be in the first phase (no advance is possible) and the object must fit */
+    if (ws->phase == ZSTD_cwksp_alloc_objects && end <= ws->workspaceEnd) {
+        /* tables start right after the last object */
+        ws->objectEnd = end;
+        ws->tableEnd = end;
+        ws->tableValidEnd = end;
+        return start;
+    }
+    DEBUGLOG(4, "cwksp: object alloc failed!");
+    ws->allocFailed = 1;
+    return NULL;
+}
+
+/**
+ * Declares the whole table area dirty by pulling tableValidEnd back to
+ * objectEnd, so that a later ZSTD_cwksp_clean_tables() zeroes every
+ * reserved table.
+ */
+MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) {
+    DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");
+
+#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
+    /* To validate that the table re-use logic is sound, and that we don't
+     * access table space that we haven't cleaned, we re-"poison" the table
+     * space every time we mark it dirty. */
+    {
+        size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
+        assert(__msan_test_shadow(ws->objectEnd, size) == -1);
+        __msan_poison(ws->objectEnd, size);
+    }
+#endif
+
+    assert(ws->tableValidEnd >= ws->objectEnd);
+    assert(ws->tableValidEnd <= ws->allocStart);
+    ws->tableValidEnd = ws->objectEnd;
+    ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+/**
+ * Declares every reserved table clean: tableValidEnd is raised to tableEnd
+ * when it lies below it, and left untouched otherwise.
+ */
+MEM_STATIC void ZSTD_cwksp_mark_tables_clean(ZSTD_cwksp* ws) {
+    DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_clean");
+    assert(ws->tableValidEnd >= ws->objectEnd);
+    assert(ws->tableValidEnd <= ws->allocStart);
+    if (ws->tableEnd > ws->tableValidEnd) {
+        ws->tableValidEnd = ws->tableEnd;
+    }
+    ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+/**
+ * Zeroes the reserved table space that is not yet marked clean,
+ * i.e. [tableValidEnd, tableEnd), then marks all tables clean.
+ */
+MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
+    DEBUGLOG(4, "cwksp: ZSTD_cwksp_clean_tables");
+    assert(ws->tableValidEnd >= ws->objectEnd);
+    assert(ws->tableValidEnd <= ws->allocStart);
+    if (ws->tableValidEnd < ws->tableEnd) {
+        BYTE* const dirtyStart = (BYTE*)ws->tableValidEnd;
+        size_t const dirtySize = (size_t)((BYTE*)ws->tableEnd - dirtyStart);
+        memset(dirtyStart, 0, dirtySize);
+    }
+    ZSTD_cwksp_mark_tables_clean(ws);
+}
+
+/**
+ * Invalidates table allocations.
+ * All other allocations remain valid.
+ * Only tableEnd is reset (to objectEnd); tableValidEnd is left untouched.
+ */
+MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) {
+    DEBUGLOG(4, "cwksp: clearing tables!");
+    ws->tableEnd = ws->objectEnd;
+    ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+/**
+ * Invalidates all buffer, aligned, and table allocations.
+ * Object allocations remain valid.
+ * Also resets allocFailed, and brings a workspace that had progressed past
+ * the buffers phase back to the buffers phase (a workspace still in the
+ * objects phase stays there).
+ */
+MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
+    DEBUGLOG(4, "cwksp: clearing!");
+
+#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
+    /* To validate that the context re-use logic is sound, and that we don't
+     * access stuff that this compression hasn't initialized, we re-"poison"
+     * the workspace (or at least the non-static, non-table parts of it)
+     * every time we start a new compression. */
+    {
+        size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->tableValidEnd;
+        __msan_poison(ws->tableValidEnd, size);
+    }
+#endif
+
+    ws->tableEnd = ws->objectEnd;
+    ws->allocStart = ws->workspaceEnd;
+    ws->allocFailed = 0;
+    if (ws->phase > ZSTD_cwksp_alloc_buffers) {
+        ws->phase = ZSTD_cwksp_alloc_buffers;
+    }
+    ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+/**
+ * Makes `ws` manage the buffer [start, start+size), which it takes ownership of.
+ * Whatever `ws` held before is ignored: a previously managed buffer, if any,
+ * must be freed separately by the caller.
+ * `start` must be aligned on sizeof(void*).
+ */
+MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) {
+    BYTE* const bufferEnd = (BYTE*)start + size;
+    DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size);
+    assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */
+    ws->phase = ZSTD_cwksp_alloc_objects;
+    ws->workspace = start;
+    ws->workspaceEnd = bufferEnd;
+    /* nothing is reserved yet: the object and clean-table regions are both empty */
+    ws->objectEnd = start;
+    ws->tableValidEnd = start;
+    ZSTD_cwksp_clear(ws);   /* sets tableEnd, allocStart and allocFailed */
+    ws->workspaceOversizedDuration = 0;
+    ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+/**
+ * Allocates a buffer of `size` bytes through `customMem` and initializes `ws`
+ * to manage it. Returns 0 on success, or a memory_allocation error when the
+ * allocation returns NULL (in which case `ws` is left unmodified).
+ */
+MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) {
+    void* workspace = ZSTD_malloc(size, customMem);
+    DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size);
+    RETURN_ERROR_IF(workspace == NULL, memory_allocation);
+    ZSTD_cwksp_init(ws, workspace, size);
+    return 0;
+}
+
+/**
+ * Releases the buffer managed by `ws` through `customMem` and resets `ws`
+ * to all-zeroes.
+ * The struct is zeroed BEFORE the buffer is freed: the object enclosing `ws`
+ * may itself be stored inside the workspace, in which case writing to `ws`
+ * after the free would touch released memory.
+ */
+MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) {
+    void* const ptr = ws->workspace;
+    DEBUGLOG(4, "cwksp: freeing workspace");
+    memset(ws, 0, sizeof(ZSTD_cwksp));
+    ZSTD_free(ptr, customMem);
+}
+
+/**
+ * Transfers the management of a workspace from `src` to `dst`.
+ * `src` is zeroed and therefore left in an invalid state: it must be
+ * re-init()'ed before it's used again.
+ */
+MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) {
+    *dst = *src;
+    memset(src, 0, sizeof(*src));
+}
+
+/** Total size, in bytes, of the buffer managed by `ws`. */
+MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
+    BYTE const* const begin = (BYTE const*)ws->workspace;
+    BYTE const* const end = (BYTE const*)ws->workspaceEnd;
+    return (size_t)(end - begin);
+}
+
+/**
+ * Returns the allocFailed flag: non-zero when a reservation has failed since
+ * the flag was last reset by ZSTD_cwksp_clear().
+ */
+MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
+    return ws->allocFailed;
+}
+
+/*-*************************************
+*  Functions Checking Free Space
+***************************************/
+
+/** Number of free bytes left between the tables (tableEnd) and allocStart. */
+MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) {
+    BYTE* const freeBegin = (BYTE*)ws->tableEnd;
+    BYTE* const freeEnd = (BYTE*)ws->allocStart;
+    return (size_t)(freeEnd - freeBegin);
+}
+
+/** Returns non-zero when at least `additionalNeededSpace` bytes are still free. */
+MEM_STATIC int ZSTD_cwksp_check_available(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
+    size_t const available = ZSTD_cwksp_available_space(ws);
+    return available >= additionalNeededSpace;
+}
+
+/**
+ * Returns non-zero when the free space is at least
+ * ZSTD_WORKSPACETOOLARGE_FACTOR times `additionalNeededSpace`.
+ * The comparison divides the available space rather than multiplying the
+ * needed space: for unsigned integers `a >= n * f` and `a / f >= n` are
+ * equivalent, but the division cannot wrap around for large `n`.
+ */
+MEM_STATIC int ZSTD_cwksp_check_too_large(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
+    size_t const available = ZSTD_cwksp_available_space(ws);
+    return (available / ZSTD_WORKSPACETOOLARGE_FACTOR) >= additionalNeededSpace;
+}
+
+/**
+ * Returns non-zero when the workspace is too large for `additionalNeededSpace`
+ * and has been so for more than ZSTD_WORKSPACETOOLARGE_MAXDURATION
+ * consecutive checks.
+ */
+MEM_STATIC int ZSTD_cwksp_check_wasteful(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
+    if (ws->workspaceOversizedDuration <= ZSTD_WORKSPACETOOLARGE_MAXDURATION) {
+        return 0;
+    }
+    return ZSTD_cwksp_check_too_large(ws, additionalNeededSpace);
+}
+
+/**
+ * Updates the oversized streak: incremented while the workspace is too large
+ * for `additionalNeededSpace`, reset to 0 as soon as it is not.
+ */
+MEM_STATIC void ZSTD_cwksp_bump_oversized_duration(
+        ZSTD_cwksp* ws, size_t additionalNeededSpace) {
+    int const tooLarge = ZSTD_cwksp_check_too_large(ws, additionalNeededSpace);
+    ws->workspaceOversizedDuration = tooLarge ? ws->workspaceOversizedDuration + 1 : 0;
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_CWKSP_H */
--- a/lib/compress/zstd_double_fast.c
+++ b/lib/compress/zstd_double_fast.c
@ -148,7 +148,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
            const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
            ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
            goto _match_stored;
        }

@ -157,7 +157,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
          && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
            mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
            ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
            goto _match_stored;
        }

@ -247,7 +247,7 @@ _match_found:
        offset_2 = offset_1;
        offset_1 = offset;

-        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);

 _match_stored:
        /* match found */
@ -278,7 +278,7 @@ _match_stored:
                        const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
                        size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
                        U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                        ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
+                        ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
                        hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                        hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                        ip += repLength2;
@ -297,7 +297,7 @@ _match_stored:
                    U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;  /* swap offset_2 <=> offset_1 */
                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
-                    ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
                    ip += rLength;
                    anchor = ip;
                    continue;   /* faster when present ... (?) */
@ -411,7 +411,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
            const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
            ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
        } else {
            if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
                const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
@ -422,7 +422,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
                offset_2 = offset_1;
                offset_1 = offset;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);

            } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
                size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@ -447,7 +447,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                }
                offset_2 = offset_1;
                offset_1 = offset;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);

            } else {
                ip += ((ip-anchor) >> kSearchStrength) + 1;
@ -479,7 +479,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                    U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                    ip += repLength2;
--- a/lib/compress/zstd_fast.c
+++ b/lib/compress/zstd_fast.c
@ -8,7 +8,7 @@
 * You may select, at your option, one of the above-listed licenses.
 */

-#include "zstd_compress_internal.h"
+#include "zstd_compress_internal.h"  /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
 #include "zstd_fast.h"


@ -43,8 +43,8 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
 }


-FORCE_INLINE_TEMPLATE
-size_t ZSTD_compressBlock_fast_generic(
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_compressBlock_fast_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize,
        U32 const mls)
@ -74,8 +74,7 @@ size_t ZSTD_compressBlock_fast_generic(
    DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
    ip0 += (ip0 == prefixStart);
    ip1 = ip0 + 1;
-    {
-        U32 const maxRep = (U32)(ip0 - prefixStart);
+    {   U32 const maxRep = (U32)(ip0 - prefixStart);
        if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
        if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
    }
@ -118,8 +117,7 @@ size_t ZSTD_compressBlock_fast_generic(
            match0 = match1;
            goto _offset;
        }
-        {
-            size_t const step = ((ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
+        {   size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
            assert(step >= 2);
            ip0 += step;
            ip1 += step;
@ -138,7 +136,7 @@ _offset: /* Requires: ip0, match0 */
 _match: /* Requires: ip0, match0, offcode */
        /* Count the forward length */
        mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4;
-        ZSTD_storeSeq(seqStore, ip0-anchor, anchor, offcode, mLength-MINMATCH);
+        ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH);
        /* match found */
        ip0 += mLength;
        anchor = ip0;
@ -150,16 +148,15 @@ _match: /* Requires: ip0, match0, offcode */
            hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2;  /* here because current+2 could be > iend-8 */
            hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);

-            while ( (ip0 <= ilimit)
-                 && ( (offset_2>0)
-                    & (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) )) {
+            while ( ((ip0 <= ilimit) & (offset_2>0))  /* offset_2==0 means offset_2 is invalidated */
+                 && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) {
                /* store sequence */
                size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
-                U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;  /* swap offset_2 <=> offset_1 */
+                { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
                hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
                ip0 += rLength;
                ip1 = ip0 + 1;
-                ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
                anchor = ip0;
                continue;   /* faster when present (confirmed on gcc-8) ... (?) */
            }
@ -179,8 +176,7 @@ size_t ZSTD_compressBlock_fast(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
 {
-    ZSTD_compressionParameters const* cParams = &ms->cParams;
-    U32 const mls = cParams->minMatch;
+    U32 const mls = ms->cParams.minMatch;
    assert(ms->dictMatchState == NULL);
    switch(mls)
    {
@ -265,7 +261,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
            const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
            ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
        } else if ( (matchIndex <= prefixStartIndex) ) {
            size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
            U32 const dictMatchIndex = dictHashTable[dictHash];
@ -285,7 +281,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
                } /* catch up */
                offset_2 = offset_1;
                offset_1 = offset;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
            }
        } else if (MEM_read32(match) != MEM_read32(ip)) {
            /* it's not a match, and we're not going to check the dictionary */
@ -300,7 +296,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
                 && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
            offset_2 = offset_1;
            offset_1 = offset;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
        }

        /* match found */
@ -325,7 +321,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
                    hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
                    ip += repLength2;
                    anchor = ip;
@ -348,8 +344,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
 {
-    ZSTD_compressionParameters const* cParams = &ms->cParams;
-    U32 const mls = cParams->minMatch;
+    U32 const mls = ms->cParams.minMatch;
    assert(ms->dictMatchState != NULL);
    switch(mls)
    {
@ -408,16 +403,17 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
        const U32    repIndex = current + 1 - offset_1;
        const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
        const BYTE* const repMatch = repBase + repIndex;
-        size_t mLength;
        hashTable[h] = current;   /* update hash table */
        assert(offset_1 <= current +1);   /* check repIndex */

        if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
-            mLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
+            size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
            ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH);
+            ip += rLength;
+            anchor = ip;
        } else {
            if ( (matchIndex < dictStartIndex) ||
                 (MEM_read32(match) != MEM_read32(ip)) ) {
@ -427,19 +423,15 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
            }
            {   const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
                const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
-                U32 offset;
-                mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
+                U32 const offset = current - matchIndex;
+                size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
                while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
-                offset = current - matchIndex;
-                offset_2 = offset_1;
-                offset_1 = offset;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                offset_2 = offset_1; offset_1 = offset;  /* update offset history */
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ip += mLength;
+                anchor = ip;
        }   }

-        /* found a match : store it */
-        ip += mLength;
-        anchor = ip;
-
        if (ip <= ilimit) {
            /* Fill Table */
            hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;
@ -448,13 +440,13 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
-                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
+                const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex))  /* intentional overflow */
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
-                    U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
+                    { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; }  /* swap offset_2 <=> offset_1 */
+                    ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH);
                    hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
                    ip += repLength2;
                    anchor = ip;
@ -476,8 +468,7 @@ size_t ZSTD_compressBlock_fast_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
 {
-    ZSTD_compressionParameters const* cParams = &ms->cParams;
-    U32 const mls = cParams->minMatch;
+    U32 const mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
--- a/lib/compress/zstd_lazy.c
+++ b/lib/compress/zstd_lazy.c
@ -810,7 +810,7 @@ ZSTD_compressBlock_lazy_generic(
        /* store sequence */
 _storeSequence:
        {   size_t const litLength = start - anchor;
-            ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
            anchor = ip = start + matchLength;
        }

@ -828,7 +828,7 @@ _storeSequence:
                    const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
                    matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
                    offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
                    ip += matchLength;
                    anchor = ip;
                    continue;
@ -843,7 +843,7 @@ _storeSequence:
                /* store sequence */
                matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
                offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
-                ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
                ip += matchLength;
                anchor = ip;
                continue;   /* faster when present ... (?) */
@ -1051,7 +1051,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
        /* store sequence */
 _storeSequence:
        {   size_t const litLength = start - anchor;
-            ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
            anchor = ip = start + matchLength;
        }

@ -1066,7 +1066,7 @@ _storeSequence:
                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset history */
-                ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
                ip += matchLength;
                anchor = ip;
                continue;   /* faster when present ... (?) */
--- a/lib/compress/zstd_ldm.c
+++ b/lib/compress/zstd_ldm.c
@ -583,7 +583,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
                rep[i] = rep[i-1];
            rep[0] = sequence.offset;
            /* Store the sequence */
-            ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength,
+            ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
                          sequence.offset + ZSTD_REP_MOVE,
                          sequence.matchLength - MINMATCH);
            ip += sequence.matchLength;
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@ -1098,7 +1098,7 @@ _shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */

                    assert(anchor + llen <= iend);
                    ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
-                    ZSTD_storeSeq(seqStore, llen, anchor, offCode, mlen-MINMATCH);
+                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
                    anchor += advance;
                    ip = anchor;
            }   }
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@ -668,7 +668,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)

    /* init */
    if (job->cdict) {
-        size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, jobParams, job->fullFrameSize);
+        size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, &jobParams, job->fullFrameSize);
        assert(job->firstJob);  /* only allowed for first job */
        if (ZSTD_isError(initError)) JOB_ERROR(initError);
    } else {  /* srcStart points at reloaded section */
@ -680,7 +680,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
                                        job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
                                        ZSTD_dtlm_fast,
                                        NULL, /*cdict*/
-                                        jobParams, pledgedSrcSize);
+                                        &jobParams, pledgedSrcSize);
            if (ZSTD_isError(initError)) JOB_ERROR(initError);
    }   }

@ -1028,9 +1028,9 @@ size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter,

 /* Sets parameters relevant to the compression job,
 * initializing others to default values. */
-static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
+static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(const ZSTD_CCtx_params* params)
 {
-    ZSTD_CCtx_params jobParams = params;
+    ZSTD_CCtx_params jobParams = *params;
    /* Clear parameters related to multithreading */
    jobParams.forceWindow = 0;
    jobParams.nbWorkers = 0;
@ -1151,16 +1151,16 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
 /* =====   Multi-threaded compression   ===== */
 /* ------------------------------------------ */

-static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
+static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params)
 {
    unsigned jobLog;
-    if (params.ldmParams.enableLdm) {
+    if (params->ldmParams.enableLdm) {
        /* In Long Range Mode, the windowLog is typically oversized.
         * In which case, it's preferable to determine the jobSize
         * based on chainLog instead. */
-        jobLog = MAX(21, params.cParams.chainLog + 4);
+        jobLog = MAX(21, params->cParams.chainLog + 4);
    } else {
-        jobLog = MAX(20, params.cParams.windowLog + 2);
+        jobLog = MAX(20, params->cParams.windowLog + 2);
    }
    return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX);
 }
@ -1193,27 +1193,27 @@ static int ZSTDMT_overlapLog(int ovlog, ZSTD_strategy strat)
    return ovlog;
 }

-static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params)
+static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params)
 {
-    int const overlapRLog = 9 - ZSTDMT_overlapLog(params.overlapLog, params.cParams.strategy);
-    int ovLog = (overlapRLog >= 8) ? 0 : (params.cParams.windowLog - overlapRLog);
+    int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy);
+    int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog);
    assert(0 <= overlapRLog && overlapRLog <= 8);
-    if (params.ldmParams.enableLdm) {
+    if (params->ldmParams.enableLdm) {
        /* In Long Range Mode, the windowLog is typically oversized.
         * In which case, it's preferable to determine the jobSize
         * based on chainLog instead.
         * Then, ovLog becomes a fraction of the jobSize, rather than windowSize */
-        ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
+        ovLog = MIN(params->cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
                - overlapRLog;
    }
    assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX);
-    DEBUGLOG(4, "overlapLog : %i", params.overlapLog);
+    DEBUGLOG(4, "overlapLog : %i", params->overlapLog);
    DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
    return (ovLog==0) ? 0 : (size_t)1 << ovLog;
 }

 static unsigned
-ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers)
+ZSTDMT_computeNbJobs(const ZSTD_CCtx_params* params, size_t srcSize, unsigned nbWorkers)
 {
    assert(nbWorkers>0);
    {   size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params);
@ -1236,9 +1236,9 @@ static size_t ZSTDMT_compress_advanced_internal(
          const ZSTD_CDict* cdict,
                ZSTD_CCtx_params params)
 {
-    ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params);
-    size_t const overlapSize = ZSTDMT_computeOverlapSize(params);
-    unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers);
+    ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(&params);
+    size_t const overlapSize = ZSTDMT_computeOverlapSize(&params);
+    unsigned const nbJobs = ZSTDMT_computeNbJobs(&params, srcSize, params.nbWorkers);
    size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs;
    size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize;   /* avoid too small last block */
    const char* const srcStart = (const char*)src;
@ -1256,7 +1256,7 @@ static size_t ZSTDMT_compress_advanced_internal(
        ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
        DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode");
        if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams);
-        return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, jobParams);
+        return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, &jobParams);
    }

    assert(avgJobSize >= 256 KB);  /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
@ -1404,12 +1404,12 @@ size_t ZSTDMT_initCStream_internal(

    mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN);  /* do not trigger multi-threading when srcSize is too small */
    if (mtctx->singleBlockingThread) {
-        ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(params);
+        ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(&params);
        DEBUGLOG(5, "ZSTDMT_initCStream_internal: switch to single blocking thread mode");
        assert(singleThreadParams.nbWorkers == 0);
        return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0],
                                         dict, dictSize, cdict,
-                                         singleThreadParams, pledgedSrcSize);
+                                         &singleThreadParams, pledgedSrcSize);
    }

    DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers);
@ -1435,11 +1435,11 @@ size_t ZSTDMT_initCStream_internal(
        mtctx->cdict = cdict;
    }

-    mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(params);
+    mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(&params);
    DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, (U32)(mtctx->targetPrefixSize>>10));
    mtctx->targetSectionSize = params.jobSize;
    if (mtctx->targetSectionSize == 0) {
-        mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
+        mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(&params);
    }
    assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX);

--- a/lib/decompress/zstd_decompress_block.c
+++ b/lib/decompress/zstd_decompress_block.c
@ -573,38 +573,118 @@ typedef struct {
    size_t pos;
 } seqState_t;

+/*! ZSTD_overlapCopy8() :
+ *  Copies 8 bytes from *ip to *op and advances both pointers, where *ip <= *op.
+ *  If the offset is < 8 then the offset is spread to at least 8 bytes.
+ *
+ *  @param op      in/out : destination pointer; advanced by 8 on return.
+ *  @param ip      in/out : source pointer; on return it trails *op by at least 8 bytes.
+ *  @param offset  distance between destination and source on entry.
+ *                 The closing assert only holds when it equals *op - *ip
+ *                 (ZSTD_safecopy() passes exactly op - ip).
+ *
+ *  Precondition: *ip <= *op
+ *  Postcondition: *op - *ip >= 8
+ */
+static void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
+    assert(*ip <= *op);
+    if (offset < 8) {
+        /* close range match, overlap :
+         * the first 4 bytes are copied one at a time, then *ip is moved by the
+         * two tables around the ZSTD_copy4() call.
+         * Net effect, once the common "+= 8" below is applied :
+         *   new distance = offset - dec32table[offset] + dec64table[offset]
+         * which gives 8, 8, 8, 9, 8, 10, 12, 14 for offset 0..7,
+         * i.e. for offset 1..7 a multiple of offset that is >= 8. */
+        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added to *ip after the first 4 bytes */
+        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted from *ip after the second 4 bytes */
+        int const sub2 = dec64table[offset];
+        (*op)[0] = (*ip)[0];
+        (*op)[1] = (*ip)[1];
+        (*op)[2] = (*ip)[2];
+        (*op)[3] = (*ip)[3];
+        *ip += dec32table[offset];
+        ZSTD_copy4(*op+4, *ip);
+        *ip -= sub2;
+    } else {
+        ZSTD_copy8(*op, *ip);
+    }
+    *ip += 8;
+    *op += 8;
+    assert(*op - *ip >= 8);
+}

-/* ZSTD_execSequenceLast7():
- * exceptional case : decompress a match starting within last 7 bytes of output buffer.
- * requires more careful checks, to ensure there is no overflow.
- * performance does not matter though.
- * note : this case is supposed to be never generated "naturally" by reference encoder,
- *        since in most cases it needs at least 8 bytes to look for a match.
- *        but it's allowed by the specification. */
+/*! ZSTD_safecopy() :
+ *  Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
+ *  and write up to 16 bytes past oend_w (op >= oend_w is allowed).
+ *  This function is only called in the uncommon case where the sequence is near the end of the block. It
+ *  should be fast for a single long sequence, but can be slow for several short sequences.
+ *
+ *  @param op      destination; exactly `length` bytes starting here must end up written.
+ *  @param oend_w  limit used to decide between ZSTD_wildcopy() and the byte-by-byte loop :
+ *                 wildcopy is only asked to cover output below this address.
+ *  @param ip      source of the copy.
+ *  @param length  number of bytes to copy; lengths below 8 take the byte-by-byte path.
+ *  @param ovtype controls the overlap detection
+ *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
+ *           NOTE(review): the assert below only enforces a distance of 8 - confirm this matches WILDCOPY_VECLEN.
+ *         - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
+ *           The src buffer must be before the dst buffer.
+ */
+static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
+    ptrdiff_t const diff = op - ip;
+    BYTE* const oend = op + length;
+
+    assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8)) ||
+           (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
+
+    if (length < 8) {
+        /* Handle short lengths. */
+        while (op < oend) *op++ = *ip++;
+        return;
+    }
+    if (ovtype == ZSTD_overlap_src_before_dst) {
+        /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap.
+         * op and ip are both modified by this call; oend was computed beforehand
+         * and still marks the true end of the output. */
+        assert(length >= 8);
+        ZSTD_overlapCopy8(&op, &ip, diff);
+        assert(op - ip >= 8);
+        assert(op <= oend);
+    }
+
+    if (oend <= oend_w) {
+        /* No risk of overwrite.
+         * NOTE(review): in the overlap case op has already advanced by 8, yet the full
+         * `length` is still passed here, so the request extends 8 bytes past oend.
+         * Presumably covered by the slack beyond oend_w - confirm. */
+        ZSTD_wildcopy(op, ip, length, ovtype);
+        return;
+    }
+    if (op <= oend_w) {
+        /* Wildcopy until we get close to the end. */
+        assert(oend > oend_w);
+        ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
+        ip += oend_w - op;
+        op = oend_w;
+    }
+    /* Handle the leftovers. */
+    while (op < oend) *op++ = *ip++;
+}
+
+/* ZSTD_execSequenceEnd():
+ * This version handles cases that are near the end of the output buffer. It requires
+ * more careful checks to make sure there is no overflow. By separating out these hard
+ * and unlikely cases, we can speed up the common cases.
+ *
+ * NOTE: This function needs to be fast for a single long sequence, but doesn't need
+ * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
+ */
 FORCE_NOINLINE
-size_t ZSTD_execSequenceLast7(BYTE* op,
-                              BYTE* const oend, seq_t sequence,
-                              const BYTE** litPtr, const BYTE* const litLimit,
-                              const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
+size_t ZSTD_execSequenceEnd(BYTE* op,
+                            BYTE* const oend, seq_t sequence,
+                            const BYTE** litPtr, const BYTE* const litLimit,
+                            const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
 {
    BYTE* const oLitEnd = op + sequence.litLength;
    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
    const BYTE* match = oLitEnd - sequence.offset;
+    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;

-    /* check */
-    RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must fit within dstBuffer");
+    /* bounds checks */
+    assert(oLitEnd < oMatchEnd);
+    RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must fit within dstBuffer");
    RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer");

    /* copy literals */
-    while (op < oLitEnd) *op++ = *(*litPtr)++;
+    ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
+    op = oLitEnd;
+    *litPtr = iLitEnd;

    /* copy Match */
-    if (sequence.offset > (size_t)(oLitEnd - base)) {
+    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
        /* offset beyond prefix */
-        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - vBase),corruption_detected);
-        match = dictEnd - (base-match);
+        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
+        match = dictEnd - (prefixStart-match);
        if (match + sequence.matchLength <= dictEnd) {
            memmove(oLitEnd, match, sequence.matchLength);
            return sequenceLength;
@ -614,13 +694,12 @@ size_t ZSTD_execSequenceLast7(BYTE* op,
            memmove(oLitEnd, match, length1);
            op = oLitEnd + length1;
            sequence.matchLength -= length1;
-            match = base;
+            match = prefixStart;
    }   }
-    while (op < oMatchEnd) *op++ = *match++;
+    ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
    return sequenceLength;
 }

-
 HINT_INLINE
 size_t ZSTD_execSequence(BYTE* op,
                         BYTE* const oend, seq_t sequence,
@ -634,20 +713,29 @@ size_t ZSTD_execSequence(BYTE* op,
    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
    const BYTE* match = oLitEnd - sequence.offset;

-    /* check */
-    RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
-    RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
-    if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
+    /* Errors and uncommon cases handled here. */
+    assert(oLitEnd < oMatchEnd);
+    if (iLitEnd > litLimit || oMatchEnd > oend_w)
+        return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);

-    /* copy Literals */
-    if (sequence.litLength > 8)
-        ZSTD_wildcopy_16min(op, (*litPtr), sequence.litLength, ZSTD_no_overlap);   /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
-    else
-        ZSTD_copy8(op, *litPtr);
+    /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
+    assert(iLitEnd <= litLimit /* Literal length is in bounds */);
+    assert(oLitEnd <= oend_w /* Can wildcopy literals */);
+    assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
+
+    /* Copy Literals:
+     * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
+     * We likely don't need the full 32-byte wildcopy.
+     */
+    assert(WILDCOPY_OVERLENGTH >= 16);
+    ZSTD_copy16(op, (*litPtr));
+    if (sequence.litLength > 16) {
+        ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
+    }
    op = oLitEnd;
    *litPtr = iLitEnd;   /* update for next sequence */

-    /* copy Match */
+    /* Copy Match */
    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
        /* offset beyond prefix -> go into extDict */
        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
@ -662,123 +750,33 @@ size_t ZSTD_execSequence(BYTE* op,
            op = oLitEnd + length1;
            sequence.matchLength -= length1;
            match = prefixStart;
-            if (op > oend_w || sequence.matchLength < MINMATCH) {
-              U32 i;
-              for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
-              return sequenceLength;
-            }
    }   }
-    /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
+    /* Match within prefix of 1 or more bytes */
+    assert(op <= oMatchEnd);
+    assert(oMatchEnd <= oend_w);
+    assert(match >= prefixStart);
+    assert(sequence.matchLength >= 1);

-    /* match within prefix */
-    if (sequence.offset < 8) {
-        /* close range match, overlap */
-        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
-        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
-        int const sub2 = dec64table[sequence.offset];
-        op[0] = match[0];
-        op[1] = match[1];
-        op[2] = match[2];
-        op[3] = match[3];
-        match += dec32table[sequence.offset];
-        ZSTD_copy4(op+4, match);
-        match -= sub2;
-    } else {
-        ZSTD_copy8(op, match);
+    /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
+     * without overlap checking.
+     */
+    if (sequence.offset >= WILDCOPY_VECLEN) {
+        /* We bet on a full wildcopy for matches, since we expect matches to be
+         * longer than literals (in general). In silesia, ~10% of matches are longer
+         * than 16 bytes.
+         */
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
+        return sequenceLength;
    }
-    op += 8; match += 8;
+    assert(sequence.offset < WILDCOPY_VECLEN);

-    if (oMatchEnd > oend-(16-MINMATCH)) {
-        if (op < oend_w) {
-            ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
-            match += oend_w - op;
-            op = oend_w;
-        }
-        while (op < oMatchEnd) *op++ = *match++;
-    } else {
-        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);   /* works even if matchLength < 8 */
-    }
-    return sequenceLength;
-}
+    /* Copy 8 bytes and spread the offset to be >= 8. */
+    ZSTD_overlapCopy8(&op, &match, sequence.offset);

-
-HINT_INLINE
-size_t ZSTD_execSequenceLong(BYTE* op,
-                             BYTE* const oend, seq_t sequence,
-                             const BYTE** litPtr, const BYTE* const litLimit,
-                             const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd)
-{
-    BYTE* const oLitEnd = op + sequence.litLength;
-    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
-    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
-    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
-    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
-    const BYTE* match = sequence.match;
-
-    /* check */
-    RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
-    RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
-    if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
-
-    /* copy Literals */
-    if (sequence.litLength > 8)
-        ZSTD_wildcopy_16min(op, *litPtr, sequence.litLength, ZSTD_no_overlap);   /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
-    else
-        ZSTD_copy8(op, *litPtr);  /* note : op <= oLitEnd <= oend_w == oend - 8 */
-
-    op = oLitEnd;
-    *litPtr = iLitEnd;   /* update for next sequence */
-
-    /* copy Match */
-    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
-        /* offset beyond prefix */
-        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - dictStart), corruption_detected);
-        if (match + sequence.matchLength <= dictEnd) {
-            memmove(oLitEnd, match, sequence.matchLength);
-            return sequenceLength;
-        }
-        /* span extDict & currentPrefixSegment */
-        {   size_t const length1 = dictEnd - match;
-            memmove(oLitEnd, match, length1);
-            op = oLitEnd + length1;
-            sequence.matchLength -= length1;
-            match = prefixStart;
-            if (op > oend_w || sequence.matchLength < MINMATCH) {
-              U32 i;
-              for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
-              return sequenceLength;
-            }
-    }   }
-    assert(op <= oend_w);
-    assert(sequence.matchLength >= MINMATCH);
-
-    /* match within prefix */
-    if (sequence.offset < 8) {
-        /* close range match, overlap */
-        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
-        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
-        int const sub2 = dec64table[sequence.offset];
-        op[0] = match[0];
-        op[1] = match[1];
-        op[2] = match[2];
-        op[3] = match[3];
-        match += dec32table[sequence.offset];
-        ZSTD_copy4(op+4, match);
-        match -= sub2;
-    } else {
-        ZSTD_copy8(op, match);
-    }
-    op += 8; match += 8;
-
-    if (oMatchEnd > oend-(16-MINMATCH)) {
-        if (op < oend_w) {
-            ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
-            match += oend_w - op;
-            op = oend_w;
-        }
-        while (op < oMatchEnd) *op++ = *match++;
-    } else {
-        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);   /* works even if matchLength < 8 */
+    /* If the match length is > 8 bytes, then continue with the wildcopy. */
+    if (sequence.matchLength > 8) {
+        assert(op < oMatchEnd);
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
    }
    return sequenceLength;
 }
@ -1098,7 +1096,7 @@ ZSTD_decompressSequencesLong_body(
        /* decode and decompress */
        for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
            seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
-            size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
            PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
            sequences[seqNb & STORED_SEQS_MASK] = sequence;
@ -1109,7 +1107,7 @@ ZSTD_decompressSequencesLong_body(
        /* finish queue */
        seqNb -= seqAdvance;
        for ( ; seqNb<nbSeq ; seqNb++) {
-            size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
            op += oneSeqSize;
        }
--- a/lib/dictBuilder/cover.c
+++ b/lib/dictBuilder/cover.c
@ -638,8 +638,8 @@ void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLeve
                    "compared to the source size %u! "
                    "size(source)/size(dictionary) = %f, but it should be >= "
                    "10! This may lead to a subpar dictionary! We recommend "
-                    "training on sources at least 10x, and up to 100x the "
-                    "size of the dictionary!\n", (U32)maxDictSize,
+                    "training on sources at least 10x, and preferably 100x "
+                    "the size of the dictionary! \n", (U32)maxDictSize,
                    (U32)nbDmers, ratio);
 }

--- a/lib/legacy/zstd_v01.c
+++ b/lib/legacy/zstd_v01.c
@ -346,7 +346,7 @@ FORCE_INLINE unsigned FSE_highbit32 (U32 val)
    _BitScanReverse ( &r, val );
    return (unsigned) r;
 #   elif defined(__GNUC__) && (GCC_VERSION >= 304)   /* GCC Intrinsic */
-    return 31 - __builtin_clz (val);
+    return __builtin_clz (val) ^ 31;
 #   else   /* Software version */
    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
    U32 v = val;
--- a/lib/legacy/zstd_v02.c
+++ b/lib/legacy/zstd_v02.c
@ -353,7 +353,7 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
    _BitScanReverse ( &r, val );
    return (unsigned) r;
 #   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
-    return 31 - __builtin_clz (val);
+    return __builtin_clz (val) ^ 31;
 #   else   /* Software version */
    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
    U32 v = val;
--- a/lib/legacy/zstd_v03.c
+++ b/lib/legacy/zstd_v03.c
@ -356,7 +356,7 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
    _BitScanReverse ( &r, val );
    return (unsigned) r;
 #   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
-    return 31 - __builtin_clz (val);
+    return __builtin_clz (val) ^ 31;
 #   else   /* Software version */
    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
    U32 v = val;
--- a/lib/legacy/zstd_v04.c
+++ b/lib/legacy/zstd_v04.c
@ -627,7 +627,7 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
    _BitScanReverse ( &r, val );
    return (unsigned) r;
 #   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
-    return 31 - __builtin_clz (val);
+    return __builtin_clz (val) ^ 31;
 #   else   /* Software version */
    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
    U32 v = val;
--- a/lib/legacy/zstd_v05.c
+++ b/lib/legacy/zstd_v05.c
@ -756,7 +756,7 @@ MEM_STATIC unsigned BITv05_highbit32 (U32 val)
    _BitScanReverse ( &r, val );
    return (unsigned) r;
 #   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
-    return 31 - __builtin_clz (val);
+    return __builtin_clz (val) ^ 31;
 #   else   /* Software version */
    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
    U32 v = val;
--- a/lib/legacy/zstd_v06.c
+++ b/lib/legacy/zstd_v06.c
@ -860,7 +860,7 @@ MEM_STATIC unsigned BITv06_highbit32 ( U32 val)
    _BitScanReverse ( &r, val );
    return (unsigned) r;
 #   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
-    return 31 - __builtin_clz (val);
+    return __builtin_clz (val) ^ 31;
 #   else   /* Software version */
    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
    U32 v = val;
--- a/lib/legacy/zstd_v07.c
+++ b/lib/legacy/zstd_v07.c
@ -530,7 +530,7 @@ MEM_STATIC unsigned BITv07_highbit32 (U32 val)
    _BitScanReverse ( &r, val );
    return (unsigned) r;
 #   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
-    return 31 - __builtin_clz (val);
+    return __builtin_clz (val) ^ 31;
 #   else   /* Software version */
    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
    U32 v = val;
--- a/lib/zstd.h
+++ b/lib/zstd.h
@ -72,7 +72,7 @@ extern "C" {
 /*------   Version   ------*/
 #define ZSTD_VERSION_MAJOR    1
 #define ZSTD_VERSION_MINOR    4
-#define ZSTD_VERSION_RELEASE  3
+#define ZSTD_VERSION_RELEASE  4

 #define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
 ZSTDLIB_API unsigned ZSTD_versionNumber(void);   /**< to check runtime library version */
@ -1077,6 +1077,24 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);

 typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;

+/* One sequence, as written into the outSeqs array filled by ZSTD_getSequences(). */
+typedef struct {
+    unsigned int matchPos; /* Match pos in dst */
+    /* If seqDef.offset > 3, then this is seqDef.offset - 3
+     * If seqDef.offset < 3, then this is the corresponding repeat offset
+     * But if seqDef.offset < 3 and litLength == 0, this is the
+     *   repeat offset before the corresponding repeat offset
+     * And if seqDef.offset == 3 and litLength == 0, this is the
+     *   most recent repeat offset - 1
+     * NOTE(review): the case seqDef.offset == 3 with litLength != 0 is not
+     *   listed above; presumably it is also a repeat offset - confirm.
+     */
+    unsigned int offset;
+    unsigned int litLength; /* Literal length */
+    unsigned int matchLength; /* Match length */
+    /* 0 when seq not rep and seqDef.offset otherwise
+     * when litLength == 0 this will be <= 4, otherwise <= 3 like normal
+     */
+    unsigned int rep;
+} ZSTD_Sequence;
+
 typedef struct {
    unsigned windowLog;       /**< largest match distance : larger == more compression, more memory needed during decompression */
    unsigned chainLog;        /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
@ -1215,20 +1233,38 @@ ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcS
 *           or an error code (if srcSize is too small) */
 ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);

+/*! ZSTD_getSequences() :
+ * Extract sequences from the sequence store.
+ * zc can be used to insert custom compression params.
+ * This function invokes ZSTD_compress2().
+ * @return : number of sequences extracted
+ */
+ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
+    size_t outSeqsSize, const void* src, size_t srcSize);
+

 /***************************************
 *  Memory management
 ***************************************/

 /*! ZSTD_estimate*() :
- *  These functions make it possible to estimate memory usage
- *  of a future {D,C}Ctx, before its creation.
- *  ZSTD_estimateCCtxSize() will provide a budget large enough for any compression level up to selected one.
- *  It will also consider src size to be arbitrarily "large", which is worst case.
- *  If srcSize is known to always be small, ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation.
- *  ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
- *  ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1.
- *  Note : CCtx size estimation is only correct for single-threaded compression. */
+ *  These functions make it possible to estimate memory usage of a future
+ *  {D,C}Ctx, before its creation.
+ *
+ *  ZSTD_estimateCCtxSize() will provide a budget large enough for any
+ *  compression level up to selected one. Unlike ZSTD_estimateCStreamSize*(),
+ *  this estimate does not include space for a window buffer, so this estimate
+ *  is guaranteed to be enough for single-shot compressions, but not streaming
+ *  compressions. It will however assume the input may be arbitrarily large,
+ *  which is the worst case. If srcSize is known to always be small,
+ *  ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation.
+ *  ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with
+ *  ZSTD_getCParams() to create cParams from compressionLevel.
+ *  ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with
+ *  ZSTD_CCtxParams_setParameter().
+ *
+ *  Note: only single-threaded compression is supported.
+ *  ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if
+ *  ZSTD_c_nbWorkers is >= 1. */
 ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel);
 ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
 ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
@ -1641,7 +1677,10 @@ ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dic
 /**! ZSTD_initCStream_advanced() :
 * This function is deprecated, and is approximately equivalent to:
 *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
- *     ZSTD_CCtx_setZstdParams(zcs, params); // Set the zstd params and leave the rest as-is
+ *     // Pseudocode: Set each zstd parameter and leave the rest as-is.
+ *     for ((param, value) : params) {
+ *         ZSTD_CCtx_setParameter(zcs, param, value);
+ *     }
 *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
 *     ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
 *
@ -1661,7 +1700,10 @@ ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDi
 /**! ZSTD_initCStream_usingCDict_advanced() :
 * This function is deprecated, and is approximately equivalent to:
 *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
- *     ZSTD_CCtx_setZstdFrameParams(zcs, fParams); // Set the zstd frame params and leave the rest as-is
+ *     // Pseudocode: Set each zstd frame parameter and leave the rest as-is.
+ *     for ((fParam, value) : fParams) {
+ *         ZSTD_CCtx_setParameter(zcs, fParam, value);
+ *     }
 *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
 *     ZSTD_CCtx_refCDict(zcs, cdict);
 *
--- a/programs/fileio.c
+++ b/programs/fileio.c
@ -585,7 +585,7 @@ static FILE* FIO_openDstFile(FIO_prefs_t* const prefs, const char* srcFileName,
    {   FILE* const f = fopen( dstFileName, "wb" );
        if (f == NULL) {
            DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno));
-        } else {
+        } else if(srcFileName != NULL && strcmp (srcFileName, stdinmark)) {
            chmod(dstFileName, 00600);
        }
        return f;
@ -628,6 +628,102 @@ static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName)
    return (size_t)fileSize;
 }

+
+
+/* FIO_checkFilenameCollisions() :
+ * Checks for and warns (via DISPLAY) if there are any files that would have
+ * the same output path. Only the part of each name that follows the last
+ * path separator is compared.
+ * @return : 0 once the check has been performed (trivially so when there are
+ *           fewer than 2 files), 1 if the temporary table cannot be allocated.
+ */
+int FIO_checkFilenameCollisions(const char** filenameTable, unsigned nbFiles) {
+    const char **filenameTableSorted, *c, *prevElem, *filename;
+    unsigned u;
+
+    #if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */
+    c = "\\";
+    #else
+    c = "/";
+    #endif
+
+    /* Fewer than two names cannot collide. Returning early also avoids
+     * malloc(0), which may return NULL and would be misreported as an
+     * allocation failure, and avoids reading filenameTableSorted[0]
+     * from an empty table. */
+    if (nbFiles < 2) return 0;
+
+    filenameTableSorted = (const char**) malloc(sizeof(char*) * nbFiles);
+    if (!filenameTableSorted) {
+        DISPLAY("Unable to malloc new str array, not checking for name collisions\n");
+        return 1;
+    }
+
+    /* Reduce every path to its final component (no copy : pointers into filenameTable) */
+    for (u = 0; u < nbFiles; ++u) {
+        filename = strrchr(filenameTable[u], c[0]);
+        if (filename == NULL) {
+            filenameTableSorted[u] = filenameTable[u];
+        } else {
+            filenameTableSorted[u] = filename+1;
+        }
+    }
+
+    /* After sorting, identical names are adjacent : compare each entry with its predecessor */
+    qsort((void*)filenameTableSorted, nbFiles, sizeof(char*), UTIL_compareStr);
+    prevElem = filenameTableSorted[0];
+    for (u = 1; u < nbFiles; ++u) {
+        if (strcmp(prevElem, filenameTableSorted[u]) == 0) {
+            DISPLAY("WARNING: Two files have same filename: %s\n", prevElem);
+        }
+        prevElem = filenameTableSorted[u];
+    }
+
+    free((void*)filenameTableSorted);
+    return 0;
+}
+
+/* FIO_createFilename_fromOutDir() :
+ * Takes a source file name and specified output directory, and
+ * allocates memory for and returns a pointer to final path :
+ * outDirName, then a path separator (only if outDirName does not already end
+ * with one), then the part of srcFilename that follows its last separator.
+ * The buffer is sized for the path plus suffixLen + 30 extra bytes.
+ * @return : a malloc()'ed, null-terminated string; the caller must free() it.
+ * This function never returns an error : allocation failure is reported
+ * through EXM_THROW().
+ */
+static char*
+FIO_createFilename_fromOutDir(const char* srcFilename, const char* outDirName, const size_t suffixLen)
+{
+    #if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */
+    const char separator = '\\';
+    #else
+    const char separator = '/';
+    #endif
+    size_t const dirLen = strlen(outDirName);
+    const char* const lastSep = strrchr(srcFilename, separator);
+    /* Keep only what follows the last separator (the whole name if there is none).
+     * No temporary copy is needed : the bytes are read straight from srcFilename. */
+    const char* const baseName = (lastSep == NULL) ? srcFilename : lastSep + 1;
+    size_t const baseLen = strlen(baseName);
+    /* dirLen == 0 is tested first, so outDirName[dirLen-1] is never evaluated
+     * for an empty directory name (it would read before the start of the string).
+     * An empty outDirName is handled like any name lacking a trailing separator. */
+    size_t const sepLen = (dirLen == 0 || outDirName[dirLen-1] != separator) ? 1 : 0;
+    char* const result = (char*) malloc((dirLen + baseLen + suffixLen + 30) * sizeof(char));
+
+    if (!result) {
+        EXM_THROW(30, "zstd: %s", strerror(errno));
+    }
+
+    memcpy(result, outDirName, dirLen);
+    if (sepLen) result[dirLen] = separator;
+    memcpy(result + dirLen + sepLen, baseName, baseLen + 1 /* Include terminating null */);
+    return result;
+}
+
 #ifndef ZSTD_NOCOMPRESS

 /* **********************************************************************
@ -769,7 +865,7 @@ FIO_compressGzFrame(cRess_t* ress,
        {   size_t const decompBytes = ress->dstBufferSize - strm.avail_out;
            if (decompBytes) {
                if (fwrite(ress->dstBuffer, 1, decompBytes, ress->dstFile) != decompBytes)
-                    EXM_THROW(73, "Write error : cannot write to output file");
+                    EXM_THROW(73, "Write error : cannot write to output file : %s", strerror(errno));
                outFileSize += decompBytes;
                strm.next_out = (Bytef*)ress->dstBuffer;
                strm.avail_out = (uInt)ress->dstBufferSize;
@ -1276,9 +1372,7 @@ static int FIO_compressFilename_dstFile(FIO_prefs_t* const prefs,
    int result;
    stat_t statbuf;
    int transfer_permissions = 0;
-
    assert(ress.srcFile != NULL);
-
    if (ress.dstFile == NULL) {
        closeDstFile = 1;
        DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s", dstFileName);
@ -1369,11 +1463,9 @@ FIO_compressFilename_srcFile(FIO_prefs_t* const prefs,
    return result;
 }

-
-int FIO_compressFilename(FIO_prefs_t* const prefs,
-                         const char* dstFileName, const char* srcFileName,
-                         const char* dictFileName, int compressionLevel,
-                         ZSTD_compressionParameters comprParams)
+int FIO_compressFilename(FIO_prefs_t* const prefs, const char* dstFileName,
+                         const char* srcFileName, const char* dictFileName,
+                         int compressionLevel,  ZSTD_compressionParameters comprParams)
 {
    cRess_t const ress = FIO_createCResources(prefs, dictFileName, compressionLevel, comprParams);
    int const result = FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel);
@ -1383,21 +1475,25 @@ int FIO_compressFilename(FIO_prefs_t* const prefs,
    return result;
 }

-
 /* FIO_determineCompressedName() :
 * create a destination filename for compressed srcFileName.
 * @return a pointer to it.
 * This function never returns an error (it may abort() in case of pb)
 */
 static const char*
-FIO_determineCompressedName(const char* srcFileName, const char* suffix)
+FIO_determineCompressedName(const char* srcFileName, const char* outDirName, const char* suffix)
 {
    static size_t dfnbCapacity = 0;
    static char* dstFileNameBuffer = NULL;   /* using static allocation : this function cannot be multi-threaded */
-
-    size_t const sfnSize = strlen(srcFileName);
+    char* outDirFilename = NULL;
+    size_t sfnSize = strlen(srcFileName);
    size_t const suffixSize = strlen(suffix);
-
+    if (outDirName) {
+        outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, suffixSize);
+        sfnSize = strlen(outDirFilename);
+        assert(outDirFilename != NULL);
+    }
+    
    if (dfnbCapacity <= sfnSize+suffixSize+1) {
        /* resize buffer for dstName */
        free(dstFileNameBuffer);
@ -1405,23 +1501,30 @@ FIO_determineCompressedName(const char* srcFileName, const char* suffix)
        dstFileNameBuffer = (char*)malloc(dfnbCapacity);
        if (!dstFileNameBuffer) {
            EXM_THROW(30, "zstd: %s", strerror(errno));
-    }   }
+        }
+    }
    assert(dstFileNameBuffer != NULL);
-    memcpy(dstFileNameBuffer, srcFileName, sfnSize);
-    memcpy(dstFileNameBuffer+sfnSize, suffix, suffixSize+1 /* Include terminating null */);

+    if (outDirFilename) {
+        memcpy(dstFileNameBuffer, outDirFilename, sfnSize);
+        free(outDirFilename);
+    } else {
+        memcpy(dstFileNameBuffer, srcFileName, sfnSize);
+    }
+    memcpy(dstFileNameBuffer+sfnSize, suffix, suffixSize+1 /* Include terminating null */);
    return dstFileNameBuffer;
 }


 /* FIO_compressMultipleFilenames() :
 * compress nbFiles files
- * into one destination (outFileName)
- * or into one file each (outFileName == NULL, but suffix != NULL).
+ * into either one destination (outFileName),
+ * or into one file each (outFileName == NULL, but suffix != NULL),
+ * or into a destination folder (specified with -O)
 */
-int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs,
-                                  const char** inFileNamesTable, unsigned nbFiles,
-                                  const char* outFileName, const char* suffix,
+int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs, const char** inFileNamesTable,
+                                  const char* outDirName, unsigned nbFiles, 
+                                  const char* outFileName, const char* suffix, 
                                  const char* dictFileName, int compressionLevel,
                                  ZSTD_compressionParameters comprParams)
 {
@ -1430,7 +1533,6 @@ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs,

    /* init */
    assert(outFileName != NULL || suffix != NULL);
-
    if (outFileName != NULL) {   /* output into a single destination (stdout typically) */
        ress.dstFile = FIO_openDstFile(prefs, NULL, outFileName);
        if (ress.dstFile == NULL) {  /* could not open outFileName */
@ -1448,9 +1550,12 @@ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs,
        unsigned u;
        for (u=0; u<nbFiles; u++) {
            const char* const srcFileName = inFileNamesTable[u];
-            const char* const dstFileName = FIO_determineCompressedName(srcFileName, suffix);  /* cannot fail */
+            const char* const dstFileName = FIO_determineCompressedName(srcFileName, outDirName, suffix);  /* cannot fail */
            error |= FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel);
-    }   }
+        }
+        if (outDirName)
+            FIO_checkFilenameCollisions(inFileNamesTable ,nbFiles);
+    }

    FIO_freeCResources(ress);
    return error;
@ -1523,7 +1628,7 @@ static unsigned FIO_fwriteSparse(FIO_prefs_t* const prefs, FILE* file, const voi
    if (!prefs->sparseFileSupport) {  /* normal write */
        size_t const sizeCheck = fwrite(buffer, 1, bufferSize, file);
        if (sizeCheck != bufferSize)
-            EXM_THROW(70, "Write error : %s (cannot write decoded block)",
+            EXM_THROW(70, "Write error : cannot write decoded block : %s",
                            strerror(errno));
        return 0;
    }
@ -1554,7 +1659,8 @@ static unsigned FIO_fwriteSparse(FIO_prefs_t* const prefs, FILE* file, const voi
            ptrT += nb0T;
            {   size_t const sizeCheck = fwrite(ptrT, sizeof(size_t), seg0SizeT, file);
                if (sizeCheck != seg0SizeT)
-                    EXM_THROW(73, "Write error : cannot write decoded block");
+                    EXM_THROW(73, "Write error : cannot write decoded block : %s",
+                            strerror(errno));
        }   }
        ptrT += seg0SizeT;
    }
@ -1575,7 +1681,8 @@ static unsigned FIO_fwriteSparse(FIO_prefs_t* const prefs, FILE* file, const voi
                storedSkips = 0;
                {   size_t const sizeCheck = fwrite(restPtr, 1, (size_t)(restEnd - restPtr), file);
                    if (sizeCheck != (size_t)(restEnd - restPtr))
-                        EXM_THROW(75, "Write error : cannot write decoded end of block");
+                        EXM_THROW(75, "Write error : cannot write decoded end of block : %s",
+                            strerror(errno));
    }   }   }   }

    return storedSkips;
@ -1593,7 +1700,7 @@ FIO_fwriteSparseEnd(FIO_prefs_t* const prefs, FILE* file, unsigned storedSkips)
         * so that skipped ones get implicitly translated as zero by FS */
        {   const char lastZeroByte[1] = { 0 };
            if (fwrite(lastZeroByte, 1, 1, file) != 1)
-                EXM_THROW(69, "Write error : cannot write last zero");
+                EXM_THROW(69, "Write error : cannot write last zero : %s", strerror(errno));
    }   }
 }

@ -1612,7 +1719,7 @@ static int FIO_passThrough(FIO_prefs_t* const prefs,
    /* assumption : ress->srcBufferLoaded bytes already loaded and stored within buffer */
    {   size_t const sizeCheck = fwrite(buffer, 1, alreadyLoaded, foutput);
        if (sizeCheck != alreadyLoaded) {
-            DISPLAYLEVEL(1, "Pass-through write error \n");
+            DISPLAYLEVEL(1, "Pass-through write error : %s\n", strerror(errno));
            return 1;
    }   }

@ -1719,11 +1826,6 @@ static unsigned long long FIO_decompressZstdFrame(
        }

        if (readSizeHint == 0) break;   /* end of frame */
-        if (inBuff.size != inBuff.pos) {
-            DISPLAYLEVEL(1, "%s : Decoding error (37) : should consume entire input \n",
-                            srcFileName);
-            return FIO_ERROR_FRAME_DECODING;
-        }

        /* Fill input buffer */
        {   size_t const toDecode = MIN(readSizeHint, ress->srcBufferSize);  /* support large skippable frames */
@ -1788,7 +1890,7 @@ static unsigned long long FIO_decompressGzFrame(dRess_t* ress,
        {   size_t const decompBytes = ress->dstBufferSize - strm.avail_out;
            if (decompBytes) {
                if (fwrite(ress->dstBuffer, 1, decompBytes, ress->dstFile) != decompBytes) {
-                    DISPLAYLEVEL(1, "zstd: %s \n", strerror(errno));
+                    DISPLAYLEVEL(1, "zstd: fwrite error: %s \n", strerror(errno));
                    decodingError = 1; break;
                }
                outFileSize += decompBytes;
@ -1863,7 +1965,7 @@ static unsigned long long FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile,
        {   size_t const decompBytes = ress->dstBufferSize - strm.avail_out;
            if (decompBytes) {
                if (fwrite(ress->dstBuffer, 1, decompBytes, ress->dstFile) != decompBytes) {
-                    DISPLAYLEVEL(1, "zstd: %s \n", strerror(errno));
+                    DISPLAYLEVEL(1, "zstd: fwrite error: %s \n", strerror(errno));
                    decodingError = 1; break;
                }
                outFileSize += decompBytes;
@ -1934,7 +2036,7 @@ static unsigned long long FIO_decompressLz4Frame(dRess_t* ress,
            /* Write Block */
            if (decodedBytes) {
                if (fwrite(ress->dstBuffer, 1, decodedBytes, ress->dstFile) != decodedBytes) {
-                    DISPLAYLEVEL(1, "zstd: %s \n", strerror(errno));
+                    DISPLAYLEVEL(1, "zstd: fwrite error: %s \n", strerror(errno));
                    decodingError = 1; nextToLoad = 0; break;
                }
                filesize += decodedBytes;
@ -2169,13 +2271,14 @@ int FIO_decompressFilename(FIO_prefs_t* const prefs,
 * @return a pointer to it.
 * @return == NULL if there is an error */
 static const char*
-FIO_determineDstName(const char* srcFileName)
+FIO_determineDstName(const char* srcFileName, const char* outDirName)
 {
    static size_t dfnbCapacity = 0;
    static char* dstFileNameBuffer = NULL;   /* using static allocation : this function cannot be multi-threaded */
-
-    size_t const sfnSize = strlen(srcFileName);
+    char* outDirFilename = NULL;
+    size_t sfnSize = strlen(srcFileName);
    size_t suffixSize;
+    
    const char* const suffixPtr = strrchr(srcFileName, '.');
    if (suffixPtr == NULL) {
        DISPLAYLEVEL(1, "zstd: %s: unknown suffix -- ignored \n",
@ -2213,19 +2316,29 @@ FIO_determineDstName(const char* srcFileName)
                     srcFileName, suffixlist);
        return NULL;
    }
+    if (outDirName) {
+        outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, 0);
+        sfnSize = strlen(outDirFilename);
+        assert(outDirFilename != NULL);
+    }

-    /* allocate enough space to write dstFilename into it */
    if (dfnbCapacity+suffixSize <= sfnSize+1) {
+        /* allocate enough space to write dstFilename into it */
        free(dstFileNameBuffer);
        dfnbCapacity = sfnSize + 20;
        dstFileNameBuffer = (char*)malloc(dfnbCapacity);
        if (dstFileNameBuffer==NULL)
-            EXM_THROW(74, "%s : not enough memory for dstFileName", strerror(errno));
+            EXM_THROW(74, "%s : not enough memory for dstFileName", strerror(errno)); 
    }

    /* return dst name == src name truncated from suffix */
    assert(dstFileNameBuffer != NULL);
-    memcpy(dstFileNameBuffer, srcFileName, sfnSize - suffixSize);
+    if (outDirFilename) {
+        memcpy(dstFileNameBuffer, outDirFilename, sfnSize - suffixSize);
+        free(outDirFilename);
+    } else {
+        memcpy(dstFileNameBuffer, srcFileName, sfnSize - suffixSize);
+    }
    dstFileNameBuffer[sfnSize-suffixSize] = '\0';
    return dstFileNameBuffer;

@ -2235,8 +2348,8 @@ FIO_determineDstName(const char* srcFileName)

 int
 FIO_decompressMultipleFilenames(FIO_prefs_t* const prefs,
-                                const char* srcNamesTable[], unsigned nbFiles,
-                                const char* outFileName,
+                                const char** srcNamesTable, unsigned nbFiles,
+                                const char* outDirName, const char* outFileName,
                                const char* dictFileName)
 {
    int error = 0;
@ -2255,19 +2368,19 @@ FIO_decompressMultipleFilenames(FIO_prefs_t* const prefs,
        unsigned u;
        for (u=0; u<nbFiles; u++) {   /* create dstFileName */
            const char* const srcFileName = srcNamesTable[u];
-            const char* const dstFileName = FIO_determineDstName(srcFileName);
+            const char* const dstFileName = FIO_determineDstName(srcFileName, outDirName);
            if (dstFileName == NULL) { error=1; continue; }

            error |= FIO_decompressSrcFile(prefs, ress, dstFileName, srcFileName);
        }
+        if (outDirName)
+            FIO_checkFilenameCollisions(srcNamesTable ,nbFiles);
    }

    FIO_freeDResources(ress);
    return error;
 }

-
-
 /* **************************************************************************
 *  .zst file info (--list command)
 ***************************************************************************/
--- a/programs/fileio.h
+++ b/programs/fileio.h
@ -87,8 +87,9 @@ void FIO_setNotificationLevel(int level);
 /** FIO_compressFilename() :
    @return : 0 == ok;  1 == pb with src file. */
 int FIO_compressFilename (FIO_prefs_t* const prefs,
-                          const char* outfilename, const char* infilename, const char* dictFileName,
-                          int compressionLevel, ZSTD_compressionParameters comprParams);
+                          const char* outfilename, const char* infilename,
+                          const char* dictFileName, int compressionLevel,
+                          ZSTD_compressionParameters comprParams);

 /** FIO_decompressFilename() :
    @return : 0 == ok;  1 == pb with src file. */
@ -103,19 +104,24 @@ int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int dis
 ***************************************/
 /** FIO_compressMultipleFilenames() :
    @return : nb of missing files */
-int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs,
-                                  const char** srcNamesTable, unsigned nbFiles,
-                                  const char* outFileName, const char* suffix,
-                                  const char* dictFileName, int compressionLevel,
+int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs, const char** inFileNamesTable,
+                                  const char* outDirName, unsigned nbFiles, 
+                                  const char* outFileName, const char* suffix, 
+                                  const char* dictFileName, int compressionLevel, 
                                  ZSTD_compressionParameters comprParams);

 /** FIO_decompressMultipleFilenames() :
    @return : nb of missing or skipped files */
 int FIO_decompressMultipleFilenames(FIO_prefs_t* const prefs,
                                    const char** srcNamesTable, unsigned nbFiles,
+                                    const char* outDirName,
                                    const char* outFileName,
                                    const char* dictFileName);

+/* FIO_checkFilenameCollisions() :
+ * Checks for and warns if there are any files that would have the same output path
+ */
+int FIO_checkFilenameCollisions(const char** filenameTable, unsigned nbFiles);

 /*-*************************************
 *  Advanced stuff (should actually be hosted elsewhere)
--- a/programs/platform.h
+++ b/programs/platform.h
@ -92,7 +92,7 @@ extern "C" {

 #    if defined(__linux__) || defined(__linux)
 #      ifndef _POSIX_C_SOURCE
-#        define _POSIX_C_SOURCE 200112L  /* feature test macro : https://www.gnu.org/software/libc/manual/html_node/Feature-Test-Macros.html */
+#        define _POSIX_C_SOURCE 200809L  /* feature test macro : https://www.gnu.org/software/libc/manual/html_node/Feature-Test-Macros.html */
 #      endif
 #    endif
 #    include <unistd.h>  /* declares _POSIX_VERSION */
--- a/programs/timefn.h
+++ b/programs/timefn.h
@ -19,12 +19,6 @@ extern "C" {
 /*-****************************************
 *  Dependencies
 ******************************************/
-#include <sys/types.h>    /* utime */
-#if defined(_MSC_VER)
-#  include <sys/utime.h>  /* utime */
-#else
-#  include <utime.h>      /* utime */
-#endif
 #include <time.h>         /* clock_t, clock, CLOCKS_PER_SEC */


--- a/programs/util.c
+++ b/programs/util.c
@ -20,6 +20,9 @@ extern "C" {
 #include <errno.h>
 #include <assert.h>

+#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__)
+#include <direct.h>     /* needed for _mkdir in windows */
+#endif

 int UTIL_fileExist(const char* filename)
 {
@ -54,14 +57,25 @@ int UTIL_getFileStat(const char* infilename, stat_t *statbuf)
 int UTIL_setFileStat(const char *filename, stat_t *statbuf)
 {
    int res = 0;
-    struct utimbuf timebuf;

    if (!UTIL_isRegularFile(filename))
        return -1;

-    timebuf.actime = time(NULL);
-    timebuf.modtime = statbuf->st_mtime;
-    res += utime(filename, &timebuf);  /* set access and modification times */
+    /* set access and modification times */
+#if defined(_WIN32) || (PLATFORM_POSIX_VERSION < 200809L)
+    {
+        struct utimbuf timebuf;
+        timebuf.actime = time(NULL);
+        timebuf.modtime = statbuf->st_mtime;
+        res += utime(filename, &timebuf);
+    }
+#else
+    {
+        /* (atime, mtime) */
+        struct timespec timebuf[2] = { {0, UTIME_NOW}, statbuf->st_mtim };
+        res += utimensat(AT_FDCWD, filename, timebuf, 0);
+    }
+#endif

 #if !defined(_WIN32)
    res += chown(filename, statbuf->st_uid, statbuf->st_gid);  /* Copy ownership */
@ -87,6 +101,10 @@ U32 UTIL_isDirectory(const char* infilename)
    return 0;
 }

+int UTIL_compareStr(const void *p1, const void *p2) {
+    return strcmp(* (char * const *) p1, * (char * const *) p2);
+}
+
 int UTIL_isSameFile(const char* file1, const char* file2)
 {
 #if defined(_MSC_VER)
--- a/programs/util.h
+++ b/programs/util.h
@ -25,12 +25,17 @@ extern "C" {
 #include <stdio.h>        /* fprintf */
 #include <sys/types.h>    /* stat, utime */
 #include <sys/stat.h>     /* stat, chmod */
-#if defined(_MSC_VER)
+#if defined(_WIN32)
 #  include <sys/utime.h>  /* utime */
 #  include <io.h>         /* _chmod */
 #else
 #  include <unistd.h>     /* chown, stat */
+#if PLATFORM_POSIX_VERSION < 200809L
 #  include <utime.h>      /* utime */
+#else
+#  include <fcntl.h>      /* AT_FDCWD */
+#  include <sys/stat.h>   /* utimensat */
+#endif
 #endif
 #include <time.h>         /* clock_t, clock, CLOCKS_PER_SEC, nanosleep */
 #include "mem.h"          /* U32, U64 */
@ -129,6 +134,7 @@ int UTIL_setFileStat(const char* filename, stat_t* statbuf);
 U32 UTIL_isDirectory(const char* infilename);
 int UTIL_getFileStat(const char* infilename, stat_t* statbuf);
 int UTIL_isSameFile(const char* file1, const char* file2);
+int UTIL_compareStr(const void *p1, const void *p2);

 U32 UTIL_isLink(const char* infilename);
 #define UTIL_FILESIZE_UNKNOWN  ((U64)(-1))
--- a/programs/zstd.1
+++ b/programs/zstd.1
@ -1,5 +1,5 @@
 .
-.TH "ZSTD" "1" "August 2019" "zstd 1.4.3" "User Commands"
+.TH "ZSTD" "1" "September 2019" "zstd 1.4.4" "User Commands"
 .
 .SH "NAME"
 \fBzstd\fR \- zstd, zstdmt, unzstd, zstdcat \- Compress or decompress \.zst files
@ -127,6 +127,14 @@ Does not spawn a thread for compression, use a single thread for both I/O and co
 \fBzstd\fR will dynamically adapt compression level to perceived I/O conditions\. Compression level adaptation can be observed live by using command \fB\-v\fR\. Adaptation can be constrained between supplied \fBmin\fR and \fBmax\fR levels\. The feature works when combined with multi\-threading and \fB\-\-long\fR mode\. It does not work with \fB\-\-single\-thread\fR\. It sets window size to 8 MB by default (can be changed manually, see \fBwlog\fR)\. Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible\. \fInote\fR : at the time of this writing, \fB\-\-adapt\fR can remain stuck at low speed when combined with multiple worker threads (>=2)\.
 .
 .TP
+\fB\-\-stream\-size=#\fR
+Sets the pledged source size of input coming from a stream\. This value must be exact, as it will be included in the produced frame header\. Incorrect stream sizes will cause an error\. This information will be used to better optimize compression parameters, resulting in better and potentially faster compression, especially for smaller source sizes\.
+.
+.TP
+\fB\-\-size\-hint=#\fR
+When handling input from a stream, \fBzstd\fR must guess how large the source size will be when optimizing compression parameters\. If the stream size is relatively small, this guess may be a poor one, resulting in a worse compression ratio than expected\. This feature allows for controlling the guess when needed\. Exact guesses result in better compression ratios\. Overestimates result in slightly degraded compression ratios, while underestimates may result in significant degradation\.
+.
+.TP
 \fB\-\-rsyncable\fR
 \fBzstd\fR will periodically synchronize the compression state to make the compressed file more rsync\-friendly\. There is a negligible impact to compression ratio, and the faster compression levels will see a small compression speed hit\. This feature does not work with \fB\-\-single\-thread\fR\. You probably don\'t want to use it with long range mode, since it will decrease the effectiveness of the synchronization points, but your mileage may vary\.
 .
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@ -136,6 +136,7 @@ static int usage_advanced(const char* programName)
    DISPLAY( " -q     : suppress warnings; specify twice to suppress errors too\n");
    DISPLAY( " -c     : force write to standard output, even if it is the console\n");
    DISPLAY( " -l     : print information about zstd compressed files \n");
+    DISPLAY( " --output-dir-flat directory: results stored into `directory`. Filename collisions mean first file will be compressed. With -f, the last file will be compressed.\n");
 #ifndef ZSTD_NOCOMPRESS
    DISPLAY( "--ultra : enable levels beyond %i, up to %i (requires more memory)\n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel());
    DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog);
@ -562,6 +563,7 @@ int main(int argCount, const char* argv[])
        adaptMax = MAXCLEVEL,
        rsyncable = 0,
        nextArgumentIsOutFileName = 0,
+        nextArgumentIsOutDirName = 0,
        nextArgumentIsMaxDict = 0,
        nextArgumentIsDictID = 0,
        nextArgumentsAreFiles = 0,
@ -586,6 +588,7 @@ int main(int argCount, const char* argv[])
    unsigned filenameIdx = 0;
    const char* programName = argv[0];
    const char* outFileName = NULL;
+    const char* outDirName = NULL;
    const char* dictFileName = NULL;
    const char* suffix = ZSTD_EXTENSION;
    unsigned maxDictSize = g_defaultMaxDictSize;
@ -686,6 +689,7 @@ int main(int argCount, const char* argv[])
                    if (!strcmp(argument, "--keep")) { FIO_setRemoveSrcFile(prefs, 0); continue; }
                    if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(prefs, 1); continue; }
                    if (!strcmp(argument, "--priority=rt")) { setRealTimePrio = 1; continue; }
+                    if (!strcmp(argument, "--output-dir-flat")) {nextArgumentIsOutDirName=1; lastCommand=1; continue; }
                    if (!strcmp(argument, "--adapt")) { adapt = 1; continue; }
                    if (longCommandWArg(&argument, "--adapt=")) { adapt = 1; if (!parseAdaptParameters(argument, &adaptMin, &adaptMax)) CLEAN_RETURN(badusage(programName)); continue; }
                    if (!strcmp(argument, "--single-thread")) { nbWorkers = 0; singleThread = 1; continue; }
@ -852,7 +856,7 @@ int main(int argCount, const char* argv[])

                        /* destination file name */
                    case 'o': nextArgumentIsOutFileName=1; lastCommand=1; argument++; break;
-
+                   
                        /* limit decompression memory */
                    case 'M':
                        argument++;
@ -965,6 +969,13 @@ int main(int argCount, const char* argv[])
            continue;
        }

+        if (nextArgumentIsOutDirName) {
+            nextArgumentIsOutDirName = 0;
+            lastCommand = 0;
+            outDirName = argument;
+            continue;
+        }
+
        /* add filename to list */
        filenameTable[filenameIdx++] = argument;
    }
@ -1166,7 +1177,7 @@ int main(int argCount, const char* argv[])
        if ((filenameIdx==1) && outFileName)
          operationResult = FIO_compressFilename(prefs, outFileName, filenameTable[0], dictFileName, cLevel, compressionParams);
        else
-          operationResult = FIO_compressMultipleFilenames(prefs, filenameTable, filenameIdx, outFileName, suffix, dictFileName, cLevel, compressionParams);
+          operationResult = FIO_compressMultipleFilenames(prefs, filenameTable, outDirName, filenameIdx, outFileName, suffix, dictFileName, cLevel, compressionParams);
 #else
        (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint; /* not used when ZSTD_NOCOMPRESS set */
        DISPLAY("Compression not supported \n");
@ -1184,7 +1195,7 @@ int main(int argCount, const char* argv[])
        if (filenameIdx==1 && outFileName)
            operationResult = FIO_decompressFilename(prefs, outFileName, filenameTable[0], dictFileName);
        else
-            operationResult = FIO_decompressMultipleFilenames(prefs, filenameTable, filenameIdx, outFileName, dictFileName);
+            operationResult = FIO_decompressMultipleFilenames(prefs, filenameTable, filenameIdx, outDirName, outFileName, dictFileName);
 #else
        DISPLAY("Decompression not supported \n");
 #endif
--- a/programs/zstdgrep.1
+++ b/programs/zstdgrep.1
@ -1,5 +1,5 @@
 .
-.TH "ZSTDGREP" "1" "August 2019" "zstd 1.4.3" "User Commands"
+.TH "ZSTDGREP" "1" "September 2019" "zstd 1.4.4" "User Commands"
 .
 .SH "NAME"
 \fBzstdgrep\fR \- print lines matching a pattern in zstandard\-compressed files
--- a/programs/zstdless.1
+++ b/programs/zstdless.1
@ -1,5 +1,5 @@
 .
-.TH "ZSTDLESS" "1" "August 2019" "zstd 1.4.3" "User Commands"
+.TH "ZSTDLESS" "1" "September 2019" "zstd 1.4.4" "User Commands"
 .
 .SH "NAME"
 \fBzstdless\fR \- view zstandard\-compressed files
--- a/tests/decodecorpus.c
+++ b/tests/decodecorpus.c
@ -758,8 +758,8 @@ static U32 generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore,
            DISPLAYLEVEL(7, "        repeat offset: %d\n", (int)repIndex);
        }
        /* use libzstd sequence handling */
-        ZSTD_storeSeq(seqStore, literalLen, literals, offsetCode,
-                      matchLen - MINMATCH);
+        ZSTD_storeSeq(seqStore, literalLen, literals, literals + literalLen,
+                      offsetCode, matchLen - MINMATCH);

        literalsSize -= literalLen;
        excessMatch -= (matchLen - MIN_SEQ_LEN);
--- a/tests/fuzz/Makefile
+++ b/tests/fuzz/Makefile
@ -40,8 +40,8 @@ FUZZ_LDFLAGS := -pthread $(LDFLAGS)
 FUZZ_ARFLAGS := $(ARFLAGS)
 FUZZ_TARGET_FLAGS = $(FUZZ_CPPFLAGS) $(FUZZ_CXXFLAGS) $(FUZZ_LDFLAGS)

-FUZZ_HEADERS := fuzz_helpers.h fuzz.h zstd_helpers.h
-FUZZ_SRC := $(PRGDIR)/util.c zstd_helpers.c
+FUZZ_HEADERS := fuzz_helpers.h fuzz.h zstd_helpers.h fuzz_data_producer.h
+FUZZ_SRC := $(PRGDIR)/util.c zstd_helpers.c fuzz_data_producer.c

 ZSTDCOMMON_SRC := $(ZSTDDIR)/common/*.c
 ZSTDCOMP_SRC   := $(ZSTDDIR)/compress/*.c
--- a/tests/fuzz/README.md
+++ b/tests/fuzz/README.md
@ -90,7 +90,7 @@ CC=afl-clang CXX=afl-clang++ ./fuzz.py build all --enable-asan --enable-ubsan

 ## Regression Testing

-The regression rest supports the `all` target to run all the fuzzers in one
+The regression test supports the `all` target to run all the fuzzers in one
 command.

 ```
--- a/tests/fuzz/block_decompress.c
+++ b/tests/fuzz/block_decompress.c
@ -28,8 +28,6 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
 {
    size_t const neededBufSize = ZSTD_BLOCKSIZE_MAX;

-    FUZZ_seed(&src, &size);
-
    /* Allocate all buffers and contexts if not already allocated */
    if (neededBufSize > bufSize) {
        free(rBuf);
--- a/tests/fuzz/block_round_trip.c
+++ b/tests/fuzz/block_round_trip.c
@ -20,21 +20,20 @@
 #include <string.h>
 #include "fuzz_helpers.h"
 #include "zstd.h"
-
-static const int kMaxClevel = 19;
+#include "zstd_helpers.h"
+#include "fuzz_data_producer.h"

 static ZSTD_CCtx *cctx = NULL;
 static ZSTD_DCtx *dctx = NULL;
 static void* cBuf = NULL;
 static void* rBuf = NULL;
 static size_t bufSize = 0;
-static uint32_t seed;

 static size_t roundTripTest(void *result, size_t resultCapacity,
                            void *compressed, size_t compressedCapacity,
-                            const void *src, size_t srcSize)
+                            const void *src, size_t srcSize,
+                            int cLevel)
 {
-    int const cLevel = FUZZ_rand(&seed) % kMaxClevel;
    ZSTD_parameters const params = ZSTD_getParams(cLevel, srcSize, 0);
    size_t ret = ZSTD_compressBegin_advanced(cctx, NULL, 0, params, srcSize);
    FUZZ_ZASSERT(ret);
@ -52,12 +51,16 @@ static size_t roundTripTest(void *result, size_t resultCapacity,

 int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
 {
-    size_t neededBufSize;
+    /* Give a random portion of src data to the producer, to use for
+    parameter generation. The rest will be used for (de)compression */
+    FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
+    size = FUZZ_dataProducer_reserveDataPrefix(producer);

-    seed = FUZZ_seed(&src, &size);
-    neededBufSize = size;
+    int const cLevel = FUZZ_dataProducer_int32Range(producer, kMinClevel, kMaxClevel);
+
+    size_t neededBufSize = size;
    if (size > ZSTD_BLOCKSIZE_MAX)
-        return 0;
+        size = ZSTD_BLOCKSIZE_MAX;

    /* Allocate all buffers and contexts if not already allocated */
    if (neededBufSize > bufSize || !cBuf || !rBuf) {
@ -79,11 +82,13 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)

    {
        size_t const result =
-            roundTripTest(rBuf, neededBufSize, cBuf, neededBufSize, src, size);
+            roundTripTest(rBuf, neededBufSize, cBuf, neededBufSize, src, size,
+              cLevel);
        FUZZ_ZASSERT(result);
        FUZZ_ASSERT_MSG(result == size, "Incorrect regenerated size");
        FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!");
    }
+    FUZZ_dataProducer_free(producer);
 #ifndef STATEFUL_FUZZING
    ZSTD_freeCCtx(cctx); cctx = NULL;
    ZSTD_freeDCtx(dctx); dctx = NULL;
--- a/tests/fuzz/dictionary_decompress.c
+++ b/tests/fuzz/dictionary_decompress.c
@ -18,33 +18,37 @@
 #include <stdio.h>
 #include "fuzz_helpers.h"
 #include "zstd_helpers.h"
+#include "fuzz_data_producer.h"

 static ZSTD_DCtx *dctx = NULL;

 int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
 {
-    uint32_t seed = FUZZ_seed(&src, &size);
+    /* Give a random portion of src data to the producer, to use for
+    parameter generation. The rest will be used for (de)compression */
+    FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
+    size = FUZZ_dataProducer_reserveDataPrefix(producer);
+
    FUZZ_dict_t dict;
    ZSTD_DDict* ddict = NULL;
-    int i;

    if (!dctx) {
        dctx = ZSTD_createDCtx();
        FUZZ_ASSERT(dctx);
    }
-    dict = FUZZ_train(src, size, &seed);
-    if (FUZZ_rand32(&seed, 0, 1) == 0) {
+    dict = FUZZ_train(src, size, producer);
+    if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) {
        ddict = ZSTD_createDDict(dict.buff, dict.size);
        FUZZ_ASSERT(ddict);
    } else {
        FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced(
                dctx, dict.buff, dict.size,
-                (ZSTD_dictLoadMethod_e)FUZZ_rand32(&seed, 0, 1),
-                (ZSTD_dictContentType_e)FUZZ_rand32(&seed, 0, 2)));
+                (ZSTD_dictLoadMethod_e)FUZZ_dataProducer_uint32Range(producer, 0, 1),
+                (ZSTD_dictContentType_e)FUZZ_dataProducer_uint32Range(producer, 0, 2)));
    }
-    /* Run it 10 times over 10 output sizes. Reuse the context and dict. */
-    for (i = 0; i < 10; ++i) {
-        size_t const bufSize = FUZZ_rand32(&seed, 0, 2 * size);
+
+    {
+        size_t const bufSize = FUZZ_dataProducer_uint32Range(producer, 0, 10 * size);
        void* rBuf = malloc(bufSize);
        FUZZ_ASSERT(rBuf);
        if (ddict) {
@ -55,6 +59,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
        free(rBuf);
    }
    free(dict.buff);
+    FUZZ_dataProducer_free(producer);
    ZSTD_freeDDict(ddict);
 #ifndef STATEFUL_FUZZING
    ZSTD_freeDCtx(dctx); dctx = NULL;
--- a/tests/fuzz/dictionary_round_trip.c
+++ b/tests/fuzz/dictionary_round_trip.c
@ -19,22 +19,21 @@
 #include <string.h>
 #include "fuzz_helpers.h"
 #include "zstd_helpers.h"
-
-static const int kMaxClevel = 19;
+#include "fuzz_data_producer.h"

 static ZSTD_CCtx *cctx = NULL;
 static ZSTD_DCtx *dctx = NULL;
-static uint32_t seed;

 static size_t roundTripTest(void *result, size_t resultCapacity,
                            void *compressed, size_t compressedCapacity,
-                            const void *src, size_t srcSize)
+                            const void *src, size_t srcSize,
+                            FUZZ_dataProducer_t *producer)
 {
    ZSTD_dictContentType_e dictContentType = ZSTD_dct_auto;
-    FUZZ_dict_t dict = FUZZ_train(src, srcSize, &seed);
+    FUZZ_dict_t dict = FUZZ_train(src, srcSize, producer);
    size_t cSize;
-    if ((FUZZ_rand(&seed) & 15) == 0) {
-        int const cLevel = FUZZ_rand(&seed) % kMaxClevel;
+    if (FUZZ_dataProducer_uint32Range(producer, 0, 15) == 0) {
+        int const cLevel = FUZZ_dataProducer_int32Range(producer, kMinClevel, kMaxClevel);

        cSize = ZSTD_compress_usingDict(cctx,
                compressed, compressedCapacity,
@ -42,20 +41,20 @@ static size_t roundTripTest(void *result, size_t resultCapacity,
                dict.buff, dict.size,
                cLevel);
    } else {
-        dictContentType = FUZZ_rand32(&seed, 0, 2);
-        FUZZ_setRandomParameters(cctx, srcSize, &seed);
+        dictContentType = FUZZ_dataProducer_uint32Range(producer, 0, 2);
+        FUZZ_setRandomParameters(cctx, srcSize, producer);
        /* Disable checksum so we can use sizes smaller than compress bound. */
        FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0));
        FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary_advanced(
                cctx, dict.buff, dict.size,
-                (ZSTD_dictLoadMethod_e)FUZZ_rand32(&seed, 0, 1),
+                (ZSTD_dictLoadMethod_e)FUZZ_dataProducer_uint32Range(producer, 0, 1),
                dictContentType));
        cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize);
    }
    FUZZ_ZASSERT(cSize);
    FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced(
        dctx, dict.buff, dict.size,
-        (ZSTD_dictLoadMethod_e)FUZZ_rand32(&seed, 0, 1),
+        (ZSTD_dictLoadMethod_e)FUZZ_dataProducer_uint32Range(producer, 0, 1),
        dictContentType));
    {
        size_t const ret = ZSTD_decompressDCtx(
@ -67,17 +66,20 @@ static size_t roundTripTest(void *result, size_t resultCapacity,

 int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
 {
+    /* Give a random portion of src data to the producer, to use for
+    parameter generation. The rest will be used for (de)compression */
+    FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
+    size = FUZZ_dataProducer_reserveDataPrefix(producer);
+
    size_t const rBufSize = size;
    void* rBuf = malloc(rBufSize);
    size_t cBufSize = ZSTD_compressBound(size);
-    void* cBuf;
-
-    seed = FUZZ_seed(&src, &size);
+    void *cBuf;
    /* Half of the time fuzz with a 1 byte smaller output size.
     * This will still succeed because we force the checksum to be disabled,
     * giving us 4 bytes of overhead.
     */
-    cBufSize -= FUZZ_rand32(&seed, 0, 1);
+    cBufSize -= FUZZ_dataProducer_uint32Range(producer, 0, 1);
    cBuf = malloc(cBufSize);

    if (!cctx) {
@ -91,13 +93,14 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)

    {
        size_t const result =
-            roundTripTest(rBuf, rBufSize, cBuf, cBufSize, src, size);
+            roundTripTest(rBuf, rBufSize, cBuf, cBufSize, src, size, producer);
        FUZZ_ZASSERT(result);
        FUZZ_ASSERT_MSG(result == size, "Incorrect regenerated size");
        FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!");
    }
    free(rBuf);
    free(cBuf);
+    FUZZ_dataProducer_free(producer);
 #ifndef STATEFUL_FUZZING
    ZSTD_freeCCtx(cctx); cctx = NULL;
    ZSTD_freeDCtx(dctx); dctx = NULL;
--- a/tests/fuzz/fuzz.h
+++ b/tests/fuzz/fuzz.h
@ -17,12 +17,6 @@
 *        test code paths which are only executed when contexts are reused.
 *        WARNING: Makes reproducing crashes much harder.
 *        Default: Not defined.
- * @param FUZZ_RNG_SEED_SIZE:
- *        The number of bytes of the source to look at when constructing a seed
- *        for the deterministic RNG. These bytes are discarded before passing
- *        the data to zstd functions. Every fuzzer initializes the RNG exactly
- *        once before doing anything else, even if it is unused.
- *        Default: 4.
 * @param DEBUGLEVEL:
 *        This is a parameter for the zstd library. Defining `DEBUGLEVEL=1`
 *        enables assert() statements in the zstd library. Higher levels enable
@ -42,10 +36,6 @@
 #ifndef FUZZ_H
 #define FUZZ_H

-#ifndef FUZZ_RNG_SEED_SIZE
-#  define FUZZ_RNG_SEED_SIZE 4
-#endif
-
 #include <stddef.h>
 #include <stdint.h>

--- a/tests/fuzz/fuzz.py
+++ b/tests/fuzz/fuzz.py
@ -660,7 +660,7 @@ def gen_parser(args):
    parser.add_argument(
        '--max-size-log',
        type=int,
-        default=13,
+        default=18,
        help='Maximum sample size to generate')
    parser.add_argument(
        '--seed',
@ -720,7 +720,7 @@ def gen(args):
            if info.frame_type == FrameType.BLOCK:
                cmd += [
                    '--gen-blocks',
-                    '--max-block-size-log={}'.format(args.max_size_log)
+                    '--max-block-size-log={}'.format(min(args.max_size_log, 17))
                ]
            else:
                cmd += ['--max-content-size-log={}'.format(args.max_size_log)]
@ -740,10 +740,8 @@ def gen(args):
            for name in os.listdir(samples):
                samplename = abs_join(samples, name)
                outname = abs_join(seed, name)
-                rng_seed = os.urandom(args.fuzz_rng_seed_size)
                with open(samplename, 'rb') as sample:
                    with open(outname, 'wb') as out:
-                        out.write(rng_seed)
                        CHUNK_SIZE = 131072
                        chunk = sample.read(CHUNK_SIZE)
                        while len(chunk) > 0:
--- a/tests/fuzz/fuzz_data_producer.c
+++ b/tests/fuzz/fuzz_data_producer.c
@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2016-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ */
+
+#include "fuzz_data_producer.h"
+
+struct FUZZ_dataProducer_s{
+  const uint8_t *data;
+  size_t size;
+};
+
+FUZZ_dataProducer_t *FUZZ_dataProducer_create(const uint8_t *data, size_t size) {
+    FUZZ_dataProducer_t *producer = malloc(sizeof(FUZZ_dataProducer_t));
+
+    FUZZ_ASSERT(producer != NULL);
+
+    producer->data = data;
+    producer->size = size;
+    return producer;
+}
+
+void FUZZ_dataProducer_free(FUZZ_dataProducer_t *producer) { free(producer); }
+
+uint32_t FUZZ_dataProducer_uint32Range(FUZZ_dataProducer_t *producer, uint32_t min,
+                                  uint32_t max) {
+    FUZZ_ASSERT(min <= max);
+
+    uint32_t range = max - min;
+    uint32_t rolling = range;
+    uint32_t result = 0;
+
+    while (rolling > 0 && producer->size > 0) {
+      uint8_t next = *(producer->data + producer->size - 1);
+      producer->size -= 1;
+      result = (result << 8) | next;
+      rolling >>= 8;
+    }
+
+    if (range == 0xffffffff) {
+      return result;
+    }
+
+    return min + result % (range + 1);
+}
+
+uint32_t FUZZ_dataProducer_uint32(FUZZ_dataProducer_t *producer) {
+    return FUZZ_dataProducer_uint32Range(producer, 0, 0xffffffff);
+}
+
+int32_t FUZZ_dataProducer_int32Range(FUZZ_dataProducer_t *producer,
+                                    int32_t min, int32_t max)
+{
+    FUZZ_ASSERT(min <= max);
+
+    if (min < 0)
+      return (int)FUZZ_dataProducer_uint32Range(producer, 0, max - min) + min;
+
+    return FUZZ_dataProducer_uint32Range(producer, min, max);
+}
+
+size_t FUZZ_dataProducer_remainingBytes(FUZZ_dataProducer_t *producer){
+    return producer->size;
+}
+
+size_t FUZZ_dataProducer_contract(FUZZ_dataProducer_t *producer, size_t newSize)
+{
+    newSize = newSize > producer->size ? producer->size : newSize;
+
+    size_t remaining = producer->size - newSize;
+    producer->data = producer->data + remaining;
+    producer->size = newSize;
+    return remaining;
+}
+
+size_t FUZZ_dataProducer_reserveDataPrefix(FUZZ_dataProducer_t *producer)
+{
+    size_t producerSliceSize = FUZZ_dataProducer_uint32Range(
+                                  producer, 0, producer->size);
+    return FUZZ_dataProducer_contract(producer, producerSliceSize);
+}
--- a/tests/fuzz/fuzz_data_producer.h
+++ b/tests/fuzz/fuzz_data_producer.h
@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2016-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ */
+
+/**
+ * Helper APIs for generating random data from an input data stream.
+ *
+ * The producer reads bytes from the end of the input and appends them together
+ * to generate a random number in the requested range. If it runs out of input
+ * data, it will keep returning the same value (min) over and over again.
+ */
+
+#ifndef FUZZ_DATA_PRODUCER_H
+#define FUZZ_DATA_PRODUCER_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "fuzz_helpers.h"
+
+/* Struct used for maintaining the state of the data */
+typedef struct FUZZ_dataProducer_s FUZZ_dataProducer_t;
+
+/* Returns a data producer state struct. Use for producer initialization. */
+FUZZ_dataProducer_t *FUZZ_dataProducer_create(const uint8_t *data, size_t size);
+
+/* Frees the data producer */
+void FUZZ_dataProducer_free(FUZZ_dataProducer_t *producer);
+
+/* Returns value between [min, max] */
+uint32_t FUZZ_dataProducer_uint32Range(FUZZ_dataProducer_t *producer, uint32_t min,
+                                  uint32_t max);
+
+/* Returns a uint32 value */
+uint32_t FUZZ_dataProducer_uint32(FUZZ_dataProducer_t *producer);
+
+/* Returns a signed value between [min, max] */
+int32_t FUZZ_dataProducer_int32Range(FUZZ_dataProducer_t *producer,
+                                    int32_t min, int32_t max);
+
+/* Returns the size of the remaining bytes of data in the producer */
+size_t FUZZ_dataProducer_remainingBytes(FUZZ_dataProducer_t *producer);
+
+/* Restricts the producer to only the last newSize bytes of data.
+If newSize > current data size, nothing happens. Returns the number of bytes
+the producer won't use anymore, after contracting. */
+size_t FUZZ_dataProducer_contract(FUZZ_dataProducer_t *producer, size_t newSize);
+
+/* Restricts the producer to use only the last X bytes of data, where X is
+ a random number in the interval [0, data_size]. Returns the size of the
+ remaining data the producer won't use anymore (the prefix). */
+size_t FUZZ_dataProducer_reserveDataPrefix(FUZZ_dataProducer_t *producer);
+#endif // FUZZ_DATA_PRODUCER_H
--- a/tests/fuzz/fuzz_helpers.h
+++ b/tests/fuzz/fuzz_helpers.h
@ -55,37 +55,6 @@ extern "C" {
 #define FUZZ_STATIC static
 #endif

-/**
- * Deterministically constructs a seed based on the fuzz input.
- * Consumes up to the first FUZZ_RNG_SEED_SIZE bytes of the input.
- */
-FUZZ_STATIC uint32_t FUZZ_seed(uint8_t const **src, size_t* size) {
-    uint8_t const *data = *src;
-    size_t const toHash = MIN(FUZZ_RNG_SEED_SIZE, *size);
-    *size -= toHash;
-    *src += toHash;
-    return XXH32(data, toHash, 0);
-}
-
-#define FUZZ_rotl32(x, r) (((x) << (r)) | ((x) >> (32 - (r))))
-
-FUZZ_STATIC uint32_t FUZZ_rand(uint32_t *state) {
-    static const uint32_t prime1 = 2654435761U;
-    static const uint32_t prime2 = 2246822519U;
-    uint32_t rand32 = *state;
-    rand32 *= prime1;
-    rand32 += prime2;
-    rand32 = FUZZ_rotl32(rand32, 13);
-    *state = rand32;
-    return rand32 >> 5;
-}
-
-/* Returns a random numer in the range [min, max]. */
-FUZZ_STATIC uint32_t FUZZ_rand32(uint32_t *state, uint32_t min, uint32_t max) {
-    uint32_t random = FUZZ_rand(state);
-    return min + (random % (max - min + 1));
-}
-
 #ifdef __cplusplus
 }
 #endif
--- a/tests/fuzz/simple_compress.c
+++ b/tests/fuzz/simple_compress.c
@ -18,28 +18,33 @@
 #include <stdio.h>
 #include "fuzz_helpers.h"
 #include "zstd.h"
+#include "zstd_helpers.h"
+#include "fuzz_data_producer.h"

 static ZSTD_CCtx *cctx = NULL;

 int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
 {
-    uint32_t seed = FUZZ_seed(&src, &size);
+    /* Give a random portion of src data to the producer, to use for
+    parameter generation. The rest will be used for (de)compression */
+    FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
+    size = FUZZ_dataProducer_reserveDataPrefix(producer);
+
    size_t const maxSize = ZSTD_compressBound(size);
-    int i;
+    size_t const bufSize = FUZZ_dataProducer_uint32Range(producer, 0, maxSize);
+
+    int const cLevel = FUZZ_dataProducer_int32Range(producer, kMinClevel, kMaxClevel);
+
    if (!cctx) {
        cctx = ZSTD_createCCtx();
        FUZZ_ASSERT(cctx);
    }
-    /* Run it 10 times over 10 output sizes. Reuse the context. */
-    for (i = 0; i < 10; ++i) {
-        int const level = (int)FUZZ_rand32(&seed, 0, 19 + 3) - 3; /* [-3, 19] */
-        size_t const bufSize = FUZZ_rand32(&seed, 0, maxSize);
-        void* rBuf = malloc(bufSize);
-        FUZZ_ASSERT(rBuf);
-        ZSTD_compressCCtx(cctx, rBuf, bufSize, src, size, level);
-        free(rBuf);
-    }

+    void *rBuf = malloc(bufSize);
+    FUZZ_ASSERT(rBuf);
+    ZSTD_compressCCtx(cctx, rBuf, bufSize, src, size, cLevel);
+    free(rBuf);
+    FUZZ_dataProducer_free(producer);
 #ifndef STATEFUL_FUZZING
    ZSTD_freeCCtx(cctx); cctx = NULL;
 #endif
--- a/tests/fuzz/simple_decompress.c
+++ b/tests/fuzz/simple_decompress.c
@ -17,26 +17,30 @@
 #include <stdio.h>
 #include "fuzz_helpers.h"
 #include "zstd.h"
+#include "fuzz_data_producer.h"

 static ZSTD_DCtx *dctx = NULL;

 int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
 {
+    /* Give a random portion of src data to the producer, to use for
+    parameter generation. The rest will be used for (de)compression */
+    FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
+    size = FUZZ_dataProducer_reserveDataPrefix(producer);

-    uint32_t seed = FUZZ_seed(&src, &size);
-    int i;
    if (!dctx) {
        dctx = ZSTD_createDCtx();
        FUZZ_ASSERT(dctx);
    }
-    /* Run it 10 times over 10 output sizes. Reuse the context. */
-    for (i = 0; i < 10; ++i) {
-        size_t const bufSize = FUZZ_rand32(&seed, 0, 2 * size);
-        void* rBuf = malloc(bufSize);
-        FUZZ_ASSERT(rBuf);
-        ZSTD_decompressDCtx(dctx, rBuf, bufSize, src, size);
-        free(rBuf);
-    }
+
+    size_t const bufSize = FUZZ_dataProducer_uint32Range(producer, 0, 10 * size);
+    void *rBuf = malloc(bufSize);
+    FUZZ_ASSERT(rBuf);
+
+    ZSTD_decompressDCtx(dctx, rBuf, bufSize, src, size);
+    free(rBuf);
+
+    FUZZ_dataProducer_free(producer);

 #ifndef STATEFUL_FUZZING
    ZSTD_freeDCtx(dctx); dctx = NULL;
--- a/tests/fuzz/simple_round_trip.c
+++ b/tests/fuzz/simple_round_trip.c
@ -20,23 +20,23 @@
 #include <string.h>
 #include "fuzz_helpers.h"
 #include "zstd_helpers.h"
-
-static const int kMaxClevel = 19;
+#include "fuzz_data_producer.h"

 static ZSTD_CCtx *cctx = NULL;
 static ZSTD_DCtx *dctx = NULL;
-static uint32_t seed;

 static size_t roundTripTest(void *result, size_t resultCapacity,
                            void *compressed, size_t compressedCapacity,
-                            const void *src, size_t srcSize)
+                            const void *src, size_t srcSize,
+                            FUZZ_dataProducer_t *producer)
 {
    size_t cSize;
-    if (FUZZ_rand(&seed) & 1) {
-        FUZZ_setRandomParameters(cctx, srcSize, &seed);
+    if (FUZZ_dataProducer_uint32Range(producer, 0, 1)) {
+        FUZZ_setRandomParameters(cctx, srcSize, producer);
        cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize);
    } else {
-        int const cLevel = FUZZ_rand(&seed) % kMaxClevel;
+      int const cLevel = FUZZ_dataProducer_int32Range(producer, kMinClevel, kMaxClevel);
+
        cSize = ZSTD_compressCCtx(
            cctx, compressed, compressedCapacity, src, srcSize, cLevel);
    }
@ -51,12 +51,17 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
    size_t cBufSize = ZSTD_compressBound(size);
    void* cBuf;

-    seed = FUZZ_seed(&src, &size);
+    /* Give a random portion of src data to the producer, to use for
+    parameter generation. The rest will be used for (de)compression */
+    FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
+    size = FUZZ_dataProducer_reserveDataPrefix(producer);
+
    /* Half of the time fuzz with a 1 byte smaller output size.
     * This will still succeed because we don't use a dictionary, so the dictID
     * field is empty, giving us 4 bytes of overhead.
     */
-    cBufSize -= FUZZ_rand32(&seed, 0, 1);
+    cBufSize -= FUZZ_dataProducer_uint32Range(producer, 0, 1);
+
    cBuf = malloc(cBufSize);

    FUZZ_ASSERT(cBuf && rBuf);
@ -72,13 +77,14 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)

    {
        size_t const result =
-            roundTripTest(rBuf, rBufSize, cBuf, cBufSize, src, size);
+            roundTripTest(rBuf, rBufSize, cBuf, cBufSize, src, size, producer);
        FUZZ_ZASSERT(result);
        FUZZ_ASSERT_MSG(result == size, "Incorrect regenerated size");
        FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!");
    }
    free(rBuf);
    free(cBuf);
+    FUZZ_dataProducer_free(producer);
 #ifndef STATEFUL_FUZZING
    ZSTD_freeCCtx(cctx); cctx = NULL;
    ZSTD_freeDCtx(dctx); dctx = NULL;
--- a/tests/fuzz/stream_decompress.c
+++ b/tests/fuzz/stream_decompress.c
@ -19,6 +19,7 @@
 #include <stdio.h>
 #include "fuzz_helpers.h"
 #include "zstd.h"
+#include "fuzz_data_producer.h"

 static size_t const kBufSize = ZSTD_BLOCKSIZE_MAX;

@ -26,22 +27,23 @@ static ZSTD_DStream *dstream = NULL;
 static void* buf = NULL;
 uint32_t seed;

-static ZSTD_outBuffer makeOutBuffer(void)
+static ZSTD_outBuffer makeOutBuffer(FUZZ_dataProducer_t *producer)
 {
  ZSTD_outBuffer buffer = { buf, 0, 0 };

-  buffer.size = (FUZZ_rand(&seed) % kBufSize) + 1;
+  buffer.size = (FUZZ_dataProducer_uint32Range(producer, 1, kBufSize));
  FUZZ_ASSERT(buffer.size <= kBufSize);

  return buffer;
 }

-static ZSTD_inBuffer makeInBuffer(const uint8_t **src, size_t *size)
+static ZSTD_inBuffer makeInBuffer(const uint8_t **src, size_t *size,
+                                  FUZZ_dataProducer_t *producer)
 {
  ZSTD_inBuffer buffer = { *src, 0, 0 };

  FUZZ_ASSERT(*size > 0);
-  buffer.size = (FUZZ_rand(&seed) % *size) + 1;
+  buffer.size = (FUZZ_dataProducer_uint32Range(producer, 1, *size));
  FUZZ_ASSERT(buffer.size <= *size);
  *src += buffer.size;
  *size -= buffer.size;
@ -51,13 +53,16 @@ static ZSTD_inBuffer makeInBuffer(const uint8_t **src, size_t *size)

 int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
 {
-    seed = FUZZ_seed(&src, &size);
+    /* Give a random portion of src data to the producer, to use for
+    parameter generation. The rest will be used for (de)compression */
+    FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
+    size = FUZZ_dataProducer_reserveDataPrefix(producer);

    /* Allocate all buffers and contexts if not already allocated */
    if (!buf) {
      buf = malloc(kBufSize);
-      FUZZ_ASSERT(buf);
-    }
+        FUZZ_ASSERT(buf);
+      }

    if (!dstream) {
        dstream = ZSTD_createDStream();
@ -67,9 +72,9 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
    }

    while (size > 0) {
-        ZSTD_inBuffer in = makeInBuffer(&src, &size);
+        ZSTD_inBuffer in = makeInBuffer(&src, &size, producer);
        while (in.pos != in.size) {
-            ZSTD_outBuffer out = makeOutBuffer();
+            ZSTD_outBuffer out = makeOutBuffer(producer);
            size_t const rc = ZSTD_decompressStream(dstream, &out, &in);
            if (ZSTD_isError(rc)) goto error;
        }
@ -79,5 +84,6 @@ error:
 #ifndef STATEFUL_FUZZING
    ZSTD_freeDStream(dstream); dstream = NULL;
 #endif
+    FUZZ_dataProducer_free(producer);
    return 0;
 }
--- a/tests/fuzz/stream_round_trip.c
+++ b/tests/fuzz/stream_round_trip.c
@ -20,31 +20,33 @@
 #include <string.h>
 #include "fuzz_helpers.h"
 #include "zstd_helpers.h"
+#include "fuzz_data_producer.h"

 ZSTD_CCtx *cctx = NULL;
 static ZSTD_DCtx *dctx = NULL;
 static uint8_t* cBuf = NULL;
 static uint8_t* rBuf = NULL;
 static size_t bufSize = 0;
-static uint32_t seed;

-static ZSTD_outBuffer makeOutBuffer(uint8_t *dst, size_t capacity)
+static ZSTD_outBuffer makeOutBuffer(uint8_t *dst, size_t capacity,
+                                    FUZZ_dataProducer_t *producer)
 {
    ZSTD_outBuffer buffer = { dst, 0, 0 };

    FUZZ_ASSERT(capacity > 0);
-    buffer.size = (FUZZ_rand(&seed) % capacity) + 1;
+    buffer.size = (FUZZ_dataProducer_uint32Range(producer, 1, capacity));
    FUZZ_ASSERT(buffer.size <= capacity);

    return buffer;
 }

-static ZSTD_inBuffer makeInBuffer(const uint8_t **src, size_t *size)
+static ZSTD_inBuffer makeInBuffer(const uint8_t **src, size_t *size,
+                                  FUZZ_dataProducer_t *producer)
 {
    ZSTD_inBuffer buffer = { *src, 0, 0 };

    FUZZ_ASSERT(*size > 0);
-    buffer.size = (FUZZ_rand(&seed) % *size) + 1;
+    buffer.size = (FUZZ_dataProducer_uint32Range(producer, 1, *size));
    FUZZ_ASSERT(buffer.size <= *size);
    *src += buffer.size;
    *size -= buffer.size;
@ -53,23 +55,24 @@ static ZSTD_inBuffer makeInBuffer(const uint8_t **src, size_t *size)
 }

 static size_t compress(uint8_t *dst, size_t capacity,
-                       const uint8_t *src, size_t srcSize)
+                       const uint8_t *src, size_t srcSize,
+                     FUZZ_dataProducer_t *producer)
 {
    size_t dstSize = 0;
    ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
-    FUZZ_setRandomParameters(cctx, srcSize, &seed);
+    FUZZ_setRandomParameters(cctx, srcSize, producer);

    while (srcSize > 0) {
-        ZSTD_inBuffer in = makeInBuffer(&src, &srcSize);
+        ZSTD_inBuffer in = makeInBuffer(&src, &srcSize, producer);
        /* Mode controls the action. If mode == -1 we pick a new mode */
        int mode = -1;
        while (in.pos < in.size || mode != -1) {
-            ZSTD_outBuffer out = makeOutBuffer(dst, capacity);
+            ZSTD_outBuffer out = makeOutBuffer(dst, capacity, producer);
            /* Previous action finished, pick a new mode. */
-            if (mode == -1) mode = FUZZ_rand(&seed) % 10;
+            if (mode == -1) mode = FUZZ_dataProducer_uint32Range(producer, 0, 9);
            switch (mode) {
-                case 0: /* fall-though */
-                case 1: /* fall-though */
+                case 0: /* fall-through */
+                case 1: /* fall-through */
                case 2: {
                    size_t const ret =
                        ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush);
@ -85,9 +88,9 @@ static size_t compress(uint8_t *dst, size_t capacity,
                    /* Reset the compressor when the frame is finished */
                    if (ret == 0) {
                        ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
-                        if ((FUZZ_rand(&seed) & 7) == 0) {
+                        if (FUZZ_dataProducer_uint32Range(producer, 0, 7) == 0) {
                            size_t const remaining = in.size - in.pos;
-                            FUZZ_setRandomParameters(cctx, remaining, &seed);
+                            FUZZ_setRandomParameters(cctx, remaining, producer);
                        }
                        mode = -1;
                    }
@ -107,7 +110,7 @@ static size_t compress(uint8_t *dst, size_t capacity,
    }
    for (;;) {
        ZSTD_inBuffer in = {NULL, 0, 0};
-        ZSTD_outBuffer out = makeOutBuffer(dst, capacity);
+        ZSTD_outBuffer out = makeOutBuffer(dst, capacity, producer);
        size_t const ret = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end);
        FUZZ_ZASSERT(ret);

@ -122,10 +125,13 @@ static size_t compress(uint8_t *dst, size_t capacity,

 int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
 {
-    size_t neededBufSize;
+    /* Give a random portion of src data to the producer, to use for
+    parameter generation. The rest will be used for (de)compression */
+    FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
+    size = FUZZ_dataProducer_reserveDataPrefix(producer);

-    seed = FUZZ_seed(&src, &size);
-    neededBufSize = ZSTD_compressBound(size) * 2;
+    size_t neededBufSize;
+    neededBufSize = ZSTD_compressBound(size) * 5;

    /* Allocate all buffers and contexts if not already allocated */
    if (neededBufSize > bufSize) {
@ -146,7 +152,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
    }

    {
-        size_t const cSize = compress(cBuf, neededBufSize, src, size);
+        size_t const cSize = compress(cBuf, neededBufSize, src, size, producer);
        size_t const rSize =
            ZSTD_decompressDCtx(dctx, rBuf, neededBufSize, cBuf, cSize);
        FUZZ_ZASSERT(rSize);
@ -154,6 +160,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
        FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!");
    }

+    FUZZ_dataProducer_free(producer);
 #ifndef STATEFUL_FUZZING
    ZSTD_freeCCtx(cctx); cctx = NULL;
    ZSTD_freeDCtx(dctx); dctx = NULL;
--- a/tests/fuzz/zstd_frame_info.c
+++ b/tests/fuzz/zstd_frame_info.c
@ -21,10 +21,6 @@
 int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
 {
    ZSTD_frameHeader zfh;
-    /* Consume the seed to be compatible with the corpora of other decompression
-     * fuzzers.
-     */
-    FUZZ_seed(&src, &size);
    /* You can fuzz any helper functions here that are fast, and take zstd
     * compressed data as input. E.g. don't expect the input to be a dictionary,
     * so don't fuzz ZSTD_getDictID_fromDict().
--- a/tests/fuzz/zstd_helpers.c
+++ b/tests/fuzz/zstd_helpers.c
@ -17,53 +17,56 @@
 #include "zstd.h"
 #include "zdict.h"

+const int kMinClevel = -3;
+const int kMaxClevel = 19;
+
 static void set(ZSTD_CCtx *cctx, ZSTD_cParameter param, int value)
 {
    FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, param, value));
 }

 static void setRand(ZSTD_CCtx *cctx, ZSTD_cParameter param, unsigned min,
-                    unsigned max, uint32_t *state) {
-    unsigned const value = FUZZ_rand32(state, min, max);
+                    unsigned max, FUZZ_dataProducer_t *producer) {
+    unsigned const value = FUZZ_dataProducer_uint32Range(producer, min, max);
    set(cctx, param, value);
 }

-ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, uint32_t *state)
+ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, FUZZ_dataProducer_t *producer)
 {
    /* Select compression parameters */
    ZSTD_compressionParameters cParams;
-    cParams.windowLog = FUZZ_rand32(state, ZSTD_WINDOWLOG_MIN, 15);
-    cParams.hashLog = FUZZ_rand32(state, ZSTD_HASHLOG_MIN, 15);
-    cParams.chainLog = FUZZ_rand32(state, ZSTD_CHAINLOG_MIN, 16);
-    cParams.searchLog = FUZZ_rand32(state, ZSTD_SEARCHLOG_MIN, 9);
-    cParams.minMatch = FUZZ_rand32(state, ZSTD_MINMATCH_MIN,
+    cParams.windowLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, 15);
+    cParams.hashLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_HASHLOG_MIN, 15);
+    cParams.chainLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_CHAINLOG_MIN, 16);
+    cParams.searchLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_SEARCHLOG_MIN, 9);
+    cParams.minMatch = FUZZ_dataProducer_uint32Range(producer, ZSTD_MINMATCH_MIN,
                                          ZSTD_MINMATCH_MAX);
-    cParams.targetLength = FUZZ_rand32(state, 0, 512);
-    cParams.strategy = FUZZ_rand32(state, ZSTD_STRATEGY_MIN, ZSTD_STRATEGY_MAX);
+    cParams.targetLength = FUZZ_dataProducer_uint32Range(producer, 0, 512);
+    cParams.strategy = FUZZ_dataProducer_uint32Range(producer, ZSTD_STRATEGY_MIN, ZSTD_STRATEGY_MAX);
    return ZSTD_adjustCParams(cParams, srcSize, 0);
 }

-ZSTD_frameParameters FUZZ_randomFParams(uint32_t *state)
+ZSTD_frameParameters FUZZ_randomFParams(FUZZ_dataProducer_t *producer)
 {
    /* Select frame parameters */
    ZSTD_frameParameters fParams;
-    fParams.contentSizeFlag = FUZZ_rand32(state, 0, 1);
-    fParams.checksumFlag = FUZZ_rand32(state, 0, 1);
-    fParams.noDictIDFlag = FUZZ_rand32(state, 0, 1);
+    fParams.contentSizeFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1);
+    fParams.checksumFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1);
+    fParams.noDictIDFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1);
    return fParams;
 }

-ZSTD_parameters FUZZ_randomParams(size_t srcSize, uint32_t *state)
+ZSTD_parameters FUZZ_randomParams(size_t srcSize, FUZZ_dataProducer_t *producer)
 {
    ZSTD_parameters params;
-    params.cParams = FUZZ_randomCParams(srcSize, state);
-    params.fParams = FUZZ_randomFParams(state);
+    params.cParams = FUZZ_randomCParams(srcSize, producer);
+    params.fParams = FUZZ_randomFParams(producer);
    return params;
 }

-void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, uint32_t *state)
+void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer_t *producer)
 {
-    ZSTD_compressionParameters cParams = FUZZ_randomCParams(srcSize, state);
+    ZSTD_compressionParameters cParams = FUZZ_randomCParams(srcSize, producer);
    set(cctx, ZSTD_c_windowLog, cParams.windowLog);
    set(cctx, ZSTD_c_hashLog, cParams.hashLog);
    set(cctx, ZSTD_c_chainLog, cParams.chainLog);
@ -72,30 +75,30 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, uint32_t *state)
    set(cctx, ZSTD_c_targetLength, cParams.targetLength);
    set(cctx, ZSTD_c_strategy, cParams.strategy);
    /* Select frame parameters */
-    setRand(cctx, ZSTD_c_contentSizeFlag, 0, 1, state);
-    setRand(cctx, ZSTD_c_checksumFlag, 0, 1, state);
-    setRand(cctx, ZSTD_c_dictIDFlag, 0, 1, state);
+    setRand(cctx, ZSTD_c_contentSizeFlag, 0, 1, producer);
+    setRand(cctx, ZSTD_c_checksumFlag, 0, 1, producer);
+    setRand(cctx, ZSTD_c_dictIDFlag, 0, 1, producer);
    /* Select long distance matching parameters */
-    setRand(cctx, ZSTD_c_enableLongDistanceMatching, 0, 1, state);
-    setRand(cctx, ZSTD_c_ldmHashLog, ZSTD_HASHLOG_MIN, 16, state);
+    setRand(cctx, ZSTD_c_enableLongDistanceMatching, 0, 1, producer);
+    setRand(cctx, ZSTD_c_ldmHashLog, ZSTD_HASHLOG_MIN, 16, producer);
    setRand(cctx, ZSTD_c_ldmMinMatch, ZSTD_LDM_MINMATCH_MIN,
-            ZSTD_LDM_MINMATCH_MAX, state);
+            ZSTD_LDM_MINMATCH_MAX, producer);
    setRand(cctx, ZSTD_c_ldmBucketSizeLog, 0, ZSTD_LDM_BUCKETSIZELOG_MAX,
-            state);
+            producer);
    setRand(cctx, ZSTD_c_ldmHashRateLog, ZSTD_LDM_HASHRATELOG_MIN,
-            ZSTD_LDM_HASHRATELOG_MAX, state);
+            ZSTD_LDM_HASHRATELOG_MAX, producer);
    /* Set misc parameters */
-    setRand(cctx, ZSTD_c_nbWorkers, 0, 2, state);
-    setRand(cctx, ZSTD_c_rsyncable, 0, 1, state);
-    setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, state);
-    setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, state);
-    setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, state);
-    if (FUZZ_rand32(state, 0, 1) == 0) {
-      setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, state);
+    setRand(cctx, ZSTD_c_nbWorkers, 0, 2, producer);
+    setRand(cctx, ZSTD_c_rsyncable, 0, 1, producer);
+    setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, producer);
+    setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, producer);
+    setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, producer);
+    if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) {
+      setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, producer);
    }
 }

-FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, uint32_t *state)
+FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, FUZZ_dataProducer_t *producer)
 {
    size_t const dictSize = MAX(srcSize / 8, 1024);
    size_t const totalSampleSize = dictSize * 11;
@ -110,7 +113,7 @@ FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, uint32_t *state)

    for (sample = 0; sample < nbSamples; ++sample) {
      size_t const remaining = totalSampleSize - pos;
-      size_t const offset = FUZZ_rand32(state, 0, MAX(srcSize, 1) - 1);
+      size_t const offset = FUZZ_dataProducer_uint32Range(producer, 0, MAX(srcSize, 1) - 1);
      size_t const limit = MIN(srcSize - offset, remaining);
      size_t const toCopy = MIN(limit, remaining / (nbSamples - sample));
      memcpy(samples + pos, src + offset, toCopy);
--- a/tests/fuzz/zstd_helpers.h
+++ b/tests/fuzz/zstd_helpers.h
@ -17,17 +17,21 @@
 #define ZSTD_STATIC_LINKING_ONLY

 #include "zstd.h"
+#include "fuzz_data_producer.h"
 #include <stdint.h>

 #ifdef __cplusplus
 extern "C" {
 #endif

-void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, uint32_t *state);
+extern const int kMinClevel;
+extern const int kMaxClevel;

-ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, uint32_t *state);
-ZSTD_frameParameters FUZZ_randomFParams(uint32_t *state);
-ZSTD_parameters FUZZ_randomParams(size_t srcSize, uint32_t *state);
+void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer_t *producer);
+
+ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, FUZZ_dataProducer_t *producer);
+ZSTD_frameParameters FUZZ_randomFParams(FUZZ_dataProducer_t *producer);
+ZSTD_parameters FUZZ_randomParams(size_t srcSize, FUZZ_dataProducer_t *producer);

 typedef struct {
  void* buff;
@ -38,8 +42,7 @@ typedef struct {
 * NOTE: Don't use this to train production dictionaries, it is only optimized
 * for speed, and doesn't care about dictionary quality.
 */
-FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, uint32_t *state);
-
+FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, FUZZ_dataProducer_t *producer);

 #ifdef __cplusplus
 }
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@ -304,6 +304,28 @@ static int FUZ_mallocTests(unsigned seed, double compressibility, unsigned part)

 #endif

+/* Rebuilds `size` bytes into dst from a sequence list and the original src.
+ * Every sequence except the last one in the array is replayed: litLength
+ * bytes are copied from src, then matchLength bytes are copied from dst at
+ * distance `offset` behind the write position. Whatever is still unaccounted
+ * for after the loop is copied verbatim from src. */
+static void FUZ_decodeSequences(BYTE* dst, ZSTD_Sequence* seqs, size_t seqsSize, BYTE* src, size_t size)
+{
+    size_t i;
+    size_t j;
+    /* `i + 1 < seqsSize` instead of `i < seqsSize - 1`: with an empty
+     * sequence array the unsigned subtraction would wrap to SIZE_MAX and the
+     * loop would read far beyond seqs. */
+    for(i = 0; i + 1 < seqsSize; ++i) {
+        assert(dst + seqs[i].litLength + seqs[i].matchLength < dst + size);
+        assert(src + seqs[i].litLength + seqs[i].matchLength < src + size);
+
+        /* literals come straight from the source */
+        memcpy(dst, src, seqs[i].litLength);
+        dst += seqs[i].litLength;
+        src += seqs[i].litLength;
+        size -= seqs[i].litLength;
+
+        /* byte-by-byte on purpose: when offset < matchLength the match reads
+         * bytes this very loop has just written */
+        for (j = 0; j < seqs[i].matchLength; ++j)
+            dst[j] = dst[j - seqs[i].offset];
+        dst += seqs[i].matchLength;
+        src += seqs[i].matchLength;
+        size -= seqs[i].matchLength;
+    }
+    /* trailing bytes not covered by the replayed sequences */
+    memcpy(dst, src, size);
+}
+
 /*=============================================
 *   Unit tests
 =============================================*/
@ -1960,6 +1982,33 @@ static int basicUnitTests(U32 const seed, double compressibility)
        DISPLAYLEVEL(3, "OK \n");
    }

+    DISPLAYLEVEL(3, "test%3i : ZSTD_getSequences decode from sequences test : ", testNb++);
+    {
+        size_t srcSize = 100 KB;
+        BYTE* src = (BYTE*)CNBuffer;
+        BYTE* decoded = (BYTE*)compressedBuffer;
+
+        ZSTD_CCtx* cctx = ZSTD_createCCtx();
+        ZSTD_Sequence* seqs = (ZSTD_Sequence*)malloc(srcSize * sizeof(ZSTD_Sequence));
+        size_t seqsSize;
+
+        if (seqs == NULL) goto _output_error;
+        assert(cctx != NULL);
+
+        /* Populate src with random data */
+        RDG_genBuffer(CNBuffer, srcSize, compressibility, 0., seed);
+
+        /* get the sequences */
+        seqsSize = ZSTD_getSequences(cctx, seqs, srcSize, src, srcSize);
+
+        /* "decode" and compare the sequences */
+        FUZ_decodeSequences(decoded, seqs, seqsSize, src, srcSize);
+        assert(!memcmp(CNBuffer, compressedBuffer, srcSize));
+
+        ZSTD_freeCCtx(cctx);
+        free(seqs);
+    }
+
    /* Multiple blocks of zeros test */
    #define LONGZEROSLENGTH 1000000 /* 1MB of zeros */
    DISPLAYLEVEL(3, "test%3i : compress %u zeroes : ", testNb++, LONGZEROSLENGTH);
@ -1972,7 +2021,6 @@ static int basicUnitTests(U32 const seed, double compressibility)
      if (r != LONGZEROSLENGTH) goto _output_error; }
    DISPLAYLEVEL(3, "OK \n");

-
    /* All zeroes test (test bug #137) */
    #define ZEROESLENGTH 100
    DISPLAYLEVEL(3, "test%3i : compress %u zeroes : ", testNb++, ZEROESLENGTH);
@ -2150,6 +2198,79 @@ static int basicUnitTests(U32 const seed, double compressibility)
    }
    DISPLAYLEVEL(3, "OK \n");

+    DISPLAYLEVEL(3, "test%3i : table cleanliness through index reduction : ", testNb++);
+    {
+        int cLevel;
+        size_t approxIndex = 0;
+        size_t maxIndex = ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX)); /* ZSTD_CURRENT_MAX from zstd_compress_internal.h */
+
+        /* Provision enough space in a static context so that we can do all
+         * this without ever reallocating, which would reset the indices. */
+        size_t const staticCCtxSize = ZSTD_estimateCStreamSize(22);
+        void* const staticCCtxBuffer = malloc(staticCCtxSize);
+        ZSTD_CCtx* cctx = ZSTD_initStaticCCtx(staticCCtxBuffer, staticCCtxSize);
+
+        /* bump the indices so the following compressions happen at high
+         * indices. */
+        {
+            ZSTD_outBuffer out = { compressedBuffer, compressedBufferSize, 0 };
+            ZSTD_inBuffer in = { CNBuffer, CNBuffSize, 0 };
+            ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
+            CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, -500));
+            while (approxIndex <= (maxIndex / 4) * 3) {
+                CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush));
+                approxIndex += in.pos;
+                CHECK(in.pos == in.size);
+                in.pos = 0;
+                out.pos = 0;
+            }
+            CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end));
+        }
+
+        /* spew a bunch of stuff into the table area */
+        for (cLevel = 1; cLevel <= 22; cLevel++) {
+            ZSTD_outBuffer out = { compressedBuffer, compressedBufferSize / cLevel, 0 };
+            ZSTD_inBuffer in = { CNBuffer, CNBuffSize, 0 };
+            ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
+            CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel));
+            CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush));
+            CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end));
+            approxIndex += in.pos;
+        }
+
+        /* now crank the indices so we overflow */
+        {
+            ZSTD_outBuffer out = { compressedBuffer, compressedBufferSize, 0 };
+            ZSTD_inBuffer in = { CNBuffer, CNBuffSize, 0 };
+            ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
+            CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, -500));
+            while (approxIndex <= maxIndex) {
+                CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush));
+                approxIndex += in.pos;
+                CHECK(in.pos == in.size);
+                in.pos = 0;
+                out.pos = 0;
+            }
+            CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end));
+        }
+
+        /* do a bunch of compressions again in low indices and ensure we don't
+         * hit untracked invalid indices */
+        for (cLevel = 1; cLevel <= 22; cLevel++) {
+            ZSTD_outBuffer out = { compressedBuffer, compressedBufferSize / cLevel, 0 };
+            ZSTD_inBuffer in = { CNBuffer, CNBuffSize, 0 };
+            ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
+            CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel));
+            CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush));
+            CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end));
+            approxIndex += in.pos;
+        }
+
+        ZSTD_freeCCtx(cctx);
+        free(staticCCtxBuffer);
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
 _end:
    free(CNBuffer);
    free(compressedBuffer);
--- a/tests/golden-compression/huffman-compressed-larger
+++ b/tests/golden-compression/huffman-compressed-larger
--- a/tests/golden-decompression/rle-first-block.zst
+++ b/tests/golden-decompression/rle-first-block.zst
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@ -241,6 +241,11 @@ $ZSTD -f tmp && die "attempt to compress a non existing file"
 test -f tmp.zst  # destination file should still be present
 rm tmp*

+println "\n===> decompression only tests "  # decodes a pre-built golden frame; nothing is compressed in this section
+dd bs=1048576 count=1 if=/dev/zero of=tmp  # 1 MiB of zeros as reference; dd is used because POSIX head does not specify -c
+$ZSTD -d -o tmp1 "$TESTDIR/golden-decompression/rle-first-block.zst"
+$DIFF -s tmp1 tmp  # decoded output must be identical to the 1 MiB zero reference
+rm tmp*

 println "test : compress multiple files"
 println hello > tmp1
@ -264,6 +269,24 @@ if [ "$?" -eq 139 ]; then
 fi
 rm tmp*

+println "test : compress multiple files into an output directory, --output-dir-flat"
+println henlo > tmp1  # first input sits in the current directory
+mkdir tmpInputTestDir
+mkdir tmpInputTestDir/we
+mkdir tmpInputTestDir/we/must
+mkdir tmpInputTestDir/we/must/go
+mkdir tmpInputTestDir/we/must/go/deeper  # second input is placed several directory levels down
+println cool > tmpInputTestDir/we/must/go/deeper/tmp2
+mkdir tmpOutDir
+$ZSTD tmp1 tmpInputTestDir/we/must/go/deeper/tmp2 --output-dir-flat tmpOutDir
+test -f tmpOutDir/tmp1.zst  # both results are expected directly under tmpOutDir, without the input's sub-path
+test -f tmpOutDir/tmp2.zst
+println "test : decompress multiple files into an output directory, --output-dir-flat"
+mkdir tmpOutDirDecomp
+$ZSTD tmpOutDir/ -r -d --output-dir-flat tmpOutDirDecomp  # feeds back the directory filled by the compression step above
+test -f tmpOutDirDecomp/tmp2  # only existence is checked; file contents are not compared here
+test -f tmpOutDirDecomp/tmp1
+rm -rf tmp*  # -rf because the temporaries now include directories

 println "\n===>  Advanced compression parameters "
 println "Hello world!" | $ZSTD --zstd=windowLog=21,      - -o tmp.zst && die "wrong parameters not detected!"
@ -407,7 +430,6 @@ ls -ls tmp* # check size of tmpdec (should be 2*(tmp1 + tmp2 + tmp3))
 println "compress multiple files including a missing one (notHere) : "
 $ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!"

-
 println "\n===>  stream-size mode"

 ./datagen -g11000 > tmp
@ -638,8 +660,8 @@ $ZSTD -t tmpSplit.* && die "bad file not detected !"

 println "\n===>  golden files tests "

-$ZSTD -t -r "$TESTDIR/files"
-$ZSTD -c -r "$TESTDIR/files" | $ZSTD -t
+$ZSTD -t -r "$TESTDIR/golden-compression"
+$ZSTD -c -r "$TESTDIR/golden-compression" | $ZSTD -t


 println "\n===>  benchmark mode tests "
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c
@ -1151,6 +1151,16 @@ static int basicUnitTests(U32 seed, double compressibility)
    }
    DISPLAYLEVEL(3, "OK \n");

+    DISPLAYLEVEL(3, "test%3i : ZSTD_c_srcSizeHint bounds : ", testNb++);
+    ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters);  /* NOTE(review): return value is not checked here */
+    CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_srcSizeHint, INT_MAX));  /* upper bound: INT_MAX must be accepted */
+    {   int srcSizeHint;
+        CHECK_Z(ZSTD_CCtx_getParameter(zc, ZSTD_c_srcSizeHint, &srcSizeHint));
+        CHECK(!(srcSizeHint == INT_MAX), "srcSizeHint doesn't match");  /* read-back must equal the value just set; judging by the message, CHECK reports when its condition is true */
+    }
+    CHECK(!ZSTD_isError(ZSTD_CCtx_setParameter(zc, ZSTD_c_srcSizeHint, -1)), "Out of range doesn't error");  /* lower bound: -1 must be rejected with an error code */
+    DISPLAYLEVEL(3, "OK \n");
+
    /* Overlen overwriting window data bug */
    DISPLAYLEVEL(3, "test%3i : wildcopy doesn't overwrite potential match data : ", testNb++);
    {   /* This test has a window size of 1024 bytes and consists of 3 blocks: