From e103d7b4a64c6321bb3ac4df07899ea09c3edbb2 Mon Sep 17 00:00:00 2001
From: Nick Terrell
Date: Fri, 1 May 2020 16:11:47 -0700
Subject: [PATCH] Fix superblock mode (#2100)

Fixes:

Enable RLE blocks for superblock mode.

Fix the limitation that the literals block must shrink. Instead, when we're
within 200 bytes of the next header byte size, we will just use the next one
up. That way we should (almost?) always have space for the table.

Remove the limitation that the first sub-block MUST have compressed literals
and be compressed. Now one sub-block MUST be compressed (otherwise we fall
back to a raw block, which is okay, since that is streamable). If no block
has compressed literals that is okay, we will fix up the next Huffman table.

Handle the case where the last sub-block is uncompressed (maybe it is very
small). Before, it would skip superblock mode in this case; now we allow the
last sub-block to be uncompressed. To do this we need to regenerate the
correct repcodes.

Respect disableLiteralsCompression in superblock mode.

Fix superblock mode to handle a block consisting of only compressed literals.

Fix an off-by-one error in superblock mode that disabled it whenever there
were last literals.

Fix superblock mode with long literals/matches (> 0xFFFF).

Allow superblock mode to repeat Huffman tables.

Respect ZSTD_minGain().

Tests:

Simple check for the condition in #2096.

When the simple_round_trip fuzzer enables superblock mode, it checks that the
compressed size isn't expanded too much.

Remaining limitations:

O(targetCBlockSize^2) because we recompute statistics for every sequence.

Unable to split literals of length > targetCBlockSize into multiple sequences.

Refuses to generate sub-blocks that don't shrink the compressed data, so we
could end up with large sub-blocks. We should emit those sections as
uncompressed blocks instead.

...

Fixes #2096
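For readers who want to exercise the code paths this patch touches: superblock mode is driven entirely by the public ZSTD_c_targetCBlockSize parameter. A minimal sketch using only stable public API (the 1024-byte target and level 19 are arbitrary choices for illustration, mirroring the pattern of the new fuzzer.c test at the end of this patch):

    #include <zstd.h>

    /* Compress src while asking the compressor to keep each compressed
     * block near 1 KB, which routes compression through the superblock
     * path fixed by this patch. */
    static size_t compressWithTargetCBlockSize(void* dst, size_t dstCapacity,
                                               const void* src, size_t srcSize)
    {
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        size_t cSize;
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetCBlockSize, 1024);
        cSize = ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
        ZSTD_freeCCtx(cctx);
        return cSize;  /* check with ZSTD_isError() */
    }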
---
 lib/common/huf.h                        |   1 +
 lib/common/zstd_internal.h              |  25 ++
 lib/compress/huf_compress.c             |   2 +-
 lib/compress/zstd_compress.c            |  52 +++--
 lib/compress/zstd_compress_internal.h   |  50 ++++
 lib/compress/zstd_compress_literals.c   |   8 +-
 lib/compress/zstd_compress_superblock.c | 289 +++++++++++++++++-------
 lib/compress/zstd_compress_superblock.h |   1 +
 lib/compress/zstd_opt.c                 |  24 --
 tests/fuzz/simple_round_trip.c          |  26 ++-
 tests/fuzzer.c                          |  34 ++-
 11 files changed, 378 insertions(+), 134 deletions(-)

diff --git a/lib/common/huf.h b/lib/common/huf.h
index 0d27ccdb..23e184d4 100644
--- a/lib/common/huf.h
+++ b/lib/common/huf.h
@@ -189,6 +189,7 @@ size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSym
 size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
 size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
+int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
 
 typedef enum {
    HUF_repeat_none,  /**< Cannot use the previous table */

diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h
index 2103ef85..950b789c 100644
--- a/lib/common/zstd_internal.h
+++ b/lib/common/zstd_internal.h
@@ -291,6 +291,31 @@ typedef struct {
     U32 longLengthPos;
 } seqStore_t;
 
+typedef struct {
+    U32 litLength;
+    U32 matchLength;
+} ZSTD_sequenceLength;
+
+/**
+ * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences
+ * indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength.
+ */
+MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq)
+{
+    ZSTD_sequenceLength seqLen;
+    seqLen.litLength = seq->litLength;
+    seqLen.matchLength = seq->matchLength + MINMATCH;
+    if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
+        if (seqStore->longLengthID == 1) {
+            seqLen.litLength += 0xFFFF;
+        }
+        if (seqStore->longLengthID == 2) {
+            seqLen.matchLength += 0xFFFF;
+        }
+    }
+    return seqLen;
+}
+
 /**
  * Contains the compressed frame size and an upper-bound for the decompressed frame size.
  * Note: before using `compressedSize`, check for errors using ZSTD_isError().
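A note on the helper just added to zstd_internal.h: seqDef stores litLength and matchLength in 16-bit fields, so at most one sequence per block may exceed 0xFFFF, flagged via longLengthPos/longLengthID. A hedged illustration of the read side (printLongSequence is a hypothetical helper; it assumes zstd's internal headers and only restates what ZSTD_getSequenceLength guarantees):

    static void printLongSequence(seqStore_t const* seqStore)
    {
        if (seqStore->longLengthID != 0) {  /* block contains one long sequence */
            seqDef const* const seq = seqStore->sequencesStart + seqStore->longLengthPos;
            ZSTD_sequenceLength const len = ZSTD_getSequenceLength(seqStore, seq);
            /* longLengthID == 1: len.litLength   == seq->litLength + 0xFFFF
             * longLengthID == 2: len.matchLength == seq->matchLength + MINMATCH + 0xFFFF */
            DEBUGLOG(2, "long sequence: litLength=%u matchLength=%u",
                     len.litLength, len.matchLength);
        }
    }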
diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c
index 5cab31d0..f54123c5 100644
--- a/lib/compress/huf_compress.c
+++ b/lib/compress/huf_compress.c
@@ -417,7 +417,7 @@ size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count,
     return nbBits >> 3;
 }
 
-static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
+int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
   int bad = 0;
   int s;
   for (s = 0; s <= (int)maxSymbolValue; ++s) {

diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 552ca9fb..d12a1e6f 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -1928,21 +1928,6 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
         mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
 }
 
-static int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
-{
-    switch (cctxParams->literalCompressionMode) {
-    case ZSTD_lcm_huffman:
-        return 0;
-    case ZSTD_lcm_uncompressed:
-        return 1;
-    default:
-        assert(0 /* impossible: pre-validated */);
-        /* fall-through */
-    case ZSTD_lcm_auto:
-        return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
-    }
-}
-
 /* ZSTD_useTargetCBlockSize():
  * Returns if target compressed block size param is being used.
  * If used, compression will do best effort to make a compressed block size to be around targetCBlockSize.
@@ -2387,6 +2372,18 @@ static int ZSTD_isRLE(const BYTE *ip, size_t length) {
     return 1;
 }
 
+/* Returns true if the given block may be RLE.
+ * This is just a heuristic based on the compressibility.
+ * It may return both false positives and false negatives.
+ */
+static int ZSTD_maybeRLE(seqStore_t const* seqStore)
+{
+    size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);
+    size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart);
+
+    return nbSeqs < 4 && nbLits < 10;
+}
+
 static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc)
 {
     ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
@@ -2463,6 +2460,16 @@ static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc,
 {
     DEBUGLOG(6, "Attempting ZSTD_compressSuperBlock()");
     if (bss == ZSTDbss_compress) {
+        if (/* We don't want to emit our first block as a RLE even if it qualifies because
+             * doing so will cause the decoder (cli only) to throw a "should consume all input error."
+             * This is only an issue for zstd <= v1.4.3
+             */
+            !zc->isFirstBlock &&
+            ZSTD_maybeRLE(&zc->seqStore) &&
+            ZSTD_isRLE((BYTE const*)src, srcSize))
+        {
+            return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock);
+        }
         /* Attempt superblock compression.
          *
         * Note that compressed size of ZSTD_compressSuperBlock() is not bound by the
@@ -2481,12 +2488,15 @@ static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc,
         *
         * cSize >= blockBound(srcSize): We have expanded the block too much so
         * emit an uncompressed block.
        */
-        size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, lastBlock);
-        if (cSize != ERROR(dstSize_tooSmall)) {
-            FORWARD_IF_ERROR(cSize);
-            if (cSize != 0 && cSize < srcSize + ZSTD_blockHeaderSize) {
-                ZSTD_confirmRepcodesAndEntropyTables(zc);
-                return cSize;
+        {
+            size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock);
+            if (cSize != ERROR(dstSize_tooSmall)) {
+                size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy);
+                FORWARD_IF_ERROR(cSize);
+                if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) {
+                    ZSTD_confirmRepcodesAndEntropyTables(zc);
+                    return cSize;
+                }
             }
         }
     }
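The new acceptance bound above is easy to misread, so here is the arithmetic spelled out (a sketch assuming minlog == 6, the value ZSTD_minGain uses for strategies below ZSTD_btultra; the 128 KB block is an arbitrary example):

    size_t const srcSize  = 128 * 1024;          /* 131072 */
    size_t const minGain  = (srcSize >> 6) + 2;  /*   2050 */
    size_t const maxCSize = srcSize - minGain;   /* 129022 */
    /* The superblock result is kept only if
     *   cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize (3 bytes),
     * otherwise the caller falls through and emits the block uncompressed. */

Previously the comparison was against srcSize + ZSTD_blockHeaderSize, which accepted results that expanded the block slightly; requiring the ZSTD_minGain() margin matches what the regular block compressor demands.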
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index 893e8def..db7b89ce 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -326,6 +326,31 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
     return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
 }
 
+typedef struct repcodes_s {
+    U32 rep[3];
+} repcodes_t;
+
+MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
+{
+    repcodes_t newReps;
+    if (offset >= ZSTD_REP_NUM) {  /* full offset */
+        newReps.rep[2] = rep[1];
+        newReps.rep[1] = rep[0];
+        newReps.rep[0] = offset - ZSTD_REP_MOVE;
+    } else {   /* repcode */
+        U32 const repCode = offset + ll0;
+        if (repCode > 0) {  /* note : if repCode==0, no change */
+            U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
+            newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
+            newReps.rep[1] = rep[0];
+            newReps.rep[0] = currentOffset;
+        } else {   /* repCode == 0 */
+            memcpy(&newReps, rep, sizeof(newReps));
+        }
+    }
+    return newReps;
+}
+
 /* ZSTD_cParam_withinBounds:
  * @return 1 if value is within cParam bounds,
  * 0 otherwise */
@@ -351,6 +376,16 @@ MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const voi
     return ZSTD_blockHeaderSize + srcSize;
 }
 
+MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
+{
+    BYTE* const op = (BYTE*)dst;
+    U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
+    RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
+    MEM_writeLE24(op, cBlockHeader);
+    op[3] = src;
+    return 4;
+}
+
 
 /* ZSTD_minGain() :
  * minimum compression required
@@ -364,6 +399,21 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
     return (srcSize >> minlog) + 2;
 }
 
+MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
+{
+    switch (cctxParams->literalCompressionMode) {
+    case ZSTD_lcm_huffman:
+        return 0;
+    case ZSTD_lcm_uncompressed:
+        return 1;
+    default:
+        assert(0 /* impossible: pre-validated */);
+        /* fall-through */
+    case ZSTD_lcm_auto:
+        return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
+    }
+}
+
 /*! ZSTD_safecopyLiterals() :
  *  memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
  *  Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
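ZSTD_rleCompressBlock above emits the smallest legal block zstd has: a 3-byte block header plus one payload byte. A worked layout, assuming bt_rle == 1 in zstd's blockType_e (the run length is an arbitrary example):

    /* header = lastBlock (bit 0) | blockType (bits 1-2) | blockSize (bits 3+) */
    U32 const lastBlock = 1;
    U32 const srcSize   = 4096;  /* length of the run being encoded */
    U32 const header    = lastBlock + (((U32)bt_rle) << 1) + (srcSize << 3);
    /* header == 1 + 2 + 32768 == 0x8003; MEM_writeLE24 stores 03 80 00,
     * then op[3] holds the repeated byte: a 4096-byte run in 4 bytes. */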
diff --git a/lib/compress/zstd_compress_literals.c b/lib/compress/zstd_compress_literals.c
index 8d22bcad..b7680004 100644
--- a/lib/compress/zstd_compress_literals.c
+++ b/lib/compress/zstd_compress_literals.c
@@ -36,6 +36,7 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src,
     }
 
     memcpy(ostart + flSize, src, srcSize);
+    DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
     return srcSize + flSize;
 }
 
@@ -62,6 +63,7 @@ size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void*
     }
 
     ostart[flSize] = *(const BYTE*)src;
+    DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1);
     return flSize+1;
 }
 
@@ -80,8 +82,8 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
     symbolEncodingType_e hType = set_compressed;
     size_t cLitSize;
 
-    DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i)",
-                disableLiteralCompression);
+    DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)",
+                disableLiteralCompression, (U32)srcSize);
 
     /* Prepare nextEntropy assuming reusing the existing table */
     memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
@@ -110,6 +112,7 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
                 (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
         if (repeat != HUF_repeat_none) {
             /* reused the existing table */
+            DEBUGLOG(5, "Reusing previous huffman table");
             hType = set_repeat;
         }
     }
@@ -150,5 +153,6 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
     default:  /* not possible : lhSize is {3,4,5} */
         assert(0);
     }
+    DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize));
     return lhSize+cLitSize;
 }

diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c
index 8c98d18e..fd475dcc 100644
--- a/lib/compress/zstd_compress_superblock.c
+++ b/lib/compress/zstd_compress_superblock.c
@@ -16,6 +16,7 @@
 #include "zstd_compress_sequences.h"
 #include "zstd_compress_literals.h"
 #include "zstd_compress_superblock.h"
+#include "zstd_internal.h" /* ZSTD_getSequenceLength */
 
 /*-*************************************
 *  Superblock entropy buffer structs
@@ -53,15 +54,14 @@ typedef struct {
 
 /** ZSTD_buildSuperBlockEntropy_literal() :
  *  Builds entropy for the super-block literals.
- *  Stores literals block type (raw, rle, compressed) and
+ *  Stores literals block type (raw, rle, compressed, repeat) and
  *  huffman description table to hufMetadata.
- *  Currently, this does not consider the option of reusing huffman table from
- *  previous super-block. I think it would be a good improvement to add that option.
 *  @return : size of huffman description table or error code */
 static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize,
                                             const ZSTD_hufCTables_t* prevHuf,
                                                   ZSTD_hufCTables_t* nextHuf,
                                                   ZSTD_hufCTablesMetadata_t* hufMetadata,
+                                                  const int disableLiteralsCompression,
                                                   void* workspace, size_t wkspSize)
 {
     BYTE* const wkspStart = (BYTE*)workspace;
@@ -72,26 +72,49 @@ static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSiz
     BYTE* const nodeWksp = countWkspStart + countWkspSize;
     const size_t nodeWkspSize = wkspEnd-nodeWksp;
     unsigned maxSymbolValue = 255;
-    unsigned huffLog = 11;
+    unsigned huffLog = HUF_TABLELOG_DEFAULT;
+    HUF_repeat repeat = prevHuf->repeatMode;
 
     DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize);
 
     /* Prepare nextEntropy assuming reusing the existing table */
     memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
 
+    if (disableLiteralsCompression) {
+        DEBUGLOG(5, "set_basic - disabled");
+        hufMetadata->hType = set_basic;
+        return 0;
+    }
+
     /* small ? don't even attempt compression (speed opt) */
 #   define COMPRESS_LITERALS_SIZE_MIN 63
-    {   size_t const minLitSize = COMPRESS_LITERALS_SIZE_MIN;
-        if (srcSize <= minLitSize) { hufMetadata->hType = set_basic; return 0; }
+    {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
+        if (srcSize <= minLitSize) {
+            DEBUGLOG(5, "set_basic - too small");
+            hufMetadata->hType = set_basic;
+            return 0;
+        }
     }
 
     /* Scan input and build symbol stats */
     {   size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
         FORWARD_IF_ERROR(largest);
-        if (largest == srcSize) { hufMetadata->hType = set_rle; return 0; }
-        if (largest <= (srcSize >> 7)+4) { hufMetadata->hType = set_basic; return 0; }
+        if (largest == srcSize) {
+            DEBUGLOG(5, "set_rle");
+            hufMetadata->hType = set_rle;
+            return 0;
+        }
+        if (largest <= (srcSize >> 7)+4) {
+            DEBUGLOG(5, "set_basic - no gain");
+            hufMetadata->hType = set_basic;
+            return 0;
+        }
     }
 
+    /* Validate the previous Huffman table */
+    if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
+        repeat = HUF_repeat_none;
+    }
+
     /* Build Huffman Tree */
     memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
@@ -101,13 +124,32 @@ static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSiz
                                     nodeWksp, nodeWkspSize);
         FORWARD_IF_ERROR(maxBits);
         huffLog = (U32)maxBits;
-        {   size_t cSize = HUF_estimateCompressedSize(
-                (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
-            size_t hSize = HUF_writeCTable(
-                hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
-                (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog);
-            if (cSize + hSize >= srcSize) { hufMetadata->hType = set_basic; return 0; }
+        {   /* Build and write the CTable */
+            size_t const newCSize = HUF_estimateCompressedSize(
+                (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
+            size_t const hSize = HUF_writeCTable(
+                hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
+                (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog);
+            /* Check against repeating the previous CTable */
+            if (repeat != HUF_repeat_none) {
+                size_t const oldCSize = HUF_estimateCompressedSize(
+                    (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
+                if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
+                    DEBUGLOG(5, "set_repeat - smaller");
+                    memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+                    hufMetadata->hType = set_repeat;
+                    return 0;
+                }
+            }
+            if (newCSize + hSize >= srcSize) {
+                DEBUGLOG(5, "set_basic - no gains");
+                memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+                hufMetadata->hType = set_basic;
+                return 0;
+            }
+            DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
             hufMetadata->hType = set_compressed;
+            nextHuf->repeatMode = HUF_repeat_check;
             return hSize;
         }
     }
@@ -241,6 +283,7 @@ ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
             ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize,
                                                 &prevEntropy->huf, &nextEntropy->huf,
                                                 &entropyMetadata->hufMetadata,
+                                                ZSTD_disableLiteralsCompression(cctxParams),
                                                 workspace, wkspSize);
     FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize);
     entropyMetadata->fseMetadata.fseTablesSize =
@@ -255,21 +298,19 @@ ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
 
 /** ZSTD_compressSubBlock_literal() :
  *  Compresses literals section for a sub-block.
- *  Compressed literal size needs to be less than uncompressed literal size.
- *  ZSTD spec doesn't have this constaint. I will explain why I have this constraint here.
- *  Literals section header size ranges from 1 to 5 bytes,
- *  which is dictated by regenerated size and compressed size.
- *  In order to figure out the memory address to start writing compressed literal,
- *  it is necessary to figure out the literals section header size.
- *  The challenge is that compressed size is only known after compression.
- *  This is a chicken and egg problem.
- *  I am simplifying the problem by assuming that
- *  compressed size will always be less than or equal to regenerated size,
- *  and using regenerated size to calculate literals section header size.
+ *  When we have to write the Huffman table we will sometimes choose a header
+ *  size larger than necessary. This is because we have to pick the header size
+ *  before we know the table size + compressed size, so we have a bound on the
+ *  table size. If we guessed incorrectly, we fall back to uncompressed literals.
+ *
+ *  We write the header when writeEntropy=1 and set entropyWritten=1 when we succeeded
+ *  in writing the header, otherwise it is set to 0.
+ *
  *  hufMetadata->hType has literals block type info.
  *      If it is set_basic, all sub-blocks literals section will be Raw_Literals_Block.
  *      If it is set_rle, all sub-blocks literals section will be RLE_Literals_Block.
  *      If it is set_compressed, first sub-block's literals section will be Compressed_Literals_Block
+ *      If it is set_repeat, first sub-block's literals section will be Treeless_Literals_Block
  *      and the following sub-blocks' literals sections will be Treeless_Literals_Block.
  *  @return : compressed size of literals section of a sub-block
  *            Or 0 if it unable to compress.
 *            Or error code */
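The 200-byte slack described above deserves a concrete instance. With writeEntropy set, the header-size guess in ZSTD_compressSubBlock_literal (below) works like this (a sketch; 1 KB is 1024 via zstd's KB macro, and litSize is chosen arbitrarily):

    size_t const litSize = 900;
    size_t const header  = 200;  /* slack reserved for the Huffman description */
    size_t const lhSize  = 3 + (litSize >= (1024 - header))
                             + (litSize >= (16*1024 - header));
    /* lhSize == 4 here: the 4-byte header format can describe compressed
     * sizes up to 16 KB - 1, so even if the table pushes the compressed
     * literals past 1023 bytes, the chosen header still fits. If the guess
     * is ever wrong, the code below falls back to raw literals. */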
@@ -278,28 +319,22 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
                                     const ZSTD_hufCTablesMetadata_t* hufMetadata,
                                     const BYTE* literals, size_t litSize,
                                     void* dst, size_t dstSize,
-                                    const int bmi2, int writeEntropy)
+                                    const int bmi2, int writeEntropy, int* entropyWritten)
 {
-    size_t const lhSize = 3 + (litSize >= 1 KB) + (litSize >= 16 KB);
+    size_t const header = writeEntropy ? 200 : 0;
+    size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header));
     BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + dstSize;
     BYTE* op = ostart + lhSize;
-    U32 singleStream = litSize < 256;
-    symbolEncodingType_e hType = writeEntropy ? set_compressed : set_repeat;
+    U32 const singleStream = lhSize == 3;
+    symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat;
     size_t cLitSize = 0;
 
     (void)bmi2; // TODO bmi2...
 
     DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy);
 
-    if (writeEntropy && litSize == 0) {
-        /* Literals section cannot be compressed mode when litSize == 0.
-         * (This seems to be decoder constraint.)
-         * Entropy cannot be written if literals section is not compressed mode.
-         */
-        return 0;
-    }
-
+    *entropyWritten = 0;
     if (litSize == 0 || hufMetadata->hType == set_basic) {
       DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal");
       return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
@@ -308,8 +343,10 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
       return ZSTD_compressRleLiteralsBlock(dst, dstSize, literals, litSize);
     }
 
-    if (lhSize == 3) singleStream = 1;
-    if (writeEntropy) {
+    assert(litSize > 0);
+    assert(hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat);
+
+    if (writeEntropy && hufMetadata->hType == set_compressed) {
         memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize);
         op += hufMetadata->hufDesSize;
         cLitSize += hufMetadata->hufDesSize;
@@ -322,11 +359,19 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
         op += cSize;
         cLitSize += cSize;
         if (cSize == 0 || ERR_isError(cSize)) {
-            return 0;
+            DEBUGLOG(5, "Failed to write entropy tables %s", ZSTD_getErrorName(cSize));
+            return 0;
         }
-        if (cLitSize > litSize) {
-            if (writeEntropy) return 0;
-            else return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
+        /* If we expand and we aren't writing a header then emit uncompressed */
+        if (!writeEntropy && cLitSize >= litSize) {
+            DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal because uncompressible");
+            return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
+        }
+        /* If we are writing headers then allow expansion that doesn't change our header size. */
+        if (lhSize < (size_t)(3 + (cLitSize >= 1 KB) + (cLitSize >= 16 KB))) {
+            assert(cLitSize > litSize);
+            DEBUGLOG(5, "Literals expanded beyond allowed header size");
+            return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
        }
        DEBUGLOG(5, "ZSTD_compressSubBlock_literal (cSize=%zu)", cSize);
     }
@@ -353,17 +398,26 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
     default:  /* not possible : lhSize is {3,4,5} */
         assert(0);
     }
+    *entropyWritten = 1;
+    DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart));
     return op-ostart;
 }
 
-static size_t ZSTD_seqDecompressedSize(const seqDef* sequences, size_t nbSeq, size_t litSize) {
+static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) {
     const seqDef* const sstart = sequences;
     const seqDef* const send = sequences + nbSeq;
     const seqDef* sp = sstart;
     size_t matchLengthSum = 0;
+    size_t litLengthSum = 0;
     while (send-sp > 0) {
-        matchLengthSum += sp->matchLength + MINMATCH;
-        sp++;
+        ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
+        litLengthSum += seqLen.litLength;
+        matchLengthSum += seqLen.matchLength;
+        sp++;
+    }
+    assert(litLengthSum <= litSize);
+    if (!lastSequence) {
+        assert(litLengthSum == litSize);
     }
     return matchLengthSum + litSize;
 }
@@ -372,8 +426,9 @@ static size_t ZSTD_seqDecompressedSize(const seqDef* sequences, size_t nbSeq, si
  *  Compresses sequences section for a sub-block.
  *  fseMetadata->llType, fseMetadata->ofType, and fseMetadata->mlType have
  *  symbol compression modes for the super-block.
- *  First sub-block will have these in its header. The following sub-blocks
- *  will always have repeat mode.
+ *  The first successfully compressed block will have these in its header.
+ *  We set entropyWritten=1 when we succeed in compressing the sequences.
+ *  The following sub-blocks will always have repeat mode.
  *  @return : compressed size of sequences section of a sub-block
  *            Or 0 if it is unable to compress
 *            Or error code. */
@@ -383,7 +438,7 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
                                               const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
                                               const ZSTD_CCtx_params* cctxParams,
                                               void* dst, size_t dstCapacity,
-                                              const int bmi2, int writeEntropy)
+                                              const int bmi2, int writeEntropy, int* entropyWritten)
 {
     const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
     BYTE* const ostart = (BYTE*)dst;
@@ -393,6 +448,7 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
 
     DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (nbSeq=%zu, writeEntropy=%d, longOffsets=%d)", nbSeq, writeEntropy, longOffsets);
 
+    *entropyWritten = 0;
     /* Sequences Header */
     RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, dstSize_tooSmall);
@@ -402,9 +458,6 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
         op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
     else
         op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
-    if (writeEntropy && nbSeq == 0) {
-        return 0;
-    }
     if (nbSeq==0) {
         return op - ostart;
     }
@@ -444,6 +497,7 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
          *  In this exceedingly rare case, we will simply emit an uncompressed
          *  block, since it isn't worth optimizing.
          */
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
         if (writeEntropy && fseMetadata->lastCountSize && fseMetadata->lastCountSize + bitstreamSize < 4) {
             /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
             assert(fseMetadata->lastCountSize + bitstreamSize == 3);
@@ -451,6 +505,7 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
                         "emitting an uncompressed block.");
             return 0;
         }
+#endif
         DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (bitstreamSize=%zu)", bitstreamSize);
     }
 
@@ -461,10 +516,15 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
      * with rle mode and the current block's sequences section is compressed
     * with repeat mode where sequences section body size can be 1 byte.
     */
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
    if (op-seqHead < 4) {
+        DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.4.0 by emitting "
+                    "an uncompressed block when sequences are < 4 bytes");
        return 0;
    }
+#endif
 
+    *entropyWritten = 1;
    return op - ostart;
 }
@@ -479,16 +539,19 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
                                     const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
                                     const ZSTD_CCtx_params* cctxParams,
                                     void* dst, size_t dstCapacity,
-                                    const int bmi2, int writeEntropy, U32 lastBlock)
+                                    const int bmi2,
+                                    int writeLitEntropy, int writeSeqEntropy,
+                                    int* litEntropyWritten, int* seqEntropyWritten,
+                                    U32 lastBlock)
 {
     BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + dstCapacity;
     BYTE* op = ostart + ZSTD_blockHeaderSize;
-    DEBUGLOG(5, "ZSTD_compressSubBlock (litSize=%zu, nbSeq=%zu, writeEntropy=%d, lastBlock=%d)",
-                litSize, nbSeq, writeEntropy, lastBlock);
+    DEBUGLOG(5, "ZSTD_compressSubBlock (litSize=%zu, nbSeq=%zu, writeLitEntropy=%d, writeSeqEntropy=%d, lastBlock=%d)",
+                litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock);
     {   size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable,
                                                         &entropyMetadata->hufMetadata, literals, litSize,
-                                                        op, oend-op, bmi2, writeEntropy);
+                                                        op, oend-op, bmi2, writeLitEntropy, litEntropyWritten);
         FORWARD_IF_ERROR(cLitSize);
         if (cLitSize == 0) return 0;
         op += cLitSize;
@@ -499,7 +562,7 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
                                                  llCode, mlCode, ofCode,
                                                  cctxParams,
                                                  op, oend-op,
-                                                 bmi2, writeEntropy);
+                                                 bmi2, writeSeqEntropy, seqEntropyWritten);
         FORWARD_IF_ERROR(cSeqSize);
         if (cSeqSize == 0) return 0;
         op += cSeqSize;
@@ -524,7 +587,7 @@ static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t lit
 
     if (hufMetadata->hType == set_basic) return litSize;
     else if (hufMetadata->hType == set_rle) return 1;
-    else if (hufMetadata->hType == set_compressed) {
+    else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) {
         size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize);
         if (ZSTD_isError(largest)) return litSize;
         {   size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue);
@@ -601,17 +664,28 @@ static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
                                         const ZSTD_entropyCTables_t* entropy,
                                         const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
                                         void* workspace, size_t wkspSize,
-                                        int writeEntropy) {
+                                        int writeLitEntropy, int writeSeqEntropy) {
     size_t cSizeEstimate = 0;
     cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize,
                                                        &entropy->huf, &entropyMetadata->hufMetadata,
-                                                       workspace, wkspSize, writeEntropy);
+                                                       workspace, wkspSize, writeLitEntropy);
     cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
                                                          nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
-                                                         workspace, wkspSize, writeEntropy);
+                                                         workspace, wkspSize, writeSeqEntropy);
     return cSizeEstimate + ZSTD_blockHeaderSize;
 }
 
+static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata)
+{
+    if (fseMetadata->llType == set_compressed || fseMetadata->llType == set_rle)
+        return 1;
+    if (fseMetadata->mlType == set_compressed || fseMetadata->mlType == set_rle)
+        return 1;
+    if (fseMetadata->ofType == set_compressed || fseMetadata->ofType == set_rle)
+        return 1;
+    return 0;
+}
+
 /** ZSTD_compressSubBlock_multi() :
  *  Breaks super-block into multiple sub-blocks and compresses them.
  *  Entropy will be written to the first block.
@@ -620,10 +694,12 @@ static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
  *  @return : compressed size of the super block (which is multiple ZSTD blocks)
 *            Or 0 if it failed to compress. */
 static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
-                            const ZSTD_entropyCTables_t* entropy,
+                            const ZSTD_compressedBlockState_t* prevCBlock,
+                            ZSTD_compressedBlockState_t* nextCBlock,
                             const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
                             const ZSTD_CCtx_params* cctxParams,
                                   void* dst, size_t dstCapacity,
+                            const void* src, size_t srcSize,
                             const int bmi2, U32 lastBlock,
                             void* workspace, size_t wkspSize)
 {
@@ -633,6 +709,8 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
     const BYTE* const lstart = seqStorePtr->litStart;
     const BYTE* const lend = seqStorePtr->lit;
     const BYTE* lp = lstart;
+    BYTE const* ip = (BYTE const*)src;
+    BYTE const* const iend = ip + srcSize;
     BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + dstCapacity;
     BYTE* op = ostart;
@@ -641,41 +719,57 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
     const BYTE* ofCodePtr = seqStorePtr->ofCode;
     size_t targetCBlockSize = cctxParams->targetCBlockSize;
     size_t litSize, seqCount;
-    int writeEntropy = 1;
-    size_t remaining = ZSTD_seqDecompressedSize(sstart, send-sstart, lend-lstart);
-    size_t cBlockSizeEstimate = 0;
+    int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed;
+    int writeSeqEntropy = 1;
+    int lastSequence = 0;
 
     DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)",
                 (unsigned)(lend-lp), (unsigned)(send-sstart));
 
     litSize = 0;
     seqCount = 0;
-    while (sp + seqCount < send) {
-        const seqDef* const sequence = sp + seqCount;
-        const U32 lastSequence = sequence+1 == send;
-        litSize = (sequence == send) ? (size_t)(lend-lp) : litSize + sequence->litLength;
-        seqCount++;
+    do {
+        size_t cBlockSizeEstimate = 0;
+        if (sstart == send) {
+            lastSequence = 1;
+        } else {
+            const seqDef* const sequence = sp + seqCount;
+            lastSequence = sequence == send - 1;
+            litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength;
+            seqCount++;
+        }
+        if (lastSequence) {
+            assert(lp <= lend);
+            assert(litSize <= (size_t)(lend - lp));
+            litSize = (size_t)(lend - lp);
+        }
         /* I think there is an optimization opportunity here.
         * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
         * since it recalculates estimate from scratch.
        * For example, it would recount literal distribution and symbol codes everytime.
        */
        cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
-                                                       entropy, entropyMetadata,
-                                                       workspace, wkspSize, writeEntropy);
+                                                       &nextCBlock->entropy, entropyMetadata,
+                                                       workspace, wkspSize, writeLitEntropy, writeSeqEntropy);
        if (cBlockSizeEstimate > targetCBlockSize || lastSequence) {
+            int litEntropyWritten = 0;
+            int seqEntropyWritten = 0;
-            const size_t decompressedSize = ZSTD_seqDecompressedSize(sp, seqCount, litSize);
-            const size_t cSize = ZSTD_compressSubBlock(entropy, entropyMetadata,
+            const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence);
+            const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
                                                        sp, seqCount,
                                                        lp, litSize,
                                                        llCodePtr, mlCodePtr, ofCodePtr,
                                                        cctxParams,
                                                        op, oend-op,
-                                                       bmi2, writeEntropy, lastBlock && lastSequence);
+                                                       bmi2, writeLitEntropy, writeSeqEntropy,
+                                                       &litEntropyWritten, &seqEntropyWritten,
+                                                       lastBlock && lastSequence);
            FORWARD_IF_ERROR(cSize);
+            DEBUGLOG(5, "cSize = %zu | decompressedSize = %zu", cSize, decompressedSize);
            if (cSize > 0 && cSize < decompressedSize) {
-                assert(remaining >= decompressedSize);
-                remaining -= decompressedSize;
+                DEBUGLOG(5, "Committed the sub-block");
+                assert(ip + decompressedSize <= iend);
+                ip += decompressedSize;
                sp += seqCount;
                lp += litSize;
                op += cSize;
@@ -684,20 +778,51 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
                ofCodePtr += seqCount;
                litSize = 0;
                seqCount = 0;
-                writeEntropy = 0; // Entropy only needs to be written once
+                /* Entropy only needs to be written once */
+                if (litEntropyWritten) {
+                    writeLitEntropy = 0;
+                }
+                if (seqEntropyWritten) {
+                    writeSeqEntropy = 0;
+                }
            }
        }
+    } while (!lastSequence);
+    if (writeLitEntropy) {
+        DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten");
+        memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
    }
-    if (remaining) {
-        DEBUGLOG(5, "ZSTD_compressSubBlock_multi failed to compress");
+    if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) {
+        /* If we haven't written our entropy tables, then we've violated our contract and
+         * must emit an uncompressed block.
+         */
+        DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten");
        return 0;
    }
+    if (ip < iend) {
+        size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock);
+        DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip));
+        FORWARD_IF_ERROR(cSize);
+        assert(cSize != 0);
+        op += cSize;
+        /* We have to regenerate the repcodes because we've skipped some sequences */
+        if (sp < send) {
+            seqDef const* seq;
+            repcodes_t rep;
+            memcpy(&rep, prevCBlock->rep, sizeof(rep));
+            for (seq = sstart; seq < sp; ++seq) {
+                rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
+            }
+            memcpy(nextCBlock->rep, &rep, sizeof(rep));
+        }
+    }
    DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed");
    return op-ostart;
 }
 
 size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
                                void* dst, size_t dstCapacity,
+                               void const* src, size_t srcSize,
                                unsigned lastBlock) {
     ZSTD_entropyCTablesMetadata_t entropyMetadata;
@@ -709,10 +834,12 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
           zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */));
 
     return ZSTD_compressSubBlock_multi(&zc->seqStore,
-            &zc->blockState.nextCBlock->entropy,
+            zc->blockState.prevCBlock,
+            zc->blockState.nextCBlock,
             &entropyMetadata,
             &zc->appliedParams,
             dst, dstCapacity,
+            src, srcSize,
             zc->bmi2, lastBlock,
             zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */);
 }

diff --git a/lib/compress/zstd_compress_superblock.h b/lib/compress/zstd_compress_superblock.h
index 3bd6fdcf..35d20729 100644
--- a/lib/compress/zstd_compress_superblock.h
+++ b/lib/compress/zstd_compress_superblock.h
@@ -26,6 +26,7 @@
  * The given block will be compressed into multiple sub blocks that are around targetCBlockSize. */
 size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
                                void* dst, size_t dstCapacity,
+                               void const* src, size_t srcSize,
                                unsigned lastBlock);
 
 #endif /* ZSTD_COMPRESS_ADVANCED_H */
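The repcode replay in ZSTD_compressSubBlock_multi above leans on ZSTD_updateRep from zstd_compress_internal.h (moved there earlier in this patch). Stored seqDef offsets carry a +1 bias, hence seq->offset - 1 at the call site: codes 0..2 select repcodes 1..3 (shifted by one when the sequence has no literals), larger values are full offsets biased by ZSTD_REP_MOVE. A hedged walk-through with arbitrary history values:

    U32 rep[3] = {1024, 512, 256};
    repcodes_t r;

    r = ZSTD_updateRep(rep, 2 + ZSTD_REP_MOVE, 0);
    /* full offset 2 pushes the history: r.rep == {2, 1024, 512} */

    r = ZSTD_updateRep(rep, 1, 0);
    /* offset code 1 == repcode 2 moves to front: r.rep == {512, 1024, 256} */

    r = ZSTD_updateRep(rep, 0, 1);
    /* offset code 0 with ll0 == 1 is treated as repcode 2 as well,
     * so the result is again {512, 1024, 256} */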
diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index a835e9ec..8d630196 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -765,30 +765,6 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
 /*-*******************************
 *  Optimal parser
 *********************************/
-typedef struct repcodes_s {
-    U32 rep[3];
-} repcodes_t;
-
-static repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
-{
-    repcodes_t newReps;
-    if (offset >= ZSTD_REP_NUM) {  /* full offset */
-        newReps.rep[2] = rep[1];
-        newReps.rep[1] = rep[0];
-        newReps.rep[0] = offset - ZSTD_REP_MOVE;
-    } else {   /* repcode */
-        U32 const repCode = offset + ll0;
-        if (repCode > 0) {  /* note : if repCode==0, no change */
-            U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
-            newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
-            newReps.rep[1] = rep[0];
-            newReps.rep[0] = currentOffset;
-        } else {   /* repCode == 0 */
-            memcpy(&newReps, rep, sizeof(newReps));
-        }
-    }
-    return newReps;
-}
-
 
 static U32 ZSTD_totalLen(ZSTD_optimal_t sol)

diff --git a/tests/fuzz/simple_round_trip.c b/tests/fuzz/simple_round_trip.c
index e37fa6f6..41ea9673 100644
--- a/tests/fuzz/simple_round_trip.c
+++ b/tests/fuzz/simple_round_trip.c
@@ -32,9 +32,12 @@ static size_t roundTripTest(void *result, size_t resultCapacity,
                             FUZZ_dataProducer_t *producer)
 {
     size_t cSize;
+    size_t dSize;
+    int targetCBlockSize = 0;
     if (FUZZ_dataProducer_uint32Range(producer, 0, 1)) {
         FUZZ_setRandomParameters(cctx, srcSize, producer);
         cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize);
+        FUZZ_ZASSERT(ZSTD_CCtx_getParameter(cctx, ZSTD_c_targetCBlockSize, &targetCBlockSize));
     } else {
         int const cLevel = FUZZ_dataProducer_int32Range(producer, kMinClevel, kMaxClevel);
 
@@ -42,14 +45,33 @@ static size_t roundTripTest(void *result, size_t resultCapacity,
             cctx, compressed, compressedCapacity, src, srcSize, cLevel);
     }
     FUZZ_ZASSERT(cSize);
-    return ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize);
+    dSize = ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize);
+    FUZZ_ZASSERT(dSize);
+    /* When superblock is enabled make sure we don't expand the block more than expected. */
+    if (targetCBlockSize != 0) {
+        size_t normalCSize;
+        FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetCBlockSize, 0));
+        normalCSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize);
+        FUZZ_ZASSERT(normalCSize);
+        {
+            size_t const bytesPerBlock = 3 /* block header */
+                + 5 /* Literal header */
+                + 6 /* Huffman jump table */
+                + 3 /* number of sequences */
+                + 1 /* symbol compression modes */;
+            size_t const expectedExpansion = bytesPerBlock * (1 + (normalCSize / MAX(1, targetCBlockSize)));
+            size_t const allowedExpansion = (srcSize >> 4) + 3 * expectedExpansion + 10;
+            FUZZ_ASSERT(cSize <= normalCSize + allowedExpansion);
+        }
+    }
+    return dSize;
 }
 
 int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
 {
     size_t const rBufSize = size;
     void* rBuf = malloc(rBufSize);
-    size_t cBufSize = ZSTD_compressBound(size) * 2;
+    size_t cBufSize = ZSTD_compressBound(size);
     void* cBuf;
 
     /* Give a random portion of src data to the producer, to use for
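The expansion bound in the new fuzzer check is worth one worked instance (all numbers arbitrary): each emitted sub-block can cost up to bytesPerBlock = 3 + 5 + 6 + 3 + 1 = 18 bytes of pure framing, so for srcSize == 1 MB, normalCSize == 100000 and targetCBlockSize == 1024:

    size_t const bytesPerBlock     = 18;
    size_t const expectedExpansion = bytesPerBlock * (1 + 100000 / 1024);  /* 18 * 98 == 1764 */
    size_t const allowedExpansion  = (1048576 >> 4) + 3 * 1764 + 10;       /* 70838 */
    /* cSize may exceed normalCSize by at most ~69 KB in this example;
     * anything larger would mean superblock mode is expanding blocks
     * pathologically. */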
: ", testNb++); + /* Generate the same data 20 times over */ + { + size_t const avgChunkSize = CNBuffSize / 20; + size_t b; + for (b = 0; b < CNBuffSize; b += avgChunkSize) { + size_t const chunkSize = MIN(CNBuffSize - b, avgChunkSize); + RDG_genBuffer((char*)CNBuffer + b, chunkSize, compressibility, 0. /* auto */, seed); + } + } + { + ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + size_t const normalCSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize); + size_t const allowedExpansion = (CNBuffSize * 3 / 1000); + size_t superCSize; + CHECK_Z(normalCSize); + ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19); + ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetCBlockSize, 1000); + superCSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize); + CHECK_Z(superCSize); + if (superCSize > normalCSize + allowedExpansion) { + DISPLAYLEVEL(1, "Superblock too big: %u > %u + %u \n", (U32)superCSize, (U32)normalCSize, (U32)allowedExpansion); + goto _output_error; + } ZSTD_freeCCtx(cctx); } DISPLAYLEVEL(3, "OK \n");