Merge branch 'dev' of github.com:facebook/zstd into dev

Yann Collet 2017-08-11 10:17:49 -07:00
commit 0ab9d55e7a
33 changed files with 2906 additions and 214 deletions

View File

@ -142,6 +142,11 @@ gcc6build: clean
gcc-6 -v
CC=gcc-6 $(MAKE) all MOREFLAGS="-Werror"
.PHONY: gcc7build
gcc7build: clean
gcc-7 -v
CC=gcc-7 $(MAKE) all MOREFLAGS="-Werror"
.PHONY: clangbuild
clangbuild: clean
clang -v

View File

@ -3,7 +3,7 @@ dependencies:
- sudo dpkg --add-architecture i386
- sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test; sudo apt-get -y -qq update
- sudo apt-get -y install gcc-powerpc-linux-gnu gcc-arm-linux-gnueabi libc6-dev-armel-cross gcc-aarch64-linux-gnu libc6-dev-arm64-cross
- sudo apt-get -y install libstdc++-6-dev clang gcc g++ gcc-5 gcc-6 zlib1g-dev liblzma-dev
- sudo apt-get -y install libstdc++-7-dev clang gcc g++ gcc-5 gcc-6 gcc-7 zlib1g-dev liblzma-dev
- sudo apt-get -y install linux-libc-dev:i386 libc6-dev-i386
test:
@ -45,7 +45,7 @@ test:
parallel: true
- ? |
if [[ "$CIRCLE_NODE_INDEX" == "0" ]] ; then make ppc64build && make clean; fi &&
if [[ "$CIRCLE_NODE_TOTAL" < "2" ]] || [[ "$CIRCLE_NODE_INDEX" == "1" ]]; then true && make clean; fi #could add another test here
if [[ "$CIRCLE_NODE_TOTAL" < "2" ]] || [[ "$CIRCLE_NODE_INDEX" == "1" ]]; then make gcc7build && make clean; fi #could add another test here
:
parallel: true
- ? |
@ -64,7 +64,7 @@ test:
#- gcc -v; make -C tests test32 MOREFLAGS="-I/usr/include/x86_64-linux-gnu" && make clean
#- make uasan && make clean
#- make asan32 && make clean
#- make -C tests test32 CC=clang MOREFLAGS="-g -fsanitize=address -I/usr/include/x86_64-linux-gnu"
#- make -C tests test32 CC=clang MOREFLAGS="-g -fsanitize=address -I/usr/include/x86_64-linux-gnu"
# Valgrind tests
#- CFLAGS="-O1 -g" make -C zlibWrapper valgrindTest && make clean
#- make -C tests valgrindTest && make clean

View File

@ -6,6 +6,7 @@ ZSTDCOMP_FILES := $(ZSTDDIR)/compress/*.c
ZSTDDECOMP_FILES := $(ZSTDDIR)/decompress/*.c
ZSTD_FILES := $(ZSTDDECOMP_FILES) $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES)
MULTITHREAD_LDFLAGS = -pthread
DEBUGFLAGS= -g -DZSTD_DEBUG=1
CPPFLAGS += -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
-I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR)
@ -17,7 +18,7 @@ CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
-Wredundant-decls
CFLAGS += $(DEBUGFLAGS)
CFLAGS += $(MOREFLAGS)
FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MULTITHREAD_LDFLAGS)
all: adapt datagen
@ -28,7 +29,7 @@ adapt-debug: $(ZSTD_FILES) adapt.c
$(CC) $(FLAGS) -DDEBUG_MODE=2 $^ -o adapt
datagen : $(PRGDIR)/datagen.c datagencli.c
$(CC) $(FLAGS) $^ -o $@$(EXT)
$(CC) $(FLAGS) $^ -o $@
test-adapt-correctness: datagen adapt
@./test-correctness.sh
@ -45,3 +46,31 @@ clean:
@$(RM) -f tests/*.zst
@$(RM) -f tests/tmp*
@echo "finished cleaning"
#-----------------------------------------------------------------------------
# make install is validated only for Linux, OSX, BSD, Hurd and Solaris targets
#-----------------------------------------------------------------------------
ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS))
ifneq (,$(filter $(shell uname),SunOS))
INSTALL ?= ginstall
else
INSTALL ?= install
endif
PREFIX ?= /usr/local
DESTDIR ?=
BINDIR ?= $(PREFIX)/bin
INSTALL_PROGRAM ?= $(INSTALL) -m 755
install: adapt
@echo Installing binaries
@$(INSTALL) -d -m 755 $(DESTDIR)$(BINDIR)/
@$(INSTALL_PROGRAM) adapt $(DESTDIR)$(BINDIR)/zstd-adaptive
@echo zstd-adaptive installation completed
uninstall:
@$(RM) $(DESTDIR)$(BINDIR)/zstd-adaptive
@echo zstd-adaptive programs successfully uninstalled
endif

View File

@ -42,6 +42,8 @@ static size_t g_streamedSize = 0;
static unsigned g_useProgressBar = 1;
static UTIL_freq_t g_ticksPerSecond;
static unsigned g_forceCompressionLevel = 0;
static unsigned g_minCLevel = 1;
static unsigned g_maxCLevel;
typedef struct {
void* start;
@ -57,7 +59,6 @@ typedef struct {
typedef struct {
buffer_t src;
buffer_t dst;
unsigned compressionLevel;
unsigned jobID;
unsigned lastJobPlusOne;
size_t compressedSize;
@ -76,7 +77,6 @@ typedef struct {
typedef struct {
unsigned compressionLevel;
unsigned numActiveThreads;
unsigned numJobs;
unsigned nextJobID;
unsigned threadError;
@ -139,6 +139,7 @@ typedef struct {
mutex_t compressionCompletion_mutex;
mutex_t createCompletion_mutex;
mutex_t writeCompletion_mutex;
mutex_t compressionLevel_mutex;
size_t lastDictSize;
inBuff_t input;
jobDescription* jobs;
@ -200,6 +201,7 @@ static int freeCCtx(adaptCCtx* ctx)
error |= destroyMutex(&ctx->compressionCompletion_mutex);
error |= destroyMutex(&ctx->createCompletion_mutex);
error |= destroyMutex(&ctx->writeCompletion_mutex);
error |= destroyMutex(&ctx->compressionLevel_mutex);
error |= ZSTD_isError(ZSTD_freeCCtx(ctx->cctx));
free(ctx->input.buffer.start);
if (ctx->jobs){
@ -241,6 +243,7 @@ static int initCCtx(adaptCCtx* ctx, unsigned numJobs)
pthreadError |= initMutex(&ctx->compressionCompletion_mutex);
pthreadError |= initMutex(&ctx->createCompletion_mutex);
pthreadError |= initMutex(&ctx->writeCompletion_mutex);
pthreadError |= initMutex(&ctx->compressionLevel_mutex);
if (pthreadError) return pthreadError;
}
ctx->numJobs = numJobs;
@ -331,7 +334,7 @@ static void signalErrorToThreads(adaptCCtx* ctx)
pthread_mutex_unlock(&ctx->jobReady_mutex.pMutex);
pthread_mutex_lock(&ctx->jobCompressed_mutex.pMutex);
pthread_cond_signal(&ctx->jobCompressed_cond.pCond);
pthread_cond_broadcast(&ctx->jobCompressed_cond.pCond);
pthread_mutex_unlock(&ctx->jobReady_mutex.pMutex);
pthread_mutex_lock(&ctx->jobWrite_mutex.pMutex);
@ -382,16 +385,22 @@ static void adaptCompressionLevel(adaptCCtx* ctx)
double compressWaitWriteCompletion;
double writeWaitCompressionCompletion;
double const threshold = 0.00001;
unsigned const prevCompressionLevel = ctx->compressionLevel;
unsigned prevCompressionLevel;
pthread_mutex_lock(&ctx->compressionLevel_mutex.pMutex);
prevCompressionLevel = ctx->compressionLevel;
pthread_mutex_unlock(&ctx->compressionLevel_mutex.pMutex);
if (g_forceCompressionLevel) {
pthread_mutex_lock(&ctx->compressionLevel_mutex.pMutex);
ctx->compressionLevel = g_compressionLevel;
pthread_mutex_unlock(&ctx->compressionLevel_mutex.pMutex);
return;
}
DEBUG(2, "adapting compression level %u\n", ctx->compressionLevel);
DEBUG(2, "adapting compression level %u\n", prevCompressionLevel);
/* read and reset completion measurements */
pthread_mutex_lock(&ctx->compressionCompletion_mutex.pMutex);
@ -412,6 +421,8 @@ static void adaptCompressionLevel(adaptCCtx* ctx)
pthread_mutex_unlock(&ctx->createCompletion_mutex.pMutex);
DEBUG(2, "convergence counter: %u\n", ctx->convergenceCounter);
assert(g_minCLevel <= prevCompressionLevel && g_maxCLevel >= prevCompressionLevel);
/* adaptation logic */
if (ctx->cooldown) ctx->cooldown--;
@ -420,14 +431,16 @@ static void adaptCompressionLevel(adaptCCtx* ctx)
/* use whichever one waited less because it was slower */
double const completion = MAX(createWaitCompressionCompletion, writeWaitCompressionCompletion);
unsigned const change = convertCompletionToChange(completion);
unsigned const boundChange = MIN(change, ctx->compressionLevel - 1);
unsigned const boundChange = MIN(change, prevCompressionLevel - g_minCLevel);
if (ctx->convergenceCounter >= CONVERGENCE_LOWER_BOUND && boundChange != 0) {
/* reset convergence counter, might have been a spike */
ctx->convergenceCounter = 0;
DEBUG(2, "convergence counter reset, no change applied\n");
}
else if (boundChange != 0) {
pthread_mutex_lock(&ctx->compressionLevel_mutex.pMutex);
ctx->compressionLevel -= boundChange;
pthread_mutex_unlock(&ctx->compressionLevel_mutex.pMutex);
ctx->cooldown = CLEVEL_DECREASE_COOLDOWN;
ctx->convergenceCounter = 1;
@ -438,14 +451,16 @@ static void adaptCompressionLevel(adaptCCtx* ctx)
/* compress waiting on write */
double const completion = MIN(compressWaitWriteCompletion, compressWaitCreateCompletion);
unsigned const change = convertCompletionToChange(completion);
unsigned const boundChange = MIN(change, ZSTD_maxCLevel() - ctx->compressionLevel);
unsigned const boundChange = MIN(change, g_maxCLevel - prevCompressionLevel);
if (ctx->convergenceCounter >= CONVERGENCE_LOWER_BOUND && boundChange != 0) {
/* reset convergence counter, might have been a spike */
ctx->convergenceCounter = 0;
DEBUG(2, "convergence counter reset, no change applied\n");
}
else if (boundChange != 0) {
pthread_mutex_lock(&ctx->compressionLevel_mutex.pMutex);
ctx->compressionLevel += boundChange;
pthread_mutex_unlock(&ctx->compressionLevel_mutex.pMutex);
ctx->cooldown = 0;
ctx->convergenceCounter = 1;
@ -454,9 +469,11 @@ static void adaptCompressionLevel(adaptCCtx* ctx)
}
pthread_mutex_lock(&ctx->compressionLevel_mutex.pMutex);
if (ctx->compressionLevel == prevCompressionLevel) {
ctx->convergenceCounter++;
}
pthread_mutex_unlock(&ctx->compressionLevel_mutex.pMutex);
}
static size_t getUseableDictSize(unsigned compressionLevel)
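The hunks above all apply the same discipline: every read or write of ctx->compressionLevel is now bracketed by the new compressionLevel_mutex, so the compression, output, and adaptation threads never observe a half-updated level. A minimal sketch of that pattern as helpers (the helper names are illustrative; the patch inlines the lock/unlock at each call site):

    static unsigned readCompressionLevel(adaptCCtx* ctx)
    {
        unsigned level;
        pthread_mutex_lock(&ctx->compressionLevel_mutex.pMutex);
        level = ctx->compressionLevel;   /* the only field this mutex protects */
        pthread_mutex_unlock(&ctx->compressionLevel_mutex.pMutex);
        return level;
    }

    static void setCompressionLevel(adaptCCtx* ctx, unsigned level)
    {
        pthread_mutex_lock(&ctx->compressionLevel_mutex.pMutex);
        ctx->compressionLevel = level;
        pthread_mutex_unlock(&ctx->compressionLevel_mutex.pMutex);
    }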
@ -536,15 +553,23 @@ static void* compressionThread(void* arg)
/* adapt compression level */
if (currJob) adaptCompressionLevel(ctx);
pthread_mutex_lock(&ctx->compressionLevel_mutex.pMutex);
DEBUG(2, "job %u compressed with level %u\n", currJob, ctx->compressionLevel);
pthread_mutex_unlock(&ctx->compressionLevel_mutex.pMutex);
/* compress the data */
{
size_t const compressionBlockSize = ZSTD_BLOCKSIZE_MAX; /* 128 KB */
unsigned const cLevel = ctx->compressionLevel;
unsigned cLevel;
unsigned blockNum = 0;
size_t remaining = job->src.size;
size_t srcPos = 0;
size_t dstPos = 0;
pthread_mutex_lock(&ctx->compressionLevel_mutex.pMutex);
cLevel = ctx->compressionLevel;
pthread_mutex_unlock(&ctx->compressionLevel_mutex.pMutex);
/* reset compressed size */
job->compressedSize = 0;
DEBUG(2, "calling ZSTD_compressBegin()\n");
@ -619,18 +644,20 @@ static void* compressionThread(void* arg)
static void displayProgress(unsigned cLevel, unsigned last)
{
if (!g_useProgressBar) return;
UTIL_time_t currTime;
UTIL_getTime(&currTime);
double const timeElapsed = (double)(UTIL_getSpanTimeMicro(g_ticksPerSecond, g_startTime, currTime) / 1000.0);
double const sizeMB = (double)g_streamedSize / (1 << 20);
double const avgCompRate = sizeMB * 1000 / timeElapsed;
fprintf(stderr, "\r| Comp. Level: %2u | Time Elapsed: %7.2f s | Data Size: %7.1f MB | Avg Comp. Rate: %6.2f MB/s |", cLevel, timeElapsed/1000.0, sizeMB, avgCompRate);
if (last) {
fprintf(stderr, "\n");
}
else {
fflush(stderr);
if (!g_useProgressBar) return;
{
double const timeElapsed = (double)(UTIL_getSpanTimeMicro(g_ticksPerSecond, g_startTime, currTime) / 1000.0);
double const sizeMB = (double)g_streamedSize / (1 << 20);
double const avgCompRate = sizeMB * 1000 / timeElapsed;
fprintf(stderr, "\r| Comp. Level: %2u | Time Elapsed: %7.2f s | Data Size: %7.1f MB | Avg Comp. Rate: %6.2f MB/s |", cLevel, timeElapsed/1000.0, sizeMB, avgCompRate);
if (last) {
fprintf(stderr, "\n");
}
else {
fflush(stderr);
}
}
}
@ -706,7 +733,13 @@ static void* outputThread(void* arg)
}
}
}
displayProgress(ctx->compressionLevel, job->lastJobPlusOne == currJob + 1);
{
unsigned cLevel;
pthread_mutex_lock(&ctx->compressionLevel_mutex.pMutex);
cLevel = ctx->compressionLevel;
pthread_mutex_unlock(&ctx->compressionLevel_mutex.pMutex);
displayProgress(cLevel, job->lastJobPlusOne == currJob + 1);
}
pthread_mutex_lock(&ctx->jobWrite_mutex.pMutex);
ctx->jobWriteID++;
pthread_cond_signal(&ctx->jobWrite_cond.pCond);
@ -734,7 +767,6 @@ static int createCompressionJob(adaptCCtx* ctx, size_t srcSize, int last)
jobDescription* const job = &ctx->jobs[nextJobIndex];
job->compressionLevel = ctx->compressionLevel;
job->src.size = srcSize;
job->jobID = nextJob;
if (last) job->lastJobPlusOne = nextJob + 1;
@ -779,6 +811,11 @@ static int performCompression(adaptCCtx* ctx, FILE* const srcFile, outputThreadA
signalErrorToThreads(ctx);
return 1;
}
else if (pthread_detach(out)) {
DISPLAY("Error: could not detach output thread\n");
signalErrorToThreads(ctx);
return 1;
}
}
/* create compression thread */
@ -789,6 +826,11 @@ static int performCompression(adaptCCtx* ctx, FILE* const srcFile, outputThreadA
signalErrorToThreads(ctx);
return 1;
}
else if (pthread_detach(compression)) {
DISPLAY("Error: could not detach compression thread\n");
signalErrorToThreads(ctx);
return 1;
}
}
{
unsigned currJob = 0;
@ -928,9 +970,9 @@ static int freeFileCompressionResources(fcResources* fcr)
static int compressFilename(const char* const srcFilename, const char* const dstFilenameOrNull)
{
int ret = 0;
fcResources fcr = createFileCompressionResources(srcFilename, dstFilenameOrNull);
UTIL_getTime(&g_startTime);
g_streamedSize = 0;
fcResources fcr = createFileCompressionResources(srcFilename, dstFilenameOrNull);
ret |= performCompression(fcr.ctx, fcr.srcFile, fcr.otArg);
ret |= freeFileCompressionResources(&fcr);
return ret;
@ -973,19 +1015,21 @@ static unsigned readU32FromChar(const char** stringPtr)
return result;
}
static void help()
static void help(const char* progPath)
{
PRINT("Usage:\n");
PRINT(" ./multi [options] [file(s)]\n");
PRINT(" %s [options] [file(s)]\n", progPath);
PRINT("\n");
PRINT("Options:\n");
PRINT(" -oFILE : specify the output file name\n");
PRINT(" -i# : provide initial compression level\n");
PRINT(" -i# : provide initial compression level -- default %d, must be in the range [L, U] where L and U are bound values (see below for defaults)\n", DEFAULT_COMPRESSION_LEVEL);
PRINT(" -h : display help/information\n");
PRINT(" -f : force the compression level to stay constant\n");
PRINT(" -c : force write to stdout\n");
PRINT(" -p : hide progress bar\n");
PRINT(" -q : quiet mode -- do not show progress bar or other information\n");
PRINT(" -l# : provide lower bound for compression level -- default 1\n");
PRINT(" -u# : provide upper bound for compression level -- default %u\n", ZSTD_maxCLevel());
}
/* return 0 if successful, else return error */
int main(int argCount, const char* argv[])
@ -993,10 +1037,12 @@ int main(int argCount, const char* argv[])
const char* outFilename = NULL;
const char** filenameTable = (const char**)malloc(argCount*sizeof(const char*));
unsigned filenameIdx = 0;
filenameTable[0] = stdinmark;
unsigned forceStdout = 0;
unsigned providedInitialCLevel = 0;
int ret = 0;
int argNum;
filenameTable[0] = stdinmark;
g_maxCLevel = ZSTD_maxCLevel();
UTIL_initTimer(&g_ticksPerSecond);
@ -1018,9 +1064,10 @@ int main(int argCount, const char* argv[])
case 'i':
argument += 2;
g_compressionLevel = readU32FromChar(&argument);
providedInitialCLevel = 1;
break;
case 'h':
help();
help(argv[0]);
goto _main_exit;
case 'p':
g_useProgressBar = 0;
@ -1036,6 +1083,14 @@ int main(int argCount, const char* argv[])
g_useProgressBar = 0;
g_displayLevel = 0;
break;
case 'l':
argument += 2;
g_minCLevel = readU32FromChar(&argument);
break;
case 'u':
argument += 2;
g_maxCLevel = readU32FromChar(&argument);
break;
default:
DISPLAY("Error: invalid argument provided\n");
ret = 1;
@ -1048,6 +1103,20 @@ int main(int argCount, const char* argv[])
filenameTable[filenameIdx++] = argument;
}
/* check initial, max, and min compression levels */
{
unsigned const minMaxInconsistent = g_minCLevel > g_maxCLevel;
unsigned const initialNotInRange = g_minCLevel > g_compressionLevel || g_maxCLevel < g_compressionLevel;
if (minMaxInconsistent || (initialNotInRange && providedInitialCLevel)) {
DISPLAY("Error: provided compression level parameters are invalid\n");
ret = 1;
goto _main_exit;
}
else if (initialNotInRange) {
g_compressionLevel = g_minCLevel;
}
}
/* error checking with number of files */
if (filenameIdx > 1 && (outFilename != NULL && strcmp(outFilename, stdoutmark))) {
DISPLAY("Error: multiple input files provided, cannot use specified output file\n");

View File

@ -242,4 +242,11 @@ echo -e "\ncorrectness tests -- window size test"
./datagen -s39 -g1GB | pv -L 25m -q | ./adapt -i1 | pv -q > tmp.zst
zstd -d tmp.zst
rm tmp*
echo -e "\ncorrectness tests -- testing bounds"
./datagen -s40 -g1GB | pv -L 25m -q | ./adapt -i1 -u4 | pv -q > tmp.zst
rm tmp*
./datagen -s41 -g1GB | ./adapt -i14 -l4 > tmp.zst
rm tmp*
make clean

View File

@ -1,7 +1,7 @@
From 0cd63464d182bb9708f8b25f7da3dc8e5ec6b4fa Mon Sep 17 00:00:00 2001
From 308795a7713ca6fcd468b60fba9a2fca99cee6a0 Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Thu, 20 Jul 2017 13:18:30 -0700
Subject: [PATCH v3 0/4] Add xxhash and zstd modules
Date: Tue, 8 Aug 2017 19:20:25 -0700
Subject: [PATCH v5 0/5] Add xxhash and zstd modules
Hi all,
@ -16,27 +16,45 @@ Nick Terrell
Changelog:
v1 -> v2:
- Make pointer in lib/xxhash.c:394 non-const (1/4)
- Use div_u64() for division of u64s (2/4)
- Make pointer in lib/xxhash.c:394 non-const (1/5)
- Use div_u64() for division of u64s (2/5)
- Reduce stack usage of ZSTD_compressSequences(), ZSTD_buildSeqTable(),
ZSTD_decompressSequencesLong(), FSE_buildDTable(), FSE_decompress_wksp(),
HUF_writeCTable(), HUF_readStats(), HUF_readCTable(),
HUF_compressWeights(), HUF_readDTableX2(), and HUF_readDTableX4() (2/4)
- No zstd function uses more than 400 B of stack space (2/4)
HUF_compressWeights(), HUF_readDTableX2(), and HUF_readDTableX4() (2/5)
- No zstd function uses more than 400 B of stack space (2/5)
v2 -> v3:
- Work around gcc-7 bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81388
(2/4)
- Fix bug in dictionary compression from upstream commit cc1522351f (2/4)
- Port upstream BtrFS commits e1ddce71d6, 389a6cfc2a, and 6acafd1eff (3/4)
- Change default compression level for BtrFS to 3 (3/4)
(2/5)
- Fix bug in dictionary compression from upstream commit cc1522351f (2/5)
- Port upstream BtrFS commits e1ddce71d6, 389a6cfc2a, and 6acafd1eff (3/5)
- Change default compression level for BtrFS to 3 (3/5)
Nick Terrell (4):
v3 -> v4:
- Fix compiler warnings (2/5)
- Add missing includes (3/5)
- Fix minor linter warnings (3/5, 4/5)
- Add crypto patch (5/5)
v4 -> v5:
- Fix rare compression bug from upstream commit 308047eb5d (2/5)
- Fix bug introduced in v3 when working around the gcc-7 bug (2/5)
- Fix ZSTD_DStream initialization code in squashfs (4/5)
- Fix patch documentation for patches written by Sean Purcell (4/5)
Nick Terrell (5):
lib: Add xxhash module
lib: Add zstd modules
btrfs: Add zstd support
squashfs: Add zstd support
crypto: Add zstd support
crypto/Kconfig | 9 +
crypto/Makefile | 1 +
crypto/testmgr.c | 10 +
crypto/testmgr.h | 71 +
crypto/zstd.c | 265 ++++
fs/btrfs/Kconfig | 2 +
fs/btrfs/Makefile | 2 +-
fs/btrfs/compression.c | 1 +
@ -47,13 +65,13 @@ Nick Terrell (4):
fs/btrfs/props.c | 6 +
fs/btrfs/super.c | 12 +-
fs/btrfs/sysfs.c | 2 +
fs/btrfs/zstd.c | 435 ++++++
fs/btrfs/zstd.c | 432 ++++++
fs/squashfs/Kconfig | 14 +
fs/squashfs/Makefile | 1 +
fs/squashfs/decompressor.c | 7 +
fs/squashfs/decompressor.h | 4 +
fs/squashfs/squashfs_fs.h | 1 +
fs/squashfs/zstd_wrapper.c | 150 ++
fs/squashfs/zstd_wrapper.c | 151 ++
include/linux/xxhash.h | 236 +++
include/linux/zstd.h | 1157 +++++++++++++++
include/uapi/linux/btrfs.h | 8 +-
@ -62,9 +80,9 @@ Nick Terrell (4):
lib/xxhash.c | 500 +++++++
lib/zstd/Makefile | 18 +
lib/zstd/bitstream.h | 374 +++++
lib/zstd/compress.c | 3479 ++++++++++++++++++++++++++++++++++++++++++++
lib/zstd/decompress.c | 2526 ++++++++++++++++++++++++++++++++
lib/zstd/entropy_common.c | 243 ++++
lib/zstd/compress.c | 3484 ++++++++++++++++++++++++++++++++++++++++++++
lib/zstd/decompress.c | 2528 ++++++++++++++++++++++++++++++++
lib/zstd/entropy_common.c | 243 +++
lib/zstd/error_private.h | 53 +
lib/zstd/fse.h | 575 ++++++++
lib/zstd/fse_compress.c | 795 ++++++++++
@ -74,9 +92,10 @@ Nick Terrell (4):
lib/zstd/huf_decompress.c | 960 ++++++++++++
lib/zstd/mem.h | 151 ++
lib/zstd/zstd_common.c | 75 +
lib/zstd/zstd_internal.h | 250 ++++
lib/zstd/zstd_internal.h | 263 ++++
lib/zstd/zstd_opt.h | 1014 +++++++++++++
39 files changed, 14382 insertions(+), 12 deletions(-)
44 files changed, 14756 insertions(+), 12 deletions(-)
create mode 100644 crypto/zstd.c
create mode 100644 fs/btrfs/zstd.c
create mode 100644 fs/squashfs/zstd_wrapper.c
create mode 100644 include/linux/xxhash.h

View File

@ -1,7 +1,7 @@
From fc7f26acbabda35f1c61dfc357dbb207dc8ed23d Mon Sep 17 00:00:00 2001
From a4b1ffb6e89bbccd519f9afa0910635668436105 Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Mon, 17 Jul 2017 17:07:18 -0700
Subject: [PATCH v3 1/4] lib: Add xxhash module
Subject: [PATCH v5 1/5] lib: Add xxhash module
Adds xxhash kernel module with xxh32 and xxh64 hashes. xxhash is an
extremely fast non-cryptographic hash algorithm for checksumming.
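As a rough illustration of the intended use, a kernel caller could checksum a buffer with the one-shot helper exported by the new include/linux/xxhash.h; the xxh64() signature assumed here follows the userspace xxHash API, and the wrapper function and seed choice are illustrative, not taken from the patch:

    #include <linux/types.h>
    #include <linux/xxhash.h>

    /* Checksum an in-memory buffer with a fixed seed. */
    static u64 example_xxh64_checksum(const void *buf, size_t len)
    {
    	return xxh64(buf, len, 0 /* seed */);
    }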

View File

@ -1,7 +1,7 @@
From 686a6149b98250d66b5951e3ae05e79063e9de98 Mon Sep 17 00:00:00 2001
From b7f044163968d724be55bf4841fd80babe036dc2 Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Mon, 17 Jul 2017 17:08:19 -0700
Subject: [PATCH v3 2/4] lib: Add zstd modules
Subject: [PATCH v5 2/5] lib: Add zstd modules
Add zstd compression and decompression kernel modules.
zstd offers a wide variety of compression speed and quality trade-offs.
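For orientation, the API surface these modules export mirrors the calls the crypto patch later in this commit makes (ZSTD_getParams(), ZSTD_CCtxWorkspaceBound(), ZSTD_initCCtx(), ZSTD_compressCCtx()). A condensed, illustrative sketch of one-shot compression with a caller-allocated workspace -- the wrapper function and buffer handling are assumptions, not code from the patch:

    #include <linux/errno.h>
    #include <linux/vmalloc.h>
    #include <linux/zstd.h>

    /* Compress src into dst (capacity dst_cap) at level 3; returns 0 or -errno. */
    static int example_zstd_compress(const void *src, size_t src_len,
    				 void *dst, size_t dst_cap, size_t *out_len)
    {
    	ZSTD_parameters params = ZSTD_getParams(3, src_len, 0);
    	size_t const wksp_size = ZSTD_CCtxWorkspaceBound(params.cParams);
    	void *wksp = vzalloc(wksp_size);
    	ZSTD_CCtx *cctx;
    	size_t ret;

    	if (!wksp)
    		return -ENOMEM;
    	cctx = ZSTD_initCCtx(wksp, wksp_size);
    	if (!cctx) {
    		vfree(wksp);
    		return -EINVAL;
    	}
    	ret = ZSTD_compressCCtx(cctx, dst, dst_cap, src, src_len, params);
    	vfree(wksp);
    	if (ZSTD_isError(ret))
    		return -EINVAL;
    	*out_len = ret;
    	return 0;
    }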
@ -114,13 +114,20 @@ v2 -> v3:
- Work around gcc-7 bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81388
- Fix bug in dictionary compression from upstream commit cc1522351f
v3 -> v4:
- Fix minor compiler warnings
v4 -> v5:
- Fix rare compression bug from upstream commit 308047eb5d
- Fix bug introduced in v3 when working around the gcc-7 bug
include/linux/zstd.h | 1157 +++++++++++++++
lib/Kconfig | 8 +
lib/Makefile | 2 +
lib/zstd/Makefile | 18 +
lib/zstd/bitstream.h | 374 +++++
lib/zstd/compress.c | 3479 +++++++++++++++++++++++++++++++++++++++++++++
lib/zstd/decompress.c | 2526 ++++++++++++++++++++++++++++++++
lib/zstd/compress.c | 3484 +++++++++++++++++++++++++++++++++++++++++++++
lib/zstd/decompress.c | 2528 ++++++++++++++++++++++++++++++++
lib/zstd/entropy_common.c | 243 ++++
lib/zstd/error_private.h | 53 +
lib/zstd/fse.h | 575 ++++++++
@ -131,9 +138,9 @@ v2 -> v3:
lib/zstd/huf_decompress.c | 960 +++++++++++++
lib/zstd/mem.h | 151 ++
lib/zstd/zstd_common.c | 75 +
lib/zstd/zstd_internal.h | 250 ++++
lib/zstd/zstd_internal.h | 263 ++++
lib/zstd/zstd_opt.h | 1014 +++++++++++++
19 files changed, 12994 insertions(+)
19 files changed, 13014 insertions(+)
create mode 100644 include/linux/zstd.h
create mode 100644 lib/zstd/Makefile
create mode 100644 lib/zstd/bitstream.h
@ -1753,10 +1760,10 @@ index 0000000..a826b99
+#endif /* BITSTREAM_H_MODULE */
diff --git a/lib/zstd/compress.c b/lib/zstd/compress.c
new file mode 100644
index 0000000..d60ab7d
index 0000000..f9166cf
--- /dev/null
+++ b/lib/zstd/compress.c
@@ -0,0 +1,3479 @@
@@ -0,0 +1,3484 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
@ -2342,7 +2349,7 @@ index 0000000..d60ab7d
+ mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
+}
+
+ZSTD_STATIC size_t ZSTD_compressSequences(ZSTD_CCtx *zc, void *dst, size_t dstCapacity, size_t srcSize)
+ZSTD_STATIC size_t ZSTD_compressSequences_internal(ZSTD_CCtx *zc, void *dst, size_t dstCapacity)
+{
+ const int longOffsets = zc->params.cParams.windowLog > STREAM_ACCUMULATOR_MIN;
+ const seqStore_t *seqStorePtr = &(zc->seqStore);
@ -2395,7 +2402,7 @@ index 0000000..d60ab7d
+ else
+ op[0] = 0xFF, ZSTD_writeLE16(op + 1, (U16)(nbSeq - LONGNBSEQ)), op += 3;
+ if (nbSeq == 0)
+ goto _check_compressibility;
+ return op - ostart;
+
+ /* seqHead : flags for FSE encoding type */
+ seqHead = op++;
@ -2585,28 +2592,33 @@ index 0000000..d60ab7d
+ op += streamSize;
+ }
+ }
+
+/* check compressibility */
+_check_compressibility:
+ {
+ size_t const minGain = ZSTD_minGain(srcSize);
+ size_t const maxCSize = srcSize - minGain;
+ if ((size_t)(op - ostart) >= maxCSize) {
+ zc->flagStaticHufTable = HUF_repeat_none;
+ return 0;
+ }
+ }
+
+ /* confirm repcodes */
+ {
+ int i;
+ for (i = 0; i < ZSTD_REP_NUM; i++)
+ zc->rep[i] = zc->repToConfirm[i];
+ }
+
+ return op - ostart;
+}
+
+ZSTD_STATIC size_t ZSTD_compressSequences(ZSTD_CCtx *zc, void *dst, size_t dstCapacity, size_t srcSize)
+{
+ size_t const cSize = ZSTD_compressSequences_internal(zc, dst, dstCapacity);
+ size_t const minGain = ZSTD_minGain(srcSize);
+ size_t const maxCSize = srcSize - minGain;
+ /* If the srcSize <= dstCapacity, then there is enough space to write a
+ * raw uncompressed block. Since we ran out of space, the block must not
+ * be compressible, so fall back to a raw uncompressed block.
+ */
+ int const uncompressibleError = cSize == ERROR(dstSize_tooSmall) && srcSize <= dstCapacity;
+ int i;
+
+ if (ZSTD_isError(cSize) && !uncompressibleError)
+ return cSize;
+ if (cSize >= maxCSize || uncompressibleError) {
+ zc->flagStaticHufTable = HUF_repeat_none;
+ return 0;
+ }
+ /* confirm repcodes */
+ for (i = 0; i < ZSTD_REP_NUM; i++)
+ zc->rep[i] = zc->repToConfirm[i];
+ return cSize;
+}
+
+/*! ZSTD_storeSeq() :
+ Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
+ `offsetCode` : distance to match, or 0 == repCode.
@ -5238,10 +5250,10 @@ index 0000000..d60ab7d
+MODULE_DESCRIPTION("Zstd Compressor");
diff --git a/lib/zstd/decompress.c b/lib/zstd/decompress.c
new file mode 100644
index 0000000..62449ae
index 0000000..b178467
--- /dev/null
+++ b/lib/zstd/decompress.c
@@ -0,0 +1,2526 @@
@@ -0,0 +1,2528 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
@ -6242,6 +6254,8 @@ index 0000000..62449ae
+ BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
+ FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
+
+ seq.match = NULL;
+
+ return seq;
+}
+
@ -11996,10 +12010,10 @@ index 0000000..a282624
+}
diff --git a/lib/zstd/zstd_internal.h b/lib/zstd/zstd_internal.h
new file mode 100644
index 0000000..f0ba474
index 0000000..1a79fab
--- /dev/null
+++ b/lib/zstd/zstd_internal.h
@@ -0,0 +1,250 @@
@@ -0,0 +1,263 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
@ -12128,7 +12142,7 @@ index 0000000..f0ba474
+/*-*******************************************
+* Shared functions to include for inlining
+*********************************************/
+static void ZSTD_copy8(void *dst, const void *src) {
+ZSTD_STATIC void ZSTD_copy8(void *dst, const void *src) {
+ memcpy(dst, src, 8);
+}
+/*! ZSTD_wildcopy() :
@ -12136,8 +12150,21 @@ index 0000000..f0ba474
+#define WILDCOPY_OVERLENGTH 8
+ZSTD_STATIC void ZSTD_wildcopy(void *dst, const void *src, ptrdiff_t length)
+{
+ if (length > 0)
+ memcpy(dst, src, length);
+ const BYTE* ip = (const BYTE*)src;
+ BYTE* op = (BYTE*)dst;
+ BYTE* const oend = op + length;
+ /* Work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81388.
+ * Avoid the bad case where the loop only runs once by handling the
+ * special case separately. This doesn't trigger the bug because it
+ * doesn't involve pointer/integer overflow.
+ */
+ if (length <= 8)
+ return ZSTD_copy8(dst, src);
+ do {
+ ZSTD_copy8(op, ip);
+ op += 8;
+ ip += 8;
+ } while (op < oend);
+}
+
+/*-*******************************************

View File

@ -1,7 +1,7 @@
From b0ef8fc63c9ca251ceca632f53aa1de8f1f17772 Mon Sep 17 00:00:00 2001
From 8a9dddfbf6551afea73911e367dd4be64d62b9fd Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Mon, 17 Jul 2017 17:08:39 -0700
Subject: [PATCH v3 3/4] btrfs: Add zstd support
Subject: [PATCH v5 3/5] btrfs: Add zstd support
Add zstd compression and decompression support to BtrFS. zstd at its
fastest level compresses almost as well as zlib, while offering much
@ -67,6 +67,10 @@ v2 -> v3:
- Port upstream BtrFS commits e1ddce71d6, 389a6cfc2a, and 6acafd1eff
- Change default compression level for BtrFS to 3
v3 -> v4:
- Add missing includes, which fixes the aarch64 build
- Fix minor linter warnings
fs/btrfs/Kconfig | 2 +
fs/btrfs/Makefile | 2 +-
fs/btrfs/compression.c | 1 +
@ -77,9 +81,9 @@ v2 -> v3:
fs/btrfs/props.c | 6 +
fs/btrfs/super.c | 12 +-
fs/btrfs/sysfs.c | 2 +
fs/btrfs/zstd.c | 435 +++++++++++++++++++++++++++++++++++++++++++++
fs/btrfs/zstd.c | 432 +++++++++++++++++++++++++++++++++++++++++++++
include/uapi/linux/btrfs.h | 8 +-
12 files changed, 471 insertions(+), 12 deletions(-)
12 files changed, 468 insertions(+), 12 deletions(-)
create mode 100644 fs/btrfs/zstd.c
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
@ -277,10 +281,10 @@ index c2d5f35..2b6d37c 100644
BTRFS_FEAT_ATTR_PTR(raid56),
diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c
new file mode 100644
index 0000000..1822068
index 0000000..607ce47
--- /dev/null
+++ b/fs/btrfs/zstd.c
@@ -0,0 +1,435 @@
@@ -0,0 +1,432 @@
+/*
+ * Copyright (c) 2016-present, Facebook, Inc.
+ * All rights reserved.
@ -293,20 +297,16 @@ index 0000000..1822068
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/sched.h>
+#include <linux/pagemap.h>
+#include <linux/bio.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/refcount.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/zstd.h>
+#include "compression.h"
+
@ -316,7 +316,8 @@ index 0000000..1822068
+
+static ZSTD_parameters zstd_get_btrfs_parameters(size_t src_len)
+{
+ ZSTD_parameters params = ZSTD_getParams(ZSTD_BTRFS_DEFAULT_LEVEL, src_len, 0);
+ ZSTD_parameters params = ZSTD_getParams(ZSTD_BTRFS_DEFAULT_LEVEL,
+ src_len, 0);
+
+ if (params.cParams.windowLog > ZSTD_BTRFS_MAX_WINDOWLOG)
+ params.cParams.windowLog = ZSTD_BTRFS_MAX_WINDOWLOG;

View File

@ -1,7 +1,7 @@
From 0cd63464d182bb9708f8b25f7da3dc8e5ec6b4fa Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
From 46bf8f6d30d6ddf2446c110f122482b5e5e16933 Mon Sep 17 00:00:00 2001
From: Sean Purcell <me@seanp.xyz>
Date: Mon, 17 Jul 2017 17:08:59 -0700
Subject: [PATCH v3 4/4] squashfs: Add zstd support
Subject: [PATCH v5 4/5] squashfs: Add zstd support
Add zstd compression and decompression support to SquashFS. zstd is a
great fit for SquashFS because it can compress at ratios approaching xz,
@ -42,16 +42,23 @@ taking over the submission process.
zstd source repository: https://github.com/facebook/zstd
Cc: Sean Purcell <me@seanp.xyz>
Signed-off-by: Sean Purcell <me@seanp.xyz>
Signed-off-by: Nick Terrell <terrelln@fb.com>
---
v3 -> v4:
- Fix minor linter warnings
v4 -> v5:
- Fix ZSTD_DStream initialization code in squashfs
- Fix patch documentation to reflect that Sean Purcell is the author
fs/squashfs/Kconfig | 14 +++++
fs/squashfs/Makefile | 1 +
fs/squashfs/decompressor.c | 7 +++
fs/squashfs/decompressor.h | 4 ++
fs/squashfs/squashfs_fs.h | 1 +
fs/squashfs/zstd_wrapper.c | 150 +++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 177 insertions(+)
fs/squashfs/zstd_wrapper.c | 151 +++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 178 insertions(+)
create mode 100644 fs/squashfs/zstd_wrapper.c
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
@ -140,10 +147,10 @@ index 506f4ba..24d12fd 100644
__le32 s_magic;
diff --git a/fs/squashfs/zstd_wrapper.c b/fs/squashfs/zstd_wrapper.c
new file mode 100644
index 0000000..8cb7c76
index 0000000..eeaabf8
--- /dev/null
+++ b/fs/squashfs/zstd_wrapper.c
@@ -0,0 +1,150 @@
@@ -0,0 +1,151 @@
+/*
+ * Squashfs - a compressed read only filesystem for Linux
+ *
@ -160,10 +167,6 @@ index 0000000..8cb7c76
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * zstd_wrapper.c
+ */
+
@ -182,15 +185,18 @@ index 0000000..8cb7c76
+struct workspace {
+ void *mem;
+ size_t mem_size;
+ size_t window_size;
+};
+
+static void *zstd_init(struct squashfs_sb_info *msblk, void *buff)
+{
+ struct workspace *wksp = kmalloc(sizeof(*wksp), GFP_KERNEL);
+
+ if (wksp == NULL)
+ goto failed;
+ wksp->mem_size = ZSTD_DStreamWorkspaceBound(max_t(size_t,
+ msblk->block_size, SQUASHFS_METADATA_SIZE));
+ wksp->window_size = max_t(size_t,
+ msblk->block_size, SQUASHFS_METADATA_SIZE);
+ wksp->mem_size = ZSTD_DStreamWorkspaceBound(wksp->window_size);
+ wksp->mem = vmalloc(wksp->mem_size);
+ if (wksp->mem == NULL)
+ goto failed;
@ -226,7 +232,7 @@ index 0000000..8cb7c76
+ ZSTD_inBuffer in_buf = { NULL, 0, 0 };
+ ZSTD_outBuffer out_buf = { NULL, 0, 0 };
+
+ stream = ZSTD_initDStream(wksp->mem_size, wksp->mem, wksp->mem_size);
+ stream = ZSTD_initDStream(wksp->window_size, wksp->mem, wksp->mem_size);
+
+ if (!stream) {
+ ERROR("Failed to initialize zstd decompressor\n");
@ -239,6 +245,7 @@ index 0000000..8cb7c76
+ do {
+ if (in_buf.pos == in_buf.size && k < b) {
+ int avail = min(length, msblk->devblksize - offset);
+
+ length -= avail;
+ in_buf.src = bh[k]->b_data + offset;
+ in_buf.size = avail;
@ -249,8 +256,9 @@ index 0000000..8cb7c76
+ if (out_buf.pos == out_buf.size) {
+ out_buf.dst = squashfs_next_page(output);
+ if (out_buf.dst == NULL) {
+ /* shouldn't run out of pages before stream is
+ * done */
+ /* Shouldn't run out of pages
+ * before stream is done.
+ */
+ squashfs_finish_page(output);
+ goto out;
+ }

View File

@ -0,0 +1,424 @@
From 308795a7713ca6fcd468b60fba9a2fca99cee6a0 Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Wed, 2 Aug 2017 18:02:13 -0700
Subject: [PATCH v5 5/5] crypto: Add zstd support
Adds zstd support to crypto and scompress. Only supports the default
level.
Signed-off-by: Nick Terrell <terrelln@fb.com>
---
crypto/Kconfig | 9 ++
crypto/Makefile | 1 +
crypto/testmgr.c | 10 +++
crypto/testmgr.h | 71 +++++++++++++++
crypto/zstd.c | 265 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 356 insertions(+)
create mode 100644 crypto/zstd.c
diff --git a/crypto/Kconfig b/crypto/Kconfig
index caa770e..4fc3936 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1662,6 +1662,15 @@ config CRYPTO_LZ4HC
help
This is the LZ4 high compression mode algorithm.
+config CRYPTO_ZSTD
+ tristate "Zstd compression algorithm"
+ select CRYPTO_ALGAPI
+ select CRYPTO_ACOMP2
+ select ZSTD_COMPRESS
+ select ZSTD_DECOMPRESS
+ help
+ This is the zstd algorithm.
+
comment "Random Number Generation"
config CRYPTO_ANSI_CPRNG
diff --git a/crypto/Makefile b/crypto/Makefile
index d41f033..b22e1e8 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -133,6 +133,7 @@ obj-$(CONFIG_CRYPTO_USER_API_HASH) += algif_hash.o
obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o
obj-$(CONFIG_CRYPTO_USER_API_RNG) += algif_rng.o
obj-$(CONFIG_CRYPTO_USER_API_AEAD) += algif_aead.o
+obj-$(CONFIG_CRYPTO_ZSTD) += zstd.o
ecdh_generic-y := ecc.o
ecdh_generic-y += ecdh.o
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 7125ba3..8a124d3 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -3603,6 +3603,16 @@ static const struct alg_test_desc alg_test_descs[] = {
.decomp = __VECS(zlib_deflate_decomp_tv_template)
}
}
+ }, {
+ .alg = "zstd",
+ .test = alg_test_comp,
+ .fips_allowed = 1,
+ .suite = {
+ .comp = {
+ .comp = __VECS(zstd_comp_tv_template),
+ .decomp = __VECS(zstd_decomp_tv_template)
+ }
+ }
}
};
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 6ceb0e2..e6b5920 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -34631,4 +34631,75 @@ static const struct comp_testvec lz4hc_decomp_tv_template[] = {
},
};
+static const struct comp_testvec zstd_comp_tv_template[] = {
+ {
+ .inlen = 68,
+ .outlen = 39,
+ .input = "The algorithm is zstd. "
+ "The algorithm is zstd. "
+ "The algorithm is zstd.",
+ .output = "\x28\xb5\x2f\xfd\x00\x50\xf5\x00\x00\xb8\x54\x68\x65"
+ "\x20\x61\x6c\x67\x6f\x72\x69\x74\x68\x6d\x20\x69\x73"
+ "\x20\x7a\x73\x74\x64\x2e\x20\x01\x00\x55\x73\x36\x01"
+ ,
+ },
+ {
+ .inlen = 244,
+ .outlen = 151,
+ .input = "zstd, short for Zstandard, is a fast lossless "
+ "compression algorithm, targeting real-time "
+ "compression scenarios at zlib-level and better "
+ "compression ratios. The zstd compression library "
+ "provides in-memory compression and decompression "
+ "functions.",
+ .output = "\x28\xb5\x2f\xfd\x00\x50\x75\x04\x00\x42\x4b\x1e\x17"
+ "\x90\x81\x31\x00\xf2\x2f\xe4\x36\xc9\xef\x92\x88\x32"
+ "\xc9\xf2\x24\x94\xd8\x68\x9a\x0f\x00\x0c\xc4\x31\x6f"
+ "\x0d\x0c\x38\xac\x5c\x48\x03\xcd\x63\x67\xc0\xf3\xad"
+ "\x4e\x90\xaa\x78\xa0\xa4\xc5\x99\xda\x2f\xb6\x24\x60"
+ "\xe2\x79\x4b\xaa\xb6\x6b\x85\x0b\xc9\xc6\x04\x66\x86"
+ "\xe2\xcc\xe2\x25\x3f\x4f\x09\xcd\xb8\x9d\xdb\xc1\x90"
+ "\xa9\x11\xbc\x35\x44\x69\x2d\x9c\x64\x4f\x13\x31\x64"
+ "\xcc\xfb\x4d\x95\x93\x86\x7f\x33\x7f\x1a\xef\xe9\x30"
+ "\xf9\x67\xa1\x94\x0a\x69\x0f\x60\xcd\xc3\xab\x99\xdc"
+ "\x42\xed\x97\x05\x00\x33\xc3\x15\x95\x3a\x06\xa0\x0e"
+ "\x20\xa9\x0e\x82\xb9\x43\x45\x01",
+ },
+};
+
+static const struct comp_testvec zstd_decomp_tv_template[] = {
+ {
+ .inlen = 43,
+ .outlen = 68,
+ .input = "\x28\xb5\x2f\xfd\x04\x50\xf5\x00\x00\xb8\x54\x68\x65"
+ "\x20\x61\x6c\x67\x6f\x72\x69\x74\x68\x6d\x20\x69\x73"
+ "\x20\x7a\x73\x74\x64\x2e\x20\x01\x00\x55\x73\x36\x01"
+ "\x6b\xf4\x13\x35",
+ .output = "The algorithm is zstd. "
+ "The algorithm is zstd. "
+ "The algorithm is zstd.",
+ },
+ {
+ .inlen = 155,
+ .outlen = 244,
+ .input = "\x28\xb5\x2f\xfd\x04\x50\x75\x04\x00\x42\x4b\x1e\x17"
+ "\x90\x81\x31\x00\xf2\x2f\xe4\x36\xc9\xef\x92\x88\x32"
+ "\xc9\xf2\x24\x94\xd8\x68\x9a\x0f\x00\x0c\xc4\x31\x6f"
+ "\x0d\x0c\x38\xac\x5c\x48\x03\xcd\x63\x67\xc0\xf3\xad"
+ "\x4e\x90\xaa\x78\xa0\xa4\xc5\x99\xda\x2f\xb6\x24\x60"
+ "\xe2\x79\x4b\xaa\xb6\x6b\x85\x0b\xc9\xc6\x04\x66\x86"
+ "\xe2\xcc\xe2\x25\x3f\x4f\x09\xcd\xb8\x9d\xdb\xc1\x90"
+ "\xa9\x11\xbc\x35\x44\x69\x2d\x9c\x64\x4f\x13\x31\x64"
+ "\xcc\xfb\x4d\x95\x93\x86\x7f\x33\x7f\x1a\xef\xe9\x30"
+ "\xf9\x67\xa1\x94\x0a\x69\x0f\x60\xcd\xc3\xab\x99\xdc"
+ "\x42\xed\x97\x05\x00\x33\xc3\x15\x95\x3a\x06\xa0\x0e"
+ "\x20\xa9\x0e\x82\xb9\x43\x45\x01\xaa\x6d\xda\x0d",
+ .output = "zstd, short for Zstandard, is a fast lossless "
+ "compression algorithm, targeting real-time "
+ "compression scenarios at zlib-level and better "
+ "compression ratios. The zstd compression library "
+ "provides in-memory compression and decompression "
+ "functions.",
+ },
+};
#endif /* _CRYPTO_TESTMGR_H */
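As a sanity check on the vectors above: every zstd frame begins with the magic number 0xFD2FB528 stored little-endian, which is why each compressed buffer starts with "\x28\xb5\x2f\xfd". An illustrative check (not part of the patch):

    #include <linux/string.h>
    #include <linux/types.h>

    static bool looks_like_zstd_frame(const u8 *buf, size_t len)
    {
    	static const u8 magic[4] = { 0x28, 0xb5, 0x2f, 0xfd }; /* 0xFD2FB528 LE */

    	return len >= 4 && memcmp(buf, magic, 4) == 0;
    }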
diff --git a/crypto/zstd.c b/crypto/zstd.c
new file mode 100644
index 0000000..9a76b3e
--- /dev/null
+++ b/crypto/zstd.c
@@ -0,0 +1,265 @@
+/*
+ * Cryptographic API.
+ *
+ * Copyright (c) 2017-present, Facebook, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/vmalloc.h>
+#include <linux/zstd.h>
+#include <crypto/internal/scompress.h>
+
+
+#define ZSTD_DEF_LEVEL 3
+
+struct zstd_ctx {
+ ZSTD_CCtx *cctx;
+ ZSTD_DCtx *dctx;
+ void *cwksp;
+ void *dwksp;
+};
+
+static ZSTD_parameters zstd_params(void)
+{
+ return ZSTD_getParams(ZSTD_DEF_LEVEL, 0, 0);
+}
+
+static int zstd_comp_init(struct zstd_ctx *ctx)
+{
+ int ret = 0;
+ const ZSTD_parameters params = zstd_params();
+ const size_t wksp_size = ZSTD_CCtxWorkspaceBound(params.cParams);
+
+ ctx->cwksp = vzalloc(wksp_size);
+ if (!ctx->cwksp) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ctx->cctx = ZSTD_initCCtx(ctx->cwksp, wksp_size);
+ if (!ctx->cctx) {
+ ret = -EINVAL;
+ goto out_free;
+ }
+out:
+ return ret;
+out_free:
+ vfree(ctx->cwksp);
+ goto out;
+}
+
+static int zstd_decomp_init(struct zstd_ctx *ctx)
+{
+ int ret = 0;
+ const size_t wksp_size = ZSTD_DCtxWorkspaceBound();
+
+ ctx->dwksp = vzalloc(wksp_size);
+ if (!ctx->dwksp) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ctx->dctx = ZSTD_initDCtx(ctx->dwksp, wksp_size);
+ if (!ctx->dctx) {
+ ret = -EINVAL;
+ goto out_free;
+ }
+out:
+ return ret;
+out_free:
+ vfree(ctx->dwksp);
+ goto out;
+}
+
+static void zstd_comp_exit(struct zstd_ctx *ctx)
+{
+ vfree(ctx->cwksp);
+ ctx->cwksp = NULL;
+ ctx->cctx = NULL;
+}
+
+static void zstd_decomp_exit(struct zstd_ctx *ctx)
+{
+ vfree(ctx->dwksp);
+ ctx->dwksp = NULL;
+ ctx->dctx = NULL;
+}
+
+static int __zstd_init(void *ctx)
+{
+ int ret;
+
+ ret = zstd_comp_init(ctx);
+ if (ret)
+ return ret;
+ ret = zstd_decomp_init(ctx);
+ if (ret)
+ zstd_comp_exit(ctx);
+ return ret;
+}
+
+static void *zstd_alloc_ctx(struct crypto_scomp *tfm)
+{
+ int ret;
+ struct zstd_ctx *ctx;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return ERR_PTR(-ENOMEM);
+
+ ret = __zstd_init(ctx);
+ if (ret) {
+ kfree(ctx);
+ return ERR_PTR(ret);
+ }
+
+ return ctx;
+}
+
+static int zstd_init(struct crypto_tfm *tfm)
+{
+ struct zstd_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ return __zstd_init(ctx);
+}
+
+static void __zstd_exit(void *ctx)
+{
+ zstd_comp_exit(ctx);
+ zstd_decomp_exit(ctx);
+}
+
+static void zstd_free_ctx(struct crypto_scomp *tfm, void *ctx)
+{
+ __zstd_exit(ctx);
+ kzfree(ctx);
+}
+
+static void zstd_exit(struct crypto_tfm *tfm)
+{
+ struct zstd_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ __zstd_exit(ctx);
+}
+
+static int __zstd_compress(const u8 *src, unsigned int slen,
+ u8 *dst, unsigned int *dlen, void *ctx)
+{
+ size_t out_len;
+ struct zstd_ctx *zctx = ctx;
+ const ZSTD_parameters params = zstd_params();
+
+ out_len = ZSTD_compressCCtx(zctx->cctx, dst, *dlen, src, slen, params);
+ if (ZSTD_isError(out_len))
+ return -EINVAL;
+ *dlen = out_len;
+ return 0;
+}
+
+static int zstd_compress(struct crypto_tfm *tfm, const u8 *src,
+ unsigned int slen, u8 *dst, unsigned int *dlen)
+{
+ struct zstd_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ return __zstd_compress(src, slen, dst, dlen, ctx);
+}
+
+static int zstd_scompress(struct crypto_scomp *tfm, const u8 *src,
+ unsigned int slen, u8 *dst, unsigned int *dlen,
+ void *ctx)
+{
+ return __zstd_compress(src, slen, dst, dlen, ctx);
+}
+
+static int __zstd_decompress(const u8 *src, unsigned int slen,
+ u8 *dst, unsigned int *dlen, void *ctx)
+{
+ size_t out_len;
+ struct zstd_ctx *zctx = ctx;
+
+ out_len = ZSTD_decompressDCtx(zctx->dctx, dst, *dlen, src, slen);
+ if (ZSTD_isError(out_len))
+ return -EINVAL;
+ *dlen = out_len;
+ return 0;
+}
+
+static int zstd_decompress(struct crypto_tfm *tfm, const u8 *src,
+ unsigned int slen, u8 *dst, unsigned int *dlen)
+{
+ struct zstd_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ return __zstd_decompress(src, slen, dst, dlen, ctx);
+}
+
+static int zstd_sdecompress(struct crypto_scomp *tfm, const u8 *src,
+ unsigned int slen, u8 *dst, unsigned int *dlen,
+ void *ctx)
+{
+ return __zstd_decompress(src, slen, dst, dlen, ctx);
+}
+
+static struct crypto_alg alg = {
+ .cra_name = "zstd",
+ .cra_flags = CRYPTO_ALG_TYPE_COMPRESS,
+ .cra_ctxsize = sizeof(struct zstd_ctx),
+ .cra_module = THIS_MODULE,
+ .cra_init = zstd_init,
+ .cra_exit = zstd_exit,
+ .cra_u = { .compress = {
+ .coa_compress = zstd_compress,
+ .coa_decompress = zstd_decompress } }
+};
+
+static struct scomp_alg scomp = {
+ .alloc_ctx = zstd_alloc_ctx,
+ .free_ctx = zstd_free_ctx,
+ .compress = zstd_scompress,
+ .decompress = zstd_sdecompress,
+ .base = {
+ .cra_name = "zstd",
+ .cra_driver_name = "zstd-scomp",
+ .cra_module = THIS_MODULE,
+ }
+};
+
+static int __init zstd_mod_init(void)
+{
+ int ret;
+
+ ret = crypto_register_alg(&alg);
+ if (ret)
+ return ret;
+
+ ret = crypto_register_scomp(&scomp);
+ if (ret)
+ crypto_unregister_alg(&alg);
+
+ return ret;
+}
+
+static void __exit zstd_mod_fini(void)
+{
+ crypto_unregister_alg(&alg);
+ crypto_unregister_scomp(&scomp);
+}
+
+module_init(zstd_mod_init);
+module_exit(zstd_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Zstd Compression Algorithm");
+MODULE_ALIAS_CRYPTO("zstd");
--
2.9.3
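A sketch of how another kernel subsystem could consume the newly registered "zstd" algorithm through the long-standing crypto_comp interface; the helper below is illustrative and assumes the classic crypto_alloc_comp()/crypto_comp_compress() API rather than anything added by this patch:

    #include <linux/crypto.h>
    #include <linux/err.h>

    static int example_crypto_zstd_compress(const u8 *src, unsigned int slen,
    					u8 *dst, unsigned int *dlen)
    {
    	struct crypto_comp *tfm = crypto_alloc_comp("zstd", 0, 0);
    	int err;

    	if (IS_ERR(tfm))
    		return PTR_ERR(tfm);
    	/* *dlen is in/out: capacity of dst on entry, compressed size on exit */
    	err = crypto_comp_compress(tfm, src, slen, dst, dlen);
    	crypto_free_comp(tfm);
    	return err;
    }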

View File

@ -0,0 +1,418 @@
From cc08b43a31fed1289c2027d5090999da569457f1 Mon Sep 17 00:00:00 2001
From: Sean Purcell <me@seanp.xyz>
Date: Thu, 3 Aug 2017 17:47:03 -0700
Subject: [PATCH v5] squashfs-tools: Add zstd support
This patch adds zstd support to squashfs-tools. It works with zstd
versions >= 1.0.0. It was originally written by Sean Purcell.
Signed-off-by: Sean Purcell <me@seanp.xyz>
Signed-off-by: Nick Terrell <terrelln@fb.com>
---
v4 -> v5:
- Fix patch documentation to reflect that Sean Purcell is the author
- Don't strip trailing whitespace of unrelated code
- Make zstd_display_options() static
squashfs-tools/Makefile | 21 ++++
squashfs-tools/compressor.c | 8 ++
squashfs-tools/squashfs_fs.h | 1 +
squashfs-tools/zstd_wrapper.c | 254 ++++++++++++++++++++++++++++++++++++++++++
squashfs-tools/zstd_wrapper.h | 48 ++++++++
5 files changed, 332 insertions(+)
create mode 100644 squashfs-tools/zstd_wrapper.c
create mode 100644 squashfs-tools/zstd_wrapper.h
diff --git a/squashfs-tools/Makefile b/squashfs-tools/Makefile
index 52d2582..8e82e09 100644
--- a/squashfs-tools/Makefile
+++ b/squashfs-tools/Makefile
@@ -75,6 +75,19 @@ GZIP_SUPPORT = 1
#LZMA_SUPPORT = 1
#LZMA_DIR = ../../../../LZMA/lzma465
+
+########### Building ZSTD support ############
+#
+# The ZSTD library is supported
+# ZSTD homepage: http://zstd.net
+# ZSTD source repository: https://github.com/facebook/zstd
+#
+# To build, configure the tools using cmake to build shared libraries,
+# install, and uncomment
+# the ZSTD_SUPPORT line below.
+#
+#ZSTD_SUPPORT = 1
+
######## Specifying default compression ########
#
# The next line specifies which compression algorithm is used by default
@@ -177,6 +190,14 @@ LIBS += -llz4
COMPRESSORS += lz4
endif
+ifeq ($(ZSTD_SUPPORT),1)
+CFLAGS += -DZSTD_SUPPORT
+MKSQUASHFS_OBJS += zstd_wrapper.o
+UNSQUASHFS_OBJS += zstd_wrapper.o
+LIBS += -lzstd
+COMPRESSORS += zstd
+endif
+
ifeq ($(XATTR_SUPPORT),1)
ifeq ($(XATTR_DEFAULT),1)
CFLAGS += -DXATTR_SUPPORT -DXATTR_DEFAULT
diff --git a/squashfs-tools/compressor.c b/squashfs-tools/compressor.c
index 525e316..02b5e90 100644
--- a/squashfs-tools/compressor.c
+++ b/squashfs-tools/compressor.c
@@ -65,6 +65,13 @@ static struct compressor xz_comp_ops = {
extern struct compressor xz_comp_ops;
#endif
+#ifndef ZSTD_SUPPORT
+static struct compressor zstd_comp_ops = {
+ ZSTD_COMPRESSION, "zstd"
+};
+#else
+extern struct compressor zstd_comp_ops;
+#endif
static struct compressor unknown_comp_ops = {
0, "unknown"
@@ -77,6 +84,7 @@ struct compressor *compressor[] = {
&lzo_comp_ops,
&lz4_comp_ops,
&xz_comp_ops,
+ &zstd_comp_ops,
&unknown_comp_ops
};
diff --git a/squashfs-tools/squashfs_fs.h b/squashfs-tools/squashfs_fs.h
index 791fe12..afca918 100644
--- a/squashfs-tools/squashfs_fs.h
+++ b/squashfs-tools/squashfs_fs.h
@@ -277,6 +277,7 @@ typedef long long squashfs_inode;
#define LZO_COMPRESSION 3
#define XZ_COMPRESSION 4
#define LZ4_COMPRESSION 5
+#define ZSTD_COMPRESSION 6
struct squashfs_super_block {
unsigned int s_magic;
diff --git a/squashfs-tools/zstd_wrapper.c b/squashfs-tools/zstd_wrapper.c
new file mode 100644
index 0000000..dcab75a
--- /dev/null
+++ b/squashfs-tools/zstd_wrapper.c
@@ -0,0 +1,254 @@
+/*
+ * Copyright (c) 2017
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * zstd_wrapper.c
+ *
+ * Support for ZSTD compression http://zstd.net
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <zstd.h>
+#include <zstd_errors.h>
+
+#include "squashfs_fs.h"
+#include "zstd_wrapper.h"
+#include "compressor.h"
+
+static int compression_level = ZSTD_DEFAULT_COMPRESSION_LEVEL;
+
+/*
+ * This function is called by the options parsing code in mksquashfs.c
+ * to parse any -X compressor option.
+ *
+ * This function returns:
+ * >=0 (number of additional args parsed) on success
+ * -1 if the option was unrecognised, or
+ * -2 if the option was recognised, but otherwise bad in
+ * some way (e.g. invalid parameter)
+ *
+ * Note: this function sets internal compressor state, but does not
+ * pass back the results of the parsing other than success/failure.
+ * The zstd_dump_options() function is called later to get the options in
+ * a format suitable for writing to the filesystem.
+ */
+static int zstd_options(char *argv[], int argc)
+{
+ if (strcmp(argv[0], "-Xcompression-level") == 0) {
+ if (argc < 2) {
+ fprintf(stderr, "zstd: -Xcompression-level missing "
+ "compression level\n");
+ fprintf(stderr, "zstd: -Xcompression-level it should "
+ "be 1 <= n <= %d\n", ZSTD_maxCLevel());
+ goto failed;
+ }
+
+ compression_level = atoi(argv[1]);
+ if (compression_level < 1 ||
+ compression_level > ZSTD_maxCLevel()) {
+ fprintf(stderr, "zstd: -Xcompression-level invalid, it "
+ "should be 1 <= n <= %d\n", ZSTD_maxCLevel());
+ goto failed;
+ }
+
+ return 1;
+ }
+
+ return -1;
+failed:
+ return -2;
+}
+
+/*
+ * This function is called by mksquashfs to dump the parsed
+ * compressor options in a format suitable for writing to the
+ * compressor options field in the filesystem (stored immediately
+ * after the superblock).
+ *
+ * This function returns a pointer to the compression options structure
+ * to be stored (and the size), or NULL if there are no compression
+ * options.
+ */
+static void *zstd_dump_options(int block_size, int *size)
+{
+ static struct zstd_comp_opts comp_opts;
+
+ /* don't return anything if the options are all default */
+ if (compression_level == ZSTD_DEFAULT_COMPRESSION_LEVEL)
+ return NULL;
+
+ comp_opts.compression_level = compression_level;
+
+ SQUASHFS_INSWAP_COMP_OPTS(&comp_opts);
+
+ *size = sizeof(comp_opts);
+ return &comp_opts;
+}
+
+/*
+ * This function is a helper specifically for the append mode of
+ * mksquashfs. Its purpose is to set the internal compressor state
+ * to the stored compressor options in the passed compressor options
+ * structure.
+ *
+ * In effect this function sets up the compressor options
+ * to the same state they were when the filesystem was originally
+ * generated, this is to ensure on appending, the compressor uses
+ * the same compression options that were used to generate the
+ * original filesystem.
+ *
+ * Note, even if there are no compressor options, this function is still
+ * called with an empty compressor structure (size == 0), to explicitly
+ * set the default options, this is to ensure any user supplied
+ * -X options on the appending mksquashfs command line are over-ridden.
+ *
+ * This function returns 0 on successful extraction of options, and -1 on error.
+ */
+static int zstd_extract_options(int block_size, void *buffer, int size)
+{
+ struct zstd_comp_opts *comp_opts = buffer;
+
+ if (size == 0) {
+ /* Set default values */
+ compression_level = ZSTD_DEFAULT_COMPRESSION_LEVEL;
+ return 0;
+ }
+
+ /* we expect a comp_opts structure of sufficient size to be present */
+ if (size < sizeof(*comp_opts))
+ goto failed;
+
+ SQUASHFS_INSWAP_COMP_OPTS(comp_opts);
+
+ if (comp_opts->compression_level < 1 ||
+ comp_opts->compression_level > ZSTD_maxCLevel()) {
+ fprintf(stderr, "zstd: bad compression level in compression "
+ "options structure\n");
+ goto failed;
+ }
+
+ compression_level = comp_opts->compression_level;
+
+ return 0;
+
+failed:
+ fprintf(stderr, "zstd: error reading stored compressor options from "
+ "filesystem!\n");
+
+ return -1;
+}
+
+static void zstd_display_options(void *buffer, int size)
+{
+ struct zstd_comp_opts *comp_opts = buffer;
+
+ /* we expect a comp_opts structure of sufficient size to be present */
+ if (size < sizeof(*comp_opts))
+ goto failed;
+
+ SQUASHFS_INSWAP_COMP_OPTS(comp_opts);
+
+ if (comp_opts->compression_level < 1 ||
+ comp_opts->compression_level > ZSTD_maxCLevel()) {
+ fprintf(stderr, "zstd: bad compression level in compression "
+ "options structure\n");
+ goto failed;
+ }
+
+ printf("\tcompression-level %d\n", comp_opts->compression_level);
+
+ return;
+
+failed:
+ fprintf(stderr, "zstd: error reading stored compressor options from "
+ "filesystem!\n");
+}
+
+/*
+ * This function is called by mksquashfs to initialise the
+ * compressor, before compress() is called.
+ *
+ * This function returns 0 on success, and -1 on error.
+ */
+static int zstd_init(void **strm, int block_size, int datablock)
+{
+ ZSTD_CCtx *cctx = ZSTD_createCCtx();
+
+ if (!cctx) {
+ fprintf(stderr, "zstd: failed to allocate compression "
+ "context!\n");
+ return -1;
+ }
+
+ *strm = cctx;
+ return 0;
+}
+
+static int zstd_compress(void *strm, void *dest, void *src, int size,
+ int block_size, int *error)
+{
+ const size_t res = ZSTD_compressCCtx((ZSTD_CCtx*)strm, dest, block_size,
+ src, size, compression_level);
+
+ if (ZSTD_isError(res)) {
+ /* FIXME:
+ * zstd does not expose stable error codes. The error enum may
+ * change between versions. Until upstream zstd stabilizes the
+ * error codes, we have no way of knowing why the error occurs.
+ * zstd shouldn't fail to compress any input unless there isn't
+ * enough output space. We assume that is the cause and return
+ * the special error code for not enough output space.
+ */
+ return 0;
+ }
+
+ return (int)res;
+}
+
+static int zstd_uncompress(void *dest, void *src, int size, int outsize,
+ int *error)
+{
+ const size_t res = ZSTD_decompress(dest, outsize, src, size);
+
+ if (ZSTD_isError(res)) {
+ fprintf(stderr, "\t%d %d\n", outsize, size);
+
+ *error = (int)ZSTD_getErrorCode(res);
+ return -1;
+ }
+
+ return (int)res;
+}
+
+static void zstd_usage(void)
+{
+ fprintf(stderr, "\t -Xcompression-level <compression-level>\n");
+ fprintf(stderr, "\t\t<compression-level> should be 1 .. %d (default "
+ "%d)\n", ZSTD_maxCLevel(), ZSTD_DEFAULT_COMPRESSION_LEVEL);
+}
+
+struct compressor zstd_comp_ops = {
+ .init = zstd_init,
+ .compress = zstd_compress,
+ .uncompress = zstd_uncompress,
+ .options = zstd_options,
+ .dump_options = zstd_dump_options,
+ .extract_options = zstd_extract_options,
+ .display_options = zstd_display_options,
+ .usage = zstd_usage,
+ .id = ZSTD_COMPRESSION,
+ .name = "zstd",
+ .supported = 1
+};
diff --git a/squashfs-tools/zstd_wrapper.h b/squashfs-tools/zstd_wrapper.h
new file mode 100644
index 0000000..4fbef0a
--- /dev/null
+++ b/squashfs-tools/zstd_wrapper.h
@@ -0,0 +1,48 @@
+#ifndef ZSTD_WRAPPER_H
+#define ZSTD_WRAPPER_H
+/*
+ * Squashfs
+ *
+ * Copyright (c) 2017
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * zstd_wrapper.h
+ *
+ */
+
+#ifndef linux
+#define __BYTE_ORDER BYTE_ORDER
+#define __BIG_ENDIAN BIG_ENDIAN
+#define __LITTLE_ENDIAN LITTLE_ENDIAN
+#else
+#include <endian.h>
+#endif
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+extern unsigned int inswap_le16(unsigned short);
+extern unsigned int inswap_le32(unsigned int);
+
+#define SQUASHFS_INSWAP_COMP_OPTS(s) { \
+ (s)->compression_level = inswap_le32((s)->compression_level); \
+}
+#else
+#define SQUASHFS_INSWAP_COMP_OPTS(s)
+#endif
+
+/* Default compression */
+#define ZSTD_DEFAULT_COMPRESSION_LEVEL 15
+
+struct zstd_comp_opts {
+ int compression_level;
+};
+#endif
--
2.9.3

View File

@ -1,7 +1,7 @@
# Linux Kernel Patch
There are four pieces, the `xxhash` kernel module, the `zstd_compress` and `zstd_decompress` kernel modules, the BtrFS patch, and the SquashFS patch.
The patches are based off of the linux kernel master branch (version 4.10).
The patches are based off of the linux kernel master branch.
## xxHash kernel module
@ -42,7 +42,7 @@ The patches are based off of the linux kernel master branch (version 4.10).
Benchmarks run on a Ubuntu 14.04 with 2 cores and 4 GiB of RAM.
The VM is running on a Macbook Pro with a 3.1 GHz Intel Core i7 processor,
16 GB of ram, and a SSD.
The kernel running was built from the master branch with the patch (version 4.10).
The kernel running was built from the master branch with the patch.
The compression benchmark is copying 10 copies of the
unzipped [silesia corpus](http://mattmahoney.net/dc/silesia.html) into a BtrFS
@ -69,14 +69,14 @@ See `btrfs-benchmark.sh` for details.
* The patch is located in `squashfs.diff`
* Additionally `fs/squashfs/zstd_wrapper.c` is provided as a source for convenience.
* The patch has been tested on a 4.10 kernel.
* The patch has been tested on the master branch of the kernel.
### Benchmarks
Benchmarks were run on an Ubuntu 14.04 VM with 2 cores and 4 GiB of RAM.
The VM is running on a MacBook Pro with a 3.1 GHz Intel Core i7 processor,
16 GB of RAM, and an SSD.
The kernel running was built from the master branch with the patch (version 4.10).
The kernel running was built from the master branch with the patch.
The compression benchmark is the file tree from the SquashFS archive found in the
Ubuntu 16.10 desktop image (ubuntu-16.10-desktop-amd64.iso).

View File

@ -10,20 +10,16 @@
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/bio.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/refcount.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/zstd.h>
#include "compression.h"
@ -33,7 +29,8 @@
static ZSTD_parameters zstd_get_btrfs_parameters(size_t src_len)
{
ZSTD_parameters params = ZSTD_getParams(ZSTD_BTRFS_DEFAULT_LEVEL, src_len, 0);
ZSTD_parameters params = ZSTD_getParams(ZSTD_BTRFS_DEFAULT_LEVEL,
src_len, 0);
if (params.cParams.windowLog > ZSTD_BTRFS_MAX_WINDOWLOG)
params.cParams.windowLog = ZSTD_BTRFS_MAX_WINDOWLOG;

View File

@ -14,10 +14,6 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* zstd_wrapper.c
*/
@ -36,15 +32,18 @@
struct workspace {
void *mem;
size_t mem_size;
size_t window_size;
};
static void *zstd_init(struct squashfs_sb_info *msblk, void *buff)
{
struct workspace *wksp = kmalloc(sizeof(*wksp), GFP_KERNEL);
if (wksp == NULL)
goto failed;
wksp->mem_size = ZSTD_DStreamWorkspaceBound(max_t(size_t,
msblk->block_size, SQUASHFS_METADATA_SIZE));
wksp->window_size = max_t(size_t,
msblk->block_size, SQUASHFS_METADATA_SIZE);
wksp->mem_size = ZSTD_DStreamWorkspaceBound(wksp->window_size);
wksp->mem = vmalloc(wksp->mem_size);
if (wksp->mem == NULL)
goto failed;
@ -80,7 +79,7 @@ static int zstd_uncompress(struct squashfs_sb_info *msblk, void *strm,
ZSTD_inBuffer in_buf = { NULL, 0, 0 };
ZSTD_outBuffer out_buf = { NULL, 0, 0 };
stream = ZSTD_initDStream(wksp->mem_size, wksp->mem, wksp->mem_size);
stream = ZSTD_initDStream(wksp->window_size, wksp->mem, wksp->mem_size);
if (!stream) {
ERROR("Failed to initialize zstd decompressor\n");
@ -93,6 +92,7 @@ static int zstd_uncompress(struct squashfs_sb_info *msblk, void *strm,
do {
if (in_buf.pos == in_buf.size && k < b) {
int avail = min(length, msblk->devblksize - offset);
length -= avail;
in_buf.src = bh[k]->b_data + offset;
in_buf.size = avail;
@ -103,8 +103,9 @@ static int zstd_uncompress(struct squashfs_sb_info *msblk, void *strm,
if (out_buf.pos == out_buf.size) {
out_buf.dst = squashfs_next_page(output);
if (out_buf.dst == NULL) {
/* shouldn't run out of pages before stream is
* done */
/* Shouldn't run out of pages
* before stream is done.
*/
squashfs_finish_page(output);
goto out;
}

View File

@ -583,7 +583,7 @@ void ZSTD_seqToCodes(const seqStore_t *seqStorePtr)
mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
}
ZSTD_STATIC size_t ZSTD_compressSequences(ZSTD_CCtx *zc, void *dst, size_t dstCapacity, size_t srcSize)
ZSTD_STATIC size_t ZSTD_compressSequences_internal(ZSTD_CCtx *zc, void *dst, size_t dstCapacity)
{
const int longOffsets = zc->params.cParams.windowLog > STREAM_ACCUMULATOR_MIN;
const seqStore_t *seqStorePtr = &(zc->seqStore);
@ -636,7 +636,7 @@ ZSTD_STATIC size_t ZSTD_compressSequences(ZSTD_CCtx *zc, void *dst, size_t dstCa
else
op[0] = 0xFF, ZSTD_writeLE16(op + 1, (U16)(nbSeq - LONGNBSEQ)), op += 3;
if (nbSeq == 0)
goto _check_compressibility;
return op - ostart;
/* seqHead : flags for FSE encoding type */
seqHead = op++;
@ -826,28 +826,33 @@ ZSTD_STATIC size_t ZSTD_compressSequences(ZSTD_CCtx *zc, void *dst, size_t dstCa
op += streamSize;
}
}
/* check compressibility */
_check_compressibility:
{
size_t const minGain = ZSTD_minGain(srcSize);
size_t const maxCSize = srcSize - minGain;
if ((size_t)(op - ostart) >= maxCSize) {
zc->flagStaticHufTable = HUF_repeat_none;
return 0;
}
}
/* confirm repcodes */
{
int i;
for (i = 0; i < ZSTD_REP_NUM; i++)
zc->rep[i] = zc->repToConfirm[i];
}
return op - ostart;
}
ZSTD_STATIC size_t ZSTD_compressSequences(ZSTD_CCtx *zc, void *dst, size_t dstCapacity, size_t srcSize)
{
size_t const cSize = ZSTD_compressSequences_internal(zc, dst, dstCapacity);
size_t const minGain = ZSTD_minGain(srcSize);
size_t const maxCSize = srcSize - minGain;
/* If the srcSize <= dstCapacity, then there is enough space to write a
* raw uncompressed block. Since we ran out of space, the block must not
* be compressible, so fall back to a raw uncompressed block.
*/
int const uncompressibleError = cSize == ERROR(dstSize_tooSmall) && srcSize <= dstCapacity;
int i;
if (ZSTD_isError(cSize) && !uncompressibleError)
return cSize;
if (cSize >= maxCSize || uncompressibleError) {
zc->flagStaticHufTable = HUF_repeat_none;
return 0;
}
/* confirm repcodes */
for (i = 0; i < ZSTD_REP_NUM; i++)
zc->rep[i] = zc->repToConfirm[i];
return cSize;
}
/*! ZSTD_storeSeq() :
Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
`offsetCode` : distance to match, or 0 == repCode.

View File

@ -998,6 +998,8 @@ static seq_t ZSTD_decodeSequence(seqState_t *seqState)
BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
seq.match = NULL;
return seq;
}

View File

@ -126,7 +126,7 @@ static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
/*-*******************************************
* Shared functions to include for inlining
*********************************************/
static void ZSTD_copy8(void *dst, const void *src) {
ZSTD_STATIC void ZSTD_copy8(void *dst, const void *src) {
memcpy(dst, src, 8);
}
/*! ZSTD_wildcopy() :
@ -134,8 +134,21 @@ static void ZSTD_copy8(void *dst, const void *src) {
#define WILDCOPY_OVERLENGTH 8
ZSTD_STATIC void ZSTD_wildcopy(void *dst, const void *src, ptrdiff_t length)
{
if (length > 0)
memcpy(dst, src, length);
const BYTE* ip = (const BYTE*)src;
BYTE* op = (BYTE*)dst;
BYTE* const oend = op + length;
/* Work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81388.
* Avoid the bad case where the loop only runs once by handling the
* special case separately. This doesn't trigger the bug because it
* doesn't involve pointer/integer overflow.
*/
if (length <= 8)
return ZSTD_copy8(dst, src);
do {
ZSTD_copy8(op, ip);
op += 8;
ip += 8;
} while (op < oend);
}
/*-*******************************************

View File

@ -0,0 +1,37 @@
# ################################################################
# Copyright (c) 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.
# ################################################################
# This Makefile presumes libzstd is installed, using `sudo make install`
CPPFLAGS+= -I../../lib/common
CFLAGS ?= -O3
DEBUGFLAGS = -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
-Wstrict-prototypes -Wundef -Wpointer-arith -Wformat-security \
-Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
-Wredundant-decls
CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS)
FLAGS = $(CPPFLAGS) $(CFLAGS)
LDFLAGS += -lzstd
.PHONY: default all clean
default: all
all: ldm
ldm: ldm_common.c ldm.c main.c
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
clean:
@rm -f core *.o tmp* result* *.ldm *.ldm.dec \
ldm
@echo Cleaning completed

View File

@ -0,0 +1,102 @@
This is a compression algorithm focused on finding long distance matches.
It is based upon lz4 and uses nearly the same block format (github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md). The number of bytes to encode the offset is four instead of two in lz4 to reflect the longer distance matching. The block format is described in `ldm.h`.
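For illustration, the sketch below (not part of the checked-in sources) parses a single sequence of this block format: the token's upper four bits carry the literal length, a 4-byte offset follows the literals, and the lower four bits carry the match length, as described in `ldm.h`. The function name and the plain `memcpy` read of the offset are assumptions made for the example; the reference decoder uses `MEM_read32` and additionally adds `LDM_MIN_MATCH_LENGTH` to the decoded match length.
```c
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Illustrative only: walk one sequence (token, literal-length bytes,
 * literals, 4-byte offset, match-length bytes) and return the number of
 * input bytes consumed. Byte order of the offset follows the writer
 * (MEM_write32/MEM_read32 in the reference code). */
size_t parse_one_sequence(const uint8_t *src,
                          uint64_t *literalLength,
                          uint32_t *offset,
                          uint64_t *matchLength)
{
    const uint8_t *ip = src;
    const uint8_t token = *ip++;
    uint8_t b;

    /* Upper 4 bits: literal length, extended while the added byte is 255. */
    *literalLength = token >> 4;
    if (*literalLength == 15) {
        do { b = *ip++; *literalLength += b; } while (b == 255);
    }
    ip += *literalLength;              /* skip over the literals themselves */

    /* 4-byte offset (lz4 uses 2 bytes here). */
    memcpy(offset, ip, sizeof(*offset));
    ip += 4;

    /* Lower 4 bits: match length, extended the same way; the reference
     * decoder then adds LDM_MIN_MATCH_LENGTH to this value. */
    *matchLength = token & 0x0F;
    if (*matchLength == 15) {
        do { b = *ip++; *matchLength += b; } while (b == 255);
    }
    return (size_t)(ip - src);
}
```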
### Build
Run `make`.
### Compressing a file
`ldm <filename>`
Decompression and verification can be enabled by defining `DECOMPRESS_AND_VERIFY` in `main.c`.
The output file names are as follows:
- `<filename>.ldm` : compressed file
- `<filename>.ldm.dec` : decompressed file
### Parameters
There are various parameters that can be tuned. These parameters can be tuned in `ldm.h` or, alternatively if `ldm_params.h` is included, in `ldm_params.h` (for easier configuration).
The parameters are as follows and must all be defined:
- `LDM_MEMORY_USAGE` : the base-2 log of the memory used by the underlying hash table, in bytes.
- `HASH_BUCKET_SIZE_LOG` : the base-2 log of the number of entries in each hash bucket (used in collision resolution).
- `LDM_LAG` : the lag (in bytes) in inserting entries into the hash table.
- `LDM_WINDOW_SIZE_LOG` : the base-2 log of the maximum window size when searching for matches.
- `LDM_MIN_MATCH_LENGTH` : the minimum match length.
- `INSERT_BY_TAG` : insert entries into the hash table as a function of the hash. This increases speed by reducing the number of hash table lookups and match comparisons. Certain hashes will never be inserted.
- `USE_CHECKSUM` : store a checksum with the hash table entries for faster comparison. This halves the number of entries the hash table can contain.
The optional parameter `HASH_ONLY_EVERY_LOG` is the base-2 log of the inverse insertion frequency for the hash table; that is, an entry is inserted approximately once every `1 << HASH_ONLY_EVERY_LOG` positions. If this parameter is not defined, its value is computed from the window size and memory usage so that insertions cover the window approximately evenly.
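As a concrete illustration (this simply mirrors the `ldm_params.h` shipped in this directory and the benchmark configuration below; the inline arithmetic is my own), the default `HASH_ONLY_EVERY_LOG` for that configuration works out as follows:
```c
/* Example ldm_params.h matching the benchmark configuration below. */
#ifndef LDM_PARAMS_H
#define LDM_PARAMS_H
#define LDM_MEMORY_USAGE     23  /* 2^23 B = 8 MiB hash table   */
#define HASH_BUCKET_SIZE_LOG  3  /* 2^3 = 8 entries per bucket  */
#define LDM_LAG               0
#define LDM_WINDOW_SIZE_LOG  28  /* 2^28 B = 256 MiB window     */
#define LDM_MIN_MATCH_LENGTH 64
#define INSERT_BY_TAG         1
#define USE_CHECKSUM          1  /* 8-byte hash entries         */
#endif

/* With USE_CHECKSUM (8-byte entries, LDM_HASH_ENTRY_SIZE_LOG == 3), ldm.c
 * computes the default as
 *   HASH_ONLY_EVERY_LOG = LDM_WINDOW_SIZE_LOG
 *                         - (LDM_MEMORY_USAGE - LDM_HASH_ENTRY_SIZE_LOG)
 *                       = 28 - (23 - 3) = 8,
 * i.e. roughly one insertion every 2^8 = 256 positions. */
```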
### Benchmark
Below is a comparison of various compression methods on a tar of four versions of llvm (versions `3.9.0`, `3.9.1`, `4.0.0`, `4.0.1`) with a total size of `727900160` B.
| Method | Size | Ratio |
|:---|---:|---:|
|lrzip -p 32 -n -w 1 | `369968714` | `1.97`|
|ldm | `209391361` | `3.48`|
|lz4 | `189954338` | `3.83`|
|lrzip -p 32 -l -w 1 | `163940343` | `4.44`|
|zstd -1 | `126080293` | `5.77`|
|lrzip -p 32 -n | `124821009` | `5.83`|
|lrzip -p 32 -n -w 1 & zstd -1 | `120317909` | `6.05`|
|zstd -3 -o | `115290952` | `6.31`|
|lrzip -p 32 -g -L 9 -w 1 | `107168979` | `6.79`|
|zstd -6 -o | `102772098` | `7.08`|
|zstd -T16 -9 | `98040470` | `7.42`|
|lrzip -p 32 -n -w 1 & zstd -T32 -19 | `88050289` | `8.27`|
|zstd -T32 -19 | `83626098` | `8.70`|
|lrzip -p 32 -n & zstd -1 | `36335117` | `20.03`|
|ldm & zstd -6 | `32856232` | `22.15`|
|lrzip -p 32 -g -L 9 | `32243594` | `22.58`|
|lrzip -p 32 -n & zstd -6 | `30954572` | `23.52`|
|lrzip -p 32 -n & zstd -T32 -19 | `26472064` | `27.50`|
The method marked `ldm` was run with the following parameters:
| Parameter | Value |
|:---|---:|
| `LDM_MEMORY_USAGE` | `23`|
|`HASH_BUCKET_SIZE_LOG` | `3`|
|`LDM_LAG` | `0`|
|`LDM_WINDOW_SIZE_LOG` | `28`|
|`LDM_MIN_MATCH_LENGTH`| `64`|
|`INSERT_BY_TAG` | `1`|
|`USE_CHECKSUM` | `1`|
The compression speed was `220.5 MB/s`.
### Parameter selection
Below is a brief discussion of the effects of the parameters on the speed and compression ratio.
#### Speed
A large bottleneck in terms of speed is finding the matches and checking whether they exceed the minimum match length. Generally:
- The fewer matches found (or the lower the percentage of literals covered by matches), the slower the algorithm runs.
- Increasing `HASH_ONLY_EVERY_LOG` results in fewer inserts and, if `INSERT_BY_TAG` is set, fewer lookups in the table. This has a large effect on speed, as well as compression ratio.
- If `HASH_ONLY_EVERY_LOG` is not set, its value is calculated based on `LDM_WINDOW_SIZE_LOG` and `LDM_MEMORY_USAGE`. Increasing `LDM_WINDOW_SIZE_LOG` has the effect of increasing `HASH_ONLY_EVERY_LOG` and increasing `LDM_MEMORY_USAGE` decreases `HASH_ONLY_EVERY_LOG`.
- `USE_CHECKSUM` generally improves speed, because a stored checksum lets candidate hash table entries be rejected with a cheap comparison before the full match comparison.
#### Compression ratio
The compression ratio is highly correlated with the coverage of matches. As a long distance matcher, the algorithm was designed to "optimize" for long distance matches outside the zstd compression window. The compression ratio after recompressing the output of the long-distance matcher with zstd was a more important signal in development than the raw compression ratio itself.
Generally, increasing `LDM_MEMORY_USAGE` improves the compression ratio. However, when the default computed value of `HASH_ONLY_EVERY_LOG` is used, a larger table also increases the frequency of insertion into and lookup from the table, and may therefore reduce speed.
Below is a table showing the speed and compression ratio when compressing the llvm tar (as described above) using different settings for `LDM_MEMORY_USAGE`. The other parameters were the same as used in the benchmark above.
| `LDM_MEMORY_USAGE` | Ratio | Speed (MB/s) | Ratio after zstd -6 |
|---:| ---: | ---: | ---: |
| `18` | `1.85` | `232.4` | `10.92` |
| `21` | `2.79` | `233.9` | `15.92` |
| `23` | `3.48` | `220.5` | `18.29` |
| `25` | `4.56` | `140.8` | `19.21` |
### Compression statistics
Compression statistics (and the configuration) can be enabled/disabled via `COMPUTE_STATS` and `OUTPUT_CONFIGURATION` in `ldm.h`.

View File

@ -0,0 +1,857 @@
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "ldm.h"
#define LDM_HASHTABLESIZE (1 << (LDM_MEMORY_USAGE))
#define LDM_HASHTABLESIZE_U32 ((LDM_HASHTABLESIZE) >> 2)
#define LDM_HASHTABLESIZE_U64 ((LDM_HASHTABLESIZE) >> 3)
#if USE_CHECKSUM
#define LDM_HASH_ENTRY_SIZE_LOG 3
#else
#define LDM_HASH_ENTRY_SIZE_LOG 2
#endif
// On average, an entry is inserted into the table once every (HASH_ONLY_EVERY + 1) positions.
#ifndef HASH_ONLY_EVERY_LOG
#define HASH_ONLY_EVERY_LOG (LDM_WINDOW_SIZE_LOG-((LDM_MEMORY_USAGE)-(LDM_HASH_ENTRY_SIZE_LOG)))
#endif
#define HASH_ONLY_EVERY ((1 << (HASH_ONLY_EVERY_LOG)) - 1)
#define HASH_BUCKET_SIZE (1 << (HASH_BUCKET_SIZE_LOG))
#define NUM_HASH_BUCKETS_LOG ((LDM_MEMORY_USAGE)-(LDM_HASH_ENTRY_SIZE_LOG)-(HASH_BUCKET_SIZE_LOG))
#define HASH_CHAR_OFFSET 10
// Take the first match in the hash bucket only.
//#define ZSTD_SKIP
static const U64 prime8bytes = 11400714785074694791ULL;
// Type of the small hash used to index into the hash table.
typedef U32 hash_t;
#if USE_CHECKSUM
typedef struct LDM_hashEntry {
U32 offset;
U32 checksum;
} LDM_hashEntry;
#else
typedef struct LDM_hashEntry {
U32 offset;
} LDM_hashEntry;
#endif
struct LDM_compressStats {
U32 windowSizeLog, hashTableSizeLog;
U32 numMatches;
U64 totalMatchLength;
U64 totalLiteralLength;
U64 totalOffset;
U32 matchLengthHistogram[32];
U32 minOffset, maxOffset;
U32 offsetHistogram[32];
};
typedef struct LDM_hashTable LDM_hashTable;
struct LDM_CCtx {
size_t isize; /* Input size */
size_t maxOSize; /* Maximum output size */
const BYTE *ibase; /* Base of input */
const BYTE *ip; /* Current input position */
const BYTE *iend; /* End of input */
// Maximum input position such that hashing at the position does not exceed
// end of input.
const BYTE *ihashLimit;
// Maximum input position such that finding a match of at least the minimum
// match length does not exceed end of input.
const BYTE *imatchLimit;
const BYTE *obase; /* Base of output */
BYTE *op; /* Output */
const BYTE *anchor; /* Anchor to start of current (match) block */
LDM_compressStats stats; /* Compression statistics */
LDM_hashTable *hashTable;
const BYTE *lastPosHashed; /* Last position hashed */
U64 lastHash;
const BYTE *nextIp; // TODO: this is redundant (ip + step)
const BYTE *nextPosHashed;
U64 nextHash;
unsigned step; // ip step, should be 1.
const BYTE *lagIp;
U64 lagHash;
};
struct LDM_hashTable {
U32 numBuckets; // The number of buckets.
U32 numEntries; // numBuckets * HASH_BUCKET_SIZE.
LDM_hashEntry *entries;
BYTE *bucketOffsets; // A pointer (per bucket) to the next insert position.
};
static void HASH_destroyTable(LDM_hashTable *table) {
free(table->entries);
free(table->bucketOffsets);
free(table);
}
/**
* Create a hash table that can contain size elements.
* The number of buckets is determined by size >> HASH_BUCKET_SIZE_LOG.
*
* Returns NULL if table creation failed.
*/
static LDM_hashTable *HASH_createTable(U32 size) {
LDM_hashTable *table = malloc(sizeof(LDM_hashTable));
if (!table) return NULL;
table->numBuckets = size >> HASH_BUCKET_SIZE_LOG;
table->numEntries = size;
table->entries = calloc(size, sizeof(LDM_hashEntry));
table->bucketOffsets = calloc(size >> HASH_BUCKET_SIZE_LOG, sizeof(BYTE));
if (!table->entries || !table->bucketOffsets) {
HASH_destroyTable(table);
return NULL;
}
return table;
}
static LDM_hashEntry *getBucket(const LDM_hashTable *table, const hash_t hash) {
return table->entries + (hash << HASH_BUCKET_SIZE_LOG);
}
static unsigned ZSTD_NbCommonBytes (register size_t val) {
if (MEM_isLittleEndian()) {
if (MEM_64bits()) {
# if defined(_MSC_VER) && defined(_WIN64)
unsigned long r = 0;
_BitScanForward64( &r, (U64)val );
return (unsigned)(r>>3);
# elif defined(__GNUC__) && (__GNUC__ >= 3)
return (__builtin_ctzll((U64)val) >> 3);
# else
static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
0, 3, 1, 3, 1, 4, 2, 7,
0, 2, 3, 6, 1, 5, 3, 5,
1, 3, 4, 4, 2, 5, 6, 7,
7, 0, 1, 2, 3, 3, 4, 6,
2, 6, 5, 5, 3, 4, 5, 6,
7, 1, 2, 4, 6, 4, 4, 5,
7, 2, 6, 5, 7, 6, 7, 7 };
return DeBruijnBytePos[
((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
# endif
} else { /* 32 bits */
# if defined(_MSC_VER)
unsigned long r=0;
_BitScanForward( &r, (U32)val );
return (unsigned)(r>>3);
# elif defined(__GNUC__) && (__GNUC__ >= 3)
return (__builtin_ctz((U32)val) >> 3);
# else
static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
3, 2, 2, 1, 3, 2, 0, 1,
3, 3, 1, 2, 2, 2, 2, 0,
3, 1, 2, 0, 1, 0, 1, 1 };
return DeBruijnBytePos[
((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
# endif
}
} else { /* Big Endian CPU */
if (MEM_64bits()) {
# if defined(_MSC_VER) && defined(_WIN64)
unsigned long r = 0;
_BitScanReverse64( &r, val );
return (unsigned)(r>>3);
# elif defined(__GNUC__) && (__GNUC__ >= 3)
return (__builtin_clzll(val) >> 3);
# else
unsigned r;
/* calculate this way due to compiler complaining in 32-bits mode */
const unsigned n32 = sizeof(size_t)*4;
if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
r += (!val);
return r;
# endif
} else { /* 32 bits */
# if defined(_MSC_VER)
unsigned long r = 0;
_BitScanReverse( &r, (unsigned long)val );
return (unsigned)(r>>3);
# elif defined(__GNUC__) && (__GNUC__ >= 3)
return (__builtin_clz((U32)val) >> 3);
# else
unsigned r;
if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
r += (!val);
return r;
# endif
}
}
}
// From lib/compress/zstd_compress.c
static size_t ZSTD_count(const BYTE *pIn, const BYTE *pMatch,
const BYTE *const pInLimit) {
const BYTE * const pStart = pIn;
const BYTE * const pInLoopLimit = pInLimit - (sizeof(size_t)-1);
while (pIn < pInLoopLimit) {
size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
if (!diff) {
pIn += sizeof(size_t);
pMatch += sizeof(size_t);
continue;
}
pIn += ZSTD_NbCommonBytes(diff);
return (size_t)(pIn - pStart);
}
if (MEM_64bits()) {
if ((pIn < (pInLimit - 3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) {
pIn += 4;
pMatch += 4;
}
}
if ((pIn < (pInLimit - 1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) {
pIn += 2;
pMatch += 2;
}
if ((pIn < pInLimit) && (*pMatch == *pIn)) {
pIn++;
}
return (size_t)(pIn - pStart);
}
/**
* Count number of bytes that match backwards before pIn and pMatch.
*
* We count only bytes where pMatch > pBase and pIn > pAnchor.
*/
static size_t countBackwardsMatch(const BYTE *pIn, const BYTE *pAnchor,
const BYTE *pMatch, const BYTE *pBase) {
size_t matchLength = 0;
while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) {
pIn--;
pMatch--;
matchLength++;
}
return matchLength;
}
/**
* Returns a pointer to the entry in the hash table matching the hash and
* checksum with the "longest match length" as defined below. The forward and
* backward match lengths are written to *pForwardMatchLength and
* *pBackwardMatchLength.
*
* The match length is defined based on cctx->ip and the entry's offset.
* The forward match is computed from cctx->ip and entry->offset + cctx->ibase.
* The backward match is computed backwards from cctx->ip and
* cctx->ibase only if the forward match is longer than LDM_MIN_MATCH_LENGTH.
*/
static LDM_hashEntry *HASH_getBestEntry(const LDM_CCtx *cctx,
const hash_t hash,
const U32 checksum,
U64 *pForwardMatchLength,
U64 *pBackwardMatchLength) {
LDM_hashTable *table = cctx->hashTable;
LDM_hashEntry *bucket = getBucket(table, hash);
LDM_hashEntry *cur;
LDM_hashEntry *bestEntry = NULL;
U64 bestMatchLength = 0;
#if !(USE_CHECKSUM)
(void)checksum;
#endif
for (cur = bucket; cur < bucket + HASH_BUCKET_SIZE; ++cur) {
const BYTE *pMatch = cur->offset + cctx->ibase;
// Check checksum for faster check.
#if USE_CHECKSUM
if (cur->checksum == checksum &&
cctx->ip - pMatch <= LDM_WINDOW_SIZE) {
#else
if (cctx->ip - pMatch <= LDM_WINDOW_SIZE) {
#endif
U64 forwardMatchLength = ZSTD_count(cctx->ip, pMatch, cctx->iend);
U64 backwardMatchLength, totalMatchLength;
// Only take matches where the forward match length is large enough
// for speed.
if (forwardMatchLength < LDM_MIN_MATCH_LENGTH) {
continue;
}
backwardMatchLength =
countBackwardsMatch(cctx->ip, cctx->anchor,
cur->offset + cctx->ibase,
cctx->ibase);
totalMatchLength = forwardMatchLength + backwardMatchLength;
if (totalMatchLength >= bestMatchLength) {
bestMatchLength = totalMatchLength;
*pForwardMatchLength = forwardMatchLength;
*pBackwardMatchLength = backwardMatchLength;
bestEntry = cur;
#ifdef ZSTD_SKIP
return cur;
#endif
}
}
}
if (bestEntry != NULL) {
return bestEntry;
}
return NULL;
}
/**
* Insert an entry into the hash table. The table uses a "circular buffer",
* with the oldest entry overwritten.
*/
static void HASH_insert(LDM_hashTable *table,
const hash_t hash, const LDM_hashEntry entry) {
*(getBucket(table, hash) + table->bucketOffsets[hash]) = entry;
table->bucketOffsets[hash]++;
table->bucketOffsets[hash] &= HASH_BUCKET_SIZE - 1;
}
static void HASH_outputTableOccupancy(const LDM_hashTable *table) {
U32 ctr = 0;
LDM_hashEntry *cur = table->entries;
LDM_hashEntry *end = table->entries + (table->numBuckets * HASH_BUCKET_SIZE);
for (; cur < end; ++cur) {
if (cur->offset == 0) {
ctr++;
}
}
// The number of buckets is repeated as a check for now.
printf("Num buckets, bucket size: %d (2^%d), %d\n",
table->numBuckets, NUM_HASH_BUCKETS_LOG, HASH_BUCKET_SIZE);
printf("Hash table size, empty slots, %% empty: %u, %u, %.3f\n",
table->numEntries, ctr,
100.0 * (double)(ctr) / table->numEntries);
}
// TODO: This can be done more efficiently, for example by using builtin
// functions (but it is not that important as it is only used for computing
// stats).
static int intLog2(U64 x) {
int ret = 0;
while (x >>= 1) {
ret++;
}
return ret;
}
void LDM_printCompressStats(const LDM_compressStats *stats) {
printf("=====================\n");
printf("Compression statistics\n");
printf("Window size, hash table size (bytes): 2^%u, 2^%u\n",
stats->windowSizeLog, stats->hashTableSizeLog);
printf("num matches, total match length, %% matched: %u, %llu, %.3f\n",
stats->numMatches,
stats->totalMatchLength,
100.0 * (double)stats->totalMatchLength /
(double)(stats->totalMatchLength + stats->totalLiteralLength));
printf("avg match length: %.1f\n", ((double)stats->totalMatchLength) /
(double)stats->numMatches);
printf("avg literal length, total literalLength: %.1f, %llu\n",
((double)stats->totalLiteralLength) / (double)stats->numMatches,
stats->totalLiteralLength);
printf("avg offset length: %.1f\n",
((double)stats->totalOffset) / (double)stats->numMatches);
printf("min offset, max offset: %u, %u\n",
stats->minOffset, stats->maxOffset);
printf("\n");
printf("offset histogram | match length histogram\n");
printf("offset/ML, num matches, %% of matches | num matches, %% of matches\n");
{
int i;
int logMaxOffset = intLog2(stats->maxOffset);
for (i = 0; i <= logMaxOffset; i++) {
printf("2^%*d: %10u %6.3f%% |2^%*d: %10u %6.3f \n",
2, i,
stats->offsetHistogram[i],
100.0 * (double) stats->offsetHistogram[i] /
(double) stats->numMatches,
2, i,
stats->matchLengthHistogram[i],
100.0 * (double) stats->matchLengthHistogram[i] /
(double) stats->numMatches);
}
}
printf("\n");
printf("=====================\n");
}
/**
* Return the upper (most significant) NUM_HASH_BUCKETS_LOG bits.
*/
static hash_t getSmallHash(U64 hash) {
return hash >> (64 - NUM_HASH_BUCKETS_LOG);
}
/**
* Return the 32 bits after the upper NUM_HASH_BUCKETS_LOG bits.
*/
static U32 getChecksum(U64 hash) {
return (hash >> (64 - 32 - NUM_HASH_BUCKETS_LOG)) & 0xFFFFFFFF;
}
#if INSERT_BY_TAG
static U32 lowerBitsFromHfHash(U64 hash) {
// The number of bits used so far is NUM_HASH_BUCKETS_LOG + 32.
// So there are 32 - NUM_HASH_BUCKETS_LOG bits left.
// Occasional hashing requires HASH_ONLY_EVERY_LOG bits.
// So if 32 - NUM_HASH_BUCKETS_LOG < HASH_ONLY_EVERY_LOG, just return lower bits
// allowing for reuse of bits.
if (32 - NUM_HASH_BUCKETS_LOG < HASH_ONLY_EVERY_LOG) {
return hash & HASH_ONLY_EVERY;
} else {
// Otherwise shift by
// (32 - NUM_HASH_BUCKETS_LOG - HASH_ONLY_EVERY_LOG) bits first.
return (hash >> (32 - NUM_HASH_BUCKETS_LOG - HASH_ONLY_EVERY_LOG)) &
HASH_ONLY_EVERY;
}
}
#endif
/**
* Get a 64-bit hash using the first len bytes from buf.
*
* Giving bytes s = s_1, s_2, ... s_k, the hash is defined to be
* H(s) = s_1*(a^(k-1)) + s_2*(a^(k-2)) + ... + s_k*(a^0)
*
* where the constant a is defined to be prime8bytes.
*
* The implementation adds an offset to each byte, so
* H(s) = (s_1 + HASH_CHAR_OFFSET)*(a^(k-1)) + ...
*/
static U64 getHash(const BYTE *buf, U32 len) {
U64 ret = 0;
U32 i;
for (i = 0; i < len; i++) {
ret *= prime8bytes;
ret += buf[i] + HASH_CHAR_OFFSET;
}
return ret;
}
static U64 ipow(U64 base, U64 exp) {
U64 ret = 1;
while (exp) {
if (exp & 1) {
ret *= base;
}
exp >>= 1;
base *= base;
}
return ret;
}
static U64 updateHash(U64 hash, U32 len,
BYTE toRemove, BYTE toAdd) {
// TODO: this relies on compiler optimization.
// The exponential can be calculated explicitly as len is constant.
hash -= ((toRemove + HASH_CHAR_OFFSET) *
ipow(prime8bytes, len - 1));
hash *= prime8bytes;
hash += toAdd + HASH_CHAR_OFFSET;
return hash;
}
/**
* Update cctx->nextHash and cctx->nextPosHashed
* based on cctx->lastHash and cctx->lastPosHashed.
*
* This uses a rolling hash and requires that the last position hashed
* corresponds to cctx->nextIp - step.
*/
static void setNextHash(LDM_CCtx *cctx) {
cctx->nextHash = updateHash(
cctx->lastHash, LDM_HASH_LENGTH,
cctx->lastPosHashed[0],
cctx->lastPosHashed[LDM_HASH_LENGTH]);
cctx->nextPosHashed = cctx->nextIp;
#if LDM_LAG
if (cctx->ip - cctx->ibase > LDM_LAG) {
cctx->lagHash = updateHash(
cctx->lagHash, LDM_HASH_LENGTH,
cctx->lagIp[0], cctx->lagIp[LDM_HASH_LENGTH]);
cctx->lagIp++;
}
#endif
}
static void putHashOfCurrentPositionFromHash(LDM_CCtx *cctx, U64 hash) {
// Hash only every HASH_ONLY_EVERY times, based on cctx->ip.
// Note: this works only when cctx->step is 1.
#if LDM_LAG
if (cctx -> lagIp - cctx->ibase > 0) {
#if INSERT_BY_TAG
U32 hashEveryMask = lowerBitsFromHfHash(cctx->lagHash);
if (hashEveryMask == HASH_ONLY_EVERY) {
#else
if (((cctx->ip - cctx->ibase) & HASH_ONLY_EVERY) == HASH_ONLY_EVERY) {
#endif
U32 smallHash = getSmallHash(cctx->lagHash);
# if USE_CHECKSUM
U32 checksum = getChecksum(cctx->lagHash);
const LDM_hashEntry entry = { cctx->lagIp - cctx->ibase, checksum };
# else
const LDM_hashEntry entry = { cctx->lagIp - cctx->ibase };
# endif
HASH_insert(cctx->hashTable, smallHash, entry);
}
} else {
#endif // LDM_LAG
#if INSERT_BY_TAG
U32 hashEveryMask = lowerBitsFromHfHash(hash);
if (hashEveryMask == HASH_ONLY_EVERY) {
#else
if (((cctx->ip - cctx->ibase) & HASH_ONLY_EVERY) == HASH_ONLY_EVERY) {
#endif
U32 smallHash = getSmallHash(hash);
#if USE_CHECKSUM
U32 checksum = getChecksum(hash);
const LDM_hashEntry entry = { cctx->ip - cctx->ibase, checksum };
#else
const LDM_hashEntry entry = { cctx->ip - cctx->ibase };
#endif
HASH_insert(cctx->hashTable, smallHash, entry);
}
#if LDM_LAG
}
#endif
cctx->lastPosHashed = cctx->ip;
cctx->lastHash = hash;
}
/**
* Copy over the cctx->lastHash, and cctx->lastPosHashed
* fields from the "next" fields.
*
* This requires that cctx->ip == cctx->nextPosHashed.
*/
static void LDM_updateLastHashFromNextHash(LDM_CCtx *cctx) {
putHashOfCurrentPositionFromHash(cctx, cctx->nextHash);
}
/**
* Insert hash of the current position into the hash table.
*/
static void LDM_putHashOfCurrentPosition(LDM_CCtx *cctx) {
U64 hash = getHash(cctx->ip, LDM_HASH_LENGTH);
putHashOfCurrentPositionFromHash(cctx, hash);
}
size_t LDM_initializeCCtx(LDM_CCtx *cctx,
const void *src, size_t srcSize,
void *dst, size_t maxDstSize) {
cctx->isize = srcSize;
cctx->maxOSize = maxDstSize;
cctx->ibase = (const BYTE *)src;
cctx->ip = cctx->ibase;
cctx->iend = cctx->ibase + srcSize;
cctx->ihashLimit = cctx->iend - LDM_HASH_LENGTH;
cctx->imatchLimit = cctx->iend - LDM_MIN_MATCH_LENGTH;
cctx->obase = (BYTE *)dst;
cctx->op = (BYTE *)dst;
cctx->anchor = cctx->ibase;
memset(&(cctx->stats), 0, sizeof(cctx->stats));
#if USE_CHECKSUM
cctx->hashTable = HASH_createTable(LDM_HASHTABLESIZE_U64);
#else
cctx->hashTable = HASH_createTable(LDM_HASHTABLESIZE_U32);
#endif
if (!cctx->hashTable) return 1;
cctx->stats.minOffset = UINT_MAX;
cctx->stats.windowSizeLog = LDM_WINDOW_SIZE_LOG;
cctx->stats.hashTableSizeLog = LDM_MEMORY_USAGE;
cctx->lastPosHashed = NULL;
cctx->step = 1; // Fixed to be 1 for now. Changing may break things.
cctx->nextIp = cctx->ip + cctx->step;
cctx->nextPosHashed = 0;
return 0;
}
void LDM_destroyCCtx(LDM_CCtx *cctx) {
HASH_destroyTable(cctx->hashTable);
}
/**
* Finds the "best" match.
*
* Returns 0 if successful and 1 otherwise (i.e. no match can be found
* in the remaining input that is long enough).
*
* forwardMatchLength contains the forward length of the match.
*/
static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match,
U64 *forwardMatchLength, U64 *backwardMatchLength) {
LDM_hashEntry *entry = NULL;
cctx->nextIp = cctx->ip + cctx->step;
while (entry == NULL) {
U64 hash;
hash_t smallHash;
U32 checksum;
#if INSERT_BY_TAG
U32 hashEveryMask;
#endif
setNextHash(cctx);
hash = cctx->nextHash;
smallHash = getSmallHash(hash);
checksum = getChecksum(hash);
#if INSERT_BY_TAG
hashEveryMask = lowerBitsFromHfHash(hash);
#endif
cctx->ip = cctx->nextIp;
cctx->nextIp += cctx->step;
if (cctx->ip > cctx->imatchLimit) {
return 1;
}
#if INSERT_BY_TAG
if (hashEveryMask == HASH_ONLY_EVERY) {
entry = HASH_getBestEntry(cctx, smallHash, checksum,
forwardMatchLength, backwardMatchLength);
}
#else
entry = HASH_getBestEntry(cctx, smallHash, checksum,
forwardMatchLength, backwardMatchLength);
#endif
if (entry != NULL) {
*match = entry->offset + cctx->ibase;
}
putHashOfCurrentPositionFromHash(cctx, hash);
}
setNextHash(cctx);
return 0;
}
void LDM_encodeLiteralLengthAndLiterals(
LDM_CCtx *cctx, BYTE *pToken, const U64 literalLength) {
/* Encode the literal length. */
if (literalLength >= RUN_MASK) {
U64 len = (U64)literalLength - RUN_MASK;
*pToken = (RUN_MASK << ML_BITS);
for (; len >= 255; len -= 255) {
*(cctx->op)++ = 255;
}
*(cctx->op)++ = (BYTE)len;
} else {
*pToken = (BYTE)(literalLength << ML_BITS);
}
/* Encode the literals. */
memcpy(cctx->op, cctx->anchor, literalLength);
cctx->op += literalLength;
}
void LDM_outputBlock(LDM_CCtx *cctx,
const U64 literalLength,
const U32 offset,
const U64 matchLength) {
BYTE *pToken = cctx->op++;
/* Encode the literal length and literals. */
LDM_encodeLiteralLengthAndLiterals(cctx, pToken, literalLength);
/* Encode the offset. */
MEM_write32(cctx->op, offset);
cctx->op += LDM_OFFSET_SIZE;
/* Encode the match length. */
if (matchLength >= ML_MASK) {
U64 matchLengthRemaining = matchLength;
*pToken += ML_MASK;
matchLengthRemaining -= ML_MASK;
MEM_write32(cctx->op, 0xFFFFFFFF);
while (matchLengthRemaining >= 4*0xFF) {
cctx->op += 4;
MEM_write32(cctx->op, 0xffffffff);
matchLengthRemaining -= 4*0xFF;
}
cctx->op += matchLengthRemaining / 255;
*(cctx->op)++ = (BYTE)(matchLengthRemaining % 255);
} else {
*pToken += (BYTE)(matchLength);
}
}
// TODO: maxDstSize is unused. This function may seg fault when writing
// beyond the size of dst, as it does not check maxDstSize. Writing to
// a buffer and performing checks is a possible solution.
//
// This is based upon lz4.
size_t LDM_compress(const void *src, size_t srcSize,
void *dst, size_t maxDstSize) {
LDM_CCtx cctx;
const BYTE *match = NULL;
U64 forwardMatchLength = 0;
U64 backwardsMatchLength = 0;
if (LDM_initializeCCtx(&cctx, src, srcSize, dst, maxDstSize)) {
// Initialization failed.
return 0;
}
#ifdef OUTPUT_CONFIGURATION
LDM_outputConfiguration();
#endif
/* Hash the first position and put it into the hash table. */
LDM_putHashOfCurrentPosition(&cctx);
cctx.lagIp = cctx.ip;
cctx.lagHash = cctx.lastHash;
/**
* Find a match.
* If no more matches can be found (i.e. the length of the remaining input
* is less than the minimum match length), then stop searching for matches
* and encode the final literals.
*/
while (!LDM_findBestMatch(&cctx, &match, &forwardMatchLength,
&backwardsMatchLength)) {
#ifdef COMPUTE_STATS
cctx.stats.numMatches++;
#endif
cctx.ip -= backwardsMatchLength;
match -= backwardsMatchLength;
/**
* Write current block (literals, literal length, match offset, match
* length) and update pointers and hashes.
*/
{
const U64 literalLength = cctx.ip - cctx.anchor;
const U32 offset = cctx.ip - match;
const U64 matchLength = forwardMatchLength +
backwardsMatchLength -
LDM_MIN_MATCH_LENGTH;
LDM_outputBlock(&cctx, literalLength, offset, matchLength);
#ifdef COMPUTE_STATS
cctx.stats.totalLiteralLength += literalLength;
cctx.stats.totalOffset += offset;
cctx.stats.totalMatchLength += matchLength + LDM_MIN_MATCH_LENGTH;
cctx.stats.minOffset =
offset < cctx.stats.minOffset ? offset : cctx.stats.minOffset;
cctx.stats.maxOffset =
offset > cctx.stats.maxOffset ? offset : cctx.stats.maxOffset;
cctx.stats.offsetHistogram[(U32)intLog2(offset)]++;
cctx.stats.matchLengthHistogram[
(U32)intLog2(matchLength + LDM_MIN_MATCH_LENGTH)]++;
#endif
// Move ip to end of block, inserting hashes at each position.
cctx.nextIp = cctx.ip + cctx.step;
while (cctx.ip < cctx.anchor + LDM_MIN_MATCH_LENGTH +
matchLength + literalLength) {
if (cctx.ip > cctx.lastPosHashed) {
// TODO: Simplify.
LDM_updateLastHashFromNextHash(&cctx);
setNextHash(&cctx);
}
cctx.ip++;
cctx.nextIp++;
}
}
// Set start of next block to current input pointer.
cctx.anchor = cctx.ip;
LDM_updateLastHashFromNextHash(&cctx);
}
/* Encode the last literals (no more matches). */
{
const U64 lastRun = cctx.iend - cctx.anchor;
BYTE *pToken = cctx.op++;
LDM_encodeLiteralLengthAndLiterals(&cctx, pToken, lastRun);
}
#ifdef COMPUTE_STATS
LDM_printCompressStats(&cctx.stats);
HASH_outputTableOccupancy(cctx.hashTable);
#endif
{
const size_t ret = cctx.op - cctx.obase;
LDM_destroyCCtx(&cctx);
return ret;
}
}
void LDM_outputConfiguration(void) {
printf("=====================\n");
printf("Configuration\n");
printf("LDM_WINDOW_SIZE_LOG: %d\n", LDM_WINDOW_SIZE_LOG);
printf("LDM_MIN_MATCH_LENGTH, LDM_HASH_LENGTH: %d, %d\n",
LDM_MIN_MATCH_LENGTH, LDM_HASH_LENGTH);
printf("LDM_MEMORY_USAGE: %d\n", LDM_MEMORY_USAGE);
printf("HASH_ONLY_EVERY_LOG: %d\n", HASH_ONLY_EVERY_LOG);
printf("HASH_BUCKET_SIZE_LOG: %d\n", HASH_BUCKET_SIZE_LOG);
printf("LDM_LAG: %d\n", LDM_LAG);
printf("USE_CHECKSUM: %d\n", USE_CHECKSUM);
printf("INSERT_BY_TAG: %d\n", INSERT_BY_TAG);
printf("HASH_CHAR_OFFSET: %d\n", HASH_CHAR_OFFSET);
printf("=====================\n");
}

View File

@ -0,0 +1,197 @@
#ifndef LDM_H
#define LDM_H
#include "mem.h" // from /lib/common/mem.h
//#include "ldm_params.h"
// =============================================================================
// Modify the parameters in ldm_params.h if "ldm_params.h" is included.
// Otherwise, modify the parameters here.
// =============================================================================
#ifndef LDM_PARAMS_H
// Defines the size of the hash table.
// Note that this is not the number of buckets.
// Currently this should be less than WINDOW_SIZE_LOG + 4.
#define LDM_MEMORY_USAGE 23
// The number of entries in a hash bucket.
#define HASH_BUCKET_SIZE_LOG 3 // The maximum is 4 for now.
// Defines the lag in inserting elements into the hash table.
#define LDM_LAG 0
// The maximum window size when searching for matches.
// The maximum value is 30
#define LDM_WINDOW_SIZE_LOG 28
// The minimum match length.
// This should be a multiple of four.
#define LDM_MIN_MATCH_LENGTH 64
// If INSERT_BY_TAG, insert entries into the hash table as a function of the
// hash. Certain hashes will not be inserted.
//
// Otherwise, insert as a function of the position.
#define INSERT_BY_TAG 1
// Store a checksum with the hash table entries for faster comparison.
// This halves the number of entries the hash table can contain.
#define USE_CHECKSUM 1
#endif
// Output compression statistics.
#define COMPUTE_STATS
// Output the configuration.
#define OUTPUT_CONFIGURATION
// If defined, forces the probability of insertion to be approximately
// one per (1 << HASH_ONLY_EVERY_LOG). If not defined, the probability will be
// calculated based on the memory usage and window size for "even" insertion
// throughout the window.
// #define HASH_ONLY_EVERY_LOG 8
// =============================================================================
// The number of bytes storing the compressed and decompressed size
// in the header.
#define LDM_COMPRESSED_SIZE 8
#define LDM_DECOMPRESSED_SIZE 8
#define LDM_HEADER_SIZE ((LDM_COMPRESSED_SIZE)+(LDM_DECOMPRESSED_SIZE))
#define ML_BITS 4
#define ML_MASK ((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
#define RUN_MASK ((1U<<RUN_BITS)-1)
// The number of bytes storing the offset.
#define LDM_OFFSET_SIZE 4
#define LDM_WINDOW_SIZE (1 << (LDM_WINDOW_SIZE_LOG))
// TODO: Match lengths that are too small do not use the hash table efficiently.
// There should be a minimum hash length given the hash table size.
#define LDM_HASH_LENGTH LDM_MIN_MATCH_LENGTH
typedef struct LDM_compressStats LDM_compressStats;
typedef struct LDM_CCtx LDM_CCtx;
typedef struct LDM_DCtx LDM_DCtx;
/**
* Compresses src into dst.
* Returns the compressed size if successful, 0 otherwise.
*
* NB: This currently ignores maxDstSize and assumes enough space is available.
*
* Block format (see lz4 documentation for more information):
* github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md
*
* A block is composed of sequences. Each sequence begins with a token, which
* is a one-byte value separated into two 4-bit fields.
*
* The first field uses the four high bits of the token and encodes the literal
* length. If the field value is 0, there is no literal. If it is 15,
* additional bytes are added (each ranging from 0 to 255) to the previous
* value to produce a total length.
*
* Following the token and optional length bytes are the literals.
*
* Next are the 4 bytes representing the offset of the match (2 in lz4),
* i.e. the distance back from the current output position at which the
* match to copy begins.
*
* The lower four bits of the token encode the match length, with additional
* bytes added after the offset in the same way as the extra literal length
* bytes.
*
* The last sequence is incomplete and stops right after the literals.
*/
size_t LDM_compress(const void *src, size_t srcSize,
void *dst, size_t maxDstSize);
/**
* Initialize the compression context.
*
* Allocates memory for the hash table.
*
* Returns 0 if successful, 1 otherwise.
*/
size_t LDM_initializeCCtx(LDM_CCtx *cctx,
const void *src, size_t srcSize,
void *dst, size_t maxDstSize);
/**
* Frees up memory allocated in LDM_initializeCCtx().
*/
void LDM_destroyCCtx(LDM_CCtx *cctx);
/**
* Prints the distribution of offsets in the hash table.
*
* The offsets are defined as the distance of the hash table entry from the
* current input position of the cctx.
*/
void LDM_outputHashTableOffsetHistogram(const LDM_CCtx *cctx);
/**
* Outputs compression statistics to stdout.
*/
void LDM_printCompressStats(const LDM_compressStats *stats);
/**
* Encode the literal length followed by the literals.
*
* The literal length is written to the upper four bits of pToken, with
* additional bytes written to the output as needed (see lz4).
*
* This is followed by literalLength bytes corresponding to the literals.
*/
void LDM_encodeLiteralLengthAndLiterals(LDM_CCtx *cctx, BYTE *pToken,
const U64 literalLength);
/**
* Write current block (literals, literal length, match offset,
* match length).
*/
void LDM_outputBlock(LDM_CCtx *cctx,
const U64 literalLength,
const U32 offset,
const U64 matchLength);
/**
* Decompresses src into dst.
*
* Note: assumes src does not have a header.
*/
size_t LDM_decompress(const void *src, size_t srcSize,
void *dst, size_t maxDstSize);
/**
* Initialize the decompression context.
*/
void LDM_initializeDCtx(LDM_DCtx *dctx,
const void *src, size_t compressedSize,
void *dst, size_t maxDecompressedSize);
/**
* Reads the header from src and writes the compressed size and
* decompressed size into compressedSize and decompressedSize respectively.
*
* NB: LDM_compress and LDM_decompress currently do not add/read headers.
*/
void LDM_readHeader(const void *src, U64 *compressedSize,
U64 *decompressedSize);
/**
* Write the compressed and decompressed size.
*/
void LDM_writeHeader(void *memPtr, U64 compressedSize,
U64 decompressedSize);
/**
* Output the configuration used.
*/
void LDM_outputConfiguration(void);
#endif /* LDM_H */

View File

@ -0,0 +1,109 @@
#include <stdio.h>
#include "ldm.h"
/**
* This function reads the header at the beginning of src and writes
* the compressed and decompressed size to compressedSize and
* decompressedSize.
*
* The header consists of 16 bytes: 8 bytes each in little-endian format
* of the compressed size and the decompressed size.
*/
void LDM_readHeader(const void *src, U64 *compressedSize,
U64 *decompressedSize) {
const BYTE *ip = (const BYTE *)src;
*compressedSize = MEM_readLE64(ip);
*decompressedSize = MEM_readLE64(ip + 8);
}
/**
* Writes the 16-byte header (8-bytes each of the compressedSize and
* decompressedSize in little-endian format) to memPtr.
*/
void LDM_writeHeader(void *memPtr, U64 compressedSize,
U64 decompressedSize) {
MEM_writeLE64(memPtr, compressedSize);
MEM_writeLE64((BYTE *)memPtr + 8, decompressedSize);
}
struct LDM_DCtx {
size_t compressedSize;
size_t maxDecompressedSize;
const BYTE *ibase; /* Base of input */
const BYTE *ip; /* Current input position */
const BYTE *iend; /* End of source */
const BYTE *obase; /* Base of output */
BYTE *op; /* Current output position */
const BYTE *oend; /* End of output */
};
void LDM_initializeDCtx(LDM_DCtx *dctx,
const void *src, size_t compressedSize,
void *dst, size_t maxDecompressedSize) {
dctx->compressedSize = compressedSize;
dctx->maxDecompressedSize = maxDecompressedSize;
dctx->ibase = src;
dctx->ip = (const BYTE *)src;
dctx->iend = dctx->ip + dctx->compressedSize;
dctx->op = dst;
dctx->oend = dctx->op + dctx->maxDecompressedSize;
}
size_t LDM_decompress(const void *src, size_t compressedSize,
void *dst, size_t maxDecompressedSize) {
LDM_DCtx dctx;
LDM_initializeDCtx(&dctx, src, compressedSize, dst, maxDecompressedSize);
while (dctx.ip < dctx.iend) {
BYTE *cpy;
const BYTE *match;
size_t length, offset;
/* Get the literal length. */
const unsigned token = *(dctx.ip)++;
if ((length = (token >> ML_BITS)) == RUN_MASK) {
unsigned s;
do {
s = *(dctx.ip)++;
length += s;
} while (s == 255);
}
/* Copy the literals. */
cpy = dctx.op + length;
memcpy(dctx.op, dctx.ip, length);
dctx.ip += length;
dctx.op = cpy;
//TODO: dynamic offset size?
/* Read the offset. */
offset = MEM_read32(dctx.ip);
dctx.ip += LDM_OFFSET_SIZE;
match = dctx.op - offset;
/* Get the match length. */
length = token & ML_MASK;
if (length == ML_MASK) {
unsigned s;
do {
s = *(dctx.ip)++;
length += s;
} while (s == 255);
}
length += LDM_MIN_MATCH_LENGTH;
/* Copy match. */
cpy = dctx.op + length;
// TODO: this can be made more efficient.
while (match < cpy - offset && dctx.op < dctx.oend) {
*(dctx.op)++ = *match++;
}
}
return dctx.op - (BYTE *)dst;
}

View File

@ -0,0 +1,12 @@
#ifndef LDM_PARAMS_H
#define LDM_PARAMS_H
#define LDM_MEMORY_USAGE 23
#define HASH_BUCKET_SIZE_LOG 3
#define LDM_LAG 0
#define LDM_WINDOW_SIZE_LOG 28
#define LDM_MIN_MATCH_LENGTH 64
#define INSERT_BY_TAG 1
#define USE_CHECKSUM 1
#endif // LDM_PARAMS_H

View File

@ -0,0 +1,269 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <zstd.h>
#include <fcntl.h>
#include "ldm.h"
#include "zstd.h"
// #define DECOMPRESS_AND_VERIFY
/* Compress file given by fname and output to oname.
* Returns 0 if successful, error code otherwise.
*
* This adds a header from LDM_writeHeader to the beginning of the output.
*
* This might seg fault if the compressed size is greater than the decompressed
* size, because the output file is mmapped with a size derived from the input
* size. The compress function should check before writing, or buffer its writes.
*/
static int compress(const char *fname, const char *oname) {
int fdin, fdout;
struct stat statbuf;
char *src, *dst;
size_t maxCompressedSize, compressedSize;
struct timeval tv1, tv2;
double timeTaken;
/* Open the input file. */
if ((fdin = open(fname, O_RDONLY)) < 0) {
perror("Error in file opening");
return 1;
}
/* Open the output file. */
if ((fdout = open(oname, O_RDWR | O_CREAT | O_TRUNC, (mode_t)0600)) < 0) {
perror("Can't create output file");
return 1;
}
/* Find the size of the input file. */
if (fstat (fdin, &statbuf) < 0) {
perror("Fstat error");
return 1;
}
maxCompressedSize = (statbuf.st_size + LDM_HEADER_SIZE);
// Handle case where compressed size is > decompressed size.
// TODO: The compress function should check before writing or buffer writes.
maxCompressedSize += statbuf.st_size / 255;
ftruncate(fdout, maxCompressedSize);
/* mmap the input file. */
if ((src = mmap(0, statbuf.st_size, PROT_READ, MAP_SHARED, fdin, 0))
== (caddr_t) - 1) {
perror("mmap error for input");
return 1;
}
/* mmap the output file. */
if ((dst = mmap(0, maxCompressedSize, PROT_READ | PROT_WRITE,
MAP_SHARED, fdout, 0)) == (caddr_t) - 1) {
perror("mmap error for output");
return 1;
}
gettimeofday(&tv1, NULL);
compressedSize = LDM_HEADER_SIZE +
LDM_compress(src, statbuf.st_size,
dst + LDM_HEADER_SIZE, maxCompressedSize);
gettimeofday(&tv2, NULL);
// Write the header.
LDM_writeHeader(dst, compressedSize, statbuf.st_size);
// Truncate file to compressedSize.
ftruncate(fdout, compressedSize);
printf("%25s : %10lu -> %10lu - %s \n", fname,
(size_t)statbuf.st_size, (size_t)compressedSize, oname);
printf("Compression ratio: %.2fx --- %.1f%%\n",
(double)statbuf.st_size / (double)compressedSize,
(double)compressedSize / (double)(statbuf.st_size) * 100.0);
timeTaken = (double) (tv2.tv_usec - tv1.tv_usec) / 1000000 +
(double) (tv2.tv_sec - tv1.tv_sec),
printf("Total compress time = %.3f seconds, Average scanning speed: %.3f MB/s\n",
timeTaken,
((double)statbuf.st_size / (double) (1 << 20)) / timeTaken);
// Close files.
close(fdin);
close(fdout);
return 0;
}
#ifdef DECOMPRESS_AND_VERIFY
/* Decompress file compressed using LDM_compress.
* The input file should have the LDM_HEADER followed by payload.
* Returns 0 if successful, and an error code otherwise.
*/
static int decompress(const char *fname, const char *oname) {
int fdin, fdout;
struct stat statbuf;
char *src, *dst;
U64 compressedSize, decompressedSize;
size_t outSize;
/* Open the input file. */
if ((fdin = open(fname, O_RDONLY)) < 0) {
perror("Error in file opening");
return 1;
}
/* Open the output file. */
if ((fdout = open(oname, O_RDWR | O_CREAT | O_TRUNC, (mode_t)0600)) < 0) {
perror("Can't create output file");
return 1;
}
/* Find the size of the input file. */
if (fstat (fdin, &statbuf) < 0) {
perror("Fstat error");
return 1;
}
/* mmap the input file. */
if ((src = mmap(0, statbuf.st_size, PROT_READ, MAP_SHARED, fdin, 0))
== (caddr_t) - 1) {
perror("mmap error for input");
return 1;
}
/* Read the header. */
LDM_readHeader(src, &compressedSize, &decompressedSize);
ftruncate(fdout, decompressedSize);
/* mmap the output file */
if ((dst = mmap(0, decompressedSize, PROT_READ | PROT_WRITE,
MAP_SHARED, fdout, 0)) == (caddr_t) - 1) {
perror("mmap error for output");
return 1;
}
outSize = LDM_decompress(
src + LDM_HEADER_SIZE, statbuf.st_size - LDM_HEADER_SIZE,
dst, decompressedSize);
printf("Ret size out: %zu\n", outSize);
close(fdin);
close(fdout);
return 0;
}
/* Compare two files.
* Returns 0 iff they are the same.
*/
static int compare(FILE *fp0, FILE *fp1) {
int result = 0;
while (result == 0) {
char b0[1024];
char b1[1024];
const size_t r0 = fread(b0, 1, sizeof(b0), fp0);
const size_t r1 = fread(b1, 1, sizeof(b1), fp1);
result = (int)r0 - (int)r1;
if (0 == r0 || 0 == r1) break;
if (0 == result) result = memcmp(b0, b1, r0);
}
return result;
}
/* Verify the input file is the same as the decompressed file. */
static int verify(const char *inpFilename, const char *decFilename) {
FILE *inpFp, *decFp;
if ((inpFp = fopen(inpFilename, "rb")) == NULL) {
perror("Could not open input file\n");
return 1;
}
if ((decFp = fopen(decFilename, "rb")) == NULL) {
perror("Could not open decompressed file\n");
return 1;
}
printf("verify : %s <-> %s\n", inpFilename, decFilename);
{
const int cmp = compare(inpFp, decFp);
if(0 == cmp) {
printf("verify : OK\n");
} else {
printf("verify : NG\n");
return 1;
}
}
fclose(decFp);
fclose(inpFp);
return 0;
}
#endif
int main(int argc, const char *argv[]) {
const char * const exeName = argv[0];
char inpFilename[256] = { 0 };
char ldmFilename[256] = { 0 };
char decFilename[256] = { 0 };
if (argc < 2) {
printf("Wrong arguments\n");
printf("Usage:\n");
printf("%s FILE\n", exeName);
return 1;
}
snprintf(inpFilename, 256, "%s", argv[1]);
snprintf(ldmFilename, 256, "%s.ldm", argv[1]);
snprintf(decFilename, 256, "%s.ldm.dec", argv[1]);
printf("inp = [%s]\n", inpFilename);
printf("ldm = [%s]\n", ldmFilename);
printf("dec = [%s]\n", decFilename);
/* Compress */
{
if (compress(inpFilename, ldmFilename)) {
printf("Compress error\n");
return 1;
}
}
#ifdef DECOMPRESS_AND_VERIFY
/* Decompress */
{
struct timeval tv1, tv2;
gettimeofday(&tv1, NULL);
if (decompress(ldmFilename, decFilename)) {
printf("Decompress error\n");
return 1;
}
gettimeofday(&tv2, NULL);
printf("Total decompress time = %f seconds\n",
(double) (tv2.tv_usec - tv1.tv_usec) / 1000000 +
(double) (tv2.tv_sec - tv1.tv_sec));
}
/* verify */
if (verify(inpFilename, decFilename)) {
printf("Verification error\n");
return 1;
}
#endif
return 0;
}

View File

@ -39,6 +39,12 @@ struct POOL_ctx_s {
size_t queueHead;
size_t queueTail;
size_t queueSize;
/* The number of threads working on jobs */
size_t numThreadsBusy;
/* Indicates if the queue is empty */
int queueEmpty;
/* The mutex protects the queue */
pthread_mutex_t queueMutex;
/* Condition variable for pushers to wait on when the queue is full */
@ -60,21 +66,37 @@ static void* POOL_thread(void* opaque) {
for (;;) {
/* Lock the mutex and wait for a non-empty queue or until shutdown */
pthread_mutex_lock(&ctx->queueMutex);
while (ctx->queueHead == ctx->queueTail && !ctx->shutdown) {
while (ctx->queueEmpty && !ctx->shutdown) {
pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex);
}
/* empty => shutting down: so stop */
if (ctx->queueHead == ctx->queueTail) {
if (ctx->queueEmpty) {
pthread_mutex_unlock(&ctx->queueMutex);
return opaque;
}
/* Pop a job off the queue */
{ POOL_job const job = ctx->queue[ctx->queueHead];
{
POOL_job const job = ctx->queue[ctx->queueHead];
ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize;
ctx->numThreadsBusy++;
ctx->queueEmpty = ctx->queueHead == ctx->queueTail;
/* Unlock the mutex, signal a pusher, and run the job */
pthread_mutex_unlock(&ctx->queueMutex);
pthread_cond_signal(&ctx->queuePushCond);
if (ctx->queueSize > 1) {
pthread_cond_signal(&ctx->queuePushCond);
}
job.function(job.opaque);
/* If the intended queue size was 0, signal after finishing job */
if (ctx->queueSize == 1) {
pthread_mutex_lock(&ctx->queueMutex);
ctx->numThreadsBusy--;
pthread_mutex_unlock(&ctx->queueMutex);
pthread_cond_signal(&ctx->queuePushCond);
}
}
}
/* Unreachable */
@ -83,7 +105,7 @@ static void* POOL_thread(void* opaque) {
POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) {
POOL_ctx *ctx;
/* Check the parameters */
if (!numThreads || !queueSize) { return NULL; }
if (!numThreads) { return NULL; }
/* Allocate the context and zero initialize */
ctx = (POOL_ctx *)calloc(1, sizeof(POOL_ctx));
if (!ctx) { return NULL; }
@ -95,6 +117,8 @@ POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) {
ctx->queue = (POOL_job*) malloc(ctx->queueSize * sizeof(POOL_job));
ctx->queueHead = 0;
ctx->queueTail = 0;
ctx->numThreadsBusy = 0;
ctx->queueEmpty = 1;
(void)pthread_mutex_init(&ctx->queueMutex, NULL);
(void)pthread_cond_init(&ctx->queuePushCond, NULL);
(void)pthread_cond_init(&ctx->queuePopCond, NULL);
@ -153,22 +177,37 @@ size_t POOL_sizeof(POOL_ctx *ctx) {
+ ctx->numThreads * sizeof(pthread_t);
}
/**
* Returns 1 if the queue is full and 0 otherwise.
*
* If the queueSize is 1 (the pool was created with an intended queueSize of 0),
* then the queue is treated as full unless a worker thread is free and no job
* is waiting to be picked up.
*/
static int isQueueFull(POOL_ctx const* ctx) {
if (ctx->queueSize > 1) {
return ctx->queueHead == ((ctx->queueTail + 1) % ctx->queueSize);
} else {
return ctx->numThreadsBusy == ctx->numThreads ||
!ctx->queueEmpty;
}
}
void POOL_add(void* ctxVoid, POOL_function function, void *opaque) {
POOL_ctx* const ctx = (POOL_ctx*)ctxVoid;
if (!ctx) { return; }
pthread_mutex_lock(&ctx->queueMutex);
{ POOL_job const job = {function, opaque};
/* Wait until there is space in the queue for the new job */
size_t newTail = (ctx->queueTail + 1) % ctx->queueSize;
while (ctx->queueHead == newTail && !ctx->shutdown) {
while (isQueueFull(ctx) && !ctx->shutdown) {
pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
newTail = (ctx->queueTail + 1) % ctx->queueSize;
}
/* The queue is still going => there is space */
if (!ctx->shutdown) {
ctx->queueEmpty = 0;
ctx->queue[ctx->queueTail] = job;
ctx->queueTail = newTail;
ctx->queueTail = (ctx->queueTail + 1) % ctx->queueSize;
}
}
pthread_mutex_unlock(&ctx->queueMutex);

View File

@ -22,7 +22,6 @@ typedef struct POOL_ctx_s POOL_ctx;
* Create a thread pool with at most `numThreads` threads.
* `numThreads` must be at least 1.
* The maximum number of queued jobs before blocking is `queueSize`.
* `queueSize` must be at least 1.
* @return : POOL_ctx pointer on success, else NULL.
*/
POOL_ctx *POOL_create(size_t numThreads, size_t queueSize);
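A minimal usage sketch of this pool API, for context only; it is not part of the patch and assumes the `POOL_function` typedef and `POOL_free()` declared elsewhere in pool.h. With this change a pool created with `queueSize == 0` is accepted, and `POOL_add()` then blocks until a worker thread is free instead of queueing the job.
#include <stdio.h>
#include "pool.h"   /* lib/common/pool.h */
/* A trivial job matching the POOL_function signature: void (*)(void *). */
static void printJob(void *opaque)
{
    printf("job %d\n", *(const int *)opaque);
}
int main(void)
{
    int args[4] = { 0, 1, 2, 3 };
    int i;
    /* queueSize == 0 is now legal: jobs are handed directly to free workers. */
    POOL_ctx *ctx = POOL_create(2 /* numThreads */, 0 /* queueSize */);
    if (ctx == NULL) return 1;
    for (i = 0; i < 4; i++)
        POOL_add(ctx, printJob, &args[i]);  /* blocks while all workers are busy */
    POOL_free(ctx);                         /* shuts the pool down and joins the workers */
    return 0;
}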

View File

@ -942,7 +942,7 @@ static size_t ZSTD_compressLiterals (ZSTD_entropyCTables_t * entropy,
else { entropy->hufCTable_repeatMode = HUF_repeat_check; } /* now have a table to reuse */
}
if ((cLitSize==0) | (cLitSize >= srcSize - minGain)) {
if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
entropy->hufCTable_repeatMode = HUF_repeat_none;
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
}
@ -1156,11 +1156,10 @@ MEM_STATIC size_t ZSTD_encodeSequences(void* dst, size_t dstCapacity,
}
}
MEM_STATIC size_t ZSTD_compressSequences (seqStore_t* seqStorePtr,
MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
ZSTD_entropyCTables_t* entropy,
ZSTD_compressionParameters const* cParams,
void* dst, size_t dstCapacity,
size_t srcSize)
void* dst, size_t dstCapacity)
{
const int longOffsets = cParams->windowLog > STREAM_ACCUMULATOR_MIN;
U32 count[MaxSeq+1];
@ -1195,7 +1194,7 @@ MEM_STATIC size_t ZSTD_compressSequences (seqStore_t* seqStorePtr,
if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq;
else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
if (nbSeq==0) goto _check_compressibility;
if (nbSeq==0) return op - ostart;
/* seqHead : flags for FSE encoding type */
seqHead = op++;
@ -1244,23 +1243,40 @@ MEM_STATIC size_t ZSTD_compressSequences (seqStore_t* seqStorePtr,
op += streamSize;
}
return op - ostart;
}
/* check compressibility */
_check_compressibility:
{ size_t const minGain = ZSTD_minGain(srcSize);
size_t const maxCSize = srcSize - minGain;
if ((size_t)(op-ostart) >= maxCSize) {
entropy->hufCTable_repeatMode = HUF_repeat_none;
entropy->offcode_repeatMode = FSE_repeat_none;
entropy->matchlength_repeatMode = FSE_repeat_none;
entropy->litlength_repeatMode = FSE_repeat_none;
return 0;
} }
MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr,
ZSTD_entropyCTables_t* entropy,
ZSTD_compressionParameters const* cParams,
void* dst, size_t dstCapacity,
size_t srcSize)
{
size_t const cSize = ZSTD_compressSequences_internal(seqStorePtr, entropy, cParams,
dst, dstCapacity);
size_t const minGain = ZSTD_minGain(srcSize);
size_t const maxCSize = srcSize - minGain;
/* If srcSize <= dstCapacity, there is enough space to write a raw
* uncompressed block. So if compression nevertheless ran out of space,
* the block must not be compressible enough: fall back to a raw block.
*/
int const uncompressibleError = cSize == ERROR(dstSize_tooSmall) && srcSize <= dstCapacity;
if (ZSTD_isError(cSize) && !uncompressibleError)
return cSize;
/* Check compressibility */
if (cSize >= maxCSize || uncompressibleError) {
entropy->hufCTable_repeatMode = HUF_repeat_none;
entropy->offcode_repeatMode = FSE_repeat_none;
entropy->matchlength_repeatMode = FSE_repeat_none;
entropy->litlength_repeatMode = FSE_repeat_none;
return 0;
}
assert(!ZSTD_isError(cSize));
/* confirm repcodes */
{ int i; for (i=0; i<ZSTD_REP_NUM; i++) seqStorePtr->rep[i] = seqStorePtr->repToConfirm[i]; }
return op - ostart;
return cSize;
}
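The wrapper's fallback rule can be traced with concrete numbers. Below is a self-contained sketch of the decision only; the (srcSize >> 6) + 2 minimum-gain heuristic is an assumption standing in for ZSTD_minGain(), and the real code additionally resets the entropy repeat modes (as above) whenever it falls back to a raw block.

#include <assert.h>
#include <stddef.h>

typedef enum { BLOCK_COMPRESSED, BLOCK_RAW, BLOCK_ERROR } BlockDecision;

/* cSizeIsError / errorIsDstTooSmall stand in for ZSTD_isError() and the
 * dstSize_tooSmall test, so the sketch needs no zstd error machinery. */
static BlockDecision decideBlock(size_t cSize, int cSizeIsError, int errorIsDstTooSmall,
                                 size_t srcSize, size_t dstCapacity)
{
    size_t const minGain  = (srcSize >> 6) + 2;   /* assumed heuristic */
    size_t const maxCSize = srcSize - minGain;
    /* dst was too small for the compressed form, yet a raw block would fit:
     * the data is simply not compressible enough, so emit it raw. */
    int const uncompressible = cSizeIsError && errorIsDstTooSmall && srcSize <= dstCapacity;
    if (cSizeIsError && !uncompressible) return BLOCK_ERROR;     /* genuine error */
    if (uncompressible || cSize >= maxCSize) return BLOCK_RAW;   /* gain too small */
    return BLOCK_COMPRESSED;
}

int main(void) {
    /* 1000 bytes shrink to 990: below the minimum gain of 17, emit raw */
    assert(decideBlock(990, 0, 0, 1000, 2000) == BLOCK_RAW);
    /* 1000 bytes shrink to 600: keep the compressed block */
    assert(decideBlock(600, 0, 0, 1000, 2000) == BLOCK_COMPRESSED);
    /* compression ran out of room, but a raw 1000-byte block fits in 1200 */
    assert(decideBlock(0, 1, 1, 1000, 1200) == BLOCK_RAW);
    return 0;
}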
View File
@ -917,11 +917,11 @@ static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, qu
* Tells which decoder is likely to decode faster,
* based on a set of pre-determined metrics.
* @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
* Assumption : 0 < cSrcSize < dstSize <= 128 KB */
* Assumption : 0 < cSrcSize, dstSize <= 128 KB */
U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
{
/* decoder timing evaluation */
U32 const Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */
U32 const Q = cSrcSize >= dstSize ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
U32 const D256 = (U32)(dstSize >> 8);
U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
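algoTime has 16 quantization rows, so Q must stay below 16. The old expression relied on the caller guaranteeing cSrcSize < dstSize; the relaxed validation in the next hunk drops that guarantee, so the ratio is now clamped. A quick numeric check with hypothetical sizes:

#include <assert.h>
#include <stddef.h>
typedef unsigned U32;

/* Mirrors the quantization line above: always yields an index in [0,15]. */
static U32 quantizeRatio(size_t dstSize, size_t cSrcSize) {
    return (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize);
}

int main(void) {
    assert(quantizeRatio(100000, 40000) == 6);  /* typical compressed block */
    assert(quantizeRatio(100, 100) == 15);      /* equal sizes: the old formula gave 16, past the last row */
    assert(quantizeRatio(100, 200) == 15);      /* "compressed" data larger than output: clamped too */
    return 0;
}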
@ -977,7 +977,7 @@ size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
{
/* validation checks */
if (dstSize == 0) return ERROR(dstSize_tooSmall);
if ((cSrcSize >= dstSize) || (cSrcSize <= 1)) return ERROR(corruption_detected); /* invalid */
if (cSrcSize == 0) return ERROR(corruption_detected);
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize):
View File
@ -1731,7 +1731,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
return 0;
}
dctx->expected = 0; /* not necessary to copy more */
/* fall-through */
case ZSTDds_decodeFrameHeader:
assert(src != NULL);
memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, src, dctx->expected);
@ -2391,7 +2391,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
zds->outBuffSize = neededOutSize;
} }
zds->streamStage = zdss_read;
/* pass-through */
/* fall-through */
case zdss_read:
DEBUGLOG(5, "stage zdss_read");
@ -2416,8 +2416,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
} }
if (ip==iend) { someMoreWork = 0; break; } /* no more input */
zds->streamStage = zdss_load;
/* pass-through */
/* fall-through */
case zdss_load:
{ size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds);
size_t const toLoad = neededInSize - zds->inPos; /* should always be <= remaining space within inBuff */
@ -2439,8 +2438,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
zds->outEnd = zds->outStart + decodedSize;
} }
zds->streamStage = zdss_flush;
/* pass-through */
/* fall-through */
case zdss_flush:
{ size_t const toFlushSize = zds->outEnd - zds->outStart;
size_t const flushedSize = ZSTD_limitCopy(op, oend-op, zds->outBuff + zds->outStart, toFlushSize);
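The exact comment spelling matters here: gcc-7's -Wimplicit-fallthrough (enabled by -Wextra, and fatal under -Werror) only silences the warning for a fixed set of marker comments; "fall-through" is on that list, while "pass-through" presumably is not, hence these renames and the newly added marker above. A minimal sketch of the pattern, unrelated to zstd's own state machine:

#include <stdio.h>

static void runStages(int firstStage) {
    /* With gcc-7 -Wextra -Werror, each unannotated fall-through would be an error;
     * the marker comment just before the next case label silences it. */
    switch (firstStage) {
    case 0:
        printf("init\n");
        /* fall-through */
    case 1:
        printf("read\n");
        /* fall-through */
    case 2:
        printf("flush\n");
        break;
    default:
        printf("done\n");
        break;
    }
}

int main(void) {
    runStages(0);   /* prints init, read, flush */
    return 0;
}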
Binary file not shown.

View File
@ -386,6 +386,13 @@ $ZSTD -t tmpSplit.* && die "bad file not detected !"
./datagen | $ZSTD -c | $ZSTD -t
$ECHO "\n**** golden files tests **** "
$ZSTD -t -r files
$ZSTD -c -r files | $ZSTD -t
$ECHO "\n**** benchmark mode tests **** "
$ECHO "bench one file"
View File
@ -1,5 +1,6 @@
#include "pool.h"
#include "threading.h"
#include "util.h"
#include <stddef.h>
#include <stdio.h>
@ -50,21 +51,45 @@ int testOrder(size_t numThreads, size_t queueSize) {
return 0;
}
void waitFn(void *opaque) {
(void)opaque;
UTIL_sleepMilli(1);
}
/* Tests for deadlock */
int testWait(size_t numThreads, size_t queueSize) {
struct data data;
POOL_ctx *ctx = POOL_create(numThreads, queueSize);
ASSERT_TRUE(ctx);
{
size_t i;
for (i = 0; i < 16; ++i) {
POOL_add(ctx, &waitFn, &data);
}
}
POOL_free(ctx);
return 0;
}
int main(int argc, const char **argv) {
size_t numThreads;
for (numThreads = 1; numThreads <= 4; ++numThreads) {
size_t queueSize;
for (queueSize = 1; queueSize <= 2; ++queueSize) {
for (queueSize = 0; queueSize <= 2; ++queueSize) {
if (testOrder(numThreads, queueSize)) {
printf("FAIL: testOrder\n");
return 1;
}
if (testWait(numThreads, queueSize)) {
printf("FAIL: testWait\n");
return 1;
}
}
}
printf("PASS: testOrder\n");
(void)argc;
(void)argv;
return (POOL_create(0, 1) || POOL_create(1, 0)) ? printf("FAIL: testInvalid\n"), 1
: printf("PASS: testInvalid\n"), 0;
return (POOL_create(0, 1)) ? printf("FAIL: testInvalid\n"), 1
: printf("PASS: testInvalid\n"), 0;
return 0;
}