Merge branch 'dev' into ahmed_file

2019-10-28 13:43:28 -07:00 · 2019-10-28 13:43:28 -07:00 · a78a8759a2
commit a78a8759a2
parent 12efa1ed89 6d5e0f1c9f
11 changed files with 210 additions and 74 deletions
--- a/28
+++ b/28
@ -1,8 +1,32 @@
+v1.4.4
+perf: Improved decompression speed, by > 10%, by @terrelln
+perf: Better compression speed when re-using a context, by @felixhandte
+perf: Fix compression ratio when compressing large files with small dictionary, by @senhuang42
+perf: zstd reference encoder can generate RLE blocks, by @bimbashrestha
+perf: minor generic speed optimization, by @davidbolvansky
+api: new ability to extract sequences from the parser for analysis, by @bimbashrestha
+api: fixed decoding of magic-less frames, by @terrelln
+api: fixed ZSTD_initCStream_advanced() performance with fast modes, reported by @QrczakMK
+cli: Named pipes support, by @bimbashrestha
+cli: support for short tar extensions, by @stokito
+cli: command --output-dir-flat= , generates target files into requested directory, by @senhuang42
+cli: commands --stream-size=# and --size-hint=#, by @nmagerko
+cli: faster `-t` test mode
+cli: improved some error messages, by @vangyzen
+cli: fixed rare deadlock condition within dictionary builder, by @terrelln
+build: single-file decoder with emscripten compilation script, by @cwoffenden
+build: fixed zlibWrapper compilation on Visual Studio, reported by @bluenlive
+build: fixed deprecation warning for certain gcc version, reported by @jasonma163
+build: fix compilation on old gcc versions, by @cemeyer
+build: improved installation directories for cmake script, by Dmitri Shubin
+pack: modified pkgconfig, for better integration into openwrt, requested by @neheb
+misc: Improved documentation : ZSTD_CLEVEL, DYNAMIC_BMI2, ZSTD_CDict, function deprecation, zstd format
+misc: fixed educational decoder : accept larger literals section, and removed UNALIGNED() macro
+
 v1.4.3
 bug: Fix Dictionary Compression Ratio Regression by @cyan4973 (#1709)
-bug: Fix Buffer Overflow in v0.3 Decompression by @felixhandte (#1722)
+bug: Fix Buffer Overflow in legacy v0.3 decompression by @felixhandte (#1722)
 build: Add support for IAR C/C++ Compiler for Arm by @joseph0918 (#1705)
-misc: Add NULL pointer check in util.c by @leeyoung624 (#1706)

 v1.4.2
 bug: Fix bug in zstd-0.5 decoder by @terrelln (#1696)
--- a/build/cmake/README.md
+++ b/build/cmake/README.md
@ -5,9 +5,9 @@ use case sensitivity that matches modern (ie. cmake version 2.6 and above)
 conventions of using lower-case for commands, and upper-case for
 variables.

-# How to build
+## How to build

-As cmake doesn't support command like `cmake clean`, it's recommanded to perform a "out of source build".
+As cmake doesn't support a command like `cmake clean`, it's recommended to perform an "out of source build".
 To do this, you can create a new directory and build in it:
 ```sh
 cd build/cmake
@ -16,7 +16,7 @@ cd builddir
 cmake ..
 make
 ```
-Then you can clean all cmake caches by simpily delete the new directory:
+Then you can clean all cmake caches by simply deleting the new directory:
 ```sh
 rm -rf build/cmake/builddir
 ```
@ -34,19 +34,19 @@ cd build/cmake/builddir
 cmake -LH ..
 ```

-Bool options can be set to ON/OFF with -D\[option\]=\[ON/OFF\]. You can configure cmake options like this:
+Bool options can be set to `ON/OFF` with `-D[option]=[ON/OFF]`. You can configure cmake options like this:
 ```sh
 cd build/cmake/builddir
 cmake -DZSTD_BUILD_TESTS=ON -DZSTD_LEGACY_SUPPORT=ON ..
 make
 ```

-## referring
+### References
 [Looking for a 'cmake clean' command to clear up CMake output](https://stackoverflow.com/questions/9680420/looking-for-a-cmake-clean-command-to-clear-up-cmake-output)

-# CMake Style Recommendations
+## CMake Style Recommendations

-## Indent all code correctly, i.e. the body of
+### Indent all code correctly, i.e. the body of

 * if/else/endif
 * foreach/endforeach
@ -57,7 +57,7 @@ make
 Use spaces for indenting, 2, 3 or 4 spaces preferably. Use the same amount of
 spaces for indenting as is used in the rest of the file. Do not use tabs.

-## Upper/lower casing
+### Upper/lower casing

 Most important: use consistent upper- or lowercasing within one file !

@ -77,7 +77,7 @@ Add_Executable(hello hello.c)
 aDd_ExEcUtAbLe(blub blub.c)
 ```

-## End commands
+### End commands
 To make the code easier to read, use empty commands for endforeach(), endif(),
 endfunction(), endmacro() and endwhile(). Also, use empty else() commands.

@ -99,6 +99,6 @@ if(BARVAR)
 endif(BARVAR)
 ```

-## Other resources for best practices
+### Other resources for best practices

-`https://cmake.org/cmake/help/latest/manual/cmake-developer.7.html#modules`
+https://cmake.org/cmake/help/latest/manual/cmake-developer.7.html#modules
--- a/build/cmake/lib/CMakeLists.txt
+++ b/build/cmake/lib/CMakeLists.txt
@ -134,11 +134,10 @@ if (UNIX)
    # pkg-config
    set(PREFIX "${CMAKE_INSTALL_PREFIX}")
    set(LIBDIR "${CMAKE_INSTALL_FULL_LIBDIR}")
-    set(INCLUDEDIR "${CMAKE_INSTALL_FULL_INCLUDEDIR}")
    set(VERSION "${zstd_VERSION_MAJOR}.${zstd_VERSION_MINOR}.${zstd_VERSION_PATCH}")
    add_custom_target(libzstd.pc ALL
            ${CMAKE_COMMAND} -DIN="${LIBRARY_DIR}/libzstd.pc.in" -DOUT="libzstd.pc"
-            -DPREFIX="${PREFIX}" -DLIBDIR="${LIBDIR}" -DINCLUDEDIR="${INCLUDEDIR}" -DVERSION="${VERSION}"
+            -DPREFIX="${PREFIX}" -DVERSION="${VERSION}"
            -P "${CMAKE_CURRENT_SOURCE_DIR}/pkgconfig.cmake"
            COMMENT "Creating pkg-config file")

--- a/doc/zstd_manual.html
+++ b/doc/zstd_manual.html
@ -692,12 +692,17 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);

 <pre><b>ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
                             int compressionLevel);
-</b><p>  When compressing multiple messages / blocks using the same dictionary, it's recommended to load it only once.
-  ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup cost.
+</b><p>  When compressing multiple messages or blocks using the same dictionary,
+  it's recommended to digest the dictionary only once, since it's a costly operation.
+  ZSTD_createCDict() will create a state from digesting a dictionary.
+  The resulting state can be used for future compression operations with very limited startup cost.
  ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
- `dictBuffer` can be released after ZSTD_CDict creation, because its content is copied within CDict.
-  Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate `dictBuffer` content.
-  Note : A ZSTD_CDict can be created from an empty dictBuffer, but it is inefficient when used to compress small data. 
+ @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict.
+  Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content.
+  Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer,
+      in which case the only thing that it transports is the @compressionLevel.
+      This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively,
+      expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. 
 </p></pre><BR>

 <pre><b>size_t      ZSTD_freeCDict(ZSTD_CDict* CDict);
@ -969,6 +974,12 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
     *   This method is effective when the dictionary sizes are very small relative
     *   to the input size, and the input size is fairly large to begin with.
     *
+     * - The CDict's tables are not used at all, and instead we use the working
+     *   context alone to reload the dictionary and use params based on the source
+     *   size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict().
+     *   This method is effective when the dictionary sizes are very small relative
+     *   to the input size, and the input size is fairly large to begin with.
+     *
     * Zstd has a simple internal heuristic that selects which strategy to use
     * at the beginning of a compression. However, if experimentation shows that
     * Zstd is making poor choices, it is possible to override that choice with
--- a/lib/Makefile
+++ b/lib/Makefile
@ -244,8 +244,6 @@ libzstd.pc:
 libzstd.pc: libzstd.pc.in
 	@echo creating pkgconfig
 	@sed -e 's|@PREFIX@|$(PREFIX)|' \
-             -e 's|@LIBDIR@|$(LIBDIR)|' \
-             -e 's|@INCLUDEDIR@|$(INCLUDEDIR)|' \
             -e 's|@VERSION@|$(VERSION)|' \
             $< >$@

--- a/lib/libzstd.pc.in
+++ b/lib/libzstd.pc.in
@ -3,8 +3,9 @@
 #   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

 prefix=@PREFIX@
-libdir=@LIBDIR@
-includedir=@INCLUDEDIR@
+exec_prefix=${prefix}
+includedir=${prefix}/include
+libdir=${exec_prefix}/lib

 Name: zstd
 Description: fast lossless compression algorithm library
--- a/lib/zstd.h
+++ b/lib/zstd.h
@ -808,12 +808,17 @@ ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
 typedef struct ZSTD_CDict_s ZSTD_CDict;

 /*! ZSTD_createCDict() :
- *  When compressing multiple messages / blocks using the same dictionary, it's recommended to load it only once.
- *  ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup cost.
+ *  When compressing multiple messages or blocks using the same dictionary,
+ *  it's recommended to digest the dictionary only once, since it's a costly operation.
+ *  ZSTD_createCDict() will create a state from digesting a dictionary.
+ *  The resulting state can be used for future compression operations with very limited startup cost.
 *  ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
- * `dictBuffer` can be released after ZSTD_CDict creation, because its content is copied within CDict.
- *  Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate `dictBuffer` content.
- *  Note : A ZSTD_CDict can be created from an empty dictBuffer, but it is inefficient when used to compress small data. */
+ * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict.
+ *  Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content.
+ *  Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer,
+ *      in which case the only thing that it transports is the @compressionLevel.
+ *      This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively,
+ *      expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */
 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
                                         int compressionLevel);

@ -1167,7 +1172,7 @@ typedef enum {
     *   tables. However, this model incurs no start-up cost (as long as the
     *   working context's tables can be reused). For small inputs, this can be
     *   faster than copying the CDict's tables.
-     * 
+     *
     * - The CDict's tables are not used at all, and instead we use the working
     *   context alone to reload the dictionary and use params based on the source
     *   size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict().
--- a/programs/fileio.c
+++ b/programs/fileio.c
@ -1496,17 +1496,17 @@ FIO_determineCompressedName(const char* srcFileName, const char* outDirName, con
    static char* dstFileNameBuffer = NULL;   /* using static allocation : this function cannot be multi-threaded */
    char* outDirFilename = NULL;
    size_t sfnSize = strlen(srcFileName);
-    size_t const suffixSize = strlen(suffix);
+    size_t const srcSuffixLen = strlen(suffix);
    if (outDirName) {
-        outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, suffixSize);
+        outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, srcSuffixLen);
        sfnSize = strlen(outDirFilename);
        assert(outDirFilename != NULL);
    }

-    if (dfnbCapacity <= sfnSize+suffixSize+1) {
+    if (dfnbCapacity <= sfnSize+srcSuffixLen+1) {
        /* resize buffer for dstName */
        free(dstFileNameBuffer);
-        dfnbCapacity = sfnSize + suffixSize + 30;
+        dfnbCapacity = sfnSize + srcSuffixLen + 30;
        dstFileNameBuffer = (char*)malloc(dfnbCapacity);
        if (!dstFileNameBuffer) {
            EXM_THROW(30, "zstd: %s", strerror(errno));
@ -1520,7 +1520,7 @@ FIO_determineCompressedName(const char* srcFileName, const char* outDirName, con
    } else {
        memcpy(dstFileNameBuffer, srcFileName, sfnSize);
    }
-    memcpy(dstFileNameBuffer+sfnSize, suffix, suffixSize+1 /* Include terminating null */);
+    memcpy(dstFileNameBuffer+sfnSize, suffix, srcSuffixLen+1 /* Include terminating null */);
    return dstFileNameBuffer;
 }

@ -2287,6 +2287,37 @@ int FIO_decompressFilename(FIO_prefs_t* const prefs,
    return decodingError;
 }

+static const char *suffixList[] = {
+    ZSTD_EXTENSION,
+    TZSTD_EXTENSION,
+#ifdef ZSTD_GZDECOMPRESS
+    GZ_EXTENSION,
+    TGZ_EXTENSION,
+#endif
+#ifdef ZSTD_LZMADECOMPRESS
+    LZMA_EXTENSION,
+    XZ_EXTENSION,
+    TXZ_EXTENSION,
+#endif
+#ifdef ZSTD_LZ4DECOMPRESS
+    LZ4_EXTENSION,
+    TLZ4_EXTENSION,
+#endif
+    NULL
+};
+
+static const char *suffixListStr =
+    ZSTD_EXTENSION "/" TZSTD_EXTENSION
+#ifdef ZSTD_GZDECOMPRESS
+    "/" GZ_EXTENSION "/" TGZ_EXTENSION
+#endif
+#ifdef ZSTD_LZMADECOMPRESS
+    "/" LZMA_EXTENSION "/" XZ_EXTENSION "/" TXZ_EXTENSION
+#endif
+#ifdef ZSTD_LZ4DECOMPRESS
+    "/" LZ4_EXTENSION "/" TLZ4_EXTENSION
+#endif
+;

 /* FIO_determineDstName() :
 * create a destination filename from a srcFileName.
@ -2297,71 +2328,78 @@ FIO_determineDstName(const char* srcFileName, const char* outDirName)
 {
    static size_t dfnbCapacity = 0;
    static char* dstFileNameBuffer = NULL;   /* using static allocation : this function cannot be multi-threaded */
+    size_t dstFileNameEndPos;
    char* outDirFilename = NULL;
+    const char* dstSuffix = "";
+    size_t dstSuffixLen = 0;
+
    size_t sfnSize = strlen(srcFileName);
-    size_t suffixSize;

-    const char* const suffixPtr = strrchr(srcFileName, '.');
-    if (suffixPtr == NULL) {
-        DISPLAYLEVEL(1, "zstd: %s: unknown suffix -- ignored \n",
-                        srcFileName);
+    size_t srcSuffixLen;
+    const char* const srcSuffix = strrchr(srcFileName, '.');
+    if (srcSuffix == NULL) {
+        DISPLAYLEVEL(1,
+            "zstd: %s: unknown suffix (%s expected). "
+            "Can't derive the output file name. "
+            "Specify it with -o dstFileName. Ignoring.\n",
+            srcFileName, suffixListStr);
        return NULL;
    }
-    suffixSize = strlen(suffixPtr);
+    srcSuffixLen = strlen(srcSuffix);

-    /* check suffix is authorized */
-    if (sfnSize <= suffixSize
-        || (   strcmp(suffixPtr, ZSTD_EXTENSION)
-        #ifdef ZSTD_GZDECOMPRESS
-            && strcmp(suffixPtr, GZ_EXTENSION)
-        #endif
-        #ifdef ZSTD_LZMADECOMPRESS
-            && strcmp(suffixPtr, XZ_EXTENSION)
-            && strcmp(suffixPtr, LZMA_EXTENSION)
-        #endif
-        #ifdef ZSTD_LZ4DECOMPRESS
-            && strcmp(suffixPtr, LZ4_EXTENSION)
-        #endif
-            ) ) {
-        const char* suffixlist = ZSTD_EXTENSION
-        #ifdef ZSTD_GZDECOMPRESS
-            "/" GZ_EXTENSION
-        #endif
-        #ifdef ZSTD_LZMADECOMPRESS
-            "/" XZ_EXTENSION "/" LZMA_EXTENSION
-        #endif
-        #ifdef ZSTD_LZ4DECOMPRESS
-            "/" LZ4_EXTENSION
-        #endif
-        ;
-        DISPLAYLEVEL(1, "zstd: %s: unknown suffix (%s expected) -- ignored \n",
-                     srcFileName, suffixlist);
-        return NULL;
+    {
+        const char** matchedSuffixPtr;
+        for (matchedSuffixPtr = suffixList; *matchedSuffixPtr != NULL; matchedSuffixPtr++) {
+            if (!strcmp(*matchedSuffixPtr, srcSuffix)) {
+                break;
+            }
+        }
+
+        /* check suffix is authorized */
+        if (sfnSize <= srcSuffixLen || *matchedSuffixPtr == NULL) {
+            DISPLAYLEVEL(1,
+                "zstd: %s: unknown suffix (%s expected). "
+                "Can't derive the output file name. "
+                "Specify it with -o dstFileName. Ignoring.\n",
+                srcFileName, suffixListStr);
+            return NULL;
+        }
+
+        if ((*matchedSuffixPtr)[1] == 't') {
+            dstSuffix = ".tar";
+            dstSuffixLen = strlen(dstSuffix);
+        }
    }
+
    if (outDirName) {
        outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, 0);
        sfnSize = strlen(outDirFilename);
        assert(outDirFilename != NULL);
    }

-    if (dfnbCapacity+suffixSize <= sfnSize+1) {
+    if (dfnbCapacity+srcSuffixLen <= sfnSize+1+dstSuffixLen) {
        /* allocate enough space to write dstFilename into it */
        free(dstFileNameBuffer);
        dfnbCapacity = sfnSize + 20;
        dstFileNameBuffer = (char*)malloc(dfnbCapacity);
        if (dstFileNameBuffer==NULL)
-            EXM_THROW(74, "%s : not enough memory for dstFileName", strerror(errno));
+            EXM_THROW(74, "%s : not enough memory for dstFileName",
+                      strerror(errno));
    }

    /* return dst name == src name truncated from suffix */
    assert(dstFileNameBuffer != NULL);
+    dstFileNameEndPos = sfnSize - srcSuffixLen;
    if (outDirFilename) {
-        memcpy(dstFileNameBuffer, outDirFilename, sfnSize - suffixSize);
+        memcpy(dstFileNameBuffer, outDirFilename, dstFileNameEndPos);
        free(outDirFilename);
    } else {
-        memcpy(dstFileNameBuffer, srcFileName, sfnSize - suffixSize);
+        memcpy(dstFileNameBuffer, srcFileName, dstFileNameEndPos);
    }
-    dstFileNameBuffer[sfnSize-suffixSize] = '\0';
+
+    /* The short tar extensions tzst, tgz, txz and tlz4 files should have "tar"
+     * extension on decompression. Also writes terminating null. */
+    strcpy(dstFileNameBuffer + dstFileNameEndPos, dstSuffix);
    return dstFileNameBuffer;

    /* note : dstFileNameBuffer memory is not going to be free */
--- a/programs/fileio.h
+++ b/programs/fileio.h
@ -30,11 +30,23 @@ extern "C" {
 #else
 #  define nulmark "/dev/null"
 #endif
+
+/**
+ * Recognized file extensions. A short tar extension has 't' right after the
+ * dot (e.g. ".tgz"); on decompression that suffix is replaced with ".tar".
+ */
 #define LZMA_EXTENSION  ".lzma"
 #define XZ_EXTENSION    ".xz"
+#define TXZ_EXTENSION   ".txz"
+
 #define GZ_EXTENSION    ".gz"
+#define TGZ_EXTENSION   ".tgz"
+
 #define ZSTD_EXTENSION  ".zst"
+#define TZSTD_EXTENSION ".tzst"
+
 #define LZ4_EXTENSION   ".lz4"
+#define TLZ4_EXTENSION  ".tlz4"


 /*-*************************************
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@ -896,6 +896,46 @@ if [ $LZ4MODE -ne 1 ]; then
    grep ".lz4" tmplg > $INTOVOID && die "Unsupported suffix listed"
 fi

+println "\n===>  tar extension tests "
+
+rm -f tmp tmp.tar tmp.tzst tmp.tgz tmp.txz tmp.tlz4
+
+./datagen > tmp
+tar cf tmp.tar tmp
+$ZSTD tmp.tar -o tmp.tzst
+rm tmp.tar
+$ZSTD -d tmp.tzst
+[ -e tmp.tar ] || die ".tzst failed to decompress to .tar!"
+rm -f tmp.tar tmp.tzst
+
+if [ $GZIPMODE -eq 1 ]; then
+    tar czf tmp.tgz tmp
+    $ZSTD -d tmp.tgz
+    [ -e tmp.tar ] || die ".tgz failed to decompress to .tar!"
+    rm -f tmp.tar tmp.tgz
+fi
+
+if [ $LZMAMODE -eq 1 ]; then
+    tar c tmp | $ZSTD --format=xz > tmp.txz
+    $ZSTD -d tmp.txz
+    [ -e tmp.tar ] || die ".txz failed to decompress to .tar!"
+    rm -f tmp.tar tmp.txz
+fi
+
+if [ $LZ4MODE -eq 1 ]; then
+    tar c tmp | $ZSTD --format=lz4 > tmp.tlz4
+    $ZSTD -d tmp.tlz4
+    [ -e tmp.tar ] || die ".tlz4 failed to decompress to .tar!"
+    rm -f tmp.tar tmp.tlz4
+fi
+
+# Near-miss suffixes must be rejected; use `&& die`, since a bare `! cmd` can never fail the script.
+touch tmp.t tmp.tz tmp.tzs
+$ZSTD -d tmp.t && die "unsupported suffix .t was accepted"
+$ZSTD -d tmp.tz && die "unsupported suffix .tz was accepted"
+$ZSTD -d tmp.tzs && die "unsupported suffix .tzs was accepted"
+rm -f tmp.t tmp.tz tmp.tzs
+
 println "\n===>  zstd round-trip tests "

 roundTripTest
--- a/zlibWrapper/gzread.c
+++ b/zlibWrapper/gzread.c
@ -8,6 +8,14 @@

 #include "gzguts.h"

+/* fix for Visual Studio, which doesn't support ssize_t type.
+ * see https://github.com/facebook/zstd/issues/1800#issuecomment-545945050 */
+#if defined(_MSC_VER) && !defined(ssize_t)
+#  include <BaseTsd.h>
+   typedef SSIZE_T ssize_t;
+#endif
+
+
 /* Local functions */
 local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *));
 local int gz_avail OF((gz_statep));