Merge branch 'dev' into ahmed_file

This commit is contained in:
Yann Collet 2019-10-28 13:43:28 -07:00
commit a78a8759a2
11 changed files with 210 additions and 74 deletions

View File

@ -1,8 +1,32 @@
v1.4.4
perf: Improved decompression speed, by > 10%, by @terrelln
perf: Better compression speed when re-using a context, by @felixhandte
perf: Fix compression ratio when compressing large files with small dictionary, by @senhuang42
perf: zstd reference encoder can generate RLE blocks, by @bimbashrestha
perf: minor generic speed optimization, by @davidbolvansky
api: new ability to extract sequences from the parser for analysis, by @bimbashrestha
api: fixed decoding of magic-less frames, by @terrelln
api: fixed ZSTD_initCStream_advanced() performance with fast modes, reported by @QrczakMK
cli: Named pipes support, by @bimbashrestha
cli: short tar's extension support, by @stokito
cli: command --output-dir-flat= , generates target files into requested directory, by @senhuang42
cli: commands --stream-size=# and --size-hint=#, by @nmagerko
cli: faster `-t` test mode
cli: improved some error messages, by @vangyzen
cli: fixed rare deadlock condition within dictionary builder, by @terrelln
build: single-file decoder with emscripten compilation script, by @cwoffenden
build: fixed zlibWrapper compilation on Visual Studio, reported by @bluenlive
build: fixed deprecation warning for certain gcc version, reported by @jasonma163
build: fix compilation on old gcc versions, by @cemeyer
build: improved installation directories for cmake script, by Dmitri Shubin
pack: modified pkgconfig, for better integration into openwrt, requested by @neheb
misc: Improved documentation : ZSTD_CLEVEL, DYNAMIC_BMI2, ZSTD_CDict, function deprecation, zstd format
misc: fixed educational decoder : accept larger literals section, and removed UNALIGNED() macro
v1.4.3
bug: Fix Dictionary Compression Ratio Regression by @cyan4973 (#1709)
bug: Fix Buffer Overflow in v0.3 Decompression by @felixhandte (#1722)
bug: Fix Buffer Overflow in legacy v0.3 decompression by @felixhandte (#1722)
build: Add support for IAR C/C++ Compiler for Arm by @joseph0918 (#1705)
misc: Add NULL pointer check in util.c by @leeyoung624 (#1706)
v1.4.2
bug: Fix bug in zstd-0.5 decoder by @terrelln (#1696)

View File

@ -5,9 +5,9 @@ use case sensitivity that matches modern (ie. cmake version 2.6 and above)
conventions of using lower-case for commands, and upper-case for
variables.
# How to build
## How to build
As cmake doesn't support command like `cmake clean`, it's recommanded to perform a "out of source build".
As cmake doesn't support command like `cmake clean`, it's recommended to perform an "out of source build".
To do this, you can create a new directory and build in it:
```sh
cd build/cmake
@ -16,7 +16,7 @@ cd builddir
cmake ..
make
```
Then you can clean all cmake caches by simpily delete the new directory:
Then you can clean all cmake caches by simply deleting the new directory:
```sh
rm -rf build/cmake/builddir
```
@ -34,19 +34,19 @@ cd build/cmake/builddir
cmake -LH ..
```
Bool options can be set to ON/OFF with -D\[option\]=\[ON/OFF\]. You can configure cmake options like this:
Bool options can be set to `ON/OFF` with `-D[option]=[ON/OFF]`. You can configure cmake options like this:
```sh
cd build/cmake/builddir
cmake -DZSTD_BUILD_TESTS=ON -DZSTD_LEGACY_SUPPORT=ON ..
make
```
## referring
### referring
[Looking for a 'cmake clean' command to clear up CMake output](https://stackoverflow.com/questions/9680420/looking-for-a-cmake-clean-command-to-clear-up-cmake-output)
# CMake Style Recommendations
## CMake Style Recommendations
## Indent all code correctly, i.e. the body of
### Indent all code correctly, i.e. the body of
* if/else/endif
* foreach/endforeach
@ -57,7 +57,7 @@ make
Use spaces for indenting, 2, 3 or 4 spaces preferably. Use the same amount of
spaces for indenting as is used in the rest of the file. Do not use tabs.
## Upper/lower casing
### Upper/lower casing
Most important: use consistent upper- or lowercasing within one file !
@ -77,7 +77,7 @@ Add_Executable(hello hello.c)
aDd_ExEcUtAbLe(blub blub.c)
```
## End commands
### End commands
To make the code easier to read, use empty commands for endforeach(), endif(),
endfunction(), endmacro() and endwhile(). Also, use empty else() commands.
@ -99,6 +99,6 @@ if(BARVAR)
endif(BARVAR)
```
## Other resources for best practices
### Other resources for best practices
`https://cmake.org/cmake/help/latest/manual/cmake-developer.7.html#modules`
https://cmake.org/cmake/help/latest/manual/cmake-developer.7.html#modules

View File

@ -134,11 +134,10 @@ if (UNIX)
# pkg-config
set(PREFIX "${CMAKE_INSTALL_PREFIX}")
set(LIBDIR "${CMAKE_INSTALL_FULL_LIBDIR}")
set(INCLUDEDIR "${CMAKE_INSTALL_FULL_INCLUDEDIR}")
set(VERSION "${zstd_VERSION_MAJOR}.${zstd_VERSION_MINOR}.${zstd_VERSION_PATCH}")
add_custom_target(libzstd.pc ALL
${CMAKE_COMMAND} -DIN="${LIBRARY_DIR}/libzstd.pc.in" -DOUT="libzstd.pc"
-DPREFIX="${PREFIX}" -DLIBDIR="${LIBDIR}" -DINCLUDEDIR="${INCLUDEDIR}" -DVERSION="${VERSION}"
-DPREFIX="${PREFIX}" -DVERSION="${VERSION}"
-P "${CMAKE_CURRENT_SOURCE_DIR}/pkgconfig.cmake"
COMMENT "Creating pkg-config file")

View File

@ -692,12 +692,17 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
<pre><b>ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
int compressionLevel);
</b><p> When compressing multiple messages / blocks using the same dictionary, it's recommended to load it only once.
ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup cost.
</b><p> When compressing multiple messages or blocks using the same dictionary,
it's recommended to digest the dictionary only once, since it's a costly operation.
ZSTD_createCDict() will create a state from digesting a dictionary.
The resulting state can be used for future compression operations with very limited startup cost.
ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
`dictBuffer` can be released after ZSTD_CDict creation, because its content is copied within CDict.
Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate `dictBuffer` content.
Note : A ZSTD_CDict can be created from an empty dictBuffer, but it is inefficient when used to compress small data.
@dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict.
Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content.
Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer,
in which case the only thing that it transports is the @compressionLevel.
This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively,
expecting a ZSTD_CDict parameter with any data, including those without a known dictionary.
</p></pre><BR>
<pre><b>size_t ZSTD_freeCDict(ZSTD_CDict* CDict);
@ -969,6 +974,12 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
* This method is effective when the dictionary sizes are very small relative
* to the input size, and the input size is fairly large to begin with.
*
* - The CDict's tables are not used at all, and instead we use the working
* context alone to reload the dictionary and use params based on the source
* size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict().
* This method is effective when the dictionary sizes are very small relative
* to the input size, and the input size is fairly large to begin with.
*
* Zstd has a simple internal heuristic that selects which strategy to use
* at the beginning of a compression. However, if experimentation shows that
* Zstd is making poor choices, it is possible to override that choice with

View File

@ -244,8 +244,6 @@ libzstd.pc:
libzstd.pc: libzstd.pc.in
@echo creating pkgconfig
@sed -e 's|@PREFIX@|$(PREFIX)|' \
-e 's|@LIBDIR@|$(LIBDIR)|' \
-e 's|@INCLUDEDIR@|$(INCLUDEDIR)|' \
-e 's|@VERSION@|$(VERSION)|' \
$< >$@

View File

@ -3,8 +3,9 @@
# BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
prefix=@PREFIX@
libdir=@LIBDIR@
includedir=@INCLUDEDIR@
exec_prefix=${prefix}
includedir=${prefix}/include
libdir=${exec_prefix}/lib
Name: zstd
Description: fast lossless compression algorithm library

View File

@ -808,12 +808,17 @@ ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
typedef struct ZSTD_CDict_s ZSTD_CDict;
/*! ZSTD_createCDict() :
* When compressing multiple messages / blocks using the same dictionary, it's recommended to load it only once.
* ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup cost.
* When compressing multiple messages or blocks using the same dictionary,
* it's recommended to digest the dictionary only once, since it's a costly operation.
* ZSTD_createCDict() will create a state from digesting a dictionary.
* The resulting state can be used for future compression operations with very limited startup cost.
* ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
* `dictBuffer` can be released after ZSTD_CDict creation, because its content is copied within CDict.
* Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate `dictBuffer` content.
* Note : A ZSTD_CDict can be created from an empty dictBuffer, but it is inefficient when used to compress small data. */
* @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict.
* Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content.
* Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer,
* in which case the only thing that it transports is the @compressionLevel.
* This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively,
* expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
int compressionLevel);
@ -1167,7 +1172,7 @@ typedef enum {
* tables. However, this model incurs no start-up cost (as long as the
* working context's tables can be reused). For small inputs, this can be
* faster than copying the CDict's tables.
*
*
* - The CDict's tables are not used at all, and instead we use the working
* context alone to reload the dictionary and use params based on the source
* size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict().

View File

@ -1496,17 +1496,17 @@ FIO_determineCompressedName(const char* srcFileName, const char* outDirName, con
static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */
char* outDirFilename = NULL;
size_t sfnSize = strlen(srcFileName);
size_t const suffixSize = strlen(suffix);
size_t const srcSuffixLen = strlen(suffix);
if (outDirName) {
outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, suffixSize);
outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, srcSuffixLen);
sfnSize = strlen(outDirFilename);
assert(outDirFilename != NULL);
}
if (dfnbCapacity <= sfnSize+suffixSize+1) {
if (dfnbCapacity <= sfnSize+srcSuffixLen+1) {
/* resize buffer for dstName */
free(dstFileNameBuffer);
dfnbCapacity = sfnSize + suffixSize + 30;
dfnbCapacity = sfnSize + srcSuffixLen + 30;
dstFileNameBuffer = (char*)malloc(dfnbCapacity);
if (!dstFileNameBuffer) {
EXM_THROW(30, "zstd: %s", strerror(errno));
@ -1520,7 +1520,7 @@ FIO_determineCompressedName(const char* srcFileName, const char* outDirName, con
} else {
memcpy(dstFileNameBuffer, srcFileName, sfnSize);
}
memcpy(dstFileNameBuffer+sfnSize, suffix, suffixSize+1 /* Include terminating null */);
memcpy(dstFileNameBuffer+sfnSize, suffix, srcSuffixLen+1 /* Include terminating null */);
return dstFileNameBuffer;
}
@ -2287,6 +2287,37 @@ int FIO_decompressFilename(FIO_prefs_t* const prefs,
return decodingError;
}
/* suffixList :
 * NULL-terminated table of the source-file suffixes accepted when deriving
 * a destination name for decompression. FIO_determineDstName() walks it
 * with strcmp() until it hits the NULL sentinel.
 * The gzip, lzma/xz and lz4 entries are only compiled in when the matching
 * ZSTD_GZDECOMPRESS / ZSTD_LZMADECOMPRESS / ZSTD_LZ4DECOMPRESS macro is defined.
 * Keep this table in sync with suffixListStr, which lists the same
 * extensions for display in error messages. */
static const char *suffixList[] = {
ZSTD_EXTENSION,
TZSTD_EXTENSION,
#ifdef ZSTD_GZDECOMPRESS
GZ_EXTENSION,
TGZ_EXTENSION,
#endif
#ifdef ZSTD_LZMADECOMPRESS
LZMA_EXTENSION,
XZ_EXTENSION,
TXZ_EXTENSION,
#endif
#ifdef ZSTD_LZ4DECOMPRESS
LZ4_EXTENSION,
TLZ4_EXTENSION,
#endif
NULL
};
/* suffixListStr :
 * The same extensions as suffixList, joined with "/" into a single string
 * literal at compile time (adjacent string-literal concatenation).
 * It is passed to the "unknown suffix (%s expected)" messages emitted by
 * FIO_determineDstName(). The optional parts follow the same
 * ZSTD_GZDECOMPRESS / ZSTD_LZMADECOMPRESS / ZSTD_LZ4DECOMPRESS guards. */
static const char *suffixListStr =
ZSTD_EXTENSION "/" TZSTD_EXTENSION
#ifdef ZSTD_GZDECOMPRESS
"/" GZ_EXTENSION "/" TGZ_EXTENSION
#endif
#ifdef ZSTD_LZMADECOMPRESS
"/" LZMA_EXTENSION "/" XZ_EXTENSION "/" TXZ_EXTENSION
#endif
#ifdef ZSTD_LZ4DECOMPRESS
"/" LZ4_EXTENSION "/" TLZ4_EXTENSION
#endif
;
/* FIO_determineDstName() :
* create a destination filename from a srcFileName.
@ -2297,71 +2328,78 @@ FIO_determineDstName(const char* srcFileName, const char* outDirName)
{
static size_t dfnbCapacity = 0;
static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */
size_t dstFileNameEndPos;
char* outDirFilename = NULL;
const char* dstSuffix = "";
size_t dstSuffixLen = 0;
size_t sfnSize = strlen(srcFileName);
size_t suffixSize;
const char* const suffixPtr = strrchr(srcFileName, '.');
if (suffixPtr == NULL) {
DISPLAYLEVEL(1, "zstd: %s: unknown suffix -- ignored \n",
srcFileName);
size_t srcSuffixLen;
const char* const srcSuffix = strrchr(srcFileName, '.');
if (srcSuffix == NULL) {
DISPLAYLEVEL(1,
"zstd: %s: unknown suffix (%s expected). "
"Can't derive the output file name. "
"Specify it with -o dstFileName. Ignoring.\n",
srcFileName, suffixListStr);
return NULL;
}
suffixSize = strlen(suffixPtr);
srcSuffixLen = strlen(srcSuffix);
/* check suffix is authorized */
if (sfnSize <= suffixSize
|| ( strcmp(suffixPtr, ZSTD_EXTENSION)
#ifdef ZSTD_GZDECOMPRESS
&& strcmp(suffixPtr, GZ_EXTENSION)
#endif
#ifdef ZSTD_LZMADECOMPRESS
&& strcmp(suffixPtr, XZ_EXTENSION)
&& strcmp(suffixPtr, LZMA_EXTENSION)
#endif
#ifdef ZSTD_LZ4DECOMPRESS
&& strcmp(suffixPtr, LZ4_EXTENSION)
#endif
) ) {
const char* suffixlist = ZSTD_EXTENSION
#ifdef ZSTD_GZDECOMPRESS
"/" GZ_EXTENSION
#endif
#ifdef ZSTD_LZMADECOMPRESS
"/" XZ_EXTENSION "/" LZMA_EXTENSION
#endif
#ifdef ZSTD_LZ4DECOMPRESS
"/" LZ4_EXTENSION
#endif
;
DISPLAYLEVEL(1, "zstd: %s: unknown suffix (%s expected) -- ignored \n",
srcFileName, suffixlist);
return NULL;
{
const char** matchedSuffixPtr;
for (matchedSuffixPtr = suffixList; *matchedSuffixPtr != NULL; matchedSuffixPtr++) {
if (!strcmp(*matchedSuffixPtr, srcSuffix)) {
break;
}
}
/* check suffix is authorized */
if (sfnSize <= srcSuffixLen || *matchedSuffixPtr == NULL) {
DISPLAYLEVEL(1,
"zstd: %s: unknown suffix (%s expected). "
"Can't derive the output file name. "
"Specify it with -o dstFileName. Ignoring.\n",
srcFileName, suffixListStr);
return NULL;
}
if ((*matchedSuffixPtr)[1] == 't') {
dstSuffix = ".tar";
dstSuffixLen = strlen(dstSuffix);
}
}
if (outDirName) {
outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, 0);
sfnSize = strlen(outDirFilename);
assert(outDirFilename != NULL);
}
if (dfnbCapacity+suffixSize <= sfnSize+1) {
if (dfnbCapacity+srcSuffixLen <= sfnSize+1+dstSuffixLen) {
/* allocate enough space to write dstFilename into it */
free(dstFileNameBuffer);
dfnbCapacity = sfnSize + 20;
dstFileNameBuffer = (char*)malloc(dfnbCapacity);
if (dstFileNameBuffer==NULL)
EXM_THROW(74, "%s : not enough memory for dstFileName", strerror(errno));
EXM_THROW(74, "%s : not enough memory for dstFileName",
strerror(errno));
}
/* return dst name == src name truncated from suffix */
assert(dstFileNameBuffer != NULL);
dstFileNameEndPos = sfnSize - srcSuffixLen;
if (outDirFilename) {
memcpy(dstFileNameBuffer, outDirFilename, sfnSize - suffixSize);
memcpy(dstFileNameBuffer, outDirFilename, dstFileNameEndPos);
free(outDirFilename);
} else {
memcpy(dstFileNameBuffer, srcFileName, sfnSize - suffixSize);
memcpy(dstFileNameBuffer, srcFileName, dstFileNameEndPos);
}
dstFileNameBuffer[sfnSize-suffixSize] = '\0';
/* The short tar extensions tzst, tgz, txz and tlz4 files should have "tar"
* extension on decompression. Also writes terminating null. */
strcpy(dstFileNameBuffer + dstFileNameEndPos, dstSuffix);
return dstFileNameBuffer;
/* note : dstFileNameBuffer memory is not going to be free */

View File

@ -30,11 +30,23 @@ extern "C" {
#else
# define nulmark "/dev/null"
#endif
/**
* We test whether the extension we found starts with 't', and if so, we append
* ".tar" to the end of the output name.
*/
#define LZMA_EXTENSION ".lzma"
#define XZ_EXTENSION ".xz"
#define TXZ_EXTENSION ".txz"
#define GZ_EXTENSION ".gz"
#define TGZ_EXTENSION ".tgz"
#define ZSTD_EXTENSION ".zst"
#define TZSTD_EXTENSION ".tzst"
#define LZ4_EXTENSION ".lz4"
#define TLZ4_EXTENSION ".tlz4"
/*-*************************************

View File

@ -896,6 +896,46 @@ if [ $LZ4MODE -ne 1 ]; then
grep ".lz4" tmplg > $INTOVOID && die "Unsupported suffix listed"
fi
# Short tar extensions (.tzst, .tgz, .txz, .tlz4) must decompress to a file
# carrying a ".tar" extension.
println "\n===> tar extension tests "
rm -f tmp tmp.tar tmp.tzst tmp.tgz tmp.txz tmp.tlz4

# Build a reference tarball, compress it under the short extension, then
# check that decompression recreates tmp.tar.
./datagen > tmp
tar cf tmp.tar tmp
$ZSTD tmp.tar -o tmp.tzst
rm tmp.tar
$ZSTD -d tmp.tzst
[ -e tmp.tar ] || die ".tzst failed to decompress to .tar!"
rm -f tmp.tar tmp.tzst

# The remaining formats are only exercised when the corresponding mode flag is 1.
if [ $GZIPMODE -eq 1 ]; then
    tar czf tmp.tgz tmp
    $ZSTD -d tmp.tgz
    [ -e tmp.tar ] || die ".tgz failed to decompress to .tar!"
    rm -f tmp.tar tmp.tgz
fi

if [ $LZMAMODE -eq 1 ]; then
    tar c tmp | $ZSTD --format=xz > tmp.txz
    $ZSTD -d tmp.txz
    [ -e tmp.tar ] || die ".txz failed to decompress to .tar!"
    rm -f tmp.tar tmp.txz
fi

if [ $LZ4MODE -eq 1 ]; then
    tar c tmp | $ZSTD --format=lz4 > tmp.tlz4
    $ZSTD -d tmp.tlz4
    [ -e tmp.tar ] || die ".tlz4 failed to decompress to .tar!"
    rm -f tmp.tar tmp.tlz4
fi

# Truncated look-alike suffixes are not in the accepted list and are expected
# to be refused.
# NOTE(review): the status of a `!`-negated command is not checked here, so
# these lines do not fail the run if zstd accepts the file; consider
# `$ZSTD -d tmp.t && die "..."` if rejection must be enforced.
touch tmp.t tmp.tz tmp.tzs
! $ZSTD -d tmp.t
! $ZSTD -d tmp.tz
! $ZSTD -d tmp.tzs

# (A stray `exit` used to follow here; it ended the script before the
# round-trip tests and every later section could run.)
println "\n===> zstd round-trip tests "
roundTripTest

View File

@ -8,6 +8,14 @@
#include "gzguts.h"
/* fix for Visual Studio, which doesn't support ssize_t type.
* see https://github.com/facebook/zstd/issues/1800#issuecomment-545945050 */
#if defined(_MSC_VER) && !defined(ssize_t)
# include <BaseTsd.h>
typedef SSIZE_T ssize_t;
#endif
/* Local functions */
local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *));
local int gz_avail OF((gz_statep));