Add xz and lzma support.

Finish feature started by @inikep.

* Add xz and lzma compression and decompression support to target `xzstd`.
* Fix bug in gzip decompression that silently accepted truncated files.
* Add gzip frame composition tests.
* Add xz/lzma compatibility tests.
* Add xz/lzma frame composition tests.
This commit is contained in:
Nick Terrell 2017-03-13 18:11:07 -07:00
parent a296c6646a
commit aa8bcf360f
5 changed files with 261 additions and 36 deletions

View File

@ -68,12 +68,27 @@ EXT =
endif
# zlib detection
NO_ZLIB_MSG := ==> no zlib, building zstd without .gz support
VOID = /dev/null
HAVE_ZLIB := $(shell printf '\#include <zlib.h>\nint main(){}' | $(CC) -o have_zlib -x c - -lz 2> $(VOID) && rm have_zlib$(EXT) && echo 1 || echo 0)
ifeq ($(HAVE_ZLIB), 1)
ZLIB_MSG := ==> building zstd with .gz compression support
ZLIBCPP = -DZSTD_GZCOMPRESS -DZSTD_GZDECOMPRESS
ZLIBLD = -lz
else
ZLIB_MSG := $(NO_ZLIB_MSG)
endif
# lzma detection
NO_LZMA_MSG := ==> no liblzma, building zstd without .xz/.lzma support
HAVE_LZMA := $(shell printf '\#include <lzma.h>\nint main(){}' | $(CC) -o have_lzma -x c - -llzma 2> $(VOID) && rm have_lzma$(EXT) && echo 1 || echo 0)
ifeq ($(HAVE_LZMA), 1)
LZMA_MSG := ==> building zstd with .xz/.lzma compression support
LZMACPP = -DZSTD_LZMACOMPRESS -DZSTD_LZMADECOMPRESS
LZMALD = -llzma
else
LZMA_MSG := $(NO_LZMA_MSG)
endif
.PHONY: default all clean clean_decomp_o install uninstall generate_res
@ -85,14 +100,15 @@ $(ZSTDDECOMP_O): CFLAGS += $(ALIGN_LOOP)
zstd : CPPFLAGS += $(ZLIBCPP)
zstd : LDFLAGS += $(ZLIBLD)
zstd-nogz : HAVE_ZLIB=0
zstd zstd-nogz : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
zstd zstd-nogz : $(ZSTDLIB_OBJ) zstdcli.o fileio.o bench.o datagen.o dibio.o
ifeq ($(HAVE_ZLIB), 1)
@echo "==> building zstd with .gz decompression support "
else
@echo "==> no zlib, building zstd with .zst support only (no .gz support) "
endif
zstd : LZMA_MSG := $(NO_LZMA_MSG)
zstd-nogz : ZLIB_MSG := $(NO_ZLIB_MSG)
zstd-nogz : LZMA_MSG := $(NO_LZMA_MSG)
xzstd : CPPFLAGS += $(ZLIBCPP) $(LZMACPP)
xzstd : LDFLAGS += $(ZLIBLD) $(LZMALD)
zstd zstd-nogz xzstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
zstd zstd-nogz xzstd : $(ZSTDLIB_OBJ) zstdcli.o fileio.o bench.o datagen.o dibio.o
@echo "$(ZLIB_MSG)"
@echo "$(LZMA_MSG)"
ifneq (,$(filter Windows%,$(OS)))
windres/generate_res.bat
endif

View File

@ -44,6 +44,9 @@
# define z_const
# endif
#endif
#if defined(ZSTD_LZMACOMPRESS) || defined(ZSTD_LZMADECOMPRESS)
# include <lzma.h>
#endif
/*-*************************************
@ -71,7 +74,6 @@
#define MAX_DICT_SIZE (8 MB) /* protection against large input (attack scenario) */
#define FNSPACE 30
#define GZ_EXTENSION ".gz"
/*-*************************************
@ -434,6 +436,65 @@ static unsigned long long FIO_compressGzFrame(cRess_t* ress, const char* srcFile
#endif
#ifdef ZSTD_LZMACOMPRESS
static unsigned long long FIO_compressLzmaFrame(cRess_t* ress, const char* srcFileName, U64 const srcFileSize, int compressionLevel, U64* readsize, int plain_lzma)
{
unsigned long long inFileSize = 0, outFileSize = 0;
lzma_stream strm = LZMA_STREAM_INIT;
lzma_action action = LZMA_RUN;
lzma_ret ret;
if (compressionLevel < 0) compressionLevel = 0;
if (compressionLevel > 9) compressionLevel = 9;
if (plain_lzma) {
lzma_options_lzma opt_lzma;
if (lzma_lzma_preset(&opt_lzma, compressionLevel)) EXM_THROW(71, "zstd: %s: lzma_lzma_preset error", srcFileName);
ret = lzma_alone_encoder(&strm, &opt_lzma); /* LZMA */
if (ret != LZMA_OK) EXM_THROW(71, "zstd: %s: lzma_alone_encoder error %d", srcFileName, ret);
} else {
ret = lzma_easy_encoder(&strm, compressionLevel, LZMA_CHECK_CRC64); /* XZ */
if (ret != LZMA_OK) EXM_THROW(71, "zstd: %s: lzma_easy_encoder error %d", srcFileName, ret);
}
strm.next_in = 0;
strm.avail_in = 0;
strm.next_out = ress->dstBuffer;
strm.avail_out = ress->dstBufferSize;
while (1) {
if (strm.avail_in == 0) {
size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile);
if (inSize == 0) action = LZMA_FINISH;
inFileSize += inSize;
strm.next_in = ress->srcBuffer;
strm.avail_in = inSize;
}
ret = lzma_code(&strm, action);
if (ret != LZMA_OK && ret != LZMA_STREAM_END) EXM_THROW(72, "zstd: %s: lzma_code encoding error %d", srcFileName, ret);
{ size_t const compBytes = ress->dstBufferSize - strm.avail_out;
if (compBytes) {
if (fwrite(ress->dstBuffer, 1, compBytes, ress->dstFile) != compBytes) EXM_THROW(73, "Write error : cannot write to output file");
outFileSize += compBytes;
strm.next_out = ress->dstBuffer;
strm.avail_out = ress->dstBufferSize;
}
}
if (!srcFileSize) DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", (U32)(inFileSize>>20), (double)outFileSize/inFileSize*100)
else DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%", (U32)(inFileSize>>20), (U32)(srcFileSize>>20), (double)outFileSize/inFileSize*100);
if (ret == LZMA_STREAM_END) break;
}
lzma_end(&strm);
*readsize = inFileSize;
return outFileSize;
}
#endif
/*! FIO_compressFilename_internal() :
* same as FIO_compressFilename_extRess(), with `ress.desFile` already opened.
* @return : 0 : compression completed correctly,
@ -448,14 +509,26 @@ static int FIO_compressFilename_internal(cRess_t ress,
U64 compressedfilesize = 0;
U64 const fileSize = UTIL_getFileSize(srcFileName);
if (g_compressionType) {
switch (g_compressionType) {
case FIO_zstdCompression:
break;
case FIO_gzipCompression:
#ifdef ZSTD_GZCOMPRESS
compressedfilesize = FIO_compressGzFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize);
compressedfilesize = FIO_compressGzFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize);
#else
(void)compressionLevel;
EXM_THROW(20, "zstd: %s: file cannot be compressed as gzip (zstd compiled without ZSTD_GZCOMPRESS) -- ignored \n", srcFileName);
(void)compressionLevel;
EXM_THROW(20, "zstd: %s: file cannot be compressed as gzip (zstd compiled without ZSTD_GZCOMPRESS) -- ignored \n", srcFileName);
#endif
goto finish;
goto finish;
case FIO_xzCompression:
case FIO_lzmaCompression:
#ifdef ZSTD_LZMACOMPRESS
compressedfilesize = FIO_compressLzmaFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize, g_compressionType==FIO_lzmaCompression);
#else
(void)compressionLevel;
EXM_THROW(20, "zstd: %s: file cannot be compressed as xz/lzma (zstd compiled without ZSTD_LZMACOMPRESS) -- ignored \n", srcFileName);
#endif
goto finish;
}
/* init */
@ -763,10 +836,10 @@ static void FIO_fwriteSparseEnd(FILE* file, unsigned storedSkips)
{
if (storedSkips-->0) { /* implies g_sparseFileSupport>0 */
int const seekResult = LONG_SEEK(file, storedSkips, SEEK_CUR);
if (seekResult != 0) EXM_THROW(69, "Final skip error (sparse file)\n");
if (seekResult != 0) EXM_THROW(69, "Final skip error (sparse file)");
{ const char lastZeroByte[1] = { 0 };
size_t const sizeCheck = fwrite(lastZeroByte, 1, 1, file);
if (sizeCheck != 1) EXM_THROW(69, "Write error : cannot write last zero\n");
if (sizeCheck != 1) EXM_THROW(69, "Write error : cannot write last zero");
} }
}
@ -849,6 +922,7 @@ static unsigned long long FIO_decompressGzFrame(dRess_t* ress, FILE* srcFile, co
{
unsigned long long outFileSize = 0;
z_stream strm;
int flush = Z_NO_FLUSH;
int ret;
strm.zalloc = Z_NULL;
@ -866,11 +940,12 @@ static unsigned long long FIO_decompressGzFrame(dRess_t* ress, FILE* srcFile, co
for ( ; ; ) {
if (strm.avail_in == 0) {
ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile);
if (ress->srcBufferLoaded == 0) break;
if (ress->srcBufferLoaded == 0) flush = Z_FINISH;
strm.next_in = (z_const unsigned char*)ress->srcBuffer;
strm.avail_in = (uInt)ress->srcBufferLoaded;
}
ret = inflate(&strm, Z_NO_FLUSH);
ret = inflate(&strm, flush);
if (ret == Z_BUF_ERROR) EXM_THROW(39, "zstd: %s: premature end", srcFileName);
if (ret != Z_OK && ret != Z_STREAM_END) { DISPLAY("zstd: %s: inflate error %d \n", srcFileName, ret); return 0; }
{ size_t const decompBytes = ress->dstBufferSize - strm.avail_out;
if (decompBytes) {
@ -886,7 +961,60 @@ static unsigned long long FIO_decompressGzFrame(dRess_t* ress, FILE* srcFile, co
if (strm.avail_in > 0) memmove(ress->srcBuffer, strm.next_in, strm.avail_in);
ress->srcBufferLoaded = strm.avail_in;
ret = inflateEnd(&strm);
if (ret != Z_OK) EXM_THROW(32, "zstd: %s: inflateEnd error %d \n", srcFileName, ret);
if (ret != Z_OK) EXM_THROW(32, "zstd: %s: inflateEnd error %d", srcFileName, ret);
return outFileSize;
}
#endif
#ifdef ZSTD_LZMADECOMPRESS
static unsigned long long FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile, const char* srcFileName, int plain_lzma)
{
unsigned long long outFileSize = 0;
lzma_stream strm = LZMA_STREAM_INIT;
lzma_action action = LZMA_RUN;
lzma_ret ret;
strm.next_in = 0;
strm.avail_in = 0;
if (plain_lzma) {
ret = lzma_alone_decoder(&strm, UINT64_MAX); /* LZMA */
} else {
ret = lzma_stream_decoder(&strm, UINT64_MAX, 0); /* XZ */
}
if (ret != LZMA_OK) EXM_THROW(71, "zstd: %s: lzma_alone_decoder/lzma_stream_decoder error %d", srcFileName, ret);
strm.next_out = ress->dstBuffer;
strm.avail_out = ress->dstBufferSize;
strm.avail_in = ress->srcBufferLoaded;
strm.next_in = ress->srcBuffer;
for ( ; ; ) {
if (strm.avail_in == 0) {
ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile);
if (ress->srcBufferLoaded == 0) action = LZMA_FINISH;
strm.next_in = ress->srcBuffer;
strm.avail_in = ress->srcBufferLoaded;
}
ret = lzma_code(&strm, action);
if (ret == LZMA_BUF_ERROR) EXM_THROW(39, "zstd: %s: premature end", srcFileName);
if (ret != LZMA_OK && ret != LZMA_STREAM_END) { DISPLAY("zstd: %s: lzma_code decoding error %d \n", srcFileName, ret); return 0; }
{ size_t const decompBytes = ress->dstBufferSize - strm.avail_out;
if (decompBytes) {
if (fwrite(ress->dstBuffer, 1, decompBytes, ress->dstFile) != decompBytes) EXM_THROW(31, "Write error : cannot write to output file");
outFileSize += decompBytes;
strm.next_out = ress->dstBuffer;
strm.avail_out = ress->dstBufferSize;
}
}
if (ret == LZMA_STREAM_END) break;
}
if (strm.avail_in > 0) memmove(ress->srcBuffer, strm.next_in, strm.avail_in);
ress->srcBufferLoaded = strm.avail_in;
lzma_end(&strm);
return outFileSize;
}
#endif
@ -924,7 +1052,7 @@ static int FIO_decompressSrcFile(dRess_t ress, const char* dstFileName, const ch
}
readSomething = 1; /* there is at least >= 4 bytes in srcFile */
if (ress.srcBufferLoaded < toRead) { DISPLAY("zstd: %s: unknown header \n", srcFileName); fclose(srcFile); return 1; } /* srcFileName is empty */
if (buf[0] == 31 && buf[1] == 139) { /* gz header */
if (buf[0] == 31 && buf[1] == 139) { /* gz magic number */
#ifdef ZSTD_GZDECOMPRESS
unsigned long long const result = FIO_decompressGzFrame(&ress, srcFile, srcFileName);
if (result == 0) return 1;
@ -932,6 +1060,16 @@ static int FIO_decompressSrcFile(dRess_t ress, const char* dstFileName, const ch
#else
DISPLAYLEVEL(1, "zstd: %s: gzip file cannot be uncompressed (zstd compiled without ZSTD_GZDECOMPRESS) -- ignored \n", srcFileName);
return 1;
#endif
} else if ((buf[0] == 0xFD && buf[1] == 0x37) /* xz magic number */
|| (buf[0] == 0x5D && buf[1] == 0x00)) { /* lzma header (no magic number) */
#ifdef ZSTD_LZMADECOMPRESS
unsigned long long const result = FIO_decompressLzmaFrame(&ress, srcFile, srcFileName, buf[0] != 0xFD);
if (result == 0) return 1;
filesize += result;
#else
DISPLAYLEVEL(1, "zstd: %s: xz/lzma file cannot be uncompressed (zstd compiled without ZSTD_LZMADECOMPRESS) -- ignored \n", srcFileName);
return 1;
#endif
} else {
if (!ZSTD_isFrame(ress.srcBuffer, toRead)) {
@ -1020,32 +1158,31 @@ int FIO_decompressMultipleFilenames(const char** srcNamesTable, unsigned nbFiles
missingFiles += FIO_decompressSrcFile(ress, suffix, srcNamesTable[u]);
if (fclose(ress.dstFile)) EXM_THROW(72, "Write error : cannot properly close stdout");
} else {
size_t const suffixSize = strlen(suffix);
size_t const gzipSuffixSize = strlen(GZ_EXTENSION);
size_t suffixSize;
size_t dfnSize = FNSPACE;
unsigned u;
char* dstFileName = (char*)malloc(FNSPACE);
if (dstFileName==NULL) EXM_THROW(73, "not enough memory for dstFileName");
for (u=0; u<nbFiles; u++) { /* create dstFileName */
const char* const srcFileName = srcNamesTable[u];
const char* const suffixPtr = strrchr(srcFileName, '.');
size_t const sfnSize = strlen(srcFileName);
const char* const suffixPtr = srcFileName + sfnSize - suffixSize;
const char* const gzipSuffixPtr = srcFileName + sfnSize - gzipSuffixSize;
if (!suffixPtr) {
DISPLAYLEVEL(1, "zstd: %s: unknown suffix -- ignored \n", srcFileName);
skippedFiles++;
continue;
}
suffixSize = strlen(suffixPtr);
if (dfnSize+suffixSize <= sfnSize+1) {
free(dstFileName);
dfnSize = sfnSize + 20;
dstFileName = (char*)malloc(dfnSize);
if (dstFileName==NULL) EXM_THROW(74, "not enough memory for dstFileName");
}
if (sfnSize <= suffixSize || strcmp(suffixPtr, suffix) != 0) {
if (sfnSize <= gzipSuffixSize || strcmp(gzipSuffixPtr, GZ_EXTENSION) != 0) {
DISPLAYLEVEL(1, "zstd: %s: unknown suffix (%s/%s expected) -- ignored \n", srcFileName, suffix, GZ_EXTENSION);
skippedFiles++;
continue;
} else {
memcpy(dstFileName, srcFileName, sfnSize - gzipSuffixSize);
dstFileName[sfnSize-gzipSuffixSize] = '\0';
}
if (sfnSize <= suffixSize || (strcmp(suffixPtr, GZ_EXTENSION) && strcmp(suffixPtr, XZ_EXTENSION) && strcmp(suffixPtr, ZSTD_EXTENSION) && strcmp(suffixPtr, LZMA_EXTENSION))) {
DISPLAYLEVEL(1, "zstd: %s: unknown suffix (%s/%s/%s/%s expected) -- ignored \n", srcFileName, GZ_EXTENSION, XZ_EXTENSION, ZSTD_EXTENSION, LZMA_EXTENSION);
skippedFiles++;
continue;
} else {
memcpy(dstFileName, srcFileName, sfnSize - suffixSize);
dstFileName[sfnSize-suffixSize] = '\0';

View File

@ -29,12 +29,16 @@ extern "C" {
#else
# define nulmark "/dev/null"
#endif
#define LZMA_EXTENSION ".lzma"
#define XZ_EXTENSION ".xz"
#define GZ_EXTENSION ".gz"
#define ZSTD_EXTENSION ".zst"
/*-*************************************
* Types
***************************************/
typedef enum { FIO_zstdCompression, FIO_gzipCompression } FIO_compressionType_t;
typedef enum { FIO_zstdCompression, FIO_gzipCompression, FIO_xzCompression, FIO_lzmaCompression } FIO_compressionType_t;
/*-*************************************

View File

@ -49,13 +49,13 @@
#define AUTHOR "Yann Collet"
#define WELCOME_MESSAGE "*** %s %i-bits %s, by %s ***\n", COMPRESSOR_NAME, (int)(sizeof(size_t)*8), ZSTD_VERSION, AUTHOR
#define GZ_EXTENSION ".gz"
#define ZSTD_EXTENSION ".zst"
#define ZSTD_UNZSTD "unzstd"
#define ZSTD_CAT "zstdcat"
#define ZSTD_GZ "gzip"
#define ZSTD_GUNZIP "gunzip"
#define ZSTD_GZCAT "gzcat"
#define ZSTD_LZMA "lzma"
#define ZSTD_XZ "xz"
#define KB *(1 <<10)
#define MB *(1 <<20)
@ -130,6 +130,10 @@ static int usage_advanced(const char* programName)
#ifdef ZSTD_GZCOMPRESS
DISPLAY( "--format=gzip : compress files to the .gz format \n");
#endif
#ifdef ZSTD_LZMACOMPRESS
DISPLAY( "--format=xz : compress files to the .xz format \n");
DISPLAY( "--format=lzma : compress files to the .lzma format \n");
#endif
#endif
#ifndef ZSTD_NODECOMPRESS
DISPLAY( "--test : test compressed file integrity \n");
@ -325,6 +329,8 @@ int main(int argCount, const char* argv[])
if (!strcmp(programName, ZSTD_GZ)) { suffix = GZ_EXTENSION; FIO_setCompressionType(FIO_gzipCompression); FIO_setRemoveSrcFile(1); } /* behave like gzip */
if (!strcmp(programName, ZSTD_GUNZIP)) { operation=zom_decompress; FIO_setRemoveSrcFile(1); } /* behave like gunzip */
if (!strcmp(programName, ZSTD_GZCAT)) { operation=zom_decompress; forceStdout=1; FIO_overwriteMode(); outFileName=stdoutmark; displayLevel=1; } /* behave like gzcat */
if (!strcmp(programName, ZSTD_LZMA)) { suffix = LZMA_EXTENSION; FIO_setCompressionType(FIO_lzmaCompression); FIO_setRemoveSrcFile(1); } /* behave like lzma */
if (!strcmp(programName, ZSTD_XZ)) { suffix = XZ_EXTENSION; FIO_setCompressionType(FIO_xzCompression); FIO_setRemoveSrcFile(1); } /* behave like xz */
memset(&compressionParams, 0, sizeof(compressionParams));
/* command switches */
@ -373,6 +379,10 @@ int main(int argCount, const char* argv[])
#ifdef ZSTD_GZCOMPRESS
if (!strcmp(argument, "--format=gzip")) { suffix = GZ_EXTENSION; FIO_setCompressionType(FIO_gzipCompression); continue; }
#endif
#ifdef ZSTD_LZMACOMPRESS
if (!strcmp(argument, "--format=lzma")) { suffix = LZMA_EXTENSION; FIO_setCompressionType(FIO_lzmaCompression); continue; }
if (!strcmp(argument, "--format=xz")) { suffix = XZ_EXTENSION; FIO_setCompressionType(FIO_xzCompression); continue; }
#endif
/* long commands with arguments */
#ifndef ZSTD_NODICT

View File

@ -325,6 +325,7 @@ if [ $GZIPMODE -eq 1 ]; then
gzip -t -v tmp.gz
gzip -f tmp
$ZSTD -d -f -v tmp.gz
rm tmp*
else
$ECHO "gzip binary not detected"
fi
@ -333,6 +334,63 @@ else
fi
$ECHO "\n**** gzip frame tests **** "
if [ $GZIPMODE -eq 1 ]; then
./datagen > tmp
$ZSTD -f --format=gzip tmp
$ZSTD -f tmp
cat tmp.gz tmp.zst tmp.gz tmp.zst | $ZSTD -d -f -o tmp
head -c -1 tmp.gz | $ZSTD -t && die "incomplete frame not detected !"
rm tmp*
else
$ECHO "gzip mode not supported"
fi
$ECHO "\n**** xz compatibility tests **** "
LZMAMODE=1
$ZSTD --format=xz -V || LZMAMODE=0
if [ $LZMAMODE -eq 1 ]; then
$ECHO "xz support detected"
XZEXE=1
xz -V && lzma -V || XZEXE=0
if [ $XZEXE -eq 1 ]; then
./datagen > tmp
$ZSTD --format=lzma -f tmp
$ZSTD --format=xz -f tmp
xz -t -v tmp.xz
xz -t -v tmp.lzma
xz -f -k tmp
lzma -f -k --lzma1 tmp
$ZSTD -d -f -v tmp.xz
$ZSTD -d -f -v tmp.lzma
rm tmp*
else
$ECHO "xz binary not detected"
fi
else
$ECHO "xz mode not supported"
fi
$ECHO "\n**** xz frame tests **** "
if [ $LZMAMODE -eq 1 ]; then
./datagen > tmp
$ZSTD -f --format=xz tmp
$ZSTD -f --format=lzma tmp
$ZSTD -f tmp
cat tmp.xz tmp.lzma tmp.zst tmp.lzma tmp.xz tmp.zst | $ZSTD -d -f -o tmp
head -c -1 tmp.xz | $ZSTD -t && die "incomplete frame not detected !"
head -c -1 tmp.lzma | $ZSTD -t && die "incomplete frame not detected !"
rm tmp*
else
$ECHO "xz mode not supported"
fi
$ECHO "\n**** zstd round-trip tests **** "
roundTripTest