Merge pull request #673 from iburinoc/lz4

Add LZ4 compress/decompress support to CLI
This commit is contained in:
Yann Collet 2017-04-26 09:32:16 -07:00 committed by GitHub
commit 9a7698c1f0
5 changed files with 219 additions and 7 deletions

View File

@ -116,6 +116,16 @@ LZMALD = -llzma
else
LZMA_MSG := $(NO_LZMA_MSG)
endif
# lz4 detection
NO_LZ4_MSG := ==> no liblz4, building zstd without .lz4 support
HAVE_LZ4 := $(shell printf '\#include <lz4frame.h>\n\#include <lz4.h>\nint main(void) { return 0; }' | $(CC) $(FLAGS) -o have_lz4$(EXT) -x c - -llz4 2> $(VOID) && rm have_lz4$(EXT) && echo 1 || echo 0)
ifeq ($(HAVE_LZ4), 1)
LZ4_MSG := ==> building zstd with .lz4 compression support
LZ4CPP = -DZSTD_LZ4COMPRESS -DZSTD_LZ4DECOMPRESS
LZ4LD = -llz4
else
LZ4_MSG := $(NO_LZ4_MSG)
endif
MD2ROFF =ronn
MD2ROFF_FLAGS = --roff --warnings --manual="User Commands" --organization="zstd $(ZSTD_VERSION)"
@ -135,11 +145,16 @@ zstd-nogz : ZLIB_MSG := $(NO_ZLIB_MSG)
zstd-nogz : LZMA_MSG := $(NO_LZMA_MSG)
xzstd : CPPFLAGS += $(ZLIBCPP) $(LZMACPP)
xzstd : LDFLAGS += $(ZLIBLD) $(LZMALD)
zstd zstd-nogz xzstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
zstd zstd-nogz xzstd : $(ZSTDLIB_FILES) zstdcli.o fileio.o bench.o datagen.o dibio.o
xzstd : LZ4_MSG := $(NO_LZMA_MSG)
zstd4 : CPPFLAGS += $(ZLIBCPP) $(LZ4CPP)
zstd4 : LDFLAGS += $(ZLIBLD) $(LZ4LD)
zstd4 : LZMA_MSG := $(NO_LZMA_MSG)
zstd zstd-nogz xzstd zstd4 : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
zstd zstd-nogz xzstd zstd4 : $(ZSTDLIB_FILES) zstdcli.o fileio.o bench.o datagen.o dibio.o
@echo "$(THREAD_MSG)"
@echo "$(ZLIB_MSG)"
@echo "$(LZMA_MSG)"
@echo "$(LZ4_MSG)"
ifneq (,$(filter Windows%,$(OS)))
windres/generate_res.bat
endif

View File

@ -53,6 +53,12 @@
# include <lzma.h>
#endif
#define LZ4_MAGICNUMBER 0x184D2204
#if defined(ZSTD_LZ4COMPRESS) || defined(ZSTD_LZ4DECOMPRESS)
# include <lz4frame.h>
# include <lz4.h>
#endif
/*-*************************************
* Constants
@ -514,6 +520,79 @@ static unsigned long long FIO_compressLzmaFrame(cRess_t* ress, const char* srcFi
}
#endif
#ifdef ZSTD_LZ4COMPRESS
static int FIO_LZ4_GetBlockSize_FromBlockId (int id) { return (1 << (8 + (2 * id))); }
static unsigned long long FIO_compressLz4Frame(cRess_t* ress, const char* srcFileName, U64 const srcFileSize, int compressionLevel, U64* readsize)
{
unsigned long long inFileSize = 0, outFileSize = 0;
LZ4F_preferences_t prefs;
LZ4F_compressionContext_t ctx;
LZ4F_errorCode_t const errorCode = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION);
if (LZ4F_isError(errorCode)) EXM_THROW(31, "zstd: failed to create lz4 compression context");
memset(&prefs, 0, sizeof(prefs));
prefs.autoFlush = 1;
prefs.compressionLevel = compressionLevel;
prefs.frameInfo.blockMode = blockIndependent; /* stick to defaults for lz4 cli */
prefs.frameInfo.blockSizeID = max4MB;
prefs.frameInfo.contentChecksumFlag = (contentChecksum_t)g_checksumFlag;
#if LZ4_VERSION_NUMBER >= 10600
prefs.frameInfo.contentSize = srcFileSize;
#endif
{
size_t blockSize = FIO_LZ4_GetBlockSize_FromBlockId(max4MB);
size_t readSize;
size_t headerSize = LZ4F_compressBegin(ctx, ress->dstBuffer, ress->dstBufferSize, &prefs);
if (LZ4F_isError(headerSize)) EXM_THROW(33, "File header generation failed : %s", LZ4F_getErrorName(headerSize));
{ size_t const sizeCheck = fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile);
if (sizeCheck!=headerSize) EXM_THROW(34, "Write error : cannot write header"); }
outFileSize += headerSize;
/* Read first block */
readSize = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile);
inFileSize += readSize;
/* Main Loop */
while (readSize>0) {
size_t outSize;
/* Compress Block */
outSize = LZ4F_compressUpdate(ctx, ress->dstBuffer, ress->dstBufferSize, ress->srcBuffer, readSize, NULL);
if (LZ4F_isError(outSize)) EXM_THROW(35, "zstd: %s: lz4 compression failed : %s", srcFileName, LZ4F_getErrorName(outSize));
outFileSize += outSize;
if (!srcFileSize) DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", (U32)(inFileSize>>20), (double)outFileSize/inFileSize*100)
else DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%", (U32)(inFileSize>>20), (U32)(srcFileSize>>20), (double)outFileSize/inFileSize*100);
/* Write Block */
{ size_t const sizeCheck = fwrite(ress->dstBuffer, 1, outSize, ress->dstFile);
if (sizeCheck!=outSize) EXM_THROW(36, "Write error : cannot write compressed block"); }
/* Read next block */
readSize = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile);
inFileSize += readSize;
}
if (ferror(ress->srcFile)) EXM_THROW(37, "Error reading %s ", srcFileName);
/* End of Stream mark */
headerSize = LZ4F_compressEnd(ctx, ress->dstBuffer, ress->dstBufferSize, NULL);
if (LZ4F_isError(headerSize)) EXM_THROW(38, "zstd: %s: lz4 end of file generation failed : %s", srcFileName, LZ4F_getErrorName(headerSize));
{ size_t const sizeCheck = fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile);
if (sizeCheck!=headerSize) EXM_THROW(39, "Write error : cannot write end of stream"); }
outFileSize += headerSize;
}
*readsize = inFileSize;
LZ4F_freeCompressionContext(ctx);
return outFileSize;
}
#endif
/*! FIO_compressFilename_internal() :
* same as FIO_compressFilename_extRess(), with `ress.desFile` already opened.
@ -547,6 +626,13 @@ static int FIO_compressFilename_internal(cRess_t ress,
#else
(void)compressionLevel;
EXM_THROW(20, "zstd: %s: file cannot be compressed as xz/lzma (zstd compiled without ZSTD_LZMACOMPRESS) -- ignored \n", srcFileName);
#endif
case FIO_lz4Compression:
#ifdef ZSTD_LZ4COMPRESS
compressedfilesize = FIO_compressLz4Frame(&ress, srcFileName, fileSize, compressionLevel, &readsize);
#else
(void)compressionLevel;
EXM_THROW(20, "zstd: %s: file cannot be compressed as lz4 (zstd compiled without ZSTD_LZ4COMPRESS) -- ignored \n", srcFileName);
#endif
goto finish;
}
@ -1039,6 +1125,66 @@ static unsigned long long FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile,
}
#endif
#ifdef ZSTD_LZ4DECOMPRESS
static unsigned long long FIO_decompressLz4Frame(dRess_t* ress, FILE* srcFile, const char* srcFileName)
{
unsigned long long filesize = 0;
LZ4F_errorCode_t nextToLoad;
LZ4F_decompressionContext_t dCtx;
LZ4F_errorCode_t const errorCode = LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION);
if (LZ4F_isError(errorCode)) EXM_THROW(61, "zstd: failed to create lz4 decompression context");
/* Init feed with magic number (already consumed from FILE* sFile) */
{ size_t inSize = 4;
size_t outSize= 0;
MEM_writeLE32(ress->srcBuffer, LZ4_MAGICNUMBER);
nextToLoad = LZ4F_decompress(dCtx, ress->dstBuffer, &outSize, ress->srcBuffer, &inSize, NULL);
if (LZ4F_isError(nextToLoad)) EXM_THROW(62, "zstd: %s: lz4 header error : %s", srcFileName, LZ4F_getErrorName(nextToLoad));
}
/* Main Loop */
for (;nextToLoad;) {
size_t readSize;
size_t pos = 0;
size_t decodedBytes = ress->dstBufferSize;
/* Read input */
if (nextToLoad > ress->srcBufferSize) nextToLoad = ress->srcBufferSize;
readSize = fread(ress->srcBuffer, 1, nextToLoad, srcFile);
if (!readSize) break; /* reached end of file or stream */
while ((pos < readSize) || (decodedBytes == ress->dstBufferSize)) { /* still to read, or still to flush */
/* Decode Input (at least partially) */
size_t remaining = readSize - pos;
decodedBytes = ress->dstBufferSize;
nextToLoad = LZ4F_decompress(dCtx, ress->dstBuffer, &decodedBytes, (char*)(ress->srcBuffer)+pos, &remaining, NULL);
if (LZ4F_isError(nextToLoad)) EXM_THROW(66, "zstd: %s: decompression error : %s", srcFileName, LZ4F_getErrorName(nextToLoad));
pos += remaining;
/* Write Block */
if (decodedBytes) {
if (fwrite(ress->dstBuffer, 1, decodedBytes, ress->dstFile) != decodedBytes) EXM_THROW(63, "Write error : cannot write to output file");
filesize += decodedBytes;
DISPLAYUPDATE(2, "\rDecompressed : %u MB ", (unsigned)(filesize>>20));
}
if (!nextToLoad) break;
}
}
/* can be out because readSize == 0, which could be an fread() error */
if (ferror(srcFile)) EXM_THROW(67, "zstd: %s: read error", srcFileName);
if (nextToLoad!=0) EXM_THROW(68, "zstd: %s: unfinished stream", srcFileName);
LZ4F_freeDecompressionContext(dCtx);
ress->srcBufferLoaded = 0; /* LZ4F will go to the frame boundary */
return filesize;
}
#endif
/** FIO_decompressSrcFile() :
Decompression `srcFileName` into `ress.dstFile`
@ -1090,6 +1236,15 @@ static int FIO_decompressSrcFile(dRess_t ress, const char* dstFileName, const ch
#else
DISPLAYLEVEL(1, "zstd: %s: xz/lzma file cannot be uncompressed (zstd compiled without ZSTD_LZMADECOMPRESS) -- ignored \n", srcFileName);
return 1;
#endif
} else if (MEM_readLE32(buf) == LZ4_MAGICNUMBER) {
#ifdef ZSTD_LZ4DECOMPRESS
unsigned long long const result = FIO_decompressLz4Frame(&ress, srcFile, srcFileName);
if (result == 0) return 1;
filesize += result;
#else
DISPLAYLEVEL(1, "zstd: %s: lz4 file cannot be uncompressed (zstd compiled without ZSTD_LZ4DECOMPRESS) -- ignored \n", srcFileName);
return 1;
#endif
} else {
if (!ZSTD_isFrame(ress.srcBuffer, toRead)) {
@ -1199,7 +1354,7 @@ int FIO_decompressMultipleFilenames(const char** srcNamesTable, unsigned nbFiles
dstFileName = (char*)malloc(dfnSize);
if (dstFileName==NULL) EXM_THROW(74, "not enough memory for dstFileName");
}
if (sfnSize <= suffixSize || (strcmp(suffixPtr, GZ_EXTENSION) && strcmp(suffixPtr, XZ_EXTENSION) && strcmp(suffixPtr, ZSTD_EXTENSION) && strcmp(suffixPtr, LZMA_EXTENSION))) {
if (sfnSize <= suffixSize || (strcmp(suffixPtr, GZ_EXTENSION) && strcmp(suffixPtr, XZ_EXTENSION) && strcmp(suffixPtr, ZSTD_EXTENSION) && strcmp(suffixPtr, LZMA_EXTENSION) && strcmp(suffixPtr, LZ4_EXTENSION))) {
DISPLAYLEVEL(1, "zstd: %s: unknown suffix (%s/%s/%s/%s expected) -- ignored \n", srcFileName, GZ_EXTENSION, XZ_EXTENSION, ZSTD_EXTENSION, LZMA_EXTENSION);
skippedFiles++;
continue;

View File

@ -33,12 +33,13 @@ extern "C" {
#define XZ_EXTENSION ".xz"
#define GZ_EXTENSION ".gz"
#define ZSTD_EXTENSION ".zst"
#define LZ4_EXTENSION ".lz4"
/*-*************************************
* Types
***************************************/
typedef enum { FIO_zstdCompression, FIO_gzipCompression, FIO_xzCompression, FIO_lzmaCompression } FIO_compressionType_t;
typedef enum { FIO_zstdCompression, FIO_gzipCompression, FIO_xzCompression, FIO_lzmaCompression, FIO_lz4Compression } FIO_compressionType_t;
/*-*************************************

View File

@ -136,6 +136,9 @@ static int usage_advanced(const char* programName)
DISPLAY( "--format=xz : compress files to the .xz format \n");
DISPLAY( "--format=lzma : compress files to the .lzma format \n");
#endif
#ifdef ZSTD_LZ4COMPRESS
DISPLAY( "--format=lz4 : compress files to the .lz4 format \n");
#endif
#ifndef ZSTD_NODECOMPRESS
DISPLAY( "--test : test compressed file integrity \n");
#if ZSTD_SPARSE_DEFAULT
@ -404,6 +407,9 @@ int main(int argCount, const char* argv[])
if (!strcmp(argument, "--format=lzma")) { suffix = LZMA_EXTENSION; FIO_setCompressionType(FIO_lzmaCompression); continue; }
if (!strcmp(argument, "--format=xz")) { suffix = XZ_EXTENSION; FIO_setCompressionType(FIO_xzCompression); continue; }
#endif
#ifdef ZSTD_LZ4COMPRESS
if (!strcmp(argument, "--format=lz4")) { suffix = LZ4_EXTENSION; FIO_setCompressionType(FIO_lz4Compression); continue; }
#endif
/* long commands with arguments */
#ifndef ZSTD_NODICT

View File

@ -403,7 +403,7 @@ if [ $GZIPMODE -eq 1 ]; then
$ZSTD -f --format=gzip tmp
$ZSTD -f tmp
cat tmp.gz tmp.zst tmp.gz tmp.zst | $ZSTD -d -f -o tmp
head -c -1 tmp.gz | $ZSTD -t && die "incomplete frame not detected !"
head -c -1 tmp.gz | $ZSTD -t > $INTOVOID && die "incomplete frame not detected !"
rm tmp*
else
$ECHO "gzip mode not supported"
@ -445,13 +445,48 @@ if [ $LZMAMODE -eq 1 ]; then
$ZSTD -f --format=lzma tmp
$ZSTD -f tmp
cat tmp.xz tmp.lzma tmp.zst tmp.lzma tmp.xz tmp.zst | $ZSTD -d -f -o tmp
head -c -1 tmp.xz | $ZSTD -t && die "incomplete frame not detected !"
head -c -1 tmp.lzma | $ZSTD -t && die "incomplete frame not detected !"
head -c -1 tmp.xz | $ZSTD -t > $INTOVOID && die "incomplete frame not detected !"
head -c -1 tmp.lzma | $ZSTD -t > $INTOVOID && die "incomplete frame not detected !"
rm tmp*
else
$ECHO "xz mode not supported"
fi
$ECHO "\n**** lz4 compatibility tests **** "
LZ4MODE=1
$ZSTD --format=lz4 -V || LZ4MODE=0
if [ $LZ4MODE -eq 1 ]; then
$ECHO "lz4 support detected"
LZ4EXE=1
lz4 -V || LZ4EXE=0
if [ $LZ4EXE -eq 1 ]; then
./datagen > tmp
$ZSTD --format=lz4 -f tmp
lz4 -t -v tmp.lz4
lz4 -f tmp
$ZSTD -d -f -v tmp.lz4
rm tmp*
else
$ECHO "lz4 binary not detected"
fi
else
$ECHO "lz4 mode not supported"
fi
$ECHO "\n**** lz4 frame tests **** "
if [ $LZ4MODE -eq 1 ]; then
./datagen > tmp
$ZSTD -f --format=lz4 tmp
$ZSTD -f tmp
cat tmp.lz4 tmp.zst tmp.lz4 tmp.zst | $ZSTD -d -f -o tmp
head -c -1 tmp.lz4 | $ZSTD -t > $INTOVOID && die "incomplete frame not detected !"
rm tmp*
else
$ECHO "lz4 mode not supported"
fi
$ECHO "\n**** zstd round-trip tests **** "