diff --git a/programs/Makefile b/programs/Makefile
index 4ab4dfd6..b3763484 100644
--- a/programs/Makefile
+++ b/programs/Makefile
@@ -116,6 +116,16 @@ LZMALD = -llzma
 else
 LZMA_MSG := $(NO_LZMA_MSG)
 endif
+# lz4 detection
+NO_LZ4_MSG := ==> no liblz4, building zstd without .lz4 support
+HAVE_LZ4 := $(shell printf '\#include <lz4frame.h>\nint main(void) { return 0; }' | $(CC) $(FLAGS) -o have_lz4$(EXT) -x c - -llz4 2> $(VOID) && rm have_lz4$(EXT) && echo 1 || echo 0)
+ifeq ($(HAVE_LZ4), 1)
+LZ4_MSG := ==> building zstd with .lz4 compression support
+LZ4CPP = -DZSTD_LZ4COMPRESS -DZSTD_LZ4DECOMPRESS
+LZ4LD = -llz4
+else
+LZ4_MSG := $(NO_LZ4_MSG)
+endif
 
 MD2ROFF =ronn
 MD2ROFF_FLAGS = --roff --warnings --manual="User Commands" --organization="zstd $(ZSTD_VERSION)"
@@ -135,11 +145,17 @@ zstd-nogz : ZLIB_MSG := $(NO_ZLIB_MSG)
 zstd-nogz : LZMA_MSG := $(NO_LZMA_MSG)
 xzstd : CPPFLAGS += $(ZLIBCPP) $(LZMACPP)
 xzstd : LDFLAGS += $(ZLIBLD) $(LZMALD)
-zstd zstd-nogz xzstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
-zstd zstd-nogz xzstd : $(ZSTDLIB_FILES) zstdcli.o fileio.o bench.o datagen.o dibio.o
+# xzstd is built without $(LZ4CPP) and zstd4 without $(LZMACPP): each prints the matching "no support" message
+xzstd : LZ4_MSG := $(NO_LZ4_MSG)
+zstd4 : CPPFLAGS += $(ZLIBCPP) $(LZ4CPP)
+zstd4 : LDFLAGS += $(ZLIBLD) $(LZ4LD)
+zstd4 : LZMA_MSG := $(NO_LZMA_MSG)
+zstd zstd-nogz xzstd zstd4 : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
+zstd zstd-nogz xzstd zstd4 : $(ZSTDLIB_FILES) zstdcli.o fileio.o bench.o datagen.o dibio.o
 	@echo "$(THREAD_MSG)"
 	@echo "$(ZLIB_MSG)"
 	@echo "$(LZMA_MSG)"
+	@echo "$(LZ4_MSG)"
 ifneq (,$(filter Windows%,$(OS)))
 	windres/generate_res.bat
 endif
diff --git a/programs/fileio.c b/programs/fileio.c
index 9bca2066..1b54256e 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -53,6 +53,11 @@
 # include
 #endif
 
+#define LZ4_MAGICNUMBER 0x184D2204
+#if defined(ZSTD_LZ4COMPRESS) || defined(ZSTD_LZ4DECOMPRESS)
+# include <lz4frame.h>
+#endif
+
 
 /*-*************************************
 * Constants
@@ -514,6 +519,77 @@ static unsigned long long 
FIO_compressLzmaFrame(cRess_t* ress, const char* srcFi
 }
 #endif
 
+#ifdef ZSTD_LZ4COMPRESS
+static int FIO_LZ4_GetBlockSize_FromBlockId (int id) { return (1 << (8 + (2 * id))); }   /* block size in bytes : 2^(8 + 2*id) */
+static unsigned long long FIO_compressLz4Frame(cRess_t* ress, const char* srcFileName, U64 const srcFileSize, int compressionLevel, U64* readsize)
+{   /* Streams ress->srcFile through the LZ4F_* frame API into ress->dstFile. Returns the number of bytes written; *readsize receives the number of source bytes read. */
+    unsigned long long inFileSize = 0, outFileSize = 0;
+
+    LZ4F_preferences_t prefs;
+    LZ4F_cctx* ctx;
+
+    LZ4F_errorCode_t const errorCode = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION);
+    if (LZ4F_isError(errorCode)) EXM_THROW(31, "zstd: failed to create lz4 compression context");
+
+    memset(&prefs, 0, sizeof(prefs));   /* every preference not set below stays zero */
+
+    prefs.autoFlush = 1;
+    prefs.compressionLevel = compressionLevel;
+    prefs.frameInfo.blockMode = LZ4F_blockIndependent; /* stick to defaults for lz4 cli */
+    prefs.frameInfo.blockSizeID = LZ4F_max4MB;
+    prefs.frameInfo.contentChecksumFlag = (LZ4F_contentChecksum_t)g_checksumFlag;
+    prefs.frameInfo.contentSize = srcFileSize;
+
+    {
+        size_t blockSize = FIO_LZ4_GetBlockSize_FromBlockId(LZ4F_max4MB);   /* NOTE(review): each fread() below may store up to blockSize bytes into ress->srcBuffer -- confirm that buffer is at least this large */
+        size_t readSize;
+        size_t headerSize = LZ4F_compressBegin(ctx, ress->dstBuffer, ress->dstBufferSize, &prefs);
+        if (LZ4F_isError(headerSize)) EXM_THROW(33, "File header generation failed : %s", LZ4F_getErrorName(headerSize));
+        { size_t const sizeCheck = fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile);
+          if (sizeCheck!=headerSize) EXM_THROW(34, "Write error : cannot write header"); }
+        outFileSize += headerSize;
+
+        /* Read first block */
+        readSize = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile);
+        inFileSize += readSize;
+
+        /* Main Loop */
+        while (readSize>0) {   /* a zero-length read ends the loop; ferror() after the loop separates a read error from end of file */
+            size_t outSize;
+
+            /* Compress Block */
+            outSize = LZ4F_compressUpdate(ctx, ress->dstBuffer, ress->dstBufferSize, ress->srcBuffer, readSize, NULL);
+            if (LZ4F_isError(outSize)) EXM_THROW(35, "zstd: %s: lz4 compression failed : %s", srcFileName, LZ4F_getErrorName(outSize));
+            outFileSize += outSize;
+            if (!srcFileSize) DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", (U32)(inFileSize>>20), (double)outFileSize/inFileSize*100)
+            else DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%", (U32)(inFileSize>>20), (U32)(srcFileSize>>20), (double)outFileSize/inFileSize*100);
+
+            /* Write Block */
+            { size_t const sizeCheck = fwrite(ress->dstBuffer, 1, outSize, ress->dstFile);
+              if (sizeCheck!=outSize) EXM_THROW(36, "Write error : cannot write compressed block"); }
+
+            /* Read next block */
+            readSize = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile);
+            inFileSize += readSize;
+        }
+        if (ferror(ress->srcFile)) EXM_THROW(37, "Error reading %s ", srcFileName);
+
+        /* End of Stream mark */
+        headerSize = LZ4F_compressEnd(ctx, ress->dstBuffer, ress->dstBufferSize, NULL);   /* headerSize is reused here for the size of the end-of-stream data */
+        if (LZ4F_isError(headerSize)) EXM_THROW(38, "zstd: %s: lz4 end of file generation failed : %s", srcFileName, LZ4F_getErrorName(headerSize));
+
+        { size_t const sizeCheck = fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile);
+          if (sizeCheck!=headerSize) EXM_THROW(39, "Write error : cannot write end of stream"); }
+        outFileSize += headerSize;
+    }
+
+    *readsize = inFileSize;
+    LZ4F_freeCompressionContext(ctx);
+
+    return outFileSize;
+}
+#endif
+
 
 /*! FIO_compressFilename_internal() :
  * same as FIO_compressFilename_extRess(), with `ress.desFile` already opened.
@@ -547,6 +623,14 @@ static int FIO_compressFilename_internal(cRess_t ress,
 #else
             (void)compressionLevel;
             EXM_THROW(20, "zstd: %s: file cannot be compressed as xz/lzma (zstd compiled without ZSTD_LZMACOMPRESS) -- ignored \n", srcFileName);
+#endif
+            goto finish;   /* xz/lzma ends here; without this jump the case falls through into FIO_lz4Compression */
+        case FIO_lz4Compression:
+#ifdef ZSTD_LZ4COMPRESS
+            compressedfilesize = FIO_compressLz4Frame(&ress, srcFileName, fileSize, compressionLevel, &readsize);
+#else
+            (void)compressionLevel;
+            EXM_THROW(20, "zstd: %s: file cannot be compressed as lz4 (zstd compiled without ZSTD_LZ4COMPRESS) -- ignored \n", srcFileName);
 #endif
             goto finish;
     }
@@ -1039,6 +1123,66 @@ static unsigned long long FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile,
 }
 #endif
 
+#ifdef ZSTD_LZ4DECOMPRESS
+static unsigned long long FIO_decompressLz4Frame(dRess_t* ress, FILE* srcFile, const char* srcFileName)
+{   /* Decodes one lz4 frame read from srcFile into ress->dstFile and returns the number of decoded bytes written. */
+    unsigned long long filesize = 0;
+    LZ4F_errorCode_t nextToLoad;
+    LZ4F_dctx* dCtx;
+    LZ4F_errorCode_t const errorCode = LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION);
+
+    if (LZ4F_isError(errorCode)) EXM_THROW(61, "zstd: failed to create lz4 decompression context");
+
+    /* Init feed with magic number (already consumed from FILE* sFile) */
+    { size_t inSize = 4;
+      size_t outSize= 0;
+      MEM_writeLE32(ress->srcBuffer, LZ4_MAGICNUMBER);
+      nextToLoad = LZ4F_decompress(dCtx, ress->dstBuffer, &outSize, ress->srcBuffer, &inSize, NULL);
+      if (LZ4F_isError(nextToLoad)) EXM_THROW(62, "zstd: %s: lz4 header error : %s", srcFileName, LZ4F_getErrorName(nextToLoad));
+    }
+
+    /* Main Loop */
+    for (;nextToLoad;) {   /* runs until LZ4F_decompress() reports 0 bytes still expected */
+        size_t readSize;
+        size_t pos = 0;
+        size_t decodedBytes = ress->dstBufferSize;
+
+        /* Read input */
+        if (nextToLoad > ress->srcBufferSize) nextToLoad = ress->srcBufferSize;
+        readSize = fread(ress->srcBuffer, 1, nextToLoad, srcFile);
+        if (!readSize) break; /* reached end of file or stream */
+
+        while ((pos < readSize) || (decodedBytes == ress->dstBufferSize)) { /* still to read, or still to flush */
+            /* Decode Input (at least partially) */
+            size_t remaining = readSize - pos;
+            decodedBytes = ress->dstBufferSize;
+            nextToLoad = LZ4F_decompress(dCtx, ress->dstBuffer, &decodedBytes, (char*)(ress->srcBuffer)+pos, &remaining, NULL);
+            if (LZ4F_isError(nextToLoad)) EXM_THROW(66, "zstd: %s: decompression error : %s", srcFileName, LZ4F_getErrorName(nextToLoad));
+            pos += remaining;
+
+            /* Write Block */
+            if (decodedBytes) {
+                if (fwrite(ress->dstBuffer, 1, decodedBytes, ress->dstFile) != decodedBytes) EXM_THROW(63, "Write error : cannot write to output file");
+                filesize += decodedBytes;
+                DISPLAYUPDATE(2, "\rDecompressed : %u MB ", (unsigned)(filesize>>20));
+            }
+
+            if (!nextToLoad) break;
+        }
+    }
+    /* can be out because readSize == 0, which could be an fread() error */
+    if (ferror(srcFile)) EXM_THROW(67, "zstd: %s: read error", srcFileName);
+
+    if (nextToLoad!=0) EXM_THROW(68, "zstd: %s: unfinished stream", srcFileName);
+
+    LZ4F_freeDecompressionContext(dCtx);
+    ress->srcBufferLoaded = 0; /* LZ4F will go to the frame boundary */
+
+    return filesize;
+}
+#endif
+
+
 
 /** FIO_decompressSrcFile() :
     Decompression `srcFileName` into `ress.dstFile`
@@ -1090,6 +1234,15 @@ static int FIO_decompressSrcFile(dRess_t ress, const char* dstFileName, const ch
 #else
             DISPLAYLEVEL(1, "zstd: %s: xz/lzma file cannot be uncompressed (zstd compiled without ZSTD_LZMADECOMPRESS) -- ignored \n", srcFileName);
             return 1;
+#endif
+        } else if (MEM_readLE32(buf) == LZ4_MAGICNUMBER) {
+#ifdef ZSTD_LZ4DECOMPRESS
+            unsigned long long const result = FIO_decompressLz4Frame(&ress, srcFile, srcFileName);
+            if (result == 0) return 1;   /* zero decoded bytes takes the same return 1 path as the unsupported-format branch */
+            filesize += result;
+#else
+            DISPLAYLEVEL(1, "zstd: %s: lz4 file cannot be uncompressed (zstd compiled without ZSTD_LZ4DECOMPRESS) -- ignored \n", srcFileName);
+            return 1;
 #endif
         } else {
             if (!ZSTD_isFrame(ress.srcBuffer, toRead)) {
@@ -1199,7 +1352,7 @@ int FIO_decompressMultipleFilenames(const char** srcNamesTable, unsigned nbFiles
                 dstFileName = (char*)malloc(dfnSize);
                 if (dstFileName==NULL) 
EXM_THROW(74, "not enough memory for dstFileName");
             }
-            if (sfnSize <= suffixSize || (strcmp(suffixPtr, GZ_EXTENSION) && strcmp(suffixPtr, XZ_EXTENSION) && strcmp(suffixPtr, ZSTD_EXTENSION) && strcmp(suffixPtr, LZMA_EXTENSION))) {
-                DISPLAYLEVEL(1, "zstd: %s: unknown suffix (%s/%s/%s/%s expected) -- ignored \n", srcFileName, GZ_EXTENSION, XZ_EXTENSION, ZSTD_EXTENSION, LZMA_EXTENSION);
+            if (sfnSize <= suffixSize || (strcmp(suffixPtr, GZ_EXTENSION) && strcmp(suffixPtr, XZ_EXTENSION) && strcmp(suffixPtr, ZSTD_EXTENSION) && strcmp(suffixPtr, LZMA_EXTENSION) && strcmp(suffixPtr, LZ4_EXTENSION))) {
+                DISPLAYLEVEL(1, "zstd: %s: unknown suffix (%s/%s/%s/%s/%s expected) -- ignored \n", srcFileName, GZ_EXTENSION, XZ_EXTENSION, ZSTD_EXTENSION, LZMA_EXTENSION, LZ4_EXTENSION);   /* one %s per suffix accepted by the test above */
                 skippedFiles++;
                 continue;
diff --git a/programs/fileio.h b/programs/fileio.h
index 0dd58d62..65da98d7 100644
--- a/programs/fileio.h
+++ b/programs/fileio.h
@@ -33,12 +33,13 @@ extern "C" {
 #define XZ_EXTENSION ".xz"
 #define GZ_EXTENSION ".gz"
 #define ZSTD_EXTENSION ".zst"
+#define LZ4_EXTENSION ".lz4"
 
 
 /*-*************************************
 * Types
 ***************************************/
-typedef enum { FIO_zstdCompression, FIO_gzipCompression, FIO_xzCompression, FIO_lzmaCompression } FIO_compressionType_t;
+typedef enum { FIO_zstdCompression, FIO_gzipCompression, FIO_xzCompression, FIO_lzmaCompression, FIO_lz4Compression } FIO_compressionType_t;
 
 
 /*-*************************************
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index 76f55de5..79bc8487 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -136,6 +136,9 @@ static int usage_advanced(const char* programName)
     DISPLAY( "--format=xz : compress files to the .xz format \n");
     DISPLAY( "--format=lzma : compress files to the .lzma format \n");
 #endif
+#ifdef ZSTD_LZ4COMPRESS
+    DISPLAY( "--format=lz4 : compress files to the .lz4 format \n");
+#endif
 #ifndef ZSTD_NODECOMPRESS
     DISPLAY( "--test : test compressed file integrity \n");
 #if ZSTD_SPARSE_DEFAULT
@@ -404,6 +407,9 @@ int main(int argCount, const char* argv[])
                 if (!strcmp(argument, "--format=lzma")) { suffix = LZMA_EXTENSION; 
FIO_setCompressionType(FIO_lzmaCompression); continue; }
                 if (!strcmp(argument, "--format=xz")) { suffix = XZ_EXTENSION; FIO_setCompressionType(FIO_xzCompression); continue; }
 #endif
+#ifdef ZSTD_LZ4COMPRESS   /* --format=lz4 is only matched when lz4 compression is compiled in */
+                if (!strcmp(argument, "--format=lz4")) { suffix = LZ4_EXTENSION; FIO_setCompressionType(FIO_lz4Compression); continue; }
+#endif
 
                 /* long commands with arguments */
 #ifndef ZSTD_NODICT
diff --git a/tests/playTests.sh b/tests/playTests.sh
index a91e9e8b..369506c2 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -403,7 +403,7 @@ if [ $GZIPMODE -eq 1 ]; then
     $ZSTD -f --format=gzip tmp
     $ZSTD -f tmp
     cat tmp.gz tmp.zst tmp.gz tmp.zst | $ZSTD -d -f -o tmp
-    head -c -1 tmp.gz | $ZSTD -t && die "incomplete frame not detected !"
+    head -c -1 tmp.gz | $ZSTD -t > $INTOVOID && die "incomplete frame not detected !"
     rm tmp*
 else
     $ECHO "gzip mode not supported"
@@ -445,13 +445,48 @@ if [ $LZMAMODE -eq 1 ]; then
     $ZSTD -f --format=lzma tmp
     $ZSTD -f tmp
     cat tmp.xz tmp.lzma tmp.zst tmp.lzma tmp.xz tmp.zst | $ZSTD -d -f -o tmp
-    head -c -1 tmp.xz | $ZSTD -t && die "incomplete frame not detected !"
-    head -c -1 tmp.lzma | $ZSTD -t && die "incomplete frame not detected !"
+    head -c -1 tmp.xz | $ZSTD -t > $INTOVOID && die "incomplete frame not detected !"
+    head -c -1 tmp.lzma | $ZSTD -t > $INTOVOID && die "incomplete frame not detected !"
     rm tmp*
 else
     $ECHO "xz mode not supported"
 fi
 
+$ECHO "\n**** lz4 compatibility tests **** "
+
+LZ4MODE=1
+$ZSTD --format=lz4 -V || LZ4MODE=0   # probe: a non-zero exit status clears LZ4MODE
+if [ $LZ4MODE -eq 1 ]; then
+    $ECHO "lz4 support detected"
+    LZ4EXE=1
+    lz4 -V || LZ4EXE=0   # the cross-checks below need an lz4 binary on PATH
+    if [ $LZ4EXE -eq 1 ]; then
+        ./datagen > tmp
+        $ZSTD --format=lz4 -f tmp
+        lz4 -t -v tmp.lz4   # lz4 checks the file written by zstd
+        lz4 -f tmp
+        $ZSTD -d -f -v tmp.lz4   # zstd decodes the file written by lz4
+        rm tmp*
+    else
+        $ECHO "lz4 binary not detected"
+    fi
+else
+    $ECHO "lz4 mode not supported"
+fi
+
+
+$ECHO "\n**** lz4 frame tests **** "
+
+if [ $LZ4MODE -eq 1 ]; then
+    ./datagen > tmp
+    $ZSTD -f --format=lz4 tmp
+    $ZSTD -f tmp
+    cat tmp.lz4 tmp.zst tmp.lz4 tmp.zst | $ZSTD -d -f -o tmp   # alternating lz4 and zstd frames in one stream
+    head -c -1 tmp.lz4 | $ZSTD -t > $INTOVOID && die "incomplete frame not detected !"   # all but the last byte: -t has to fail
+    rm tmp*
+else
+    $ECHO "lz4 mode not supported"
+fi
 
 
 $ECHO "\n**** zstd round-trip tests **** "