diff --git a/.travis.yml b/.travis.yml
index e27c7981..b1ffa799 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -47,9 +47,9 @@ matrix:
   - os: linux
     sudo: required
     env: PLATFORM="Ubuntu 12.04" MAKE_PARAM="-C programs test32"
-#  - os: linux
-#    sudo: required
-#    env: PLATFORM="Ubuntu 12.04" MAKE_PARAM="-C tests versionsTest"
+  - os: linux
+    sudo: required
+    env: PLATFORM="Ubuntu 12.04" MAKE_PARAM="-C tests versionsTest"
   - os: linux
     sudo: required
     env: PLATFORM="Ubuntu 12.04" MAKE_PARAM=asan32
diff --git a/NEWS b/NEWS
index 767fe5c0..c7aeae98 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,6 @@
 v0.7.0
 New : Support for directory compression, using `-r`, thanks to Przemyslaw Skibinski
+New : Command `--rm`, to remove source file after successful de/compression
 New : Visual build scripts, by Christophe Chevalier
 New : Support for Sparse File-systems (do not use space for zero-filled sectors)
 New : Frame checksum support
diff --git a/build/build.generic.cmd b/build/build.generic.cmd
index 502c8db9..ed46c922 100644
--- a/build/build.generic.cmd
+++ b/build/build.generic.cmd
@@ -2,48 +2,50 @@
 IF "%1%" == "" GOTO display_help
-SET vs_version=%1
+SETLOCAL
-SET vs_platform=%2
-IF "%vs_platform%" == "" SET vs_platform=x64
+SET msbuild_version=%1
-SET vs_configuration=%3
-IF "%vs_configuration%" == "" SET vs_configuration=Release
+SET msbuild_platform=%2
+IF "%msbuild_platform%" == "" SET msbuild_platform=x64
-SET vs_toolset=%4
+SET msbuild_configuration=%3
+IF "%msbuild_configuration%" == "" SET msbuild_configuration=Release
+
+SET msbuild_toolset=%4
 GOTO build
 :display_help
-echo Syntax: build.generic.cmd vs_version vs_platform vs_configuration vs_toolset
-echo vs_version: VS installed version (VS2012, VS2013, VS2015, ...)
-echo vs_platform: Platform (x64 or Win32)
-echo vs_configuration: VS configuration (Release or Debug)
-echo vs_toolset: Platform Toolset (v100, v110, v120, v140)
+echo Syntax: build.generic.cmd msbuild_version msbuild_platform msbuild_configuration msbuild_toolset
+echo msbuild_version: VS installed version (VS2012, VS2013, VS2015, ...)
+echo msbuild_platform: Platform (x64 or Win32)
+echo msbuild_configuration: VS configuration (Release or Debug)
+echo msbuild_toolset: Platform Toolset (v100, v110, v120, v140)
 EXIT /B 1
 :build
 SET msbuild="%windir%\Microsoft.NET\Framework\v4.0.30319\MSBuild.exe"
-IF %vs_version% == VS2013 SET msbuild="C:\Program Files (x86)\MSBuild\12.0\Bin\MSBuild.exe"
-IF %vs_version% == VS2015 SET msbuild="C:\Program Files (x86)\MSBuild\14.0\Bin\MSBuild.exe"
+IF %msbuild_version% == VS2013 SET msbuild="%programfiles(x86)%\MSBuild\12.0\Bin\MSBuild.exe"
+IF %msbuild_version% == VS2015 SET msbuild="%programfiles(x86)%\MSBuild\14.0\Bin\MSBuild.exe"
 rem TODO: Visual Studio "15" (vNext) will use MSBuild 15.0 ?
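Note: the renamed parameters above keep the same four positional arguments, only under msbuild_* names, so a 64-bit Release build with the v140 toolset under VS2015 would be requested as `build.generic.cmd VS2015 x64 Release v140` (an illustrative invocation assembled from the help text above, not a command taken from the repository's CI configuration).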
SET project="%~p0\..\projects\VS2010\zstd.sln" -SET msbuildparams=/verbosity:minimal /nologo /t:Clean,Build /p:Platform=%vs_platform% /p:Configuration=%vs_configuration% -IF NOT "%vs_toolset%" == "" SET msbuildparams=%msbuildparams% /p:PlatformToolset=%vs_toolset% +SET msbuild_params=/verbosity:minimal /nologo /t:Clean,Build /p:Platform=%msbuild_platform% /p:Configuration=%msbuild_configuration% +IF NOT "%msbuild_toolset%" == "" SET msbuild_params=%msbuild_params% /p:PlatformToolset=%msbuild_toolset% SET output=%~p0%bin -SET output="%output%/%vs_configuration%/%vs_platform%/" -SET msbuildparams=%msbuildparams% /p:OutDir=%output% +SET output="%output%/%msbuild_configuration%/%msbuild_platform%/" +SET msbuild_params=%msbuild_params% /p:OutDir=%output% -echo ### Building %vs_version% project for %vs_configuration% %vs_platform% (%vs_toolset%)... -echo ### Build Params: %msbuildparams% +echo ### Building %msbuild_version% project for %msbuild_configuration% %msbuild_platform% (%msbuild_toolset%)... +echo ### Build Params: %msbuild_params% -%msbuild% %project% %msbuildparams% +%msbuild% %project% %msbuild_params% IF ERRORLEVEL 1 EXIT /B 1 echo # Success echo # OutDir: %output% diff --git a/lib/common/bitstream.h b/lib/common/bitstream.h index 694df4e3..e96798fe 100644 --- a/lib/common/bitstream.h +++ b/lib/common/bitstream.h @@ -266,8 +266,8 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); bitD->bitContainer = MEM_readLEST(bitD->ptr); { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; - if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ - bitD->bitsConsumed = 8 - BIT_highbit32(lastByte); } + bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; + if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } } else { bitD->start = (const char*)srcBuffer; bitD->ptr = bitD->start; @@ -283,8 +283,8 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si default:; } { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; - if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ - bitD->bitsConsumed = 8 - BIT_highbit32(lastByte); } + bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; + if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; } diff --git a/lib/common/huf.h b/lib/common/huf.h index c7669457..ef538df3 100644 --- a/lib/common/huf.h +++ b/lib/common/huf.h @@ -43,24 +43,22 @@ extern "C" { #include /* size_t */ -/*-**************************************** -* HUF simple functions -******************************************/ -size_t HUF_compress(void* dst, size_t dstCapacity, - const void* src, size_t srcSize); -size_t HUF_decompress(void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize); -/* +/* *** simple functions *** */ +/** HUF_compress() : Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'. 'dst' buffer must be already allocated. Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize). - `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB - @return : size of compressed data (<= `dstCapacity`) + `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB. + @return : size of compressed data (<= `dstCapacity`). Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! 
if return == 1, srcData is a single repeated byte symbol (RLE compression). if HUF_isError(return), compression failed (more details using HUF_getErrorName()) +*/ +size_t HUF_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize); +/** HUF_decompress() : Decompress HUF data from buffer 'cSrc', of size 'cSrcSize', into already allocated buffer 'dst', of minimum size 'dstSize'. @@ -68,9 +66,11 @@ HUF_decompress() : Note : in contrast with FSE, HUF_decompress can regenerate RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, because it knows size to regenerate. - @return : size of regenerated data (== dstSize) + @return : size of regenerated data (== dstSize), or an error code, which can be tested using HUF_isError() */ +size_t HUF_decompress(void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize); /* **************************************** @@ -122,19 +122,28 @@ size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize HUF_CElt* name = (HUF_CElt*)(name##hv) /* no final ; */ /* static allocation of HUF's DTable */ -typedef U16 HUF_DTable; +typedef U32 HUF_DTable; #define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog))) #define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \ - HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((maxTableLog)*0x101) } + HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1)*0x1000001) } #define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \ - HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)+1)] = { (((maxTableLog)+1)*0x101) } + HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog)*0x1000001) } /* **************************************** * Advanced decompression functions ******************************************/ -size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ -size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbols decoder */ +size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ + +size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); +size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */ +size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ + +size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); +size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ /* **************************************** @@ -191,6 +200,7 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize); size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize); +size_t HUF_decompress4X_usingDTable(void* dst, 
size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); @@ -203,6 +213,7 @@ size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, si size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ +size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); diff --git a/lib/common/zstd.h b/lib/common/zstd.h index 3c1e16a7..b774434b 100644 --- a/lib/common/zstd.h +++ b/lib/common/zstd.h @@ -292,16 +292,17 @@ ZSTDLIB_API size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, const void* dict,size_t dictSize, ZSTD_parameters params); -/*- Advanced Decompression functions -*/ + +/*--- Advanced Decompression functions ---*/ /*! ZSTD_createDCtx_advanced() : * Create a ZSTD decompression context using external alloc and free functions */ ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); -/* ************************************** -* Streaming functions (direct mode) -****************************************/ +/* **************************************************************** +* Streaming functions (direct mode - synchronous and buffer-less) +******************************************************************/ ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, U64 pledgedSrcSize); @@ -311,10 +312,8 @@ ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstC ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity); /* - Streaming compression, synchronous mode (bufferless) - A ZSTD_CCtx object is required to track streaming operations. - Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage it. + Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. ZSTD_CCtx object can be re-used multiple times within successive compression operations. Start by initializing a context. @@ -323,12 +322,13 @@ ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapaci It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() Then, consume your input using ZSTD_compressContinue(). - The interface is synchronous, so all input will be consumed and produce a compressed output. + ZSTD_compressContinue() presumes prior data is still accessible and unmodified (up to maximum distance size, see WindowLog). + The interface is synchronous, so input will be entirely consumed and produce associated compressed output. You must ensure there is enough space in destination buffer to store compressed data under worst case scenario. 
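To make the flow described in this note concrete, here is a minimal sketch, assuming the whole input is passed in a single ZSTD_compressContinue() call and that the destination buffer was sized with ZSTD_compressBound() (the epilogue written by ZSTD_compressEnd() and the worst-case bound are detailed in the remainder of this note); compressOneFrame is a hypothetical helper, not a function of the library:

#include "zstd.h"

/* Hypothetical helper : compress one frame with the buffer-less (direct mode) API.
   Returns the compressed size, or 0 on any error (error details dropped for brevity). */
size_t compressOneFrame(void* dst, size_t dstCapacity,
                        const void* src, size_t srcSize, int compressionLevel)
{
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    size_t result = 0;
    if (cctx == NULL) return 0;

    if (!ZSTD_isError(ZSTD_compressBegin(cctx, compressionLevel))) {
        /* single call here; when splitting the input over several calls, previously
           consumed input must stay readable and unmodified up to the window size */
        size_t const cSize = ZSTD_compressContinue(cctx, dst, dstCapacity, src, srcSize);
        if (!ZSTD_isError(cSize)) {
            /* write the epilogue; without it, decoders consider the frame unfinished */
            size_t const endSize = ZSTD_compressEnd(cctx, (char*)dst + cSize, dstCapacity - cSize);
            if (!ZSTD_isError(endSize)) result = cSize + endSize;
        }
    }
    ZSTD_freeCCtx(cctx);
    return result;
}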
Worst case evaluation is provided by ZSTD_compressBound(). Finish a frame with ZSTD_compressEnd(), which will write the epilogue. - Without the epilogue, frames will be considered incomplete by decoder. + Without epilogue, frames will be considered unfinished (broken) by decoders. You can then reuse ZSTD_CCtx to compress some new frame. */ diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index ebf3e880..17ae1a77 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -69,6 +69,7 @@ #define ZSTD_REP_NUM 3 #define ZSTD_REP_INIT ZSTD_REP_NUM #define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) +static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; #define KB *(1 <<10) #define MB *(1 <<20) @@ -93,17 +94,12 @@ typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t; #define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ #define HufLog 12 - -#define IS_HUF 0 -#define IS_PCH 1 -#define IS_RAW 2 -#define IS_RLE 3 +typedef enum { lbt_huffman, lbt_repeat, lbt_raw, lbt_rle } litBlockType_t; #define LONGNBSEQ 0x7F00 #define MINMATCH 3 #define EQUAL_READ32 4 -#define REPCODE_STARTVALUE 1 #define Litbits 8 #define MaxLit ((1<params = params; zc->blockSize = blockSize; zc->frameContentSize = frameContentSize; + { int i; for (i=0; irep[i] = repStartValue[i]; } if (params.cParams.strategy == ZSTD_btopt) { zc->seqStore.litFreq = (U32*)(zc->seqStore.buffer); @@ -575,15 +578,15 @@ static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void switch(flSize) { case 1: /* 2 - 1 - 5 */ - ostart[0] = (BYTE)((IS_RAW<<6) + (0<<5) + srcSize); + ostart[0] = (BYTE)((lbt_raw<<6) + (0<<5) + srcSize); break; case 2: /* 2 - 2 - 12 */ - ostart[0] = (BYTE)((IS_RAW<<6) + (2<<4) + (srcSize >> 8)); + ostart[0] = (BYTE)((lbt_raw<<6) + (2<<4) + (srcSize >> 8)); ostart[1] = (BYTE)srcSize; break; default: /*note : should not be necessary : flSize is within {1,2,3} */ case 3: /* 2 - 2 - 20 */ - ostart[0] = (BYTE)((IS_RAW<<6) + (3<<4) + (srcSize >> 16)); + ostart[0] = (BYTE)((lbt_raw<<6) + (3<<4) + (srcSize >> 16)); ostart[1] = (BYTE)(srcSize>>8); ostart[2] = (BYTE)srcSize; break; @@ -603,15 +606,15 @@ static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, cons switch(flSize) { case 1: /* 2 - 1 - 5 */ - ostart[0] = (BYTE)((IS_RLE<<6) + (0<<5) + srcSize); + ostart[0] = (BYTE)((lbt_rle<<6) + (0<<5) + srcSize); break; case 2: /* 2 - 2 - 12 */ - ostart[0] = (BYTE)((IS_RLE<<6) + (2<<4) + (srcSize >> 8)); + ostart[0] = (BYTE)((lbt_rle<<6) + (2<<4) + (srcSize >> 8)); ostart[1] = (BYTE)srcSize; break; default: /*note : should not be necessary : flSize is necessarily within {1,2,3} */ case 3: /* 2 - 2 - 20 */ - ostart[0] = (BYTE)((IS_RLE<<6) + (3<<4) + (srcSize >> 16)); + ostart[0] = (BYTE)((lbt_rle<<6) + (3<<4) + (srcSize >> 16)); ostart[1] = (BYTE)(srcSize>>8); ostart[2] = (BYTE)srcSize; break; @@ -632,7 +635,7 @@ static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc, size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); BYTE* const ostart = (BYTE*)dst; U32 singleStream = srcSize < 256; - U32 hType = IS_HUF; + litBlockType_t hType = lbt_huffman; size_t cLitSize; @@ -644,7 +647,7 @@ static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc, if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */ if (zc->flagStaticTables && (lhSize==3)) { - hType = IS_PCH; + hType = lbt_repeat; singleStream = 1; cLitSize = HUF_compress1X_usingCTable(ostart+lhSize, 
dstCapacity-lhSize, src, srcSize, zc->hufTable); } else { @@ -918,6 +921,9 @@ _check_compressibility: size_t const maxCSize = srcSize - minGain; if ((size_t)(op-ostart) >= maxCSize) return 0; } + /* confirm repcodes */ + { int i; for (i=0; irep[i] = zc->savedRep[i]; } + return op - ostart; } @@ -927,17 +933,17 @@ _check_compressibility: `offsetCode` : distance to match, or 0 == repCode. `matchCode` : matchLength - MINMATCH */ -MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, size_t offsetCode, size_t matchCode) +MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, size_t offsetCode, size_t matchCode) { #if 0 /* for debug */ static const BYTE* g_start = NULL; const U32 pos = (U32)(literals - g_start); if (g_start==NULL) g_start = literals; - if ((pos > 2587900) && (pos < 2588050)) + //if ((pos > 1) && (pos < 50000)) printf("Cpos %6u :%5u literals & match %3u bytes at distance %6u \n", pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); #endif - ZSTD_statsUpdatePrices(&seqStorePtr->stats, litLength, literals, offsetCode, matchCode); + ZSTD_statsUpdatePrices(&seqStorePtr->stats, litLength, (const BYTE*)literals, offsetCode, matchCode); /* debug only */ /* copy Literals */ ZSTD_wildcopy(seqStorePtr->lit, literals, litLength); @@ -1104,43 +1110,47 @@ static void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls) FORCE_INLINE -void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc, +void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx, const void* src, size_t srcSize, const U32 mls) { - U32* const hashTable = zc->hashTable; - const U32 hBits = zc->params.cParams.hashLog; - seqStore_t* seqStorePtr = &(zc->seqStore); - const BYTE* const base = zc->base; + U32* const hashTable = cctx->hashTable; + const U32 hBits = cctx->params.cParams.hashLog; + seqStore_t* seqStorePtr = &(cctx->seqStore); + const BYTE* const base = cctx->base; const BYTE* const istart = (const BYTE*)src; const BYTE* ip = istart; const BYTE* anchor = istart; - const U32 lowIndex = zc->dictLimit; - const BYTE* const lowest = base + lowIndex; + const U32 lowestIndex = cctx->dictLimit; + const BYTE* const lowest = base + lowestIndex; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - 8; - size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE; + size_t offset_1=cctx->rep[0], offset_2=cctx->rep[1]; /* init */ ZSTD_resetSeqStore(seqStorePtr); - if (ip < lowest+REPCODE_STARTVALUE) ip = lowest+REPCODE_STARTVALUE; + ip += (ip==lowest); + { U32 const maxRep = (U32)(ip-lowest); + if (offset_1 > maxRep) offset_1 = 0; + if (offset_2 > maxRep) offset_2 = 0; + } /* Main Search Loop */ - while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ size_t mlCode; size_t offset; - const size_t h = ZSTD_hashPtr(ip, hBits, mls); - const U32 matchIndex = hashTable[h]; + size_t const h = ZSTD_hashPtr(ip, hBits, mls); + U32 const current = (U32)(ip-base); + U32 const matchIndex = hashTable[h]; const BYTE* match = base + matchIndex; - const U32 current = (U32)(ip-base); hashTable[h] = current; /* update hash table */ - if (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)) { /* note : by construction, offset_1 <= current */ + if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { /* note : by construction, offset_1 <= current */ mlCode = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-offset_1, iend) + 
EQUAL_READ32; ip++; ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mlCode-MINMATCH); } else { - if ( (matchIndex <= lowIndex) || + if ( (matchIndex <= lowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) { ip += ((ip-anchor) >> g_searchStrength) + 1; continue; @@ -1164,7 +1174,8 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc, hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); /* check immediate repcode */ while ( (ip <= ilimit) - && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { + && ( (offset_2>0) + & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { /* store sequence */ size_t const rlCode = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_2, iend) + EQUAL_READ32; { size_t const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ @@ -1175,6 +1186,10 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc, continue; /* faster when present ... (?) */ } } } + /* save reps for next block */ + cctx->savedRep[0] = offset_1 ? (U32)offset_1 : (U32)(iend-base); + cctx->savedRep[1] = offset_2 ? (U32)offset_2 : (U32)(iend-base); + /* Last Literals */ { size_t const lastLLSize = iend - anchor; memcpy(seqStorePtr->lit, anchor, lastLLSize); @@ -1214,22 +1229,20 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, const BYTE* const istart = (const BYTE*)src; const BYTE* ip = istart; const BYTE* anchor = istart; - const U32 lowLimit = ctx->lowLimit; - const BYTE* const dictStart = dictBase + lowLimit; + const U32 lowestIndex = ctx->lowLimit; + const BYTE* const dictStart = dictBase + lowestIndex; const U32 dictLimit = ctx->dictLimit; const BYTE* const lowPrefixPtr = base + dictLimit; const BYTE* const dictEnd = dictBase + dictLimit; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - 8; - - U32 offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE; - + U32 offset_1=ctx->rep[0], offset_2=ctx->rep[1]; /* init */ ZSTD_resetSeqStore(seqStorePtr); /* skip first position to avoid read overflow during repcode match check */ - hashTable[ZSTD_hashPtr(ip+0, hBits, mls)] = (U32)(ip-base+0); - ip += REPCODE_STARTVALUE; + hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base); + ip++; /* Main Search Loop */ while (ip < ilimit) { /* < instead of <=, because (ip+1) */ @@ -1245,14 +1258,14 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, U32 offset; hashTable[h] = current; /* update hash table */ - if ( ((repIndex >= dictLimit) || (repIndex <= dictLimit-4)) - && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + if ( (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend; mlCode = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repMatchEnd, lowPrefixPtr) + EQUAL_READ32; ip++; ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mlCode-MINMATCH); } else { - if ( (matchIndex < lowLimit) || + if ( (matchIndex < lowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) { ip += ((ip-anchor) >> g_searchStrength) + 1; continue; @@ -1280,8 +1293,8 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, U32 const current2 = (U32)(ip-base); U32 const repIndex2 = current2 - offset_2; const BYTE* repMatch2 = repIndex2 < dictLimit ? 
dictBase + repIndex2 : base + repIndex2; - if ( ((repIndex2 <= dictLimit-4) || (repIndex2 >= dictLimit)) - && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */ + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend; size_t repLength2 = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch2+EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32; U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ @@ -1294,6 +1307,9 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, break; } } } + /* save reps for next block */ + ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2; + /* Last Literals */ { size_t const lastLLSize = iend - anchor; memcpy(seqStorePtr->lit, anchor, lastLLSize); @@ -1721,15 +1737,19 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr, U32 maxNbAttempts, U32 matchLengthSearch); - searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS; + searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS; + U32 rep[ZSTD_REP_INIT]; /* init */ - U32 rep[ZSTD_REP_INIT]; - { U32 i ; for (i=0; inextToUpdate3 = ctx->nextToUpdate; ZSTD_resetSeqStore(seqStorePtr); - if ((ip-base) < REPCODE_STARTVALUE) ip = base + REPCODE_STARTVALUE; + { U32 i; + U32 const maxRep = (U32)(ip-base); + for (i=0; irep[i]; + if (rep[i]>maxRep) rep[i]=0; + } } /* Match Loop */ while (ip < ilimit) { @@ -1738,7 +1758,7 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, const BYTE* start=ip+1; /* check repCode */ - if (MEM_read32(ip+1) == MEM_read32(ip+1 - rep[0])) { + if ((rep[0]>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - rep[0]))) { /* repcode : we take it */ matchLength = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-rep[0], iend) + EQUAL_READ32; if (depth==0) goto _storeSequence; @@ -1760,7 +1780,7 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, if (depth>=1) while (ip0) & (MEM_read32(ip) == MEM_read32(ip - rep[0])))) { size_t const mlRep = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-rep[0], iend) + EQUAL_READ32; int const gain2 = (int)(mlRep * 3); int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); @@ -1779,7 +1799,7 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, /* let's find an even better one */ if ((depth==2) && (ip0) & (MEM_read32(ip) == MEM_read32(ip - rep[0])))) { size_t const ml2 = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-rep[0], iend) + EQUAL_READ32; int const gain2 = (int)(ml2 * 4); int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); @@ -1813,7 +1833,8 @@ _storeSequence: /* check immediate repcode */ while ( (ip <= ilimit) - && (MEM_read32(ip) == MEM_read32(ip - rep[1])) ) { + && ((rep[1]>0) + & (MEM_read32(ip) == MEM_read32(ip - rep[1])) )) { /* store sequence */ matchLength = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-rep[1], iend) + EQUAL_READ32; offset = rep[1]; rep[1] = rep[0]; rep[0] = (U32)offset; /* swap repcodes */ @@ -1823,6 +1844,13 @@ _storeSequence: continue; /* faster when present ... (?) 
*/ } } + /* Save reps for next block */ + { int i; + for (i=0; isavedRep[i] = rep[i]; + } } + /* Last Literals */ { size_t const lastLLSize = iend - anchor; memcpy(seqStorePtr->lit, anchor, lastLLSize); @@ -1866,6 +1894,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, const BYTE* const ilimit = iend - 8; const BYTE* const base = ctx->base; const U32 dictLimit = ctx->dictLimit; + const U32 lowestIndex = ctx->lowLimit; const BYTE* const prefixStart = base + dictLimit; const BYTE* const dictBase = ctx->dictBase; const BYTE* const dictEnd = dictBase + dictLimit; @@ -1881,11 +1910,11 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, /* init */ U32 rep[ZSTD_REP_INIT]; - { U32 i; for (i=0; irep[i]; } ctx->nextToUpdate3 = ctx->nextToUpdate; ZSTD_resetSeqStore(seqStorePtr); - if ((ip - prefixStart) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE; + ip += (ip == prefixStart); /* Match Loop */ while (ip < ilimit) { @@ -1895,11 +1924,10 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, U32 current = (U32)(ip-base); /* check repCode */ - { - const U32 repIndex = (U32)(current+1 - rep[0]); + { const U32 repIndex = (U32)(current+1 - rep[0]); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ if (MEM_read32(ip+1) == MEM_read32(repMatch)) { /* repcode detected we should take it */ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; @@ -1929,7 +1957,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, const U32 repIndex = (U32)(current - rep[0]); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected */ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; @@ -1959,7 +1987,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, const U32 repIndex = (U32)(current - rep[0]); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected */ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; @@ -2003,7 +2031,7 @@ _storeSequence: const U32 repIndex = (U32)((ip-base) - rep[1]); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected we should take it */ const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; @@ -2017,6 +2045,9 @@ _storeSequence: break; } } + /* Save reps for next block */ + ctx->savedRep[0] = rep[0]; ctx->savedRep[1] = rep[1]; ctx->savedRep[2] = rep[2]; + /* Last Literals */ { size_t const lastLLSize = iend - anchor; memcpy(seqStorePtr->lit, anchor, lastLLSize); @@ -2315,7 +2346,6 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* zc, const void* dict, size_t { size_t const hufHeaderSize = HUF_readCTable(zc->hufTable, 255, dict, dictSize); if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted); - zc->flagStaticTables = 1; dict = (const char*)dict + hufHeaderSize; dictSize -= hufHeaderSize; } @@ -2349,6 +2379,7 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* zc, const void* dict, size_t dictSize -= litlengthHeaderSize; } + zc->flagStaticTables = 1; return (dictSizeStart-dictSize); } diff --git a/lib/compress/zstd_opt.h b/lib/compress/zstd_opt.h index 46b2cb14..703b568e 100644 --- a/lib/compress/zstd_opt.h +++ b/lib/compress/zstd_opt.h @@ -461,15 +461,14 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, ZSTD_optimal_t* opt = seqStorePtr->priceTable; ZSTD_match_t* matches = seqStorePtr->matchTable; const BYTE* inr; + U32 offset, rep[ZSTD_REP_INIT]; /* init */ - U32 offset, rep[ZSTD_REP_INIT]; - { U32 i; for (i=0; inextToUpdate3 = ctx->nextToUpdate; ZSTD_resetSeqStore(seqStorePtr); ZSTD_rescaleFreqs(seqStorePtr); - if ((ip-prefixStart) < REPCODE_STARTVALUE) ip = prefixStart + REPCODE_STARTVALUE; + ip += (ip==prefixStart); + { U32 i; for (i=0; irep[i]; } ZSTD_LOG_BLOCK("%d: COMPBLOCK_OPT_GENERIC srcSz=%d maxSrch=%d mls=%d sufLen=%d\n", (int)(ip-base), (int)srcSize, maxSearches, mls, sufficient_len); @@ -482,23 +481,24 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, litlen = (U32)(ip - anchor); /* check repCode */ - { U32 i; for (i=0; i sufficient_len || mlen >= ZSTD_OPT_NUM) { - best_mlen = mlen; best_off = i; cur = 0; last_pos = 1; - goto _storeSequence; - } - best_off = (i<=1 && ip == anchor) ? 1-i : i; - do { - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH); - if (mlen > last_pos || price < opt[mlen].price) - SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */ - mlen--; - } while (mlen >= minMatch); - } } + { U32 i; + for (i=0; i sufficient_len || mlen >= ZSTD_OPT_NUM) { + best_mlen = mlen; best_off = i; cur = 0; last_pos = 1; + goto _storeSequence; + } + best_off = (i<=1 && ip == anchor) ? 1-i : i; + do { + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH); + if (mlen > last_pos || price < opt[mlen].price) + SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */ + mlen--; + } while (mlen >= minMatch); + } } } match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches, minMatch); @@ -516,15 +516,15 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, /* set prices using matches at position = 0 */ best_mlen = (last_pos) ? last_pos : minMatch; for (u = 0; u < match_num; u++) { - mlen = (u>0) ? matches[u-1].len+1 : best_mlen; - best_mlen = matches[u].len; - ZSTD_LOG_PARSER("%d: start Found mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(ip-base), matches[u].len, matches[u].off, (int)best_mlen, (int)last_pos); - while (mlen <= best_mlen) { + mlen = (u>0) ? 
matches[u-1].len+1 : best_mlen; + best_mlen = matches[u].len; + ZSTD_LOG_PARSER("%d: start Found mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(ip-base), matches[u].len, matches[u].off, (int)best_mlen, (int)last_pos); + while (mlen <= best_mlen) { price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off, mlen - MINMATCH); if (mlen > last_pos || price < opt[mlen].price) SET_PRICE(mlen, mlen, matches[u].off, litlen, price); /* note : macro modifies last_pos */ mlen++; - } } + } } if (last_pos < minMatch) { ip++; continue; } @@ -572,38 +572,40 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, ZSTD_LOG_PARSER("%d: CURRENT_NoExt price[%d/%d]=%d off=%d mlen=%d litlen=%d rep[0]=%d rep[1]=%d\n", (int)(inr-base), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep[0], opt[cur].rep[1]); best_mlen = minMatch; - { U32 i; for (i=0; i sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { - ZSTD_LOG_PARSER("%d: REP sufficient_len=%d best_mlen=%d best_off=%d last_pos=%d\n", (int)(inr-base), sufficient_len, best_mlen, best_off, last_pos); - best_mlen = mlen; best_off = i; last_pos = cur + 1; - goto _storeSequence; - } + if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { + ZSTD_LOG_PARSER("%d: REP sufficient_len=%d best_mlen=%d best_off=%d last_pos=%d\n", (int)(inr-base), sufficient_len, best_mlen, best_off, last_pos); + best_mlen = mlen; best_off = i; last_pos = cur + 1; + goto _storeSequence; + } - best_off = (i<=1 && opt[cur].mlen != 1) ? 1-i : i; - if (opt[cur].mlen == 1) { - litlen = opt[cur].litlen; - if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH); - } else - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH); - } else { - litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH); - } + best_off = (i<=1 && opt[cur].mlen != 1) ? 
1-i : i; + if (opt[cur].mlen == 1) { + litlen = opt[cur].litlen; + if (cur > litlen) { + price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH); + } else + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH); + } else { + litlen = 0; + price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH); + } - if (mlen > best_mlen) best_mlen = mlen; - ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, best_off, price, litlen); + if (mlen > best_mlen) best_mlen = mlen; + ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, best_off, price, litlen); - do { - if (cur + mlen > last_pos || price <= opt[cur + mlen].price) - SET_PRICE(cur + mlen, mlen, i, litlen, price); - mlen--; - } while (mlen >= minMatch); - } } + do { + if (cur + mlen > last_pos || price <= opt[cur + mlen].price) + SET_PRICE(cur + mlen, mlen, i, litlen, price); + mlen--; + } while (mlen >= minMatch); + } } } match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches, best_mlen); ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num); @@ -712,8 +714,11 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ anchor = ip = ip + mlen; } } /* for (cur=0; cur < last_pos; ) */ - { /* Last Literals */ - size_t lastLLSize = iend - anchor; + /* Save reps for next block */ + { int i; for (i=0; isavedRep[i] = rep[i]; } + + /* Last Literals */ + { size_t const lastLLSize = iend - anchor; ZSTD_LOG_ENCODE("%d: lastLLSize literals=%u\n", (int)(ip-base), (U32)lastLLSize); memcpy(seqStorePtr->lit, anchor, lastLLSize); seqStorePtr->lit += lastLLSize; @@ -732,6 +737,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - 8; const BYTE* const base = ctx->base; + const U32 lowestIndex = ctx->lowLimit; const U32 dictLimit = ctx->dictLimit; const BYTE* const prefixStart = base + dictLimit; const BYTE* const dictBase = ctx->dictBase; @@ -748,12 +754,12 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, /* init */ U32 offset, rep[ZSTD_REP_INIT]; - { U32 i; for (i=0; irep[i]; } ctx->nextToUpdate3 = ctx->nextToUpdate; ZSTD_resetSeqStore(seqStorePtr); ZSTD_rescaleFreqs(seqStorePtr); - if ((ip - prefixStart) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE; + ip += (ip==prefixStart); ZSTD_LOG_BLOCK("%d: COMPBLOCK_OPT_EXTDICT srcSz=%d maxSrch=%d mls=%d sufLen=%d\n", (int)(ip-base), (int)srcSize, maxSearches, mls, sufficient_len); @@ -768,31 +774,32 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, opt[0].litlen = (U32)(ip - anchor); /* check repCode */ - { U32 i; for (i=0; i= 3) /* intentional overflow */ - && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) { - /* repcode detected we should take it */ - const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - mlen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; + { U32 i; + for (i=0; i= 3) & (repIndex>lowestIndex)) /* intentional overflow */ + && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; + mlen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; - ZSTD_LOG_PARSER("%d: start try REP rep[%d]=%d mlen=%d\n", (int)(ip-base), i, (int)rep[i], (int)mlen); - if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { - best_mlen = mlen; best_off = i; cur = 0; last_pos = 1; - goto _storeSequence; - } + ZSTD_LOG_PARSER("%d: start try REP rep[%d]=%d mlen=%d\n", (int)(ip-base), i, (int)rep[i], (int)mlen); + if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { + best_mlen = mlen; best_off = i; cur = 0; last_pos = 1; + goto _storeSequence; + } - best_off = (i<=1 && ip == anchor) ? 1-i : i; - litlen = opt[0].litlen; - do { - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH); - if (mlen > last_pos || price < opt[mlen].price) - SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */ - mlen--; - } while (mlen >= minMatch); - } } } + best_off = (i<=1 && ip == anchor) ? 1-i : i; + litlen = opt[0].litlen; + do { + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH); + if (mlen > last_pos || price < opt[mlen].price) + SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */ + mlen--; + } while (mlen >= minMatch); + } } } match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches, minMatch); /* first search (depth 0) */ @@ -869,44 +876,45 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, ZSTD_LOG_PARSER("%d: CURRENT_Ext price[%d/%d]=%d off=%d mlen=%d litlen=%d rep[0]=%d rep[1]=%d\n", (int)(inr-base), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep[0], opt[cur].rep[1]); best_mlen = 0; - { U32 i; for (i=0; i= 3) /* intentional overflow */ - && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) { - /* repcode detected */ - const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - mlen = (U32)ZSTD_count_2segments(inr+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; - ZSTD_LOG_PARSER("%d: Found REP %d/%d mlen=%d off=%d rep=%d opt[%d].off=%d\n", (int)(inr-base), i, ZSTD_REP_NUM, mlen, i, opt[cur].rep[i], cur, opt[cur].off); + { U32 i; + for (i=0; i= 3) & (repIndex>lowestIndex)) /* intentional overflow */ + && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + mlen = (U32)ZSTD_count_2segments(inr+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; + ZSTD_LOG_PARSER("%d: Found REP %d/%d mlen=%d off=%d rep=%d opt[%d].off=%d\n", (int)(inr-base), i, ZSTD_REP_NUM, mlen, i, opt[cur].rep[i], cur, opt[cur].off); - if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { - ZSTD_LOG_PARSER("%d: REP sufficient_len=%d best_mlen=%d best_off=%d last_pos=%d\n", (int)(inr-base), sufficient_len, best_mlen, best_off, last_pos); - best_mlen = mlen; best_off = i; last_pos = cur + 1; - goto _storeSequence; - } + if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { + ZSTD_LOG_PARSER("%d: REP sufficient_len=%d best_mlen=%d best_off=%d last_pos=%d\n", (int)(inr-base), sufficient_len, best_mlen, best_off, last_pos); + best_mlen = mlen; best_off = i; last_pos = cur + 1; + goto _storeSequence; + } - best_off = (i<=1 && opt[cur].mlen != 1) ? 
1-i : i; - if (opt[cur].mlen == 1) { - litlen = opt[cur].litlen; - if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH); - } else - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH); - } else { - litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH); - } + best_off = (i<=1 && opt[cur].mlen != 1) ? 1-i : i; + if (opt[cur].mlen == 1) { + litlen = opt[cur].litlen; + if (cur > litlen) { + price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH); + } else + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH); + } else { + litlen = 0; + price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH); + } - best_mlen = mlen; - ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, best_off, price, litlen); + best_mlen = mlen; + ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, best_off, price, litlen); - do { - if (cur + mlen > last_pos || price <= opt[cur + mlen].price) - SET_PRICE(cur + mlen, mlen, i, litlen, price); - mlen--; - } while (mlen >= minMatch); - } } } + do { + if (cur + mlen > last_pos || price <= opt[cur + mlen].price) + SET_PRICE(cur + mlen, mlen, i, litlen, price); + mlen--; + } while (mlen >= minMatch); + } } } match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch); ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num); @@ -1023,8 +1031,11 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ anchor = ip = ip + mlen; } } /* for (cur=0; cur < last_pos; ) */ - { /* Last Literals */ - size_t lastLLSize = iend - anchor; + /* Save reps for next block */ + ctx->savedRep[0] = rep[0]; ctx->savedRep[1] = rep[1]; ctx->savedRep[2] = rep[2]; + + /* Last Literals */ + { size_t lastLLSize = iend - anchor; ZSTD_LOG_ENCODE("%d: lastLLSize literals=%u\n", (int)(ip-base), (U32)(lastLLSize)); memcpy(seqStorePtr->lit, anchor, lastLLSize); seqStorePtr->lit += lastLLSize; diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c index ed235f5e..5a998ee2 100644 --- a/lib/decompress/huf_decompress.c +++ b/lib/decompress/huf_decompress.c @@ -72,10 +72,23 @@ #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ +/*-***************************/ +/* generic DTableDesc */ +/*-***************************/ + +typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc; + +static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) +{ + DTableDesc dtd; + memcpy(&dtd, table, sizeof(dtd)); + return dtd; +} + + /*-***************************/ /* single-symbol decoding */ /*-***************************/ -typedef struct { BYTE maxTableLog; BYTE currentTableLog; } DTableDesc; typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */ @@ -84,45 +97,45 @@ size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize) BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ U32 tableLog = 0; - size_t iSize; U32 nbSymbols = 0; - U32 n; - U32 nextRankStart; + size_t iSize; void* const dtPtr = DTable + 1; HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr; - DTableDesc 
dtd; - HUF_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compilation fails here, assertion is false */ - memcpy(&dtd, DTable, sizeof(dtd)); + HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); //memset(huffWeight, 0, sizeof(huffWeight)); /* is not necessary, even though some analyzer complain ... */ iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize); if (HUF_isError(iSize)) return iSize; - /* check result */ - if (tableLog > dtd.maxTableLog) return ERROR(tableLog_tooLarge); /* DTable is too small */ - dtd.currentTableLog = (BYTE)tableLog; /* maybe should separate sizeof allocated DTable, from used size of DTable, in case of re-use */ - memcpy(DTable, &dtd, sizeof(dtd)); + /* Table header */ + { DTableDesc dtd = HUF_getDTableDesc(DTable); + if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, huffman tree cannot fit in */ + dtd.tableType = 0; + dtd.tableLog = (BYTE)tableLog; + memcpy(DTable, &dtd, sizeof(dtd)); + } /* Prepare ranks */ - nextRankStart = 0; - for (n=1; n> 1; - U32 i; - HUF_DEltX2 D; - D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w); - for (i = rankVal[w]; i < rankVal[w] + length; i++) - dt[i] = D; - rankVal[w] += length; - } + { U32 n; + for (n=0; n> 1; + U32 i; + HUF_DEltX2 D; + D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w); + for (i = rankVal[w]; i < rankVal[w] + length; i++) + dt[i] = D; + rankVal[w] += length; + } } return iSize; } @@ -130,8 +143,8 @@ size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize) static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog) { - const size_t val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ - const BYTE c = dt[val].byte; + size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ + BYTE const c = dt[val].byte; BIT_skipBits(Dstream, dt[val].nbBits); return c; } @@ -170,21 +183,18 @@ static inline size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, B return pEnd-pStart; } -size_t HUF_decompress1X2_usingDTable( +static size_t HUF_decompress1X2_usingDTable_internal( void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable) { BYTE* op = (BYTE*)dst; BYTE* const oend = op + dstSize; - const void* dtPtr = DTable; - const HUF_DEltX2* const dt = ((const HUF_DEltX2*)dtPtr)+1; + const void* dtPtr = DTable + 1; + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; BIT_DStream_t bitD; - DTableDesc dtd; - U32 dtLog; - - memcpy(&dtd, DTable, sizeof(dtd)); - dtLog = dtd.currentTableLog; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; { size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize); if (HUF_isError(errorCode)) return errorCode; } @@ -197,22 +207,36 @@ size_t HUF_decompress1X2_usingDTable( return dstSize; } +size_t HUF_decompress1X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 0) return ERROR(GENERIC); + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); +} + +size_t HUF_decompress1X2_DCtx (HUF_DTable* DCtx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX2 (DCtx, cSrc, cSrcSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return 
ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx); +} + size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); - const BYTE* ip = (const BYTE*) cSrc; - - size_t const errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize); - if (HUF_isError(errorCode)) return errorCode; - if (errorCode >= cSrcSize) return ERROR(srcSize_wrong); - ip += errorCode; - cSrcSize -= errorCode; - - return HUF_decompress1X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable); + return HUF_decompress1X2_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); } -size_t HUF_decompress4X2_usingDTable( +static size_t HUF_decompress4X2_usingDTable_internal( void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable) @@ -223,8 +247,8 @@ size_t HUF_decompress4X2_usingDTable( { const BYTE* const istart = (const BYTE*) cSrc; BYTE* const ostart = (BYTE*) dst; BYTE* const oend = ostart + dstSize; - const void* const dtPtr = DTable; - const HUF_DEltX2* const dt = ((const HUF_DEltX2*)dtPtr) +1; + const void* const dtPtr = DTable + 1; + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; /* Init */ BIT_DStream_t bitD1; @@ -248,11 +272,8 @@ size_t HUF_decompress4X2_usingDTable( BYTE* op3 = opStart3; BYTE* op4 = opStart4; U32 endSignal; - DTableDesc dtd; - U32 dtLog; - - memcpy(&dtd, DTable, sizeof(dtd)); - dtLog = dtd.currentTableLog; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ { size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1); @@ -308,18 +329,33 @@ size_t HUF_decompress4X2_usingDTable( } +size_t HUF_decompress4X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 0) return ERROR(GENERIC); + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); +} + + +size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX2 (dctx, cSrc, cSrcSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress4X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, dctx); +} + size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); - const BYTE* ip = (const BYTE*) cSrc; - - size_t const hSize = HUF_readDTableX2 (DTable, cSrc, cSrcSize); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; - cSrcSize -= hSize; - - return HUF_decompress4X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable); + return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); } @@ -423,15 +459,13 @@ size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize) U32* const rankStart = rankStart0+1; rankVal_t rankVal; U32 tableLog, maxW, sizeOfSort, nbSymbols; - DTableDesc dtd; - U32 maxTableLog; + DTableDesc dtd = HUF_getDTableDesc(DTable); + U32 const maxTableLog = dtd.maxTableLog; size_t iSize; void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */ HUF_DEltX4* const dt = (HUF_DEltX4*)dtPtr; - 
HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(U32));   /* if compilation fails here, assertion is false */
-    memcpy(&dtd, DTable, sizeof(dtd));
-    maxTableLog = dtd.maxTableLog-1;
+    HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable));   /* if compilation fails here, assertion is false */
     if (maxTableLog > HUF_TABLELOG_ABSOLUTEMAX) return ERROR(tableLog_tooLarge);
     //memset(weightList, 0, sizeof(weightList));   /* is not necessary, even though some analyzer complain ... */
@@ -490,7 +524,8 @@ size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize)
                      rankStart0, rankVal, maxW,
                      tableLog+1);

-    dtd.currentTableLog = (BYTE)maxTableLog;
+    dtd.tableLog = (BYTE)maxTableLog;
+    dtd.tableType = 1;
     memcpy(DTable, &dtd, sizeof(dtd));
     return iSize;
 }
@@ -556,7 +591,7 @@ static inline size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* c
 }


-size_t HUF_decompress1X4_usingDTable(
+static size_t HUF_decompress1X4_usingDTable_internal(
           void* dst,  size_t dstSize,
     const void* cSrc, size_t cSrcSize,
     const HUF_DTable* DTable)
@@ -573,9 +608,8 @@ size_t HUF_decompress1X4_usingDTable(
         BYTE* const oend = ostart + dstSize;
         const void* const dtPtr = DTable+1;   /* force compiler to not use strict-aliasing */
         const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
-        DTableDesc dtd;
-        memcpy(&dtd, DTable, sizeof(dtd));
-        HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.currentTableLog);
+        DTableDesc const dtd = HUF_getDTableDesc(DTable);
+        HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog);
     }

     /* check */
@@ -585,21 +619,35 @@ size_t HUF_decompress1X4_usingDTable(
     return dstSize;
 }

+size_t HUF_decompress1X4_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    DTableDesc dtd = HUF_getDTableDesc(DTable);
+    if (dtd.tableType != 0) return ERROR(GENERIC);
+    return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
+}
+
+size_t HUF_decompress1X4_DCtx (HUF_DTable* DCtx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t const hSize = HUF_readDTableX4 (DCtx, cSrc, cSrcSize);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize; cSrcSize -= hSize;
+
+    return HUF_decompress1X4_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx);
+}
+
 size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
 {
     HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX);
-    const BYTE* ip = (const BYTE*) cSrc;
-
-    size_t const hSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize);
-    if (HUF_isError(hSize)) return hSize;
-    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
-    ip += hSize;
-    cSrcSize -= hSize;
-
-    return HUF_decompress1X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+    return HUF_decompress1X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
 }

-size_t HUF_decompress4X4_usingDTable(
+static size_t HUF_decompress4X4_usingDTable_internal(
           void* dst,  size_t dstSize,
     const void* cSrc, size_t cSrcSize,
     const HUF_DTable* DTable)
@@ -634,11 +682,8 @@ size_t HUF_decompress4X4_usingDTable(
         BYTE* op3 = opStart3;
         BYTE* op4 = opStart4;
         U32 endSignal;
-        DTableDesc dtd;
-        U32 dtLog;
-
-        memcpy(&dtd, DTable, sizeof(dtd));
-        dtLog = dtd.currentTableLog;
+        DTableDesc const dtd = HUF_getDTableDesc(DTable);
+        U32 const dtLog = dtd.tableLog;

         if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
         { size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
@@ -695,18 +740,33 @@ size_t HUF_decompress4X4_usingDTable(
 }

+size_t HUF_decompress4X4_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    DTableDesc dtd = HUF_getDTableDesc(DTable);
+    if (dtd.tableType != 1) return ERROR(GENERIC);
+    return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
+}
+
+
+size_t HUF_decompress4X4_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t hSize = HUF_readDTableX4 (dctx, cSrc, cSrcSize);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize; cSrcSize -= hSize;
+
+    return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx);
+}
+
 size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
 {
     HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX);
-    const BYTE* ip = (const BYTE*) cSrc;
-
-    size_t hSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize);
-    if (HUF_isError(hSize)) return hSize;
-    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
-    ip += hSize;
-    cSrcSize -= hSize;
-
-    return HUF_decompress4X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+    return HUF_decompress4X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
 }


@@ -714,6 +774,25 @@ size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cS
 /* Generic decompression selector */
 /* ********************************/

+size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
+                                    const void* cSrc, size_t cSrcSize,
+                                    const HUF_DTable* DTable)
+{
+    DTableDesc const dtd = HUF_getDTableDesc(DTable);
+    return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) :
+                           HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
+}
+
+size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
+                                    const void* cSrc, size_t cSrcSize,
+                                    const HUF_DTable* DTable)
+{
+    DTableDesc const dtd = HUF_getDTableDesc(DTable);
+    return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) :
+                           HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
+}
+
+
 typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
 static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
 {
@@ -773,3 +852,43 @@ size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcS
     //return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);   /* multi-streams single-symbol decoding */
     //return HUF_decompress4X4(dst, dstSize, cSrc, cSrcSize);   /* multi-streams double-symbols decoding */
 }
+
+size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
+    if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
+    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
+
+    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
+        return algoNb ? HUF_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
+                        HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
+    }
+}
+
+size_t HUF_decompress4X_hufOnly (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if ((cSrcSize >= dstSize) || (cSrcSize <= 1)) return ERROR(corruption_detected);   /* invalid */
+
+    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
+        return algoNb ? HUF_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
+                        HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
+    }
+}
+
+size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
+    if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
+    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
+
+    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
+        return algoNb ? HUF_decompress1X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
+                        HUF_decompress1X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
+    }
+}
diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c
index 9a41228c..82d54fb5 100644
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -112,21 +112,23 @@ struct ZSTD_DCtx_s
     FSE_DTable LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
     FSE_DTable OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
     FSE_DTable MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
-    HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog+1)];
+    HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)];   /* can accommodate HUF_decompress4X */
     const void* previousDstEnd;
     const void* base;
     const void* vBase;
     const void* dictEnd;
     size_t expected;
-    size_t headerSize;
+    U32 rep[3];
     ZSTD_frameParams fParams;
-    XXH64_state_t xxhState;
-    ZSTD_customMem customMem;
     blockType_t bType;   /* used in ZSTD_decompressContinue(), to transfer blockType between header decoding and block decoding stages */
     ZSTD_dStage stage;
+    U32 litEntropy;
+    U32 fseEntropy;
+    XXH64_state_t xxhState;
+    size_t headerSize;
     U32 dictID;
-    U32 flagRepeatTable;
     const BYTE* litPtr;
+    ZSTD_customMem customMem;
     size_t litBufSize;
     size_t litSize;
     BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
@@ -143,9 +145,10 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
     dctx->base = NULL;
     dctx->vBase = NULL;
     dctx->dictEnd = NULL;
-    dctx->hufTable[0] = (HUF_DTable)((HufLog+1)*0x101);
-    dctx->flagRepeatTable = 0;
+    dctx->hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);
+    dctx->litEntropy = dctx->fseEntropy = 0;
     dctx->dictID = 0;
+    { int i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->rep[i] = repStartValue[i]; }
     return 0;
 }

@@ -171,7 +174,6 @@ ZSTD_DCtx* ZSTD_createDCtx(void)
     return ZSTD_createDCtx_advanced(defaultCustomMem);
 }

-
 size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
 {
     if (dctx==NULL) return 0;   /* support free on NULL */
@@ -450,13 +452,14 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                           const void* src, size_t srcSize)   /* note : srcSize < BLOCKSIZE */
 {
     const BYTE* const istart = (const BYTE*) src;
+    litBlockType_t lbt;

-    /* any compressed block with literals segment must be at least this size */
     if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
+    lbt = (litBlockType_t)(istart[0]>> 6);

-    switch(istart[0]>> 6)
+    switch(lbt)
     {
-    case IS_HUF:
+    case lbt_huffman:
         {   size_t litSize, litCSize, singleStream=0;
             U32 lhSize = ((istart[0]) >> 4) & 3;
             if (srcSize < 5) return ERROR(corruption_detected);   /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for lhSize, + cSize (+nbSeq) */
@@ -486,27 +489,29 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
             if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);

             if (HUF_isError(singleStream ?
-                            HUF_decompress1X2(dctx->litBuffer, litSize, istart+lhSize, litCSize) :
-                            HUF_decompress   (dctx->litBuffer, litSize, istart+lhSize, litCSize) ))
+                            HUF_decompress1X2_DCtx(dctx->hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize) :
+                            HUF_decompress4X_hufOnly (dctx->hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize) ))
                 return ERROR(corruption_detected);

             dctx->litPtr = dctx->litBuffer;
             dctx->litBufSize = ZSTD_BLOCKSIZE_MAX+8;
             dctx->litSize = litSize;
+            dctx->litEntropy = 1;
             return litCSize + lhSize;
         }
-    case IS_PCH:
+    case lbt_repeat:
         {   size_t litSize, litCSize;
             U32 lhSize = ((istart[0]) >> 4) & 3;
             if (lhSize != 1)  /* only case supported for now : small litSize, single stream */
                 return ERROR(corruption_detected);
-            if (!dctx->flagRepeatTable)
+            if (dctx->litEntropy==0)
                 return ERROR(dictionary_corrupted);

             /* 2 - 2 - 10 - 10 */
             lhSize=3;
             litSize  = ((istart[0] & 15) << 6) + (istart[1] >> 2);
             litCSize = ((istart[1] & 3) << 8) + istart[2];
+            if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
             {   size_t const errorCode = HUF_decompress1X4_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTable);
                 if (HUF_isError(errorCode)) return ERROR(corruption_detected);
@@ -516,7 +521,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
             dctx->litSize = litSize;
             return litCSize + lhSize;
         }
-    case IS_RAW:
+    case lbt_raw:
         {   size_t litSize;
             U32 lhSize = ((istart[0]) >> 4) & 3;
             switch(lhSize)
@@ -547,7 +552,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
             dctx->litSize = litSize;
             return lhSize+litSize;
         }
-    case IS_RLE:
+    case lbt_rle:
         {   size_t litSize;
             U32 lhSize = ((istart[0]) >> 4) & 3;
             switch(lhSize)
@@ -676,7 +681,6 @@ typedef struct {
 } seqState_t;


-
 static seq_t ZSTD_decodeSequence(seqState_t* seqState)
 {
     seq_t seq;
@@ -850,16 +854,16 @@ static size_t ZSTD_decompressSequences(
     int nbSeq;

     /* Build Decoding Tables */
-    {   size_t const seqHSize = ZSTD_decodeSeqHeaders(&nbSeq, DTableLL, DTableML, DTableOffb, dctx->flagRepeatTable, ip, seqSize);
+    {   size_t const seqHSize = ZSTD_decodeSeqHeaders(&nbSeq, DTableLL, DTableML, DTableOffb, dctx->fseEntropy, ip, seqSize);
         if (ZSTD_isError(seqHSize)) return seqHSize;
         ip += seqHSize;
-        dctx->flagRepeatTable = 0;
     }

     /* Regen sequences */
     if (nbSeq) {
         seqState_t seqState;
-        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = repStartValue[i]; }
+        dctx->fseEntropy = 1;
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->rep[i]; }
         { size_t const errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip);
           if (ERR_isError(errorCode)) return ERROR(corruption_detected); }
         FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
@@ -869,16 +873,6 @@ static size_t ZSTD_decompressSequences(
         for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
             nbSeq--;
             {   seq_t const sequence = ZSTD_decodeSequence(&seqState);
-
-#if 0   /* debug */
-                static BYTE* start = NULL;
-                if (start==NULL) start = op;
-                size_t pos = (size_t)(op-start);
-                if ((pos >= 5810037) && (pos < 5810400))
-                    printf("Dpos %6u :%5u literals & match %3u bytes at distance %6u \n",
-                           pos, (U32)sequence.litLength, (U32)sequence.matchLength, (U32)sequence.offset);
-#endif
-
                 size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd);
                 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
                 op += oneSeqSize;
@@ -886,6 +880,8 @@ static size_t ZSTD_decompressSequences(

         /* check if reached exact end */
         if (nbSeq) return ERROR(corruption_detected);
+        /* save reps for next block */
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->rep[i] = (U32)(seqState.prevOffset[i]); }
     }

     /* last literal segment */
@@ -972,7 +968,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,

     /* Loop on each block */
     while (1) {
-        size_t decodedSize=0;
+        size_t decodedSize;
         blockProperties_t blockProperties;
         size_t const cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
         if (ZSTD_isError(cBlockSize)) return cBlockSize;
@@ -995,6 +991,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
         case bt_end :
             /* end of frame */
             if (remainingSize) return ERROR(srcSize_wrong);
+            decodedSize = 0;
             break;
         default:
             return ERROR(GENERIC);   /* impossible */
@@ -1228,7 +1225,7 @@ static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* dict, size_t const d
         dictSize -= litlengthHeaderSize;
     }

-    dctx->flagRepeatTable = 1;
+    dctx->litEntropy = dctx->fseEntropy = 1;
     return dictSizeStart - dictSize;
 }

@@ -1317,7 +1314,7 @@ ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_cu

 /*! ZSTD_createDDict() :
 *   Create a digested dictionary, ready to start decompression operation without startup delay.
-*   `dict` can be released after creation */
+*   `dict` can be released after `ZSTD_DDict` creation */
 ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
 {
     ZSTD_customMem const allocator = { NULL, NULL, NULL };
@@ -1336,7 +1333,7 @@ size_t ZSTD_freeDDict(ZSTD_DDict* ddict)

 /*! ZSTD_decompress_usingDDict() :
 *   Decompression using a pre-digested Dictionary
-*   In contrast with older ZSTD_decompress_usingDict(), use dictionary without significant overhead. */
+*   Use dictionary without significant overhead. */
 ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
                                               void* dst, size_t dstCapacity,
                                         const void* src, size_t srcSize,
diff --git a/lib/legacy/zstd_legacy.h b/lib/legacy/zstd_legacy.h
index bda3640d..22921bec 100644
--- a/lib/legacy/zstd_legacy.h
+++ b/lib/legacy/zstd_legacy.h
@@ -86,18 +86,16 @@ MEM_STATIC size_t ZSTD_decompressLegacy(
         case ZSTDv04_magicNumber :
             return ZSTDv04_decompress(dst, dstCapacity, src, compressedSize);
         case ZSTDv05_MAGICNUMBER :
-            {
-                size_t result;
-                ZSTDv05_DCtx* zd = ZSTDv05_createDCtx();
+            {   size_t result;
+                ZSTDv05_DCtx* const zd = ZSTDv05_createDCtx();
                 if (zd==NULL) return ERROR(memory_allocation);
                 result = ZSTDv05_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize);
                 ZSTDv05_freeDCtx(zd);
                 return result;
             }
         case ZSTDv06_MAGICNUMBER :
-            {
-                size_t result;
-                ZSTDv06_DCtx* zd = ZSTDv06_createDCtx();
+            {   size_t result;
+                ZSTDv06_DCtx* const zd = ZSTDv06_createDCtx();
                 if (zd==NULL) return ERROR(memory_allocation);
                 result = ZSTDv06_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize);
                 ZSTDv06_freeDCtx(zd);
diff --git a/programs/fileio.c b/programs/fileio.c
index 47302f9c..decc971c 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -139,6 +139,8 @@ static U32 g_dictIDFlag = 1;
 void FIO_setDictIDFlag(unsigned dictIDFlag) { g_dictIDFlag = dictIDFlag; }
 static U32 g_checksumFlag = 0;
 void FIO_setChecksumFlag(unsigned checksumFlag) { g_checksumFlag = checksumFlag; }
+static U32 g_removeSrcFile = 0;
+void FIO_setRemoveSrcFile(unsigned flag) { g_removeSrcFile = (flag>0); }


 /*-*************************************
@@ -365,8 +367,9 @@ static int FIO_compressFilename_internal(cRess_t ress,

     /* Status */
     DISPLAYLEVEL(2, "\r%79s\r", "");
-    DISPLAYLEVEL(2,"Compressed %llu bytes into %llu bytes ==> %.2f%%\n",
-        (unsigned long long)readsize, (unsigned long long) compressedfilesize, (double)compressedfilesize/readsize*100);
+    DISPLAYLEVEL(2,"%-20.20s :%6.2f%%   (%6llu =>%6llu bytes, %s) \n", srcFileName,
+        (double)compressedfilesize/readsize*100, (unsigned long long)readsize, (unsigned long long) compressedfilesize,
+        dstFileName);

     return 0;
 }
@@ -384,36 +387,38 @@ static int FIO_compressFilename_srcFile(cRess_t ress,
     int result;

     /* File check */
+    if (UTIL_isDirectory(srcFileName)) {
+        DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName);
+        return 1;
+    }
     ress.srcFile = FIO_openSrcFile(srcFileName);
     if (!ress.srcFile) return 1;   /* srcFile could not be opened */

     result = FIO_compressFilename_internal(ress, dstFileName, srcFileName, cLevel);

     fclose(ress.srcFile);
+    if ((g_removeSrcFile) && (!result)) remove(srcFileName);
     return result;
 }


-/*! FIO_compressFilename_extRess() :
+/*! FIO_compressFilename_dstFile() :
 * @return : 0 : compression completed correctly,
-*           1 : missing or pb opening srcFileName
+*           1 : pb
 */
-static int FIO_compressFilename_extRess(cRess_t ress,
+static int FIO_compressFilename_dstFile(cRess_t ress,
                                         const char* dstFileName, const char* srcFileName,
                                         int cLevel)
 {
     int result;

-    ress.srcFile = FIO_openSrcFile(srcFileName);
-    if (ress.srcFile==0) return 1;
     ress.dstFile = FIO_openDstFile(dstFileName);
     if (ress.dstFile==0) { fclose(ress.srcFile); return 1; }

-    result = FIO_compressFilename_internal(ress, dstFileName, srcFileName, cLevel);
-    if (result!=0) remove(dstFileName);   /* remove operation artefact */
+    result = FIO_compressFilename_srcFile(ress, dstFileName, srcFileName, cLevel);

-    fclose(ress.srcFile);   /* no pb to expect : only reading */
     if (fclose(ress.dstFile)) EXM_THROW(28, "Write error : cannot properly close %s", dstFileName);
+    if (result!=0) remove(dstFileName);   /* remove operation artefact */
     return result;
 }

@@ -422,14 +427,12 @@ int FIO_compressFilename(const char* dstFileName, const char* srcFileName,
                          const char* dictFileName, int compressionLevel)
 {
     clock_t const start = clock();
+    cRess_t const ress = FIO_createCResources(dictFileName);
-    int issueWithSrcFile = 0;
-
-    issueWithSrcFile += FIO_compressFilename_extRess(ress, dstFileName, srcFileName, compressionLevel);
-
+    int const issueWithSrcFile = FIO_compressFilename_dstFile(ress, dstFileName, srcFileName, compressionLevel);
     FIO_freeCResources(ress);

-    {   double seconds = (double)(clock() - start) / CLOCKS_PER_SEC;
+    {   double const seconds = (double)(clock() - start) / CLOCKS_PER_SEC;
         DISPLAYLEVEL(4, "Completed in %.2f sec \n", seconds);
     }
     return issueWithSrcFile;
@@ -465,7 +468,7 @@ int FIO_compressMultipleFilenames(const char** inFileNamesTable, unsigned nbFile
         if (dfnSize <= ifnSize+suffixSize+1) { free(dstFileName); dfnSize = ifnSize + 20; dstFileName = (char*)malloc(dfnSize); }
         strcpy(dstFileName, inFileNamesTable[u]);
         strcat(dstFileName, suffix);
-        missed_files += FIO_compressFilename_extRess(ress, dstFileName,
+        missed_files += FIO_compressFilename_dstFile(ress, dstFileName,
                                                      inFileNamesTable[u], compressionLevel);
     }
 }
@@ -679,7 +682,13 @@ static int FIO_decompressSrcFile(dRess_t ress, const char* srcFileName)
 {
     unsigned long long filesize = 0;
     FILE* const dstFile = ress.dstFile;
-    FILE* const srcFile = FIO_openSrcFile(srcFileName);
+    FILE* srcFile;
+
+    if (UTIL_isDirectory(srcFileName)) {
+        DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName);
+        return 1;
+    }
+    srcFile = FIO_openSrcFile(srcFileName);
     if (srcFile==0) return 1;

     /* for each frame */
@@ -712,6 +721,7 @@ static int FIO_decompressSrcFile(dRess_t ress, const char* srcFileName)

     /* Close */
     fclose(srcFile);
+    if (g_removeSrcFile) remove(srcFileName);
     return 0;
 }

@@ -721,7 +731,7 @@ static int FIO_decompressSrcFile(dRess_t ress, const char* srcFileName)
    @return : 0 : OK
              1 : operation aborted (src not available, dst already taken, etc.)
 */
-static int FIO_decompressFile_extRess(dRess_t ress,
+static int FIO_decompressDstFile(dRess_t ress,
                                       const char* dstFileName, const char* srcFileName)
 {
     int result;
@@ -729,9 +739,9 @@ static int FIO_decompressFile_extRess(dRess_t ress,
     if (ress.dstFile==0) return 1;

     result = FIO_decompressSrcFile(ress, srcFileName);
-    if (result != 0) remove(dstFileName);

     if (fclose(ress.dstFile)) EXM_THROW(38, "Write error : cannot properly close %s", dstFileName);
+    if (result != 0) remove(dstFileName);
     return result;
 }

@@ -742,7 +752,7 @@ int FIO_decompressFilename(const char* dstFileName, const char* srcFileName,
     int missingFiles = 0;
     dRess_t ress = FIO_createDResources(dictFileName);

-    missingFiles += FIO_decompressFile_extRess(ress, dstFileName, srcFileName);
+    missingFiles += FIO_decompressDstFile(ress, dstFileName, srcFileName);

     FIO_freeDResources(ress);
     return missingFiles;
@@ -789,7 +799,7 @@ int FIO_decompressMultipleFilenames(const char** srcNamesTable, unsigned nbFiles
             memcpy(dstFileName, srcFileName, sfnSize - suffixSize);
             dstFileName[sfnSize-suffixSize] = '\0';

-            missingFiles += FIO_decompressFile_extRess(ress, dstFileName, srcFileName);
+            missingFiles += FIO_decompressDstFile(ress, dstFileName, srcFileName);
         }
         free(dstFileName);
     }
diff --git a/programs/fileio.h b/programs/fileio.h
index 01e30834..4a4f3d22 100644
--- a/programs/fileio.h
+++ b/programs/fileio.h
@@ -50,6 +50,7 @@ void FIO_setMaxWLog(unsigned maxWLog);   /**< if `maxWLog` == 0, no max enforc
 void FIO_setSparseWrite(unsigned sparse);   /**< 0: no sparse; 1: disable on stdout; 2: always enabled */
 void FIO_setDictIDFlag(unsigned dictIDFlag);
 void FIO_setChecksumFlag(unsigned checksumFlag);
+void FIO_setRemoveSrcFile(unsigned flag);


 /*-*************************************
diff --git a/programs/playTests.sh b/programs/playTests.sh
index f51d28ea..3be4c777 100755
--- a/programs/playTests.sh
+++ b/programs/playTests.sh
@@ -66,6 +66,14 @@ $ZSTD -q tmp && die "overwrite check failed!"
 $ZSTD -q -f tmp
 $ZSTD -q --force tmp
 $ZSTD -df tmp && die "should have refused : wrong extension"
+$ECHO "test : file removal"
+$ZSTD -f --rm tmp
+ls tmp && die "tmp should no longer be present"
+$ZSTD -f -d --rm tmp.zst
+ls tmp.zst && die "tmp.zst should no longer be present"
+rm tmp
+$ZSTD -f tmp && die "tmp not present : should have failed"
+ls tmp.zst && die "tmp.zst should not be created"


 $ECHO "\n**** Pass-Through mode **** "
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index 75ce129f..b59c6ebd 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -132,23 +132,24 @@ static int usage_advanced(const char* programName)
 #ifdef UTIL_HAS_CREATEFILELIST
     DISPLAY( " -r : operate recursively on directories\n");
 #endif
+    DISPLAY( "--rm : remove source files after successful de/compression \n");
 #ifndef ZSTD_NOCOMPRESS
     DISPLAY( "--ultra : enable ultra modes (requires more memory to decompress)\n");
-    DISPLAY( "--no-dictID:don't write dictID into header (dictionary compression)\n");
+    DISPLAY( "--no-dictID : don't write dictID into header (dictionary compression)\n");
     DISPLAY( "--check : enable integrity check\n");
 #endif
 #ifndef ZSTD_NODECOMPRESS
     DISPLAY( "--test : test compressed file integrity \n");
-    DISPLAY( "--[no-]sparse : sparse mode (default:enabled on file, disabled on stdout)\n");
+    DISPLAY( "--[no-]sparse : sparse mode (default:enabled on file, disabled on stdout)\n");
 #endif
 #ifndef ZSTD_NODICT
     DISPLAY( "\n");
     DISPLAY( "Dictionary builder :\n");
-    DISPLAY( "--train : create a dictionary from a training set of files \n");
-    DISPLAY( " -o file: `file` is dictionary name (default: %s) \n", g_defaultDictName);
-    DISPLAY( "--maxdict:limit dictionary to specified size (default : %u) \n", g_defaultMaxDictSize);
+    DISPLAY( "--train ## : create a dictionary from a training set of files \n");
+    DISPLAY( " -o file : `file` is dictionary name (default: %s) \n", g_defaultDictName);
+    DISPLAY( "--maxdict ## : limit dictionary to specified size (default : %u) \n", g_defaultMaxDictSize);
     DISPLAY( " -s# : dictionary selectivity level (default: %u)\n", g_defaultSelectivityLevel);
-    DISPLAY( "--dictID: force dictionary ID to specified value (default: random)\n");
+    DISPLAY( "--dictID ## : force dictionary ID to specified value (default: random)\n");
 #endif
 #ifndef ZSTD_NOBENCH
     DISPLAY( "\n");
@@ -264,6 +265,7 @@ int main(int argCount, const char** argv)
         if (!strcmp(argument, "--maxdict")) { nextArgumentIsMaxDict=1; continue; }
         if (!strcmp(argument, "--dictID")) { nextArgumentIsDictID=1; continue; }
         if (!strcmp(argument, "--keep")) { continue; }   /* does nothing, since preserving input is default; for gzip/xz compatibility */
+        if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(1); continue; }

         /* '-' means stdin/stdout */
         if (!strcmp(argument, "-")){
diff --git a/projects/VS2010/fullbench/fullbench.vcxproj b/projects/VS2010/fullbench/fullbench.vcxproj
index 47576535..0cd32d86 100644
--- a/projects/VS2010/fullbench/fullbench.vcxproj
+++ b/projects/VS2010/fullbench/fullbench.vcxproj
@@ -1,4 +1,4 @@
@@ -172,15 +172,11 @@
diff --git a/projects/VS2010/fullbench/fullbench.vcxproj.filters b/projects/VS2010/fullbench/fullbench.vcxproj.filters
index 75d738c9..a81b2511 100644
--- a/projects/VS2010/fullbench/fullbench.vcxproj.filters
+++ b/projects/VS2010/fullbench/fullbench.vcxproj.filters
@@ -47,38 +47,29 @@
@@ -88,5 +79,8 @@
diff --git a/projects/VS2010/fuzzer/fuzzer.vcxproj b/projects/VS2010/fuzzer/fuzzer.vcxproj
index bdda5f3f..56052575 100644
--- a/projects/VS2010/fuzzer/fuzzer.vcxproj
+++ b/projects/VS2010/fuzzer/fuzzer.vcxproj
@@ -1,4 +1,4 @@
@@ -172,19 +172,14 @@
diff --git a/projects/VS2010/fuzzer/fuzzer.vcxproj.filters b/projects/VS2010/fuzzer/fuzzer.vcxproj.filters
index fbbc183a..5161ea0e 100644
--- a/projects/VS2010/fuzzer/fuzzer.vcxproj.filters
+++ b/projects/VS2010/fuzzer/fuzzer.vcxproj.filters
@@ -14,9 +14,6 @@
@@ -44,11 +41,17 @@
@@ -58,35 +61,32 @@
diff --git a/projects/VS2010/zstd/zstd.vcxproj b/projects/VS2010/zstd/zstd.vcxproj
index fbbb4d01..3c1e80b5 100644
--- a/projects/VS2010/zstd/zstd.vcxproj
+++ b/projects/VS2010/zstd/zstd.vcxproj
@@ -1,4 +1,4 @@
@@ -48,17 +48,12 @@
diff --git a/projects/VS2010/zstd/zstd.vcxproj.filters b/projects/VS2010/zstd/zstd.vcxproj.filters
index 49d93916..0e1e9279 100644
--- a/projects/VS2010/zstd/zstd.vcxproj.filters
+++ b/projects/VS2010/zstd/zstd.vcxproj.filters
@@ -1,4 +1,4 @@
@@ -17,9 +17,6 @@
@@ -86,6 +83,9 @@
@@ -94,9 +94,6 @@
@@ -130,44 +127,32 @@
diff --git a/projects/VS2010/zstdlib/zstdlib.vcxproj b/projects/VS2010/zstdlib/zstdlib.vcxproj
index e0e3dbb6..70f80647 100644
--- a/projects/VS2010/zstdlib/zstdlib.vcxproj
+++ b/projects/VS2010/zstdlib/zstdlib.vcxproj
@@ -1,4 +1,4 @@
@@ -37,15 +37,11 @@
diff --git a/projects/VS2010/zstdlib/zstdlib.vcxproj.filters b/projects/VS2010/zstdlib/zstdlib.vcxproj.filters
index e4bebb54..439e3cea 100644
--- a/projects/VS2010/zstdlib/zstdlib.vcxproj.filters
+++ b/projects/VS2010/zstdlib/zstdlib.vcxproj.filters
@@ -47,11 +47,11 @@
@@ -67,35 +67,29 @@
diff --git a/tests/.gitignore b/tests/.gitignore
index bdb2cbdf..4d14ba0f 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -1,3 +1,4 @@
 # Tmp test directory
 zstdtest
-
+speedTest
+versionsTest
diff --git a/tests/Makefile b/tests/Makefile
index 420b81e9..53017853 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -27,6 +27,8 @@ PYTHON?= python3

 TESTDIR := zstdtest

+.PHONY: default all clean versionsTest
+
 default: all

 all: versionsTest