From d3debc39ea6c07376b2ca61f74f69ed4130c41cf Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 18 Feb 2016 14:49:24 +0100 Subject: [PATCH 001/247] Visual : `zstd` supports for `*` wildcard character on command line Visual : `zstd` 32-bits version compatible with Windows XP --- README.md | 4 ++-- visual/2013/zstd/zstd.vcxproj | 13 ++++++++----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index b84d8a8f..bf407179 100644 --- a/README.md +++ b/README.md @@ -106,7 +106,7 @@ size_t dictSize = ZDICT_trainFromBuffer(dictBuffer, dictBufferCapacity, #include "zstd.h" (...) ZSTD_CCtx* context = ZSTD_createCCtx(); -size_t compressedSize = ZSTD_compress_usingDict(context, dst, dstCapacity, src, srcSize, dict, dictSize, compressionLevel); +size_t compressedSize = ZSTD_compress_usingDict(context, dst, dstCapacity, src, srcSize, dictBuffer, dictSize, compressionLevel); ``` 3) Decompress with dictionary @@ -115,7 +115,7 @@ size_t compressedSize = ZSTD_compress_usingDict(context, dst, dstCapacity, src, #include "zstd.h" (...) 
ZSTD_DCtx* context = ZSTD_createDCtx(); -size_t regeneratedSize = ZSTD_decompress_usingDict(context, dst, dstCapacity, cSrc, cSrcSize, dict, dictSize); +size_t regeneratedSize = ZSTD_decompress_usingDict(context, dst, dstCapacity, cSrc, cSrcSize, dictBuffer, dictSize); ``` diff --git a/visual/2013/zstd/zstd.vcxproj b/visual/2013/zstd/zstd.vcxproj index 45319769..7c1c8852 100644 --- a/visual/2013/zstd/zstd.vcxproj +++ b/visual/2013/zstd/zstd.vcxproj @@ -87,7 +87,7 @@ Application false true - v120 + v120_xp Unicode @@ -126,12 +126,14 @@ false $(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath); - true + false + $(SolutionDir)$(Configuration)\ false $(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath); false + $(SolutionDir)$(Configuration)\ @@ -170,19 +172,19 @@ Level4 - MaxSpeed + Full true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - true + false true - /analyze:stacksize19000 %(AdditionalOptions) Console true true true + setargv.obj; kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) @@ -203,6 +205,7 @@ true true true + setargv.obj;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) From ba7ba5b6596a26ada7e45c0e7e6d9d01be5e456f Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 18 Feb 2016 19:15:19 +0100 Subject: [PATCH 002/247] Visual : 32-bit build release uses static linking (/MT) --- visual/2013/zstd/zstd.vcxproj | 1 + 1 file changed, 1 insertion(+) diff --git a/visual/2013/zstd/zstd.vcxproj b/visual/2013/zstd/zstd.vcxproj index 7c1c8852..99d308b9 100644 --- a/visual/2013/zstd/zstd.vcxproj 
+++ b/visual/2013/zstd/zstd.vcxproj @@ -178,6 +178,7 @@ WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) false true + MultiThreaded Console From cc52a97214f53fe2605b3999686e0402cfcbf2b4 Mon Sep 17 00:00:00 2001 From: inikep Date: Fri, 19 Feb 2016 10:09:35 +0100 Subject: [PATCH 003/247] added hashLog3 --- lib/zstd_compress.c | 235 ++++++++++++++++++++++---------------------- lib/zstd_static.h | 3 + 2 files changed, 123 insertions(+), 115 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 7bea6abe..26898c59 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -74,24 +74,6 @@ size_t ZSTD_compressBound(size_t srcSize) { return FSE_compressBound(srcSize) + /*-************************************* * Sequence storage ***************************************/ -/** ZSTD_resetFreqs() : for opt variants */ -static void ZSTD_resetFreqs(seqStore_t* ssPtr) -{ - unsigned u; - ssPtr->matchLengthSum = 512; // (1<<MLbits); - ssPtr->litLengthSum = 256; // (1<<LLbits); - ssPtr->litSum = (1<<Litbits); - ssPtr->offCodeSum = (1<<Offbits); - - for (u=0; u<=MaxLit; u++) - ssPtr->litFreq[u] = 1; - for (u=0; u<=MaxLL; u++) - ssPtr->litLengthFreq[u] = 1; - for (u=0; u<=MaxML; u++) - ssPtr->matchLengthFreq[u] = 1; - for (u=0; u<=MaxOff; u++) - ssPtr->offCodeFreq[u] = 1; -} static void ZSTD_resetSeqStore(seqStore_t* ssPtr) { @@ -114,6 +96,7 @@ struct ZSTD_CCtx_s U32 dictLimit; /* below that point, need extDict */ U32 lowLimit; /* below that point, no more data */ U32 nextToUpdate; /* index from which to continue dictionary update */ + U32 nextToUpdate3; /* index from which to continue dictionary update */ U32 loadedDictEnd; U32 stage; ZSTD_parameters params; @@ -125,6 +108,7 @@ struct ZSTD_CCtx_s seqStore_t seqStore; /* sequences storage ptrs */ U32* hashTable; + U32* hashTable3; U32* contentTable; HUF_CElt* hufTable; U32 flagStaticTables; @@ -167,6 +151,7 @@ void ZSTD_validateParams(ZSTD_parameters* params) CLAMP(params->windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); CLAMP(params->contentLog, ZSTD_CONTENTLOG_MIN, ZSTD_CONTENTLOG_MAX); CLAMP(params->hashLog, 
ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); + CLAMP(params->hashLog3, ZSTD_HASHLOG3_MIN, ZSTD_HASHLOG3_MAX); CLAMP(params->searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); CLAMP(params->searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX); CLAMP(params->targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX); @@ -188,7 +173,7 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, const size_t blockSize = MIN(BLOCKSIZE, (size_t)1 << params.windowLog); /* reserve table memory */ const U32 contentLog = (params.strategy == ZSTD_fast) ? 1 : params.contentLog; - const size_t tableSpace = ((1 << contentLog) + (1 << params.hashLog)) * sizeof(U32); + const size_t tableSpace = ((1 << contentLog) + (1 << params.hashLog) + (1 << params.hashLog3)) * sizeof(U32); const size_t neededSpace = tableSpace + (256*sizeof(U32)) + (3*blockSize) + ((1<workSpaceSize < neededSpace) { free(zc->workSpace); @@ -197,7 +182,8 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, zc->workSpaceSize = neededSpace; } memset(zc->workSpace, 0, tableSpace ); /* reset only tables */ - zc->hashTable = (U32*)(zc->workSpace); + zc->hashTable3 = (U32*)(zc->workSpace); + zc->hashTable = zc->hashTable3 + ((size_t)1 << params.hashLog3); zc->contentTable = zc->hashTable + ((size_t)1 << params.hashLog); zc->seqStore.buffer = zc->contentTable + ((size_t)1 << contentLog); zc->hufTable = (HUF_CElt*)zc->seqStore.buffer; @@ -225,6 +211,7 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, zc->seqStore.matchLengthStart = zc->seqStore.litLengthStart + (blockSize>>2); zc->seqStore.dumpsStart = zc->seqStore.matchLengthStart + (blockSize>>2); // zc->seqStore.XXX = zc->seqStore.dumpsStart + (blockSize>>4); + zc->seqStore.litLengthSum = 0; zc->hbSize = 0; zc->stage = 0; @@ -248,7 +235,7 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx) ZSTD_resetCCtx_advanced(dstCCtx, srcCCtx->params); /* copy tables */ - memcpy(dstCCtx->hashTable, srcCCtx->hashTable, tableSpace); + 
memcpy(dstCCtx->workSpace, srcCCtx->workSpace, tableSpace); /* copy frame header */ dstCCtx->hbSize = srcCCtx->hbSize; @@ -555,6 +542,11 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, op += cSize; } +#if ZSTD_OPT_DEBUG >= 5 + if (nbSeq >= 32768) + printf("ERROR: nbSeq=%d\n", (int)nbSeq); +#endif + /* Sequences Header */ if ((oend-op) < MIN_SEQUENCES_SIZE) return ERROR(dstSize_tooSmall); if (nbSeq < 128) *op++ = (BYTE)nbSeq; @@ -732,7 +724,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const B if (g_start==NULL) g_start = literals; //if (literals - g_start == 8695) printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n", - (U32)(literals - g_start), (U32)litLength, (U32)matchCode+4, (U32)offsetCode); + (U32)(literals - g_start), (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); #endif /* copy Literals */ @@ -875,6 +867,10 @@ static size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE /*-************************************* * Hashes ***************************************/ +static const U32 prime3bytes = 506832829U; +static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; } +static size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_read32(ptr), h); } + static const U32 prime4bytes = 2654435761U; static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); } @@ -1414,6 +1410,10 @@ static size_t ZSTD_BtFindBestMatch_selectMLS_extDict ( FORCE_INLINE U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls) { +#if MINMATCH == 3 + U32* const hashTable3 = zc->hashTable3; + const U32 hashLog3 = zc->params.hashLog3; +#endif U32* const hashTable = zc->hashTable; const U32 hashLog = zc->params.hashLog; U32* const chainTable = zc->contentTable; @@ -1426,6 +1426,9 @@ U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, 
U32 mls) size_t h = ZSTD_hashPtr(base+idx, hashLog, mls); NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; hashTable[h] = idx; +#if MINMATCH == 3 + hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx; +#endif idx++; } @@ -1433,6 +1436,8 @@ U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls) return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; } +#include "zstd_opt.h" + FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */ size_t ZSTD_HcFindBestMatch_generic ( @@ -1651,7 +1656,6 @@ _storeSequence: } } -#include "zstd_opt.h" static void ZSTD_compressBlock_opt_bt(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { @@ -2271,107 +2275,108 @@ size_t ZSTD_compress(void* dst, size_t maxDstSize, const void* src, size_t srcSi #define ZSTD_MAX_CLEVEL 21 unsigned ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } + static const ZSTD_parameters ZSTD_defaultParameters[4][ZSTD_MAX_CLEVEL+1] = { { /* "default" */ - /* l, W, C, H, S, L, SL, strat */ - { 0, 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 - never used */ - { 0, 19, 13, 14, 1, 7, 4, ZSTD_fast }, /* level 1 */ - { 0, 19, 15, 16, 1, 6, 4, ZSTD_fast }, /* level 2 */ - { 0, 20, 18, 20, 1, 6, 4, ZSTD_fast }, /* level 3 */ - { 0, 21, 19, 21, 1, 6, 4, ZSTD_fast }, /* level 4 */ - { 0, 20, 14, 18, 3, 5, 4, ZSTD_greedy }, /* level 5 */ - { 0, 20, 18, 19, 3, 5, 4, ZSTD_greedy }, /* level 6 */ - { 0, 21, 17, 20, 3, 5, 4, ZSTD_lazy }, /* level 7 */ - { 0, 21, 19, 20, 3, 5, 4, ZSTD_lazy }, /* level 8 */ - { 0, 21, 20, 20, 3, 5, 4, ZSTD_lazy2 }, /* level 9 */ - { 0, 21, 19, 21, 4, 5, 4, ZSTD_lazy2 }, /* level 10 */ - { 0, 22, 20, 22, 4, 5, 4, ZSTD_lazy2 }, /* level 11 */ - { 0, 22, 20, 22, 5, 5, 4, ZSTD_lazy2 }, /* level 12 */ - { 0, 22, 21, 22, 5, 5, 4, ZSTD_lazy2 }, /* level 13 */ - { 0, 22, 22, 23, 5, 5, 4, ZSTD_lazy2 }, /* level 14 */ - { 0, 23, 23, 23, 5, 5, 4, ZSTD_lazy2 }, /* level 15 */ - { 0, 23, 22, 22, 5, 5, 4, ZSTD_btlazy2 }, /* level 16 */ - { 0, 24, 24, 23, 4, 5, 4, ZSTD_btlazy2 
}, /* level 17 */ - { 0, 24, 24, 23, 5, 5, 30, ZSTD_btopt }, /* level 18 */ - { 0, 25, 25, 24, 5, 4, 40, ZSTD_btopt }, /* level 19 */ - { 0, 26, 26, 25, 8, 4,256, ZSTD_btopt }, /* level 20 */ - { 0, 26, 27, 25, 10, 4,256, ZSTD_btopt }, /* level 21 */ + /* l, W, C, H, H3, S, L, SL, strat */ + { 0, 0, 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 - never used */ + { 0, 19, 13, 14, 0, 1, 7, 4, ZSTD_fast }, /* level 1 */ + { 0, 19, 15, 16, 0, 1, 6, 4, ZSTD_fast }, /* level 2 */ + { 0, 20, 18, 20, 0, 1, 6, 4, ZSTD_fast }, /* level 3 */ + { 0, 21, 19, 21, 0, 1, 6, 4, ZSTD_fast }, /* level 4 */ + { 0, 20, 14, 18, 0, 3, 5, 4, ZSTD_greedy }, /* level 5 */ + { 0, 20, 18, 19, 0, 3, 5, 4, ZSTD_greedy }, /* level 6 */ + { 0, 21, 17, 20, 0, 3, 5, 4, ZSTD_lazy }, /* level 7 */ + { 0, 21, 19, 20, 0, 3, 5, 4, ZSTD_lazy }, /* level 8 */ + { 0, 21, 20, 20, 0, 3, 5, 4, ZSTD_lazy2 }, /* level 9 */ + { 0, 21, 19, 21, 0, 4, 5, 4, ZSTD_lazy2 }, /* level 10 */ + { 0, 22, 20, 22, 0, 4, 5, 4, ZSTD_lazy2 }, /* level 11 */ + { 0, 22, 20, 22, 0, 5, 5, 4, ZSTD_lazy2 }, /* level 12 */ + { 0, 22, 21, 22, 0, 5, 5, 4, ZSTD_lazy2 }, /* level 13 */ + { 0, 22, 22, 23, 0, 5, 5, 4, ZSTD_lazy2 }, /* level 14 */ + { 0, 23, 23, 23, 0, 5, 5, 4, ZSTD_lazy2 }, /* level 15 */ + { 0, 23, 22, 22, 0, 5, 5, 4, ZSTD_btlazy2 }, /* level 16 */ + { 0, 24, 24, 23, 0, 4, 5, 4, ZSTD_btlazy2 }, /* level 17 */ + { 0, 24, 24, 23, 16, 5, 5, 30, ZSTD_btopt }, /* level 18 */ + { 0, 25, 25, 24, 16, 5, 4, 40, ZSTD_btopt }, /* level 19 */ + { 0, 26, 26, 25, 16, 8, 4,256, ZSTD_btopt }, /* level 20 */ + { 0, 26, 27, 25, 24, 10, 4,256, ZSTD_btopt }, /* level 21 */ }, { /* for srcSize <= 256 KB */ - /* l, W, C, H, S, L, T, strat */ - { 0, 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 */ - { 0, 18, 14, 15, 1, 6, 4, ZSTD_fast }, /* level 1 */ - { 0, 18, 14, 16, 1, 5, 4, ZSTD_fast }, /* level 2 */ - { 0, 18, 14, 17, 1, 5, 4, ZSTD_fast }, /* level 3.*/ - { 0, 18, 14, 15, 4, 4, 4, ZSTD_greedy }, /* level 4 */ - { 0, 18, 16, 17, 4, 4, 4, 
ZSTD_greedy }, /* level 5 */ - { 0, 18, 17, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */ - { 0, 18, 17, 17, 4, 4, 4, ZSTD_lazy }, /* level 7 */ - { 0, 18, 17, 17, 4, 4, 4, ZSTD_lazy2 }, /* level 8 */ - { 0, 18, 17, 17, 5, 4, 4, ZSTD_lazy2 }, /* level 9 */ - { 0, 18, 17, 17, 6, 4, 4, ZSTD_lazy2 }, /* level 10 */ - { 0, 18, 17, 17, 7, 4, 4, ZSTD_lazy2 }, /* level 11 */ - { 0, 18, 18, 17, 4, 4, 4, ZSTD_btlazy2 }, /* level 12 */ - { 0, 18, 19, 17, 7, 4, 4, ZSTD_btlazy2 }, /* level 13.*/ - { 0, 18, 17, 19, 8, 4, 24, ZSTD_btopt }, /* level 14.*/ - { 0, 18, 19, 19, 8, 4, 48, ZSTD_btopt }, /* level 15.*/ - { 0, 18, 19, 18, 9, 4,128, ZSTD_btopt }, /* level 16.*/ - { 0, 18, 19, 18, 9, 4,192, ZSTD_btopt }, /* level 17.*/ - { 0, 18, 19, 18, 9, 4,256, ZSTD_btopt }, /* level 18.*/ - { 0, 18, 19, 18, 10, 4,256, ZSTD_btopt }, /* level 19.*/ - { 0, 18, 19, 18, 11, 4,256, ZSTD_btopt }, /* level 20.*/ - { 0, 18, 19, 18, 12, 4,256, ZSTD_btopt }, /* level 21.*/ + /* l, W, C, H, H3, S, L, T, strat */ + { 0, 0, 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 */ + { 0, 18, 14, 15, 0, 1, 6, 4, ZSTD_fast }, /* level 1 */ + { 0, 18, 14, 16, 0, 1, 5, 4, ZSTD_fast }, /* level 2 */ + { 0, 18, 14, 17, 0, 1, 5, 4, ZSTD_fast }, /* level 3.*/ + { 0, 18, 14, 15, 0, 4, 4, 4, ZSTD_greedy }, /* level 4 */ + { 0, 18, 16, 17, 0, 4, 4, 4, ZSTD_greedy }, /* level 5 */ + { 0, 18, 17, 17, 0, 3, 4, 4, ZSTD_lazy }, /* level 6 */ + { 0, 18, 17, 17, 0, 4, 4, 4, ZSTD_lazy }, /* level 7 */ + { 0, 18, 17, 17, 0, 4, 4, 4, ZSTD_lazy2 }, /* level 8 */ + { 0, 18, 17, 17, 0, 5, 4, 4, ZSTD_lazy2 }, /* level 9 */ + { 0, 18, 17, 17, 0, 6, 4, 4, ZSTD_lazy2 }, /* level 10 */ + { 0, 18, 17, 17, 0, 7, 4, 4, ZSTD_lazy2 }, /* level 11 */ + { 0, 18, 18, 17, 0, 4, 4, 4, ZSTD_btlazy2 }, /* level 12 */ + { 0, 18, 19, 17, 0, 7, 4, 4, ZSTD_btlazy2 }, /* level 13.*/ + { 0, 18, 17, 19, 0, 8, 4, 24, ZSTD_btopt }, /* level 14.*/ + { 0, 18, 19, 19, 0, 8, 4, 48, ZSTD_btopt }, /* level 15.*/ + { 0, 18, 19, 18, 0, 9, 4,128, ZSTD_btopt }, /* level 16.*/ 
+ { 0, 18, 19, 18, 0, 9, 4,192, ZSTD_btopt }, /* level 17.*/ + { 0, 18, 19, 18, 0, 9, 4,256, ZSTD_btopt }, /* level 18.*/ + { 0, 18, 19, 18, 0, 10, 4,256, ZSTD_btopt }, /* level 19.*/ + { 0, 18, 19, 18, 0, 11, 4,256, ZSTD_btopt }, /* level 20.*/ + { 0, 18, 19, 18, 0, 12, 4,256, ZSTD_btopt }, /* level 21.*/ }, { /* for srcSize <= 128 KB */ - /* l, W, C, H, S, L, T, strat */ - { 0, 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 - never used */ - { 0, 17, 12, 13, 1, 6, 4, ZSTD_fast }, /* level 1 */ - { 0, 17, 13, 16, 1, 5, 4, ZSTD_fast }, /* level 2 */ - { 0, 17, 13, 14, 2, 5, 4, ZSTD_greedy }, /* level 3 */ - { 0, 17, 13, 15, 3, 4, 4, ZSTD_greedy }, /* level 4 */ - { 0, 17, 15, 17, 4, 4, 4, ZSTD_greedy }, /* level 5 */ - { 0, 17, 16, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */ - { 0, 17, 16, 17, 4, 4, 4, ZSTD_lazy }, /* level 7 */ - { 0, 17, 17, 16, 4, 4, 4, ZSTD_lazy2 }, /* level 8 */ - { 0, 17, 17, 16, 5, 4, 4, ZSTD_lazy2 }, /* level 9 */ - { 0, 17, 17, 16, 6, 4, 4, ZSTD_lazy2 }, /* level 10 */ - { 0, 17, 17, 17, 7, 4, 4, ZSTD_lazy2 }, /* level 11 */ - { 0, 17, 17, 17, 8, 4, 4, ZSTD_lazy2 }, /* level 12 */ - { 0, 17, 17, 17, 9, 4, 4, ZSTD_lazy2 }, /* level 13 */ - { 0, 17, 18, 16, 5, 4, 20, ZSTD_btopt }, /* level 14 */ - { 0, 17, 18, 16, 9, 4, 48, ZSTD_btopt }, /* level 15 */ - { 0, 17, 18, 17, 7, 4,128, ZSTD_btopt }, /* level 16 */ - { 0, 17, 18, 17, 8, 4,128, ZSTD_btopt }, /* level 17 */ - { 0, 17, 18, 17, 8, 4,256, ZSTD_btopt }, /* level 18 */ - { 0, 17, 18, 17, 9, 4,256, ZSTD_btopt }, /* level 19 */ - { 0, 17, 18, 17, 10, 4,512, ZSTD_btopt }, /* level 20 */ - { 0, 17, 18, 17, 11, 4,512, ZSTD_btopt }, /* level 21 */ + /* l, W, C, H, H3, S, L, T, strat */ + { 0, 0, 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 - never used */ + { 0, 17, 12, 13, 0, 1, 6, 4, ZSTD_fast }, /* level 1 */ + { 0, 17, 13, 16, 0, 1, 5, 4, ZSTD_fast }, /* level 2 */ + { 0, 17, 13, 14, 0, 2, 5, 4, ZSTD_greedy }, /* level 3 */ + { 0, 17, 13, 15, 0, 3, 4, 4, ZSTD_greedy }, /* level 4 */ + { 0, 17, 15, 17, 
0, 4, 4, 4, ZSTD_greedy }, /* level 5 */ + { 0, 17, 16, 17, 0, 3, 4, 4, ZSTD_lazy }, /* level 6 */ + { 0, 17, 16, 17, 0, 4, 4, 4, ZSTD_lazy }, /* level 7 */ + { 0, 17, 17, 16, 0, 4, 4, 4, ZSTD_lazy2 }, /* level 8 */ + { 0, 17, 17, 16, 0, 5, 4, 4, ZSTD_lazy2 }, /* level 9 */ + { 0, 17, 17, 16, 0, 6, 4, 4, ZSTD_lazy2 }, /* level 10 */ + { 0, 17, 17, 17, 0, 7, 4, 4, ZSTD_lazy2 }, /* level 11 */ + { 0, 17, 17, 17, 0, 8, 4, 4, ZSTD_lazy2 }, /* level 12 */ + { 0, 17, 17, 17, 0, 9, 4, 4, ZSTD_lazy2 }, /* level 13 */ + { 0, 17, 18, 16, 0, 5, 4, 20, ZSTD_btopt }, /* level 14 */ + { 0, 17, 18, 16, 0, 9, 4, 48, ZSTD_btopt }, /* level 15 */ + { 0, 17, 18, 17, 0, 7, 4,128, ZSTD_btopt }, /* level 16 */ + { 0, 17, 18, 17, 0, 8, 4,128, ZSTD_btopt }, /* level 17 */ + { 0, 17, 18, 17, 0, 8, 4,256, ZSTD_btopt }, /* level 18 */ + { 0, 17, 18, 17, 0, 9, 4,256, ZSTD_btopt }, /* level 19 */ + { 0, 17, 18, 17, 0, 10, 4,512, ZSTD_btopt }, /* level 20 */ + { 0, 17, 18, 17, 0, 11, 4,512, ZSTD_btopt }, /* level 21 */ }, { /* for srcSize <= 16 KB */ - /* l, W, C, H, S, L, T, strat */ - { 0, 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 -- never used */ - { 0, 14, 14, 14, 1, 4, 4, ZSTD_fast }, /* level 1 */ - { 0, 14, 14, 15, 1, 4, 4, ZSTD_fast }, /* level 2 */ - { 0, 14, 13, 15, 4, 4, 4, ZSTD_greedy }, /* level 3 */ - { 0, 14, 14, 15, 3, 4, 4, ZSTD_lazy }, /* level 4 */ - { 0, 14, 14, 14, 6, 4, 4, ZSTD_lazy }, /* level 5 */ - { 0, 14, 14, 14, 5, 4, 4, ZSTD_lazy2 }, /* level 6 */ - { 0, 14, 14, 14, 7, 4, 4, ZSTD_lazy2 }, /* level 7 */ - { 0, 14, 14, 14, 8, 4, 4, ZSTD_lazy2 }, /* level 8 */ - { 0, 14, 14, 14, 9, 4, 4, ZSTD_lazy2 }, /* level 9 */ - { 0, 14, 14, 14, 10, 4, 4, ZSTD_lazy2 }, /* level 10 */ - { 0, 14, 14, 14, 11, 4, 4, ZSTD_lazy2 }, /* level 11 */ - { 0, 14, 15, 15, 12, 4, 32, ZSTD_btopt }, /* level 12 */ - { 0, 14, 15, 15, 12, 4, 64, ZSTD_btopt }, /* level 13 */ - { 0, 14, 15, 15, 12, 4, 96, ZSTD_btopt }, /* level 14 */ - { 0, 14, 15, 15, 12, 4,128, ZSTD_btopt }, /* level 15 */ - { 0, 
14, 15, 15, 12, 4,256, ZSTD_btopt }, /* level 16 */ - { 0, 14, 15, 15, 13, 4,256, ZSTD_btopt }, /* level 17 */ - { 0, 14, 15, 15, 14, 4,256, ZSTD_btopt }, /* level 18 */ - { 0, 14, 15, 15, 15, 4,256, ZSTD_btopt }, /* level 19 */ - { 0, 14, 15, 15, 16, 4,256, ZSTD_btopt }, /* level 20 */ - { 0, 14, 15, 15, 17, 4,256, ZSTD_btopt }, /* level 21 */ + /* l, W, C, H, H3, S, L, T, strat */ + { 0, 0, 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 -- never used */ + { 0, 14, 14, 14, 0, 1, 4, 4, ZSTD_fast }, /* level 1 */ + { 0, 14, 14, 15, 0, 1, 4, 4, ZSTD_fast }, /* level 2 */ + { 0, 14, 13, 15, 0, 4, 4, 4, ZSTD_greedy }, /* level 3 */ + { 0, 14, 14, 15, 0, 3, 4, 4, ZSTD_lazy }, /* level 4 */ + { 0, 14, 14, 14, 0, 6, 4, 4, ZSTD_lazy }, /* level 5 */ + { 0, 14, 14, 14, 0, 5, 4, 4, ZSTD_lazy2 }, /* level 6 */ + { 0, 14, 14, 14, 0, 7, 4, 4, ZSTD_lazy2 }, /* level 7 */ + { 0, 14, 14, 14, 0, 8, 4, 4, ZSTD_lazy2 }, /* level 8 */ + { 0, 14, 14, 14, 0, 9, 4, 4, ZSTD_lazy2 }, /* level 9 */ + { 0, 14, 14, 14, 0, 10, 4, 4, ZSTD_lazy2 }, /* level 10 */ + { 0, 14, 14, 14, 0, 11, 4, 4, ZSTD_lazy2 }, /* level 11 */ + { 0, 14, 15, 15, 0, 12, 4, 32, ZSTD_btopt }, /* level 12 */ + { 0, 14, 15, 15, 0, 12, 4, 64, ZSTD_btopt }, /* level 13 */ + { 0, 14, 15, 15, 0, 12, 4, 96, ZSTD_btopt }, /* level 14 */ + { 0, 14, 15, 15, 0, 12, 4,128, ZSTD_btopt }, /* level 15 */ + { 0, 14, 15, 15, 0, 12, 4,256, ZSTD_btopt }, /* level 16 */ + { 0, 14, 15, 15, 0, 13, 4,256, ZSTD_btopt }, /* level 17 */ + { 0, 14, 15, 15, 0, 14, 4,256, ZSTD_btopt }, /* level 18 */ + { 0, 14, 15, 15, 0, 15, 4,256, ZSTD_btopt }, /* level 19 */ + { 0, 14, 15, 15, 0, 16, 4,256, ZSTD_btopt }, /* level 20 */ + { 0, 14, 15, 15, 0, 17, 4,256, ZSTD_btopt }, /* level 21 */ }, }; diff --git a/lib/zstd_static.h b/lib/zstd_static.h index 61216535..6a92a458 100644 --- a/lib/zstd_static.h +++ b/lib/zstd_static.h @@ -64,6 +64,8 @@ extern "C" { #define ZSTD_CONTENTLOG_MIN 4 #define ZSTD_HASHLOG_MAX 28 #define ZSTD_HASHLOG_MIN 12 +#define 
ZSTD_HASHLOG3_MAX 24 +#define ZSTD_HASHLOG3_MIN 12 #define ZSTD_SEARCHLOG_MAX (ZSTD_CONTENTLOG_MAX-1) #define ZSTD_SEARCHLOG_MIN 1 #define ZSTD_SEARCHLENGTH_MAX 7 @@ -80,6 +82,7 @@ typedef struct U32 windowLog; /* largest match distance : larger == more compression, more memory needed during decompression */ U32 contentLog; /* full search segment : larger == more compression, slower, more memory (useless for fast) */ U32 hashLog; /* dispatch table : larger == faster, more memory */ + U32 hashLog3; /* dispatch table : larger == faster, more memory */ U32 searchLog; /* nb of searches : larger == more compression, slower */ U32 searchLength; /* match length searched : larger == faster decompression, sometimes less compression */ U32 targetLength; /* acceptable match size for optimal parser (only) : larger == more compression, slower */ From c3a9a9ca5636fd4ed480a04d39439c5200b74a30 Mon Sep 17 00:00:00 2001 From: inikep Date: Fri, 19 Feb 2016 11:05:25 +0100 Subject: [PATCH 004/247] ZSTD_rescaleFreqs and removed minml --- lib/zstd_internal.h | 5 + lib/zstd_opt.h | 377 +++++++++++++++++++++++++++++--------------- 2 files changed, 253 insertions(+), 129 deletions(-) diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index 26fc8578..3d60e87b 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -50,6 +50,11 @@ /*-************************************* * Common constants ***************************************/ +#define ZSTD_OPT_DEBUG 0 // 1 = tableID=0; 5 = check encoded sequences; 9 = full logs +#if ZSTD_OPT_DEBUG > 0 + #include <stdio.h> /* for debug */ +#endif + #define ZSTD_DICT_MAGIC 0xEC30A435 #define KB *(1 <<10) diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index ec9a2a15..ba970c8f 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -33,9 +33,6 @@ /* Note : this file is intended to be included within zstd_compress.c */ -/*- Dependencies -*/ -#include <stdio.h> /* for debug */ - /*- Local types -*/ typedef struct { @@ -55,13 +52,13 @@ typedef struct { /*- Constants -*/ 
-#define ZSTD_OPT_NUM (1<<12) -#define ZSTD_FREQ_THRESHOLD (256) +#define ZSTD_OPT_NUM (1<<12) +#define ZSTD_FREQ_START 1 +#define ZSTD_FREQ_STEP 1 +#define ZSTD_FREQ_DIV 6 /*- Debug -*/ -#define ZSTD_OPT_DEBUG 0 // 1 = tableID=0; 5 = check encoded sequences - -#if defined(ZSTD_OPT_DEBUG) && ZSTD_OPT_DEBUG>=1 +#if defined(ZSTD_OPT_DEBUG) && ZSTD_OPT_DEBUG>=9 #define ZSTD_LOG_PARSER(...) printf(__VA_ARGS__) #define ZSTD_LOG_ENCODE(...) printf(__VA_ARGS__) #define ZSTD_LOG_TRY_PRICE(...) printf(__VA_ARGS__) @@ -72,11 +69,18 @@ typedef struct { #endif -FORCE_INLINE U32 ZSTD_getLiteralPriceReal(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals) +#if MINMATCH == 3 + #define MEM_readMINMATCH(ptr) ((U32)(MEM_read32(ptr)<<8)) +#else + #define MEM_readMINMATCH(ptr) (U32)(MEM_read32(ptr)) +#endif + + +FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals) { U32 price, freq, u; - if (!litLength) return 1; /* special case */ + if (!litLength) return 0; /* special case */ /* literals */ price = litLength * ZSTD_highbit(seqStorePtr->litSum); @@ -84,7 +88,7 @@ FORCE_INLINE U32 ZSTD_getLiteralPriceReal(seqStore_t* seqStorePtr, U32 litLength price -= ZSTD_highbit(seqStorePtr->litFreq[literals[u]]); /* literal Length */ - price += ((litLength >= MaxLL)*8) + ((litLength >= 255+MaxLL)*16) + ((litLength>=(1<<15))*8); + price += ((litLength >= MaxLL)<<3) + ((litLength >= 255+MaxLL)<<4) + ((litLength>=(1<<15))<<3); if (litLength >= MaxLL) litLength = MaxLL; freq = seqStorePtr->litLengthFreq[litLength]; price += ZSTD_highbit(seqStorePtr->litLengthSum) - ZSTD_highbit(freq); @@ -93,37 +97,65 @@ FORCE_INLINE U32 ZSTD_getLiteralPriceReal(seqStore_t* seqStorePtr, U32 litLength } -FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals) -{ - if (seqStorePtr->litSum > ZSTD_FREQ_THRESHOLD) - return ZSTD_getLiteralPriceReal(seqStorePtr, litLength, literals); - /* backup eval */ - return 1 + 
(litLength<<3); -} - - -FORCE_INLINE U32 ZSTD_getMatchPriceReal(seqStore_t* seqStorePtr, U32 offset, U32 matchLength) +FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) { /* offset */ BYTE offCode = offset ? (BYTE)ZSTD_highbit(offset) + 1 : 0; - U32 price = ZSTD_highbit(seqStorePtr->offCodeSum) - ZSTD_highbit(seqStorePtr->offCodeFreq[offCode]); - price += offCode; + U32 price = offCode + ZSTD_highbit(seqStorePtr->offCodeSum) - ZSTD_highbit(seqStorePtr->offCodeFreq[offCode]); /* match Length */ - price += ((matchLength >= MaxML)*8) + ((matchLength >= 255+MaxML)*16) + ((matchLength>=(1<<15))*8); + matchLength -= MINMATCH; + price += ((matchLength >= MaxML)<<3) + ((matchLength >= 255+MaxML)<<4) + ((matchLength>=(1<<15))<<3); if (matchLength >= MaxML) matchLength = MaxML; - price += ZSTD_highbit(seqStorePtr->matchLengthSum) - ZSTD_highbit(seqStorePtr->matchLengthFreq[matchLength]); + price += ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ZSTD_highbit(seqStorePtr->matchLengthSum) - ZSTD_highbit(seqStorePtr->matchLengthFreq[matchLength]); - return price; + return ((seqStorePtr->litSum>>4) / seqStorePtr->litLengthSum) + (matchLength==0) + (litLength==0) + price; } -FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) +MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr) { - if (seqStorePtr->litSum > ZSTD_FREQ_THRESHOLD) - return ZSTD_getLiteralPriceReal(seqStorePtr, litLength, literals) + ZSTD_getMatchPriceReal(seqStorePtr, offset, matchLength); - /* backup eval */ - return (litLength<<3) + ZSTD_highbit((U32)matchLength+1) + Offbits + ZSTD_highbit((U32)offset+1); + unsigned u; + + // printf("matchLengthSum=%d litLengthSum=%d litSum=%d offCodeSum=%d\n", ssPtr->matchLengthSum, ssPtr->litLengthSum, ssPtr->litSum, ssPtr->offCodeSum); + + if (ssPtr->litLengthSum == 0) { + ssPtr->matchLengthSum = (1<<MLbits); + ssPtr->litLengthSum = (1<<LLbits); + ssPtr->litSum = 
(1<<Litbits); + ssPtr->offCodeSum = (1<<Offbits); + + for (u=0; u<=MaxLit; u++) + ssPtr->litFreq[u] = 1; + for (u=0; u<=MaxLL; u++) + ssPtr->litLengthFreq[u] = 1; + for (u=0; u<=MaxML; u++) + ssPtr->matchLengthFreq[u] = 1; + for (u=0; u<=MaxOff; u++) + ssPtr->offCodeFreq[u] = 1; + } else { + ssPtr->matchLengthSum = 0; + ssPtr->litLengthSum = 0; + ssPtr->litSum = 0; + ssPtr->offCodeSum = 0; + + for (u=0; u<=MaxLit; u++) { + ssPtr->litFreq[u] = ZSTD_FREQ_START + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV); + ssPtr->litSum += ssPtr->litFreq[u]; + } + for (u=0; u<=MaxLL; u++) { + ssPtr->litLengthFreq[u] = ZSTD_FREQ_START + (ssPtr->litLengthFreq[u]>>ZSTD_FREQ_DIV); + ssPtr->litLengthSum += ssPtr->litLengthFreq[u]; + } + for (u=0; u<=MaxML; u++) { + ssPtr->matchLengthFreq[u] = ZSTD_FREQ_START + (ssPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV); + ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u]; + } + for (u=0; u<=MaxOff; u++) { + ssPtr->offCodeFreq[u] = ZSTD_FREQ_START + (ssPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV); + ssPtr->offCodeSum += ssPtr->offCodeFreq[u]; + } + } } @@ -132,29 +164,28 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B U32 u; /* literals */ - seqStorePtr->litSum += litLength; + seqStorePtr->litSum += litLength * ZSTD_FREQ_STEP; for (u=0; u < litLength; u++) - seqStorePtr->litFreq[literals[u]]++; + seqStorePtr->litFreq[literals[u]] += ZSTD_FREQ_STEP; /* literal Length */ - seqStorePtr->litLengthSum++; + seqStorePtr->litLengthSum += ZSTD_FREQ_STEP; if (litLength >= MaxLL) - seqStorePtr->litLengthFreq[MaxLL]++; + seqStorePtr->litLengthFreq[MaxLL] += ZSTD_FREQ_STEP; else - seqStorePtr->litLengthFreq[litLength]++; + seqStorePtr->litLengthFreq[litLength] += ZSTD_FREQ_STEP; /* match offset */ - seqStorePtr->offCodeSum++; - BYTE offCode = (BYTE)ZSTD_highbit(offset) + 1; - if (offset==0) offCode=0; - seqStorePtr->offCodeFreq[offCode]++; + seqStorePtr->offCodeSum += ZSTD_FREQ_STEP; + BYTE offCode = offset ? 
(BYTE)ZSTD_highbit(offset) + 1 : 0; + seqStorePtr->offCodeFreq[offCode] += ZSTD_FREQ_STEP; /* match Length */ - seqStorePtr->matchLengthSum++; + seqStorePtr->matchLengthSum += ZSTD_FREQ_STEP; if (matchLength >= MaxML) - seqStorePtr->matchLengthFreq[MaxML]++; + seqStorePtr->matchLengthFreq[MaxML] += ZSTD_FREQ_STEP; else - seqStorePtr->matchLengthFreq[matchLength]++; + seqStorePtr->matchLengthFreq[matchLength] += ZSTD_FREQ_STEP; } @@ -173,11 +204,33 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B /*-************************************* * Binary Tree search ***************************************/ +/* Update hashTable3 up to ip (excluded) + Assumption : always within prefix (ie. not within extDict) */ +#if MINMATCH == 3 +static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip) +{ + U32* const hashTable3 = zc->hashTable3; + const U32 hashLog3 = zc->params.hashLog3; + const BYTE* const base = zc->base; + const U32 target = (U32)(ip - base); + U32 idx = zc->nextToUpdate3; + + while(idx < target) { + hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx; + idx++; + } + + zc->nextToUpdate3 = target; + return hashTable3[ZSTD_hash3Ptr(ip, hashLog3)]; +} +#endif + + static U32 ZSTD_insertBtAndGetAllMatches ( ZSTD_CCtx* zc, - const BYTE* const ip, const BYTE* const iend, + const BYTE* const ip, const BYTE* const iLimit, U32 nbCompares, const U32 mls, - U32 extDict, ZSTD_match_t* matches, size_t bestLength) + U32 extDict, ZSTD_match_t* matches) { const BYTE* const base = zc->base; const U32 current = (U32)(ip-base); @@ -201,9 +254,39 @@ static U32 ZSTD_insertBtAndGetAllMatches ( U32 dummy32; /* to be nullified at the end */ U32 mnum = 0; - bestLength = MINMATCH-1; + size_t bestLength = MINMATCH-1; hashTable[h] = current; /* Update Hash Table */ +#if MINMATCH == 3 + /* HC3 match finder */ + U32 matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip); + + if (matchIndex3>windowLow) { + const BYTE* match; + size_t 
currentMl=0; + if ((!extDict) || matchIndex3 >= dictLimit) { + match = base + matchIndex3; + if (match[bestLength] == ip[bestLength]) currentMl = ZSTD_count(ip, match, iLimit); // faster + // if (MEM_readMINMATCH(match) == MEM_readMINMATCH(ip)) currentMl = ZSTD_count(ip+MINMATCH, match+MINMATCH, iLimit)+MINMATCH; // stronger + } else { + match = dictBase + matchIndex3; + if (MEM_readMINMATCH(match) == MEM_readMINMATCH(ip)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH; + } + + /* save best solution */ + if (currentMl > bestLength) { + bestLength = currentMl; + matches[mnum].off = current - matchIndex3; + matches[mnum].len = (U32)currentMl; + matches[mnum].back = 0; + mnum++; + if (currentMl > ZSTD_OPT_NUM) return mnum; + if (ip+currentMl == iLimit) return mnum; /* best possible, and avoid read overflow*/ + } + } +#endif + while (nbCompares-- && (matchIndex > windowLow)) { U32* nextPtr = bt + 2*(matchIndex & btMask); size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ @@ -211,11 +294,16 @@ static U32 ZSTD_insertBtAndGetAllMatches ( if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { match = base + matchIndex; - if (match[matchLength] == ip[matchLength]) - matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; + if (match[matchLength] == ip[matchLength]) { +#if ZSTD_OPT_DEBUG >= 5 + if (memcmp(match, ip, matchLength) != 0) + printf("%d: ERROR: matchLength=%d ZSTD_count=%d\n", current, (int)matchLength, (int)ZSTD_count(ip, match, ip+matchLength)); +#endif + matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iLimit) +1; + } } else { match = dictBase + matchIndex; - matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, 
dictEnd, prefixStart); if (matchIndex+matchLength >= dictLimit) match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ } @@ -228,7 +316,7 @@ static U32 ZSTD_insertBtAndGetAllMatches ( matches[mnum].back = 0; mnum++; if (matchLength > ZSTD_OPT_NUM) break; - if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ + if (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */ break; /* drop, to guarantee consistency (miss a little bit of compression) */ } @@ -259,26 +347,26 @@ static U32 ZSTD_insertBtAndGetAllMatches ( static U32 ZSTD_BtGetAllMatches ( ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iLimit, - const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, U32 minml) + const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches) { if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls); - return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches, minml); + return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches); } static U32 ZSTD_BtGetAllMatches_selectMLS ( ZSTD_CCtx* zc, /* Index table will be updated */ const BYTE* ip, const BYTE* const iLowLimit, const BYTE* const iHighLimit, - const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, U32 minml) + const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches) { (void)iLowLimit; /* unused */ switch(matchLengthSearch) { default : - case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minml); - case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minml); - case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minml); + case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches); + case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches); + case 6 : 
return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches); } } @@ -286,26 +374,26 @@ static U32 ZSTD_BtGetAllMatches_selectMLS ( static U32 ZSTD_BtGetAllMatches_extDict ( ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iLimit, - const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, U32 minml) + const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches) { if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls); - return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches, minml); + return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches); } static U32 ZSTD_BtGetAllMatches_selectMLS_extDict ( ZSTD_CCtx* zc, /* Index table will be updated */ const BYTE* ip, const BYTE* const iLowLimit, const BYTE* const iHighLimit, - const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, U32 minml) + const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches) { (void)iLowLimit; switch(matchLengthSearch) { default : - case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minml); - case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minml); - case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minml); + case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches); + case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches); + case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches); } } @@ -317,7 +405,7 @@ FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulati U32 ZSTD_HcGetAllMatches_generic ( ZSTD_CCtx* zc, /* Index table will be updated */ const BYTE* const ip, const BYTE* const iLowLimit, const BYTE* const iHighLimit, - const U32 maxNbAttempts, 
const U32 mls, const U32 extDict, ZSTD_match_t* matches, size_t minml) + const U32 maxNbAttempts, const U32 mls, const U32 extDict, ZSTD_match_t* matches) { U32* const chainTable = zc->contentTable; const U32 chainSize = (1U << zc->params.contentLog); @@ -335,11 +423,40 @@ U32 ZSTD_HcGetAllMatches_generic ( U32 mnum = 0; const BYTE* match; U32 nbAttempts=maxNbAttempts; - minml=MINMATCH-1; + size_t minml=MINMATCH-1; - /* HC4 match finder */ + /* HC4 match finder with update */ matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls); +#if MINMATCH == 3 + /* HC3 match finder */ + U32 matchIndex3 = zc->hashTable3[ZSTD_hash3Ptr(ip, zc->params.hashLog3)]; + + if (matchIndex3>lowLimit) { + size_t currentMl=0; + if ((!extDict) || matchIndex3 >= dictLimit) { + match = base + matchIndex3; + if (match[minml] == ip[minml]) currentMl = ZSTD_count(ip, match, iHighLimit); // faster + //if (MEM_readMINMATCH(match) == MEM_readMINMATCH(ip)) currentMl = ZSTD_count(ip+MINMATCH, match+MINMATCH, iHighLimit)+MINMATCH; // stronger + } else { + match = dictBase + matchIndex3; + if (MEM_readMINMATCH(match) == MEM_readMINMATCH(ip)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iHighLimit, dictEnd, prefixStart) + MINMATCH; + } + + /* save best solution */ + if (currentMl > minml) { + minml = currentMl; + matches[mnum].off = current - matchIndex3; + matches[mnum].len = (U32)currentMl; + matches[mnum].back = 0; + mnum++; + if (currentMl > ZSTD_OPT_NUM) return mnum; + if (ip+currentMl == iHighLimit) return mnum; /* best possible, and avoid read overflow*/ + } + } +#endif + while ((matchIndex>lowLimit) && (nbAttempts)) { size_t currentMl=0; int back = 0; @@ -347,16 +464,16 @@ U32 ZSTD_HcGetAllMatches_generic ( if ((!extDict) || matchIndex >= dictLimit) { match = base + matchIndex; if (match[minml] == ip[minml]) currentMl = ZSTD_count(ip, match, iHighLimit); if (currentMl>0) { // faster - //if 
(MEM_read32(match) == MEM_read32(ip)) { currentMl = ZSTD_count(ip+MINMATCH, match+MINMATCH, iHighLimit)+MINMATCH; // stronger - while ((match-back > prefixStart) && (ip-back > iLowLimit) && (ip[-back-1] == match[-back-1])) back++; - currentMl += back; + //if (MEM_readMINMATCH(match) == MEM_readMINMATCH(ip)) { currentMl = ZSTD_count(ip+MINMATCH, match+MINMATCH, iHighLimit)+MINMATCH; // stronger + while ((match+back > prefixStart) && (ip+back > iLowLimit) && (ip[back-1] == match[back-1])) back--; + currentMl += (U32)(-back); } } else { match = dictBase + matchIndex; - if (MEM_read32(match) == MEM_read32(ip)) { /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + if (MEM_readMINMATCH(match) == MEM_readMINMATCH(ip)) { /* assumption : matchIndex <= dictLimit-4 (by table construction) */ currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iHighLimit, dictEnd, prefixStart) + MINMATCH; - while ((match-back > dictStart) && (ip-back > iLowLimit) && (ip[-back-1] == match[-back-1])) back++; /* backward match extension */ - currentMl += back; + while ((match+back > dictStart) && (ip+back > iLowLimit) && (ip[back-1] == match[back-1])) back--; /* backward match extension */ + currentMl += (U32)(-back); } } /* save best solution */ @@ -364,7 +481,7 @@ U32 ZSTD_HcGetAllMatches_generic ( minml = currentMl; matches[mnum].off = current - matchIndex; matches[mnum].len = (U32)currentMl; - matches[mnum].back = back; + matches[mnum].back = (U32)(-back); mnum++; if (currentMl > ZSTD_OPT_NUM) break; if (ip+currentMl == iHighLimit) break; /* best possible, and avoid read overflow*/ @@ -381,28 +498,28 @@ U32 ZSTD_HcGetAllMatches_generic ( static U32 ZSTD_HcGetAllMatches_selectMLS ( ZSTD_CCtx* zc, const BYTE* ip, const BYTE* const iLowLimit, const BYTE* const iHighLimit, - const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, U32 minml) + const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches) { switch(matchLengthSearch) { 
default : - case 4 : return ZSTD_HcGetAllMatches_generic(zc, ip, iLowLimit, iHighLimit, maxNbAttempts, 4, 0, matches, minml); - case 5 : return ZSTD_HcGetAllMatches_generic(zc, ip, iLowLimit, iHighLimit, maxNbAttempts, 5, 0, matches, minml); - case 6 : return ZSTD_HcGetAllMatches_generic(zc, ip, iLowLimit, iHighLimit, maxNbAttempts, 6, 0, matches, minml); + case 4 : return ZSTD_HcGetAllMatches_generic(zc, ip, iLowLimit, iHighLimit, maxNbAttempts, 4, 0, matches); + case 5 : return ZSTD_HcGetAllMatches_generic(zc, ip, iLowLimit, iHighLimit, maxNbAttempts, 5, 0, matches); + case 6 : return ZSTD_HcGetAllMatches_generic(zc, ip, iLowLimit, iHighLimit, maxNbAttempts, 6, 0, matches); } } static U32 ZSTD_HcGetAllMatches_selectMLS_extDict ( ZSTD_CCtx* zc, const BYTE* ip, const BYTE* const iLowLimit, const BYTE* const iHighLimit, - const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, U32 minml) + const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches) { switch(matchLengthSearch) { default : - case 4 : return ZSTD_HcGetAllMatches_generic(zc, ip, iLowLimit, iHighLimit, maxNbAttempts, 4, 1, matches, minml); - case 5 : return ZSTD_HcGetAllMatches_generic(zc, ip, iLowLimit, iHighLimit, maxNbAttempts, 5, 1, matches, minml); - case 6 : return ZSTD_HcGetAllMatches_generic(zc, ip, iLowLimit, iHighLimit, maxNbAttempts, 6, 1, matches, minml); + case 4 : return ZSTD_HcGetAllMatches_generic(zc, ip, iLowLimit, iHighLimit, maxNbAttempts, 4, 1, matches); + case 5 : return ZSTD_HcGetAllMatches_generic(zc, ip, iLowLimit, iHighLimit, maxNbAttempts, 5, 1, matches); + case 6 : return ZSTD_HcGetAllMatches_generic(zc, ip, iLowLimit, iHighLimit, maxNbAttempts, 6, 1, matches); } } @@ -428,7 +545,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, const U32 mls = ctx->params.searchLength; typedef U32 (*getAllMatches_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLowLimit, const BYTE* iHighLimit, - U32 maxNbAttempts, U32 matchLengthSearch, ZSTD_match_t* 
matches, U32 minml); + U32 maxNbAttempts, U32 matchLengthSearch, ZSTD_match_t* matches); getAllMatches_f getAllMatches = searchMethod ? ZSTD_BtGetAllMatches_selectMLS : ZSTD_HcGetAllMatches_selectMLS; ZSTD_optimal_t opt[ZSTD_OPT_NUM+4]; @@ -439,10 +556,10 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, const U32 sufficient_len = ctx->params.targetLength; const U32 faster_get_matches = (ctx->params.strategy == ZSTD_opt); - /* init */ + ctx->nextToUpdate3 = ctx->nextToUpdate; ZSTD_resetSeqStore(seqStorePtr); - ZSTD_resetFreqs(seqStorePtr); + ZSTD_rescaleFreqs(seqStorePtr); if ((ip-base) < REPCODE_STARTVALUE) ip = base + REPCODE_STARTVALUE; /* Match Loop */ @@ -457,7 +574,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, opt[0].litlen = (U32)(ip - anchor); /* check repCode */ - if (MEM_read32(ip+1) == MEM_read32(ip+1 - rep_1)) { + if (MEM_readMINMATCH(ip+1) == MEM_readMINMATCH(ip+1 - rep_1)) { /* repcode : we take it */ mlen = (U32)ZSTD_count(ip+1+MINMATCH, ip+1+MINMATCH-rep_1, iend) + MINMATCH; @@ -469,19 +586,17 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, litlen = opt[0].litlen + 1; do { - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, 0, mlen - MINMATCH); + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, 0, mlen); if (mlen + 1 > last_pos || price < opt[mlen + 1].price) SET_PRICE(mlen + 1, mlen, 0, litlen, price); /* note : macro modifies last_pos */ mlen--; } while (mlen >= MINMATCH); } - best_mlen = (last_pos) ? 
last_pos : MINMATCH; - if (faster_get_matches && last_pos) match_num = 0; else - match_num = getAllMatches(ctx, ip, ip, iend, maxSearches, mls, matches, best_mlen); /* first search (depth 0) */ + match_num = getAllMatches(ctx, ip, ip, iend, maxSearches, mls, matches); /* first search (depth 0) */ ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos); if (!last_pos && !match_num) { ip++; continue; } @@ -498,6 +613,8 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, goto _storeSequence; } + best_mlen = (last_pos) ? last_pos : MINMATCH; + // set prices using matches at position = 0 for (u = 0; u < match_num; u++) { mlen = (u>0) ? matches[u-1].len+1 : best_mlen; @@ -505,7 +622,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, ZSTD_LOG_PARSER("%d: start Found mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(ip-base), matches[u].len, matches[u].off, (int)best_mlen, (int)last_pos); litlen = opt[0].litlen; while (mlen <= best_mlen) { - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off, mlen - MINMATCH); + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off, mlen); if (mlen > last_pos || price < opt[mlen].price) SET_PRICE(mlen, mlen, matches[u].off, litlen, price); mlen++; @@ -570,7 +687,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, ZSTD_LOG_PARSER("%d: try REP1 rep=%u mlen=%u\n", (int)(inr-base), (U32)cur_rep, mlen); } - if (MEM_read32(inr) == MEM_read32(inr - cur_rep)) { // check rep + if (MEM_readMINMATCH(inr) == MEM_readMINMATCH(inr - cur_rep)) { // check rep mlen = (U32)ZSTD_count(inr+MINMATCH, inr+MINMATCH - cur_rep, iend) + MINMATCH; ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d rep=%d opt[%d].off=%d\n", (int)(inr-base), mlen, 0, opt[cur].rep, cur, opt[cur].off); @@ -585,19 +702,19 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, if (opt[cur].mlen == 1) { litlen = opt[cur].litlen; if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, 
inr-litlen, 0, mlen - MINMATCH); + price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, 0, mlen); ZSTD_LOG_TRY_PRICE("%d: TRY5 opt[%d].price=%d price=%d cur=%d litlen=%d\n", (int)(inr-base), cur - litlen, opt[cur - litlen].price, price, cur, litlen); } else - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, 0, mlen - MINMATCH); + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, 0, mlen); } else { litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, 0, mlen - MINMATCH); - ZSTD_LOG_TRY_PRICE("%d: TRY7 price=%d cur=%d litlen=0 getprice=%d\n", (int)(inr-base), price, cur, (int)ZSTD_getPrice(seqStorePtr, 0, NULL, 0, mlen - MINMATCH)); + price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, 0, mlen); + ZSTD_LOG_TRY_PRICE("%d: TRY7 price=%d cur=%d litlen=0 getprice=%d\n", (int)(inr-base), price, cur, (int)ZSTD_getPrice(seqStorePtr, 0, NULL, 0, mlen)); } best_mlen = mlen; if (faster_get_matches) skip_num = best_mlen; - ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d price=%d litlen=%d price[%d]=%d\n", (int)(inr-base), mlen, 0, price, litlen, cur - litlen, opt[cur - litlen].price); + ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, 0, price, litlen); do { if (cur + mlen > last_pos || price <= opt[cur + mlen].price) @@ -608,9 +725,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, if (faster_get_matches && skip_num > 0) { skip_num--; continue; } - best_mlen = (best_mlen > MINMATCH) ? best_mlen : MINMATCH; - - match_num = getAllMatches(ctx, inr, ip, iend, maxSearches, mls, matches, best_mlen); + match_num = getAllMatches(ctx, inr, ip, iend, maxSearches, mls, matches); ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num); if (match_num > 0 && matches[match_num-1].len > sufficient_len) { @@ -621,6 +736,8 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, goto _storeSequence; } + best_mlen = (best_mlen > MINMATCH) ? 
best_mlen : MINMATCH; + /* set prices using matches at position = cur */ for (u = 0; u < match_num; u++) { mlen = (u>0) ? matches[u-1].len+1 : best_mlen; @@ -635,15 +752,15 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, if (opt[cur2].mlen == 1) { litlen = opt[cur2].litlen; if (cur2 > litlen) - price = opt[cur2 - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur2-litlen, matches[u].off, mlen - MINMATCH); + price = opt[cur2 - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur2-litlen, matches[u].off, mlen); else - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off, mlen - MINMATCH); + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off, mlen); } else { litlen = 0; - price = opt[cur2].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off, mlen - MINMATCH); + price = opt[cur2].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off, mlen); } - ZSTD_LOG_PARSER("%d: Found2 pred=%d mlen=%d best_mlen=%d off=%d price=%d litlen=%d price[%d]=%d\n", (int)(inr-base), matches[u].back, mlen, best_mlen, matches[u].off, price, litlen, cur - litlen, opt[cur - litlen].price); + ZSTD_LOG_PARSER("%d: Found2 pred=%d mlen=%d best_mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), matches[u].back, mlen, best_mlen, matches[u].off, price, litlen); ZSTD_LOG_TRY_PRICE("%d: TRY8 price=%d opt[%d].price=%d\n", (int)(inr-base), price, cur2 + mlen, opt[cur2 + mlen].price); if (cur2 + mlen > last_pos || (price < opt[cur2 + mlen].price)) @@ -691,7 +808,7 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ cur += mlen; U32 litLength = (U32)(ip - anchor); - ZSTD_LOG_ENCODE("%d/%d: ENCODE1 literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep_1, (int)rep_2); + ZSTD_LOG_ENCODE("%d/%d: ENCODE literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep_1, (int)rep_2); 
if (offset) { rep_2 = rep_1; @@ -703,14 +820,15 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ rep_1 = best_off; } } - ZSTD_LOG_ENCODE("%d/%d: ENCODE2 literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep_1, (int)rep_2); + // ZSTD_LOG_ENCODE("%d/%d: ENCODE2 literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep_1, (int)rep_2); #if ZSTD_OPT_DEBUG >= 5 - int ml2; + U32 ml2; if (offset) - ml2 = ZSTD_count(ip, ip-offset, iend); + ml2 = (U32)ZSTD_count(ip, ip-offset, iend); else - ml2 = ZSTD_count(ip, ip-rep_1, iend); + ml2 = (U32)ZSTD_count(ip, ip-rep_1, iend); + if (offset == 0 || offset >= 8) if (ml2 < mlen && ml2 < MINMATCH) { printf("%d: ERROR iend=%d mlen=%d offset=%d ml2=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset, (int)ml2); exit(0); } if (ip < anchor) { @@ -732,7 +850,7 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ /* check immediate repcode */ while ( (anchor <= ilimit) - && (MEM_read32(anchor) == MEM_read32(anchor - rep_2)) ) { + && (MEM_readMINMATCH(anchor) == MEM_readMINMATCH(anchor - rep_2)) ) { /* store sequence */ best_mlen = (U32)ZSTD_count(anchor+MINMATCH, anchor+MINMATCH-rep_2, iend); best_off = rep_2; @@ -778,7 +896,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, const U32 mls = ctx->params.searchLength; typedef U32 (*getAllMatches_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLowLimit, const BYTE* iHighLimit, - U32 maxNbAttempts, U32 matchLengthSearch, ZSTD_match_t* matches, U32 minml); + U32 maxNbAttempts, U32 matchLengthSearch, ZSTD_match_t* matches); getAllMatches_f getAllMatches = searchMethod ? 
ZSTD_BtGetAllMatches_selectMLS_extDict : ZSTD_HcGetAllMatches_selectMLS_extDict; ZSTD_optimal_t opt[ZSTD_OPT_NUM+4]; @@ -790,8 +908,9 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, const U32 faster_get_matches = (ctx->params.strategy == ZSTD_opt); /* init */ + ctx->nextToUpdate3 = ctx->nextToUpdate; ZSTD_resetSeqStore(seqStorePtr); - ZSTD_resetFreqs(seqStorePtr); + ZSTD_rescaleFreqs(seqStorePtr); if ((ip - prefixStart) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE; /* Match Loop */ @@ -810,7 +929,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ - && (MEM_read32(ip+1) == MEM_read32(repMatch)) ) { + && (MEM_readMINMATCH(ip+1) == MEM_readMINMATCH(repMatch)) ) { /* repcode detected we should take it */ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; mlen = (U32)ZSTD_count_2segments(ip+1+MINMATCH, repMatch+MINMATCH, iend, repEnd, prefixStart) + MINMATCH; @@ -823,7 +942,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, litlen = opt[0].litlen + 1; do { - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, 0, mlen - MINMATCH); + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, 0, mlen); if (mlen + 1 > last_pos || price < opt[mlen + 1].price) SET_PRICE(mlen + 1, mlen, 0, litlen, price); mlen--; @@ -835,7 +954,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, if (faster_get_matches && last_pos) match_num = 0; else - match_num = getAllMatches(ctx, ip, ip, iend, maxSearches, mls, matches, best_mlen); /* first search (depth 0) */ + match_num = getAllMatches(ctx, ip, ip, iend, maxSearches, mls, matches); /* first search (depth 0) */ ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos); if (!last_pos && !match_num) { ip++; continue; } @@ -859,7 +978,7 @@ void 
ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, ZSTD_LOG_PARSER("%d: start Found mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(ip-base), matches[u].len, matches[u].off, (int)best_mlen, (int)last_pos); litlen = opt[0].litlen; while (mlen <= best_mlen) { - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off, mlen - MINMATCH); + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off, mlen); if (mlen > last_pos || price < opt[mlen].price) SET_PRICE(mlen, mlen, matches[u].off, litlen, price); mlen++; @@ -931,7 +1050,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ - &&(MEM_read32(inr) == MEM_read32(repMatch)) ) { + &&(MEM_readMINMATCH(inr) == MEM_readMINMATCH(repMatch)) ) { /* repcode detected */ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; mlen = (U32)ZSTD_count_2segments(inr+MINMATCH, repMatch+MINMATCH, iend, repEnd, prefixStart) + MINMATCH; @@ -948,20 +1067,20 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, if (opt[cur].mlen == 1) { litlen = opt[cur].litlen; if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, 0, mlen - MINMATCH); + price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, 0, mlen); ZSTD_LOG_TRY_PRICE("%d: TRY5 opt[%d].price=%d price=%d cur=%d litlen=%d\n", (int)(inr-base), cur - litlen, opt[cur - litlen].price, price, cur, litlen); } else - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, 0, mlen - MINMATCH); + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, 0, mlen); } else { litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, 0, mlen - MINMATCH); - ZSTD_LOG_TRY_PRICE("%d: TRY7 price=%d cur=%d litlen=0 getprice=%d\n", (int)(inr-base), price, cur, (int)ZSTD_getPrice(seqStorePtr, 
0, NULL, 0, mlen - MINMATCH)); + price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, 0, mlen); + ZSTD_LOG_TRY_PRICE("%d: TRY7 price=%d cur=%d litlen=0 getprice=%d\n", (int)(inr-base), price, cur, (int)ZSTD_getPrice(seqStorePtr, 0, NULL, 0, mlen)); } best_mlen = mlen; if (faster_get_matches) skip_num = best_mlen; - ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d price=%d litlen=%d price[%d]=%d\n", (int)(inr-base), mlen, 0, price, litlen, cur - litlen, opt[cur - litlen].price); + ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, 0, price, litlen); do { if (cur + mlen > last_pos || price <= opt[cur + mlen].price) // || ((price == opt[cur + mlen].price) && (opt[cur].mlen == 1) && (cur != litlen))) // at equal price prefer REP instead of MATCH @@ -974,7 +1093,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, best_mlen = (best_mlen > MINMATCH) ? best_mlen : MINMATCH; - match_num = getAllMatches(ctx, inr, ip, iend, maxSearches, mls, matches, best_mlen); + match_num = getAllMatches(ctx, inr, ip, iend, maxSearches, mls, matches); ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num); if (match_num > 0 && matches[match_num-1].len > sufficient_len) { @@ -999,15 +1118,15 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, if (opt[cur2].mlen == 1) { litlen = opt[cur2].litlen; if (cur2 > litlen) - price = opt[cur2 - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur2-litlen, matches[u].off, mlen - MINMATCH); + price = opt[cur2 - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur2-litlen, matches[u].off, mlen); else - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off, mlen - MINMATCH); + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off, mlen); } else { litlen = 0; - price = opt[cur2].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off, mlen - MINMATCH); + price = opt[cur2].price + ZSTD_getPrice(seqStorePtr, 0, NULL, 
matches[u].off, mlen); } - ZSTD_LOG_PARSER("%d: Found2 pred=%d mlen=%d best_mlen=%d off=%d price=%d litlen=%d price[%d]=%d\n", (int)(inr-base), matches[u].back, mlen, best_mlen, matches[u].off, price, litlen, cur - litlen, opt[cur - litlen].price); + ZSTD_LOG_PARSER("%d: Found2 pred=%d mlen=%d best_mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), matches[u].back, mlen, best_mlen, matches[u].off, price, litlen); ZSTD_LOG_TRY_PRICE("%d: TRY8 price=%d opt[%d].price=%d\n", (int)(inr-base), price, cur2 + mlen, opt[cur2 + mlen].price); if (cur2 + mlen > last_pos || (price < opt[cur2 + mlen].price)) @@ -1070,11 +1189,11 @@ _storeSequence: // cur, last_pos, best_mlen, best_off have to be set ZSTD_LOG_ENCODE("%d/%d: ENCODE2 literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep_1, (int)rep_2); #if ZSTD_OPT_DEBUG >= 5 - int ml2; + U32 ml2; if (offset) - ml2 = ZSTD_count(ip, ip-offset, iend); + ml2 = (U32)ZSTD_count(ip, ip-offset, iend); else - ml2 = ZSTD_count(ip, ip-rep_1, iend); + ml2 = (U32)ZSTD_count(ip, ip-rep_1, iend); if (ml2 < mlen && ml2 < MINMATCH) { printf("%d: ERROR iend=%d mlen=%d offset=%d ml2=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset, (int)ml2); exit(0); } if (ip < anchor) { @@ -1100,7 +1219,7 @@ _storeSequence: // cur, last_pos, best_mlen, best_off have to be set const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ - && (MEM_read32(anchor) == MEM_read32(repMatch)) ) { + && (MEM_readMINMATCH(anchor) == MEM_readMINMATCH(repMatch)) ) { /* repcode detected, let's take it */ const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; mlen = (U32)ZSTD_count_2segments(anchor+MINMATCH, repMatch+MINMATCH, iend, repEnd, prefixStart) + MINMATCH; From fcafb26b2e6afed63de1528274d328705d18783f Mon Sep 17 00:00:00 2001 From: inikep Date: Fri, 19 Feb 2016 11:59:44 +0100 Subject: [PATCH 005/247] speed optimiaztions --- lib/zstd_opt.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index ba970c8f..98ed77d6 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -78,9 +78,7 @@ typedef struct { FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals) { - U32 price, freq, u; - - if (!litLength) return 0; /* special case */ + U32 price, u; /* literals */ price = litLength * ZSTD_highbit(seqStorePtr->litSum); @@ -90,8 +88,7 @@ FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* seqStorePtr, U32 litLength, co /* literal Length */ price += ((litLength >= MaxLL)<<3) + ((litLength >= 255+MaxLL)<<4) + ((litLength>=(1<<15))<<3); if (litLength >= MaxLL) litLength = MaxLL; - freq = seqStorePtr->litLengthFreq[litLength]; - price += ZSTD_highbit(seqStorePtr->litLengthSum) - ZSTD_highbit(freq); + price += ZSTD_highbit(seqStorePtr->litLengthSum) - ZSTD_highbit(seqStorePtr->litLengthFreq[litLength]); return price; } @@ -107,9 +104,12 @@ FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYT matchLength -= MINMATCH; price += ((matchLength >= MaxML)<<3) + ((matchLength >= 255+MaxML)<<4) + ((matchLength>=(1<<15))<<3); if (matchLength >= MaxML) matchLength = MaxML; - price += ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ZSTD_highbit(seqStorePtr->matchLengthSum) - ZSTD_highbit(seqStorePtr->matchLengthFreq[matchLength]); + price += ZSTD_highbit(seqStorePtr->matchLengthSum) - ZSTD_highbit(seqStorePtr->matchLengthFreq[matchLength]); - return ((seqStorePtr->litSum>>4) / seqStorePtr->litLengthSum) + (matchLength==0) + (litLength==0) + price; + if (!litLength) + return price + 1; /* 
special case */ + + return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals); //((seqStorePtr->litSum>>4) / seqStorePtr->litLengthSum) + (matchLength==0) + (litLength==0) + price; } From 191b52a8dcc47c33e34d0dbfd7f95953ac1ad9ed Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 19 Feb 2016 15:16:15 +0100 Subject: [PATCH 006/247] silence array subscript warning --- lib/huff0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/huff0.c b/lib/huff0.c index 929bc87b..d1749372 100644 --- a/lib/huff0.c +++ b/lib/huff0.c @@ -1381,7 +1381,7 @@ size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize) if (tableLog > memLog) return ERROR(tableLog_tooLarge); /* DTable is too small */ /* find maxWeight */ - for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */ + for (maxW = tableLog; maxW && rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */ /* Get start index of each weight */ { From f8482131d786386cda296a436c8b86025e3138d4 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 19 Feb 2016 17:33:43 +0100 Subject: [PATCH 007/247] better compatibility with Visual <= 2012 --- lib/huff0.c | 323 ++++++++++++++++++++++++++-------------------------- 1 file changed, 163 insertions(+), 160 deletions(-) diff --git a/lib/huff0.c b/lib/huff0.c index d1749372..7afb1337 100644 --- a/lib/huff0.c +++ b/lib/huff0.c @@ -810,91 +810,92 @@ size_t HUF_decompress4X2_usingDTable( const void* cSrc, size_t cSrcSize, const U16* DTable) { - const BYTE* const istart = (const BYTE*) cSrc; - BYTE* const ostart = (BYTE*) dst; - BYTE* const oend = ostart + dstSize; - const void* const dtPtr = DTable; - const HUF_DEltX2* const dt = ((const HUF_DEltX2*)dtPtr) +1; - const U32 dtLog = DTable[0]; - size_t errorCode; - /* Check */ - if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + if (cSrcSize < 10) return ERROR(corruption_detected); /* 
strict minimum : jump table + 1 byte per stream */ + { + const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + const void* const dtPtr = DTable; + const HUF_DEltX2* const dt = ((const HUF_DEltX2*)dtPtr) +1; + const U32 dtLog = DTable[0]; + size_t errorCode; - /* Init */ - BIT_DStream_t bitD1; - BIT_DStream_t bitD2; - BIT_DStream_t bitD3; - BIT_DStream_t bitD4; - const size_t length1 = MEM_readLE16(istart); - const size_t length2 = MEM_readLE16(istart+2); - const size_t length3 = MEM_readLE16(istart+4); - size_t length4; - const BYTE* const istart1 = istart + 6; /* jumpTable */ - const BYTE* const istart2 = istart1 + length1; - const BYTE* const istart3 = istart2 + length2; - const BYTE* const istart4 = istart3 + length3; - const size_t segmentSize = (dstSize+3) / 4; - BYTE* const opStart2 = ostart + segmentSize; - BYTE* const opStart3 = opStart2 + segmentSize; - BYTE* const opStart4 = opStart3 + segmentSize; - BYTE* op1 = ostart; - BYTE* op2 = opStart2; - BYTE* op3 = opStart3; - BYTE* op4 = opStart4; - U32 endSignal; + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + const size_t length1 = MEM_readLE16(istart); + const size_t length2 = MEM_readLE16(istart+2); + const size_t length3 = MEM_readLE16(istart+4); + size_t length4; + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + const size_t segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + U32 endSignal; - length4 = cSrcSize - (length1 + length2 + length3 + 6); - if (length4 > cSrcSize) return ERROR(corruption_detected); /* 
overflow */ - errorCode = BIT_initDStream(&bitD1, istart1, length1); - if (HUF_isError(errorCode)) return errorCode; - errorCode = BIT_initDStream(&bitD2, istart2, length2); - if (HUF_isError(errorCode)) return errorCode; - errorCode = BIT_initDStream(&bitD3, istart3, length3); - if (HUF_isError(errorCode)) return errorCode; - errorCode = BIT_initDStream(&bitD4, istart4, length4); - if (HUF_isError(errorCode)) return errorCode; + length4 = cSrcSize - (length1 + length2 + length3 + 6); + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + errorCode = BIT_initDStream(&bitD1, istart1, length1); + if (HUF_isError(errorCode)) return errorCode; + errorCode = BIT_initDStream(&bitD2, istart2, length2); + if (HUF_isError(errorCode)) return errorCode; + errorCode = BIT_initDStream(&bitD3, istart3, length3); + if (HUF_isError(errorCode)) return errorCode; + errorCode = BIT_initDStream(&bitD4, istart4, length4); + if (HUF_isError(errorCode)) return errorCode; - /* 16-32 symbols per loop (4-8 symbols per stream) */ - endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); - for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; ) { - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_1(op1, &bitD1); - HUF_DECODE_SYMBOLX2_1(op2, &bitD2); - HUF_DECODE_SYMBOLX2_1(op3, &bitD3); - HUF_DECODE_SYMBOLX2_1(op4, &bitD4); - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_0(op1, &bitD1); - HUF_DECODE_SYMBOLX2_0(op2, &bitD2); - HUF_DECODE_SYMBOLX2_0(op3, &bitD3); - HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + /* 16-32 symbols per loop (4-8 symbols per stream) */ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | 
BIT_reloadDStream(&bitD4); + for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; ) { + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + } + + /* check corruption */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 supposed already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); + + /* check */ + endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endSignal) return ERROR(corruption_detected); + + /* decoded size */ + return dstSize; } - - /* check corruption */ - if (op1 > opStart2) return ERROR(corruption_detected); - if (op2 > opStart3) return ERROR(corruption_detected); - if (op3 > opStart4) return ERROR(corruption_detected); - /* note : op4 supposed already verified within main loop */ - - /* finish bitStreams one by one */ - HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); - HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); - 
HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); - HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); - - /* check */ - endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); - if (!endSignal) return ERROR(corruption_detected); - - /* decoded size */ - return dstSize; } @@ -1559,95 +1560,97 @@ size_t HUF_decompress4X6_usingDTable( const void* cSrc, size_t cSrcSize, const U32* DTable) { - const BYTE* const istart = (const BYTE*) cSrc; - BYTE* const ostart = (BYTE*) dst; - BYTE* const oend = ostart + dstSize; - - const U32 dtLog = DTable[0]; - const void* const ddPtr = DTable+1; - const HUF_DDescX6* dd = (const HUF_DDescX6*)ddPtr; - const void* const dsPtr = DTable + 1 + ((size_t)1<<(dtLog-1)); - const HUF_DSeqX6* ds = (const HUF_DSeqX6*)dsPtr; - size_t errorCode; - /* Check */ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ - /* Init */ - BIT_DStream_t bitD1; - BIT_DStream_t bitD2; - BIT_DStream_t bitD3; - BIT_DStream_t bitD4; - const size_t length1 = MEM_readLE16(istart); - const size_t length2 = MEM_readLE16(istart+2); - const size_t length3 = MEM_readLE16(istart+4); - size_t length4; - const BYTE* const istart1 = istart + 6; /* jumpTable */ - const BYTE* const istart2 = istart1 + length1; - const BYTE* const istart3 = istart2 + length2; - const BYTE* const istart4 = istart3 + length3; - const size_t segmentSize = (dstSize+3) / 4; - BYTE* const opStart2 = ostart + segmentSize; - BYTE* const opStart3 = opStart2 + segmentSize; - BYTE* const opStart4 = opStart3 + segmentSize; - BYTE* op1 = ostart; - BYTE* op2 = opStart2; - BYTE* op3 = opStart3; - BYTE* op4 = opStart4; - U32 endSignal; + { + const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; - length4 = cSrcSize - (length1 + length2 + length3 + 6); - if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ 
- errorCode = BIT_initDStream(&bitD1, istart1, length1); - if (HUF_isError(errorCode)) return errorCode; - errorCode = BIT_initDStream(&bitD2, istart2, length2); - if (HUF_isError(errorCode)) return errorCode; - errorCode = BIT_initDStream(&bitD3, istart3, length3); - if (HUF_isError(errorCode)) return errorCode; - errorCode = BIT_initDStream(&bitD4, istart4, length4); - if (HUF_isError(errorCode)) return errorCode; + const U32 dtLog = DTable[0]; + const void* const ddPtr = DTable+1; + const HUF_DDescX6* dd = (const HUF_DDescX6*)ddPtr; + const void* const dsPtr = DTable + 1 + ((size_t)1<<(dtLog-1)); + const HUF_DSeqX6* ds = (const HUF_DSeqX6*)dsPtr; + size_t errorCode; - /* 16-64 symbols per loop (4-16 symbols per stream) */ - endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); - for ( ; (op3 <= opStart4) && (endSignal==BIT_DStream_unfinished) && (op4<=(oend-16)) ; ) { - HUF_DECODE_SYMBOLX6_2(op1, &bitD1); - HUF_DECODE_SYMBOLX6_2(op2, &bitD2); - HUF_DECODE_SYMBOLX6_2(op3, &bitD3); - HUF_DECODE_SYMBOLX6_2(op4, &bitD4); - HUF_DECODE_SYMBOLX6_1(op1, &bitD1); - HUF_DECODE_SYMBOLX6_1(op2, &bitD2); - HUF_DECODE_SYMBOLX6_1(op3, &bitD3); - HUF_DECODE_SYMBOLX6_1(op4, &bitD4); - HUF_DECODE_SYMBOLX6_2(op1, &bitD1); - HUF_DECODE_SYMBOLX6_2(op2, &bitD2); - HUF_DECODE_SYMBOLX6_2(op3, &bitD3); - HUF_DECODE_SYMBOLX6_2(op4, &bitD4); - HUF_DECODE_SYMBOLX6_0(op1, &bitD1); - HUF_DECODE_SYMBOLX6_0(op2, &bitD2); - HUF_DECODE_SYMBOLX6_0(op3, &bitD3); - HUF_DECODE_SYMBOLX6_0(op4, &bitD4); + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + const size_t length1 = MEM_readLE16(istart); + const size_t length2 = MEM_readLE16(istart+2); + const size_t length3 = MEM_readLE16(istart+4); + size_t length4; + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* 
const istart4 = istart3 + length3; + const size_t segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + U32 endSignal; + length4 = cSrcSize - (length1 + length2 + length3 + 6); + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + errorCode = BIT_initDStream(&bitD1, istart1, length1); + if (HUF_isError(errorCode)) return errorCode; + errorCode = BIT_initDStream(&bitD2, istart2, length2); + if (HUF_isError(errorCode)) return errorCode; + errorCode = BIT_initDStream(&bitD3, istart3, length3); + if (HUF_isError(errorCode)) return errorCode; + errorCode = BIT_initDStream(&bitD4, istart4, length4); + if (HUF_isError(errorCode)) return errorCode; + + /* 16-64 symbols per loop (4-16 symbols per stream) */ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + for ( ; (op3 <= opStart4) && (endSignal==BIT_DStream_unfinished) && (op4<=(oend-16)) ; ) { + HUF_DECODE_SYMBOLX6_2(op1, &bitD1); + HUF_DECODE_SYMBOLX6_2(op2, &bitD2); + HUF_DECODE_SYMBOLX6_2(op3, &bitD3); + HUF_DECODE_SYMBOLX6_2(op4, &bitD4); + HUF_DECODE_SYMBOLX6_1(op1, &bitD1); + HUF_DECODE_SYMBOLX6_1(op2, &bitD2); + HUF_DECODE_SYMBOLX6_1(op3, &bitD3); + HUF_DECODE_SYMBOLX6_1(op4, &bitD4); + HUF_DECODE_SYMBOLX6_2(op1, &bitD1); + HUF_DECODE_SYMBOLX6_2(op2, &bitD2); + HUF_DECODE_SYMBOLX6_2(op3, &bitD3); + HUF_DECODE_SYMBOLX6_2(op4, &bitD4); + HUF_DECODE_SYMBOLX6_0(op1, &bitD1); + HUF_DECODE_SYMBOLX6_0(op2, &bitD2); + HUF_DECODE_SYMBOLX6_0(op3, &bitD3); + HUF_DECODE_SYMBOLX6_0(op4, &bitD4); + + endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + } + + /* check corruption */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 
> opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 supposed already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX6(op1, &bitD1, opStart2, DTable, dtLog); + HUF_decodeStreamX6(op2, &bitD2, opStart3, DTable, dtLog); + HUF_decodeStreamX6(op3, &bitD3, opStart4, DTable, dtLog); + HUF_decodeStreamX6(op4, &bitD4, oend, DTable, dtLog); + + /* check */ + endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endSignal) return ERROR(corruption_detected); + + /* decoded size */ + return dstSize; } - - /* check corruption */ - if (op1 > opStart2) return ERROR(corruption_detected); - if (op2 > opStart3) return ERROR(corruption_detected); - if (op3 > opStart4) return ERROR(corruption_detected); - /* note : op4 supposed already verified within main loop */ - - /* finish bitStreams one by one */ - HUF_decodeStreamX6(op1, &bitD1, opStart2, DTable, dtLog); - HUF_decodeStreamX6(op2, &bitD2, opStart3, DTable, dtLog); - HUF_decodeStreamX6(op3, &bitD3, opStart4, DTable, dtLog); - HUF_decodeStreamX6(op4, &bitD4, oend, DTable, dtLog); - - /* check */ - endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); - if (!endSignal) return ERROR(corruption_detected); - - /* decoded size */ - return dstSize; } From 6291c54006e21abd4b516ec0a96488c4056ead46 Mon Sep 17 00:00:00 2001 From: inikep Date: Fri, 19 Feb 2016 18:24:14 +0100 Subject: [PATCH 008/247] litlen bounded to 128 bytes --- lib/zstd_internal.h | 4 ++-- lib/zstd_opt.h | 32 ++++++++++++++++++-------------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index 3d60e87b..620572e5 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -50,7 +50,7 @@ /*-************************************* * Common constants 
***************************************/ -#define ZSTD_OPT_DEBUG 0 // 1 = tableID=0; 5 = check encoded sequences; 9 = full logs +#define ZSTD_OPT_DEBUG 1 // 1 = tableID=0; 5 = check encoded sequences; 9 = full logs #if ZSTD_OPT_DEBUG > 0 #include /* for debug */ #endif @@ -79,7 +79,7 @@ static const size_t ZSTD_frameHeaderSize_min = 5; #define IS_RAW 2 #define IS_RLE 3 -#define MINMATCH 4 +#define MINMATCH 3 #define REPCODE_STARTVALUE 1 #define Litbits 8 diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index 98ed77d6..a25e4642 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -107,9 +107,9 @@ FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYT price += ZSTD_highbit(seqStorePtr->matchLengthSum) - ZSTD_highbit(seqStorePtr->matchLengthFreq[matchLength]); if (!litLength) - return price + 1; /* special case */ + return price + 1 + ((seqStorePtr->litSum>>4) / seqStorePtr->litLengthSum) + (matchLength==0); - return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals); //((seqStorePtr->litSum>>4) / seqStorePtr->litLengthSum) + (matchLength==0) + (litLength==0) + price; + return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ((seqStorePtr->litSum>>4) / seqStorePtr->litLengthSum) + (matchLength==0); } @@ -536,6 +536,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, const BYTE* const istart = (const BYTE*)src; const BYTE* ip = istart; const BYTE* anchor = istart; + const BYTE* litstart; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - 8; const BYTE* const base = ctx->base + ctx->dictLimit; @@ -571,7 +572,8 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, memset(opt, 0, sizeof(ZSTD_optimal_t)); last_pos = 0; inr = ip; - opt[0].litlen = (U32)(ip - anchor); + litstart = ((U32)(ip - anchor) > 128) ? 
ip - 128 : anchor; + opt[0].litlen = (U32)(ip - litstart); /* check repCode */ if (MEM_readMINMATCH(ip+1) == MEM_readMINMATCH(ip+1 - rep_1)) { @@ -586,7 +588,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, litlen = opt[0].litlen + 1; do { - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, 0, mlen); + price = ZSTD_getPrice(seqStorePtr, litlen, litstart, 0, mlen); if (mlen + 1 > last_pos || price < opt[mlen + 1].price) SET_PRICE(mlen + 1, mlen, 0, litlen, price); /* note : macro modifies last_pos */ mlen--; @@ -622,7 +624,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, ZSTD_LOG_PARSER("%d: start Found mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(ip-base), matches[u].len, matches[u].off, (int)best_mlen, (int)last_pos); litlen = opt[0].litlen; while (mlen <= best_mlen) { - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off, mlen); + price = ZSTD_getPrice(seqStorePtr, litlen, litstart, matches[u].off, mlen); if (mlen > last_pos || price < opt[mlen].price) SET_PRICE(mlen, mlen, matches[u].off, litlen, price); mlen++; @@ -641,7 +643,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-litlen); ZSTD_LOG_TRY_PRICE("%d: TRY1 opt[%d].price=%d price=%d cur=%d litlen=%d\n", (int)(inr-base), cur - litlen, opt[cur - litlen].price, price, cur, litlen); } else - price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor); + price = ZSTD_getLiteralPrice(seqStorePtr, litlen, litstart); } else { litlen = 1; price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1); @@ -705,7 +707,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, 0, mlen); ZSTD_LOG_TRY_PRICE("%d: TRY5 opt[%d].price=%d price=%d cur=%d litlen=%d\n", (int)(inr-base), cur - litlen, opt[cur - litlen].price, price, cur, litlen); } else - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, 0, mlen); + 
price = ZSTD_getPrice(seqStorePtr, litlen, litstart, 0, mlen); } else { litlen = 0; price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, 0, mlen); @@ -754,7 +756,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, if (cur2 > litlen) price = opt[cur2 - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur2-litlen, matches[u].off, mlen); else - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off, mlen); + price = ZSTD_getPrice(seqStorePtr, litlen, litstart, matches[u].off, mlen); } else { litlen = 0; price = opt[cur2].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off, mlen); @@ -883,6 +885,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, const BYTE* const istart = (const BYTE*)src; const BYTE* ip = istart; const BYTE* anchor = istart; + const BYTE* litstart; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - 8; const BYTE* const base = ctx->base; @@ -921,7 +924,8 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, memset(opt, 0, sizeof(ZSTD_optimal_t)); last_pos = 0; inr = ip; - opt[0].litlen = (U32)(ip - anchor); + litstart = ((U32)(ip - anchor) > 128) ? 
ip - 128 : anchor; + opt[0].litlen = (U32)(ip - litstart); /* check repCode */ { @@ -942,7 +946,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, litlen = opt[0].litlen + 1; do { - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, 0, mlen); + price = ZSTD_getPrice(seqStorePtr, litlen, litstart, 0, mlen); if (mlen + 1 > last_pos || price < opt[mlen + 1].price) SET_PRICE(mlen + 1, mlen, 0, litlen, price); mlen--; @@ -978,7 +982,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, ZSTD_LOG_PARSER("%d: start Found mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(ip-base), matches[u].len, matches[u].off, (int)best_mlen, (int)last_pos); litlen = opt[0].litlen; while (mlen <= best_mlen) { - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off, mlen); + price = ZSTD_getPrice(seqStorePtr, litlen, litstart, matches[u].off, mlen); if (mlen > last_pos || price < opt[mlen].price) SET_PRICE(mlen, mlen, matches[u].off, litlen, price); mlen++; @@ -1000,7 +1004,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-litlen); ZSTD_LOG_TRY_PRICE("%d: TRY1 opt[%d].price=%d price=%d cur=%d litlen=%d\n", (int)(inr-base), cur - litlen, opt[cur - litlen].price, price, cur, litlen); } else - price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor); + price = ZSTD_getLiteralPrice(seqStorePtr, litlen, litstart); } else { litlen = 1; price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1); @@ -1070,7 +1074,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, 0, mlen); ZSTD_LOG_TRY_PRICE("%d: TRY5 opt[%d].price=%d price=%d cur=%d litlen=%d\n", (int)(inr-base), cur - litlen, opt[cur - litlen].price, price, cur, litlen); } else - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, 0, mlen); + price = ZSTD_getPrice(seqStorePtr, litlen, litstart, 0, mlen); 
} else { litlen = 0; price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, 0, mlen); @@ -1120,7 +1124,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, if (cur2 > litlen) price = opt[cur2 - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur2-litlen, matches[u].off, mlen); else - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off, mlen); + price = ZSTD_getPrice(seqStorePtr, litlen, litstart, matches[u].off, mlen); } else { litlen = 0; price = opt[cur2].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off, mlen); From d3b8d7a4e533eeb29a4df93cf6ef870a4a1bb9ad Mon Sep 17 00:00:00 2001 From: inikep Date: Mon, 22 Feb 2016 10:06:17 +0100 Subject: [PATCH 009/247] removed ZSTD_opt parser --- lib/zstd_compress.c | 25 ++----- lib/zstd_opt.h | 174 ++++---------------------------------------- lib/zstd_static.h | 2 +- 3 files changed, 21 insertions(+), 180 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 26898c59..8c917752 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -1657,14 +1657,9 @@ _storeSequence: } -static void ZSTD_compressBlock_opt_bt(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +static void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1, 2); -} - -static void ZSTD_compressBlock_opt(ZSTD_CCtx* ctx, const void* src, size_t srcSize) -{ - ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0, 2); + ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 2); } static void ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) @@ -1881,14 +1876,9 @@ static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2); } -static void ZSTD_compressBlock_opt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - 
ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0, 2); -} - -static void ZSTD_compressBlock_opt_bt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) -{ - ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1, 2); + ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 2); } @@ -1896,9 +1886,9 @@ typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t sr static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict) { - static const ZSTD_blockCompressor blockCompressor[2][7] = { - { ZSTD_compressBlock_fast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy,ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_opt, ZSTD_compressBlock_opt_bt }, - { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_opt_extDict, ZSTD_compressBlock_opt_bt_extDict } + static const ZSTD_blockCompressor blockCompressor[2][6] = { + { ZSTD_compressBlock_fast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt }, + { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict } }; return blockCompressor[extDict][(U32)strat]; @@ -2062,7 +2052,6 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t case ZSTD_greedy: case ZSTD_lazy: case ZSTD_lazy2: - case ZSTD_opt: ZSTD_insertAndFindFirstIndex (zc, iend-8, zc->params.searchLength); break; diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index a25e4642..accefe52 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -398,139 +398,13 @@ static U32 ZSTD_BtGetAllMatches_selectMLS_extDict ( } -/* *********************** -* Hash Chain -*************************/ -FORCE_INLINE /* 
inlining is important to hardwire a hot branch (template emulation) */ -U32 ZSTD_HcGetAllMatches_generic ( - ZSTD_CCtx* zc, /* Index table will be updated */ - const BYTE* const ip, const BYTE* const iLowLimit, const BYTE* const iHighLimit, - const U32 maxNbAttempts, const U32 mls, const U32 extDict, ZSTD_match_t* matches) -{ - U32* const chainTable = zc->contentTable; - const U32 chainSize = (1U << zc->params.contentLog); - const U32 chainMask = chainSize-1; - const BYTE* const base = zc->base; - const BYTE* const dictBase = zc->dictBase; - const U32 dictLimit = zc->dictLimit; - const BYTE* const prefixStart = base + dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const dictStart = dictBase + zc->lowLimit; - const U32 lowLimit = zc->lowLimit; - const U32 current = (U32)(ip-base); - const U32 minChain = current > chainSize ? current - chainSize : 0; - U32 matchIndex; - U32 mnum = 0; - const BYTE* match; - U32 nbAttempts=maxNbAttempts; - size_t minml=MINMATCH-1; - - /* HC4 match finder with update */ - matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls); - -#if MINMATCH == 3 - /* HC3 match finder */ - U32 matchIndex3 = zc->hashTable3[ZSTD_hash3Ptr(ip, zc->params.hashLog3)]; - - if (matchIndex3>lowLimit) { - size_t currentMl=0; - if ((!extDict) || matchIndex3 >= dictLimit) { - match = base + matchIndex3; - if (match[minml] == ip[minml]) currentMl = ZSTD_count(ip, match, iHighLimit); // faster - //if (MEM_readMINMATCH(match) == MEM_readMINMATCH(ip)) currentMl = ZSTD_count(ip+MINMATCH, match+MINMATCH, iHighLimit)+MINMATCH; // stronger - } else { - match = dictBase + matchIndex3; - if (MEM_readMINMATCH(match) == MEM_readMINMATCH(ip)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */ - currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iHighLimit, dictEnd, prefixStart) + MINMATCH; - } - - /* save best solution */ - if (currentMl > minml) { - minml = currentMl; - matches[mnum].off = current - matchIndex3; - 
matches[mnum].len = (U32)currentMl; - matches[mnum].back = 0; - mnum++; - if (currentMl > ZSTD_OPT_NUM) return mnum; - if (ip+currentMl == iHighLimit) return mnum; /* best possible, and avoid read overflow*/ - } - } -#endif - - while ((matchIndex>lowLimit) && (nbAttempts)) { - size_t currentMl=0; - int back = 0; - nbAttempts--; - if ((!extDict) || matchIndex >= dictLimit) { - match = base + matchIndex; - if (match[minml] == ip[minml]) currentMl = ZSTD_count(ip, match, iHighLimit); if (currentMl>0) { // faster - //if (MEM_readMINMATCH(match) == MEM_readMINMATCH(ip)) { currentMl = ZSTD_count(ip+MINMATCH, match+MINMATCH, iHighLimit)+MINMATCH; // stronger - while ((match+back > prefixStart) && (ip+back > iLowLimit) && (ip[back-1] == match[back-1])) back--; - currentMl += (U32)(-back); - } - } else { - match = dictBase + matchIndex; - if (MEM_readMINMATCH(match) == MEM_readMINMATCH(ip)) { /* assumption : matchIndex <= dictLimit-4 (by table construction) */ - currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iHighLimit, dictEnd, prefixStart) + MINMATCH; - while ((match+back > dictStart) && (ip+back > iLowLimit) && (ip[back-1] == match[back-1])) back--; /* backward match extension */ - currentMl += (U32)(-back); - } } - - /* save best solution */ - if (currentMl > minml) { - minml = currentMl; - matches[mnum].off = current - matchIndex; - matches[mnum].len = (U32)currentMl; - matches[mnum].back = (U32)(-back); - mnum++; - if (currentMl > ZSTD_OPT_NUM) break; - if (ip+currentMl == iHighLimit) break; /* best possible, and avoid read overflow*/ - } - - if (matchIndex <= minChain) break; - matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); - } - - return mnum; -} - - -static U32 ZSTD_HcGetAllMatches_selectMLS ( - ZSTD_CCtx* zc, - const BYTE* ip, const BYTE* const iLowLimit, const BYTE* const iHighLimit, - const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches) -{ - switch(matchLengthSearch) - { - default : - case 4 : return 
ZSTD_HcGetAllMatches_generic(zc, ip, iLowLimit, iHighLimit, maxNbAttempts, 4, 0, matches); - case 5 : return ZSTD_HcGetAllMatches_generic(zc, ip, iLowLimit, iHighLimit, maxNbAttempts, 5, 0, matches); - case 6 : return ZSTD_HcGetAllMatches_generic(zc, ip, iLowLimit, iHighLimit, maxNbAttempts, 6, 0, matches); - } -} - -static U32 ZSTD_HcGetAllMatches_selectMLS_extDict ( - ZSTD_CCtx* zc, - const BYTE* ip, const BYTE* const iLowLimit, const BYTE* const iHighLimit, - const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches) -{ - switch(matchLengthSearch) - { - default : - case 4 : return ZSTD_HcGetAllMatches_generic(zc, ip, iLowLimit, iHighLimit, maxNbAttempts, 4, 1, matches); - case 5 : return ZSTD_HcGetAllMatches_generic(zc, ip, iLowLimit, iHighLimit, maxNbAttempts, 5, 1, matches); - case 6 : return ZSTD_HcGetAllMatches_generic(zc, ip, iLowLimit, iHighLimit, maxNbAttempts, 6, 1, matches); - } -} - - /*-******************************* * Optimal parser *********************************/ FORCE_INLINE void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, const void* src, size_t srcSize, - const U32 searchMethod, const U32 depth) + const U32 depth) { seqStore_t* seqStorePtr = &(ctx->seqStore); const BYTE* const istart = (const BYTE*)src; @@ -545,17 +419,12 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, const U32 maxSearches = 1U << ctx->params.searchLog; const U32 mls = ctx->params.searchLength; - typedef U32 (*getAllMatches_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLowLimit, const BYTE* iHighLimit, - U32 maxNbAttempts, U32 matchLengthSearch, ZSTD_match_t* matches); - getAllMatches_f getAllMatches = searchMethod ? 
ZSTD_BtGetAllMatches_selectMLS : ZSTD_HcGetAllMatches_selectMLS; - ZSTD_optimal_t opt[ZSTD_OPT_NUM+4]; ZSTD_match_t matches[ZSTD_OPT_NUM+1]; const BYTE* inr; - U32 skip_num, cur, cur2, match_num, last_pos, litlen, price; + U32 cur, cur2, match_num, last_pos, litlen, price; const U32 sufficient_len = ctx->params.targetLength; - const U32 faster_get_matches = (ctx->params.strategy == ZSTD_opt); /* init */ ctx->nextToUpdate3 = ctx->nextToUpdate; @@ -595,10 +464,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, } while (mlen >= MINMATCH); } - if (faster_get_matches && last_pos) - match_num = 0; - else - match_num = getAllMatches(ctx, ip, ip, iend, maxSearches, mls, matches); /* first search (depth 0) */ + match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, ip, iend, maxSearches, mls, matches); /* first search (depth 0) */ ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos); if (!last_pos && !match_num) { ip++; continue; } @@ -633,7 +499,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, if (last_pos < MINMATCH) { ip++; continue; } /* check further positions */ - for (skip_num = 0, cur = 1; cur <= last_pos; cur++) { + for (cur = 1; cur <= last_pos; cur++) { size_t cur_rep; inr = ip + cur; @@ -715,8 +581,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, } best_mlen = mlen; - if (faster_get_matches) skip_num = best_mlen; - ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, 0, price, litlen); + ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, 0, price, litlen); do { if (cur + mlen > last_pos || price <= opt[cur + mlen].price) @@ -725,9 +590,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, } while (mlen >= MINMATCH); } - if (faster_get_matches && skip_num > 0) { skip_num--; continue; } - - match_num = getAllMatches(ctx, inr, ip, iend, maxSearches, mls, matches); + match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, 
inr, ip, iend, maxSearches, mls, matches); ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num); if (match_num > 0 && matches[match_num-1].len > sufficient_len) { @@ -769,7 +632,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, SET_PRICE(cur2 + mlen, mlen, matches[u].off, litlen, price); mlen++; - } } } // for (skip_num = 0, cur = 1; cur <= last_pos; cur++) + } } } // for (cur = 1; cur <= last_pos; cur++) best_mlen = opt[last_pos].mlen; best_off = opt[last_pos].off; @@ -879,7 +742,7 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ FORCE_INLINE void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, const void* src, size_t srcSize, - const U32 searchMethod, const U32 depth) + const U32 depth) { seqStore_t* seqStorePtr = &(ctx->seqStore); const BYTE* const istart = (const BYTE*)src; @@ -898,17 +761,12 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, const U32 maxSearches = 1U << ctx->params.searchLog; const U32 mls = ctx->params.searchLength; - typedef U32 (*getAllMatches_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLowLimit, const BYTE* iHighLimit, - U32 maxNbAttempts, U32 matchLengthSearch, ZSTD_match_t* matches); - getAllMatches_f getAllMatches = searchMethod ? ZSTD_BtGetAllMatches_selectMLS_extDict : ZSTD_HcGetAllMatches_selectMLS_extDict; - ZSTD_optimal_t opt[ZSTD_OPT_NUM+4]; ZSTD_match_t matches[ZSTD_OPT_NUM+1]; const BYTE* inr; - U32 skip_num, cur, cur2, match_num, last_pos, litlen, price; + U32 cur, cur2, match_num, last_pos, litlen, price; const U32 sufficient_len = ctx->params.targetLength; - const U32 faster_get_matches = (ctx->params.strategy == ZSTD_opt); /* init */ ctx->nextToUpdate3 = ctx->nextToUpdate; @@ -955,10 +813,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, best_mlen = (last_pos) ? 
last_pos : MINMATCH; - if (faster_get_matches && last_pos) - match_num = 0; - else - match_num = getAllMatches(ctx, ip, ip, iend, maxSearches, mls, matches); /* first search (depth 0) */ + match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, ip, iend, maxSearches, mls, matches); /* first search (depth 0) */ ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos); if (!last_pos && !match_num) { ip++; continue; } @@ -994,7 +849,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, } /* check further positions */ - for (skip_num = 0, cur = 1; cur <= last_pos; cur++) { + for (cur = 1; cur <= last_pos; cur++) { size_t cur_rep; inr = ip + cur; @@ -1082,7 +937,6 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, } best_mlen = mlen; - if (faster_get_matches) skip_num = best_mlen; ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, 0, price, litlen); @@ -1093,11 +947,9 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, } while (mlen >= MINMATCH); } - if (faster_get_matches && skip_num > 0) { skip_num--; continue; } - best_mlen = (best_mlen > MINMATCH) ? 
best_mlen : MINMATCH; - match_num = getAllMatches(ctx, inr, ip, iend, maxSearches, mls, matches); + match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, ip, iend, maxSearches, mls, matches); ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num); if (match_num > 0 && matches[match_num-1].len > sufficient_len) { @@ -1137,7 +989,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, SET_PRICE(cur2 + mlen, mlen, matches[u].off, litlen, price); mlen++; - } } } // for (skip_num = 0, cur = 1; cur <= last_pos; cur++) + } } } // for (cur = 1; cur <= last_pos; cur++) best_mlen = opt[last_pos].mlen; best_off = opt[last_pos].off; diff --git a/lib/zstd_static.h b/lib/zstd_static.h index 6a92a458..692b147d 100644 --- a/lib/zstd_static.h +++ b/lib/zstd_static.h @@ -74,7 +74,7 @@ extern "C" { #define ZSTD_TARGETLENGTH_MAX 999 /* from faster to stronger */ -typedef enum { ZSTD_fast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_opt, ZSTD_btopt } ZSTD_strategy; +typedef enum { ZSTD_fast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_btopt } ZSTD_strategy; typedef struct { From c950b78ce2a82a000bc412edc412d3325fa47f12 Mon Sep 17 00:00:00 2001 From: inikep Date: Mon, 22 Feb 2016 10:11:39 +0100 Subject: [PATCH 010/247] removed backward match extension --- lib/zstd_opt.h | 58 +++++++++++++++++++++----------------------------- 1 file changed, 24 insertions(+), 34 deletions(-) diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index accefe52..9b1fffd7 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -279,7 +279,6 @@ static U32 ZSTD_insertBtAndGetAllMatches ( bestLength = currentMl; matches[mnum].off = current - matchIndex3; matches[mnum].len = (U32)currentMl; - matches[mnum].back = 0; mnum++; if (currentMl > ZSTD_OPT_NUM) return mnum; if (ip+currentMl == iLimit) return mnum; /* best possible, and avoid read overflow*/ @@ -313,7 +312,6 @@ static U32 ZSTD_insertBtAndGetAllMatches ( bestLength = matchLength; matches[mnum].off = 
current - matchIndex; matches[mnum].len = (U32)matchLength; - matches[mnum].back = 0; mnum++; if (matchLength > ZSTD_OPT_NUM) break; if (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */ @@ -422,7 +420,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, ZSTD_optimal_t opt[ZSTD_OPT_NUM+4]; ZSTD_match_t matches[ZSTD_OPT_NUM+1]; const BYTE* inr; - U32 cur, cur2, match_num, last_pos, litlen, price; + U32 cur, match_num, last_pos, litlen, price; const U32 sufficient_len = ctx->params.targetLength; @@ -594,7 +592,6 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num); if (match_num > 0 && matches[match_num-1].len > sufficient_len) { - cur -= matches[match_num-1].back; best_mlen = matches[match_num-1].len; best_off = matches[match_num-1].off; last_pos = cur + 1; @@ -606,30 +603,27 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, /* set prices using matches at position = cur */ for (u = 0; u < match_num; u++) { mlen = (u>0) ? matches[u-1].len+1 : best_mlen; - cur2 = cur - matches[u].back; - best_mlen = (cur2 + matches[u].len < ZSTD_OPT_NUM) ? matches[u].len : ZSTD_OPT_NUM - cur2; + best_mlen = (cur + matches[u].len < ZSTD_OPT_NUM) ? 
matches[u].len : ZSTD_OPT_NUM - cur; - ZSTD_LOG_PARSER("%d: Found1 cur=%d cur2=%d mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(inr-base), cur, cur2, matches[u].len, matches[u].off, best_mlen, last_pos); - if (mlen < matches[u].back + 1) - mlen = matches[u].back + 1; + ZSTD_LOG_PARSER("%d: Found1 cur=%d mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(inr-base), cur, matches[u].len, matches[u].off, best_mlen, last_pos); while (mlen <= best_mlen) { - if (opt[cur2].mlen == 1) { - litlen = opt[cur2].litlen; - if (cur2 > litlen) - price = opt[cur2 - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur2-litlen, matches[u].off, mlen); + if (opt[cur].mlen == 1) { + litlen = opt[cur].litlen; + if (cur > litlen) + price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off, mlen); else price = ZSTD_getPrice(seqStorePtr, litlen, litstart, matches[u].off, mlen); } else { litlen = 0; - price = opt[cur2].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off, mlen); + price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off, mlen); } - ZSTD_LOG_PARSER("%d: Found2 pred=%d mlen=%d best_mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), matches[u].back, mlen, best_mlen, matches[u].off, price, litlen); - ZSTD_LOG_TRY_PRICE("%d: TRY8 price=%d opt[%d].price=%d\n", (int)(inr-base), price, cur2 + mlen, opt[cur2 + mlen].price); + ZSTD_LOG_PARSER("%d: Found2 mlen=%d best_mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, best_mlen, matches[u].off, price, litlen); + ZSTD_LOG_TRY_PRICE("%d: TRY8 price=%d opt[%d].price=%d\n", (int)(inr-base), price, cur + mlen, opt[cur + mlen].price); - if (cur2 + mlen > last_pos || (price < opt[cur2 + mlen].price)) - SET_PRICE(cur2 + mlen, mlen, matches[u].off, litlen, price); + if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) + SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price); mlen++; } } } // for (cur = 1; cur <= last_pos; cur++) @@ -764,7 +758,7 
@@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, ZSTD_optimal_t opt[ZSTD_OPT_NUM+4]; ZSTD_match_t matches[ZSTD_OPT_NUM+1]; const BYTE* inr; - U32 cur, cur2, match_num, last_pos, litlen, price; + U32 cur, match_num, last_pos, litlen, price; const U32 sufficient_len = ctx->params.targetLength; @@ -953,7 +947,6 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num); if (match_num > 0 && matches[match_num-1].len > sufficient_len) { - cur -= matches[match_num-1].back; best_mlen = matches[match_num-1].len; best_off = matches[match_num-1].off; last_pos = cur + 1; @@ -963,30 +956,27 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, // set prices using matches at position = cur for (u = 0; u < match_num; u++) { mlen = (u>0) ? matches[u-1].len+1 : best_mlen; - cur2 = cur - matches[u].back; - best_mlen = (cur2 + matches[u].len < ZSTD_OPT_NUM) ? matches[u].len : ZSTD_OPT_NUM - cur2; + best_mlen = (cur + matches[u].len < ZSTD_OPT_NUM) ? 
matches[u].len : ZSTD_OPT_NUM - cur; - ZSTD_LOG_PARSER("%d: Found1 cur=%d cur2=%d mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(inr-base), cur, cur2, matches[u].len, matches[u].off, best_mlen, last_pos); - if (mlen < matches[u].back + 1) - mlen = matches[u].back + 1; + ZSTD_LOG_PARSER("%d: Found1 cur=%d mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(inr-base), cur, matches[u].len, matches[u].off, best_mlen, last_pos); while (mlen <= best_mlen) { - if (opt[cur2].mlen == 1) { - litlen = opt[cur2].litlen; - if (cur2 > litlen) - price = opt[cur2 - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur2-litlen, matches[u].off, mlen); + if (opt[cur].mlen == 1) { + litlen = opt[cur].litlen; + if (cur > litlen) + price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off, mlen); else price = ZSTD_getPrice(seqStorePtr, litlen, litstart, matches[u].off, mlen); } else { litlen = 0; - price = opt[cur2].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off, mlen); + price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off, mlen); } - ZSTD_LOG_PARSER("%d: Found2 pred=%d mlen=%d best_mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), matches[u].back, mlen, best_mlen, matches[u].off, price, litlen); - ZSTD_LOG_TRY_PRICE("%d: TRY8 price=%d opt[%d].price=%d\n", (int)(inr-base), price, cur2 + mlen, opt[cur2 + mlen].price); + ZSTD_LOG_PARSER("%d: Found2 mlen=%d best_mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, best_mlen, matches[u].off, price, litlen); + ZSTD_LOG_TRY_PRICE("%d: TRY8 price=%d opt[%d].price=%d\n", (int)(inr-base), price, cur + mlen, opt[cur + mlen].price); - if (cur2 + mlen > last_pos || (price < opt[cur2 + mlen].price)) - SET_PRICE(cur2 + mlen, mlen, matches[u].off, litlen, price); + if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) + SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price); mlen++; } } } // for (cur = 1; cur <= last_pos; cur++) From 
84f43e235979e75fa6c42b73faac535c75ad3398 Mon Sep 17 00:00:00 2001 From: inikep Date: Mon, 22 Feb 2016 11:34:07 +0100 Subject: [PATCH 011/247] support for searchLength from 3 to 7 for optimal parser --- lib/zstd_compress.c | 23 ++- lib/zstd_internal.h | 2 +- lib/zstd_opt.h | 327 ++++++++++------------------------------ lib/zstd_opt_internal.h | 231 ++++++++++++++++++++++++++++ lib/zstd_static.h | 2 +- 5 files changed, 321 insertions(+), 264 deletions(-) create mode 100644 lib/zstd_opt_internal.h diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 8c917752..8e572b4a 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -1369,6 +1369,7 @@ static void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const B while (idx < target) idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 1); } +#include "zstd_opt_internal.h" /** Tree updater, providing best match */ static size_t ZSTD_BtFindBestMatch_extDict ( @@ -1410,10 +1411,6 @@ static size_t ZSTD_BtFindBestMatch_selectMLS_extDict ( FORCE_INLINE U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls) { -#if MINMATCH == 3 - U32* const hashTable3 = zc->hashTable3; - const U32 hashLog3 = zc->params.hashLog3; -#endif U32* const hashTable = zc->hashTable; const U32 hashLog = zc->params.hashLog; U32* const chainTable = zc->contentTable; @@ -1426,9 +1423,6 @@ U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls) size_t h = ZSTD_hashPtr(base+idx, hashLog, mls); NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; hashTable[h] = idx; -#if MINMATCH == 3 - hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx; -#endif idx++; } @@ -1436,8 +1430,6 @@ U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls) return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; } -#include "zstd_opt.h" - FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */ size_t ZSTD_HcFindBestMatch_generic ( @@ -1659,7 +1651,10 @@ _storeSequence: static void 
ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 2); + if (ctx->params.searchLength == 3) + ZSTD_compressBlock_opt_generic3(ctx, src, srcSize, 2); + else + ZSTD_compressBlock_opt_generic4(ctx, src, srcSize, 2); } static void ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) @@ -1878,7 +1873,10 @@ static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 2); + if (ctx->params.searchLength == 3) + ZSTD_compressBlock_opt_extDict_generic3(ctx, src, srcSize, 2); + else + ZSTD_compressBlock_opt_extDict_generic4(ctx, src, srcSize, 2); } @@ -2261,7 +2259,7 @@ size_t ZSTD_compress(void* dst, size_t maxDstSize, const void* src, size_t srcSi /*-===== Pre-defined compression levels =====-*/ -#define ZSTD_MAX_CLEVEL 21 +#define ZSTD_MAX_CLEVEL 22 unsigned ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } @@ -2290,6 +2288,7 @@ static const ZSTD_parameters ZSTD_defaultParameters[4][ZSTD_MAX_CLEVEL+1] = { { 0, 25, 25, 24, 16, 5, 4, 40, ZSTD_btopt }, /* level 19 */ { 0, 26, 26, 25, 16, 8, 4,256, ZSTD_btopt }, /* level 20 */ { 0, 26, 27, 25, 24, 10, 4,256, ZSTD_btopt }, /* level 21 */ + { 0, 26, 26, 25, 16, 8, 3,256, ZSTD_btopt }, /* level 20+MM3 */ }, { /* for srcSize <= 256 KB */ /* l, W, C, H, H3, S, L, T, strat */ diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index 620572e5..b9826aa7 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -79,7 +79,7 @@ static const size_t ZSTD_frameHeaderSize_min = 5; #define IS_RAW 2 #define IS_RLE 3 -#define MINMATCH 3 +#define MINMATCH 4 #define REPCODE_STARTVALUE 1 #define Litbits 8 diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index 9b1fffd7..24343d69 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -31,77 +31,17 @@ - Zstd source repository : 
https://www.zstd.net */ -/* Note : this file is intended to be included within zstd_compress.c */ +/* Note : this file is intended to be included within zstd_opt_internal.h */ -/*- Local types -*/ -typedef struct { - U32 off; - U32 len; - U32 back; -} ZSTD_match_t; - -typedef struct { - U32 price; - U32 off; - U32 mlen; - U32 litlen; - U32 rep; - U32 rep2; -} ZSTD_optimal_t; - - -/*- Constants -*/ -#define ZSTD_OPT_NUM (1<<12) -#define ZSTD_FREQ_START 1 -#define ZSTD_FREQ_STEP 1 -#define ZSTD_FREQ_DIV 6 - -/*- Debug -*/ -#if defined(ZSTD_OPT_DEBUG) && ZSTD_OPT_DEBUG>=9 - #define ZSTD_LOG_PARSER(...) printf(__VA_ARGS__) - #define ZSTD_LOG_ENCODE(...) printf(__VA_ARGS__) - #define ZSTD_LOG_TRY_PRICE(...) printf(__VA_ARGS__) -#else - #define ZSTD_LOG_PARSER(...) - #define ZSTD_LOG_ENCODE(...) - #define ZSTD_LOG_TRY_PRICE(...) -#endif - - -#if MINMATCH == 3 - #define MEM_readMINMATCH(ptr) ((U32)(MEM_read32(ptr)<<8)) -#else - #define MEM_readMINMATCH(ptr) (U32)(MEM_read32(ptr)) -#endif - - -FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals) -{ - U32 price, u; - - /* literals */ - price = litLength * ZSTD_highbit(seqStorePtr->litSum); - for (u=0; u < litLength; u++) - price -= ZSTD_highbit(seqStorePtr->litFreq[literals[u]]); - - /* literal Length */ - price += ((litLength >= MaxLL)<<3) + ((litLength >= 255+MaxLL)<<4) + ((litLength>=(1<<15))<<3); - if (litLength >= MaxLL) litLength = MaxLL; - price += ZSTD_highbit(seqStorePtr->litLengthSum) - ZSTD_highbit(seqStorePtr->litLengthFreq[litLength]); - - return price; -} - - -FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) +FORCE_INLINE U32 ZSTD_GETPRICE(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) { /* offset */ BYTE offCode = offset ? 
(BYTE)ZSTD_highbit(offset) + 1 : 0; U32 price = offCode + ZSTD_highbit(seqStorePtr->offCodeSum) - ZSTD_highbit(seqStorePtr->offCodeFreq[offCode]); /* match Length */ - matchLength -= MINMATCH; + matchLength -= MINMATCHOPT; price += ((matchLength >= MaxML)<<3) + ((matchLength >= 255+MaxML)<<4) + ((matchLength>=(1<<15))<<3); if (matchLength >= MaxML) matchLength = MaxML; price += ZSTD_highbit(seqStorePtr->matchLengthSum) - ZSTD_highbit(seqStorePtr->matchLengthFreq[matchLength]); @@ -113,120 +53,10 @@ FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYT } -MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr) -{ - unsigned u; - - // printf("matchLengthSum=%d litLengthSum=%d litSum=%d offCodeSum=%d\n", ssPtr->matchLengthSum, ssPtr->litLengthSum, ssPtr->litSum, ssPtr->offCodeSum); - - if (ssPtr->litLengthSum == 0) { - ssPtr->matchLengthSum = (1<litLengthSum = (1<litSum = (1<offCodeSum = (1<litFreq[u] = 1; - for (u=0; u<=MaxLL; u++) - ssPtr->litLengthFreq[u] = 1; - for (u=0; u<=MaxML; u++) - ssPtr->matchLengthFreq[u] = 1; - for (u=0; u<=MaxOff; u++) - ssPtr->offCodeFreq[u] = 1; - } else { - ssPtr->matchLengthSum = 0; - ssPtr->litLengthSum = 0; - ssPtr->litSum = 0; - ssPtr->offCodeSum = 0; - - for (u=0; u<=MaxLit; u++) { - ssPtr->litFreq[u] = ZSTD_FREQ_START + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV); - ssPtr->litSum += ssPtr->litFreq[u]; - } - for (u=0; u<=MaxLL; u++) { - ssPtr->litLengthFreq[u] = ZSTD_FREQ_START + (ssPtr->litLengthFreq[u]>>ZSTD_FREQ_DIV); - ssPtr->litLengthSum += ssPtr->litLengthFreq[u]; - } - for (u=0; u<=MaxML; u++) { - ssPtr->matchLengthFreq[u] = ZSTD_FREQ_START + (ssPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV); - ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u]; - } - for (u=0; u<=MaxOff; u++) { - ssPtr->offCodeFreq[u] = ZSTD_FREQ_START + (ssPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV); - ssPtr->offCodeSum += ssPtr->offCodeFreq[u]; - } - } -} - - -MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, 
U32 offset, U32 matchLength) -{ - U32 u; - - /* literals */ - seqStorePtr->litSum += litLength * ZSTD_FREQ_STEP; - for (u=0; u < litLength; u++) - seqStorePtr->litFreq[literals[u]] += ZSTD_FREQ_STEP; - - /* literal Length */ - seqStorePtr->litLengthSum += ZSTD_FREQ_STEP; - if (litLength >= MaxLL) - seqStorePtr->litLengthFreq[MaxLL] += ZSTD_FREQ_STEP; - else - seqStorePtr->litLengthFreq[litLength] += ZSTD_FREQ_STEP; - - /* match offset */ - seqStorePtr->offCodeSum += ZSTD_FREQ_STEP; - BYTE offCode = offset ? (BYTE)ZSTD_highbit(offset) + 1 : 0; - seqStorePtr->offCodeFreq[offCode] += ZSTD_FREQ_STEP; - - /* match Length */ - seqStorePtr->matchLengthSum += ZSTD_FREQ_STEP; - if (matchLength >= MaxML) - seqStorePtr->matchLengthFreq[MaxML] += ZSTD_FREQ_STEP; - else - seqStorePtr->matchLengthFreq[matchLength] += ZSTD_FREQ_STEP; -} - - -#define SET_PRICE(pos, mlen_, offset_, litlen_, price_) \ - { \ - while (last_pos < pos) { opt[last_pos+1].price = 1<<30; last_pos++; } \ - opt[pos].mlen = mlen_; \ - opt[pos].off = offset_; \ - opt[pos].litlen = litlen_; \ - opt[pos].price = price_; \ - ZSTD_LOG_PARSER("%d: SET price[%d/%d]=%d litlen=%d len=%d off=%d\n", (int)(inr-base), (int)pos, (int)last_pos, opt[pos].price, opt[pos].litlen, opt[pos].mlen, opt[pos].off); \ - } - - - /*-************************************* * Binary Tree search ***************************************/ -/* Update hashTable3 up to ip (excluded) - Assumption : always within prefix (ie. 
not within extDict) */ -#if MINMATCH == 3 -static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip) -{ - U32* const hashTable3 = zc->hashTable3; - const U32 hashLog3 = zc->params.hashLog3; - const BYTE* const base = zc->base; - const U32 target = (U32)(ip - base); - U32 idx = zc->nextToUpdate3; - - while(idx < target) { - hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx; - idx++; - } - - zc->nextToUpdate3 = target; - return hashTable3[ZSTD_hash3Ptr(ip, hashLog3)]; -} -#endif - - -static U32 ZSTD_insertBtAndGetAllMatches ( +static U32 ZSTD_INSERTBTANDGETALLMATCHES ( ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iLimit, U32 nbCompares, const U32 mls, @@ -254,10 +84,10 @@ static U32 ZSTD_insertBtAndGetAllMatches ( U32 dummy32; /* to be nullified at the end */ U32 mnum = 0; - size_t bestLength = MINMATCH-1; + size_t bestLength = MINMATCHOPT-1; hashTable[h] = current; /* Update Hash Table */ -#if MINMATCH == 3 +#if MINMATCHOPT == 3 /* HC3 match finder */ U32 matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip); @@ -266,12 +96,11 @@ static U32 ZSTD_insertBtAndGetAllMatches ( size_t currentMl=0; if ((!extDict) || matchIndex3 >= dictLimit) { match = base + matchIndex3; - if (match[bestLength] == ip[bestLength]) currentMl = ZSTD_count(ip, match, iLimit); // faster - // if (MEM_readMINMATCH(match) == MEM_readMINMATCH(ip)) currentMl = ZSTD_count(ip+MINMATCH, match+MINMATCH, iLimit)+MINMATCH; // stronger + if (match[bestLength] == ip[bestLength]) currentMl = ZSTD_count(ip, match, iLimit); } else { match = dictBase + matchIndex3; if (MEM_readMINMATCH(match) == MEM_readMINMATCH(ip)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */ - currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH; + currentMl = ZSTD_count_2segments(ip+MINMATCHOPT, match+MINMATCHOPT, iLimit, dictEnd, prefixStart) + MINMATCHOPT; } /* save best solution */ @@ -342,18 +171,18 @@ static U32 
ZSTD_insertBtAndGetAllMatches ( /** Tree updater, providing best match */ -static U32 ZSTD_BtGetAllMatches ( +static U32 ZSTD_BTGETALLMATCHES ( ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iLimit, const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches) { if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls); - return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches); + return ZSTD_INSERTBTANDGETALLMATCHES(zc, ip, iLimit, maxNbAttempts, mls, 0, matches); } -static U32 ZSTD_BtGetAllMatches_selectMLS ( +static U32 ZSTD_BTGETALLMATCHES_SELECTMLS ( ZSTD_CCtx* zc, /* Index table will be updated */ const BYTE* ip, const BYTE* const iLowLimit, const BYTE* const iHighLimit, const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches) @@ -362,25 +191,25 @@ static U32 ZSTD_BtGetAllMatches_selectMLS ( switch(matchLengthSearch) { default : - case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches); - case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches); - case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches); + case 4 : return ZSTD_BTGETALLMATCHES(zc, ip, iHighLimit, maxNbAttempts, 4, matches); + case 5 : return ZSTD_BTGETALLMATCHES(zc, ip, iHighLimit, maxNbAttempts, 5, matches); + case 6 : return ZSTD_BTGETALLMATCHES(zc, ip, iHighLimit, maxNbAttempts, 6, matches); } } /** Tree updater, providing best match */ -static U32 ZSTD_BtGetAllMatches_extDict ( +static U32 ZSTD_BTGETALLMATCHES_EXTDICT ( ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iLimit, const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches) { if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls); - return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches); + return 
ZSTD_INSERTBTANDGETALLMATCHES(zc, ip, iLimit, maxNbAttempts, mls, 1, matches); } -static U32 ZSTD_BtGetAllMatches_selectMLS_extDict ( +static U32 ZSTD_BTGETALLMATCHES_SELECTMLS_EXTDICT ( ZSTD_CCtx* zc, /* Index table will be updated */ const BYTE* ip, const BYTE* const iLowLimit, const BYTE* const iHighLimit, const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches) @@ -389,9 +218,9 @@ static U32 ZSTD_BtGetAllMatches_selectMLS_extDict ( switch(matchLengthSearch) { default : - case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches); - case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches); - case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches); + case 4 : return ZSTD_BTGETALLMATCHES_EXTDICT(zc, ip, iHighLimit, maxNbAttempts, 4, matches); + case 5 : return ZSTD_BTGETALLMATCHES_EXTDICT(zc, ip, iHighLimit, maxNbAttempts, 5, matches); + case 6 : return ZSTD_BTGETALLMATCHES_EXTDICT(zc, ip, iHighLimit, maxNbAttempts, 6, matches); } } @@ -400,7 +229,7 @@ static U32 ZSTD_BtGetAllMatches_selectMLS_extDict ( * Optimal parser *********************************/ FORCE_INLINE -void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, +void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx, const void* src, size_t srcSize, const U32 depth) { @@ -416,14 +245,13 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, U32 rep_2=REPCODE_STARTVALUE, rep_1=REPCODE_STARTVALUE; const U32 maxSearches = 1U << ctx->params.searchLog; const U32 mls = ctx->params.searchLength; + const U32 sufficient_len = ctx->params.targetLength; - ZSTD_optimal_t opt[ZSTD_OPT_NUM+4]; + ZSTD_optimal_t opt[ZSTD_OPT_NUM+1]; ZSTD_match_t matches[ZSTD_OPT_NUM+1]; const BYTE* inr; U32 cur, match_num, last_pos, litlen, price; - const U32 sufficient_len = ctx->params.targetLength; - /* init */ ctx->nextToUpdate3 = ctx->nextToUpdate; ZSTD_resetSeqStore(seqStorePtr); @@ -445,7 +273,7 @@ void 
ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, /* check repCode */ if (MEM_readMINMATCH(ip+1) == MEM_readMINMATCH(ip+1 - rep_1)) { /* repcode : we take it */ - mlen = (U32)ZSTD_count(ip+1+MINMATCH, ip+1+MINMATCH-rep_1, iend) + MINMATCH; + mlen = (U32)ZSTD_count(ip+1+MINMATCHOPT, ip+1+MINMATCHOPT-rep_1, iend) + MINMATCHOPT; ZSTD_LOG_PARSER("%d: start try REP rep=%d mlen=%d\n", (int)(ip-base), (int)rep_1, (int)mlen); if (depth==0 || mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { @@ -455,14 +283,14 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, litlen = opt[0].litlen + 1; do { - price = ZSTD_getPrice(seqStorePtr, litlen, litstart, 0, mlen); + price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, 0, mlen); if (mlen + 1 > last_pos || price < opt[mlen + 1].price) SET_PRICE(mlen + 1, mlen, 0, litlen, price); /* note : macro modifies last_pos */ mlen--; - } while (mlen >= MINMATCH); + } while (mlen >= MINMATCHOPT); } - match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, ip, iend, maxSearches, mls, matches); /* first search (depth 0) */ + match_num = ZSTD_BTGETALLMATCHES_SELECTMLS(ctx, ip, ip, iend, maxSearches, mls, matches); /* first search (depth 0) */ ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos); if (!last_pos && !match_num) { ip++; continue; } @@ -479,7 +307,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, goto _storeSequence; } - best_mlen = (last_pos) ? last_pos : MINMATCH; + best_mlen = (last_pos) ? 
last_pos : MINMATCHOPT; // set prices using matches at position = 0 for (u = 0; u < match_num; u++) { @@ -488,13 +316,13 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, ZSTD_LOG_PARSER("%d: start Found mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(ip-base), matches[u].len, matches[u].off, (int)best_mlen, (int)last_pos); litlen = opt[0].litlen; while (mlen <= best_mlen) { - price = ZSTD_getPrice(seqStorePtr, litlen, litstart, matches[u].off, mlen); + price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, matches[u].off, mlen); if (mlen > last_pos || price < opt[mlen].price) SET_PRICE(mlen, mlen, matches[u].off, litlen, price); mlen++; } } - if (last_pos < MINMATCH) { ip++; continue; } + if (last_pos < MINMATCHOPT) { ip++; continue; } /* check further positions */ for (cur = 1; cur <= last_pos; cur++) { @@ -554,7 +382,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, } if (MEM_readMINMATCH(inr) == MEM_readMINMATCH(inr - cur_rep)) { // check rep - mlen = (U32)ZSTD_count(inr+MINMATCH, inr+MINMATCH - cur_rep, iend) + MINMATCH; + mlen = (U32)ZSTD_count(inr+MINMATCHOPT, inr+MINMATCHOPT - cur_rep, iend) + MINMATCHOPT; ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d rep=%d opt[%d].off=%d\n", (int)(inr-base), mlen, 0, opt[cur].rep, cur, opt[cur].off); if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { @@ -568,14 +396,14 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, if (opt[cur].mlen == 1) { litlen = opt[cur].litlen; if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, 0, mlen); + price = opt[cur - litlen].price + ZSTD_GETPRICE(seqStorePtr, litlen, inr-litlen, 0, mlen); ZSTD_LOG_TRY_PRICE("%d: TRY5 opt[%d].price=%d price=%d cur=%d litlen=%d\n", (int)(inr-base), cur - litlen, opt[cur - litlen].price, price, cur, litlen); } else - price = ZSTD_getPrice(seqStorePtr, litlen, litstart, 0, mlen); + price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, 0, mlen); } else { litlen = 0; - price = 
opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, 0, mlen); - ZSTD_LOG_TRY_PRICE("%d: TRY7 price=%d cur=%d litlen=0 getprice=%d\n", (int)(inr-base), price, cur, (int)ZSTD_getPrice(seqStorePtr, 0, NULL, 0, mlen)); + price = opt[cur].price + ZSTD_GETPRICE(seqStorePtr, 0, NULL, 0, mlen); + ZSTD_LOG_TRY_PRICE("%d: TRY7 price=%d cur=%d litlen=0 getprice=%d\n", (int)(inr-base), price, cur, (int)ZSTD_GETPRICE(seqStorePtr, 0, NULL, 0, mlen)); } best_mlen = mlen; @@ -585,10 +413,10 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, if (cur + mlen > last_pos || price <= opt[cur + mlen].price) SET_PRICE(cur + mlen, mlen, 0, litlen, price); mlen--; - } while (mlen >= MINMATCH); + } while (mlen >= MINMATCHOPT); } - match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, ip, iend, maxSearches, mls, matches); + match_num = ZSTD_BTGETALLMATCHES_SELECTMLS(ctx, inr, ip, iend, maxSearches, mls, matches); ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num); if (match_num > 0 && matches[match_num-1].len > sufficient_len) { @@ -598,7 +426,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, goto _storeSequence; } - best_mlen = (best_mlen > MINMATCH) ? best_mlen : MINMATCH; + best_mlen = (best_mlen > MINMATCHOPT) ? 
best_mlen : MINMATCHOPT; /* set prices using matches at position = cur */ for (u = 0; u < match_num; u++) { @@ -611,12 +439,12 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, if (opt[cur].mlen == 1) { litlen = opt[cur].litlen; if (cur > litlen) - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off, mlen); + price = opt[cur - litlen].price + ZSTD_GETPRICE(seqStorePtr, litlen, ip+cur-litlen, matches[u].off, mlen); else - price = ZSTD_getPrice(seqStorePtr, litlen, litstart, matches[u].off, mlen); + price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, matches[u].off, mlen); } else { litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off, mlen); + price = opt[cur].price + ZSTD_GETPRICE(seqStorePtr, 0, NULL, matches[u].off, mlen); } ZSTD_LOG_PARSER("%d: Found2 mlen=%d best_mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, best_mlen, matches[u].off, price, litlen); @@ -688,7 +516,7 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ else ml2 = (U32)ZSTD_count(ip, ip-rep_1, iend); if (offset == 0 || offset >= 8) - if (ml2 < mlen && ml2 < MINMATCH) { + if (ml2 < mlen && ml2 < MINMATCHOPT) { printf("%d: ERROR iend=%d mlen=%d offset=%d ml2=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset, (int)ml2); exit(0); } if (ip < anchor) { printf("%d: ERROR ip < anchor iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } @@ -696,14 +524,14 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ printf("%d: ERROR ip - offset < base iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } if ((int)offset >= (1 << ctx->params.windowLog)) { printf("%d: offset >= (1 << params.windowLog) iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } - if (mlen < MINMATCH) { - printf("%d: ERROR mlen < 
MINMATCH iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } + if (mlen < MINMATCHOPT) { + printf("%d: ERROR mlen < MINMATCHOPT iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } if (ip + mlen > iend) { printf("%d: ERROR ip + mlen >= iend iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } #endif - ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); + ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCHOPT); + ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCHOPT); anchor = ip = ip + mlen; } /* for (cur=0; cur < last_pos; ) */ @@ -711,14 +539,14 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ while ( (anchor <= ilimit) && (MEM_readMINMATCH(anchor) == MEM_readMINMATCH(anchor - rep_2)) ) { /* store sequence */ - best_mlen = (U32)ZSTD_count(anchor+MINMATCH, anchor+MINMATCH-rep_2, iend); + best_mlen = (U32)ZSTD_count(anchor+MINMATCHOPT, anchor+MINMATCHOPT-rep_2, iend); best_off = rep_2; rep_2 = rep_1; rep_1 = best_off; ZSTD_LOG_ENCODE("%d/%d: ENCODE REP literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(anchor-base), (int)(iend-base), (int)(0), (int)best_mlen, (int)(0), (int)rep_1, (int)rep_2); ZSTD_updatePrice(seqStorePtr, 0, anchor, 0, best_mlen); ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, best_mlen); - anchor += best_mlen+MINMATCH; + anchor += best_mlen+MINMATCHOPT; continue; /* faster when present ... (?) 
*/ } if (anchor > ip) ip = anchor; @@ -734,7 +562,7 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ FORCE_INLINE -void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, +void ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC(ZSTD_CCtx* ctx, const void* src, size_t srcSize, const U32 depth) { @@ -754,14 +582,13 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, U32 rep_2=REPCODE_STARTVALUE, rep_1=REPCODE_STARTVALUE; const U32 maxSearches = 1U << ctx->params.searchLog; const U32 mls = ctx->params.searchLength; + const U32 sufficient_len = ctx->params.targetLength; - ZSTD_optimal_t opt[ZSTD_OPT_NUM+4]; + ZSTD_optimal_t opt[ZSTD_OPT_NUM+1]; ZSTD_match_t matches[ZSTD_OPT_NUM+1]; const BYTE* inr; U32 cur, match_num, last_pos, litlen, price; - const U32 sufficient_len = ctx->params.targetLength; - /* init */ ctx->nextToUpdate3 = ctx->nextToUpdate; ZSTD_resetSeqStore(seqStorePtr); @@ -788,7 +615,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, && (MEM_readMINMATCH(ip+1) == MEM_readMINMATCH(repMatch)) ) { /* repcode detected we should take it */ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - mlen = (U32)ZSTD_count_2segments(ip+1+MINMATCH, repMatch+MINMATCH, iend, repEnd, prefixStart) + MINMATCH; + mlen = (U32)ZSTD_count_2segments(ip+1+MINMATCHOPT, repMatch+MINMATCHOPT, iend, repEnd, prefixStart) + MINMATCHOPT; ZSTD_LOG_PARSER("%d: start try REP rep=%d mlen=%d\n", (int)(ip-base), (int)rep_1, (int)mlen); if (depth==0 || mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { @@ -798,16 +625,16 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, litlen = opt[0].litlen + 1; do { - price = ZSTD_getPrice(seqStorePtr, litlen, litstart, 0, mlen); + price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, 0, mlen); if (mlen + 1 > last_pos || price < opt[mlen + 1].price) SET_PRICE(mlen + 1, mlen, 0, litlen, price); mlen--; - } while (mlen >= MINMATCH); + } while (mlen >= MINMATCHOPT); } } - best_mlen = (last_pos) ? 
last_pos : MINMATCH; + best_mlen = (last_pos) ? last_pos : MINMATCHOPT; - match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, ip, iend, maxSearches, mls, matches); /* first search (depth 0) */ + match_num = ZSTD_BTGETALLMATCHES_SELECTMLS_EXTDICT(ctx, ip, ip, iend, maxSearches, mls, matches); /* first search (depth 0) */ ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos); if (!last_pos && !match_num) { ip++; continue; } @@ -831,13 +658,13 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, ZSTD_LOG_PARSER("%d: start Found mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(ip-base), matches[u].len, matches[u].off, (int)best_mlen, (int)last_pos); litlen = opt[0].litlen; while (mlen <= best_mlen) { - price = ZSTD_getPrice(seqStorePtr, litlen, litstart, matches[u].off, mlen); + price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, matches[u].off, mlen); if (mlen > last_pos || price < opt[mlen].price) SET_PRICE(mlen, mlen, matches[u].off, litlen, price); mlen++; } } - if (last_pos < MINMATCH) { + if (last_pos < MINMATCHOPT) { // ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ ip++; continue; } @@ -906,7 +733,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, &&(MEM_readMINMATCH(inr) == MEM_readMINMATCH(repMatch)) ) { /* repcode detected */ const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; - mlen = (U32)ZSTD_count_2segments(inr+MINMATCH, repMatch+MINMATCH, iend, repEnd, prefixStart) + MINMATCH; + mlen = (U32)ZSTD_count_2segments(inr+MINMATCHOPT, repMatch+MINMATCHOPT, iend, repEnd, prefixStart) + MINMATCHOPT; ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d rep=%d opt[%d].off=%d\n", (int)(inr-base), mlen, 0, opt[cur].rep, cur, opt[cur].off); if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { @@ -920,14 +747,14 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, if (opt[cur].mlen == 1) { litlen = opt[cur].litlen; if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, 0, mlen); + price = opt[cur - litlen].price + ZSTD_GETPRICE(seqStorePtr, litlen, inr-litlen, 0, mlen); ZSTD_LOG_TRY_PRICE("%d: TRY5 opt[%d].price=%d price=%d cur=%d litlen=%d\n", (int)(inr-base), cur - litlen, opt[cur - litlen].price, price, cur, litlen); } else - price = ZSTD_getPrice(seqStorePtr, litlen, litstart, 0, mlen); + price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, 0, mlen); } else { litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, 0, mlen); - ZSTD_LOG_TRY_PRICE("%d: TRY7 price=%d cur=%d litlen=0 getprice=%d\n", (int)(inr-base), price, cur, (int)ZSTD_getPrice(seqStorePtr, 0, NULL, 0, mlen)); + price = opt[cur].price + ZSTD_GETPRICE(seqStorePtr, 0, NULL, 0, mlen); + ZSTD_LOG_TRY_PRICE("%d: TRY7 price=%d cur=%d litlen=0 getprice=%d\n", (int)(inr-base), price, cur, (int)ZSTD_GETPRICE(seqStorePtr, 0, NULL, 0, mlen)); } best_mlen = mlen; @@ -938,12 +765,12 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, if (cur + mlen > last_pos || price <= opt[cur + mlen].price) // || ((price == opt[cur + mlen].price) && (opt[cur].mlen == 1) && (cur != litlen))) // at equal price prefer REP instead of MATCH SET_PRICE(cur + mlen, mlen, 0, litlen, price); mlen--; - } while (mlen >= MINMATCH); + } while (mlen >= MINMATCHOPT); } - best_mlen = (best_mlen > MINMATCH) ? 
best_mlen : MINMATCH; + best_mlen = (best_mlen > MINMATCHOPT) ? best_mlen : MINMATCHOPT; - match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, ip, iend, maxSearches, mls, matches); + match_num = ZSTD_BTGETALLMATCHES_SELECTMLS_EXTDICT(ctx, inr, ip, iend, maxSearches, mls, matches); ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num); if (match_num > 0 && matches[match_num-1].len > sufficient_len) { @@ -964,12 +791,12 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, if (opt[cur].mlen == 1) { litlen = opt[cur].litlen; if (cur > litlen) - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off, mlen); + price = opt[cur - litlen].price + ZSTD_GETPRICE(seqStorePtr, litlen, ip+cur-litlen, matches[u].off, mlen); else - price = ZSTD_getPrice(seqStorePtr, litlen, litstart, matches[u].off, mlen); + price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, matches[u].off, mlen); } else { litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off, mlen); + price = opt[cur].price + ZSTD_GETPRICE(seqStorePtr, 0, NULL, matches[u].off, mlen); } ZSTD_LOG_PARSER("%d: Found2 mlen=%d best_mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, best_mlen, matches[u].off, price, litlen); @@ -1040,7 +867,7 @@ _storeSequence: // cur, last_pos, best_mlen, best_off have to be set ml2 = (U32)ZSTD_count(ip, ip-offset, iend); else ml2 = (U32)ZSTD_count(ip, ip-rep_1, iend); - if (ml2 < mlen && ml2 < MINMATCH) { + if (ml2 < mlen && ml2 < MINMATCHOPT) { printf("%d: ERROR iend=%d mlen=%d offset=%d ml2=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset, (int)ml2); exit(0); } if (ip < anchor) { printf("%d: ERROR ip < anchor iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } @@ -1048,14 +875,14 @@ _storeSequence: // cur, last_pos, best_mlen, best_off have to be set printf("%d: ERROR ip - offset < base iend=%d 
mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } if ((int)offset >= (1 << ctx->params.windowLog)) { printf("%d: offset >= (1 << params.windowLog) iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } - if (mlen < MINMATCH) { - printf("%d: ERROR mlen < MINMATCH iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } + if (mlen < MINMATCHOPT) { + printf("%d: ERROR mlen < MINMATCHOPT iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } if (ip + mlen > iend) { printf("%d: ERROR ip + mlen >= iend iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } #endif - ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); + ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCHOPT); + ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCHOPT); anchor = ip = ip + mlen; } @@ -1068,11 +895,11 @@ _storeSequence: // cur, last_pos, best_mlen, best_off have to be set && (MEM_readMINMATCH(anchor) == MEM_readMINMATCH(repMatch)) ) { /* repcode detected, let's take it */ const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; - mlen = (U32)ZSTD_count_2segments(anchor+MINMATCH, repMatch+MINMATCH, iend, repEnd, prefixStart) + MINMATCH; + mlen = (U32)ZSTD_count_2segments(anchor+MINMATCHOPT, repMatch+MINMATCHOPT, iend, repEnd, prefixStart) + MINMATCHOPT; offset = rep_2; rep_2 = rep_1; rep_1 = offset; /* swap offset history */ ZSTD_LOG_ENCODE("%d/%d: ENCODE REP literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(anchor-base), (int)(iend-base), (int)(0), (int)best_mlen, (int)(0), (int)rep_1, (int)rep_2); - ZSTD_updatePrice(seqStorePtr, 0, anchor, 0, mlen-MINMATCH); - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, mlen-MINMATCH); + ZSTD_updatePrice(seqStorePtr, 0, anchor, 0, mlen-MINMATCHOPT); + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, mlen-MINMATCHOPT); anchor += mlen; continue; /* faster when present ... (?) */ } diff --git a/lib/zstd_opt_internal.h b/lib/zstd_opt_internal.h new file mode 100644 index 00000000..29e936d3 --- /dev/null +++ b/lib/zstd_opt_internal.h @@ -0,0 +1,231 @@ +/* + zstd_opt_internal - common optimal parser functions to include + Header File for include + Copyright (C) 2016, Przemyslaw Skibinski, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - zstd source repository : https://github.com/Cyan4973/zstd +*/ + +/* Note : this file is intended to be included within zstd_compress.c */ + +#ifndef ZSTD_OPT_INTERNAL_H_MODULE +#define ZSTD_OPT_INTERNAL_H_MODULE + + +/*-******************************************* +* The optimal parser +*********************************************/ +/*- Constants -*/ +#define ZSTD_OPT_NUM (1<<12) +#define ZSTD_FREQ_START 1 +#define ZSTD_FREQ_STEP 1 +#define ZSTD_FREQ_DIV 6 + +/*- Debug -*/ +#if defined(ZSTD_OPT_DEBUG) && ZSTD_OPT_DEBUG>=9 + #define ZSTD_LOG_PARSER(...) printf(__VA_ARGS__) + #define ZSTD_LOG_ENCODE(...) printf(__VA_ARGS__) + #define ZSTD_LOG_TRY_PRICE(...) printf(__VA_ARGS__) +#else + #define ZSTD_LOG_PARSER(...) + #define ZSTD_LOG_ENCODE(...) + #define ZSTD_LOG_TRY_PRICE(...) 
+#endif + + +typedef struct { + U32 off; + U32 len; +} ZSTD_match_t; + +typedef struct { + U32 price; + U32 off; + U32 mlen; + U32 litlen; + U32 rep; + U32 rep2; +} ZSTD_optimal_t; + + +MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr) +{ + unsigned u; + + // printf("matchLengthSum=%d litLengthSum=%d litSum=%d offCodeSum=%d\n", ssPtr->matchLengthSum, ssPtr->litLengthSum, ssPtr->litSum, ssPtr->offCodeSum); + + if (ssPtr->litLengthSum == 0) { + ssPtr->matchLengthSum = (1<litLengthSum = (1<litSum = (1<offCodeSum = (1<litFreq[u] = 1; + for (u=0; u<=MaxLL; u++) + ssPtr->litLengthFreq[u] = 1; + for (u=0; u<=MaxML; u++) + ssPtr->matchLengthFreq[u] = 1; + for (u=0; u<=MaxOff; u++) + ssPtr->offCodeFreq[u] = 1; + } else { + ssPtr->matchLengthSum = 0; + ssPtr->litLengthSum = 0; + ssPtr->litSum = 0; + ssPtr->offCodeSum = 0; + + for (u=0; u<=MaxLit; u++) { + ssPtr->litFreq[u] = ZSTD_FREQ_START + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV); + ssPtr->litSum += ssPtr->litFreq[u]; + } + for (u=0; u<=MaxLL; u++) { + ssPtr->litLengthFreq[u] = ZSTD_FREQ_START + (ssPtr->litLengthFreq[u]>>ZSTD_FREQ_DIV); + ssPtr->litLengthSum += ssPtr->litLengthFreq[u]; + } + for (u=0; u<=MaxML; u++) { + ssPtr->matchLengthFreq[u] = ZSTD_FREQ_START + (ssPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV); + ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u]; + } + for (u=0; u<=MaxOff; u++) { + ssPtr->offCodeFreq[u] = ZSTD_FREQ_START + (ssPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV); + ssPtr->offCodeSum += ssPtr->offCodeFreq[u]; + } + } +} + +MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) +{ + U32 u; + + /* literals */ + seqStorePtr->litSum += litLength * ZSTD_FREQ_STEP; + for (u=0; u < litLength; u++) + seqStorePtr->litFreq[literals[u]] += ZSTD_FREQ_STEP; + + /* literal Length */ + seqStorePtr->litLengthSum += ZSTD_FREQ_STEP; + if (litLength >= MaxLL) + seqStorePtr->litLengthFreq[MaxLL] += ZSTD_FREQ_STEP; + else + seqStorePtr->litLengthFreq[litLength] += 
ZSTD_FREQ_STEP; + + /* match offset */ + seqStorePtr->offCodeSum += ZSTD_FREQ_STEP; + BYTE offCode = offset ? (BYTE)ZSTD_highbit(offset) + 1 : 0; + seqStorePtr->offCodeFreq[offCode] += ZSTD_FREQ_STEP; + + /* match Length */ + seqStorePtr->matchLengthSum += ZSTD_FREQ_STEP; + if (matchLength >= MaxML) + seqStorePtr->matchLengthFreq[MaxML] += ZSTD_FREQ_STEP; + else + seqStorePtr->matchLengthFreq[matchLength] += ZSTD_FREQ_STEP; +} + +FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals) +{ + U32 price, u; + + /* literals */ + price = litLength * ZSTD_highbit(seqStorePtr->litSum); + for (u=0; u < litLength; u++) + price -= ZSTD_highbit(seqStorePtr->litFreq[literals[u]]); + + /* literal Length */ + price += ((litLength >= MaxLL)<<3) + ((litLength >= 255+MaxLL)<<4) + ((litLength>=(1<<15))<<3); + if (litLength >= MaxLL) litLength = MaxLL; + price += ZSTD_highbit(seqStorePtr->litLengthSum) - ZSTD_highbit(seqStorePtr->litLengthFreq[litLength]); + + return price; +} + +#define SET_PRICE(pos, mlen_, offset_, litlen_, price_) \ + { \ + while (last_pos < pos) { opt[last_pos+1].price = 1<<30; last_pos++; } \ + opt[pos].mlen = mlen_; \ + opt[pos].off = offset_; \ + opt[pos].litlen = litlen_; \ + opt[pos].price = price_; \ + ZSTD_LOG_PARSER("%d: SET price[%d/%d]=%d litlen=%d len=%d off=%d\n", (int)(inr-base), (int)pos, (int)last_pos, opt[pos].price, opt[pos].litlen, opt[pos].mlen, opt[pos].off); \ + } + +/* Update hashTable3 up to ip (excluded) + Assumption : always within prefix (ie. 
not within extDict) */ +static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip) +{ + U32* const hashTable3 = zc->hashTable3; + const U32 hashLog3 = zc->params.hashLog3; + const BYTE* const base = zc->base; + const U32 target = (U32)(ip - base); + U32 idx = zc->nextToUpdate3; + + while(idx < target) { + hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx; + idx++; + } + + zc->nextToUpdate3 = target; + return hashTable3[ZSTD_hash3Ptr(ip, hashLog3)]; +} + + +#define MINMATCHOPT 4 +#define MEM_readMINMATCH(ptr) (U32)(MEM_read32(ptr)) +#define ZSTD_GETPRICE ZSTD_getPrice4 +#define ZSTD_INSERTBTANDGETALLMATCHES ZSTD_insertBtAndGetAllMatches4 +#define ZSTD_BTGETALLMATCHES ZSTD_BtGetAllMatches4 +#define ZSTD_BTGETALLMATCHES_SELECTMLS ZSTD_BtGetAllMatches_selectMLS4 +#define ZSTD_BTGETALLMATCHES_EXTDICT ZSTD_BtGetAllMatches_extDict4 +#define ZSTD_BTGETALLMATCHES_SELECTMLS_EXTDICT ZSTD_BtGetAllMatches_selectMLS_extDict4 +#define ZSTD_COMPRESSBLOCK_OPT_GENERIC ZSTD_compressBlock_opt_generic4 +#define ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC ZSTD_compressBlock_opt_extDict_generic4 +#include "zstd_opt.h" +#undef MINMATCHOPT +#undef MEM_readMINMATCH +#undef ZSTD_GETPRICE +#undef ZSTD_INSERTBTANDGETALLMATCHES +#undef ZSTD_BTGETALLMATCHES +#undef ZSTD_BTGETALLMATCHES_SELECTMLS +#undef ZSTD_BTGETALLMATCHES_EXTDICT +#undef ZSTD_BTGETALLMATCHES_SELECTMLS_EXTDICT +#undef ZSTD_COMPRESSBLOCK_OPT_GENERIC +#undef ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC + +#define MINMATCHOPT 3 +#define MEM_readMINMATCH(ptr) ((U32)(MEM_read32(ptr)<<8)) +#define ZSTD_GETPRICE ZSTD_getPrice3 +#define ZSTD_INSERTBTANDGETALLMATCHES ZSTD_insertBtAndGetAllMatches3 +#define ZSTD_BTGETALLMATCHES ZSTD_BtGetAllMatches3 +#define ZSTD_BTGETALLMATCHES_SELECTMLS ZSTD_BtGetAllMatches_selectMLS3 +#define ZSTD_BTGETALLMATCHES_EXTDICT ZSTD_BtGetAllMatches_extDict3 +#define ZSTD_BTGETALLMATCHES_SELECTMLS_EXTDICT ZSTD_BtGetAllMatches_selectMLS_extDict3 +#define ZSTD_COMPRESSBLOCK_OPT_GENERIC 
ZSTD_compressBlock_opt_generic3 +#define ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC ZSTD_compressBlock_opt_extDict_generic3 +#include "zstd_opt.h" + + +#endif /* ZSTD_OPT_INTERNAL_H_MODULE */ diff --git a/lib/zstd_static.h b/lib/zstd_static.h index 692b147d..0e280cef 100644 --- a/lib/zstd_static.h +++ b/lib/zstd_static.h @@ -69,7 +69,7 @@ extern "C" { #define ZSTD_SEARCHLOG_MAX (ZSTD_CONTENTLOG_MAX-1) #define ZSTD_SEARCHLOG_MIN 1 #define ZSTD_SEARCHLENGTH_MAX 7 -#define ZSTD_SEARCHLENGTH_MIN 4 +#define ZSTD_SEARCHLENGTH_MIN 3 #define ZSTD_TARGETLENGTH_MIN 4 #define ZSTD_TARGETLENGTH_MAX 999 From 6b3739c8e5cc3f8e96d45d72a23708b76fcfa0be Mon Sep 17 00:00:00 2001 From: inikep Date: Mon, 22 Feb 2016 15:53:42 +0100 Subject: [PATCH 012/247] MINMATCH=3 or 4 selected in ZSTD frame --- lib/zstd_compress.c | 17 ++++++++++------- lib/zstd_decompress.c | 10 ++++++---- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 8e572b4a..bb15afb8 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -2144,7 +2144,7 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* zc, if (ZSTD_isError(errorCode)) return errorCode; MEM_writeLE32(zc->headerBuffer, ZSTD_MAGICNUMBER); /* Write Header */ - ((BYTE*)zc->headerBuffer)[4] = (BYTE)(params.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN); + ((BYTE*)zc->headerBuffer)[4] = (BYTE)(params.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN + ((params.searchLength==3)<<4)); zc->hbSize = ZSTD_frameHeaderSize_min; zc->stage = 0; @@ -2259,7 +2259,7 @@ size_t ZSTD_compress(void* dst, size_t maxDstSize, const void* src, size_t srcSi /*-===== Pre-defined compression levels =====-*/ -#define ZSTD_MAX_CLEVEL 22 +#define ZSTD_MAX_CLEVEL 25 unsigned ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } @@ -2284,11 +2284,14 @@ static const ZSTD_parameters ZSTD_defaultParameters[4][ZSTD_MAX_CLEVEL+1] = { { 0, 23, 23, 23, 0, 5, 5, 4, ZSTD_lazy2 }, /* level 15 */ { 0, 23, 22, 22, 0, 5, 5, 4, ZSTD_btlazy2 }, /* level 16 */ { 0, 
24, 24, 23, 0, 4, 5, 4, ZSTD_btlazy2 }, /* level 17 */ - { 0, 24, 24, 23, 16, 5, 5, 30, ZSTD_btopt }, /* level 18 */ - { 0, 25, 25, 24, 16, 5, 4, 40, ZSTD_btopt }, /* level 19 */ - { 0, 26, 26, 25, 16, 8, 4,256, ZSTD_btopt }, /* level 20 */ - { 0, 26, 27, 25, 24, 10, 4,256, ZSTD_btopt }, /* level 21 */ - { 0, 26, 26, 25, 16, 8, 3,256, ZSTD_btopt }, /* level 20+MM3 */ + { 0, 24, 24, 23, 0, 5, 5, 30, ZSTD_btopt }, /* level 18 */ + { 0, 25, 25, 24, 0, 5, 4, 40, ZSTD_btopt }, /* level 19 */ + { 0, 26, 26, 25, 0, 8, 4,256, ZSTD_btopt }, /* level 20 */ + { 0, 26, 27, 25, 0, 10, 4,256, ZSTD_btopt }, /* level 21 */ + { 0, 24, 24, 23, 16, 5, 3, 30, ZSTD_btopt }, /* level 22 */ + { 0, 25, 25, 24, 16, 5, 3, 40, ZSTD_btopt }, /* level 23 */ + { 0, 26, 26, 25, 16, 8, 3,256, ZSTD_btopt }, /* level 24 */ + { 0, 26, 27, 25, 24, 10, 3,256, ZSTD_btopt }, /* level 25 */ }, { /* for srcSize <= 256 KB */ /* l, W, C, H, H3, S, L, T, strat */ diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index bfa0ea33..8cedb3ea 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -292,7 +292,8 @@ size_t ZSTD_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcS if (magicNumber != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown); memset(params, 0, sizeof(*params)); params->windowLog = (((const BYTE*)src)[4] & 15) + ZSTD_WINDOWLOG_ABSOLUTEMIN; - if ((((const BYTE*)src)[4] >> 4) != 0) return ERROR(frameParameter_unsupported); /* reserved bits */ + params->searchLength = (((const BYTE*)src)[4] & 16) ? 
MINMATCH-1 : MINMATCH; + if ((((const BYTE*)src)[4] >> 5) != 0) return ERROR(frameParameter_unsupported); /* reserved 3 bits */ return 0; } @@ -614,7 +615,7 @@ typedef struct { -static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) +static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) { size_t litLength; size_t prevOffset; @@ -669,7 +670,7 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) } if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */ } - matchLength += MINMATCH; + matchLength += mls; /* save result */ seq->litLength = litLength; @@ -784,6 +785,7 @@ static size_t ZSTD_decompressSequences( const BYTE* const base = (const BYTE*) (dctx->base); const BYTE* const vBase = (const BYTE*) (dctx->vBase); const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + const U32 mls = dctx->params.searchLength; /* Build Decoding Tables */ errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength, @@ -811,7 +813,7 @@ static size_t ZSTD_decompressSequences( for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) { size_t oneSeqSize; nbSeq--; - ZSTD_decodeSequence(&sequence, &seqState); + ZSTD_decodeSequence(&sequence, &seqState, mls); oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd); if (ZSTD_isError(oneSeqSize)) return oneSeqSize; op += oneSeqSize; From 9f754d23dc93c6e2ab60e72d1c0df58bd5a0c5c7 Mon Sep 17 00:00:00 2001 From: inikep Date: Mon, 22 Feb 2016 17:00:04 +0100 Subject: [PATCH 013/247] dummy levels 22-25 --- lib/zstd_compress.c | 12 ++++++++++++ lib/zstd_internal.h | 2 +- lib/zstd_opt.h | 1 + 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index bb15afb8..6f753fc9 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -2317,6 +2317,10 @@ static const ZSTD_parameters ZSTD_defaultParameters[4][ZSTD_MAX_CLEVEL+1] = 
{ { 0, 18, 19, 18, 0, 10, 4,256, ZSTD_btopt }, /* level 19.*/ { 0, 18, 19, 18, 0, 11, 4,256, ZSTD_btopt }, /* level 20.*/ { 0, 18, 19, 18, 0, 12, 4,256, ZSTD_btopt }, /* level 21.*/ + { 0, 18, 19, 18, 0, 12, 4,256, ZSTD_btopt }, /* level 21-2*/ + { 0, 18, 19, 18, 0, 12, 4,256, ZSTD_btopt }, /* level 21-3*/ + { 0, 18, 19, 18, 0, 12, 4,256, ZSTD_btopt }, /* level 21-4*/ + { 0, 18, 19, 18, 0, 12, 4,256, ZSTD_btopt }, /* level 21-5*/ }, { /* for srcSize <= 128 KB */ /* l, W, C, H, H3, S, L, T, strat */ @@ -2342,6 +2346,10 @@ static const ZSTD_parameters ZSTD_defaultParameters[4][ZSTD_MAX_CLEVEL+1] = { { 0, 17, 18, 17, 0, 9, 4,256, ZSTD_btopt }, /* level 19 */ { 0, 17, 18, 17, 0, 10, 4,512, ZSTD_btopt }, /* level 20 */ { 0, 17, 18, 17, 0, 11, 4,512, ZSTD_btopt }, /* level 21 */ + { 0, 17, 18, 17, 0, 11, 4,512, ZSTD_btopt }, /* level 21-2 */ + { 0, 17, 18, 17, 0, 11, 4,512, ZSTD_btopt }, /* level 21-3 */ + { 0, 17, 18, 17, 0, 11, 4,512, ZSTD_btopt }, /* level 21-4 */ + { 0, 17, 18, 17, 0, 11, 4,512, ZSTD_btopt }, /* level 21-5 */ }, { /* for srcSize <= 16 KB */ @@ -2368,6 +2376,10 @@ static const ZSTD_parameters ZSTD_defaultParameters[4][ZSTD_MAX_CLEVEL+1] = { { 0, 14, 15, 15, 0, 15, 4,256, ZSTD_btopt }, /* level 19 */ { 0, 14, 15, 15, 0, 16, 4,256, ZSTD_btopt }, /* level 20 */ { 0, 14, 15, 15, 0, 17, 4,256, ZSTD_btopt }, /* level 21 */ + { 0, 14, 15, 15, 0, 17, 4,256, ZSTD_btopt }, /* level 21-2 */ + { 0, 14, 15, 15, 0, 17, 4,256, ZSTD_btopt }, /* level 21-3 */ + { 0, 14, 15, 15, 0, 17, 4,256, ZSTD_btopt }, /* level 21-4 */ + { 0, 14, 15, 15, 0, 17, 4,256, ZSTD_btopt }, /* level 21-5 */ }, }; diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index b9826aa7..3d60e87b 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -50,7 +50,7 @@ /*-************************************* * Common constants ***************************************/ -#define ZSTD_OPT_DEBUG 1 // 1 = tableID=0; 5 = check encoded sequences; 9 = full logs +#define ZSTD_OPT_DEBUG 0 // 1 = 
tableID=0; 5 = check encoded sequences; 9 = full logs #if ZSTD_OPT_DEBUG > 0 #include /* for debug */ #endif diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index 24343d69..dfe76223 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -50,6 +50,7 @@ FORCE_INLINE U32 ZSTD_GETPRICE(seqStore_t* seqStorePtr, U32 litLength, const BYT return price + 1 + ((seqStorePtr->litSum>>4) / seqStorePtr->litLengthSum) + (matchLength==0); return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ((seqStorePtr->litSum>>4) / seqStorePtr->litLengthSum) + (matchLength==0); +// return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals); } From c8fde3be45cba1fffd15520780ca58ce29c6c718 Mon Sep 17 00:00:00 2001 From: Christophe Chevalier Date: Tue, 23 Feb 2016 12:09:50 +0100 Subject: [PATCH 014/247] Added new ".VC.db" Visual Studio C++ database file to gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index a31ffdc7..7e142fcd 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,7 @@ # Visual solution files *.suo *.user +*.VC.db # Build results [Dd]ebug/ @@ -49,4 +50,4 @@ _zstdbench lib/zstd_opt_LZ5.c lib/zstd_opt_llen.c -lib/zstd_opt_nollen.c \ No newline at end of file +lib/zstd_opt_nollen.c From 15174b0cfb4ba73ac7dfa37473299c8d0be6f48a Mon Sep 17 00:00:00 2001 From: inikep Date: Tue, 23 Feb 2016 12:41:56 +0100 Subject: [PATCH 015/247] statistics of encoded sequences --- lib/zstd_compress.c | 14 ++++++++++++++ lib/zstd_internal.h | 6 +++++- lib/zstd_opt.h | 7 +++++++ lib/zstd_opt_internal.h | 4 +--- 4 files changed, 27 insertions(+), 4 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 6f753fc9..6312ea34 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -726,6 +726,12 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const B printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n", (U32)(literals - g_start), (U32)litLength, 
(U32)matchCode+MINMATCH, (U32)offsetCode); #endif +#if ZSTD_OPT_DEBUG >= 3 + if (offsetCode == 0) seqStorePtr->realRepSum++; + seqStorePtr->realSeqSum++; + seqStorePtr->realMatchSum += matchCode; + seqStorePtr->realLitSum += litLength; +#endif /* copy Literals */ ZSTD_wildcopy(seqStorePtr->lit, literals, litLength); @@ -1912,6 +1918,9 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* zc, BYTE* const ostart = (BYTE*)dst; BYTE* op = ostart; const U32 maxDist = 1 << zc->params.windowLog; + seqStore_t* ssPtr = &zc->seqStore; + + ssPtr->realMatchSum = ssPtr->realLitSum = ssPtr->realSeqSum = ssPtr->realRepSum = 0; while (remaining) { size_t cSize; @@ -1945,6 +1954,11 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* zc, op += cSize; } +#if ZSTD_OPT_DEBUG >= 3 + ssPtr->realMatchSum += ssPtr->realSeqSum * ((zc->params.searchLength == 3) ? 3 : 4); + printf("avgMatchL=%.2f avgLitL=%.2f match=%.1f%% lit=%.1f%% reps=%d seq=%d\n", (float)ssPtr->realMatchSum/ssPtr->realSeqSum, (float)ssPtr->realLitSum/ssPtr->realSeqSum, 100.0*ssPtr->realMatchSum/(ssPtr->realMatchSum+ssPtr->realLitSum), 100.0*ssPtr->realLitSum/(ssPtr->realMatchSum+ssPtr->realLitSum), ssPtr->realRepSum, ssPtr->realSeqSum); +#endif + return op-ostart; } diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index 3d60e87b..39b76d8a 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -50,7 +50,7 @@ /*-************************************* * Common constants ***************************************/ -#define ZSTD_OPT_DEBUG 0 // 1 = tableID=0; 5 = check encoded sequences; 9 = full logs +#define ZSTD_OPT_DEBUG 3 // 1 = tableID=0; 3 = print block stats; 5 = check encoded sequences; 9 = full logs #if ZSTD_OPT_DEBUG > 0 #include /* for debug */ #endif @@ -179,6 +179,10 @@ typedef struct { U32 litLengthSum; U32 litSum; U32 offCodeSum; + U32 realMatchSum; + U32 realLitSum; + U32 realSeqSum; + U32 realRepSum; } seqStore_t; seqStore_t ZSTD_copySeqStore(const ZSTD_CCtx* ctx); diff --git a/lib/zstd_opt.h 
b/lib/zstd_opt.h index dfe76223..b5b10efd 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -46,11 +46,18 @@ FORCE_INLINE U32 ZSTD_GETPRICE(seqStore_t* seqStorePtr, U32 litLength, const BYT if (matchLength >= MaxML) matchLength = MaxML; price += ZSTD_highbit(seqStorePtr->matchLengthSum) - ZSTD_highbit(seqStorePtr->matchLengthFreq[matchLength]); +#if 0 if (!litLength) return price + 1 + ((seqStorePtr->litSum>>4) / seqStorePtr->litLengthSum) + (matchLength==0); return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ((seqStorePtr->litSum>>4) / seqStorePtr->litLengthSum) + (matchLength==0); +#else + if (!litLength) + return price + 1; + + return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals); // return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals); +#endif } diff --git a/lib/zstd_opt_internal.h b/lib/zstd_opt_internal.h index 29e936d3..47d5a498 100644 --- a/lib/zstd_opt_internal.h +++ b/lib/zstd_opt_internal.h @@ -75,9 +75,7 @@ typedef struct { MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr) { unsigned u; - - // printf("matchLengthSum=%d litLengthSum=%d litSum=%d offCodeSum=%d\n", ssPtr->matchLengthSum, ssPtr->litLengthSum, ssPtr->litSum, ssPtr->offCodeSum); - + if (ssPtr->litLengthSum == 0) { ssPtr->matchLengthSum = (1<litLengthSum = (1< Date: Tue, 23 Feb 2016 16:25:04 +0100 Subject: [PATCH 016/247] priceFunc --- lib/zstd_compress.c | 8 ++++++-- lib/zstd_internal.h | 2 ++ lib/zstd_opt.h | 26 ++++++++++++++------------ lib/zstd_opt_internal.h | 4 +++- 4 files changed, 25 insertions(+), 15 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 6312ea34..8a5f6f69 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -1919,8 +1919,10 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* zc, BYTE* op = ostart; const U32 maxDist = 1 << zc->params.windowLog; seqStore_t* ssPtr = &zc->seqStore; + static U32 priceFunc = 0; - ssPtr->realMatchSum = ssPtr->realLitSum = ssPtr->realSeqSum = 
ssPtr->realRepSum = 0; + ssPtr->realMatchSum = ssPtr->realLitSum = ssPtr->realSeqSum = ssPtr->realRepSum = 1; + ssPtr->priceFunc = priceFunc; while (remaining) { size_t cSize; @@ -1954,9 +1956,11 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* zc, op += cSize; } + #if ZSTD_OPT_DEBUG >= 3 ssPtr->realMatchSum += ssPtr->realSeqSum * ((zc->params.searchLength == 3) ? 3 : 4); - printf("avgMatchL=%.2f avgLitL=%.2f match=%.1f%% lit=%.1f%% reps=%d seq=%d\n", (float)ssPtr->realMatchSum/ssPtr->realSeqSum, (float)ssPtr->realLitSum/ssPtr->realSeqSum, 100.0*ssPtr->realMatchSum/(ssPtr->realMatchSum+ssPtr->realLitSum), 100.0*ssPtr->realLitSum/(ssPtr->realMatchSum+ssPtr->realLitSum), ssPtr->realRepSum, ssPtr->realSeqSum); + printf("avgMatchL=%.2f avgLitL=%.2f match=%.1f%% lit=%.1f%% reps=%d seq=%d priceFunc=%d\n", (float)ssPtr->realMatchSum/ssPtr->realSeqSum, (float)ssPtr->realLitSum/ssPtr->realSeqSum, 100.0*ssPtr->realMatchSum/(ssPtr->realMatchSum+ssPtr->realLitSum), 100.0*ssPtr->realLitSum/(ssPtr->realMatchSum+ssPtr->realLitSum), ssPtr->realRepSum, ssPtr->realSeqSum, ssPtr->priceFunc); + priceFunc++; #endif return op-ostart; diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index 39b76d8a..c575f463 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -176,6 +176,7 @@ typedef struct { U32* litFreq; U32* offCodeFreq; U32 matchLengthSum; + U32 matchSum; U32 litLengthSum; U32 litSum; U32 offCodeSum; @@ -183,6 +184,7 @@ typedef struct { U32 realLitSum; U32 realSeqSum; U32 realRepSum; + U32 priceFunc; } seqStore_t; seqStore_t ZSTD_copySeqStore(const ZSTD_CCtx* ctx); diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index b5b10efd..4169f874 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -46,18 +46,20 @@ FORCE_INLINE U32 ZSTD_GETPRICE(seqStore_t* seqStorePtr, U32 litLength, const BYT if (matchLength >= MaxML) matchLength = MaxML; price += ZSTD_highbit(seqStorePtr->matchLengthSum) - ZSTD_highbit(seqStorePtr->matchLengthFreq[matchLength]); -#if 0 - if (!litLength) - 
return price + 1 + ((seqStorePtr->litSum>>4) / seqStorePtr->litLengthSum) + (matchLength==0); - - return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ((seqStorePtr->litSum>>4) / seqStorePtr->litLengthSum) + (matchLength==0); -#else - if (!litLength) - return price + 1; - - return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals); -// return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals); -#endif +#define ZSTD_PRICE_MULT 2 + switch (seqStorePtr->priceFunc) + { + default: + case 0: + if (!litLength) return price + 1 + ((seqStorePtr->litSum<litSum + seqStorePtr->matchSum)) + (matchLength==0); + return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ((seqStorePtr->litSum<litSum + seqStorePtr->matchSum)) + (matchLength==0); + case 1: + if (!litLength) return price + 1 + ((seqStorePtr->litSum>>5) / seqStorePtr->litLengthSum) + (matchLength==0); + return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ((seqStorePtr->litSum>>5) / seqStorePtr->litLengthSum) + (matchLength==0); + case 2: + if (!litLength) return price + 1; + return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals); + } } diff --git a/lib/zstd_opt_internal.h b/lib/zstd_opt_internal.h index 47d5a498..e9ca5f2e 100644 --- a/lib/zstd_opt_internal.h +++ b/lib/zstd_opt_internal.h @@ -43,7 +43,7 @@ #define ZSTD_OPT_NUM (1<<12) #define ZSTD_FREQ_START 1 #define ZSTD_FREQ_STEP 1 -#define ZSTD_FREQ_DIV 6 +#define ZSTD_FREQ_DIV 5 /*- Debug -*/ #if defined(ZSTD_OPT_DEBUG) && ZSTD_OPT_DEBUG>=9 @@ -94,6 +94,7 @@ MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr) ssPtr->matchLengthSum = 0; ssPtr->litLengthSum = 0; ssPtr->litSum = 0; + ssPtr->matchSum = 0; ssPtr->offCodeSum = 0; for (u=0; u<=MaxLit; u++) { @@ -107,6 +108,7 @@ MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr) for (u=0; u<=MaxML; u++) { ssPtr->matchLengthFreq[u] = ZSTD_FREQ_START + (ssPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV); ssPtr->matchLengthSum += 
ssPtr->matchLengthFreq[u]; + ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3); } for (u=0; u<=MaxOff; u++) { ssPtr->offCodeFreq[u] = ZSTD_FREQ_START + (ssPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV); From 4a981f79374077ea8b896ea7a3fd3ee187f7d2d0 Mon Sep 17 00:00:00 2001 From: inikep Date: Tue, 23 Feb 2016 19:08:20 +0100 Subject: [PATCH 017/247] improved ZSTD_GETPRICE --- lib/zstd_internal.h | 2 +- lib/zstd_opt.h | 12 ++++-------- lib/zstd_opt_internal.h | 10 +++++++--- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index c575f463..c3874e4e 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -50,7 +50,7 @@ /*-************************************* * Common constants ***************************************/ -#define ZSTD_OPT_DEBUG 3 // 1 = tableID=0; 3 = print block stats; 5 = check encoded sequences; 9 = full logs +#define ZSTD_OPT_DEBUG 0 // 1 = tableID=0; 3 = print block stats; 5 = check encoded sequences; 9 = full logs #if ZSTD_OPT_DEBUG > 0 #include /* for debug */ #endif diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index 4169f874..cfd17e5a 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -44,21 +44,17 @@ FORCE_INLINE U32 ZSTD_GETPRICE(seqStore_t* seqStorePtr, U32 litLength, const BYT matchLength -= MINMATCHOPT; price += ((matchLength >= MaxML)<<3) + ((matchLength >= 255+MaxML)<<4) + ((matchLength>=(1<<15))<<3); if (matchLength >= MaxML) matchLength = MaxML; - price += ZSTD_highbit(seqStorePtr->matchLengthSum) - ZSTD_highbit(seqStorePtr->matchLengthFreq[matchLength]); + price += ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ZSTD_highbit(seqStorePtr->matchLengthSum) - ZSTD_highbit(seqStorePtr->matchLengthFreq[matchLength]); -#define ZSTD_PRICE_MULT 2 switch (seqStorePtr->priceFunc) { default: case 0: - if (!litLength) return price + 1 + ((seqStorePtr->litSum<litSum + seqStorePtr->matchSum)) + (matchLength==0); - return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + 
((seqStorePtr->litSum<litSum + seqStorePtr->matchSum)) + (matchLength==0); + return price + ((seqStorePtr->litSum>>4) / seqStorePtr->litLengthSum); case 1: - if (!litLength) return price + 1 + ((seqStorePtr->litSum>>5) / seqStorePtr->litLengthSum) + (matchLength==0); - return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ((seqStorePtr->litSum>>5) / seqStorePtr->litLengthSum) + (matchLength==0); + return price + ((seqStorePtr->litSum<<1) / (seqStorePtr->litSum + seqStorePtr->matchSum)); case 2: - if (!litLength) return price + 1; - return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals); + return price; } } diff --git a/lib/zstd_opt_internal.h b/lib/zstd_opt_internal.h index e9ca5f2e..c23d5d74 100644 --- a/lib/zstd_opt_internal.h +++ b/lib/zstd_opt_internal.h @@ -43,7 +43,7 @@ #define ZSTD_OPT_NUM (1<<12) #define ZSTD_FREQ_START 1 #define ZSTD_FREQ_STEP 1 -#define ZSTD_FREQ_DIV 5 +#define ZSTD_FREQ_DIV 4 /*- Debug -*/ #if defined(ZSTD_OPT_DEBUG) && ZSTD_OPT_DEBUG>=9 @@ -81,7 +81,8 @@ MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr) ssPtr->litLengthSum = (1<litSum = (1<offCodeSum = (1<matchSum = 0; + for (u=0; u<=MaxLit; u++) ssPtr->litFreq[u] = 1; for (u=0; u<=MaxLL; u++) @@ -94,8 +95,8 @@ MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr) ssPtr->matchLengthSum = 0; ssPtr->litLengthSum = 0; ssPtr->litSum = 0; - ssPtr->matchSum = 0; ssPtr->offCodeSum = 0; + ssPtr->matchSum = 0; for (u=0; u<=MaxLit; u++) { ssPtr->litFreq[u] = ZSTD_FREQ_START + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV); @@ -150,6 +151,9 @@ FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* seqStorePtr, U32 litLength, co { U32 price, u; + if (litLength == 0) + return ZSTD_highbit(seqStorePtr->litLengthSum) - ZSTD_highbit(seqStorePtr->litLengthFreq[0]); + /* literals */ price = litLength * ZSTD_highbit(seqStorePtr->litSum); for (u=0; u < litLength; u++) From 2cf6d662a8ff11f947e135b052514a7fb64bbb1c Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 23 Feb 2016 21:21:29 
+0100 Subject: [PATCH 018/247] fixed #131 --- programs/.gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/programs/.gitignore b/programs/.gitignore index 021e8937..2fc85021 100644 --- a/programs/.gitignore +++ b/programs/.gitignore @@ -29,3 +29,6 @@ datagen # Visual solution files *.suo *.user + +# Default dictionary name +dictionary From 3152a8c87a5261d03eb4b4a3c359523f331d0c46 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 23 Feb 2016 21:28:59 +0100 Subject: [PATCH 019/247] finer dictionary completion --- lib/zdict.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/lib/zdict.c b/lib/zdict.c index d3d5784d..2b3d3ae8 100644 --- a/lib/zdict.c +++ b/lib/zdict.c @@ -618,6 +618,14 @@ static void ZDICT_countEStats(EStats_ress_t esr, litlengthCount[*bytePtr]++; } +static size_t ZDICT_maxSampleSize(const size_t* fileSizes, unsigned nbFiles) +{ + unsigned u; + size_t max=0; + for (u=0; u>10)); - dictContentSize = (U32)ZDICT_fastSampling((char*)dictBuffer + g_provision_entropySize, - targetDictSize, samplesBuffer, sBuffSize); + dictContentSize = (U32)ZDICT_fastSampling(dictBuffer, targetDictSize, + samplesBuffer, sBuffSize); } /* dictionary header */ From 7de4f9fd81c997077b85d0d8bf76590bcfe258ee Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 23 Feb 2016 21:34:18 +0100 Subject: [PATCH 020/247] minor cosmetic --- programs/dibio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/dibio.c b/programs/dibio.c index 646fe2c6..17f89586 100644 --- a/programs/dibio.c +++ b/programs/dibio.c @@ -259,7 +259,7 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize, srcBuffer, fileSizes, nbFiles, params); if (ZDICT_isError(dictSize)) { - DISPLAYLEVEL(1, "dictionary training failed : %s", ZDICT_getErrorName(dictSize)); /* should not happen */ + DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */ result = 1; goto _cleanup; } From 
ee55628c9dc93f036e82b192745cee3bc6b5cdc3 Mon Sep 17 00:00:00 2001 From: inikep Date: Wed, 24 Feb 2016 14:40:30 +0100 Subject: [PATCH 021/247] combined price function --- lib/zstd_internal.h | 2 ++ lib/zstd_opt.h | 35 ++++++++++++++++++++++++++++++++--- lib/zstd_opt_internal.h | 20 ++++++++++---------- 3 files changed, 44 insertions(+), 13 deletions(-) diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index c3874e4e..d5266c44 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -184,6 +184,8 @@ typedef struct { U32 realLitSum; U32 realSeqSum; U32 realRepSum; + U32 factor; + U32 factor2; U32 priceFunc; } seqStore_t; diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index cfd17e5a..5bb40e60 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -48,12 +48,14 @@ FORCE_INLINE U32 ZSTD_GETPRICE(seqStore_t* seqStorePtr, U32 litLength, const BYT switch (seqStorePtr->priceFunc) { - default: + default: case 0: - return price + ((seqStorePtr->litSum>>4) / seqStorePtr->litLengthSum); + return price + seqStorePtr->factor + ((seqStorePtr->litSum>>5) / seqStorePtr->litLengthSum) + ((seqStorePtr->litSum<<1) / (seqStorePtr->litSum + seqStorePtr->matchSum)); case 1: - return price + ((seqStorePtr->litSum<<1) / (seqStorePtr->litSum + seqStorePtr->matchSum)); + return price + seqStorePtr->factor + ((seqStorePtr->factor2) ? ((seqStorePtr->litSum>>5) / seqStorePtr->litLengthSum) + ((seqStorePtr->litSum<<1) / (seqStorePtr->litSum + seqStorePtr->matchSum)) : 0); case 2: + return price + seqStorePtr->factor + ((seqStorePtr->factor2) ? 
((seqStorePtr->litSum>>4) / seqStorePtr->litLengthSum) + ((seqStorePtr->litSum<<1) / (seqStorePtr->litSum + seqStorePtr->matchSum)) : 0); + case 3: return price; } } @@ -264,6 +266,33 @@ void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx, ZSTD_rescaleFreqs(seqStorePtr); if ((ip-base) < REPCODE_STARTVALUE) ip = base + REPCODE_STARTVALUE; + + size_t mostFrequent; + unsigned count[256], maxSymbolValue, usedSymbols = 0; + maxSymbolValue = 255; + mostFrequent = FSE_count(count, &maxSymbolValue, src, srcSize); + for (unsigned i=0; i<=maxSymbolValue; i++) + if (count[i]) usedSymbols++; + + seqStorePtr->factor = ((usedSymbols <= 18) && (mostFrequent < (1<<14))) ? mostFrequent>>10 : 0; // helps RTF files + seqStorePtr->factor2 = (usedSymbols==256) && (mostFrequent > (1<<14)); + +#if 0 + if (seqStorePtr->factor2) + printf("FACTOR2 usedSymbols==256;mostFrequent>(1<<14) maxSymbolValue=%d mostFrequent=%d usedSymbols=%d\n", maxSymbolValue, (int)mostFrequent, usedSymbols); + if (seqStorePtr->factor) { + printf("FACTOR1 usedSymbols<56;mostFrequent<(1<<14) maxSymbolValue=%d mostFrequent=%d usedSymbols=%d\n", maxSymbolValue, (int)mostFrequent, usedSymbols); +#if 0 + for (int i=0; i<256; i++) + if (count[i]) printf("%d=%d ", i, count[i]); + printf("\n"); + +#endif + } +#endif + + + /* Match Loop */ while (ip < ilimit) { U32 u; diff --git a/lib/zstd_opt_internal.h b/lib/zstd_opt_internal.h index c23d5d74..86288752 100644 --- a/lib/zstd_opt_internal.h +++ b/lib/zstd_opt_internal.h @@ -43,7 +43,7 @@ #define ZSTD_OPT_NUM (1<<12) #define ZSTD_FREQ_START 1 #define ZSTD_FREQ_STEP 1 -#define ZSTD_FREQ_DIV 4 +#define ZSTD_FREQ_DIV 5 /*- Debug -*/ #if defined(ZSTD_OPT_DEBUG) && ZSTD_OPT_DEBUG>=9 @@ -77,20 +77,20 @@ MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr) unsigned u; if (ssPtr->litLengthSum == 0) { - ssPtr->matchLengthSum = (1<litLengthSum = (1<litSum = (1<offCodeSum = (1<matchSum = 0; + ssPtr->matchLengthSum = ZSTD_FREQ_START*(1<litLengthSum = ZSTD_FREQ_START*(1<litSum = 
ZSTD_FREQ_START*(1<offCodeSum = ZSTD_FREQ_START*(1<matchSum = ZSTD_FREQ_START*ssPtr->litSum; for (u=0; u<=MaxLit; u++) - ssPtr->litFreq[u] = 1; + ssPtr->litFreq[u] = ZSTD_FREQ_START; for (u=0; u<=MaxLL; u++) - ssPtr->litLengthFreq[u] = 1; + ssPtr->litLengthFreq[u] = ZSTD_FREQ_START; for (u=0; u<=MaxML; u++) - ssPtr->matchLengthFreq[u] = 1; + ssPtr->matchLengthFreq[u] = ZSTD_FREQ_START; for (u=0; u<=MaxOff; u++) - ssPtr->offCodeFreq[u] = 1; + ssPtr->offCodeFreq[u] = ZSTD_FREQ_START; } else { ssPtr->matchLengthSum = 0; ssPtr->litLengthSum = 0; From 02137f8c4262292bab948a046704fb90082ed4d0 Mon Sep 17 00:00:00 2001 From: inikep Date: Wed, 24 Feb 2016 18:09:36 +0100 Subject: [PATCH 022/247] more accurate gain function --- lib/zstd_opt.h | 16 ++++++++-------- lib/zstd_opt_internal.h | 12 ++++++------ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index 5bb40e60..55acef67 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -37,26 +37,26 @@ FORCE_INLINE U32 ZSTD_GETPRICE(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) { /* offset */ - BYTE offCode = offset ? (BYTE)ZSTD_highbit(offset) + 1 : 0; - U32 price = offCode + ZSTD_highbit(seqStorePtr->offCodeSum) - ZSTD_highbit(seqStorePtr->offCodeFreq[offCode]); + BYTE offCode = offset ? 
(BYTE)ZSTD_highbit(offset+1) + 1 : 0; + U32 price = (offCode-1) + (!offCode) + ZSTD_highbit(seqStorePtr->offCodeSum+1) - ZSTD_highbit(seqStorePtr->offCodeFreq[offCode]+1); /* match Length */ matchLength -= MINMATCHOPT; price += ((matchLength >= MaxML)<<3) + ((matchLength >= 255+MaxML)<<4) + ((matchLength>=(1<<15))<<3); if (matchLength >= MaxML) matchLength = MaxML; - price += ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ZSTD_highbit(seqStorePtr->matchLengthSum) - ZSTD_highbit(seqStorePtr->matchLengthFreq[matchLength]); + price += ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ZSTD_highbit(seqStorePtr->matchLengthSum+1) - ZSTD_highbit(seqStorePtr->matchLengthFreq[matchLength]+1); switch (seqStorePtr->priceFunc) { - default: + default: case 0: - return price + seqStorePtr->factor + ((seqStorePtr->litSum>>5) / seqStorePtr->litLengthSum) + ((seqStorePtr->litSum<<1) / (seqStorePtr->litSum + seqStorePtr->matchSum)); + return 1 + price + seqStorePtr->factor + ((seqStorePtr->litSum>>5) / seqStorePtr->litLengthSum) + ((seqStorePtr->litSum<<1) / (seqStorePtr->litSum + seqStorePtr->matchSum)); case 1: - return price + seqStorePtr->factor + ((seqStorePtr->factor2) ? ((seqStorePtr->litSum>>5) / seqStorePtr->litLengthSum) + ((seqStorePtr->litSum<<1) / (seqStorePtr->litSum + seqStorePtr->matchSum)) : 0); + return 1 + price + seqStorePtr->factor + ((seqStorePtr->factor2) ? ((seqStorePtr->litSum>>5) / seqStorePtr->litLengthSum) + ((seqStorePtr->litSum<<1) / (seqStorePtr->litSum + seqStorePtr->matchSum)) : 0); case 2: - return price + seqStorePtr->factor + ((seqStorePtr->factor2) ? ((seqStorePtr->litSum>>4) / seqStorePtr->litLengthSum) + ((seqStorePtr->litSum<<1) / (seqStorePtr->litSum + seqStorePtr->matchSum)) : 0); + return 1 + price + seqStorePtr->factor + ((seqStorePtr->factor2) ? 
((seqStorePtr->litSum>>4) / seqStorePtr->litLengthSum) + ((seqStorePtr->litSum<<1) / (seqStorePtr->litSum + seqStorePtr->matchSum)) : 0); case 3: - return price; + return 1 + price; } } diff --git a/lib/zstd_opt_internal.h b/lib/zstd_opt_internal.h index 86288752..a40cad06 100644 --- a/lib/zstd_opt_internal.h +++ b/lib/zstd_opt_internal.h @@ -52,7 +52,7 @@ #define ZSTD_LOG_TRY_PRICE(...) printf(__VA_ARGS__) #else #define ZSTD_LOG_PARSER(...) - #define ZSTD_LOG_ENCODE(...) + #define ZSTD_LOG_ENCODE(...) // printf(__VA_ARGS__) #define ZSTD_LOG_TRY_PRICE(...) #endif @@ -136,7 +136,7 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B /* match offset */ seqStorePtr->offCodeSum += ZSTD_FREQ_STEP; - BYTE offCode = offset ? (BYTE)ZSTD_highbit(offset) + 1 : 0; + BYTE offCode = offset ? (BYTE)ZSTD_highbit(offset+1) + 1 : 0; seqStorePtr->offCodeFreq[offCode] += ZSTD_FREQ_STEP; /* match Length */ @@ -152,17 +152,17 @@ FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* seqStorePtr, U32 litLength, co U32 price, u; if (litLength == 0) - return ZSTD_highbit(seqStorePtr->litLengthSum) - ZSTD_highbit(seqStorePtr->litLengthFreq[0]); + return ZSTD_highbit(seqStorePtr->litLengthSum+1) - ZSTD_highbit(seqStorePtr->litLengthFreq[0]+1); /* literals */ - price = litLength * ZSTD_highbit(seqStorePtr->litSum); + price = litLength * ZSTD_highbit(seqStorePtr->litSum+1); for (u=0; u < litLength; u++) - price -= ZSTD_highbit(seqStorePtr->litFreq[literals[u]]); + price -= ZSTD_highbit(seqStorePtr->litFreq[literals[u]]+1); /* literal Length */ price += ((litLength >= MaxLL)<<3) + ((litLength >= 255+MaxLL)<<4) + ((litLength>=(1<<15))<<3); if (litLength >= MaxLL) litLength = MaxLL; - price += ZSTD_highbit(seqStorePtr->litLengthSum) - ZSTD_highbit(seqStorePtr->litLengthFreq[litLength]); + price += ZSTD_highbit(seqStorePtr->litLengthSum+1) - ZSTD_highbit(seqStorePtr->litLengthFreq[litLength]+1); return price; } From ee0229287c1a87f7d917d0ee7e0ea4829e287a2b Mon Sep 17 
00:00:00 2001 From: Yann Collet Date: Thu, 25 Feb 2016 14:53:35 +0100 Subject: [PATCH 023/247] changed version to v0.6.0 --- lib/zstd.h | 4 ++-- lib/zstd_static.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/zstd.h b/lib/zstd.h index 53ed6973..26c6e275 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -60,8 +60,8 @@ extern "C" { * Version ***************************************/ #define ZSTD_VERSION_MAJOR 0 /* for breaking interface changes */ -#define ZSTD_VERSION_MINOR 5 /* for new (non-breaking) interface capabilities */ -#define ZSTD_VERSION_RELEASE 1 /* for tweaks, bug-fixes, or development */ +#define ZSTD_VERSION_MINOR 6 /* for new (non-breaking) interface capabilities */ +#define ZSTD_VERSION_RELEASE 0 /* for tweaks, bug-fixes, or development */ #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) ZSTDLIB_API unsigned ZSTD_versionNumber (void); diff --git a/lib/zstd_static.h b/lib/zstd_static.h index 61216535..8ca97f5b 100644 --- a/lib/zstd_static.h +++ b/lib/zstd_static.h @@ -51,7 +51,7 @@ extern "C" { /*-************************************* * Constants ***************************************/ -#define ZSTD_MAGICNUMBER 0xFD2FB525 /* v0.5 */ +#define ZSTD_MAGICNUMBER 0xFD2FB526 /* v0.6 */ /*-************************************* From afa9c9f5ab4499731ca185e299a9610736697265 Mon Sep 17 00:00:00 2001 From: inikep Date: Thu, 25 Feb 2016 16:47:57 +0100 Subject: [PATCH 024/247] turn off RTF optimization --- lib/zstd_opt.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index 55acef67..922a569c 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -46,6 +46,7 @@ FORCE_INLINE U32 ZSTD_GETPRICE(seqStore_t* seqStorePtr, U32 litLength, const BYT if (matchLength >= MaxML) matchLength = MaxML; price += ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ZSTD_highbit(seqStorePtr->matchLengthSum+1) - 
ZSTD_highbit(seqStorePtr->matchLengthFreq[matchLength]+1); +#if ZSTD_OPT_DEBUG >= 3 switch (seqStorePtr->priceFunc) { default: @@ -58,6 +59,9 @@ FORCE_INLINE U32 ZSTD_GETPRICE(seqStore_t* seqStorePtr, U32 litLength, const BYT case 3: return 1 + price; } +#else + return 1 + price + ((seqStorePtr->litSum>>5) / seqStorePtr->litLengthSum) + ((seqStorePtr->litSum<<1) / (seqStorePtr->litSum + seqStorePtr->matchSum)); +#endif } @@ -266,7 +270,7 @@ void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx, ZSTD_rescaleFreqs(seqStorePtr); if ((ip-base) < REPCODE_STARTVALUE) ip = base + REPCODE_STARTVALUE; - +#if ZSTD_OPT_DEBUG >= 3 size_t mostFrequent; unsigned count[256], maxSymbolValue, usedSymbols = 0; maxSymbolValue = 255; @@ -276,6 +280,7 @@ void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx, seqStorePtr->factor = ((usedSymbols <= 18) && (mostFrequent < (1<<14))) ? mostFrequent>>10 : 0; // helps RTF files seqStorePtr->factor2 = (usedSymbols==256) && (mostFrequent > (1<<14)); +#endif #if 0 if (seqStorePtr->factor2) From 59493e8669832b56624c1e1f844d5915a64bc147 Mon Sep 17 00:00:00 2001 From: inikep Date: Thu, 25 Feb 2016 19:15:08 +0100 Subject: [PATCH 025/247] fixed paramgrill --- lib/zstd_opt_internal.h | 2 +- programs/paramgrill.c | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/zstd_opt_internal.h b/lib/zstd_opt_internal.h index a40cad06..0186ebc2 100644 --- a/lib/zstd_opt_internal.h +++ b/lib/zstd_opt_internal.h @@ -81,7 +81,7 @@ MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr) ssPtr->litLengthSum = ZSTD_FREQ_START*(1<litSum = ZSTD_FREQ_START*(1<offCodeSum = ZSTD_FREQ_START*(1<matchSum = ZSTD_FREQ_START*ssPtr->litSum; + ssPtr->matchSum = ssPtr->litSum; for (u=0; u<=MaxLit; u++) ssPtr->litFreq[u] = ZSTD_FREQ_START; diff --git a/programs/paramgrill.c b/programs/paramgrill.c index 23a54d46..db8ff0e7 100644 --- a/programs/paramgrill.c +++ b/programs/paramgrill.c @@ -127,7 +127,7 @@ static U32 g_rand = 1; static U32 g_singleRun = 0; static U32 
g_target = 0; static U32 g_noSeed = 0; -static ZSTD_parameters g_params = { 0, 0, 0, 0, 0, 0, 0, ZSTD_greedy }; +static ZSTD_parameters g_params = { 0, 0, 0, 0, 0, 0, 0, 0, ZSTD_greedy }; void BMK_SetNbIterations(int nbLoops) { @@ -406,7 +406,6 @@ const char* g_stratName[] = { "ZSTD_fast ", "ZSTD_lazy ", "ZSTD_lazy2 ", "ZSTD_btlazy2", - "ZSTD_opt ", "ZSTD_btopt " }; static void BMK_printWinner(FILE* f, U32 cLevel, BMK_result_t result, ZSTD_parameters params, size_t srcSize) @@ -549,7 +548,7 @@ static ZSTD_parameters* sanitizeParams(ZSTD_parameters params) g_params = params; if (params.strategy == ZSTD_fast) g_params.contentLog = 0, g_params.searchLog = 0; - if ((params.strategy != ZSTD_opt) && (params.strategy != ZSTD_btopt )) + if (params.strategy != ZSTD_btopt ) g_params.targetLength = 0; return &g_params; } From f414647c6b6639cb92a85faca16341e9f0a1eba3 Mon Sep 17 00:00:00 2001 From: inikep Date: Thu, 25 Feb 2016 22:31:07 +0100 Subject: [PATCH 026/247] fixed ZSTD_copyCCtx --- lib/zstd_compress.c | 5 +++-- lib/zstd_decompress.c | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 8a5f6f69..5edd3434 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -228,8 +228,8 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx) { const U32 contentLog = (srcCCtx->params.strategy == ZSTD_fast) ? 
1 : srcCCtx->params.contentLog; - const size_t tableSpace = ((1 << contentLog) + (1 << srcCCtx->params.hashLog)) * sizeof(U32); - + const size_t tableSpace = ((1 << contentLog) + (1 << srcCCtx->params.hashLog) + (1 << srcCCtx->params.hashLog3)) * sizeof(U32); + if (srcCCtx->stage!=0) return ERROR(stage_wrong); ZSTD_resetCCtx_advanced(dstCCtx, srcCCtx->params); @@ -243,6 +243,7 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx) /* copy dictionary pointers */ dstCCtx->nextToUpdate= srcCCtx->nextToUpdate; + dstCCtx->nextToUpdate3 = srcCCtx->nextToUpdate3; dstCCtx->nextSrc = srcCCtx->nextSrc; dstCCtx->base = srcCCtx->base; dstCCtx->dictBase = srcCCtx->dictBase; diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 8cedb3ea..a5746986 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -159,6 +159,7 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) dctx->dictEnd = NULL; dctx->hufTableX4[0] = HufLog; dctx->flagStaticTables = 0; + dctx->params.searchLength = MINMATCH; return 0; } From f647d99d61a884d70f18770072e8c9270818bd79 Mon Sep 17 00:00:00 2001 From: inikep Date: Mon, 29 Feb 2016 12:33:08 +0100 Subject: [PATCH 027/247] fixed bug with 3 or more succesive rep codes --- lib/zstd_compress.c | 2 +- lib/zstd_opt.h | 74 ++++++++++++++++------------------------- lib/zstd_opt_internal.h | 2 -- 3 files changed, 30 insertions(+), 48 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 5edd3434..362d714b 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -1958,7 +1958,7 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* zc, } -#if ZSTD_OPT_DEBUG >= 3 +#if ZSTD_OPT_DEBUG == 3 ssPtr->realMatchSum += ssPtr->realSeqSum * ((zc->params.searchLength == 3) ? 
3 : 4); printf("avgMatchL=%.2f avgLitL=%.2f match=%.1f%% lit=%.1f%% reps=%d seq=%d priceFunc=%d\n", (float)ssPtr->realMatchSum/ssPtr->realSeqSum, (float)ssPtr->realLitSum/ssPtr->realSeqSum, 100.0*ssPtr->realMatchSum/(ssPtr->realMatchSum+ssPtr->realLitSum), 100.0*ssPtr->realLitSum/(ssPtr->realMatchSum+ssPtr->realLitSum), ssPtr->realRepSum, ssPtr->realSeqSum, ssPtr->priceFunc); priceFunc++; diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index 922a569c..5cb04cd8 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -373,17 +373,13 @@ void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx, litlen = opt[cur-1].litlen + 1; if (cur > litlen) { price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-litlen); - ZSTD_LOG_TRY_PRICE("%d: TRY1 opt[%d].price=%d price=%d cur=%d litlen=%d\n", (int)(inr-base), cur - litlen, opt[cur - litlen].price, price, cur, litlen); } else price = ZSTD_getLiteralPrice(seqStorePtr, litlen, litstart); } else { litlen = 1; price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1); - ZSTD_LOG_TRY_PRICE("%d: TRY3 price=%d cur=%d litlen=%d litonly=%d\n", (int)(inr-base), price, cur, litlen, (int)ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1)); } - ZSTD_LOG_TRY_PRICE("%d: TRY4 price=%d opt[%d].price=%d\n", (int)(inr-base), price, cur, opt[cur].price); - if (cur > last_pos || price <= opt[cur].price) // || ((price == opt[cur].price) && (opt[cur-1].mlen == 1) && (cur != litlen))) SET_PRICE(cur, 1, 0, litlen, price); @@ -392,33 +388,33 @@ void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx, if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */ continue; - mlen = opt[cur].mlen; + mlen = opt[cur].mlen; - if (opt[cur-mlen].off) { + if (opt[cur].off) { opt[cur].rep2 = opt[cur-mlen].rep; - opt[cur].rep = opt[cur-mlen].off; - ZSTD_LOG_PARSER("%d: COPYREP1 cur=%d mlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2); - } else { - if (cur!=mlen && opt[cur-mlen].litlen 
== 0) { + opt[cur].rep = opt[cur].off; + ZSTD_LOG_ENCODE("%d: COPYREP_OFF cur=%d mlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2); + } else { + if (cur!=mlen && opt[cur].litlen == 0) { opt[cur].rep2 = opt[cur-mlen].rep; opt[cur].rep = opt[cur-mlen].rep2; - ZSTD_LOG_PARSER("%d: COPYREP2 cur=%d mlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2); + ZSTD_LOG_ENCODE("%d: COPYREP_SWI cur=%d mlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2); } else { opt[cur].rep2 = opt[cur-mlen].rep2; opt[cur].rep = opt[cur-mlen].rep; - ZSTD_LOG_PARSER("%d: COPYREP3 cur=%d mlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2); - } } + ZSTD_LOG_ENCODE("%d: COPYREP_NOR cur=%d mlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2); + } } - ZSTD_LOG_PARSER("%d: CURRENT price[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep, opt[cur].rep2); + ZSTD_LOG_PARSER("%d: CURRENT_NoExt price[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep, opt[cur].rep2); best_mlen = 0; - if (!opt[cur].off && opt[cur].mlen != 1) { + if (opt[cur].mlen != 1) { cur_rep = opt[cur].rep2; - ZSTD_LOG_PARSER("%d: try REP2 rep2=%u mlen=%u\n", (int)(inr-base), (U32)cur_rep, mlen); + ZSTD_LOG_PARSER("%d: tryNoExt REP2 rep2=%u mlen=%u\n", (int)(inr-base), (U32)cur_rep, mlen); } else { cur_rep = opt[cur].rep; - ZSTD_LOG_PARSER("%d: try REP1 rep=%u mlen=%u\n", (int)(inr-base), (U32)cur_rep, mlen); + ZSTD_LOG_PARSER("%d: tryNoExt REP1 rep=%u mlen=%u\n", (int)(inr-base), (U32)cur_rep, mlen); } if (MEM_readMINMATCH(inr) == MEM_readMINMATCH(inr - cur_rep)) { // check rep @@ -437,13 +433,11 @@ void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx, litlen = 
opt[cur].litlen; if (cur > litlen) { price = opt[cur - litlen].price + ZSTD_GETPRICE(seqStorePtr, litlen, inr-litlen, 0, mlen); - ZSTD_LOG_TRY_PRICE("%d: TRY5 opt[%d].price=%d price=%d cur=%d litlen=%d\n", (int)(inr-base), cur - litlen, opt[cur - litlen].price, price, cur, litlen); } else price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, 0, mlen); } else { litlen = 0; price = opt[cur].price + ZSTD_GETPRICE(seqStorePtr, 0, NULL, 0, mlen); - ZSTD_LOG_TRY_PRICE("%d: TRY7 price=%d cur=%d litlen=0 getprice=%d\n", (int)(inr-base), price, cur, (int)ZSTD_GETPRICE(seqStorePtr, 0, NULL, 0, mlen)); } best_mlen = mlen; @@ -488,7 +482,6 @@ void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx, } ZSTD_LOG_PARSER("%d: Found2 mlen=%d best_mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, best_mlen, matches[u].off, price, litlen); - ZSTD_LOG_TRY_PRICE("%d: TRY8 price=%d opt[%d].price=%d\n", (int)(inr-base), price, cur + mlen, opt[cur + mlen].price); if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price); @@ -512,7 +505,6 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ while (1) { mlen = opt[cur].mlen; - ZSTD_LOG_PARSER("%d: cur=%d mlen=%d\n", (int)(ip-base), cur, mlen); offset = opt[cur].off; opt[cur].mlen = best_mlen; opt[cur].off = best_off; @@ -557,7 +549,7 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ ml2 = (U32)ZSTD_count(ip, ip-rep_1, iend); if (offset == 0 || offset >= 8) if (ml2 < mlen && ml2 < MINMATCHOPT) { - printf("%d: ERROR iend=%d mlen=%d offset=%d ml2=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset, (int)ml2); exit(0); } + printf("%d: ERROR_NoExt iend=%d mlen=%d offset=%d ml2=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset, (int)ml2); exit(0); } if (ip < anchor) { printf("%d: ERROR ip < anchor iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } if (ip 
- offset < ctx->base) { @@ -718,17 +710,13 @@ void ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC(ZSTD_CCtx* ctx, litlen = opt[cur-1].litlen + 1; if (cur > litlen) { price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-litlen); - ZSTD_LOG_TRY_PRICE("%d: TRY1 opt[%d].price=%d price=%d cur=%d litlen=%d\n", (int)(inr-base), cur - litlen, opt[cur - litlen].price, price, cur, litlen); } else price = ZSTD_getLiteralPrice(seqStorePtr, litlen, litstart); } else { litlen = 1; price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1); - ZSTD_LOG_TRY_PRICE("%d: TRY3 price=%d cur=%d litlen=%d litonly=%d\n", (int)(inr-base), price, cur, litlen, (int)ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1)); } - ZSTD_LOG_TRY_PRICE("%d: TRY4 price=%d opt[%d].price=%d\n", (int)(inr-base), price, cur, opt[cur].price); - if (cur > last_pos || price <= opt[cur].price) // || ((price == opt[cur].price) && (opt[cur-1].mlen == 1) && (cur != litlen))) SET_PRICE(cur, 1, 0, litlen, price); @@ -737,33 +725,33 @@ void ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC(ZSTD_CCtx* ctx, if (inr > ilimit) // last match must start at a minimum distance of 8 from oend continue; - mlen = opt[cur].mlen; + mlen = opt[cur].mlen; - if (opt[cur-mlen].off) { + if (opt[cur].off) { opt[cur].rep2 = opt[cur-mlen].rep; - opt[cur].rep = opt[cur-mlen].off; - ZSTD_LOG_PARSER("%d: COPYREP1 cur=%d mlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2); - } else { - if (cur!=mlen && opt[cur-mlen].litlen == 0) { + opt[cur].rep = opt[cur].off; + ZSTD_LOG_ENCODE("%d: COPYREP_OFF cur=%d mlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2); + } else { + if (cur!=mlen && opt[cur].litlen == 0) { opt[cur].rep2 = opt[cur-mlen].rep; opt[cur].rep = opt[cur-mlen].rep2; - ZSTD_LOG_PARSER("%d: COPYREP2 cur=%d mlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2); + ZSTD_LOG_ENCODE("%d: COPYREP_SWI cur=%d mlen=%d rep=%d 
rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2); } else { opt[cur].rep2 = opt[cur-mlen].rep2; opt[cur].rep = opt[cur-mlen].rep; - ZSTD_LOG_PARSER("%d: COPYREP3 cur=%d mlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2); - } } + ZSTD_LOG_ENCODE("%d: COPYREP_NOR cur=%d mlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2); + } } - ZSTD_LOG_PARSER("%d: CURRENT price[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep, opt[cur].rep2); + ZSTD_LOG_PARSER("%d: CURRENT_Ext price[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep, opt[cur].rep2); best_mlen = 0; - if (!opt[cur].off && opt[cur].mlen != 1) { + if (opt[cur].mlen != 1) { cur_rep = opt[cur].rep2; - ZSTD_LOG_PARSER("%d: try REP2 rep2=%u mlen=%u\n", (int)(inr-base), (U32)cur_rep, mlen); + ZSTD_LOG_PARSER("%d: tryExt REP2 rep2=%u mlen=%u\n", (int)(inr-base), (U32)cur_rep, mlen); } else { cur_rep = opt[cur].rep; - ZSTD_LOG_PARSER("%d: try REP1 rep=%u mlen=%u\n", (int)(inr-base), (U32)cur_rep, mlen); + ZSTD_LOG_PARSER("%d: tryExt REP1 rep=%u mlen=%u\n", (int)(inr-base), (U32)cur_rep, mlen); } const U32 repIndex = (U32)(current+cur - cur_rep); @@ -788,13 +776,11 @@ void ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC(ZSTD_CCtx* ctx, litlen = opt[cur].litlen; if (cur > litlen) { price = opt[cur - litlen].price + ZSTD_GETPRICE(seqStorePtr, litlen, inr-litlen, 0, mlen); - ZSTD_LOG_TRY_PRICE("%d: TRY5 opt[%d].price=%d price=%d cur=%d litlen=%d\n", (int)(inr-base), cur - litlen, opt[cur - litlen].price, price, cur, litlen); } else price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, 0, mlen); } else { litlen = 0; price = opt[cur].price + ZSTD_GETPRICE(seqStorePtr, 0, NULL, 0, mlen); - ZSTD_LOG_TRY_PRICE("%d: TRY7 price=%d cur=%d 
litlen=0 getprice=%d\n", (int)(inr-base), price, cur, (int)ZSTD_GETPRICE(seqStorePtr, 0, NULL, 0, mlen)); } best_mlen = mlen; @@ -840,7 +826,6 @@ void ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC(ZSTD_CCtx* ctx, } ZSTD_LOG_PARSER("%d: Found2 mlen=%d best_mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, best_mlen, matches[u].off, price, litlen); - ZSTD_LOG_TRY_PRICE("%d: TRY8 price=%d opt[%d].price=%d\n", (int)(inr-base), price, cur + mlen, opt[cur + mlen].price); if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price); @@ -863,7 +848,6 @@ _storeSequence: // cur, last_pos, best_mlen, best_off have to be set while (1) { mlen = opt[cur].mlen; - ZSTD_LOG_PARSER("%d: cur=%d mlen=%d\n", (int)(ip-base), cur, mlen); offset = opt[cur].off; opt[cur].mlen = best_mlen; opt[cur].off = best_off; @@ -907,8 +891,8 @@ _storeSequence: // cur, last_pos, best_mlen, best_off have to be set ml2 = (U32)ZSTD_count(ip, ip-offset, iend); else ml2 = (U32)ZSTD_count(ip, ip-rep_1, iend); - if (ml2 < mlen && ml2 < MINMATCHOPT) { - printf("%d: ERROR iend=%d mlen=%d offset=%d ml2=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset, (int)ml2); exit(0); } + // if (ml2 < mlen && ml2 < MINMATCHOPT) { + // printf("%d: ERROR_Ext iend=%d mlen=%d offset=%d ml2=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset, (int)ml2); exit(0); } if (ip < anchor) { printf("%d: ERROR ip < anchor iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } if (ip - offset < ctx->base) { diff --git a/lib/zstd_opt_internal.h b/lib/zstd_opt_internal.h index 0186ebc2..81022dcb 100644 --- a/lib/zstd_opt_internal.h +++ b/lib/zstd_opt_internal.h @@ -49,11 +49,9 @@ #if defined(ZSTD_OPT_DEBUG) && ZSTD_OPT_DEBUG>=9 #define ZSTD_LOG_PARSER(...) printf(__VA_ARGS__) #define ZSTD_LOG_ENCODE(...) printf(__VA_ARGS__) - #define ZSTD_LOG_TRY_PRICE(...) 
printf(__VA_ARGS__) #else #define ZSTD_LOG_PARSER(...) #define ZSTD_LOG_ENCODE(...) // printf(__VA_ARGS__) - #define ZSTD_LOG_TRY_PRICE(...) #endif From 2d55563b92365b3fa5fc337b1c7db7149ee17a28 Mon Sep 17 00:00:00 2001 From: inikep Date: Mon, 29 Feb 2016 22:07:40 +0100 Subject: [PATCH 028/247] better logs --- lib/zstd_compress.c | 1 + lib/zstd_internal.h | 2 +- lib/zstd_opt.h | 75 +++++++++++++++++++++++------------------ lib/zstd_opt_internal.h | 4 ++- 4 files changed, 47 insertions(+), 35 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 362d714b..04f5be09 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -2040,6 +2040,7 @@ size_t ZSTD_compressContinue (ZSTD_CCtx* zc, size_t ZSTD_compressBlock(ZSTD_CCtx* zc, void* dst, size_t maxDstSize, const void* src, size_t srcSize) { if (srcSize > BLOCKSIZE) return ERROR(srcSize_wrong); + zc->params.searchLength = MINMATCH; return ZSTD_compressContinue_internal(zc, dst, maxDstSize, src, srcSize, 0); } diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index d5266c44..54f9aaf6 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -50,7 +50,7 @@ /*-************************************* * Common constants ***************************************/ -#define ZSTD_OPT_DEBUG 0 // 1 = tableID=0; 3 = print block stats; 5 = check encoded sequences; 9 = full logs +#define ZSTD_OPT_DEBUG 0 // 1 = tableID=0; 3 = price func tests; 5 = check encoded sequences; 9 = full logs #if ZSTD_OPT_DEBUG > 0 #include /* for debug */ #endif diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index 5cb04cd8..739ef7a6 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -136,14 +136,24 @@ static U32 ZSTD_INSERTBTANDGETALLMATCHES ( match = base + matchIndex; if (match[matchLength] == ip[matchLength]) { #if ZSTD_OPT_DEBUG >= 5 - if (memcmp(match, ip, matchLength) != 0) - printf("%d: ERROR: matchLength=%d ZSTD_count=%d\n", current, (int)matchLength, (int)ZSTD_count(ip, match, ip+matchLength)); + size_t ml; + if 
(matchIndex < dictLimit) + ml = ZSTD_count_2segments(ip, dictBase + matchIndex, iLimit, dictEnd, prefixStart); + else + ml = ZSTD_count(ip, match, ip+matchLength); + if (ml < matchLength) + printf("%d: ERROR_NOEXT: offset=%d matchLength=%d matchIndex=%d dictLimit=%d ml=%d\n", current, (int)(current - matchIndex), (int)matchLength, (int)matchIndex, (int)dictLimit, (int)ml), exit(0); #endif matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iLimit) +1; } } else { match = dictBase + matchIndex; +#if ZSTD_OPT_DEBUG >= 5 + if (memcmp(match, ip, matchLength) != 0) + printf("%d: ERROR_EXT: matchLength=%d ZSTD_count=%d\n", current, (int)matchLength, (int)ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart)), exit(0); +#endif matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart); + ZSTD_LOG_PARSER("%d: ZSTD_INSERTBTANDGETALLMATCHES=%d offset=%d dictBase=%p dictEnd=%p prefixStart=%p ip=%p match=%p\n", (int)current, (int)matchLength, (int)(current - matchIndex), dictBase, dictEnd, prefixStart, ip, match); if (matchIndex+matchLength >= dictLimit) match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ } @@ -252,8 +262,9 @@ void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx, const BYTE* litstart; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - 8; - const BYTE* const base = ctx->base + ctx->dictLimit; - + const BYTE* const base = ctx->base; + const BYTE* const prefixStart = base + ctx->dictLimit; + U32 rep_2=REPCODE_STARTVALUE, rep_1=REPCODE_STARTVALUE; const U32 maxSearches = 1U << ctx->params.searchLog; const U32 mls = ctx->params.searchLength; @@ -268,7 +279,7 @@ void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx, ctx->nextToUpdate3 = ctx->nextToUpdate; ZSTD_resetSeqStore(seqStorePtr); ZSTD_rescaleFreqs(seqStorePtr); - if ((ip-base) < REPCODE_STARTVALUE) ip = base + REPCODE_STARTVALUE; + if ((ip-prefixStart) < REPCODE_STARTVALUE) 
ip = prefixStart + REPCODE_STARTVALUE; #if ZSTD_OPT_DEBUG >= 3 size_t mostFrequent; @@ -296,7 +307,7 @@ void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx, } #endif - + ZSTD_LOG_BLOCK("%d: COMPBLOCK_OPT_GENERIC srcSz=%d maxSrch=%d mls=%d sufLen=%d\n", (int)(ip-base), (int)srcSize, maxSearches, mls, sufficient_len); /* Match Loop */ while (ip < ilimit) { @@ -467,7 +478,7 @@ void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx, mlen = (u>0) ? matches[u-1].len+1 : best_mlen; best_mlen = (cur + matches[u].len < ZSTD_OPT_NUM) ? matches[u].len : ZSTD_OPT_NUM - cur; - ZSTD_LOG_PARSER("%d: Found1 cur=%d mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(inr-base), cur, matches[u].len, matches[u].off, best_mlen, last_pos); + // ZSTD_LOG_PARSER("%d: Found1 cur=%d mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(inr-base), cur, matches[u].len, matches[u].off, best_mlen, last_pos); while (mlen <= best_mlen) { if (opt[cur].mlen == 1) { @@ -481,7 +492,7 @@ void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx, price = opt[cur].price + ZSTD_GETPRICE(seqStorePtr, 0, NULL, matches[u].off, mlen); } - ZSTD_LOG_PARSER("%d: Found2 mlen=%d best_mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, best_mlen, matches[u].off, price, litlen); + // ZSTD_LOG_PARSER("%d: Found2 mlen=%d best_mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, best_mlen, matches[u].off, price, litlen); if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price); @@ -492,7 +503,6 @@ void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx, best_mlen = opt[last_pos].mlen; best_off = opt[last_pos].off; cur = last_pos - best_mlen; - // printf("%d: start=%d best_mlen=%d best_off=%d cur=%d\n", (int)(ip - base), (int)(start - ip), (int)best_mlen, (int)best_off, cur); /* store sequence */ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ @@ -547,19 +557,13 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ 
ml2 = (U32)ZSTD_count(ip, ip-offset, iend); else ml2 = (U32)ZSTD_count(ip, ip-rep_1, iend); - if (offset == 0 || offset >= 8) - if (ml2 < mlen && ml2 < MINMATCHOPT) { + if (offset >= 8) + if (ml2 < mlen || ml2 < MINMATCHOPT) { printf("%d: ERROR_NoExt iend=%d mlen=%d offset=%d ml2=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset, (int)ml2); exit(0); } if (ip < anchor) { - printf("%d: ERROR ip < anchor iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } - if (ip - offset < ctx->base) { - printf("%d: ERROR ip - offset < base iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } - if ((int)offset >= (1 << ctx->params.windowLog)) { - printf("%d: offset >= (1 << params.windowLog) iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } - if (mlen < MINMATCHOPT) { - printf("%d: ERROR mlen < MINMATCHOPT iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } + printf("%d: ERROR_NoExt ip < anchor iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } if (ip + mlen > iend) { - printf("%d: ERROR ip + mlen >= iend iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } + printf("%d: ERROR_NoExt ip + mlen >= iend iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } #endif ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCHOPT); @@ -627,6 +631,8 @@ void ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC(ZSTD_CCtx* ctx, ZSTD_rescaleFreqs(seqStorePtr); if ((ip - prefixStart) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE; + ZSTD_LOG_BLOCK("%d: COMPBLOCK_OPT_EXTDICT srcSz=%d maxSrch=%d mls=%d sufLen=%d\n", (int)(ip-base), (int)srcSize, maxSearches, mls, sufficient_len); + /* Match Loop */ while (ip < ilimit) { U32 u, offset, 
best_off=0; @@ -811,7 +817,7 @@ void ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC(ZSTD_CCtx* ctx, mlen = (u>0) ? matches[u-1].len+1 : best_mlen; best_mlen = (cur + matches[u].len < ZSTD_OPT_NUM) ? matches[u].len : ZSTD_OPT_NUM - cur; - ZSTD_LOG_PARSER("%d: Found1 cur=%d mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(inr-base), cur, matches[u].len, matches[u].off, best_mlen, last_pos); + // ZSTD_LOG_PARSER("%d: Found1 cur=%d mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(inr-base), cur, matches[u].len, matches[u].off, best_mlen, last_pos); while (mlen <= best_mlen) { if (opt[cur].mlen == 1) { @@ -825,7 +831,7 @@ void ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC(ZSTD_CCtx* ctx, price = opt[cur].price + ZSTD_GETPRICE(seqStorePtr, 0, NULL, matches[u].off, mlen); } - ZSTD_LOG_PARSER("%d: Found2 mlen=%d best_mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, best_mlen, matches[u].off, price, litlen); + // ZSTD_LOG_PARSER("%d: Found2 mlen=%d best_mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, best_mlen, matches[u].off, price, litlen); if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price); @@ -836,7 +842,6 @@ void ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC(ZSTD_CCtx* ctx, best_mlen = opt[last_pos].mlen; best_off = opt[last_pos].off; cur = last_pos - best_mlen; - // printf("%d: start=%d best_mlen=%d best_off=%d cur=%d\n", (int)(ip - base), (int)(start - ip), (int)best_mlen, (int)best_off, cur); /* store sequence */ _storeSequence: // cur, last_pos, best_mlen, best_off have to be set @@ -888,21 +893,25 @@ _storeSequence: // cur, last_pos, best_mlen, best_off have to be set #if ZSTD_OPT_DEBUG >= 5 U32 ml2; if (offset) - ml2 = (U32)ZSTD_count(ip, ip-offset, iend); + { + if (offset > (size_t)(ip - prefixStart)) + { + const BYTE* match = dictEnd - (offset - (ip - prefixStart)); + ml2 = ZSTD_count_2segments(ip, match, iend, dictEnd, prefixStart); + ZSTD_LOG_PARSER("%d: ZSTD_count_2segments=%d 
offset=%d dictBase=%p dictEnd=%p prefixStart=%p ip=%p match=%p\n", (int)current, (int)ml2, (int)offset, dictBase, dictEnd, prefixStart, ip, match); + } + else + ml2 = (U32)ZSTD_count(ip, ip-offset, iend); + } else ml2 = (U32)ZSTD_count(ip, ip-rep_1, iend); - // if (ml2 < mlen && ml2 < MINMATCHOPT) { - // printf("%d: ERROR_Ext iend=%d mlen=%d offset=%d ml2=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset, (int)ml2); exit(0); } + if (offset >= 8) + if (ml2 < mlen || ml2 < MINMATCHOPT) { + printf("%d: ERROR_Ext iend=%d mlen=%d offset=%d ml2=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset, (int)ml2); exit(0); } if (ip < anchor) { - printf("%d: ERROR ip < anchor iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } - if (ip - offset < ctx->base) { - printf("%d: ERROR ip - offset < base iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } - if ((int)offset >= (1 << ctx->params.windowLog)) { - printf("%d: offset >= (1 << params.windowLog) iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } - if (mlen < MINMATCHOPT) { - printf("%d: ERROR mlen < MINMATCHOPT iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } + printf("%d: ERROR_Ext ip < anchor iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } if (ip + mlen > iend) { - printf("%d: ERROR ip + mlen >= iend iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } + printf("%d: ERROR_Ext ip + mlen >= iend iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } #endif ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCHOPT); diff --git a/lib/zstd_opt_internal.h b/lib/zstd_opt_internal.h index 81022dcb..d643c692 100644 --- 
a/lib/zstd_opt_internal.h +++ b/lib/zstd_opt_internal.h @@ -49,9 +49,11 @@ #if defined(ZSTD_OPT_DEBUG) && ZSTD_OPT_DEBUG>=9 #define ZSTD_LOG_PARSER(...) printf(__VA_ARGS__) #define ZSTD_LOG_ENCODE(...) printf(__VA_ARGS__) + #define ZSTD_LOG_BLOCK(...) printf(__VA_ARGS__) #else #define ZSTD_LOG_PARSER(...) - #define ZSTD_LOG_ENCODE(...) // printf(__VA_ARGS__) + #define ZSTD_LOG_ENCODE(...) + #define ZSTD_LOG_BLOCK(...) #endif From a4dde2549817b113fc7f594a9a3563333cacb3c4 Mon Sep 17 00:00:00 2001 From: inikep Date: Tue, 1 Mar 2016 14:14:35 +0100 Subject: [PATCH 029/247] fixed repcode before lowLimit --- lib/zstd_compress.c | 7 ++++++- lib/zstd_decompress.c | 8 ++++++-- lib/zstd_internal.h | 13 +++++++++++-- lib/zstd_opt.h | 28 +++++++++++----------------- lib/zstd_opt_internal.h | 11 ----------- 5 files changed, 34 insertions(+), 33 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 04f5be09..c9847daf 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -2040,7 +2040,8 @@ size_t ZSTD_compressContinue (ZSTD_CCtx* zc, size_t ZSTD_compressBlock(ZSTD_CCtx* zc, void* dst, size_t maxDstSize, const void* src, size_t srcSize) { if (srcSize > BLOCKSIZE) return ERROR(srcSize_wrong); - zc->params.searchLength = MINMATCH; + zc->params.searchLength = MINMATCH; /* force ZSTD_btopt to MINMATCH in block mode */ + ZSTD_LOG_BLOCK("%p: ZSTD_compressBlock searchLength=%d\n", zc->base, zc->params.searchLength); return ZSTD_compressContinue_internal(zc, dst, maxDstSize, src, srcSize, 0); } @@ -2174,11 +2175,13 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* zc, size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* zc, const void* dict, size_t dictSize, int compressionLevel) { + ZSTD_LOG_BLOCK("%p: ZSTD_compressBegin_usingDict compressionLevel=%d\n", zc->base, compressionLevel); return ZSTD_compressBegin_advanced(zc, dict, dictSize, ZSTD_getParams(compressionLevel, MAX(128 KB, dictSize))); } size_t ZSTD_compressBegin(ZSTD_CCtx* zc, int compressionLevel) { + 
ZSTD_LOG_BLOCK("%p: ZSTD_compressBegin compressionLevel=%d\n", zc->base, compressionLevel); return ZSTD_compressBegin_advanced(zc, NULL, 0, ZSTD_getParams(compressionLevel, 0)); } @@ -2258,11 +2261,13 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel) { + ZSTD_LOG_BLOCK("%p: ZSTD_compress_usingDict srcSize=%d dictSize=%d compressionLevel=%d\n", ctx->base, (int)srcSize, (int)dictSize, compressionLevel); return ZSTD_compress_advanced(ctx, dst, maxDstSize, src, srcSize, dict, dictSize, ZSTD_getParams(compressionLevel, srcSize)); } size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize, int compressionLevel) { + ZSTD_LOG_BLOCK("%p: ZSTD_compressCCtx srcSize=%d compressionLevel=%d\n", ctx->base, (int)srcSize, compressionLevel); return ZSTD_compress_advanced(ctx, dst, maxDstSize, src, srcSize, NULL, 0, ZSTD_getParams(compressionLevel, srcSize)); } diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index a5746986..e9d3bdb6 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -159,7 +159,8 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) dctx->dictEnd = NULL; dctx->hufTableX4[0] = HufLog; dctx->flagStaticTables = 0; - dctx->params.searchLength = MINMATCH; + dctx->params.searchLength = MINMATCH; /* overwritten by frame but forces ZSTD_btopt to MINMATCH in block mode */ + ZSTD_LOG_BLOCK("%p: ZSTD_decompressBegin searchLength=%d\n", dctx->base, dctx->params.searchLength); return 0; } @@ -749,7 +750,7 @@ FORCE_INLINE size_t ZSTD_execSequence(BYTE* op, } op += 8; match += 8; - if (oMatchEnd > oend-12) { + if (oMatchEnd > oend-(16-3)) { // 3 = MINMATCH if (op < oend_8) { ZSTD_wildcopy(op, match, oend_8 - op); match += oend_8 - op; @@ -857,6 +858,8 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, if (srcSize >= BLOCKSIZE) return 
ERROR(srcSize_wrong); + ZSTD_LOG_BLOCK("%p: ZSTD_decompressBlock_internal searchLength=%d\n", dctx->base, dctx->params.searchLength); + /* Decode literals sub-block */ litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); if (ZSTD_isError(litCSize)) return litCSize; @@ -966,6 +969,7 @@ size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) { ZSTD_decompressBegin_usingDict(dctx, dict, dictSize); + ZSTD_LOG_BLOCK("%p: ZSTD_decompressBegin_usingDict searchLength=%d\n", dctx->base, dctx->params.searchLength); ZSTD_checkContinuity(dctx, dst); return ZSTD_decompress_continueDCtx(dctx, dst, maxDstSize, src, srcSize); } diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index 54f9aaf6..68b66621 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -51,8 +51,17 @@ * Common constants ***************************************/ #define ZSTD_OPT_DEBUG 0 // 1 = tableID=0; 3 = price func tests; 5 = check encoded sequences; 9 = full logs -#if ZSTD_OPT_DEBUG > 0 - #include /* for debug */ +#if defined(ZSTD_OPT_DEBUG) && ZSTD_OPT_DEBUG>0 + #include +#endif +#if defined(ZSTD_OPT_DEBUG) && ZSTD_OPT_DEBUG>=9 + #define ZSTD_LOG_PARSER(...) printf(__VA_ARGS__) + #define ZSTD_LOG_ENCODE(...) printf(__VA_ARGS__) + #define ZSTD_LOG_BLOCK(...) printf(__VA_ARGS__) +#else + #define ZSTD_LOG_PARSER(...) + #define ZSTD_LOG_ENCODE(...) + #define ZSTD_LOG_BLOCK(...) 
#endif #define ZSTD_DICT_MAGIC 0xEC30A435 diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index 739ef7a6..a95f97b8 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -47,8 +47,7 @@ FORCE_INLINE U32 ZSTD_GETPRICE(seqStore_t* seqStorePtr, U32 litLength, const BYT price += ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ZSTD_highbit(seqStorePtr->matchLengthSum+1) - ZSTD_highbit(seqStorePtr->matchLengthFreq[matchLength]+1); #if ZSTD_OPT_DEBUG >= 3 - switch (seqStorePtr->priceFunc) - { + switch (seqStorePtr->priceFunc) { default: case 0: return 1 + price + seqStorePtr->factor + ((seqStorePtr->litSum>>5) / seqStorePtr->litLengthSum) + ((seqStorePtr->litSum<<1) / (seqStorePtr->litSum + seqStorePtr->matchSum)); @@ -557,8 +556,7 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ ml2 = (U32)ZSTD_count(ip, ip-offset, iend); else ml2 = (U32)ZSTD_count(ip, ip-rep_1, iend); - if (offset >= 8) - if (ml2 < mlen || ml2 < MINMATCHOPT) { + if ((offset >= 8) && (ml2 < mlen || ml2 < MINMATCHOPT)) { printf("%d: ERROR_NoExt iend=%d mlen=%d offset=%d ml2=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset, (int)ml2); exit(0); } if (ip < anchor) { printf("%d: ERROR_NoExt ip < anchor iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } @@ -572,7 +570,7 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ } /* for (cur=0; cur < last_pos; ) */ /* check immediate repcode */ - while ( (anchor <= ilimit) + while ((anchor >= prefixStart + rep_2) && (anchor <= ilimit) && (MEM_readMINMATCH(anchor) == MEM_readMINMATCH(anchor - rep_2)) ) { /* store sequence */ best_mlen = (U32)ZSTD_count(anchor+MINMATCHOPT, anchor+MINMATCHOPT-rep_2, iend); @@ -614,7 +612,8 @@ void ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC(ZSTD_CCtx* ctx, const BYTE* const prefixStart = base + dictLimit; const BYTE* const dictBase = ctx->dictBase; const BYTE* const dictEnd = dictBase + dictLimit; - + const U32 lowLimit = 
ctx->lowLimit; + U32 rep_2=REPCODE_STARTVALUE, rep_1=REPCODE_STARTVALUE; const U32 maxSearches = 1U << ctx->params.searchLog; const U32 mls = ctx->params.searchLength; @@ -892,21 +891,16 @@ _storeSequence: // cur, last_pos, best_mlen, best_off have to be set #if ZSTD_OPT_DEBUG >= 5 U32 ml2; - if (offset) - { - if (offset > (size_t)(ip - prefixStart)) - { + if (offset) { + if (offset > (size_t)(ip - prefixStart)) { const BYTE* match = dictEnd - (offset - (ip - prefixStart)); ml2 = ZSTD_count_2segments(ip, match, iend, dictEnd, prefixStart); ZSTD_LOG_PARSER("%d: ZSTD_count_2segments=%d offset=%d dictBase=%p dictEnd=%p prefixStart=%p ip=%p match=%p\n", (int)current, (int)ml2, (int)offset, dictBase, dictEnd, prefixStart, ip, match); } - else - ml2 = (U32)ZSTD_count(ip, ip-offset, iend); + else ml2 = (U32)ZSTD_count(ip, ip-offset, iend); } - else - ml2 = (U32)ZSTD_count(ip, ip-rep_1, iend); - if (offset >= 8) - if (ml2 < mlen || ml2 < MINMATCHOPT) { + else ml2 = (U32)ZSTD_count(ip, ip-rep_1, iend); + if ((offset >= 8) && (ml2 < mlen || ml2 < MINMATCHOPT)) { printf("%d: ERROR_Ext iend=%d mlen=%d offset=%d ml2=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset, (int)ml2); exit(0); } if (ip < anchor) { printf("%d: ERROR_Ext ip < anchor iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } @@ -920,7 +914,7 @@ _storeSequence: // cur, last_pos, best_mlen, best_off have to be set } /* check immediate repcode */ - while (anchor <= ilimit) { + while ((anchor >= base + lowLimit + rep_2) && (anchor <= ilimit)) { const U32 repIndex = (U32)((anchor-base) - rep_2); const BYTE* const repBase = repIndex < dictLimit ? 
dictBase : base; const BYTE* const repMatch = repBase + repIndex; diff --git a/lib/zstd_opt_internal.h b/lib/zstd_opt_internal.h index d643c692..576f4115 100644 --- a/lib/zstd_opt_internal.h +++ b/lib/zstd_opt_internal.h @@ -45,17 +45,6 @@ #define ZSTD_FREQ_STEP 1 #define ZSTD_FREQ_DIV 5 -/*- Debug -*/ -#if defined(ZSTD_OPT_DEBUG) && ZSTD_OPT_DEBUG>=9 - #define ZSTD_LOG_PARSER(...) printf(__VA_ARGS__) - #define ZSTD_LOG_ENCODE(...) printf(__VA_ARGS__) - #define ZSTD_LOG_BLOCK(...) printf(__VA_ARGS__) -#else - #define ZSTD_LOG_PARSER(...) - #define ZSTD_LOG_ENCODE(...) - #define ZSTD_LOG_BLOCK(...) -#endif - typedef struct { U32 off; From 338533f741f79439662e05a1a3b7d77cd8eaa050 Mon Sep 17 00:00:00 2001 From: inikep Date: Wed, 2 Mar 2016 14:36:41 +0100 Subject: [PATCH 030/247] improved price function initialization --- lib/zstd_opt_internal.h | 53 ++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/lib/zstd_opt_internal.h b/lib/zstd_opt_internal.h index 576f4115..f0a0e69e 100644 --- a/lib/zstd_opt_internal.h +++ b/lib/zstd_opt_internal.h @@ -41,8 +41,6 @@ *********************************************/ /*- Constants -*/ #define ZSTD_OPT_NUM (1<<12) -#define ZSTD_FREQ_START 1 -#define ZSTD_FREQ_STEP 1 #define ZSTD_FREQ_DIV 5 @@ -66,74 +64,75 @@ MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr) unsigned u; if (ssPtr->litLengthSum == 0) { - ssPtr->matchLengthSum = ZSTD_FREQ_START*(1<litLengthSum = ZSTD_FREQ_START*(1<litSum = ZSTD_FREQ_START*(1<offCodeSum = ZSTD_FREQ_START*(1<matchSum = ssPtr->litSum; - + ssPtr->litSum = 2*(1<litLengthSum = 1*(1<matchLengthSum = 1*(1<offCodeSum = 1*(1<matchSum = 2*(1<litFreq[u] = ZSTD_FREQ_START; + ssPtr->litFreq[u] = 2; for (u=0; u<=MaxLL; u++) - ssPtr->litLengthFreq[u] = ZSTD_FREQ_START; + ssPtr->litLengthFreq[u] = 1; for (u=0; u<=MaxML; u++) - ssPtr->matchLengthFreq[u] = ZSTD_FREQ_START; + ssPtr->matchLengthFreq[u] = 1; for (u=0; u<=MaxOff; u++) - ssPtr->offCodeFreq[u] = 
ZSTD_FREQ_START; + ssPtr->offCodeFreq[u] = 1; } else { ssPtr->matchLengthSum = 0; ssPtr->litLengthSum = 0; - ssPtr->litSum = 0; ssPtr->offCodeSum = 0; ssPtr->matchSum = 0; + ssPtr->litSum = 0; for (u=0; u<=MaxLit; u++) { - ssPtr->litFreq[u] = ZSTD_FREQ_START + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV); + ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV); ssPtr->litSum += ssPtr->litFreq[u]; } for (u=0; u<=MaxLL; u++) { - ssPtr->litLengthFreq[u] = ZSTD_FREQ_START + (ssPtr->litLengthFreq[u]>>ZSTD_FREQ_DIV); + ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u]>>ZSTD_FREQ_DIV); ssPtr->litLengthSum += ssPtr->litLengthFreq[u]; } for (u=0; u<=MaxML; u++) { - ssPtr->matchLengthFreq[u] = ZSTD_FREQ_START + (ssPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV); + ssPtr->matchLengthFreq[u] = 1 + (ssPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV); ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u]; ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3); } for (u=0; u<=MaxOff; u++) { - ssPtr->offCodeFreq[u] = ZSTD_FREQ_START + (ssPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV); + ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV); ssPtr->offCodeSum += ssPtr->offCodeFreq[u]; } } } + MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) { U32 u; /* literals */ - seqStorePtr->litSum += litLength * ZSTD_FREQ_STEP; + seqStorePtr->litSum += litLength; for (u=0; u < litLength; u++) - seqStorePtr->litFreq[literals[u]] += ZSTD_FREQ_STEP; + seqStorePtr->litFreq[literals[u]]++; /* literal Length */ - seqStorePtr->litLengthSum += ZSTD_FREQ_STEP; + seqStorePtr->litLengthSum++; if (litLength >= MaxLL) - seqStorePtr->litLengthFreq[MaxLL] += ZSTD_FREQ_STEP; + seqStorePtr->litLengthFreq[MaxLL]++; else - seqStorePtr->litLengthFreq[litLength] += ZSTD_FREQ_STEP; + seqStorePtr->litLengthFreq[litLength]++; /* match offset */ - seqStorePtr->offCodeSum += ZSTD_FREQ_STEP; + seqStorePtr->offCodeSum++; BYTE offCode = offset ? 
(BYTE)ZSTD_highbit(offset+1) + 1 : 0; - seqStorePtr->offCodeFreq[offCode] += ZSTD_FREQ_STEP; + seqStorePtr->offCodeFreq[offCode]++; /* match Length */ - seqStorePtr->matchLengthSum += ZSTD_FREQ_STEP; + seqStorePtr->matchLengthSum++; if (matchLength >= MaxML) - seqStorePtr->matchLengthFreq[MaxML] += ZSTD_FREQ_STEP; + seqStorePtr->matchLengthFreq[MaxML]++; else - seqStorePtr->matchLengthFreq[matchLength] += ZSTD_FREQ_STEP; + seqStorePtr->matchLengthFreq[matchLength]++; } FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals) From 87d4f3daa41ad92cbf7345f9a056b822ad3c1dbd Mon Sep 17 00:00:00 2001 From: inikep Date: Wed, 2 Mar 2016 15:56:24 +0100 Subject: [PATCH 031/247] priceTable moved to heap --- lib/zstd_compress.c | 23 +++++++++------- lib/zstd_internal.h | 17 ++++++++++++ lib/zstd_opt.h | 59 ++++++++++++++++++++--------------------- lib/zstd_opt_internal.h | 16 ----------- 4 files changed, 59 insertions(+), 56 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index c9847daf..2eb1e6d2 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -174,7 +174,9 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, /* reserve table memory */ const U32 contentLog = (params.strategy == ZSTD_fast) ? 
1 : params.contentLog; const size_t tableSpace = ((1 << contentLog) + (1 << params.hashLog) + (1 << params.hashLog3)) * sizeof(U32); - const size_t neededSpace = tableSpace + (256*sizeof(U32)) + (3*blockSize) + ((1<workSpaceSize < neededSpace) { free(zc->workSpace); zc->workSpace = malloc(neededSpace); @@ -199,20 +201,21 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, zc->params = params; zc->blockSize = blockSize; - zc->seqStore.litFreq = (U32*) (zc->seqStore.buffer); - zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (1<seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (1<seqStore.offsetStart = zc->seqStore.offCodeFreq + (1<seqStore.offsetStart = (U32*) (zc->seqStore.buffer); zc->seqStore.offCodeStart = (BYTE*) (zc->seqStore.offsetStart + (blockSize>>2)); zc->seqStore.litStart = zc->seqStore.offCodeStart + (blockSize>>2); zc->seqStore.litLengthStart = zc->seqStore.litStart + blockSize; zc->seqStore.matchLengthStart = zc->seqStore.litLengthStart + (blockSize>>2); - zc->seqStore.dumpsStart = zc->seqStore.matchLengthStart + (blockSize>>2); - // zc->seqStore.XXX = zc->seqStore.dumpsStart + (blockSize>>4); - zc->seqStore.litLengthSum = 0; + zc->seqStore.dumpsStart = zc->seqStore.matchLengthStart + (blockSize>>2); + zc->seqStore.litFreq = (U32*)(zc->seqStore.dumpsStart + (blockSize>>2)); + zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (1<seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (1<seqStore.matchTable = (ZSTD_match_t*)(zc->seqStore.offCodeFreq + (1<seqStore.priceTable = (ZSTD_optimal_t*)(zc->seqStore.matchTable + ZSTD_OPT_NUM+1); + + zc->seqStore.litLengthSum = 0; zc->hbSize = 0; zc->stage = 0; zc->loadedDictEnd = 0; diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index 68b66621..4948e239 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -64,6 +64,7 @@ #define ZSTD_LOG_BLOCK(...) 
#endif +#define ZSTD_OPT_NUM (1<<12) #define ZSTD_DICT_MAGIC 0xEC30A435 #define KB *(1 <<10) @@ -165,6 +166,20 @@ MEM_STATIC unsigned ZSTD_highbit(U32 val) /*-******************************************* * Private interfaces *********************************************/ +typedef struct { + U32 off; + U32 len; +} ZSTD_match_t; + +typedef struct { + U32 price; + U32 off; + U32 mlen; + U32 litlen; + U32 rep; + U32 rep2; +} ZSTD_optimal_t; + typedef struct { void* buffer; U32* offsetStart; @@ -180,6 +195,8 @@ typedef struct { BYTE* dumpsStart; BYTE* dumps; /* opt */ + ZSTD_optimal_t* priceTable; + ZSTD_match_t* matchTable; U32* matchLengthFreq; U32* litLengthFreq; U32* litFreq; diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index a95f97b8..f6148598 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -269,8 +269,8 @@ void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx, const U32 mls = ctx->params.searchLength; const U32 sufficient_len = ctx->params.targetLength; - ZSTD_optimal_t opt[ZSTD_OPT_NUM+1]; - ZSTD_match_t matches[ZSTD_OPT_NUM+1]; + ZSTD_optimal_t* opt = seqStorePtr->priceTable; + ZSTD_match_t* matches = seqStorePtr->matchTable; const BYTE* inr; U32 cur, match_num, last_pos, litlen, price; @@ -280,32 +280,6 @@ void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx, ZSTD_rescaleFreqs(seqStorePtr); if ((ip-prefixStart) < REPCODE_STARTVALUE) ip = prefixStart + REPCODE_STARTVALUE; -#if ZSTD_OPT_DEBUG >= 3 - size_t mostFrequent; - unsigned count[256], maxSymbolValue, usedSymbols = 0; - maxSymbolValue = 255; - mostFrequent = FSE_count(count, &maxSymbolValue, src, srcSize); - for (unsigned i=0; i<=maxSymbolValue; i++) - if (count[i]) usedSymbols++; - - seqStorePtr->factor = ((usedSymbols <= 18) && (mostFrequent < (1<<14))) ? 
mostFrequent>>10 : 0; // helps RTF files - seqStorePtr->factor2 = (usedSymbols==256) && (mostFrequent > (1<<14)); -#endif - -#if 0 - if (seqStorePtr->factor2) - printf("FACTOR2 usedSymbols==256;mostFrequent>(1<<14) maxSymbolValue=%d mostFrequent=%d usedSymbols=%d\n", maxSymbolValue, (int)mostFrequent, usedSymbols); - if (seqStorePtr->factor) { - printf("FACTOR1 usedSymbols<56;mostFrequent<(1<<14) maxSymbolValue=%d mostFrequent=%d usedSymbols=%d\n", maxSymbolValue, (int)mostFrequent, usedSymbols); -#if 0 - for (int i=0; i<256; i++) - if (count[i]) printf("%d=%d ", i, count[i]); - printf("\n"); - -#endif - } -#endif - ZSTD_LOG_BLOCK("%d: COMPBLOCK_OPT_GENERIC srcSz=%d maxSrch=%d mls=%d sufLen=%d\n", (int)(ip-base), (int)srcSize, maxSearches, mls, sufficient_len); /* Match Loop */ @@ -619,8 +593,8 @@ void ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC(ZSTD_CCtx* ctx, const U32 mls = ctx->params.searchLength; const U32 sufficient_len = ctx->params.targetLength; - ZSTD_optimal_t opt[ZSTD_OPT_NUM+1]; - ZSTD_match_t matches[ZSTD_OPT_NUM+1]; + ZSTD_optimal_t* opt = seqStorePtr->priceTable; + ZSTD_match_t* matches = seqStorePtr->matchTable; const BYTE* inr; U32 cur, match_num, last_pos, litlen, price; @@ -913,7 +887,31 @@ _storeSequence: // cur, last_pos, best_mlen, best_off have to be set anchor = ip = ip + mlen; } +#if 0 /* check immediate repcode */ + while ((anchor >= base + lowLimit + rep_2) && (anchor <= ilimit)) { + if ((anchor - rep_2) >= prefixStart) { + if (MEM_readMINMATCH(anchor) == MEM_readMINMATCH(anchor - rep_2)) + mlen = (U32)ZSTD_count(anchor+MINMATCHOPT, anchor - rep_2 + MINMATCHOPT, iend) + MINMATCHOPT; + else + break; + } else { + const BYTE* repMatch = dictBase + ((anchor-base) - rep_2); + if ((repMatch + MINMATCHOPT <= dictEnd) && (MEM_readMINMATCH(anchor) == MEM_readMINMATCH(repMatch))) + mlen = (U32)ZSTD_count_2segments(anchor+MINMATCHOPT, repMatch+MINMATCHOPT, iend, dictEnd, prefixStart) + MINMATCHOPT; + else + break; + } + + offset = rep_2; rep_2 = rep_1; 
rep_1 = offset; /* swap offset history */ + ZSTD_LOG_ENCODE("%d/%d: ENCODE REP literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(anchor-base), (int)(iend-base), (int)(0), (int)best_mlen, (int)(0), (int)rep_1, (int)rep_2); + ZSTD_updatePrice(seqStorePtr, 0, anchor, 0, mlen-MINMATCHOPT); + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, mlen-MINMATCHOPT); + anchor += mlen; + } +#else + /* check immediate repcode */ + /* minimal correctness condition = while ((anchor >= prefixStart + REPCODE_STARTVALUE) && (anchor <= ilimit)) { */ while ((anchor >= base + lowLimit + rep_2) && (anchor <= ilimit)) { const U32 repIndex = (U32)((anchor-base) - rep_2); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; @@ -932,6 +930,7 @@ _storeSequence: // cur, last_pos, best_mlen, best_off have to be set } break; } +#endif if (anchor > ip) ip = anchor; } diff --git a/lib/zstd_opt_internal.h b/lib/zstd_opt_internal.h index f0a0e69e..b980eab0 100644 --- a/lib/zstd_opt_internal.h +++ b/lib/zstd_opt_internal.h @@ -40,25 +40,9 @@ * The optimal parser *********************************************/ /*- Constants -*/ -#define ZSTD_OPT_NUM (1<<12) #define ZSTD_FREQ_DIV 5 -typedef struct { - U32 off; - U32 len; -} ZSTD_match_t; - -typedef struct { - U32 price; - U32 off; - U32 mlen; - U32 litlen; - U32 rep; - U32 rep2; -} ZSTD_optimal_t; - - MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr) { unsigned u; From 51bb9a0064fa8ef70d75e130027895d4c97ed14a Mon Sep 17 00:00:00 2001 From: inikep Date: Wed, 2 Mar 2016 19:17:13 +0100 Subject: [PATCH 032/247] fix clang -Wcast-align warning --- lib/zstd_compress.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 2eb1e6d2..6a4cfe4c 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -202,13 +202,13 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, zc->blockSize = blockSize; zc->seqStore.offsetStart = (U32*) (zc->seqStore.buffer); - zc->seqStore.offCodeStart 
= (BYTE*) (zc->seqStore.offsetStart + (blockSize>>2)); + zc->seqStore.offCodeStart = (BYTE*) (zc->seqStore.offsetStart) + blockSize; zc->seqStore.litStart = zc->seqStore.offCodeStart + (blockSize>>2); zc->seqStore.litLengthStart = zc->seqStore.litStart + blockSize; zc->seqStore.matchLengthStart = zc->seqStore.litLengthStart + (blockSize>>2); - zc->seqStore.dumpsStart = zc->seqStore.matchLengthStart + (blockSize>>2); - - zc->seqStore.litFreq = (U32*)(zc->seqStore.dumpsStart + (blockSize>>2)); + zc->seqStore.dumpsStart = zc->seqStore.matchLengthStart + (blockSize>>2); + BYTE* dumpsEnd = zc->seqStore.dumpsStart + (blockSize>>2); + zc->seqStore.litFreq = (U32*)(dumpsEnd); zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (1<seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (1< Date: Wed, 2 Mar 2016 20:37:49 +0100 Subject: [PATCH 033/247] fix clang -Wcast-align warning (part 2) --- lib/zstd_compress.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 6a4cfe4c..50fa2fdc 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -207,8 +207,8 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, zc->seqStore.litLengthStart = zc->seqStore.litStart + blockSize; zc->seqStore.matchLengthStart = zc->seqStore.litLengthStart + (blockSize>>2); zc->seqStore.dumpsStart = zc->seqStore.matchLengthStart + (blockSize>>2); - BYTE* dumpsEnd = zc->seqStore.dumpsStart + (blockSize>>2); - zc->seqStore.litFreq = (U32*)(dumpsEnd); + + zc->seqStore.litFreq = (U32*)((void*)(zc->seqStore.dumpsStart + (blockSize>>2))); zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (1<seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (1< Date: Fri, 4 Mar 2016 14:45:31 +0100 Subject: [PATCH 034/247] Support for nbSeq > 32767 tamed -Wstrict-aliasing warning --- lib/zstd_compress.c | 18 ++++++------------ 
lib/zstd_decompress.c | 9 +++++++-- lib/zstd_internal.h | 3 ++- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 50fa2fdc..0c6ee881 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -212,8 +212,8 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (1<seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (1<seqStore.matchTable = (ZSTD_match_t*)(zc->seqStore.offCodeFreq + (1<seqStore.priceTable = (ZSTD_optimal_t*)(zc->seqStore.matchTable + ZSTD_OPT_NUM+1); + zc->seqStore.matchTable = (ZSTD_match_t*)(void*)(zc->seqStore.offCodeFreq + (1<seqStore.priceTable = (ZSTD_optimal_t*)(void*)(zc->seqStore.matchTable + ZSTD_OPT_NUM+1); zc->seqStore.litLengthSum = 0; zc->hbSize = 0; @@ -232,7 +232,7 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx) { const U32 contentLog = (srcCCtx->params.strategy == ZSTD_fast) ? 
1 : srcCCtx->params.contentLog; const size_t tableSpace = ((1 << contentLog) + (1 << srcCCtx->params.hashLog) + (1 << srcCCtx->params.hashLog3)) * sizeof(U32); - + if (srcCCtx->stage!=0) return ERROR(stage_wrong); ZSTD_resetCCtx_advanced(dstCCtx, srcCCtx->params); @@ -546,17 +546,11 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, op += cSize; } -#if ZSTD_OPT_DEBUG >= 5 - if (nbSeq >= 32768) - printf("ERROR: nbSeq=%d\n", (int)nbSeq); -#endif - /* Sequences Header */ if ((oend-op) < MIN_SEQUENCES_SIZE) return ERROR(dstSize_tooSmall); - if (nbSeq < 128) *op++ = (BYTE)nbSeq; - else { - op[0] = (BYTE)((nbSeq>>8) + 128); op[1] = (BYTE)nbSeq; op+=2; - } + if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq; + else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; + else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; if (nbSeq==0) goto _check_compressibility; /* dumps : contains rests of large lengths */ diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index e9d3bdb6..c4338bcc 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -496,9 +496,14 @@ size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLen /* SeqHead */ *nbSeq = *ip++; if (*nbSeq==0) return 1; - if (*nbSeq >= 128) - *nbSeq = ((nbSeq[0]-128)<<8) + *ip++; + if (*nbSeq >= 0x7F) { + if (*nbSeq == 0xFF) + *nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; + else + *nbSeq = ((nbSeq[0]-0x80)<<8) + *ip++; + } + /* FSE table descriptors */ LLtype = *ip >> 6; Offtype = (*ip >> 4) & 3; MLtype = (*ip >> 2) & 3; diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index 4948e239..bbd19b00 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -105,12 +105,13 @@ static const size_t ZSTD_frameHeaderSize_min = 5; #define OffFSELog 9 #define MaxSeq MAX(MaxLL, MaxML) +#define LONGNBSEQ 0xFF00 + #define FSE_ENCODING_RAW 0 #define FSE_ENCODING_RLE 1 #define FSE_ENCODING_STATIC 2 #define FSE_ENCODING_DYNAMIC 3 - #define HufLog 12 #define 
MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ From 4c01580362e3a95638996ce939e6bb74506b00bc Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 4 Mar 2016 17:07:54 +0100 Subject: [PATCH 035/247] finer searchLength validation --- lib/zstd_compress.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 0c6ee881..49eefb26 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -145,6 +145,8 @@ static unsigned ZSTD_highbit(U32 val); void ZSTD_validateParams(ZSTD_parameters* params) { const U32 btPlus = (params->strategy == ZSTD_btlazy2) || (params->strategy == ZSTD_btopt); + const U32 searchLengthMax = (params->strategy == ZSTD_fast) ? ZSTD_SEARCHLENGTH_MAX : ZSTD_SEARCHLENGTH_MAX-1; + const U32 searchLengthMin = (params->strategy == ZSTD_btopt) ? ZSTD_SEARCHLENGTH_MIN : ZSTD_SEARCHLENGTH_MIN+1; /* validate params */ if (MEM_32bits()) if (params->windowLog > 25) params->windowLog = 25; /* 32 bits mode cannot flush > 24 bits */ @@ -153,7 +155,7 @@ void ZSTD_validateParams(ZSTD_parameters* params) CLAMP(params->hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); CLAMP(params->hashLog3, ZSTD_HASHLOG3_MIN, ZSTD_HASHLOG3_MAX); CLAMP(params->searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); - CLAMP(params->searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX); + CLAMP(params->searchLength, searchLengthMin, searchLengthMax); CLAMP(params->targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX); if ((U32)params->strategy>(U32)ZSTD_btopt) params->strategy = ZSTD_btopt; From 4ec2998b1f4b9dcfda951abc988c54c39b711790 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 4 Mar 2016 19:09:28 +0100 Subject: [PATCH 036/247] Added zeroes test (#137) --- programs/fuzzer.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/programs/fuzzer.c b/programs/fuzzer.c index f09cf06f..7cbfd794 100644 --- a/programs/fuzzer.c +++ b/programs/fuzzer.c @@ -171,6 +171,7 @@ static int basicUnitTests(U32 seed, double 
compressibility) DISPLAYLEVEL(4, "test%3i : decompress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH); result = ZSTD_decompress(decodedBuffer, COMPRESSIBLE_NOISE_LENGTH, compressedBuffer, cSize); if (ZSTD_isError(result)) goto _output_error; + if (result != COMPRESSIBLE_NOISE_LENGTH) goto _output_error; DISPLAYLEVEL(4, "OK \n"); { @@ -195,6 +196,22 @@ static int basicUnitTests(U32 seed, double compressibility) if (result != (size_t)-ZSTD_error_srcSize_wrong) goto _output_error; DISPLAYLEVEL(4, "OK \n"); + /* All zeroes test (#137 verif) */ + #define ZEROESLENGTH 100 + DISPLAYLEVEL(4, "test%3i : compress %u zeroes : ", testNb++, ZEROESLENGTH); + memset(CNBuffer, 0, ZEROESLENGTH); + result = ZSTD_compress(compressedBuffer, ZSTD_compressBound(ZEROESLENGTH), CNBuffer, ZEROESLENGTH, 1); + if (ZSTD_isError(result)) goto _output_error; + cSize = result; + DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/ZEROESLENGTH*100); + + DISPLAYLEVEL(4, "test%3i : decompress %u zeroes : ", testNb++, ZEROESLENGTH); + result = ZSTD_decompress(decodedBuffer, ZEROESLENGTH, compressedBuffer, cSize); + if (ZSTD_isError(result)) goto _output_error; + if (result != ZEROESLENGTH) goto _output_error; + DISPLAYLEVEL(4, "OK \n"); + + /* Dictionary and Duplication tests */ { ZSTD_CCtx* ctxOrig = ZSTD_createCCtx(); From 4ab9c913a585da4f703a859cd1a7aecef7f8958f Mon Sep 17 00:00:00 2001 From: inikep Date: Fri, 4 Mar 2016 19:17:31 +0100 Subject: [PATCH 037/247] MEM_readMINMATCH replaced with inline function --- lib/mem.h | 10 +++ lib/zstd_compress.c | 35 ++++----- lib/zstd_opt.h | 161 ++++++++++++++++++++-------------------- lib/zstd_opt_internal.h | 41 ++-------- 4 files changed, 114 insertions(+), 133 deletions(-) diff --git a/lib/mem.h b/lib/mem.h index 0e357e53..a369033b 100644 --- a/lib/mem.h +++ b/lib/mem.h @@ -188,6 +188,16 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) #endif /* MEM_FORCE_MEMORY_ACCESS */ +MEM_STATIC U32 MEM_readMINMATCH(const void* memPtr, 
U32 length) +{ + switch (length) + { + default : + case 4 : return MEM_read32(memPtr); + case 3 : return MEM_read32(memPtr)<<8; + } +} + MEM_STATIC U16 MEM_readLE16(const void* memPtr) { if (MEM_isLittleEndian()) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 50fa2fdc..637ffb98 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -207,15 +207,16 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, zc->seqStore.litLengthStart = zc->seqStore.litStart + blockSize; zc->seqStore.matchLengthStart = zc->seqStore.litLengthStart + (blockSize>>2); zc->seqStore.dumpsStart = zc->seqStore.matchLengthStart + (blockSize>>2); + if (params.strategy == ZSTD_btopt) { + zc->seqStore.litFreq = (U32*)((void*)(zc->seqStore.dumpsStart + (blockSize>>2))); + zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (1<seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (1<seqStore.matchTable = (ZSTD_match_t*)((void*)(zc->seqStore.offCodeFreq + (1<seqStore.priceTable = (ZSTD_optimal_t*)((void*)(zc->seqStore.matchTable + ZSTD_OPT_NUM+1)); + zc->seqStore.litLengthSum = 0; + } - zc->seqStore.litFreq = (U32*)((void*)(zc->seqStore.dumpsStart + (blockSize>>2))); - zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (1<seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (1<seqStore.matchTable = (ZSTD_match_t*)(zc->seqStore.offCodeFreq + (1<seqStore.priceTable = (ZSTD_optimal_t*)(zc->seqStore.matchTable + ZSTD_OPT_NUM+1); - - zc->seqStore.litLengthSum = 0; zc->hbSize = 0; zc->stage = 0; zc->loadedDictEnd = 0; @@ -1661,10 +1662,7 @@ _storeSequence: static void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - if (ctx->params.searchLength == 3) - ZSTD_compressBlock_opt_generic3(ctx, src, srcSize, 2); - else - ZSTD_compressBlock_opt_generic4(ctx, src, srcSize, 2); + ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 2); } static void 
ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) @@ -1883,10 +1881,7 @@ static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - if (ctx->params.searchLength == 3) - ZSTD_compressBlock_opt_extDict_generic3(ctx, src, srcSize, 2); - else - ZSTD_compressBlock_opt_extDict_generic4(ctx, src, srcSize, 2); + ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 2); } @@ -2404,10 +2399,10 @@ static const ZSTD_parameters ZSTD_defaultParameters[4][ZSTD_MAX_CLEVEL+1] = { { 0, 14, 15, 15, 0, 15, 4,256, ZSTD_btopt }, /* level 19 */ { 0, 14, 15, 15, 0, 16, 4,256, ZSTD_btopt }, /* level 20 */ { 0, 14, 15, 15, 0, 17, 4,256, ZSTD_btopt }, /* level 21 */ - { 0, 14, 15, 15, 0, 17, 4,256, ZSTD_btopt }, /* level 21-2 */ - { 0, 14, 15, 15, 0, 17, 4,256, ZSTD_btopt }, /* level 21-3 */ - { 0, 14, 15, 15, 0, 17, 4,256, ZSTD_btopt }, /* level 21-4 */ - { 0, 14, 15, 15, 0, 17, 4,256, ZSTD_btopt }, /* level 21-5 */ + { 0, 14, 15, 15, 0, 14, 3,256, ZSTD_btopt }, /* level 21-2 */ + { 0, 14, 15, 15, 0, 15, 3,256, ZSTD_btopt }, /* level 21-3 */ + { 0, 14, 15, 15, 0, 16, 3,256, ZSTD_btopt }, /* level 21-4 */ + { 0, 14, 15, 15, 0, 17, 3,256, ZSTD_btopt }, /* level 21-5 */ }, }; diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index f6148598..66700514 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -41,7 +41,6 @@ FORCE_INLINE U32 ZSTD_GETPRICE(seqStore_t* seqStorePtr, U32 litLength, const BYT U32 price = (offCode-1) + (!offCode) + ZSTD_highbit(seqStorePtr->offCodeSum+1) - ZSTD_highbit(seqStorePtr->offCodeFreq[offCode]+1); /* match Length */ - matchLength -= MINMATCHOPT; price += ((matchLength >= MaxML)<<3) + ((matchLength >= 255+MaxML)<<4) + ((matchLength>=(1<<15))<<3); if (matchLength >= MaxML) matchLength = MaxML; price += ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ZSTD_highbit(seqStorePtr->matchLengthSum+1) - 
ZSTD_highbit(seqStorePtr->matchLengthFreq[matchLength]+1); @@ -95,36 +94,36 @@ static U32 ZSTD_INSERTBTANDGETALLMATCHES ( U32 dummy32; /* to be nullified at the end */ U32 mnum = 0; - size_t bestLength = MINMATCHOPT-1; + const U32 minMatch = (mls == 3) ? 3 : 4; + size_t bestLength = minMatch-1; hashTable[h] = current; /* Update Hash Table */ -#if MINMATCHOPT == 3 - /* HC3 match finder */ - U32 matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip); + if (minMatch == 3) { /* HC3 match finder */ + U32 matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip); - if (matchIndex3>windowLow) { - const BYTE* match; - size_t currentMl=0; - if ((!extDict) || matchIndex3 >= dictLimit) { - match = base + matchIndex3; - if (match[bestLength] == ip[bestLength]) currentMl = ZSTD_count(ip, match, iLimit); - } else { - match = dictBase + matchIndex3; - if (MEM_readMINMATCH(match) == MEM_readMINMATCH(ip)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */ - currentMl = ZSTD_count_2segments(ip+MINMATCHOPT, match+MINMATCHOPT, iLimit, dictEnd, prefixStart) + MINMATCHOPT; - } + if (matchIndex3>windowLow) { + const BYTE* match; + size_t currentMl=0; + if ((!extDict) || matchIndex3 >= dictLimit) { + match = base + matchIndex3; + if (match[bestLength] == ip[bestLength]) currentMl = ZSTD_count(ip, match, iLimit); + } else { + match = dictBase + matchIndex3; + if (MEM_readMINMATCH(match, minMatch) == MEM_readMINMATCH(ip, minMatch)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+minMatch, match+minMatch, iLimit, dictEnd, prefixStart) + minMatch; + } - /* save best solution */ - if (currentMl > bestLength) { - bestLength = currentMl; - matches[mnum].off = current - matchIndex3; - matches[mnum].len = (U32)currentMl; - mnum++; - if (currentMl > ZSTD_OPT_NUM) return mnum; - if (ip+currentMl == iLimit) return mnum; /* best possible, and avoid read overflow*/ + /* save best solution */ + if (currentMl > bestLength) { 
+ bestLength = currentMl; + matches[mnum].off = current - matchIndex3; + matches[mnum].len = (U32)currentMl; + mnum++; + if (currentMl > ZSTD_OPT_NUM) return mnum; + if (ip+currentMl == iLimit) return mnum; /* best possible, and avoid read overflow*/ + } } } -#endif while (nbCompares-- && (matchIndex > windowLow)) { U32* nextPtr = bt + 2*(matchIndex & btMask); @@ -266,8 +265,9 @@ void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx, U32 rep_2=REPCODE_STARTVALUE, rep_1=REPCODE_STARTVALUE; const U32 maxSearches = 1U << ctx->params.searchLog; - const U32 mls = ctx->params.searchLength; const U32 sufficient_len = ctx->params.targetLength; + const U32 mls = ctx->params.searchLength; + const U32 minMatch = (ctx->params.searchLength == 3) ? 3 : 4; ZSTD_optimal_t* opt = seqStorePtr->priceTable; ZSTD_match_t* matches = seqStorePtr->matchTable; @@ -295,9 +295,9 @@ void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx, opt[0].litlen = (U32)(ip - litstart); /* check repCode */ - if (MEM_readMINMATCH(ip+1) == MEM_readMINMATCH(ip+1 - rep_1)) { + if (MEM_readMINMATCH(ip+1, minMatch) == MEM_readMINMATCH(ip+1 - rep_1, minMatch)) { /* repcode : we take it */ - mlen = (U32)ZSTD_count(ip+1+MINMATCHOPT, ip+1+MINMATCHOPT-rep_1, iend) + MINMATCHOPT; + mlen = (U32)ZSTD_count(ip+1+minMatch, ip+1+minMatch-rep_1, iend) + minMatch; ZSTD_LOG_PARSER("%d: start try REP rep=%d mlen=%d\n", (int)(ip-base), (int)rep_1, (int)mlen); if (depth==0 || mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { @@ -307,11 +307,11 @@ void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx, litlen = opt[0].litlen + 1; do { - price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, 0, mlen); + price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, 0, mlen - minMatch); if (mlen + 1 > last_pos || price < opt[mlen + 1].price) SET_PRICE(mlen + 1, mlen, 0, litlen, price); /* note : macro modifies last_pos */ mlen--; - } while (mlen >= MINMATCHOPT); + } while (mlen >= minMatch); } match_num = ZSTD_BTGETALLMATCHES_SELECTMLS(ctx, ip, ip, 
iend, maxSearches, mls, matches); /* first search (depth 0) */ @@ -331,7 +331,7 @@ void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx, goto _storeSequence; } - best_mlen = (last_pos) ? last_pos : MINMATCHOPT; + best_mlen = (last_pos) ? last_pos : minMatch; // set prices using matches at position = 0 for (u = 0; u < match_num; u++) { @@ -340,13 +340,13 @@ void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx, ZSTD_LOG_PARSER("%d: start Found mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(ip-base), matches[u].len, matches[u].off, (int)best_mlen, (int)last_pos); litlen = opt[0].litlen; while (mlen <= best_mlen) { - price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, matches[u].off, mlen); + price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, matches[u].off, mlen - minMatch); if (mlen > last_pos || price < opt[mlen].price) SET_PRICE(mlen, mlen, matches[u].off, litlen, price); mlen++; } } - if (last_pos < MINMATCHOPT) { ip++; continue; } + if (last_pos < minMatch) { ip++; continue; } /* check further positions */ for (cur = 1; cur <= last_pos; cur++) { @@ -401,8 +401,8 @@ void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx, ZSTD_LOG_PARSER("%d: tryNoExt REP1 rep=%u mlen=%u\n", (int)(inr-base), (U32)cur_rep, mlen); } - if (MEM_readMINMATCH(inr) == MEM_readMINMATCH(inr - cur_rep)) { // check rep - mlen = (U32)ZSTD_count(inr+MINMATCHOPT, inr+MINMATCHOPT - cur_rep, iend) + MINMATCHOPT; + if (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - cur_rep, minMatch)) { // check rep + mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - cur_rep, iend) + minMatch; ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d rep=%d opt[%d].off=%d\n", (int)(inr-base), mlen, 0, opt[cur].rep, cur, opt[cur].off); if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { @@ -416,12 +416,12 @@ void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx, if (opt[cur].mlen == 1) { litlen = opt[cur].litlen; if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_GETPRICE(seqStorePtr, litlen, 
inr-litlen, 0, mlen); + price = opt[cur - litlen].price + ZSTD_GETPRICE(seqStorePtr, litlen, inr-litlen, 0, mlen - minMatch); } else - price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, 0, mlen); + price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, 0, mlen - minMatch); } else { litlen = 0; - price = opt[cur].price + ZSTD_GETPRICE(seqStorePtr, 0, NULL, 0, mlen); + price = opt[cur].price + ZSTD_GETPRICE(seqStorePtr, 0, NULL, 0, mlen - minMatch); } best_mlen = mlen; @@ -431,7 +431,7 @@ void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx, if (cur + mlen > last_pos || price <= opt[cur + mlen].price) SET_PRICE(cur + mlen, mlen, 0, litlen, price); mlen--; - } while (mlen >= MINMATCHOPT); + } while (mlen >= minMatch); } match_num = ZSTD_BTGETALLMATCHES_SELECTMLS(ctx, inr, ip, iend, maxSearches, mls, matches); @@ -444,7 +444,7 @@ void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx, goto _storeSequence; } - best_mlen = (best_mlen > MINMATCHOPT) ? best_mlen : MINMATCHOPT; + best_mlen = (best_mlen > minMatch) ? 
best_mlen : minMatch; /* set prices using matches at position = cur */ for (u = 0; u < match_num; u++) { @@ -457,12 +457,12 @@ void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx, if (opt[cur].mlen == 1) { litlen = opt[cur].litlen; if (cur > litlen) - price = opt[cur - litlen].price + ZSTD_GETPRICE(seqStorePtr, litlen, ip+cur-litlen, matches[u].off, mlen); + price = opt[cur - litlen].price + ZSTD_GETPRICE(seqStorePtr, litlen, ip+cur-litlen, matches[u].off, mlen - minMatch); else - price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, matches[u].off, mlen); + price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, matches[u].off, mlen - minMatch); } else { litlen = 0; - price = opt[cur].price + ZSTD_GETPRICE(seqStorePtr, 0, NULL, matches[u].off, mlen); + price = opt[cur].price + ZSTD_GETPRICE(seqStorePtr, 0, NULL, matches[u].off, mlen - minMatch); } // ZSTD_LOG_PARSER("%d: Found2 mlen=%d best_mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, best_mlen, matches[u].off, price, litlen); @@ -530,7 +530,7 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ ml2 = (U32)ZSTD_count(ip, ip-offset, iend); else ml2 = (U32)ZSTD_count(ip, ip-rep_1, iend); - if ((offset >= 8) && (ml2 < mlen || ml2 < MINMATCHOPT)) { + if ((offset >= 8) && (ml2 < mlen || ml2 < minMatch)) { printf("%d: ERROR_NoExt iend=%d mlen=%d offset=%d ml2=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset, (int)ml2); exit(0); } if (ip < anchor) { printf("%d: ERROR_NoExt ip < anchor iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } @@ -538,23 +538,23 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ printf("%d: ERROR_NoExt ip + mlen >= iend iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } #endif - ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCHOPT); - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, 
mlen-MINMATCHOPT); + ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-minMatch); + ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-minMatch); anchor = ip = ip + mlen; } /* for (cur=0; cur < last_pos; ) */ /* check immediate repcode */ while ((anchor >= prefixStart + rep_2) && (anchor <= ilimit) - && (MEM_readMINMATCH(anchor) == MEM_readMINMATCH(anchor - rep_2)) ) { + && (MEM_readMINMATCH(anchor, minMatch) == MEM_readMINMATCH(anchor - rep_2, minMatch)) ) { /* store sequence */ - best_mlen = (U32)ZSTD_count(anchor+MINMATCHOPT, anchor+MINMATCHOPT-rep_2, iend); + best_mlen = (U32)ZSTD_count(anchor+minMatch, anchor+minMatch-rep_2, iend); best_off = rep_2; rep_2 = rep_1; rep_1 = best_off; ZSTD_LOG_ENCODE("%d/%d: ENCODE REP literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(anchor-base), (int)(iend-base), (int)(0), (int)best_mlen, (int)(0), (int)rep_1, (int)rep_2); ZSTD_updatePrice(seqStorePtr, 0, anchor, 0, best_mlen); ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, best_mlen); - anchor += best_mlen+MINMATCHOPT; + anchor += best_mlen+minMatch; continue; /* faster when present ... (?) */ } if (anchor > ip) ip = anchor; @@ -590,8 +590,9 @@ void ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC(ZSTD_CCtx* ctx, U32 rep_2=REPCODE_STARTVALUE, rep_1=REPCODE_STARTVALUE; const U32 maxSearches = 1U << ctx->params.searchLog; - const U32 mls = ctx->params.searchLength; const U32 sufficient_len = ctx->params.targetLength; + const U32 mls = ctx->params.searchLength; + const U32 minMatch = (ctx->params.searchLength == 3) ? 3 : 4; ZSTD_optimal_t* opt = seqStorePtr->priceTable; ZSTD_match_t* matches = seqStorePtr->matchTable; @@ -623,10 +624,10 @@ void ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC(ZSTD_CCtx* ctx, const BYTE* const repBase = repIndex < dictLimit ? 
dictBase : base; const BYTE* const repMatch = repBase + repIndex; if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ - && (MEM_readMINMATCH(ip+1) == MEM_readMINMATCH(repMatch)) ) { + && (MEM_readMINMATCH(ip+1, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) { /* repcode detected we should take it */ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - mlen = (U32)ZSTD_count_2segments(ip+1+MINMATCHOPT, repMatch+MINMATCHOPT, iend, repEnd, prefixStart) + MINMATCHOPT; + mlen = (U32)ZSTD_count_2segments(ip+1+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; ZSTD_LOG_PARSER("%d: start try REP rep=%d mlen=%d\n", (int)(ip-base), (int)rep_1, (int)mlen); if (depth==0 || mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { @@ -636,14 +637,14 @@ void ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC(ZSTD_CCtx* ctx, litlen = opt[0].litlen + 1; do { - price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, 0, mlen); + price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, 0, mlen - minMatch); if (mlen + 1 > last_pos || price < opt[mlen + 1].price) SET_PRICE(mlen + 1, mlen, 0, litlen, price); mlen--; - } while (mlen >= MINMATCHOPT); + } while (mlen >= minMatch); } } - best_mlen = (last_pos) ? last_pos : MINMATCHOPT; + best_mlen = (last_pos) ? 
last_pos : minMatch; match_num = ZSTD_BTGETALLMATCHES_SELECTMLS_EXTDICT(ctx, ip, ip, iend, maxSearches, mls, matches); /* first search (depth 0) */ @@ -669,13 +670,13 @@ void ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC(ZSTD_CCtx* ctx, ZSTD_LOG_PARSER("%d: start Found mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(ip-base), matches[u].len, matches[u].off, (int)best_mlen, (int)last_pos); litlen = opt[0].litlen; while (mlen <= best_mlen) { - price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, matches[u].off, mlen); + price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, matches[u].off, mlen - minMatch); if (mlen > last_pos || price < opt[mlen].price) SET_PRICE(mlen, mlen, matches[u].off, litlen, price); mlen++; } } - if (last_pos < MINMATCHOPT) { + if (last_pos < minMatch) { // ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ ip++; continue; } @@ -737,10 +738,10 @@ void ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC(ZSTD_CCtx* ctx, const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ - &&(MEM_readMINMATCH(inr) == MEM_readMINMATCH(repMatch)) ) { + && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) { /* repcode detected */ const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; - mlen = (U32)ZSTD_count_2segments(inr+MINMATCHOPT, repMatch+MINMATCHOPT, iend, repEnd, prefixStart) + MINMATCHOPT; + mlen = (U32)ZSTD_count_2segments(inr+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d rep=%d opt[%d].off=%d\n", (int)(inr-base), mlen, 0, opt[cur].rep, cur, opt[cur].off); if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { @@ -754,12 +755,12 @@ void ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC(ZSTD_CCtx* ctx, if (opt[cur].mlen == 1) { litlen = opt[cur].litlen; if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_GETPRICE(seqStorePtr, litlen, inr-litlen, 0, mlen); + price = opt[cur - litlen].price + ZSTD_GETPRICE(seqStorePtr, litlen, inr-litlen, 0, mlen - minMatch); } else - price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, 0, mlen); + price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, 0, mlen - minMatch); } else { litlen = 0; - price = opt[cur].price + ZSTD_GETPRICE(seqStorePtr, 0, NULL, 0, mlen); + price = opt[cur].price + ZSTD_GETPRICE(seqStorePtr, 0, NULL, 0, mlen - minMatch); } best_mlen = mlen; @@ -770,10 +771,10 @@ void ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC(ZSTD_CCtx* ctx, if (cur + mlen > last_pos || price <= opt[cur + mlen].price) // || ((price == opt[cur + mlen].price) && (opt[cur].mlen == 1) && (cur != litlen))) // at equal price prefer REP instead of MATCH SET_PRICE(cur + mlen, mlen, 0, litlen, price); mlen--; - } while (mlen >= MINMATCHOPT); + } while (mlen >= minMatch); } - best_mlen = (best_mlen > MINMATCHOPT) ? best_mlen : MINMATCHOPT; + best_mlen = (best_mlen > minMatch) ? 
best_mlen : minMatch; match_num = ZSTD_BTGETALLMATCHES_SELECTMLS_EXTDICT(ctx, inr, ip, iend, maxSearches, mls, matches); ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num); @@ -796,12 +797,12 @@ void ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC(ZSTD_CCtx* ctx, if (opt[cur].mlen == 1) { litlen = opt[cur].litlen; if (cur > litlen) - price = opt[cur - litlen].price + ZSTD_GETPRICE(seqStorePtr, litlen, ip+cur-litlen, matches[u].off, mlen); + price = opt[cur - litlen].price + ZSTD_GETPRICE(seqStorePtr, litlen, ip+cur-litlen, matches[u].off, mlen - minMatch); else - price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, matches[u].off, mlen); + price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, matches[u].off, mlen - minMatch); } else { litlen = 0; - price = opt[cur].price + ZSTD_GETPRICE(seqStorePtr, 0, NULL, matches[u].off, mlen); + price = opt[cur].price + ZSTD_GETPRICE(seqStorePtr, 0, NULL, matches[u].off, mlen - minMatch); } // ZSTD_LOG_PARSER("%d: Found2 mlen=%d best_mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, best_mlen, matches[u].off, price, litlen); @@ -874,7 +875,7 @@ _storeSequence: // cur, last_pos, best_mlen, best_off have to be set else ml2 = (U32)ZSTD_count(ip, ip-offset, iend); } else ml2 = (U32)ZSTD_count(ip, ip-rep_1, iend); - if ((offset >= 8) && (ml2 < mlen || ml2 < MINMATCHOPT)) { + if ((offset >= 8) && (ml2 < mlen || ml2 < minMatch)) { printf("%d: ERROR_Ext iend=%d mlen=%d offset=%d ml2=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset, (int)ml2); exit(0); } if (ip < anchor) { printf("%d: ERROR_Ext ip < anchor iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } @@ -882,8 +883,8 @@ _storeSequence: // cur, last_pos, best_mlen, best_off have to be set printf("%d: ERROR_Ext ip + mlen >= iend iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } #endif - ZSTD_updatePrice(seqStorePtr, litLength, 
anchor, offset, mlen-MINMATCHOPT); - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCHOPT); + ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-minMatch); + ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-minMatch); anchor = ip = ip + mlen; } @@ -891,22 +892,22 @@ _storeSequence: // cur, last_pos, best_mlen, best_off have to be set /* check immediate repcode */ while ((anchor >= base + lowLimit + rep_2) && (anchor <= ilimit)) { if ((anchor - rep_2) >= prefixStart) { - if (MEM_readMINMATCH(anchor) == MEM_readMINMATCH(anchor - rep_2)) - mlen = (U32)ZSTD_count(anchor+MINMATCHOPT, anchor - rep_2 + MINMATCHOPT, iend) + MINMATCHOPT; + if (MEM_readMINMATCH(anchor, minMatch) == MEM_readMINMATCH(anchor - rep_2, minMatch)) + mlen = (U32)ZSTD_count(anchor+minMatch, anchor - rep_2 + minMatch, iend) + minMatch; else break; } else { const BYTE* repMatch = dictBase + ((anchor-base) - rep_2); - if ((repMatch + MINMATCHOPT <= dictEnd) && (MEM_readMINMATCH(anchor) == MEM_readMINMATCH(repMatch))) - mlen = (U32)ZSTD_count_2segments(anchor+MINMATCHOPT, repMatch+MINMATCHOPT, iend, dictEnd, prefixStart) + MINMATCHOPT; + if ((repMatch + minMatch <= dictEnd) && (MEM_readMINMATCH(anchor, minMatch) == MEM_readMINMATCH(repMatch, minMatch))) + mlen = (U32)ZSTD_count_2segments(anchor+minMatch, repMatch+minMatch, iend, dictEnd, prefixStart) + minMatch; else break; } offset = rep_2; rep_2 = rep_1; rep_1 = offset; /* swap offset history */ ZSTD_LOG_ENCODE("%d/%d: ENCODE REP literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(anchor-base), (int)(iend-base), (int)(0), (int)best_mlen, (int)(0), (int)rep_1, (int)rep_2); - ZSTD_updatePrice(seqStorePtr, 0, anchor, 0, mlen-MINMATCHOPT); - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, mlen-MINMATCHOPT); + ZSTD_updatePrice(seqStorePtr, 0, anchor, 0, mlen-minMatch); + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, mlen-minMatch); anchor += mlen; } #else @@ -917,14 +918,14 @@ _storeSequence: // cur, last_pos, best_mlen, 
best_off have to be set const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ - && (MEM_readMINMATCH(anchor) == MEM_readMINMATCH(repMatch)) ) { + && (MEM_readMINMATCH(anchor, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) { /* repcode detected, let's take it */ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - mlen = (U32)ZSTD_count_2segments(anchor+MINMATCHOPT, repMatch+MINMATCHOPT, iend, repEnd, prefixStart) + MINMATCHOPT; + mlen = (U32)ZSTD_count_2segments(anchor+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; offset = rep_2; rep_2 = rep_1; rep_1 = offset; /* swap offset history */ ZSTD_LOG_ENCODE("%d/%d: ENCODE REP literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(anchor-base), (int)(iend-base), (int)(0), (int)best_mlen, (int)(0), (int)rep_1, (int)rep_2); - ZSTD_updatePrice(seqStorePtr, 0, anchor, 0, mlen-MINMATCHOPT); - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, mlen-MINMATCHOPT); + ZSTD_updatePrice(seqStorePtr, 0, anchor, 0, mlen-minMatch); + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, mlen-minMatch); anchor += mlen; continue; /* faster when present ... (?) 
*/ } diff --git a/lib/zstd_opt_internal.h b/lib/zstd_opt_internal.h index b980eab0..6dd5f516 100644 --- a/lib/zstd_opt_internal.h +++ b/lib/zstd_opt_internal.h @@ -168,39 +168,14 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip) return hashTable3[ZSTD_hash3Ptr(ip, hashLog3)]; } - -#define MINMATCHOPT 4 -#define MEM_readMINMATCH(ptr) (U32)(MEM_read32(ptr)) -#define ZSTD_GETPRICE ZSTD_getPrice4 -#define ZSTD_INSERTBTANDGETALLMATCHES ZSTD_insertBtAndGetAllMatches4 -#define ZSTD_BTGETALLMATCHES ZSTD_BtGetAllMatches4 -#define ZSTD_BTGETALLMATCHES_SELECTMLS ZSTD_BtGetAllMatches_selectMLS4 -#define ZSTD_BTGETALLMATCHES_EXTDICT ZSTD_BtGetAllMatches_extDict4 -#define ZSTD_BTGETALLMATCHES_SELECTMLS_EXTDICT ZSTD_BtGetAllMatches_selectMLS_extDict4 -#define ZSTD_COMPRESSBLOCK_OPT_GENERIC ZSTD_compressBlock_opt_generic4 -#define ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC ZSTD_compressBlock_opt_extDict_generic4 -#include "zstd_opt.h" -#undef MINMATCHOPT -#undef MEM_readMINMATCH -#undef ZSTD_GETPRICE -#undef ZSTD_INSERTBTANDGETALLMATCHES -#undef ZSTD_BTGETALLMATCHES -#undef ZSTD_BTGETALLMATCHES_SELECTMLS -#undef ZSTD_BTGETALLMATCHES_EXTDICT -#undef ZSTD_BTGETALLMATCHES_SELECTMLS_EXTDICT -#undef ZSTD_COMPRESSBLOCK_OPT_GENERIC -#undef ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC - -#define MINMATCHOPT 3 -#define MEM_readMINMATCH(ptr) ((U32)(MEM_read32(ptr)<<8)) -#define ZSTD_GETPRICE ZSTD_getPrice3 -#define ZSTD_INSERTBTANDGETALLMATCHES ZSTD_insertBtAndGetAllMatches3 -#define ZSTD_BTGETALLMATCHES ZSTD_BtGetAllMatches3 -#define ZSTD_BTGETALLMATCHES_SELECTMLS ZSTD_BtGetAllMatches_selectMLS3 -#define ZSTD_BTGETALLMATCHES_EXTDICT ZSTD_BtGetAllMatches_extDict3 -#define ZSTD_BTGETALLMATCHES_SELECTMLS_EXTDICT ZSTD_BtGetAllMatches_selectMLS_extDict3 -#define ZSTD_COMPRESSBLOCK_OPT_GENERIC ZSTD_compressBlock_opt_generic3 -#define ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC ZSTD_compressBlock_opt_extDict_generic3 +#define ZSTD_GETPRICE ZSTD_getPrice +#define 
ZSTD_INSERTBTANDGETALLMATCHES ZSTD_insertBtAndGetAllMatches +#define ZSTD_BTGETALLMATCHES ZSTD_BtGetAllMatches +#define ZSTD_BTGETALLMATCHES_SELECTMLS ZSTD_BtGetAllMatches_selectMLS +#define ZSTD_BTGETALLMATCHES_EXTDICT ZSTD_BtGetAllMatches_extDict +#define ZSTD_BTGETALLMATCHES_SELECTMLS_EXTDICT ZSTD_BtGetAllMatches_selectMLS_extDict +#define ZSTD_COMPRESSBLOCK_OPT_GENERIC ZSTD_compressBlock_opt_generic +#define ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC ZSTD_compressBlock_opt_extDict_generic #include "zstd_opt.h" From 944d0d22852696d25e0b749961ab25236a92d45f Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 4 Mar 2016 19:26:59 +0100 Subject: [PATCH 038/247] updated fse version --- lib/fse.c | 53 +++++++++--------- lib/fse.h | 164 ++++++++++++++++++++++++------------------------------ 2 files changed, 100 insertions(+), 117 deletions(-) diff --git a/lib/fse.c b/lib/fse.c index 986a0da1..a445f328 100644 --- a/lib/fse.c +++ b/lib/fse.c @@ -628,12 +628,12 @@ size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, /*-************************************************************** * FSE Compression Code ****************************************************************/ -/*! -FSE_CTable is a variable size structure which contains : - U16 tableLog; - U16 maxSymbolValue; - U16 nextStateNumber[1 << tableLog]; // This size is variable - FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1]; // This size is variable +/*! FSE_sizeof_CTable() : + FSE_CTable is a variable size structure which contains : + `U16 tableLog;` + `U16 maxSymbolValue;` + `U16 nextStateNumber[1 << tableLog];` // This size is variable + `FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];` // This size is variable Allocation is manual, since C standard does not support variable-size structures. 
*/ @@ -654,10 +654,7 @@ FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog) return (FSE_CTable*)malloc(size); } -void FSE_freeCTable (FSE_CTable* ct) -{ - free(ct); -} +void FSE_freeCTable (FSE_CTable* ct) { free(ct); } /* provides the minimum logSize to safely represent a distribution */ static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) @@ -888,31 +885,32 @@ static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize, const FSE_CTable* ct, const unsigned fast) { const BYTE* const istart = (const BYTE*) src; - const BYTE* ip; const BYTE* const iend = istart + srcSize; + const BYTE* ip=iend; size_t errorCode; BIT_CStream_t bitC; FSE_CState_t CState1, CState2; - /* init */ + if (srcSize <= 2) return 0; errorCode = BIT_initCStream(&bitC, dst, dstSize); if (FSE_isError(errorCode)) return 0; - FSE_initCState(&CState1, ct); - CState2 = CState1; - - ip=iend; #define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s)) - /* join to even */ if (srcSize & 1) { + FSE_initCState2(&CState1, ct, *--ip); + FSE_initCState2(&CState2, ct, *--ip); FSE_encodeSymbol(&bitC, &CState1, *--ip); FSE_FLUSHBITS(&bitC); + } else { + FSE_initCState2(&CState2, ct, *--ip); + FSE_initCState2(&CState1, ct, *--ip); } /* join to mod 4 */ + srcSize -= 2; if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) { /* test bit 2 */ FSE_encodeSymbol(&bitC, &CState2, *--ip); FSE_encodeSymbol(&bitC, &CState1, *--ip); @@ -1106,24 +1104,25 @@ FORCE_INLINE size_t FSE_decompress_usingDTable_generic( /* tail */ /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */ while (1) { - if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) ) - break; + if (op>(omax-2)) return ERROR(dstSize_tooSmall); *op++ = FSE_GETSYMBOL(&state1); - if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || 
(op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) ) + if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { + *op++ = FSE_GETSYMBOL(&state2); break; + } + + if (op>(omax-2)) return ERROR(dstSize_tooSmall); *op++ = FSE_GETSYMBOL(&state2); - } - /* end ? */ - if (BIT_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2)) - return op-ostart; + if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { + *op++ = FSE_GETSYMBOL(&state1); + break; + } } - if (op==omax) return ERROR(dstSize_tooSmall); /* dst buffer is full, but cSrc unfinished */ - - return ERROR(corruption_detected); + return op-ostart; } diff --git a/lib/fse.h b/lib/fse.h index db6f49cf..6dce6830 100644 --- a/lib/fse.h +++ b/lib/fse.h @@ -1,7 +1,7 @@ /* ****************************************************************** - FSE : Finite State Entropy coder - header file - Copyright (C) 2013-2015, Yann Collet. + FSE : Finite State Entropy codec + Public Prototypes declaration + Copyright (C) 2013-2016, Yann Collet. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) @@ -30,7 +30,6 @@ You can contact the author at : - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - - Public forum : https://groups.google.com/forum/#!forum/lz4c ****************************************************************** */ #ifndef FSE_H #define FSE_H @@ -40,8 +39,8 @@ extern "C" { #endif -/* ***************************************** -* Includes +/*-***************************************** +* Dependencies ******************************************/ #include /* size_t, ptrdiff_t */ @@ -49,32 +48,32 @@ extern "C" { /*-**************************************** * FSE simple functions ******************************************/ -size_t FSE_compress(void* dst, size_t maxDstSize, - const void* src, size_t srcSize); -size_t FSE_decompress(void* dst, size_t maxDstSize, - const void* cSrc, size_t cSrcSize); -/*! -FSE_compress(): +/*! 
FSE_compress() : Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'. - 'dst' buffer must be already allocated. Compression runs faster is maxDstSize >= FSE_compressBound(srcSize) - return : size of compressed data (<= maxDstSize) + 'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize). + @return : size of compressed data (<= dstCapacity). Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead. if FSE_isError(return), compression failed (more details using FSE_getErrorName()) +*/ +size_t FSE_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize); -FSE_decompress(): +/*! FSE_decompress(): Decompress FSE data from buffer 'cSrc', of size 'cSrcSize', - into already allocated destination buffer 'dst', of size 'maxDstSize'. - return : size of regenerated data (<= maxDstSize) - or an error code, which can be tested using FSE_isError() + into already allocated destination buffer 'dst', of size 'dstCapacity'. + @return : size of regenerated data (<= maxDstSize), + or an error code, which can be tested using FSE_isError() . - ** Important ** : FSE_decompress() doesn't decompress non-compressible nor RLE data !!! + ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!! Why ? : making this distinction requires a header. Header management is intentionally delegated to the user layer, which can better manage special cases. 
*/ +size_t FSE_decompress(void* dst, size_t dstCapacity, + const void* cSrc, size_t cSrcSize); -/* ***************************************** +/*-***************************************** * Tool functions ******************************************/ size_t FSE_compressBound(size_t size); /* maximum compressed size */ @@ -84,14 +83,13 @@ unsigned FSE_isError(size_t code); /* tells if a return value is an er const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */ -/* ***************************************** +/*-***************************************** * FSE advanced functions ******************************************/ -/*! -FSE_compress2(): +/*! FSE_compress2() : Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog' Both parameters can be defined as '0' to mean : use default value - return : size of compressed data + @return : size of compressed data Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!! if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression. if FSE_isError(return), it's an error code. @@ -99,7 +97,7 @@ FSE_compress2(): size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); -/* ***************************************** +/*-***************************************** * FSE detailed API ******************************************/ /*! @@ -122,65 +120,56 @@ or to save and provide normalized distribution using external method. /* *** COMPRESSION *** */ -/*! -FSE_count(): - Provides the precise count of each byte within a table 'count' - 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1). - *maxSymbolValuePtr will be updated if detected smaller than initial value. - @return : the count of the most frequent symbol (which is not identified) - if return == srcSize, there is only one symbol. 
- Can also return an error code, which can be tested with FSE_isError() */ +/*! FSE_count(): + Provides the precise count of each byte within a table 'count'. + 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1). + *maxSymbolValuePtr will be updated if detected smaller than initial value. + @return : the count of the most frequent symbol (which is not identified). + if return == srcSize, there is only one symbol. + Can also return an error code, which can be tested with FSE_isError(). */ size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); -/*! -FSE_optimalTableLog(): - dynamically downsize 'tableLog' when conditions are met. - It saves CPU time, by using smaller tables, while preserving or even improving compression ratio. - return : recommended tableLog (necessarily <= initial 'tableLog') */ +/*! FSE_optimalTableLog(): + dynamically downsize 'tableLog' when conditions are met. + It saves CPU time, by using smaller tables, while preserving or even improving compression ratio. + @return : recommended tableLog (necessarily <= initial 'tableLog') */ unsigned FSE_optimalTableLog(unsigned tableLog, size_t srcSize, unsigned maxSymbolValue); -/*! -FSE_normalizeCount(): - normalize counters so that sum(count[]) == Power_of_2 (2^tableLog) - 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). - return : tableLog, - or an errorCode, which can be tested using FSE_isError() */ +/*! FSE_normalizeCount(): + normalize counts so that sum(count[]) == Power_of_2 (2^tableLog) + 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). + @return : tableLog, + or an errorCode, which can be tested using FSE_isError() */ size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue); -/*! 
-FSE_NCountWriteBound(): - Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog' - Typically useful for allocation purpose. */ +/*! FSE_NCountWriteBound(): + Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. + Typically useful for allocation purpose. */ size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog); -/*! -FSE_writeNCount(): - Compactly save 'normalizedCounter' into 'buffer'. - return : size of the compressed table - or an errorCode, which can be tested using FSE_isError() */ +/*! FSE_writeNCount(): + Compactly save 'normalizedCounter' into 'buffer'. + @return : size of the compressed table, + or an errorCode, which can be tested using FSE_isError(). */ size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); -/*! -Constructor and Destructor of type FSE_CTable - Note that its size depends on 'tableLog' and 'maxSymbolValue' */ +/*! Constructor and Destructor of FSE_CTable. + Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */ FSE_CTable* FSE_createCTable (unsigned tableLog, unsigned maxSymbolValue); void FSE_freeCTable (FSE_CTable* ct); -/*! -FSE_buildCTable(): - Builds @ct, which must be already allocated, using FSE_createCTable() - return : 0 - or an errorCode, which can be tested using FSE_isError() */ +/*! FSE_buildCTable(): + Builds `ct`, which must be already allocated, using FSE_createCTable(). + @return : 0, or an errorCode, which can be tested using FSE_isError() */ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); -/*! 
-FSE_compress_usingCTable(): - Compress @src using @ct into @dst which must be already allocated - return : size of compressed data (<= @dstCapacity) - or 0 if compressed data could not fit into @dst - or an errorCode, which can be tested using FSE_isError() */ +/*! FSE_compress_usingCTable(): + Compress `src` using `ct` into `dst` which must be already allocated. + @return : size of compressed data (<= `dstCapacity`), + or 0 if compressed data could not fit into `dst`, + or an errorCode, which can be tested using FSE_isError() */ size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct); /*! @@ -221,7 +210,7 @@ If there is an error, both functions will return an ErrorCode (which can be test 'CTable' can then be used to compress 'src', with FSE_compress_usingCTable(). Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize' -The function returns the size of compressed data (without header), necessarily <= @dstCapacity. +The function returns the size of compressed data (without header), necessarily <= `dstCapacity`. If it returns '0', compressed data could not fit into 'dst'. If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). */ @@ -229,34 +218,29 @@ If there is an error, the function will return an ErrorCode (which can be tested /* *** DECOMPRESSION *** */ -/*! -FSE_readNCount(): - Read compactly saved 'normalizedCounter' from 'rBuffer'. - return : size read from 'rBuffer' - or an errorCode, which can be tested using FSE_isError() - maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */ +/*! FSE_readNCount(): + Read compactly saved 'normalizedCounter' from 'rBuffer'. + @return : size read from 'rBuffer', + or an errorCode, which can be tested using FSE_isError(). 
+ maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize); -/*! -Constructor and Destructor of type FSE_DTable +/*! Constructor and Destructor of FSE_DTable. Note that its size depends on 'tableLog' */ typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ FSE_DTable* FSE_createDTable(unsigned tableLog); void FSE_freeDTable(FSE_DTable* dt); -/*! -FSE_buildDTable(): - Builds 'dt', which must be already allocated, using FSE_createDTable() - return : 0, - or an errorCode, which can be tested using FSE_isError() */ +/*! FSE_buildDTable(): + Builds 'dt', which must be already allocated, using FSE_createDTable(). + return : 0, or an errorCode, which can be tested using FSE_isError() */ size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); -/*! -FSE_decompress_usingDTable(): - Decompress compressed source @cSrc of size @cSrcSize using @dt - into @dst which must be already allocated. - return : size of regenerated data (necessarily <= @dstCapacity) - or an errorCode, which can be tested using FSE_isError() */ +/*! FSE_decompress_usingDTable(): + Decompress compressed source `cSrc` of size `cSrcSize` using `dt` + into `dst` which must be already allocated. + @return : size of regenerated data (necessarily <= `dstCapacity`), + or an errorCode, which can be tested using FSE_isError() */ size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt); /*! @@ -281,9 +265,9 @@ This is performed by the function FSE_buildDTable(). The space required by 'FSE_DTable' must be already allocated using FSE_createDTable(). If there is an error, the function will return an error code, which can be tested using FSE_isError(). 
-'FSE_DTable' can then be used to decompress 'cSrc', with FSE_decompress_usingDTable(). -'cSrcSize' must be strictly correct, otherwise decompression will fail. -FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=maxDstSize). +`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable(). +`cSrcSize` must be strictly correct, otherwise decompression will fail. +FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`). If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small) */ From 805d2a7465bd921cd5b2d0044875bb636ffc2952 Mon Sep 17 00:00:00 2001 From: inikep Date: Fri, 4 Mar 2016 19:31:57 +0100 Subject: [PATCH 039/247] removed zstd_opt_internal.h --- lib/zstd_compress.c | 2 +- lib/zstd_opt.h | 209 +++++++++++++++++++++++++++++++++------- lib/zstd_opt_internal.h | 182 ---------------------------------- 3 files changed, 173 insertions(+), 220 deletions(-) delete mode 100644 lib/zstd_opt_internal.h diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 637ffb98..8f14ff03 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -1380,7 +1380,7 @@ static void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const B while (idx < target) idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 1); } -#include "zstd_opt_internal.h" +#include "zstd_opt.h" /** Tree updater, providing best match */ static size_t ZSTD_BtFindBestMatch_extDict ( diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index 66700514..f6f9acec 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -31,10 +31,83 @@ - Zstd source repository : https://www.zstd.net */ -/* Note : this file is intended to be included within zstd_opt_internal.h */ +/* Note : this file is intended to be included within zstd_compress.c */ -FORCE_INLINE U32 ZSTD_GETPRICE(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 
matchLength) +#define ZSTD_FREQ_DIV 5 + +/*-************************************* +* Price functions for optimal parser +***************************************/ +MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr) +{ + unsigned u; + + if (ssPtr->litLengthSum == 0) { + ssPtr->litSum = 2*(1<litLengthSum = 1*(1<matchLengthSum = 1*(1<offCodeSum = 1*(1<matchSum = 2*(1<litFreq[u] = 2; + for (u=0; u<=MaxLL; u++) + ssPtr->litLengthFreq[u] = 1; + for (u=0; u<=MaxML; u++) + ssPtr->matchLengthFreq[u] = 1; + for (u=0; u<=MaxOff; u++) + ssPtr->offCodeFreq[u] = 1; + } else { + ssPtr->matchLengthSum = 0; + ssPtr->litLengthSum = 0; + ssPtr->offCodeSum = 0; + ssPtr->matchSum = 0; + ssPtr->litSum = 0; + + for (u=0; u<=MaxLit; u++) { + ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV); + ssPtr->litSum += ssPtr->litFreq[u]; + } + for (u=0; u<=MaxLL; u++) { + ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u]>>ZSTD_FREQ_DIV); + ssPtr->litLengthSum += ssPtr->litLengthFreq[u]; + } + for (u=0; u<=MaxML; u++) { + ssPtr->matchLengthFreq[u] = 1 + (ssPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV); + ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u]; + ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3); + } + for (u=0; u<=MaxOff; u++) { + ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV); + ssPtr->offCodeSum += ssPtr->offCodeFreq[u]; + } + } +} + + +FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals) +{ + U32 price, u; + + if (litLength == 0) + return ZSTD_highbit(seqStorePtr->litLengthSum+1) - ZSTD_highbit(seqStorePtr->litLengthFreq[0]+1); + + /* literals */ + price = litLength * ZSTD_highbit(seqStorePtr->litSum+1); + for (u=0; u < litLength; u++) + price -= ZSTD_highbit(seqStorePtr->litFreq[literals[u]]+1); + + /* literal Length */ + price += ((litLength >= MaxLL)<<3) + ((litLength >= 255+MaxLL)<<4) + ((litLength>=(1<<15))<<3); + if (litLength >= MaxLL) litLength = MaxLL; + price += 
ZSTD_highbit(seqStorePtr->litLengthSum+1) - ZSTD_highbit(seqStorePtr->litLengthFreq[litLength]+1); + + return price; +} + + +FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) { /* offset */ BYTE offCode = offset ? (BYTE)ZSTD_highbit(offset+1) + 1 : 0; @@ -63,10 +136,72 @@ FORCE_INLINE U32 ZSTD_GETPRICE(seqStore_t* seqStorePtr, U32 litLength, const BYT } +MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) +{ + U32 u; + + /* literals */ + seqStorePtr->litSum += litLength; + for (u=0; u < litLength; u++) + seqStorePtr->litFreq[literals[u]]++; + + /* literal Length */ + seqStorePtr->litLengthSum++; + if (litLength >= MaxLL) + seqStorePtr->litLengthFreq[MaxLL]++; + else + seqStorePtr->litLengthFreq[litLength]++; + + /* match offset */ + seqStorePtr->offCodeSum++; + BYTE offCode = offset ? (BYTE)ZSTD_highbit(offset+1) + 1 : 0; + seqStorePtr->offCodeFreq[offCode]++; + + /* match Length */ + seqStorePtr->matchLengthSum++; + if (matchLength >= MaxML) + seqStorePtr->matchLengthFreq[MaxML]++; + else + seqStorePtr->matchLengthFreq[matchLength]++; +} + + +#define SET_PRICE(pos, mlen_, offset_, litlen_, price_) \ + { \ + while (last_pos < pos) { opt[last_pos+1].price = 1<<30; last_pos++; } \ + opt[pos].mlen = mlen_; \ + opt[pos].off = offset_; \ + opt[pos].litlen = litlen_; \ + opt[pos].price = price_; \ + ZSTD_LOG_PARSER("%d: SET price[%d/%d]=%d litlen=%d len=%d off=%d\n", (int)(inr-base), (int)pos, (int)last_pos, opt[pos].price, opt[pos].litlen, opt[pos].mlen, opt[pos].off); \ + } + + + /*-************************************* * Binary Tree search ***************************************/ -static U32 ZSTD_INSERTBTANDGETALLMATCHES ( +/* Update hashTable3 up to ip (excluded) + Assumption : always within prefix (ie. 
not within extDict) */ +static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip) +{ + U32* const hashTable3 = zc->hashTable3; + const U32 hashLog3 = zc->params.hashLog3; + const BYTE* const base = zc->base; + const U32 target = (U32)(ip - base); + U32 idx = zc->nextToUpdate3; + + while(idx < target) { + hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx; + idx++; + } + + zc->nextToUpdate3 = target; + return hashTable3[ZSTD_hash3Ptr(ip, hashLog3)]; +} + + +static U32 ZSTD_insertBtAndGetAllMatches ( ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iLimit, U32 nbCompares, const U32 mls, @@ -191,18 +326,18 @@ static U32 ZSTD_INSERTBTANDGETALLMATCHES ( /** Tree updater, providing best match */ -static U32 ZSTD_BTGETALLMATCHES ( +static U32 ZSTD_BtGetAllMatches ( ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iLimit, const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches) { if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls); - return ZSTD_INSERTBTANDGETALLMATCHES(zc, ip, iLimit, maxNbAttempts, mls, 0, matches); + return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches); } -static U32 ZSTD_BTGETALLMATCHES_SELECTMLS ( +static U32 ZSTD_BtGetAllMatches_selectMLS ( ZSTD_CCtx* zc, /* Index table will be updated */ const BYTE* ip, const BYTE* const iLowLimit, const BYTE* const iHighLimit, const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches) @@ -211,25 +346,25 @@ static U32 ZSTD_BTGETALLMATCHES_SELECTMLS ( switch(matchLengthSearch) { default : - case 4 : return ZSTD_BTGETALLMATCHES(zc, ip, iHighLimit, maxNbAttempts, 4, matches); - case 5 : return ZSTD_BTGETALLMATCHES(zc, ip, iHighLimit, maxNbAttempts, 5, matches); - case 6 : return ZSTD_BTGETALLMATCHES(zc, ip, iHighLimit, maxNbAttempts, 6, matches); + case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches); + case 5 : return 
ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches); + case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches); } } /** Tree updater, providing best match */ -static U32 ZSTD_BTGETALLMATCHES_EXTDICT ( +static U32 ZSTD_BtGetAllMatches_extDict ( ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iLimit, const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches) { if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls); - return ZSTD_INSERTBTANDGETALLMATCHES(zc, ip, iLimit, maxNbAttempts, mls, 1, matches); + return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches); } -static U32 ZSTD_BTGETALLMATCHES_SELECTMLS_EXTDICT ( +static U32 ZSTD_BtGetAllMatches_selectMLS_extDict ( ZSTD_CCtx* zc, /* Index table will be updated */ const BYTE* ip, const BYTE* const iLowLimit, const BYTE* const iHighLimit, const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches) @@ -238,9 +373,9 @@ static U32 ZSTD_BTGETALLMATCHES_SELECTMLS_EXTDICT ( switch(matchLengthSearch) { default : - case 4 : return ZSTD_BTGETALLMATCHES_EXTDICT(zc, ip, iHighLimit, maxNbAttempts, 4, matches); - case 5 : return ZSTD_BTGETALLMATCHES_EXTDICT(zc, ip, iHighLimit, maxNbAttempts, 5, matches); - case 6 : return ZSTD_BTGETALLMATCHES_EXTDICT(zc, ip, iHighLimit, maxNbAttempts, 6, matches); + case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches); + case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches); + case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches); } } @@ -249,7 +384,7 @@ static U32 ZSTD_BTGETALLMATCHES_SELECTMLS_EXTDICT ( * Optimal parser *********************************/ FORCE_INLINE -void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx, +void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, const void* src, size_t srcSize, const U32 
depth) { @@ -307,14 +442,14 @@ void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx, litlen = opt[0].litlen + 1; do { - price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, 0, mlen - minMatch); + price = ZSTD_getPrice(seqStorePtr, litlen, litstart, 0, mlen - minMatch); if (mlen + 1 > last_pos || price < opt[mlen + 1].price) SET_PRICE(mlen + 1, mlen, 0, litlen, price); /* note : macro modifies last_pos */ mlen--; } while (mlen >= minMatch); } - match_num = ZSTD_BTGETALLMATCHES_SELECTMLS(ctx, ip, ip, iend, maxSearches, mls, matches); /* first search (depth 0) */ + match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, ip, iend, maxSearches, mls, matches); /* first search (depth 0) */ ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos); if (!last_pos && !match_num) { ip++; continue; } @@ -340,7 +475,7 @@ void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx, ZSTD_LOG_PARSER("%d: start Found mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(ip-base), matches[u].len, matches[u].off, (int)best_mlen, (int)last_pos); litlen = opt[0].litlen; while (mlen <= best_mlen) { - price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, matches[u].off, mlen - minMatch); + price = ZSTD_getPrice(seqStorePtr, litlen, litstart, matches[u].off, mlen - minMatch); if (mlen > last_pos || price < opt[mlen].price) SET_PRICE(mlen, mlen, matches[u].off, litlen, price); mlen++; @@ -416,12 +551,12 @@ void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx, if (opt[cur].mlen == 1) { litlen = opt[cur].litlen; if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_GETPRICE(seqStorePtr, litlen, inr-litlen, 0, mlen - minMatch); + price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, 0, mlen - minMatch); } else - price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, 0, mlen - minMatch); + price = ZSTD_getPrice(seqStorePtr, litlen, litstart, 0, mlen - minMatch); } else { litlen = 0; - price = opt[cur].price + ZSTD_GETPRICE(seqStorePtr, 0, NULL, 0, mlen - 
minMatch); + price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, 0, mlen - minMatch); } best_mlen = mlen; @@ -434,7 +569,7 @@ void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx, } while (mlen >= minMatch); } - match_num = ZSTD_BTGETALLMATCHES_SELECTMLS(ctx, inr, ip, iend, maxSearches, mls, matches); + match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, ip, iend, maxSearches, mls, matches); ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num); if (match_num > 0 && matches[match_num-1].len > sufficient_len) { @@ -457,12 +592,12 @@ void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx, if (opt[cur].mlen == 1) { litlen = opt[cur].litlen; if (cur > litlen) - price = opt[cur - litlen].price + ZSTD_GETPRICE(seqStorePtr, litlen, ip+cur-litlen, matches[u].off, mlen - minMatch); + price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off, mlen - minMatch); else - price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, matches[u].off, mlen - minMatch); + price = ZSTD_getPrice(seqStorePtr, litlen, litstart, matches[u].off, mlen - minMatch); } else { litlen = 0; - price = opt[cur].price + ZSTD_GETPRICE(seqStorePtr, 0, NULL, matches[u].off, mlen - minMatch); + price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off, mlen - minMatch); } // ZSTD_LOG_PARSER("%d: Found2 mlen=%d best_mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, best_mlen, matches[u].off, price, litlen); @@ -570,7 +705,7 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ FORCE_INLINE -void ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC(ZSTD_CCtx* ctx, +void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, const void* src, size_t srcSize, const U32 depth) { @@ -637,7 +772,7 @@ void ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC(ZSTD_CCtx* ctx, litlen = opt[0].litlen + 1; do { - price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, 0, mlen - minMatch); + price = ZSTD_getPrice(seqStorePtr, litlen, 
litstart, 0, mlen - minMatch); if (mlen + 1 > last_pos || price < opt[mlen + 1].price) SET_PRICE(mlen + 1, mlen, 0, litlen, price); mlen--; @@ -646,7 +781,7 @@ void ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC(ZSTD_CCtx* ctx, best_mlen = (last_pos) ? last_pos : minMatch; - match_num = ZSTD_BTGETALLMATCHES_SELECTMLS_EXTDICT(ctx, ip, ip, iend, maxSearches, mls, matches); /* first search (depth 0) */ + match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, ip, iend, maxSearches, mls, matches); /* first search (depth 0) */ ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos); if (!last_pos && !match_num) { ip++; continue; } @@ -670,7 +805,7 @@ void ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC(ZSTD_CCtx* ctx, ZSTD_LOG_PARSER("%d: start Found mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(ip-base), matches[u].len, matches[u].off, (int)best_mlen, (int)last_pos); litlen = opt[0].litlen; while (mlen <= best_mlen) { - price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, matches[u].off, mlen - minMatch); + price = ZSTD_getPrice(seqStorePtr, litlen, litstart, matches[u].off, mlen - minMatch); if (mlen > last_pos || price < opt[mlen].price) SET_PRICE(mlen, mlen, matches[u].off, litlen, price); mlen++; @@ -755,12 +890,12 @@ void ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC(ZSTD_CCtx* ctx, if (opt[cur].mlen == 1) { litlen = opt[cur].litlen; if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_GETPRICE(seqStorePtr, litlen, inr-litlen, 0, mlen - minMatch); + price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, 0, mlen - minMatch); } else - price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, 0, mlen - minMatch); + price = ZSTD_getPrice(seqStorePtr, litlen, litstart, 0, mlen - minMatch); } else { litlen = 0; - price = opt[cur].price + ZSTD_GETPRICE(seqStorePtr, 0, NULL, 0, mlen - minMatch); + price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, 0, mlen - minMatch); } best_mlen = mlen; @@ -776,7 +911,7 @@ void 
ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC(ZSTD_CCtx* ctx, best_mlen = (best_mlen > minMatch) ? best_mlen : minMatch; - match_num = ZSTD_BTGETALLMATCHES_SELECTMLS_EXTDICT(ctx, inr, ip, iend, maxSearches, mls, matches); + match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, ip, iend, maxSearches, mls, matches); ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num); if (match_num > 0 && matches[match_num-1].len > sufficient_len) { @@ -797,12 +932,12 @@ void ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC(ZSTD_CCtx* ctx, if (opt[cur].mlen == 1) { litlen = opt[cur].litlen; if (cur > litlen) - price = opt[cur - litlen].price + ZSTD_GETPRICE(seqStorePtr, litlen, ip+cur-litlen, matches[u].off, mlen - minMatch); + price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off, mlen - minMatch); else - price = ZSTD_GETPRICE(seqStorePtr, litlen, litstart, matches[u].off, mlen - minMatch); + price = ZSTD_getPrice(seqStorePtr, litlen, litstart, matches[u].off, mlen - minMatch); } else { litlen = 0; - price = opt[cur].price + ZSTD_GETPRICE(seqStorePtr, 0, NULL, matches[u].off, mlen - minMatch); + price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off, mlen - minMatch); } // ZSTD_LOG_PARSER("%d: Found2 mlen=%d best_mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, best_mlen, matches[u].off, price, litlen); diff --git a/lib/zstd_opt_internal.h b/lib/zstd_opt_internal.h deleted file mode 100644 index 6dd5f516..00000000 --- a/lib/zstd_opt_internal.h +++ /dev/null @@ -1,182 +0,0 @@ -/* - zstd_opt_internal - common optimal parser functions to include - Header File for include - Copyright (C) 2016, Przemyslaw Skibinski, Yann Collet. 
- - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- - You can contact the author at : - - zstd source repository : https://github.com/Cyan4973/zstd -*/ - -/* Note : this file is intended to be included within zstd_compress.c */ - -#ifndef ZSTD_OPT_INTERNAL_H_MODULE -#define ZSTD_OPT_INTERNAL_H_MODULE - - -/*-******************************************* -* The optimal parser -*********************************************/ -/*- Constants -*/ -#define ZSTD_FREQ_DIV 5 - - -MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr) -{ - unsigned u; - - if (ssPtr->litLengthSum == 0) { - ssPtr->litSum = 2*(1<litLengthSum = 1*(1<matchLengthSum = 1*(1<offCodeSum = 1*(1<matchSum = 2*(1<litFreq[u] = 2; - for (u=0; u<=MaxLL; u++) - ssPtr->litLengthFreq[u] = 1; - for (u=0; u<=MaxML; u++) - ssPtr->matchLengthFreq[u] = 1; - for (u=0; u<=MaxOff; u++) - ssPtr->offCodeFreq[u] = 1; - } else { - ssPtr->matchLengthSum = 0; - ssPtr->litLengthSum = 0; - ssPtr->offCodeSum = 0; - ssPtr->matchSum = 0; - ssPtr->litSum = 0; - - for (u=0; u<=MaxLit; u++) { - ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV); - ssPtr->litSum += ssPtr->litFreq[u]; - } - for (u=0; u<=MaxLL; u++) { - ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u]>>ZSTD_FREQ_DIV); - ssPtr->litLengthSum += ssPtr->litLengthFreq[u]; - } - for (u=0; u<=MaxML; u++) { - ssPtr->matchLengthFreq[u] = 1 + (ssPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV); - ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u]; - ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3); - } - for (u=0; u<=MaxOff; u++) { - ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV); - ssPtr->offCodeSum += ssPtr->offCodeFreq[u]; - } - } -} - - -MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) -{ - U32 u; - - /* literals */ - seqStorePtr->litSum += litLength; - for (u=0; u < litLength; u++) - seqStorePtr->litFreq[literals[u]]++; - - /* literal Length */ - seqStorePtr->litLengthSum++; - if (litLength >= MaxLL) - 
seqStorePtr->litLengthFreq[MaxLL]++; - else - seqStorePtr->litLengthFreq[litLength]++; - - /* match offset */ - seqStorePtr->offCodeSum++; - BYTE offCode = offset ? (BYTE)ZSTD_highbit(offset+1) + 1 : 0; - seqStorePtr->offCodeFreq[offCode]++; - - /* match Length */ - seqStorePtr->matchLengthSum++; - if (matchLength >= MaxML) - seqStorePtr->matchLengthFreq[MaxML]++; - else - seqStorePtr->matchLengthFreq[matchLength]++; -} - -FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals) -{ - U32 price, u; - - if (litLength == 0) - return ZSTD_highbit(seqStorePtr->litLengthSum+1) - ZSTD_highbit(seqStorePtr->litLengthFreq[0]+1); - - /* literals */ - price = litLength * ZSTD_highbit(seqStorePtr->litSum+1); - for (u=0; u < litLength; u++) - price -= ZSTD_highbit(seqStorePtr->litFreq[literals[u]]+1); - - /* literal Length */ - price += ((litLength >= MaxLL)<<3) + ((litLength >= 255+MaxLL)<<4) + ((litLength>=(1<<15))<<3); - if (litLength >= MaxLL) litLength = MaxLL; - price += ZSTD_highbit(seqStorePtr->litLengthSum+1) - ZSTD_highbit(seqStorePtr->litLengthFreq[litLength]+1); - - return price; -} - -#define SET_PRICE(pos, mlen_, offset_, litlen_, price_) \ - { \ - while (last_pos < pos) { opt[last_pos+1].price = 1<<30; last_pos++; } \ - opt[pos].mlen = mlen_; \ - opt[pos].off = offset_; \ - opt[pos].litlen = litlen_; \ - opt[pos].price = price_; \ - ZSTD_LOG_PARSER("%d: SET price[%d/%d]=%d litlen=%d len=%d off=%d\n", (int)(inr-base), (int)pos, (int)last_pos, opt[pos].price, opt[pos].litlen, opt[pos].mlen, opt[pos].off); \ - } - -/* Update hashTable3 up to ip (excluded) - Assumption : always within prefix (ie. 
not within extDict) */ -static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip) -{ - U32* const hashTable3 = zc->hashTable3; - const U32 hashLog3 = zc->params.hashLog3; - const BYTE* const base = zc->base; - const U32 target = (U32)(ip - base); - U32 idx = zc->nextToUpdate3; - - while(idx < target) { - hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx; - idx++; - } - - zc->nextToUpdate3 = target; - return hashTable3[ZSTD_hash3Ptr(ip, hashLog3)]; -} - -#define ZSTD_GETPRICE ZSTD_getPrice -#define ZSTD_INSERTBTANDGETALLMATCHES ZSTD_insertBtAndGetAllMatches -#define ZSTD_BTGETALLMATCHES ZSTD_BtGetAllMatches -#define ZSTD_BTGETALLMATCHES_SELECTMLS ZSTD_BtGetAllMatches_selectMLS -#define ZSTD_BTGETALLMATCHES_EXTDICT ZSTD_BtGetAllMatches_extDict -#define ZSTD_BTGETALLMATCHES_SELECTMLS_EXTDICT ZSTD_BtGetAllMatches_selectMLS_extDict -#define ZSTD_COMPRESSBLOCK_OPT_GENERIC ZSTD_compressBlock_opt_generic -#define ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC ZSTD_compressBlock_opt_extDict_generic -#include "zstd_opt.h" - - -#endif /* ZSTD_OPT_INTERNAL_H_MODULE */ From e29caf7d1c5327ff7c2d585d55dec505f53ed325 Mon Sep 17 00:00:00 2001 From: inikep Date: Fri, 4 Mar 2016 19:52:23 +0100 Subject: [PATCH 040/247] code cleanup --- lib/zstd_compress.c | 5 +++-- lib/zstd_internal.h | 4 ++-- lib/zstd_opt.h | 8 ++------ 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 8f14ff03..385b99dc 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -731,7 +731,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const B printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n", (U32)(literals - g_start), (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); #endif -#if ZSTD_OPT_DEBUG >= 3 +#if ZSTD_OPT_DEBUG == 3 if (offsetCode == 0) seqStorePtr->realRepSum++; seqStorePtr->realSeqSum++; seqStorePtr->realMatchSum += matchCode; @@ -1917,11 +1917,12 @@ static size_t 
ZSTD_compress_generic (ZSTD_CCtx* zc, BYTE* const ostart = (BYTE*)dst; BYTE* op = ostart; const U32 maxDist = 1 << zc->params.windowLog; +#if ZSTD_OPT_DEBUG == 3 seqStore_t* ssPtr = &zc->seqStore; static U32 priceFunc = 0; - ssPtr->realMatchSum = ssPtr->realLitSum = ssPtr->realSeqSum = ssPtr->realRepSum = 1; ssPtr->priceFunc = priceFunc; +#endif while (remaining) { size_t cSize; diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index 4948e239..e136a89c 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -206,13 +206,13 @@ typedef struct { U32 litLengthSum; U32 litSum; U32 offCodeSum; +#if ZSTD_OPT_DEBUG == 3 U32 realMatchSum; U32 realLitSum; U32 realSeqSum; U32 realRepSum; - U32 factor; - U32 factor2; U32 priceFunc; +#endif } seqStore_t; seqStore_t ZSTD_copySeqStore(const ZSTD_CCtx* ctx); diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index f6f9acec..780c5d41 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -118,16 +118,12 @@ FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYT if (matchLength >= MaxML) matchLength = MaxML; price += ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ZSTD_highbit(seqStorePtr->matchLengthSum+1) - ZSTD_highbit(seqStorePtr->matchLengthFreq[matchLength]+1); -#if ZSTD_OPT_DEBUG >= 3 +#if ZSTD_OPT_DEBUG == 3 switch (seqStorePtr->priceFunc) { default: case 0: - return 1 + price + seqStorePtr->factor + ((seqStorePtr->litSum>>5) / seqStorePtr->litLengthSum) + ((seqStorePtr->litSum<<1) / (seqStorePtr->litSum + seqStorePtr->matchSum)); + return 1 + price + ((seqStorePtr->litSum>>5) / seqStorePtr->litLengthSum) + ((seqStorePtr->litSum<<1) / (seqStorePtr->litSum + seqStorePtr->matchSum)); case 1: - return 1 + price + seqStorePtr->factor + ((seqStorePtr->factor2) ? ((seqStorePtr->litSum>>5) / seqStorePtr->litLengthSum) + ((seqStorePtr->litSum<<1) / (seqStorePtr->litSum + seqStorePtr->matchSum)) : 0); - case 2: - return 1 + price + seqStorePtr->factor + ((seqStorePtr->factor2) ? 
((seqStorePtr->litSum>>4) / seqStorePtr->litLengthSum) + ((seqStorePtr->litSum<<1) / (seqStorePtr->litSum + seqStorePtr->matchSum)) : 0); - case 3: return 1 + price; } #else From f3c6503e5517a855f065428eca44971c1a0dd482 Mon Sep 17 00:00:00 2001 From: inikep Date: Fri, 4 Mar 2016 20:04:25 +0100 Subject: [PATCH 041/247] resolve conflict in zstd_internal.h --- lib/zstd_internal.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index e136a89c..b06097a2 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -105,12 +105,13 @@ static const size_t ZSTD_frameHeaderSize_min = 5; #define OffFSELog 9 #define MaxSeq MAX(MaxLL, MaxML) +#define LONGNBSEQ 0xFF00 + #define FSE_ENCODING_RAW 0 #define FSE_ENCODING_RLE 1 #define FSE_ENCODING_STATIC 2 #define FSE_ENCODING_DYNAMIC 3 - #define HufLog 12 #define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ From 1df2594bfe9cdd8364193e943a4b37c7ce9d3d34 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 5 Mar 2016 18:43:21 +0100 Subject: [PATCH 042/247] new compression level scale for <= 16KB --- lib/zstd_compress.c | 48 ++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index e24016c7..bc56b4e5 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -2377,35 +2377,35 @@ static const ZSTD_parameters ZSTD_defaultParameters[4][ZSTD_MAX_CLEVEL+1] = { { 0, 0, 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 -- never used */ { 0, 14, 14, 14, 0, 1, 4, 4, ZSTD_fast }, /* level 1 */ { 0, 14, 14, 15, 0, 1, 4, 4, ZSTD_fast }, /* level 2 */ - { 0, 14, 13, 15, 0, 4, 4, 4, ZSTD_greedy }, /* level 3 */ - { 0, 14, 14, 15, 0, 3, 4, 4, ZSTD_lazy }, /* level 4 */ - { 0, 14, 14, 14, 0, 6, 4, 4, ZSTD_lazy }, /* level 5 */ + { 0, 14, 14, 14, 0, 4, 4, 4, ZSTD_greedy }, /* level 3.*/ + { 0, 14, 14, 14, 0, 3, 4, 4, ZSTD_lazy }, /* level 4.*/ + { 0, 14, 14, 14, 0, 4, 4, 4, ZSTD_lazy2 }, /* level 5 */ { 0, 
14, 14, 14, 0, 5, 4, 4, ZSTD_lazy2 }, /* level 6 */ - { 0, 14, 14, 14, 0, 7, 4, 4, ZSTD_lazy2 }, /* level 7 */ - { 0, 14, 14, 14, 0, 8, 4, 4, ZSTD_lazy2 }, /* level 8 */ - { 0, 14, 14, 14, 0, 9, 4, 4, ZSTD_lazy2 }, /* level 9 */ - { 0, 14, 14, 14, 0, 10, 4, 4, ZSTD_lazy2 }, /* level 10 */ - { 0, 14, 14, 14, 0, 11, 4, 4, ZSTD_lazy2 }, /* level 11 */ - { 0, 14, 15, 15, 0, 12, 4, 32, ZSTD_btopt }, /* level 12 */ - { 0, 14, 15, 15, 0, 12, 4, 64, ZSTD_btopt }, /* level 13 */ - { 0, 14, 15, 15, 0, 12, 4, 96, ZSTD_btopt }, /* level 14 */ - { 0, 14, 15, 15, 0, 12, 4,128, ZSTD_btopt }, /* level 15 */ - { 0, 14, 15, 15, 0, 12, 4,256, ZSTD_btopt }, /* level 16 */ - { 0, 14, 15, 15, 0, 13, 4,256, ZSTD_btopt }, /* level 17 */ - { 0, 14, 15, 15, 0, 14, 4,256, ZSTD_btopt }, /* level 18 */ - { 0, 14, 15, 15, 0, 15, 4,256, ZSTD_btopt }, /* level 19 */ - { 0, 14, 15, 15, 0, 16, 4,256, ZSTD_btopt }, /* level 20 */ - { 0, 14, 15, 15, 0, 17, 4,256, ZSTD_btopt }, /* level 21 */ - { 0, 14, 15, 15, 0, 14, 3,256, ZSTD_btopt }, /* level 21-2 */ - { 0, 14, 15, 15, 0, 15, 3,256, ZSTD_btopt }, /* level 21-3 */ - { 0, 14, 15, 15, 0, 16, 3,256, ZSTD_btopt }, /* level 21-4 */ - { 0, 14, 15, 15, 0, 17, 3,256, ZSTD_btopt }, /* level 21-5 */ + { 0, 14, 14, 14, 0, 6, 4, 4, ZSTD_lazy2 }, /* level 7.*/ + { 0, 14, 14, 14, 0, 7, 4, 4, ZSTD_lazy2 }, /* level 8.*/ + { 0, 14, 15, 14, 0, 6, 4, 4, ZSTD_btlazy2 }, /* level 9.*/ + { 0, 14, 15, 14, 0, 3, 3, 6, ZSTD_btopt }, /* level 10.*/ + { 0, 14, 15, 14, 0, 6, 3, 8, ZSTD_btopt }, /* level 11.*/ + { 0, 14, 15, 14, 0, 6, 3, 16, ZSTD_btopt }, /* level 12.*/ + { 0, 14, 15, 14, 0, 6, 3, 24, ZSTD_btopt }, /* level 13.*/ + { 0, 14, 15, 15, 0, 6, 3, 48, ZSTD_btopt }, /* level 14.*/ + { 0, 14, 15, 15, 0, 6, 3, 64, ZSTD_btopt }, /* level 15.*/ + { 0, 14, 15, 15, 0, 6, 3, 96, ZSTD_btopt }, /* level 16.*/ + { 0, 14, 15, 15, 0, 6, 3,128, ZSTD_btopt }, /* level 17.*/ + { 0, 14, 15, 15, 0, 6, 3,256, ZSTD_btopt }, /* level 18.*/ + { 0, 14, 15, 15, 0, 7, 3,256, ZSTD_btopt }, 
/* level 19.*/ + { 0, 14, 15, 15, 0, 8, 3,256, ZSTD_btopt }, /* level 20.*/ + { 0, 14, 15, 15, 0, 9, 3,256, ZSTD_btopt }, /* level 21.*/ + { 0, 14, 15, 15, 0, 10, 3,256, ZSTD_btopt }, /* level 22.*/ + { 0, 14, 15, 15, 0, 11, 3,256, ZSTD_btopt }, /* level 23.*/ + { 0, 14, 15, 15, 0, 12, 3,256, ZSTD_btopt }, /* level 24.*/ + { 0, 14, 15, 15, 0, 13, 3,256, ZSTD_btopt }, /* level 25.*/ }, }; -/*! ZSTD_getParams +/*! ZSTD_getParams() : * @return ZSTD_parameters structure for a selected compression level and srcSize. -* @srcSizeHint value is optional, select 0 if not known */ +* `srcSizeHint` value is optional, select 0 if not known */ ZSTD_parameters ZSTD_getParams(int compressionLevel, U64 srcSizeHint) { ZSTD_parameters result; From 89c9e1a847625aa143b79735a8f5a4633e95925a Mon Sep 17 00:00:00 2001 From: inikep Date: Sun, 6 Mar 2016 23:21:52 +0100 Subject: [PATCH 043/247] added missing "case 3:" in ZSTD_BtGetAllMatches --- lib/zstd_internal.h | 2 +- lib/zstd_opt.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index b06097a2..462161bd 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -105,7 +105,7 @@ static const size_t ZSTD_frameHeaderSize_min = 5; #define OffFSELog 9 #define MaxSeq MAX(MaxLL, MaxML) -#define LONGNBSEQ 0xFF00 +#define LONGNBSEQ 0x7F00 #define FSE_ENCODING_RAW 0 #define FSE_ENCODING_RLE 1 diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index 780c5d41..c6205089 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -342,6 +342,7 @@ static U32 ZSTD_BtGetAllMatches_selectMLS ( switch(matchLengthSearch) { default : + case 3 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches); case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches); case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches); case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches); @@ -369,6 +370,7 @@ static U32 
ZSTD_BtGetAllMatches_selectMLS_extDict ( switch(matchLengthSearch) { default : + case 3 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches); case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches); case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches); case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches); From e2446b0e36d01e4452ac23f6913b361242dac466 Mon Sep 17 00:00:00 2001 From: inikep Date: Mon, 7 Mar 2016 10:07:08 +0100 Subject: [PATCH 044/247] added support for MINMATCH=3 on big endian architecture changed ZSTD_HASHLOG3_MIN to 2 (4 bytes for MINMACH > 3) --- lib/mem.h | 25 ++++++++++++++----------- lib/zstd_compress.c | 2 +- lib/zstd_opt.h | 14 ++++++-------- lib/zstd_static.h | 2 +- 4 files changed, 22 insertions(+), 21 deletions(-) diff --git a/lib/mem.h b/lib/mem.h index a369033b..ceafd57b 100644 --- a/lib/mem.h +++ b/lib/mem.h @@ -187,17 +187,6 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) #endif /* MEM_FORCE_MEMORY_ACCESS */ - -MEM_STATIC U32 MEM_readMINMATCH(const void* memPtr, U32 length) -{ - switch (length) - { - default : - case 4 : return MEM_read32(memPtr); - case 3 : return MEM_read32(memPtr)<<8; - } -} - MEM_STATIC U16 MEM_readLE16(const void* memPtr) { if (MEM_isLittleEndian()) @@ -286,6 +275,20 @@ MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val) MEM_writeLE64(memPtr, (U64)val); } + /* function safe only for comparisons */ +MEM_STATIC U32 MEM_readMINMATCH(const void* memPtr, U32 length) +{ + switch (length) + { + default : + case 4 : return MEM_read32(memPtr); + case 3 : if (MEM_isLittleEndian()) + return MEM_read32(memPtr)<<8; + else + return MEM_read32(memPtr)>>8; + } +} + #if defined (__cplusplus) } #endif diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index e24016c7..87e8e72e 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -876,7 +876,7 @@ static size_t 
ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE ***************************************/ static const U32 prime3bytes = 506832829U; static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; } -static size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_read32(ptr), h); } +static size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } static const U32 prime4bytes = 2654435761U; static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index c6205089..0c4a80f7 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -335,10 +335,9 @@ static U32 ZSTD_BtGetAllMatches ( static U32 ZSTD_BtGetAllMatches_selectMLS ( ZSTD_CCtx* zc, /* Index table will be updated */ - const BYTE* ip, const BYTE* const iLowLimit, const BYTE* const iHighLimit, + const BYTE* ip, const BYTE* const iHighLimit, const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches) { - (void)iLowLimit; /* unused */ switch(matchLengthSearch) { default : @@ -363,10 +362,9 @@ static U32 ZSTD_BtGetAllMatches_extDict ( static U32 ZSTD_BtGetAllMatches_selectMLS_extDict ( ZSTD_CCtx* zc, /* Index table will be updated */ - const BYTE* ip, const BYTE* const iLowLimit, const BYTE* const iHighLimit, + const BYTE* ip, const BYTE* const iHighLimit, const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches) { - (void)iLowLimit; switch(matchLengthSearch) { default : @@ -447,7 +445,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, } while (mlen >= minMatch); } - match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, ip, iend, maxSearches, mls, matches); /* first search (depth 0) */ + match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches); /* first search (depth 0) */ ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos); if (!last_pos && !match_num) { ip++; continue; } 
@@ -567,7 +565,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, } while (mlen >= minMatch); } - match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, ip, iend, maxSearches, mls, matches); + match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches); ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num); if (match_num > 0 && matches[match_num-1].len > sufficient_len) { @@ -779,7 +777,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, best_mlen = (last_pos) ? last_pos : minMatch; - match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, ip, iend, maxSearches, mls, matches); /* first search (depth 0) */ + match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches); /* first search (depth 0) */ ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos); if (!last_pos && !match_num) { ip++; continue; } @@ -909,7 +907,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, best_mlen = (best_mlen > minMatch) ? 
best_mlen : minMatch; - match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, ip, iend, maxSearches, mls, matches); + match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches); ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num); if (match_num > 0 && matches[match_num-1].len > sufficient_len) { diff --git a/lib/zstd_static.h b/lib/zstd_static.h index fa285ad4..0effaa7c 100644 --- a/lib/zstd_static.h +++ b/lib/zstd_static.h @@ -65,7 +65,7 @@ extern "C" { #define ZSTD_HASHLOG_MAX 28 #define ZSTD_HASHLOG_MIN 12 #define ZSTD_HASHLOG3_MAX 24 -#define ZSTD_HASHLOG3_MIN 12 +#define ZSTD_HASHLOG3_MIN 2 #define ZSTD_SEARCHLOG_MAX (ZSTD_CONTENTLOG_MAX-1) #define ZSTD_SEARCHLOG_MIN 1 #define ZSTD_SEARCHLENGTH_MAX 7 From 4f7f79ef9dd03eb128abb9a489eb4daab10766ee Mon Sep 17 00:00:00 2001 From: inikep Date: Mon, 7 Mar 2016 16:14:58 +0100 Subject: [PATCH 045/247] fixed update of nextToUpdate --- lib/zstd_opt.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index 0c4a80f7..68a3c315 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -250,8 +250,8 @@ static U32 ZSTD_insertBtAndGetAllMatches ( matches[mnum].off = current - matchIndex3; matches[mnum].len = (U32)currentMl; mnum++; - if (currentMl > ZSTD_OPT_NUM) return mnum; - if (ip+currentMl == iLimit) return mnum; /* best possible, and avoid read overflow*/ + if (currentMl > ZSTD_OPT_NUM) goto update; + if (ip+currentMl == iLimit) goto update; /* best possible, and avoid read overflow*/ } } } @@ -316,6 +316,7 @@ static U32 ZSTD_insertBtAndGetAllMatches ( *smallerPtr = *largerPtr = 0; +update: zc->nextToUpdate = (matchEndIdx > current + 8) ? 
matchEndIdx - 8 : current+1; return mnum; } @@ -340,8 +341,8 @@ static U32 ZSTD_BtGetAllMatches_selectMLS ( { switch(matchLengthSearch) { - default : case 3 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches); + default : case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches); case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches); case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches); @@ -367,8 +368,8 @@ static U32 ZSTD_BtGetAllMatches_selectMLS_extDict ( { switch(matchLengthSearch) { - default : case 3 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches); + default : case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches); case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches); case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches); From 4ba85344e3205ab77e8f7b18a858c9eee48418bb Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 7 Mar 2016 20:01:45 +0100 Subject: [PATCH 046/247] added test to generate lots of small sequences (3-bytes) --- programs/fuzzer.c | 68 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 16 deletions(-) diff --git a/programs/fuzzer.c b/programs/fuzzer.c index 7cbfd794..50d496b9 100644 --- a/programs/fuzzer.c +++ b/programs/fuzzer.c @@ -196,22 +196,6 @@ static int basicUnitTests(U32 seed, double compressibility) if (result != (size_t)-ZSTD_error_srcSize_wrong) goto _output_error; DISPLAYLEVEL(4, "OK \n"); - /* All zeroes test (#137 verif) */ - #define ZEROESLENGTH 100 - DISPLAYLEVEL(4, "test%3i : compress %u zeroes : ", testNb++, ZEROESLENGTH); - memset(CNBuffer, 0, ZEROESLENGTH); - result = ZSTD_compress(compressedBuffer, ZSTD_compressBound(ZEROESLENGTH), CNBuffer, ZEROESLENGTH, 1); - if (ZSTD_isError(result)) goto _output_error; - cSize = result; - 
DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/ZEROESLENGTH*100); - - DISPLAYLEVEL(4, "test%3i : decompress %u zeroes : ", testNb++, ZEROESLENGTH); - result = ZSTD_decompress(decodedBuffer, ZEROESLENGTH, compressedBuffer, cSize); - if (ZSTD_isError(result)) goto _output_error; - if (result != ZEROESLENGTH) goto _output_error; - DISPLAYLEVEL(4, "OK \n"); - - /* Dictionary and Duplication tests */ { ZSTD_CCtx* ctxOrig = ZSTD_createCCtx(); @@ -344,6 +328,58 @@ static int basicUnitTests(U32 seed, double compressibility) DISPLAYLEVEL(4, "OK \n"); } + /* All zeroes test (#137 verif) */ + #define ZEROESLENGTH 100 + DISPLAYLEVEL(4, "test%3i : compress %u zeroes : ", testNb++, ZEROESLENGTH); + memset(CNBuffer, 0, ZEROESLENGTH); + result = ZSTD_compress(compressedBuffer, ZSTD_compressBound(ZEROESLENGTH), CNBuffer, ZEROESLENGTH, 1); + if (ZSTD_isError(result)) goto _output_error; + cSize = result; + DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/ZEROESLENGTH*100); + + DISPLAYLEVEL(4, "test%3i : decompress %u zeroes : ", testNb++, ZEROESLENGTH); + result = ZSTD_decompress(decodedBuffer, ZEROESLENGTH, compressedBuffer, cSize); + if (ZSTD_isError(result)) goto _output_error; + if (result != ZEROESLENGTH) goto _output_error; + DISPLAYLEVEL(4, "OK \n"); + + /* nbSeq limit test */ + { + #define _3BYTESTESTLENGTH 131000 + #define NB3BYTESSEQLOG 9 + #define NB3BYTESSEQ (1 << NB3BYTESSEQLOG) + #define NB3BYTESSEQMASK (NB3BYTESSEQ-1) + BYTE _3BytesSeqs[NB3BYTESSEQ][3]; + U32 r = 1; + int i; + + for (i=0; i < NB3BYTESSEQ; i++) { + _3BytesSeqs[i][0] = (BYTE)(FUZ_rand(&r) & 255); + _3BytesSeqs[i][1] = (BYTE)(FUZ_rand(&r) & 255); + _3BytesSeqs[i][2] = (BYTE)(FUZ_rand(&r) & 255); + } + + for (i=0; i < _3BYTESTESTLENGTH; ){ + U32 id = FUZ_rand(&r) & NB3BYTESSEQMASK; + ((BYTE*)CNBuffer)[i+0] = _3BytesSeqs[id][0]; + ((BYTE*)CNBuffer)[i+1] = _3BytesSeqs[id][1]; + ((BYTE*)CNBuffer)[i+2] = _3BytesSeqs[id][2]; + i += 3; + } + + DISPLAYLEVEL(4, "test%3i 
: compress lots 3-bytes sequences : ", testNb++); + result = ZSTD_compress(compressedBuffer, ZSTD_compressBound(_3BYTESTESTLENGTH), CNBuffer, _3BYTESTESTLENGTH, 19); + if (ZSTD_isError(result)) goto _output_error; + cSize = result; + DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/_3BYTESTESTLENGTH*100); + + DISPLAYLEVEL(4, "test%3i : decompress lots 3-bytes sequence : ", testNb++); + result = ZSTD_decompress(decodedBuffer, _3BYTESTESTLENGTH, compressedBuffer, cSize); + if (ZSTD_isError(result)) goto _output_error; + if (result != _3BYTESTESTLENGTH) goto _output_error; + DISPLAYLEVEL(4, "OK \n"); + } + _end: free(CNBuffer); free(compressedBuffer); From 370b08e84041845f8ae7845aaad65b90cb6541f7 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 8 Mar 2016 00:03:59 +0100 Subject: [PATCH 047/247] fix compression ratio for blocks <= 16 KB --- lib/zstd_compress.c | 52 ++++++++++++++++++++++--------------------- lib/zstd_decompress.c | 3 ++- programs/fuzzer.c | 1 + 3 files changed, 30 insertions(+), 26 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 4858831e..a5127b74 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -147,13 +147,14 @@ void ZSTD_validateParams(ZSTD_parameters* params) const U32 btPlus = (params->strategy == ZSTD_btlazy2) || (params->strategy == ZSTD_btopt); const U32 searchLengthMax = (params->strategy == ZSTD_fast) ? ZSTD_SEARCHLENGTH_MAX : ZSTD_SEARCHLENGTH_MAX-1; const U32 searchLengthMin = (params->strategy == ZSTD_btopt) ? ZSTD_SEARCHLENGTH_MIN : ZSTD_SEARCHLENGTH_MIN+1; + const U32 hashLog3Min = (params->strategy == ZSTD_btopt) ? 
ZSTD_HASHLOG3_MIN : 0; /* validate params */ if (MEM_32bits()) if (params->windowLog > 25) params->windowLog = 25; /* 32 bits mode cannot flush > 24 bits */ CLAMP(params->windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); CLAMP(params->contentLog, ZSTD_CONTENTLOG_MIN, ZSTD_CONTENTLOG_MAX); CLAMP(params->hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); - CLAMP(params->hashLog3, ZSTD_HASHLOG3_MIN, ZSTD_HASHLOG3_MAX); + CLAMP(params->hashLog3, hashLog3Min, ZSTD_HASHLOG3_MAX); CLAMP(params->searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); CLAMP(params->searchLength, searchLengthMin, searchLengthMax); CLAMP(params->targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX); @@ -174,10 +175,11 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, { /* note : params considered validated here */ const size_t blockSize = MIN(BLOCKSIZE, (size_t)1 << params.windowLog); /* reserve table memory */ - const U32 contentLog = (params.strategy == ZSTD_fast) ? 1 : params.contentLog; + const U32 contentLog = (params.strategy == ZSTD_fast) ? 1 : params.contentLog; const size_t tableSpace = ((1 << contentLog) + (1 << params.hashLog) + (1 << params.hashLog3)) * sizeof(U32); - const size_t neededSpace = tableSpace + (256*sizeof(U32)) + (3*blockSize) - + ((params.strategy == ZSTD_btopt) ? ((1<workSpaceSize < neededSpace) { free(zc->workSpace); @@ -204,7 +206,7 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, zc->blockSize = blockSize; zc->seqStore.offsetStart = (U32*) (zc->seqStore.buffer); - zc->seqStore.offCodeStart = (BYTE*) (zc->seqStore.offsetStart) + blockSize; + zc->seqStore.offCodeStart = (BYTE*) (zc->seqStore.offsetStart + (blockSize>>2)); zc->seqStore.litStart = zc->seqStore.offCodeStart + (blockSize>>2); zc->seqStore.litLengthStart = zc->seqStore.litStart + blockSize; zc->seqStore.matchLengthStart = zc->seqStore.litLengthStart + (blockSize>>2); @@ -227,9 +229,9 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, } -/*! 
ZSTD_copyCCtx -* Duplicate an existing context @srcCCtx into another one @dstCCtx. -* Only works during stage 0 (i.e. before first call to ZSTD_compressContinue()) +/*! ZSTD_copyCCtx() : +* Duplicate an existing context `srcCCtx` into another one `dstCCtx`. +* Only works during stage 0 (i.e. before first call to ZSTD_compressContinue()). * @return : 0, or an error code */ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx) { @@ -2363,7 +2365,7 @@ static const ZSTD_parameters ZSTD_defaultParameters[4][ZSTD_MAX_CLEVEL+1] = { { 0, 17, 18, 17, 0, 7, 4,128, ZSTD_btopt }, /* level 16 */ { 0, 17, 18, 17, 0, 8, 4,128, ZSTD_btopt }, /* level 17 */ { 0, 17, 18, 17, 0, 8, 4,256, ZSTD_btopt }, /* level 18 */ - { 0, 17, 18, 17, 0, 9, 4,256, ZSTD_btopt }, /* level 19 */ + { 0, 17, 18, 17, 16, 9, 3,256, ZSTD_btopt }, /* level 19 */ { 0, 17, 18, 17, 0, 10, 4,512, ZSTD_btopt }, /* level 20 */ { 0, 17, 18, 17, 0, 11, 4,512, ZSTD_btopt }, /* level 21 */ { 0, 17, 18, 17, 0, 11, 4,512, ZSTD_btopt }, /* level 21-2 */ @@ -2384,22 +2386,22 @@ static const ZSTD_parameters ZSTD_defaultParameters[4][ZSTD_MAX_CLEVEL+1] = { { 0, 14, 14, 14, 0, 6, 4, 4, ZSTD_lazy2 }, /* level 7.*/ { 0, 14, 14, 14, 0, 7, 4, 4, ZSTD_lazy2 }, /* level 8.*/ { 0, 14, 15, 14, 0, 6, 4, 4, ZSTD_btlazy2 }, /* level 9.*/ - { 0, 14, 15, 14, 0, 3, 3, 6, ZSTD_btopt }, /* level 10.*/ - { 0, 14, 15, 14, 0, 6, 3, 8, ZSTD_btopt }, /* level 11.*/ - { 0, 14, 15, 14, 0, 6, 3, 16, ZSTD_btopt }, /* level 12.*/ - { 0, 14, 15, 14, 0, 6, 3, 24, ZSTD_btopt }, /* level 13.*/ - { 0, 14, 15, 15, 0, 6, 3, 48, ZSTD_btopt }, /* level 14.*/ - { 0, 14, 15, 15, 0, 6, 3, 64, ZSTD_btopt }, /* level 15.*/ - { 0, 14, 15, 15, 0, 6, 3, 96, ZSTD_btopt }, /* level 16.*/ - { 0, 14, 15, 15, 0, 6, 3,128, ZSTD_btopt }, /* level 17.*/ - { 0, 14, 15, 15, 0, 6, 3,256, ZSTD_btopt }, /* level 18.*/ - { 0, 14, 15, 15, 0, 7, 3,256, ZSTD_btopt }, /* level 19.*/ - { 0, 14, 15, 15, 0, 8, 3,256, ZSTD_btopt }, /* level 20.*/ - { 0, 14, 15, 15, 0, 9, 
3,256, ZSTD_btopt }, /* level 21.*/ - { 0, 14, 15, 15, 0, 10, 3,256, ZSTD_btopt }, /* level 22.*/ - { 0, 14, 15, 15, 0, 11, 3,256, ZSTD_btopt }, /* level 23.*/ - { 0, 14, 15, 15, 0, 12, 3,256, ZSTD_btopt }, /* level 24.*/ - { 0, 14, 15, 15, 0, 13, 3,256, ZSTD_btopt }, /* level 25.*/ + { 0, 14, 15, 14, 16, 3, 3, 6, ZSTD_btopt }, /* level 10.*/ + { 0, 14, 15, 14, 16, 6, 3, 8, ZSTD_btopt }, /* level 11.*/ + { 0, 14, 15, 14, 16, 6, 3, 16, ZSTD_btopt }, /* level 12.*/ + { 0, 14, 15, 14, 16, 6, 3, 24, ZSTD_btopt }, /* level 13.*/ + { 0, 14, 15, 15, 16, 6, 3, 48, ZSTD_btopt }, /* level 14.*/ + { 0, 14, 15, 15, 16, 6, 3, 64, ZSTD_btopt }, /* level 15.*/ + { 0, 14, 15, 15, 16, 6, 3, 96, ZSTD_btopt }, /* level 16.*/ + { 0, 14, 15, 15, 16, 6, 3,128, ZSTD_btopt }, /* level 17.*/ + { 0, 14, 15, 15, 16, 6, 3,256, ZSTD_btopt }, /* level 18.*/ + { 0, 14, 15, 15, 16, 7, 3,256, ZSTD_btopt }, /* level 19.*/ + { 0, 14, 15, 15, 16, 8, 3,256, ZSTD_btopt }, /* level 20.*/ + { 0, 14, 15, 15, 16, 9, 3,256, ZSTD_btopt }, /* level 21.*/ + { 0, 14, 15, 15, 16, 10, 3,256, ZSTD_btopt }, /* level 22.*/ + { 0, 14, 15, 15, 16, 11, 3,256, ZSTD_btopt }, /* level 23.*/ + { 0, 14, 15, 15, 16, 12, 3,256, ZSTD_btopt }, /* level 24.*/ + { 0, 14, 15, 15, 16, 13, 3,256, ZSTD_btopt }, /* level 25.*/ }, }; diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index c4338bcc..844aa976 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -822,7 +822,8 @@ static size_t ZSTD_decompressSequences( nbSeq--; ZSTD_decodeSequence(&sequence, &seqState, mls); oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd); - if (ZSTD_isError(oneSeqSize)) return oneSeqSize; + if (ZSTD_isError(oneSeqSize)) + return oneSeqSize; op += oneSeqSize; } diff --git a/programs/fuzzer.c b/programs/fuzzer.c index 50d496b9..6d57080d 100644 --- a/programs/fuzzer.c +++ b/programs/fuzzer.c @@ -344,6 +344,7 @@ static int basicUnitTests(U32 seed, double compressibility) DISPLAYLEVEL(4, "OK 
\n"); /* nbSeq limit test */ + if (0) { #define _3BYTESTESTLENGTH 131000 #define NB3BYTESSEQLOG 9 From dd54bbc184c7b260a7ed419fbbc248e03afd9942 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 8 Mar 2016 02:35:34 +0100 Subject: [PATCH 048/247] Fixed large NbSeq > 32 K Added a test in Fuzzer to check NbSeq > 32 K --- lib/zstd_compress.c | 19 +++++++++++-------- programs/fuzzer.c | 3 +-- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index a5127b74..50979ce5 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -165,8 +165,8 @@ void ZSTD_validateParams(ZSTD_parameters* params) U32 srcLog = ZSTD_highbit((U32)(params->srcSize)-1) + 1; if (params->windowLog > srcLog) params->windowLog = srcLog; } - if (params->windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) params->windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */ - if (params->contentLog > params->windowLog+btPlus) params->contentLog = params->windowLog+btPlus; /* <= ZSTD_CONTENTLOG_MAX */ + if (params->windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) params->windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */ + if (params->contentLog > params->windowLog+btPlus) params->contentLog = params->windowLog+btPlus; /* <= ZSTD_CONTENTLOG_MAX */ } @@ -176,9 +176,12 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, const size_t blockSize = MIN(BLOCKSIZE, (size_t)1 << params.windowLog); /* reserve table memory */ const U32 contentLog = (params.strategy == ZSTD_fast) ? 1 : params.contentLog; + const U32 divider = (params.searchLength==3) ? 
3 : 4; + const size_t maxNbSeq = blockSize / divider; + const size_t tokenSpace = blockSize + 8*maxNbSeq; const size_t tableSpace = ((1 << contentLog) + (1 << params.hashLog) + (1 << params.hashLog3)) * sizeof(U32); const size_t optSpace = ((1<workSpaceSize < neededSpace) { @@ -206,13 +209,13 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, zc->blockSize = blockSize; zc->seqStore.offsetStart = (U32*) (zc->seqStore.buffer); - zc->seqStore.offCodeStart = (BYTE*) (zc->seqStore.offsetStart + (blockSize>>2)); - zc->seqStore.litStart = zc->seqStore.offCodeStart + (blockSize>>2); + zc->seqStore.offCodeStart = (BYTE*) (zc->seqStore.offsetStart + maxNbSeq); + zc->seqStore.litStart = zc->seqStore.offCodeStart + maxNbSeq; zc->seqStore.litLengthStart = zc->seqStore.litStart + blockSize; - zc->seqStore.matchLengthStart = zc->seqStore.litLengthStart + (blockSize>>2); - zc->seqStore.dumpsStart = zc->seqStore.matchLengthStart + (blockSize>>2); + zc->seqStore.matchLengthStart = zc->seqStore.litLengthStart + maxNbSeq; + zc->seqStore.dumpsStart = zc->seqStore.matchLengthStart + maxNbSeq; if (params.strategy == ZSTD_btopt) { - zc->seqStore.litFreq = (U32*)((void*)(zc->seqStore.dumpsStart + (blockSize>>2))); + zc->seqStore.litFreq = (U32*)((void*)(zc->seqStore.dumpsStart + maxNbSeq)); zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (1<seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (1< Date: Tue, 8 Mar 2016 11:03:55 +0100 Subject: [PATCH 049/247] fixed update of hashTable for matches with length of 4+ --- lib/zstd_opt.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index 68a3c315..961964bd 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -227,7 +227,6 @@ static U32 ZSTD_insertBtAndGetAllMatches ( const U32 minMatch = (mls == 3) ? 
3 : 4; size_t bestLength = minMatch-1; - hashTable[h] = current; /* Update Hash Table */ if (minMatch == 3) { /* HC3 match finder */ U32 matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip); @@ -256,6 +255,8 @@ static U32 ZSTD_insertBtAndGetAllMatches ( } } + hashTable[h] = current; /* Update Hash Table */ + while (nbCompares-- && (matchIndex > windowLow)) { U32* nextPtr = bt + 2*(matchIndex & btMask); size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ From 2e91dde43e3ef09cde7ee967316a3af427fcfe8d Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 8 Mar 2016 12:22:11 +0100 Subject: [PATCH 050/247] improved memory size evaluation by paramgrill --- lib/zstd_compress.c | 19 +++++++++++++++++-- programs/.gitignore | 4 +++- programs/paramgrill.c | 7 +++---- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 50979ce5..b668a934 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -129,7 +129,7 @@ size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) return 0; /* reserved as a potential error code in the future */ } -seqStore_t ZSTD_copySeqStore(const ZSTD_CCtx* ctx) +seqStore_t ZSTD_copySeqStore(const ZSTD_CCtx* ctx) /* hidden interface */ { return ctx->seqStore; } @@ -170,11 +170,26 @@ void ZSTD_validateParams(ZSTD_parameters* params) } +size_t ZSTD_sizeofCCtx(ZSTD_parameters params) /* hidden interface, for paramagrill */ +{ /* copy / pasted from ZSTD_resetCCtx_advanced */ + const size_t blockSize = MIN(BLOCKSIZE, (size_t)1 << params.windowLog); + const U32 contentLog = (params.strategy == ZSTD_fast) ? 1 : params.contentLog; + const U32 divider = (params.searchLength==3) ? 
3 : 4; + const size_t maxNbSeq = blockSize / divider; + const size_t tokenSpace = blockSize + 8*maxNbSeq; + const size_t tableSpace = ((1 << contentLog) + (1 << params.hashLog) + (1 << params.hashLog3)) * sizeof(U32); + const size_t optSpace = ((1< diff --git a/visual/2013/fuzzer/fuzzer.vcxproj b/visual/2013/fuzzer/fuzzer.vcxproj index 0844efe7..49738e0c 100644 --- a/visual/2013/fuzzer/fuzzer.vcxproj +++ b/visual/2013/fuzzer/fuzzer.vcxproj @@ -22,6 +22,7 @@ {6FD4352B-346C-4703-96EA-D4A8B9A6976E} Win32Proj fuzzer + $(SolutionDir)bin\$(Platform)\$(Configuration)\ diff --git a/visual/2013/zstd/zstd.vcxproj b/visual/2013/zstd/zstd.vcxproj index 99d308b9..6d8b2b6c 100644 --- a/visual/2013/zstd/zstd.vcxproj +++ b/visual/2013/zstd/zstd.vcxproj @@ -69,6 +69,7 @@ {4E52A41A-F33B-4C7A-8C36-A1A6B4F4277C} Win32Proj zstd + $(SolutionDir)bin\$(Platform)\$(Configuration)\ @@ -127,13 +128,11 @@ false $(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath); false - $(SolutionDir)$(Configuration)\ false $(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath); false - $(SolutionDir)$(Configuration)\ diff --git a/visual/2013/zstdlib/zstdlib.vcxproj b/visual/2013/zstdlib/zstdlib.vcxproj index b13bc98f..e44699d4 100644 --- a/visual/2013/zstdlib/zstdlib.vcxproj +++ b/visual/2013/zstdlib/zstdlib.vcxproj @@ -24,8 +24,6 @@ - - @@ -48,6 +46,7 @@ {8BFD8150-94D5-4BF9-8A50-7BD9929A0850} Win32Proj zstdlib + $(SolutionDir)bin\$(Platform)\$(Configuration)\ @@ -96,7 +95,6 @@ true zstdlib_x86 $(Platform)\$(Configuration)\ - $(SolutionDir)$(Platform)\$(Configuration)\ $(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath); true @@ -104,14 +102,12 @@ true 
zstdlib_x64 $(Platform)\$(Configuration)\ - $(SolutionDir)$(Platform)\$(Configuration)\ $(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath); true false zstdlib_x86 - $(SolutionDir)$(Platform)\$(Configuration)\ $(Platform)\$(Configuration)\ $(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath); false @@ -119,7 +115,6 @@ false zstdlib_x64 - $(SolutionDir)$(Platform)\$(Configuration)\ $(Platform)\$(Configuration)\ $(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath); false From fade9b7ae3807d347f4e820760153a887a16c404 Mon Sep 17 00:00:00 2001 From: inikep Date: Fri, 18 Mar 2016 16:10:35 +0100 Subject: [PATCH 105/247] ZSTD_btopt with 4 repcodes --- lib/zstd_compress.c | 17 ++-- lib/zstd_decompress.c | 9 +-- lib/zstd_internal.h | 3 +- lib/zstd_opt.h | 177 +++++++++++++++++++----------------------- 4 files changed, 92 insertions(+), 114 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index f0518b04..397e7618 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -1699,22 +1699,17 @@ _storeSequence: rep[1] = rep[0]; rep[0] = offset - ZSTD_REP_MOVE; #else - if (kSlotNew < 3) - rep[3] = rep[2]; - if (kSlotNew < 2) - rep[2] = rep[1]; - if (kSlotNew < 1) - rep[1] = rep[0]; + if (kSlotNew < 3) rep[3] = rep[2]; + if (kSlotNew < 2) rep[2] = rep[1]; + if (kSlotNew < 1) rep[1] = rep[0]; rep[kSlotNew] = offset - ZSTD_REP_MOVE; #endif } else { if (offset != 0) { size_t temp = rep[offset]; - if (offset != 1) { - if (offset == 3) rep[3] = rep[2]; - rep[2] = rep[1]; - } - rep[1] = rep[0]; + if (offset > 2) rep[3] = rep[2]; + if (offset > 1) rep[2] = rep[1]; + if (offset > 0) rep[1] = rep[0]; rep[0] = temp; } diff 
--git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index d4974b30..374e6371 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -710,12 +710,9 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) seqState->prevOffset[1] = seqState->prevOffset[0]; seqState->prevOffset[0] = offset; #else - if (kSlotNew < 3) - seqState->prevOffset[3] = seqState->prevOffset[2]; - if (kSlotNew < 2) - seqState->prevOffset[2] = seqState->prevOffset[1]; - if (kSlotNew < 1) - seqState->prevOffset[1] = seqState->prevOffset[0]; + if (kSlotNew < 3) seqState->prevOffset[3] = seqState->prevOffset[2]; + if (kSlotNew < 2) seqState->prevOffset[2] = seqState->prevOffset[1]; + if (kSlotNew < 1) seqState->prevOffset[1] = seqState->prevOffset[0]; seqState->prevOffset[kSlotNew] = offset; #endif } diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index 669d4380..c4052ba8 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -183,8 +183,7 @@ typedef struct { U32 off; U32 mlen; U32 litlen; - U32 rep; - U32 rep2; + U32 rep[ZSTD_REP_NUM]; } ZSTD_optimal_t; typedef struct { diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index 5567b44a..69750aaa 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -259,7 +259,7 @@ static U32 ZSTD_insertBtAndGetAllMatches ( /* save best solution */ if (currentMl > bestLength) { bestLength = currentMl; - matches[mnum].off = current - matchIndex3; + matches[mnum].off = ZSTD_REP_MOVE + current - matchIndex3; matches[mnum].len = (U32)currentMl; mnum++; if (currentMl > ZSTD_OPT_NUM) goto update; @@ -304,7 +304,7 @@ static U32 ZSTD_insertBtAndGetAllMatches ( if (matchLength > bestLength) { if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength; bestLength = matchLength; - matches[mnum].off = current - matchIndex; + matches[mnum].off = ZSTD_REP_MOVE + current - matchIndex; matches[mnum].len = (U32)matchLength; mnum++; if (matchLength > ZSTD_OPT_NUM) break; @@ -444,21 +444,22 @@ void 
ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, opt[0].litlen = (U32)(ip - litstart); /* check repCode */ - if (MEM_readMINMATCH(ip+1, minMatch) == MEM_readMINMATCH(ip+1 - rep[0], minMatch)) { + for (int i=0; i sufficient_len || mlen >= ZSTD_OPT_NUM) { - ip+=1; best_mlen = mlen; best_off = 0; cur = 0; last_pos = 1; + best_mlen = mlen; best_off = i; cur = 0; last_pos = 1; goto _storeSequence; } - litlen = opt[0].litlen + 1; + litlen = opt[0].litlen; do { - price = ZSTD_getPrice(seqStorePtr, litlen, litstart, 0, mlen - minMatch); - if (mlen + 1 > last_pos || price < opt[mlen + 1].price) - SET_PRICE(mlen + 1, mlen, 0, litlen, price); /* note : macro modifies last_pos */ + price = ZSTD_getPrice(seqStorePtr, litlen, litstart, i, mlen - minMatch); + if (mlen > last_pos || price < opt[mlen].price) + SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */ mlen--; } while (mlen >= minMatch); } @@ -468,8 +469,10 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos); if (!last_pos && !match_num) { ip++; continue; } - opt[0].rep = rep[0]; - opt[0].rep2 = rep[1]; + opt[0].rep[0] = rep[0]; + opt[0].rep[1] = rep[1]; + opt[0].rep[2] = rep[2]; + opt[0].rep[3] = rep[3]; opt[0].mlen = 1; if (match_num && matches[match_num-1].len > sufficient_len) { @@ -499,7 +502,6 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, /* check further positions */ for (cur = 1; cur <= last_pos; cur++) { - size_t cur_rep; inr = ip + cur; if (opt[cur-1].mlen == 1) { @@ -523,40 +525,32 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, mlen = opt[cur].mlen; - if (opt[cur].off) { - opt[cur].rep2 = opt[cur-mlen].rep; - opt[cur].rep = opt[cur].off; - ZSTD_LOG_ENCODE("%d: COPYREP_OFF cur=%d mlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2); + if (opt[cur].off >= ZSTD_REP_NUM) { + opt[cur].rep[3] = (kSlotNew < 3) ? 
opt[cur-mlen].rep[2] : opt[cur-mlen].rep[3]; + opt[cur].rep[2] = (kSlotNew < 2) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; + opt[cur].rep[1] = (kSlotNew < 1) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; + opt[cur].rep[kSlotNew] = opt[cur].off - ZSTD_REP_MOVE; + ZSTD_LOG_ENCODE("%d: COPYREP_OFF cur=%d mlen=%d rep=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]); } else { - if (cur!=mlen && opt[cur].litlen == 0) { - opt[cur].rep2 = opt[cur-mlen].rep; - opt[cur].rep = opt[cur-mlen].rep2; - ZSTD_LOG_ENCODE("%d: COPYREP_SWI cur=%d mlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2); - } else { - opt[cur].rep2 = opt[cur-mlen].rep2; - opt[cur].rep = opt[cur-mlen].rep; - ZSTD_LOG_ENCODE("%d: COPYREP_NOR cur=%d mlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2); - } } + opt[cur].rep[3] = (opt[cur].off > 2) ? opt[cur-mlen].rep[2] : opt[cur-mlen].rep[3]; + opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; + opt[cur].rep[1] = (opt[cur].off > 0) ? 
opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; + opt[cur].rep[0] = opt[cur-mlen].rep[opt[cur].off]; + ZSTD_LOG_ENCODE("%d: COPYREP_NOR cur=%d mlen=%d rep=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]); + } - ZSTD_LOG_PARSER("%d: CURRENT_NoExt price[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep, opt[cur].rep2); + ZSTD_LOG_PARSER("%d: CURRENT_NoExt price[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep[1]=%d\n", (int)(inr-base), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep[0], opt[cur].rep[1]); best_mlen = 0; - if (opt[cur].mlen != 1) { - cur_rep = opt[cur].rep2; - ZSTD_LOG_PARSER("%d: tryNoExt REP2 rep2=%u mlen=%u\n", (int)(inr-base), (U32)cur_rep, mlen); - } else { - cur_rep = opt[cur].rep; - ZSTD_LOG_PARSER("%d: tryNoExt REP1 rep=%u mlen=%u\n", (int)(inr-base), (U32)cur_rep, mlen); - } - - if (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - cur_rep, minMatch)) { // check rep - mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - cur_rep, iend) + minMatch; - ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d rep=%d opt[%d].off=%d\n", (int)(inr-base), mlen, 0, opt[cur].rep, cur, opt[cur].off); + for (int i=0; i sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { best_mlen = mlen; - best_off = 0; + best_off = i; ZSTD_LOG_PARSER("%d: REP sufficient_len=%d best_mlen=%d best_off=%d last_pos=%d\n", (int)(inr-base), sufficient_len, best_mlen, best_off, last_pos); last_pos = cur + 1; goto _storeSequence; @@ -565,20 +559,20 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, if (opt[cur].mlen == 1) { litlen = opt[cur].litlen; if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, 0, mlen - minMatch); + price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, i, mlen - minMatch); } else - price = 
ZSTD_getPrice(seqStorePtr, litlen, litstart, 0, mlen - minMatch); + price = ZSTD_getPrice(seqStorePtr, litlen, litstart, i, mlen - minMatch); } else { litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, 0, mlen - minMatch); + price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, i, mlen - minMatch); } best_mlen = mlen; - ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, 0, price, litlen); + ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, i, price, litlen); do { if (cur + mlen > last_pos || price <= opt[cur + mlen].price) - SET_PRICE(cur + mlen, mlen, 0, litlen, price); + SET_PRICE(cur + mlen, mlen, i, litlen, price); mlen--; } while (mlen >= minMatch); } @@ -629,7 +623,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, /* store sequence */ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ for (u = 1; u <= last_pos; u++) - ZSTD_LOG_PARSER("%d: price[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep2=%d\n", (int)(ip-base+u), u, last_pos, opt[u].price, opt[u].off, opt[u].mlen, opt[u].litlen, opt[u].rep, opt[u].rep2); + ZSTD_LOG_PARSER("%d: price[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep[1]=%d\n", (int)(ip-base+u), u, last_pos, opt[u].price, opt[u].off, opt[u].mlen, opt[u].litlen, opt[u].rep, opt[u].rep[1]); ZSTD_LOG_PARSER("%d: cur=%d/%d best_mlen=%d best_off=%d rep=%d\n", (int)(ip-base+cur), (int)cur, (int)last_pos, (int)best_mlen, (int)best_off, opt[cur].rep); opt[0].mlen = 1; @@ -647,31 +641,40 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ } for (u = 0; u <= last_pos;) { - ZSTD_LOG_PARSER("%d: price2[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep2=%d\n", (int)(ip-base+u), u, last_pos, opt[u].price, opt[u].off, opt[u].mlen, opt[u].litlen, opt[u].rep, opt[u].rep2); + ZSTD_LOG_PARSER("%d: price2[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep[1]=%d\n", (int)(ip-base+u), u, last_pos, opt[u].price, 
opt[u].off, opt[u].mlen, opt[u].litlen, opt[u].rep, opt[u].rep[1]); u += opt[u].mlen; } for (cur=0; cur < last_pos; ) { - ZSTD_LOG_PARSER("%d: price3[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep2=%d\n", (int)(ip-base+cur), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep, opt[cur].rep2); + ZSTD_LOG_PARSER("%d: price3[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep[1]=%d\n", (int)(ip-base+cur), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep, opt[cur].rep[1]); mlen = opt[cur].mlen; if (mlen == 1) { ip++; cur++; continue; } offset = opt[cur].off; cur += mlen; U32 litLength = (U32)(ip - anchor); - ZSTD_LOG_ENCODE("%d/%d: ENCODE literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[0], (int)rep[1]); + ZSTD_LOG_ENCODE("%d/%d: ENCODE literals=%d mlen=%d off=%d rep1=%d rep[1]=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[0], (int)rep[1]); - if (offset) { - rep[1] = rep[0]; - rep[0] = offset; + if (offset >= ZSTD_REP_NUM) { + if (kSlotNew < 3) rep[3] = rep[2]; + if (kSlotNew < 2) rep[2] = rep[1]; + if (kSlotNew < 1) rep[1] = rep[0]; + rep[kSlotNew] = offset - ZSTD_REP_MOVE; } else { - if (litLength == 0) { - best_off = rep[1]; + if (offset != 0) { + size_t temp = rep[offset]; + if (offset != 1) { + if (offset == 3) rep[3] = rep[2]; + rep[2] = rep[1]; + } rep[1] = rep[0]; - rep[0] = best_off; - } } + rep[0] = temp; + } - // ZSTD_LOG_ENCODE("%d/%d: ENCODE2 literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[1], (int)rep_2); + if (offset<=1 && litLength==0) offset = 1-offset; + } + + // ZSTD_LOG_ENCODE("%d/%d: ENCODE2 literals=%d mlen=%d off=%d rep1=%d rep[1]=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[1], (int)rep_2); #if ZSTD_OPT_DEBUG >= 
5 U32 ml2; @@ -688,25 +691,9 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ #endif ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-minMatch); - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset ? offset + ZSTD_REP_MOVE : 0, mlen-minMatch); + ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-minMatch); anchor = ip = ip + mlen; } /* for (cur=0; cur < last_pos; ) */ - - /* check immediate repcode */ - while ((anchor >= prefixStart + rep[1]) && (anchor <= ilimit) - && (MEM_readMINMATCH(anchor, minMatch) == MEM_readMINMATCH(anchor - rep[1], minMatch)) ) { - /* store sequence */ - best_mlen = (U32)ZSTD_count(anchor+minMatch, anchor+minMatch-rep[1], iend); - best_off = rep[1]; - rep[1] = rep[0]; - rep[0] = best_off; - ZSTD_LOG_ENCODE("%d/%d: ENCODE REP literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(anchor-base), (int)(iend-base), (int)(0), (int)best_mlen, (int)(0), (int)rep[0], (int)rep[1]); - ZSTD_updatePrice(seqStorePtr, 0, anchor, 0, best_mlen); - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, best_mlen); - anchor += best_mlen+minMatch; - continue; /* faster when present ... (?) 
*/ - } - if (anchor > ip) ip = anchor; } { /* Last Literals */ @@ -803,8 +790,8 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos); if (!last_pos && !match_num) { ip++; continue; } - opt[0].rep = rep[0]; - opt[0].rep2 = rep[1]; + opt[0].rep[0] = rep[0]; + opt[0].rep[1] = rep[1]; opt[0].mlen = 1; if (match_num && matches[match_num-1].len > sufficient_len) { @@ -860,29 +847,29 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, mlen = opt[cur].mlen; if (opt[cur].off) { - opt[cur].rep2 = opt[cur-mlen].rep; - opt[cur].rep = opt[cur].off; - ZSTD_LOG_ENCODE("%d: COPYREP_OFF cur=%d mlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2); + opt[cur].rep[1] = opt[cur-mlen].rep[0]; + opt[cur].rep[0]= opt[cur].off; + ZSTD_LOG_ENCODE("%d: COPYREP_OFF cur=%d mlen=%d rep=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]); } else { if (cur!=mlen && opt[cur].litlen == 0) { - opt[cur].rep2 = opt[cur-mlen].rep; - opt[cur].rep = opt[cur-mlen].rep2; - ZSTD_LOG_ENCODE("%d: COPYREP_SWI cur=%d mlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2); + opt[cur].rep[1] = opt[cur-mlen].rep[0]; + opt[cur].rep[0] = opt[cur-mlen].rep[1]; + ZSTD_LOG_ENCODE("%d: COPYREP_SWI cur=%d mlen=%d rep=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]); } else { - opt[cur].rep2 = opt[cur-mlen].rep2; - opt[cur].rep = opt[cur-mlen].rep; - ZSTD_LOG_ENCODE("%d: COPYREP_NOR cur=%d mlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2); + opt[cur].rep[1] = opt[cur-mlen].rep[1]; + opt[cur].rep[0] = opt[cur-mlen].rep[0]; + ZSTD_LOG_ENCODE("%d: COPYREP_NOR cur=%d mlen=%d rep=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]); } } - ZSTD_LOG_PARSER("%d: CURRENT_Ext price[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep2=%d\n", (int)(inr-base), 
cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep, opt[cur].rep2); + ZSTD_LOG_PARSER("%d: CURRENT_Ext price[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep[1]=%d\n", (int)(inr-base), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep[0], opt[cur].rep[1]); best_mlen = 0; if (opt[cur].mlen != 1) { - cur_rep = opt[cur].rep2; - ZSTD_LOG_PARSER("%d: tryExt REP2 rep2=%u mlen=%u\n", (int)(inr-base), (U32)cur_rep, mlen); + cur_rep = opt[cur].rep[1]; + ZSTD_LOG_PARSER("%d: tryExt REP2 rep[1]=%u mlen=%u\n", (int)(inr-base), (U32)cur_rep, mlen); } else { - cur_rep = opt[cur].rep; + cur_rep = opt[cur].rep[0]; ZSTD_LOG_PARSER("%d: tryExt REP1 rep=%u mlen=%u\n", (int)(inr-base), (U32)cur_rep, mlen); } @@ -972,7 +959,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, /* store sequence */ _storeSequence: // cur, last_pos, best_mlen, best_off have to be set for (u = 1; u <= last_pos; u++) - ZSTD_LOG_PARSER("%d: price[%u/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep2=%d\n", (int)(ip-base+u), u, last_pos, opt[u].price, opt[u].off, opt[u].mlen, opt[u].litlen, opt[u].rep, opt[u].rep2); + ZSTD_LOG_PARSER("%d: price[%u/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep[1]=%d\n", (int)(ip-base+u), u, last_pos, opt[u].price, opt[u].off, opt[u].mlen, opt[u].litlen, opt[u].rep, opt[u].rep[1]); ZSTD_LOG_PARSER("%d: cur=%d/%d best_mlen=%d best_off=%d rep=%d\n", (int)(ip-base+cur), (int)cur, (int)last_pos, (int)best_mlen, (int)best_off, opt[cur].rep); opt[0].mlen = 1; @@ -989,20 +976,20 @@ _storeSequence: // cur, last_pos, best_mlen, best_off have to be set } for (u = 0; u <= last_pos; ) { - ZSTD_LOG_PARSER("%d: price2[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep2=%d\n", (int)(ip-base+u), u, last_pos, opt[u].price, opt[u].off, opt[u].mlen, opt[u].litlen, opt[u].rep, opt[u].rep2); + ZSTD_LOG_PARSER("%d: price2[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep[1]=%d\n", (int)(ip-base+u), u, last_pos, opt[u].price, 
opt[u].off, opt[u].mlen, opt[u].litlen, opt[u].rep, opt[u].rep[1]); u += opt[u].mlen; } for (cur=0; cur < last_pos; ) { U32 litLength; - ZSTD_LOG_PARSER("%d: price3[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep2=%d\n", (int)(ip-base+cur), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep, opt[cur].rep2); + ZSTD_LOG_PARSER("%d: price3[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep[1]=%d\n", (int)(ip-base+cur), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep, opt[cur].rep[1]); mlen = opt[cur].mlen; if (mlen == 1) { ip++; cur++; continue; } offset = opt[cur].off; cur += mlen; litLength = (U32)(ip - anchor); - ZSTD_LOG_ENCODE("%d/%d: ENCODE1 literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[0], (int)rep[1]); + ZSTD_LOG_ENCODE("%d/%d: ENCODE1 literals=%d mlen=%d off=%d rep1=%d rep[1]=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[0], (int)rep[1]); if (offset) { rep[1] = rep[0]; @@ -1014,7 +1001,7 @@ _storeSequence: // cur, last_pos, best_mlen, best_off have to be set rep[0] = best_off; } } - ZSTD_LOG_ENCODE("%d/%d: ENCODE2 literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[0], (int)rep[1]); + ZSTD_LOG_ENCODE("%d/%d: ENCODE2 literals=%d mlen=%d off=%d rep1=%d rep[1]=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[0], (int)rep[1]); #if ZSTD_OPT_DEBUG >= 5 U32 ml2; @@ -1057,7 +1044,7 @@ _storeSequence: // cur, last_pos, best_mlen, best_off have to be set } offset = rep[1]; rep[1] = rep[0]; rep[0] = offset; /* swap offset history */ - ZSTD_LOG_ENCODE("%d/%d: ENCODE REP literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(anchor-base), (int)(iend-base), (int)(0), (int)best_mlen, (int)(0), (int)rep[0], (int)rep[1]); + ZSTD_LOG_ENCODE("%d/%d: 
ENCODE REP literals=%d mlen=%d off=%d rep1=%d rep[1]=%d\n", (int)(anchor-base), (int)(iend-base), (int)(0), (int)best_mlen, (int)(0), (int)rep[0], (int)rep[1]); ZSTD_updatePrice(seqStorePtr, 0, anchor, 0, mlen-minMatch); ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, mlen-minMatch); anchor += mlen; @@ -1075,7 +1062,7 @@ _storeSequence: // cur, last_pos, best_mlen, best_off have to be set const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; mlen = (U32)ZSTD_count_2segments(anchor+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; offset = rep[1]; rep[1] = rep[0]; rep[0] = offset; /* swap offset history */ - ZSTD_LOG_ENCODE("%d/%d: ENCODE REP literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(anchor-base), (int)(iend-base), (int)(0), (int)best_mlen, (int)(0), (int)rep[0], (int)rep[1]); + ZSTD_LOG_ENCODE("%d/%d: ENCODE REP literals=%d mlen=%d off=%d rep1=%d rep[1]=%d\n", (int)(anchor-base), (int)(iend-base), (int)(0), (int)best_mlen, (int)(0), (int)rep[0], (int)rep[1]); ZSTD_updatePrice(seqStorePtr, 0, anchor, 0, mlen-minMatch); ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, mlen-minMatch); anchor += mlen; From 39c596ca5803bc9407f7686d634db53853593efb Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 18 Mar 2016 21:40:56 +0100 Subject: [PATCH 106/247] minor simplifications --- lib/zstd_decompress.c | 62 +++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 35 deletions(-) diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index da9665cd..7a38bf9b 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -516,7 +516,6 @@ size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLen const BYTE* ip = istart; const BYTE* const iend = istart + srcSize; U32 LLtype, Offtype, MLtype; - U32 LLlog, Offlog, MLlog; size_t dumpsLength; /* check */ @@ -553,82 +552,75 @@ size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLen /* check */ if (ip > iend-3) return 
ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */ - /* sequences */ + /* Build DTables */ { S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL >= MaxOff */ - size_t headerSize; - /* Build DTables */ switch(LLtype) { - U32 max; case FSE_ENCODING_RLE : - LLlog = 0; FSE_buildDTable_rle(DTableLL, *ip++); break; case FSE_ENCODING_RAW : - LLlog = LLbits; FSE_buildDTable_raw(DTableLL, LLbits); break; case FSE_ENCODING_STATIC: break; - case FSE_ENCODING_DYNAMIC : default : /* impossible */ - max = MaxLL; - headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip); - if (FSE_isError(headerSize)) return ERROR(GENERIC); - if (LLlog > LLFSELog) return ERROR(corruption_detected); - ip += headerSize; - FSE_buildDTable(DTableLL, norm, max, LLlog); + case FSE_ENCODING_DYNAMIC : + { U32 LLlog, max = MaxLL; + size_t const headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip); + if (FSE_isError(headerSize)) return ERROR(GENERIC); + if (LLlog > LLFSELog) return ERROR(corruption_detected); + ip += headerSize; + FSE_buildDTable(DTableLL, norm, max, LLlog); + } } switch(Offtype) { - U32 max; case FSE_ENCODING_RLE : - Offlog = 0; if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */ FSE_buildDTable_rle(DTableOffb, *ip++ & MaxOff); /* if *ip > MaxOff, data is corrupted */ break; case FSE_ENCODING_RAW : - Offlog = Offbits; FSE_buildDTable_raw(DTableOffb, Offbits); break; case FSE_ENCODING_STATIC: break; - case FSE_ENCODING_DYNAMIC : default : /* impossible */ - max = MaxOff; - headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip); - if (FSE_isError(headerSize)) return ERROR(GENERIC); - if (Offlog > OffFSELog) return ERROR(corruption_detected); - ip += headerSize; - FSE_buildDTable(DTableOffb, norm, max, Offlog); + case FSE_ENCODING_DYNAMIC : + { + U32 Offlog, max = MaxOff; + size_t const headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip); + if 
(FSE_isError(headerSize)) return ERROR(GENERIC); + if (Offlog > OffFSELog) return ERROR(corruption_detected); + ip += headerSize; + FSE_buildDTable(DTableOffb, norm, max, Offlog); + } } switch(MLtype) { - U32 max; case FSE_ENCODING_RLE : - MLlog = 0; if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */ FSE_buildDTable_rle(DTableML, *ip++); break; case FSE_ENCODING_RAW : - MLlog = MLbits; FSE_buildDTable_raw(DTableML, MLbits); break; case FSE_ENCODING_STATIC: break; - case FSE_ENCODING_DYNAMIC : default : /* impossible */ - max = MaxML; - headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip); - if (FSE_isError(headerSize)) return ERROR(GENERIC); - if (MLlog > MLFSELog) return ERROR(corruption_detected); - ip += headerSize; - FSE_buildDTable(DTableML, norm, max, MLlog); - } } + case FSE_ENCODING_DYNAMIC : + { U32 MLlog, max = MaxML; + size_t const headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip); + if (FSE_isError(headerSize)) return ERROR(GENERIC); + if (MLlog > MLFSELog) return ERROR(corruption_detected); + ip += headerSize; + FSE_buildDTable(DTableML, norm, max, MLlog); + } + } } /* Build DTables */ return ip-istart; } From 4db09efde416632cb6b18025e9c976db7ba70f97 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 18 Mar 2016 22:23:49 +0100 Subject: [PATCH 107/247] simplifications --- lib/zstd_decompress.c | 141 ++++++++++++++++++------------------------ lib/zstd_internal.h | 6 +- 2 files changed, 62 insertions(+), 85 deletions(-) diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 7a38bf9b..d8d7837a 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -508,6 +508,39 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, } +/*! 
ZSTD_buildSeqTable() : + @return : nb bytes read from src, + or an error code if it fails, testable with ZSTD_isError() +*/ +static size_t ZSTD_buildSeqTable(FSE_DTable* DTable, U32 type, U32 rawBits, U32 maxLog, + const void* src, size_t srcSize) +{ + switch(type) + { + case FSE_ENCODING_RLE : + if (!srcSize) return ERROR(srcSize_wrong); + FSE_buildDTable_rle(DTable, *(const BYTE*)src); + return 1; + case FSE_ENCODING_RAW : + FSE_buildDTable_raw(DTable, rawBits); + return 0; + case FSE_ENCODING_STATIC: + return 0; + default : /* impossible */ + case FSE_ENCODING_DYNAMIC : + { U32 tableLog, max = (1< maxLog) return ERROR(corruption_detected); + FSE_buildDTable(DTable, norm, max, tableLog); + return headerSize; + } } +} + + + + size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr, FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, const void* src, size_t srcSize) @@ -516,11 +549,9 @@ size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLen const BYTE* ip = istart; const BYTE* const iend = istart + srcSize; U32 LLtype, Offtype, MLtype; - size_t dumpsLength; /* check */ - if (srcSize < MIN_SEQUENCES_SIZE) - return ERROR(srcSize_wrong); + if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong); /* SeqHead */ *nbSeq = *ip++; @@ -536,91 +567,37 @@ size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLen LLtype = *ip >> 6; Offtype = (*ip >> 4) & 3; MLtype = (*ip >> 2) & 3; - if (*ip & 2) { - dumpsLength = ip[2]; - dumpsLength += ip[1] << 8; - ip += 3; - } else { - dumpsLength = ip[1]; - dumpsLength += (ip[0] & 1) << 8; - ip += 2; + { size_t dumpsLength; + if (*ip & 2) { + dumpsLength = ip[2]; + dumpsLength += ip[1] << 8; + ip += 3; + } else { + dumpsLength = ip[1]; + dumpsLength += (ip[0] & 1) << 8; + ip += 2; + } + *dumpsPtr = ip; + ip += dumpsLength; + *dumpsLengthPtr = dumpsLength; } - *dumpsPtr = ip; - ip += dumpsLength; - *dumpsLengthPtr = dumpsLength; /* check */ 
if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */ /* Build DTables */ - { - S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL >= MaxOff */ - - switch(LLtype) - { - case FSE_ENCODING_RLE : - FSE_buildDTable_rle(DTableLL, *ip++); - break; - case FSE_ENCODING_RAW : - FSE_buildDTable_raw(DTableLL, LLbits); - break; - case FSE_ENCODING_STATIC: - break; - default : /* impossible */ - case FSE_ENCODING_DYNAMIC : - { U32 LLlog, max = MaxLL; - size_t const headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip); - if (FSE_isError(headerSize)) return ERROR(GENERIC); - if (LLlog > LLFSELog) return ERROR(corruption_detected); - ip += headerSize; - FSE_buildDTable(DTableLL, norm, max, LLlog); - } - } - - switch(Offtype) - { - case FSE_ENCODING_RLE : - if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */ - FSE_buildDTable_rle(DTableOffb, *ip++ & MaxOff); /* if *ip > MaxOff, data is corrupted */ - break; - case FSE_ENCODING_RAW : - FSE_buildDTable_raw(DTableOffb, Offbits); - break; - case FSE_ENCODING_STATIC: - break; - default : /* impossible */ - case FSE_ENCODING_DYNAMIC : - { - U32 Offlog, max = MaxOff; - size_t const headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip); - if (FSE_isError(headerSize)) return ERROR(GENERIC); - if (Offlog > OffFSELog) return ERROR(corruption_detected); - ip += headerSize; - FSE_buildDTable(DTableOffb, norm, max, Offlog); - } - } - - switch(MLtype) - { - case FSE_ENCODING_RLE : - if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */ - FSE_buildDTable_rle(DTableML, *ip++); - break; - case FSE_ENCODING_RAW : - FSE_buildDTable_raw(DTableML, MLbits); - break; - case FSE_ENCODING_STATIC: - break; - default : /* impossible */ - case FSE_ENCODING_DYNAMIC : - { U32 MLlog, max = MaxML; - size_t const headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip); - 
if (FSE_isError(headerSize)) return ERROR(GENERIC); - if (MLlog > MLFSELog) return ERROR(corruption_detected); - ip += headerSize; - FSE_buildDTable(DTableML, norm, max, MLlog); - } - } } /* Build DTables */ + { size_t const bhSize = ZSTD_buildSeqTable(DTableLL, LLtype, LLbits, LLFSELog, ip, iend-ip); + if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); + ip += bhSize; + } + { size_t const bhSize = ZSTD_buildSeqTable(DTableOffb, Offtype, Offbits, OffFSELog, ip, iend-ip); + if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); + ip += bhSize; + } + { size_t const bhSize = ZSTD_buildSeqTable(DTableML, MLtype, MLbits, MLFSELog, ip, iend-ip); + if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); + ip += bhSize; + } return ip-istart; } diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index 642c3777..c5cc64cd 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -90,6 +90,8 @@ static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; #define REPCODE_STARTVALUE 1 #define ZSTD_WINDOWLOG_ABSOLUTEMIN 12 +#define LONGNBSEQ 0x7F00 + #define Litbits 8 #define MLbits 7 #define LLbits 6 @@ -101,9 +103,7 @@ static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; #define MLFSELog 10 #define LLFSELog 10 #define OffFSELog 9 -#define MaxSeq MAX(MaxLL, MaxML) - -#define LONGNBSEQ 0x7F00 +#define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */ #define FSE_ENCODING_RAW 0 #define FSE_ENCODING_RLE 1 From f3120413cce912886d808726b481a06462e6a8db Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 19 Mar 2016 11:40:19 +0100 Subject: [PATCH 108/247] Fixed : decoder error on erroneous data --- programs/fuzzer.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/programs/fuzzer.c b/programs/fuzzer.c index 55e0c7d7..72d71ea9 100644 --- a/programs/fuzzer.c +++ b/programs/fuzzer.c @@ -596,18 +596,16 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit } /* decompress 
noisy source */ - { - U32 noiseSrc = FUZ_rand(&lseed) % 5; - const U32 endMark = 0xA9B1C3D6; - U32 endCheck; + { U32 const noiseSrc = FUZ_rand(&lseed) % 5; + U32 const endMark = 0xA9B1C3D6; srcBuffer = cNoiseBuffer[noiseSrc]; memcpy(dstBuffer+sampleSize, &endMark, 4); errorCode = ZSTD_decompress(dstBuffer, sampleSize, cBuffer, cSize); /* result *may* be an unlikely success, but even then, it must strictly respect dest buffer boundaries */ CHECK((!ZSTD_isError(errorCode)) && (errorCode>sampleSize), "ZSTD_decompress on noisy src : result is too large : %u > %u (dst buffer)", (U32)errorCode, (U32)sampleSize); - memcpy(&endCheck, dstBuffer+sampleSize, 4); - CHECK(endMark!=endCheck, "ZSTD_decompress on noisy src : dst buffer overflow"); + { U32 endCheck; memcpy(&endCheck, dstBuffer+sampleSize, 4); + CHECK(endMark!=endCheck, "ZSTD_decompress on noisy src : dst buffer overflow"); } } } From d1d210f3fb3675e0f6b79545f3fef309b02339dc Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 19 Mar 2016 12:12:07 +0100 Subject: [PATCH 109/247] minor code refactor --- lib/bitstream.h | 57 +++++++------- lib/fse.c | 2 +- lib/zstd_decompress.c | 24 +++--- programs/.gitignore | 2 + programs/fuzzer.c | 178 ++++++++++++++++-------------------------- 5 files changed, 109 insertions(+), 154 deletions(-) diff --git a/lib/bitstream.h b/lib/bitstream.h index 006a927f..44c0241f 100644 --- a/lib/bitstream.h +++ b/lib/bitstream.h @@ -56,10 +56,9 @@ extern "C" { /*-****************************************** * bitStream encoding API (write forward) ********************************************/ -/*! -* bitStream can mix input from multiple sources. -* A critical property of these streams is that they encode and decode in **reverse** direction. -* So the first bit sequence you add will be the last to be read, like a LIFO stack. +/* bitStream can mix input from multiple sources. +* A critical property of these streams is that they encode and decode in **reverse** direction. 
+* So the first bit sequence you add will be the last to be read, like a LIFO stack. */ typedef struct { @@ -75,10 +74,9 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC); MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); -/*! -* Start by initCStream, providing the size of buffer to write into. +/*Start by initCStream, providing the size of buffer to write into. * bitStream will never write outside of this buffer. -* @dstCapacity must be >= sizeof(size_t), otherwise @return will be an error code. +* `dstCapacity` must be >= sizeof(size_t), otherwise @return will be an error code. * * bits are first added to a local register. * Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems. @@ -90,7 +88,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); * * Last operation is to close the bitStream. * The function returns the final size of CStream in bytes. -* If data couldn't fit into @dstBuffer, it will return a 0 ( == not storable) +* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable) */ @@ -117,8 +115,7 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); -/*! -* Start by invoking BIT_initDStream(). +/*Start by invoking BIT_initDStream(). * A chunk of the bitStream is then stored into a local register. * Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). * You can then retrieve bitFields stored into the local register, **in reverse order**. @@ -190,7 +187,7 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits) bitC->bitPos += nbBits; } -/*! BIT_addBitsFast +/*! 
BIT_addBitsFast() : * works only if `value` is _clean_, meaning all high bits above nbBits are 0 */ MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits) { @@ -198,7 +195,7 @@ MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBi bitC->bitPos += nbBits; } -/*! BIT_flushBitsFast +/*! BIT_flushBitsFast() : * unsafe version; does not check buffer overflow */ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) { @@ -219,8 +216,8 @@ MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) bitC->bitContainer >>= nbBytes*8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */ } -/*! BIT_closeCStream - * @result : size of CStream, in bytes, or 0 if it cannot fit into dstBuffer */ +/*! BIT_closeCStream() : + * @return : size of CStream, in bytes, or 0 if it cannot fit into dstBuffer */ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) { char* endPtr; @@ -241,12 +238,12 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) /*-******************************************************** * bitStream decoding **********************************************************/ -/*!BIT_initDStream +/*!BIT_initDStream() : * Initialize a BIT_DStream_t. -* @bitD : a pointer to an already allocated BIT_DStream_t structure -* @srcBuffer must point at the beginning of a bitStream -* @srcSize must be the exact size of the bitStream -* @result : size of stream (== srcSize) or an errorCode if a problem is detected +* `bitD` : a pointer to an already allocated BIT_DStream_t structure. +* `srcBuffer` must point at the beginning of a bitStream. +* `srcSize` must be the exact size of the bitStream. 
+* @return : size of stream (== srcSize) or an errorCode if a problem is detected */ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) { @@ -284,24 +281,24 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si return srcSize; } -/*!BIT_lookBits - * Provides next n bits from local register - * local register is not modified (bits are still present for next read/look) - * On 32-bits, maxNbBits==25 - * On 64-bits, maxNbBits==57 +/*!BIT_lookBits() : + * Provides next n bits from local register. + * local register is not modified (bits are still present for next read/look). + * On 32-bits, maxNbBits==24. + * On 64-bits, maxNbBits==56. * @return : value extracted */ MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits) { - const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1; + U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1; return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask); } -/*! BIT_lookBitsFast : +/*! BIT_lookBitsFast*() : * unsafe version; only works only if nbBits >= 1 */ MEM_STATIC size_t BIT_lookBitsFast(BIT_DStream_t* bitD, U32 nbBits) { - const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1; + U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1; return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask); } @@ -310,7 +307,7 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) bitD->bitsConsumed += nbBits; } -/*!BIT_readBits +/*!BIT_readBits() : * Read next n bits from local register. * pay attention to not read more than nbBits contained into local register. * @return : extracted value. 
@@ -322,7 +319,7 @@ MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) return value; } -/*!BIT_readBitsFast : +/*!BIT_readBitsFast() : * unsafe version; only works only if nbBits >= 1 */ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits) { @@ -360,7 +357,7 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) } } -/*! BIT_endOfDStream +/*! BIT_endOfDStream() : * @return Tells if DStream has reached its exact end */ MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) diff --git a/lib/fse.c b/lib/fse.c index a445f328..291e6419 100644 --- a/lib/fse.c +++ b/lib/fse.c @@ -466,7 +466,7 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t bitStream >>= 2; } { - const short max = (short)((2*threshold-1)-remaining); + short const max = (short)((2*threshold-1)-remaining); short count; if ((bitStream & (threshold-1)) < (U32)max) { diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index d8d7837a..5cf9c17c 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -515,11 +515,12 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, static size_t ZSTD_buildSeqTable(FSE_DTable* DTable, U32 type, U32 rawBits, U32 maxLog, const void* src, size_t srcSize) { + U32 max = (1< max, data is corrupted */ return 1; case FSE_ENCODING_RAW : FSE_buildDTable_raw(DTable, rawBits); @@ -528,7 +529,7 @@ static size_t ZSTD_buildSeqTable(FSE_DTable* DTable, U32 type, U32 rawBits, U32 return 0; default : /* impossible */ case FSE_ENCODING_DYNAMIC : - { U32 tableLog, max = (1<stateOffb)); /* <= maxOff, by table construction */ - const U32 nbBits = offsetCode ? 
offsetCode-1 : 0; + { static const U32 offsetPrefix[MaxOff+1] = { + 1 /*fake*/, 1, 2, 4, 8, 0x10, 0x20, 0x40, + 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, + 0x8000, 0x10000, 0x20000, 0x40000, 0x80000, 0x100000, 0x200000, 0x400000, + 0x800000, 0x1000000, 0x2000000, 0x4000000, /*fake*/ 1, 1, 1, 1 }; + U32 const offsetCode = FSE_peakSymbol(&(seqState->stateOffb)); /* <= maxOff, by table construction */ + U32 const nbBits = offsetCode ? offsetCode-1 : 0; offset = offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits); if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); if (offsetCode==0) offset = litLength ? seq->offset : seqState->prevOffset; @@ -727,8 +726,7 @@ FORCE_INLINE size_t ZSTD_execSequence(BYTE* op, return sequenceLength; } /* span extDict & currentPrefixSegment */ - { - size_t length1 = dictEnd - match; + { size_t const length1 = dictEnd - match; memmove(oLitEnd, match, length1); op = oLitEnd + length1; sequence.matchLength -= length1; diff --git a/programs/.gitignore b/programs/.gitignore index 525037b9..a1d72aad 100644 --- a/programs/.gitignore +++ b/programs/.gitignore @@ -5,6 +5,8 @@ fullbench fullbench32 fuzzer fuzzer32 +zbufftest +zbufftest32 datagen paramgrill diff --git a/programs/fuzzer.c b/programs/fuzzer.c index 72d71ea9..8bd2a186 100644 --- a/programs/fuzzer.c +++ b/programs/fuzzer.c @@ -153,8 +153,7 @@ static int basicUnitTests(U32 seed, double compressibility) CNBuffer = malloc(COMPRESSIBLE_NOISE_LENGTH); compressedBuffer = malloc(ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH)); decodedBuffer = malloc(COMPRESSIBLE_NOISE_LENGTH); - if (!CNBuffer || !compressedBuffer || !decodedBuffer) - { + if (!CNBuffer || !compressedBuffer || !decodedBuffer) { DISPLAY("Not enough memory, aborting\n"); testResult = 1; goto _end; @@ -174,11 +173,9 @@ static int basicUnitTests(U32 seed, double compressibility) if (result != COMPRESSIBLE_NOISE_LENGTH) goto _output_error; DISPLAYLEVEL(4, "OK \n"); - { - size_t i; + { size_t i; 
DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++); - for (i=0; i>= 3; - if (buffNb & 7) - { + if (buffNb & 7) { const U32 tnb[2] = { 1, 3 }; buffNb = tnb[buffNb >> 3]; - } - else - { + } else { const U32 tnb[2] = { 0, 4 }; buffNb = tnb[buffNb >> 3]; } @@ -506,7 +492,6 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit crcOrig = XXH64(sampleBuffer, sampleSize, 0); /* compression test */ - //cLevelMod = MAX(1, 38 - (int)(MAX(9, sampleSizeLog) * 2)); /* high levels only for small samples, for manageable speed */ cLevelMod = MIN( ZSTD_maxCLevel(), (U32)MAX(1, 55 - 3*(int)sampleSizeLog) ); /* high levels only for small samples, for manageable speed */ cLevel = (FUZ_rand(&lseed) % cLevelMod) +1; cSize = ZSTD_compressCCtx(ctx, cBuffer, cBufferSize, sampleBuffer, sampleSize, cLevel); @@ -517,12 +502,11 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit const size_t missing = (FUZ_rand(&lseed) % (cSize-2)) + 1; /* no problem, as cSize > 4 (frameHeaderSizer) */ const size_t tooSmallSize = cSize - missing; static const U32 endMark = 0x4DC2B1A9; - U32 endCheck; memcpy(dstBuffer+tooSmallSize, &endMark, 4); errorCode = ZSTD_compressCCtx(ctx, dstBuffer, tooSmallSize, sampleBuffer, sampleSize, cLevel); CHECK(!ZSTD_isError(errorCode), "ZSTD_compressCCtx should have failed ! 
(buffer too small : %u < %u)", (U32)tooSmallSize, (U32)cSize); - memcpy(&endCheck, dstBuffer+tooSmallSize, 4); - CHECK(endCheck != endMark, "ZSTD_compressCCtx : dst buffer overflow"); + { U32 endCheck; memcpy(&endCheck, dstBuffer+tooSmallSize, 4); + CHECK(endCheck != endMark, "ZSTD_compressCCtx : dst buffer overflow"); } } /* decompression header test */ @@ -542,8 +526,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit free(sampleBuffer); /* no longer useful after this point */ /* truncated src decompression test */ - { - const size_t missing = (FUZ_rand(&lseed) % (cSize-2)) + 1; /* no problem, as cSize > 4 (frameHeaderSizer) */ + { const size_t missing = (FUZ_rand(&lseed) % (cSize-2)) + 1; /* no problem, as cSize > 4 (frameHeaderSizer) */ const size_t tooSmallSize = cSize - missing; void* cBufferTooSmall = malloc(tooSmallSize); /* valgrind will catch overflows */ CHECK(cBufferTooSmall == NULL, "not enough memory !"); @@ -554,8 +537,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit } /* too small dst decompression test */ - if (sampleSize > 3) - { + if (sampleSize > 3) { const size_t missing = (FUZ_rand(&lseed) % (sampleSize-2)) + 1; /* no problem, as cSize > 4 (frameHeaderSizer) */ const size_t tooSmallSize = sampleSize - missing; static const BYTE token = 0xA9; @@ -566,39 +548,32 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit } /* noisy src decompression test */ - if (cSize > 6) - { - const U32 maxNbBits = FUZ_highbit32((U32)(cSize-4)); - size_t pos = 4; /* preserve magic number (too easy to detect) */ - U32 nbBits = FUZ_rand(&lseed) % maxNbBits; - size_t mask = (1<0) nbBits--; - mask = (1< cSize ) noiseLength = cSize-pos; - noiseStart = FUZ_rand(&lseed) % (srcBufferSize - noiseLength); - memcpy(cBuffer + pos, srcBuffer + noiseStart, noiseLength); - pos += noiseLength; - - /* keep some original src */ - nbBits = FUZ_rand(&lseed) % maxNbBits; - mask = (1< 6) 
{ + /* insert noise into src */ + { U32 const maxNbBits = FUZ_highbit32((U32)(cSize-4)); + size_t pos = 4; /* preserve magic number (too easy to detect) */ + for (;;) { + /* keep some original src */ + { U32 const nbBits = FUZ_rand(&lseed) % maxNbBits; + size_t const mask = (1<0) nbBits--; + mask = (1< cSize ) noiseLength = cSize-pos; + noiseStart = FUZ_rand(&lseed) % (srcBufferSize - noiseLength); + memcpy(cBuffer + pos, srcBuffer + noiseStart, noiseLength); + pos += noiseLength; + } } } /* decompress noisy source */ - { U32 const noiseSrc = FUZ_rand(&lseed) % 5; - U32 const endMark = 0xA9B1C3D6; - srcBuffer = cNoiseBuffer[noiseSrc]; + { U32 const endMark = 0xA9B1C3D6; memcpy(dstBuffer+sampleSize, &endMark, 4); errorCode = ZSTD_decompress(dstBuffer, sampleSize, cBuffer, cSize); /* result *may* be an unlikely success, but even then, it must strictly respect dest buffer boundaries */ @@ -606,8 +581,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit "ZSTD_decompress on noisy src : result is too large : %u > %u (dst buffer)", (U32)errorCode, (U32)sampleSize); { U32 endCheck; memcpy(&endCheck, dstBuffer+sampleSize, 4); CHECK(endMark!=endCheck, "ZSTD_decompress on noisy src : dst buffer overflow"); } - } - } + } } /* noisy src decompression test */ /* Streaming compression of scattered segments test */ XXH64_reset(xxh64, 0); @@ -629,8 +603,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit errorCode = ZSTD_copyCCtx(ctx, refCtx); CHECK (ZSTD_isError(errorCode), "ZSTD_copyCCtx error : %s", ZSTD_getErrorName(errorCode)); totalTestSize = 0; cSize = 0; - for (n=0; n='0') && (*argument<='9')) - { + while ((*argument>='0') && (*argument<='9')) { nbTests *= 10; nbTests += *argument - '0'; argument++; @@ -777,8 +742,7 @@ int main(int argc, char** argv) case 'T': argument++; nbTests=0; g_testTime=0; - while ((*argument>='0') && (*argument<='9')) - { + while ((*argument>='0') && (*argument<='9')) { g_testTime *= 
10; g_testTime += *argument - '0'; argument++; @@ -792,8 +756,7 @@ int main(int argc, char** argv) argument++; seed=0; seedset=1; - while ((*argument>='0') && (*argument<='9')) - { + while ((*argument>='0') && (*argument<='9')) { seed *= 10; seed += *argument - '0'; argument++; @@ -803,8 +766,7 @@ int main(int argc, char** argv) case 't': argument++; testNb=0; - while ((*argument>='0') && (*argument<='9')) - { + while ((*argument>='0') && (*argument<='9')) { testNb *= 10; testNb += *argument - '0'; argument++; @@ -814,8 +776,7 @@ int main(int argc, char** argv) case 'P': /* compressibility % */ argument++; proba=0; - while ((*argument>='0') && (*argument<='9')) - { + while ((*argument>='0') && (*argument<='9')) { proba *= 10; proba += *argument - '0'; argument++; @@ -826,10 +787,7 @@ int main(int argc, char** argv) default: return FUZ_usage(programName); - } - } - } - } + } } } } /* for (argNb=1; argNb Date: Sat, 19 Mar 2016 12:47:52 +0100 Subject: [PATCH 110/247] minor refactoring --- programs/fuzzer.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/programs/fuzzer.c b/programs/fuzzer.c index 8bd2a186..a7aa728d 100644 --- a/programs/fuzzer.c +++ b/programs/fuzzer.c @@ -445,13 +445,12 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit srcBuffer = cNoiseBuffer[2]; /* catch up testNb */ - for (testNb=1; testNb < startTest; testNb++) - FUZ_rand(&coreSeed); + for (testNb=1; testNb < startTest; testNb++) FUZ_rand(&coreSeed); - /* test loop */ + /* main test loop */ for ( ; (testNb <= nbTests) || (FUZ_GetMilliSpan(startTime) < g_testTime); testNb++ ) { size_t sampleSize, sampleStart, maxTestSize, totalTestSize; - size_t cSize, dSize, dSupSize, errorCode, totalCSize, totalGenSize; + size_t cSize, dSize, errorCode, totalCSize, totalGenSize; U32 sampleSizeLog, buffNb, cLevelMod, nbChunks, n; XXH64_CREATESTATE_STATIC(xxh64); U64 crcOrig, crcDest; @@ -460,9 +459,8 @@ int fuzzerTests(U32 
seed, U32 nbTests, unsigned startTest, double compressibilit const BYTE* dict; size_t dictSize; - /* init */ - if (nbTests >= testNb) - { DISPLAYUPDATE(2, "\r%6u/%6u ", testNb, nbTests); } + /* notification */ + if (nbTests >= testNb) { DISPLAYUPDATE(2, "\r%6u/%6u ", testNb, nbTests); } else { DISPLAYUPDATE(2, "\r%6u ", testNb); } FUZ_rand(&coreSeed); @@ -477,8 +475,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit } else { const U32 tnb[2] = { 0, 4 }; buffNb = tnb[buffNb >> 3]; - } - } + } } srcBuffer = cNoiseBuffer[buffNb]; sampleSizeLog = FUZ_rand(&lseed) % maxSampleLog; sampleSize = (size_t)1 << sampleSizeLog; @@ -509,19 +506,20 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit CHECK(endCheck != endMark, "ZSTD_compressCCtx : dst buffer overflow"); } } - /* decompression header test */ + /* frame header decompression test */ { ZSTD_frameParams dParams; size_t const check = ZSTD_getFrameParams(&dParams, cBuffer, cSize); CHECK(ZSTD_isError(check), "Frame Parameters extraction failed"); CHECK(dParams.frameContentSize != sampleSize, "Frame content size incorrect"); } - /* successfull decompression tests*/ - dSupSize = (FUZ_rand(&lseed) & 1) ? 0 : (FUZ_rand(&lseed) & 31) + 1; - dSize = ZSTD_decompress(dstBuffer, sampleSize + dSupSize, cBuffer, cSize); - CHECK(dSize != sampleSize, "ZSTD_decompress failed (%s) (srcSize : %u ; cSize : %u)", ZSTD_getErrorName(dSize), (U32)sampleSize, (U32)cSize); - crcDest = XXH64(dstBuffer, sampleSize, 0); - CHECK(crcOrig != crcDest, "decompression result corrupted (pos %u / %u)", (U32)findDiff(sampleBuffer, dstBuffer, sampleSize), (U32)sampleSize); + /* successful decompression test */ + { size_t margin = (FUZ_rand(&lseed) & 1) ? 
0 : (FUZ_rand(&lseed) & 31) + 1; + dSize = ZSTD_decompress(dstBuffer, sampleSize + margin, cBuffer, cSize); + CHECK(dSize != sampleSize, "ZSTD_decompress failed (%s) (srcSize : %u ; cSize : %u)", ZSTD_getErrorName(dSize), (U32)sampleSize, (U32)cSize); + crcDest = XXH64(dstBuffer, sampleSize, 0); + CHECK(crcOrig != crcDest, "decompression result corrupted (pos %u / %u)", (U32)findDiff(sampleBuffer, dstBuffer, sampleSize), (U32)sampleSize); + } free(sampleBuffer); /* no longer useful after this point */ @@ -576,7 +574,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit { U32 const endMark = 0xA9B1C3D6; memcpy(dstBuffer+sampleSize, &endMark, 4); errorCode = ZSTD_decompress(dstBuffer, sampleSize, cBuffer, cSize); - /* result *may* be an unlikely success, but even then, it must strictly respect dest buffer boundaries */ + /* result *may* be an unlikely success, but even then, it must strictly respect dst buffer boundaries */ CHECK((!ZSTD_isError(errorCode)) && (errorCode>sampleSize), "ZSTD_decompress on noisy src : result is too large : %u > %u (dst buffer)", (U32)errorCode, (U32)sampleSize); { U32 endCheck; memcpy(&endCheck, dstBuffer+sampleSize, 4); From 0d9ce04cd5769247572b64e15f454c0c16b8dfe2 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 19 Mar 2016 13:21:08 +0100 Subject: [PATCH 111/247] minor refactoring --- programs/fuzzer.c | 112 +++++++++++++++++++++------------------------- 1 file changed, 51 insertions(+), 61 deletions(-) diff --git a/programs/fuzzer.c b/programs/fuzzer.c index a7aa728d..d53586c1 100644 --- a/programs/fuzzer.c +++ b/programs/fuzzer.c @@ -1,6 +1,6 @@ /* Fuzzer test tool for zstd - Copyright (C) Yann Collet 2014-2105 + Copyright (C) Yann Collet 2014-2016 GPL v2 License @@ -19,11 +19,10 @@ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
You can contact the author at : - - ZSTD source repository : https://github.com/Cyan4973/zstd - - ZSTD public forum : https://groups.google.com/forum/#!forum/lz4c + - ZSTD homepage : http://www.zstd.net */ -/************************************** +/*-************************************ * Compiler specific **************************************/ #ifdef _MSC_VER /* Visual Studio */ @@ -32,14 +31,8 @@ # pragma warning(disable : 4146) /* disable: C4146: minus unsigned expression */ #endif -#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) -#ifdef __GNUC__ -# pragma GCC diagnostic ignored "-Wmissing-braces" /* GCC bug 53119 : doesn't accept { 0 } as initializer (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119) */ -# pragma GCC diagnostic ignored "-Wmissing-field-initializers" /* GCC bug 53119 : doesn't accept { 0 } as initializer (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119) */ -#endif - -/************************************** +/*-************************************ * Includes **************************************/ #include /* free */ @@ -52,8 +45,8 @@ #include "mem.h" -/************************************** - Constants +/*-************************************ +* Constants **************************************/ #ifndef ZSTD_VERSION # define ZSTD_VERSION "" @@ -63,15 +56,12 @@ #define MB *(1U<<20) #define GB *(1U<<30) +static const size_t COMPRESSIBLE_NOISE_LENGTH = 10 MB; /* capital, used to be a macro */ +static const U32 FUZ_compressibility_default = 50; static const U32 nbTestsDefault = 30000; -#define COMPRESSIBLE_NOISE_LENGTH (10 MB) -#define FUZ_COMPRESSIBILITY_DEFAULT 50 -static const U32 prime1 = 2654435761U; -static const U32 prime2 = 2246822519U; - -/************************************** +/*-************************************ * Display Macros **************************************/ #define DISPLAY(...) 
fprintf(stderr, __VA_ARGS__) @@ -85,10 +75,8 @@ static U32 g_displayLevel = 2; static const U32 g_refreshRate = 150; static U32 g_displayTime = 0; -static U32 g_testTime = 0; - -/********************************************************* +/*-******************************************************* * Fuzzer functions *********************************************************/ #define MIN(a,b) ((a)<(b)?(a):(b)) @@ -117,6 +105,8 @@ static U32 FUZ_GetMilliSpan(U32 nTimeStart) # define FUZ_rotl32(x,r) ((x << r) | (x >> (32 - r))) unsigned int FUZ_rand(unsigned int* src) { + static const U32 prime1 = 2654435761U; + static const U32 prime2 = 2246822519U; U32 rand32 = *src; rand32 *= prime1; rand32 += prime2; @@ -130,8 +120,7 @@ static unsigned FUZ_highbit32(U32 v32) { unsigned nbBits = 0; if (v32==0) return 0; - while (v32) - { + while (v32) { v32 >>= 1; nbBits ++; } @@ -161,13 +150,13 @@ static int basicUnitTests(U32 seed, double compressibility) RDG_genBuffer(CNBuffer, COMPRESSIBLE_NOISE_LENGTH, compressibility, 0., randState); /* Basic tests */ - DISPLAYLEVEL(4, "test%3i : compress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH); + DISPLAYLEVEL(4, "test%3i : compress %u bytes : ", testNb++, (U32)COMPRESSIBLE_NOISE_LENGTH); result = ZSTD_compress(compressedBuffer, ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH), CNBuffer, COMPRESSIBLE_NOISE_LENGTH, 1); if (ZSTD_isError(result)) goto _output_error; cSize = result; DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/COMPRESSIBLE_NOISE_LENGTH*100); - DISPLAYLEVEL(4, "test%3i : decompress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH); + DISPLAYLEVEL(4, "test%3i : decompress %u bytes : ", testNb++, (U32)COMPRESSIBLE_NOISE_LENGTH); result = ZSTD_decompress(decodedBuffer, COMPRESSIBLE_NOISE_LENGTH, compressedBuffer, cSize); if (ZSTD_isError(result)) goto _output_error; if (result != COMPRESSIBLE_NOISE_LENGTH) goto _output_error; @@ -342,35 +331,36 @@ static int basicUnitTests(U32 seed, double 
compressibility) #define NB3BYTESSEQLOG 9 #define NB3BYTESSEQ (1 << NB3BYTESSEQLOG) #define NB3BYTESSEQMASK (NB3BYTESSEQ-1) + /* creates a buffer full of 3-bytes sequences */ { BYTE _3BytesSeqs[NB3BYTESSEQ][3]; - U32 r = 1; + U32 rSeed = 1; + /* create batch of 3-bytes sequences */ { int i; for (i=0; i < NB3BYTESSEQ; i++) { - _3BytesSeqs[i][0] = (BYTE)(FUZ_rand(&r) & 255); - _3BytesSeqs[i][1] = (BYTE)(FUZ_rand(&r) & 255); - _3BytesSeqs[i][2] = (BYTE)(FUZ_rand(&r) & 255); + _3BytesSeqs[i][0] = (BYTE)(FUZ_rand(&rSeed) & 255); + _3BytesSeqs[i][1] = (BYTE)(FUZ_rand(&rSeed) & 255); + _3BytesSeqs[i][2] = (BYTE)(FUZ_rand(&rSeed) & 255); }} - { int i; for (i=0; i < _3BYTESTESTLENGTH; ) { - U32 id = FUZ_rand(&r) & NB3BYTESSEQMASK; + /* randomly fills CNBuffer with prepared 3-bytes sequences */ + { int i; for (i=0; i < _3BYTESTESTLENGTH; ) { /* note : CNBuffer size > _3BYTESTESTLENGTH+3 */ + U32 id = FUZ_rand(&rSeed) & NB3BYTESSEQMASK; ((BYTE*)CNBuffer)[i+0] = _3BytesSeqs[id][0]; ((BYTE*)CNBuffer)[i+1] = _3BytesSeqs[id][1]; ((BYTE*)CNBuffer)[i+2] = _3BytesSeqs[id][2]; i += 3; - }} + } }} + DISPLAYLEVEL(4, "test%3i : compress lots 3-bytes sequences : ", testNb++); + result = ZSTD_compress(compressedBuffer, ZSTD_compressBound(_3BYTESTESTLENGTH), CNBuffer, _3BYTESTESTLENGTH, 19); + if (ZSTD_isError(result)) goto _output_error; + cSize = result; + DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/_3BYTESTESTLENGTH*100); - DISPLAYLEVEL(4, "test%3i : compress lots 3-bytes sequences : ", testNb++); - result = ZSTD_compress(compressedBuffer, ZSTD_compressBound(_3BYTESTESTLENGTH), CNBuffer, _3BYTESTESTLENGTH, 19); - if (ZSTD_isError(result)) goto _output_error; - cSize = result; - DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/_3BYTESTESTLENGTH*100); - - DISPLAYLEVEL(4, "test%3i : decompress lots 3-bytes sequence : ", testNb++); - result = ZSTD_decompress(decodedBuffer, _3BYTESTESTLENGTH, compressedBuffer, cSize); - if (ZSTD_isError(result)) 
goto _output_error; - if (result != _3BYTESTESTLENGTH) goto _output_error; - DISPLAYLEVEL(4, "OK \n"); - } + DISPLAYLEVEL(4, "test%3i : decompress lots 3-bytes sequence : ", testNb++); + result = ZSTD_decompress(decodedBuffer, _3BYTESTESTLENGTH, compressedBuffer, cSize); + if (ZSTD_isError(result)) goto _output_error; + if (result != _3BYTESTESTLENGTH) goto _output_error; + DISPLAYLEVEL(4, "OK \n"); _end: free(CNBuffer); @@ -396,13 +386,13 @@ static size_t findDiff(const void* buf1, const void* buf2, size_t max) return i; } -# define CHECK(cond, ...) if (cond) { DISPLAY("Error => "); DISPLAY(__VA_ARGS__); \ - DISPLAY(" (seed %u, test nb %u) \n", seed, testNb); goto _output_error; } +#define CHECK(cond, ...) if (cond) { DISPLAY("Error => "); DISPLAY(__VA_ARGS__); \ + DISPLAY(" (seed %u, test nb %u) \n", seed, testNb); goto _output_error; } static const U32 maxSrcLog = 23; static const U32 maxSampleLog = 22; -int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibility) +int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 maxDuration, double compressibility) { BYTE* cNoiseBuffer[5]; BYTE* srcBuffer; @@ -448,7 +438,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit for (testNb=1; testNb < startTest; testNb++) FUZ_rand(&coreSeed); /* main test loop */ - for ( ; (testNb <= nbTests) || (FUZ_GetMilliSpan(startTime) < g_testTime); testNb++ ) { + for ( ; (testNb <= nbTests) || (FUZ_GetMilliSpan(startTime) < maxDuration); testNb++ ) { size_t sampleSize, sampleStart, maxTestSize, totalTestSize; size_t cSize, dSize, errorCode, totalCSize, totalGenSize; U32 sampleSizeLog, buffNb, cLevelMod, nbChunks, n; @@ -464,7 +454,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit else { DISPLAYUPDATE(2, "\r%6u ", testNb); } FUZ_rand(&coreSeed); - lseed = coreSeed ^ prime1; + { U32 const prime1 = 2654435761U; lseed = coreSeed ^ prime1; } buffNb = FUZ_rand(&lseed) & 127; if (buffNb & 7) 
buffNb=2; else { @@ -680,7 +670,7 @@ int FUZ_usage(const char* programName) DISPLAY( " -i# : Nb of tests (default:%u) \n", nbTestsDefault); DISPLAY( " -s# : Select seed (default:prompt user)\n"); DISPLAY( " -t# : Select starting test number (default:0)\n"); - DISPLAY( " -P# : Select compressibility in %% (default:%i%%)\n", FUZ_COMPRESSIBILITY_DEFAULT); + DISPLAY( " -P# : Select compressibility in %% (default:%u%%)\n", FUZ_compressibility_default); DISPLAY( " -v : verbose\n"); DISPLAY( " -p : pause at the end\n"); DISPLAY( " -h : display help and exit\n"); @@ -695,9 +685,10 @@ int main(int argc, const char** argv) int argNb; int nbTests = nbTestsDefault; int testNb = 0; - int proba = FUZ_COMPRESSIBILITY_DEFAULT; + U32 proba = FUZ_compressibility_default; int result=0; U32 mainPause = 0; + U32 maxDuration = 0; const char* programName; /* Check command line */ @@ -728,7 +719,7 @@ int main(int argc, const char** argv) break; case 'i': - argument++; g_testTime=0; + argument++; maxDuration=0; nbTests=0; while ((*argument>='0') && (*argument<='9')) { nbTests *= 10; @@ -739,15 +730,15 @@ int main(int argc, const char** argv) case 'T': argument++; - nbTests=0; g_testTime=0; + nbTests=0; maxDuration=0; while ((*argument>='0') && (*argument<='9')) { - g_testTime *= 10; - g_testTime += *argument - '0'; + maxDuration *= 10; + maxDuration += *argument - '0'; argument++; } - if (*argument=='m') g_testTime *=60, argument++; + if (*argument=='m') maxDuration *=60, argument++; if (*argument=='n') argument++; - g_testTime *= 1000; + maxDuration *= 1000; break; case 's': @@ -779,7 +770,6 @@ int main(int argc, const char** argv) proba += *argument - '0'; argument++; } - if (proba<0) proba=0; if (proba>100) proba=100; break; @@ -792,12 +782,12 @@ int main(int argc, const char** argv) if (!seedset) seed = FUZ_GetMilliStart() % 10000; DISPLAY("Seed = %u\n", seed); - if (proba!=FUZ_COMPRESSIBILITY_DEFAULT) DISPLAY("Compressibility : %i%%\n", proba); + if 
(proba!=FUZ_compressibility_default) DISPLAY("Compressibility : %u%%\n", proba); if (testNb==0) result = basicUnitTests(0, ((double)proba) / 100); /* constant seed for predictability */ if (!result) - result = fuzzerTests(seed, nbTests, testNb, ((double)proba) / 100); + result = fuzzerTests(seed, nbTests, testNb, maxDuration, ((double)proba) / 100); if (mainPause) { int unused; DISPLAY("Press Enter \n"); From 01e5b951996411f5280e48c0703debeea9d47382 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 19 Mar 2016 14:14:31 +0100 Subject: [PATCH 112/247] code refactor --- lib/bitstream.h | 119 +++++++++++++++++++++++------------------- lib/zstd_decompress.c | 93 ++++++++++++++------------------- 2 files changed, 104 insertions(+), 108 deletions(-) diff --git a/lib/bitstream.h b/lib/bitstream.h index 44c0241f..e1237930 100644 --- a/lib/bitstream.h +++ b/lib/bitstream.h @@ -41,7 +41,7 @@ extern "C" { /* -* This API consists of small unitary functions, which highly benefit from being inlined. +* This API consists of small unitary functions, which must be inlined for best performance. * Since link-time-optimization is not available for all compilers, * these functions are defined into a .h to be included. */ @@ -74,21 +74,21 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC); MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); -/*Start by initCStream, providing the size of buffer to write into. -* bitStream will never write outside of this buffer. -* `dstCapacity` must be >= sizeof(size_t), otherwise @return will be an error code. +/* Start with initCStream, providing the size of buffer to write into. +* bitStream will never write outside of this buffer. +* `dstCapacity` must be >= sizeof(size_t), otherwise @return will be an error code. * -* bits are first added to a local register. -* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems. 
-* Writing data into memory is an explicit operation, performed by the flushBits function. -* Hence keep track how many bits are potentially stored into local register to avoid register overflow. -* After a flushBits, a maximum of 7 bits might still be stored into local register. +* bits are first added to a local register. +* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems. +* Writing data into memory is an explicit operation, performed by the flushBits function. +* Hence keep track how many bits are potentially stored into local register to avoid register overflow. +* After a flushBits, a maximum of 7 bits might still be stored into local register. * -* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers. +* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers. * -* Last operation is to close the bitStream. -* The function returns the final size of CStream in bytes. -* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable) +* Last operation is to close the bitStream. +* The function returns the final size of CStream in bytes. +* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable) */ @@ -115,14 +115,14 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); -/*Start by invoking BIT_initDStream(). -* A chunk of the bitStream is then stored into a local register. -* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). -* You can then retrieve bitFields stored into the local register, **in reverse order**. -* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. -* A reload guarantee a minimum of ((8*sizeof(size_t))-7) bits when its result is BIT_DStream_unfinished. -* Otherwise, it can be less than that, so proceed accordingly. 
-* Checking if DStream has reached its end can be performed with BIT_endOfDStream() +/* Start by invoking BIT_initDStream(). +* A chunk of the bitStream is then stored into a local register. +* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). +* You can then retrieve bitFields stored into the local register, **in reverse order**. +* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. +* A reload guarantee a minimum of ((8*sizeof(size_t))-7) bits when its result is BIT_DStream_unfinished. +* Otherwise, it can be less than that, so proceed accordingly. +* Checking if DStream has reached its end can be performed with BIT_endOfDStream() */ @@ -169,17 +169,24 @@ MEM_STATIC unsigned BIT_highbit32 (register U32 val) /*-************************************************************** * bitStream encoding ****************************************************************/ -MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* startPtr, size_t maxSize) +/*! BIT_initCStream() : + * `dstCapacity` must be > sizeof(void*) + * @return : 0 if success, + otherwise an error code (can be tested using ERR_isError() ) */ +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* startPtr, size_t dstCapacity) { bitC->bitContainer = 0; bitC->bitPos = 0; bitC->startPtr = (char*)startPtr; bitC->ptr = bitC->startPtr; - bitC->endPtr = bitC->startPtr + maxSize - sizeof(bitC->ptr); - if (maxSize < sizeof(bitC->ptr)) return ERROR(dstSize_tooSmall); + bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->ptr); + if (dstCapacity <= sizeof(bitC->ptr)) return ERROR(dstSize_tooSmall); return 0; } +/*! BIT_addBits() : + can add up to 26 bits into `bitC`. + Does not check for register overflow ! 
*/ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits) { static const unsigned mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF }; /* up to 26 bits */ @@ -206,6 +213,9 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) bitC->bitContainer >>= nbBytes*8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */ } +/*! BIT_flushBits() : + * safe version; check for buffer overflow, and prevents it. + * note : does not signal buffer overflow. This will be revealed later on using BIT_closeCStream() */ MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) { size_t nbBytes = bitC->bitPos >> 3; @@ -217,33 +227,28 @@ MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) } /*! BIT_closeCStream() : - * @return : size of CStream, in bytes, or 0 if it cannot fit into dstBuffer */ + * @return : size of CStream, in bytes, + or 0 if it could not fit into dstBuffer */ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) { - char* endPtr; - BIT_addBitsFast(bitC, 1, 1); /* endMark */ BIT_flushBits(bitC); - if (bitC->ptr >= bitC->endPtr) /* too close to buffer's end */ - return 0; /* not storable */ + if (bitC->ptr >= bitC->endPtr) return 0; /* doesn't fit within authorized budget : cancel */ - endPtr = bitC->ptr; - endPtr += bitC->bitPos > 0; /* remaining bits (incomplete byte) */ - - return (endPtr - bitC->startPtr); + return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); } /*-******************************************************** * bitStream decoding **********************************************************/ -/*!BIT_initDStream() : -* Initialize a BIT_DStream_t. -* `bitD` : a pointer to an already allocated BIT_DStream_t structure. -* `srcBuffer` must point at the beginning of a bitStream. -* `srcSize` must be the exact size of the bitStream. 
-* @return : size of stream (== srcSize) or an errorCode if a problem is detected +/*! BIT_initDStream() : +* Initialize a BIT_DStream_t. +* `bitD` : a pointer to an already allocated BIT_DStream_t structure. +* `srcBuffer` must point at the beginning of a bitStream. +* `srcSize` must be the exact size of the bitStream, in bytes. +* @return : size of stream (== srcSize) or an errorCode if a problem is detected */ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) { @@ -281,12 +286,12 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si return srcSize; } -/*!BIT_lookBits() : - * Provides next n bits from local register. - * local register is not modified (bits are still present for next read/look). - * On 32-bits, maxNbBits==24. - * On 64-bits, maxNbBits==56. - * @return : value extracted +/*! BIT_lookBits() : + * Provides next n bits from local register. + * local register is not modified (bits are still present for next read/look). + * On 32-bits, maxNbBits==24. + * On 64-bits, maxNbBits==56. + * @return : value extracted */ MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits) { @@ -294,7 +299,7 @@ MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits) return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask); } -/*! BIT_lookBitsFast*() : +/*! BIT_lookBitsFast() : * unsafe version; only works only if nbBits >= 1 */ MEM_STATIC size_t BIT_lookBitsFast(BIT_DStream_t* bitD, U32 nbBits) { @@ -307,10 +312,10 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) bitD->bitsConsumed += nbBits; } -/*!BIT_readBits() : - * Read next n bits from local register. - * pay attention to not read more than nbBits contained into local register. - * @return : extracted value. +/*! BIT_readBits() : + * Read next n bits from local register. + * pay attention to not read more than nbBits contained into local register. 
+ * @return : extracted value. */ MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) { @@ -319,8 +324,8 @@ MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) return value; } -/*!BIT_readBitsFast() : -* unsafe version; only works only if nbBits >= 1 */ +/*! BIT_readBitsFast() : +* unsafe version; only works only if nbBits >= 1 */ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits) { size_t value = BIT_lookBitsFast(bitD, nbBits); @@ -328,6 +333,11 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits) return value; } +/*! BIT_reloadDStream() : +* Refill `BIT_DStream_t` from src buffer previously defined (see BIT_initDStream() ). +* This function is safe, it guarantees it will not read beyond src buffer. +* @return : status of `BIT_DStream_t` internal register. + if status == unfinished, internal register is filled with >= (sizeof(size_t)*8 - 7) bits */ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) { if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */ @@ -343,8 +353,7 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; return BIT_DStream_completed; } - { - U32 nbBytes = bitD->bitsConsumed >> 3; + { U32 nbBytes = bitD->bitsConsumed >> 3; BIT_DStream_status result = BIT_DStream_unfinished; if (bitD->ptr - nbBytes < bitD->start) { nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ @@ -358,7 +367,7 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) } /*! BIT_endOfDStream() : -* @return Tells if DStream has reached its exact end +* @return Tells if DStream has exactly reached its end (all bits consumed). 
*/ MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) { diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 5cf9c17c..35404417 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -26,7 +26,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. You can contact the author at : - - zstd source repository : https://github.com/Cyan4973/zstd + - zstd homepage : http://www.zstd.net */ /* *************************************************************** @@ -84,16 +84,6 @@ #endif -/*-************************************* -* Local types -***************************************/ -typedef struct -{ - blockType_t blockType; - U32 origSize; -} blockProperties_t; - - /*_******************************************************* * Memory operations **********************************************************/ @@ -335,7 +325,7 @@ size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t /** ZSTD_decodeFrameHeader() : * `srcSize` must be the size provided by ZSTD_frameHeaderSize(). -* @return : 0, or an error code, which can be tested using ZSTD_isError() */ +* @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* zc, const void* src, size_t srcSize) { size_t result = ZSTD_getFrameParams(&(zc->fParams), src, srcSize); @@ -344,19 +334,23 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* zc, const void* src, size_t srcS } +typedef struct +{ + blockType_t blockType; + U32 origSize; +} blockProperties_t; + +/*! 
ZSTD_getcBlockSize() : +* Provides the size of compressed block from block header `src` */ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr) { const BYTE* const in = (const BYTE* const)src; - BYTE headerFlags; - U32 cSize; + size_t cSize; - if (srcSize < 3) - return ERROR(srcSize_wrong); + if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); - headerFlags = *in; + bpPtr->blockType = (blockType_t)((*in) >> 6); cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16); - - bpPtr->blockType = (blockType_t)(headerFlags >> 6); bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0; if (bpPtr->blockType == bt_end) return 0; @@ -365,9 +359,9 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bp } -static size_t ZSTD_copyRawBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize) +static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize) { - if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall); + if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall); memcpy(dst, src, srcSize); return srcSize; } @@ -619,7 +613,6 @@ typedef struct { } seqState_t; - static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) { const BYTE* dumps = seqState->dumps; @@ -659,8 +652,7 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); /* MatchLength */ - { - size_t matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); + { size_t matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); if (matchLength == MaxML) { const U32 add = *dumps++; if (add < 255) matchLength += add; @@ -755,8 +747,7 @@ FORCE_INLINE size_t ZSTD_execSequence(BYTE* op, match += oend_8 - op; op = oend_8; } - while (op < oMatchEnd) - *op++ = *match++; + while (op < oMatchEnd) *op++ = *match++; } else { ZSTD_wildcopy(op, match, sequence.matchLength-8); 
/* works even if matchLength < 8 */ } @@ -826,8 +817,7 @@ static size_t ZSTD_decompressSequences( } /* last literal segment */ - { - size_t lastLLSize = litEnd - litPtr; + { size_t const lastLLSize = litEnd - litPtr; if (litPtr > litEnd) return ERROR(corruption_detected); /* too many literals already used */ if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall); memcpy(op, litPtr, lastLLSize); @@ -882,39 +872,36 @@ size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, /*! ZSTD_decompress_continueDCtx() : * `dctx` must have been properly initialized */ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, + void* dst, size_t dstCapacity, const void* src, size_t srcSize) { const BYTE* ip = (const BYTE*)src; const BYTE* iend = ip + srcSize; BYTE* const ostart = (BYTE* const)dst; BYTE* op = ostart; - BYTE* const oend = ostart + maxDstSize; + BYTE* const oend = ostart + dstCapacity; size_t remainingSize = srcSize; blockProperties_t blockProperties; - /* Frame Header */ - { - size_t frameHeaderSize, errorCode; - if (srcSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); + /* check */ + if (srcSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1) - { - const U32 magicNumber = MEM_readLE32(src); - if (ZSTD_isLegacy(magicNumber)) - return ZSTD_decompressLegacy(dst, maxDstSize, src, srcSize, magicNumber); - } + { const U32 magicNumber = MEM_readLE32(src); + if (ZSTD_isLegacy(magicNumber)) + return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, magicNumber); + } #endif - frameHeaderSize = ZSTD_frameHeaderSize(src, ZSTD_frameHeaderSize_min); + + /* Frame Header */ + { size_t const frameHeaderSize = ZSTD_frameHeaderSize(src, ZSTD_frameHeaderSize_min); if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize; if (srcSize < frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); - errorCode = 
ZSTD_decodeFrameHeader(dctx, src, frameHeaderSize); - if (ZSTD_isError(errorCode)) return errorCode; + if (ZSTD_decodeFrameHeader(dctx, src, frameHeaderSize)) return ERROR(corruption_detected); ip += frameHeaderSize; remainingSize -= frameHeaderSize; } /* Loop on each block */ - while (1) - { + while (1) { size_t decodedSize=0; size_t cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties); if (ZSTD_isError(cBlockSize)) return cBlockSize; @@ -954,45 +941,45 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, size_t ZSTD_decompress_usingPreparedDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* refDCtx, - void* dst, size_t maxDstSize, + void* dst, size_t dstCapacity, const void* src, size_t srcSize) { ZSTD_copyDCtx(dctx, refDCtx); ZSTD_checkContinuity(dctx, dst); - return ZSTD_decompressFrame(dctx, dst, maxDstSize, src, srcSize); + return ZSTD_decompressFrame(dctx, dst, dstCapacity, src, srcSize); } size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, + void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize) { ZSTD_decompressBegin_usingDict(dctx, dict, dictSize); ZSTD_LOG_BLOCK("%p: ZSTD_decompressBegin_usingDict searchLength=%d\n", dctx->base, dctx->params.searchLength); ZSTD_checkContinuity(dctx, dst); - return ZSTD_decompressFrame(dctx, dst, maxDstSize, src, srcSize); + return ZSTD_decompressFrame(dctx, dst, dstCapacity, src, srcSize); } -size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) { - return ZSTD_decompress_usingDict(dctx, dst, maxDstSize, src, srcSize, NULL, 0); + return ZSTD_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, NULL, 0); } -size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize) +size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize) { #if 
defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE==1) size_t regenSize; ZSTD_DCtx* dctx = ZSTD_createDCtx(); if (dctx==NULL) return ERROR(memory_allocation); - regenSize = ZSTD_decompressDCtx(dctx, dst, maxDstSize, src, srcSize); + regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize); ZSTD_freeDCtx(dctx); return regenSize; #else ZSTD_DCtx dctx; - return ZSTD_decompressDCtx(&dctx, dst, maxDstSize, src, srcSize); + return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize); #endif } From 90a03a95a1a20ce8b8cf4c5036a6869678ff0515 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 19 Mar 2016 14:47:31 +0100 Subject: [PATCH 113/247] minor performance fix (decompression) --- lib/zstd_decompress.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 35404417..6b9b1297 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -506,7 +506,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, @return : nb bytes read from src, or an error code if it fails, testable with ZSTD_isError() */ -static size_t ZSTD_buildSeqTable(FSE_DTable* DTable, U32 type, U32 rawBits, U32 maxLog, +FORCE_INLINE size_t ZSTD_buildSeqTable(FSE_DTable* DTable, U32 type, U32 rawBits, U32 maxLog, const void* src, size_t srcSize) { U32 max = (1< maxLog) return ERROR(corruption_detected); FSE_buildDTable(DTable, norm, max, tableLog); return headerSize; From 37f3d1b2ab8266a90a046e86ff7575176e5259ba Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 19 Mar 2016 15:11:42 +0100 Subject: [PATCH 114/247] minor refactor --- lib/zstd_compress.c | 12 ++++-------- lib/zstd_decompress.c | 3 --- lib/zstd_internal.h | 33 ++++++++++++++++----------------- 3 files changed, 20 insertions(+), 28 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index ebe4cdc9..e7c249bf 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -948,7 +948,6 @@ static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) 
/*-************************************* * Fast Scan ***************************************/ -#define FILLHASHSTEP 3 static void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls) { U32* const hashTable = zc->hashTable; @@ -956,10 +955,11 @@ static void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls) const BYTE* const base = zc->base; const BYTE* ip = base + zc->nextToUpdate; const BYTE* const iend = ((const BYTE*)end) - 8; + const size_t fastHashFillStep = 3; while(ip <= iend) { hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base); - ip += FILLHASHSTEP; + ip += fastHashFillStep; } } @@ -980,10 +980,8 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc, const BYTE* const lowest = base + lowIndex; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - 8; - size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE; - /* init */ ZSTD_resetSeqStore(seqStorePtr); if (ip < lowest+REPCODE_STARTVALUE) ip = lowest+REPCODE_STARTVALUE; @@ -2196,9 +2194,7 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* zc, /* Write Frame Header into ctx headerBuffer */ MEM_writeLE32(zc->headerBuffer, ZSTD_MAGICNUMBER); - { - BYTE* const op = (BYTE*)zc->headerBuffer; - U32 const fcsSize[4] = { 0, 1, 2, 8 }; + { BYTE* const op = (BYTE*)zc->headerBuffer; U32 const fcsId = (params.srcSize>0) + (params.srcSize>=256) + (params.srcSize>=65536+256); /* 0-3 */ BYTE fdescriptor = (BYTE)(params.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN); /* windowLog : 4 KB - 128 MB */ fdescriptor |= (BYTE)((params.searchLength==3)<<4); /* mml : 3-4 */ @@ -2212,7 +2208,7 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* zc, case 2 : MEM_writeLE16(op+5, (U16)(params.srcSize-256)); break; case 3 : MEM_writeLE64(op+5, (U64)(params.srcSize)); break; } - zc->hbSize = ZSTD_frameHeaderSize_min + fcsSize[fcsId]; + zc->hbSize = ZSTD_frameHeaderSize_min + ZSTD_fcs_fieldSize[fcsId]; } zc->stage = 0; diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 
6b9b1297..ecc4329b 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -75,7 +75,6 @@ # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ # pragma warning(disable : 4324) /* disable: C4324: padded structure */ #else -# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) # ifdef __GNUC__ # define FORCE_INLINE static inline __attribute__((always_inline)) # else @@ -276,8 +275,6 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) TO DO */ -static const size_t ZSTD_fcs_fieldSize[4] = { 0, 1, 2, 8 }; - /** ZSTD_frameHeaderSize() : * srcSize must be >= ZSTD_frameHeaderSize_min. * @return : size of the Frame Header */ diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index c5cc64cd..ba350c4f 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -71,9 +71,6 @@ #define MB *(1 <<20) #define GB *(1U<<30) -#define ZSTD_BLOCKHEADERSIZE 3 /* because C standard does not allow a static const value to be defined using another static const value .... :( */ -static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; - #define BIT7 128 #define BIT6 64 #define BIT5 32 @@ -81,16 +78,28 @@ static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; #define BIT1 2 #define BIT0 1 +#define ZSTD_WINDOWLOG_ABSOLUTEMIN 12 +static const size_t ZSTD_fcs_fieldSize[4] = { 0, 1, 2, 8 }; + +#define ZSTD_BLOCKHEADERSIZE 3 /* because C standard does not allow a static const value to be defined using another static const value .... 
:( */ +static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; +typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t; + +#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ +#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ + +#define HufLog 12 + #define IS_HUF 0 #define IS_PCH 1 #define IS_RAW 2 #define IS_RLE 3 +#define LONGNBSEQ 0x7F00 + #define MINMATCH 4 #define REPCODE_STARTVALUE 1 -#define ZSTD_WINDOWLOG_ABSOLUTEMIN 12 - -#define LONGNBSEQ 0x7F00 +#define HASHLOG3 17 #define Litbits 8 #define MLbits 7 @@ -110,26 +119,16 @@ static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; #define FSE_ENCODING_STATIC 2 #define FSE_ENCODING_DYNAMIC 3 -#define HufLog 12 -#define HASHLOG3 17 - -#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ -#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ - -#define WILDCOPY_OVERLENGTH 8 - -typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t; - /*-******************************************* * Shared functions to include for inlining *********************************************/ static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); } - #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; } /*! 
ZSTD_wildcopy() : * custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */ +#define WILDCOPY_OVERLENGTH 8 MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, size_t length) { const BYTE* ip = (const BYTE*)src; From bb604486f1e60beb21e73110d27802b16b31bc59 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 19 Mar 2016 15:18:42 +0100 Subject: [PATCH 115/247] fixed clang warning --- lib/zstd_compress.c | 2 +- lib/zstd_decompress.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index e7c249bf..7b2cc143 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -62,7 +62,7 @@ /*-************************************* * Constants ***************************************/ -static const U32 g_searchStrength = 8; +static const U32 g_searchStrength = 8; /* control skip over incompressible data */ /*-************************************* diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index ecc4329b..88734829 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -342,7 +342,7 @@ typedef struct size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr) { const BYTE* const in = (const BYTE* const)src; - size_t cSize; + U32 cSize; if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); From 1269e2f7db27c7994b57c93b6bee63b64ab401e9 Mon Sep 17 00:00:00 2001 From: inikep Date: Sat, 19 Mar 2016 15:32:21 +0100 Subject: [PATCH 116/247] working kSlotNew for ZSTD_btopt --- lib/zstd_opt.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index 69750aaa..a978c72f 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -528,7 +528,8 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, if (opt[cur].off >= ZSTD_REP_NUM) { opt[cur].rep[3] = (kSlotNew < 3) ? opt[cur-mlen].rep[2] : opt[cur-mlen].rep[3]; opt[cur].rep[2] = (kSlotNew < 2) ? 
opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; - opt[cur].rep[1] = (kSlotNew < 1) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; + opt[cur].rep[1] = (kSlotNew < 1) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; + opt[cur].rep[0] = opt[cur-mlen].rep[0]; opt[cur].rep[kSlotNew] = opt[cur].off - ZSTD_REP_MOVE; ZSTD_LOG_ENCODE("%d: COPYREP_OFF cur=%d mlen=%d rep=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]); } else { From e74215e3c73283e09c2f12dd8845f80882add813 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 19 Mar 2016 16:09:09 +0100 Subject: [PATCH 117/247] performance fix for small files --- lib/zstd_compress.c | 59 ++++++++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 7b2cc143..bdf5569e 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -143,17 +143,15 @@ static unsigned ZSTD_highbit(U32 val); optimize for `srcSize` if srcSize > 0 */ void ZSTD_validateParams(ZSTD_parameters* params) { - const U32 btPlus = (params->strategy == ZSTD_btlazy2) || (params->strategy == ZSTD_btopt); - const U32 searchLengthMax = (params->strategy == ZSTD_fast) ? ZSTD_SEARCHLENGTH_MAX : ZSTD_SEARCHLENGTH_MAX-1; - const U32 searchLengthMin = (params->strategy == ZSTD_btopt) ? ZSTD_SEARCHLENGTH_MIN : ZSTD_SEARCHLENGTH_MIN+1; - /* validate params */ if (MEM_32bits()) if (params->windowLog > 25) params->windowLog = 25; /* 32 bits mode cannot flush > 24 bits */ CLAMP(params->windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); CLAMP(params->contentLog, ZSTD_CONTENTLOG_MIN, ZSTD_CONTENTLOG_MAX); CLAMP(params->hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); CLAMP(params->searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); - CLAMP(params->searchLength, searchLengthMin, searchLengthMax); + { U32 const searchLengthMin = (params->strategy == ZSTD_btopt) ? 
ZSTD_SEARCHLENGTH_MIN : ZSTD_SEARCHLENGTH_MIN+1; + U32 const searchLengthMax = (params->strategy == ZSTD_fast) ? ZSTD_SEARCHLENGTH_MAX : ZSTD_SEARCHLENGTH_MAX-1; + CLAMP(params->searchLength, searchLengthMin, searchLengthMax); } CLAMP(params->targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX); if ((U32)params->strategy>(U32)ZSTD_btopt) params->strategy = ZSTD_btopt; @@ -163,23 +161,18 @@ void ZSTD_validateParams(ZSTD_parameters* params) if (params->windowLog > srcLog) params->windowLog = srcLog; } if (params->windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) params->windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */ - if (params->contentLog > params->windowLog+btPlus) params->contentLog = params->windowLog+btPlus; /* <= ZSTD_CONTENTLOG_MAX */ + { U32 const btPlus = (params->strategy == ZSTD_btlazy2) || (params->strategy == ZSTD_btopt); + if (params->contentLog > params->windowLog+btPlus) params->contentLog = params->windowLog+btPlus; } /* <= ZSTD_CONTENTLOG_MAX */ } size_t ZSTD_sizeofCCtx(ZSTD_parameters params) /* hidden interface, for paramagrill */ -{ /* copy / pasted from ZSTD_resetCCtx_advanced */ - const size_t blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params.windowLog); - const U32 contentLog = (params.strategy == ZSTD_fast) ? 1 : params.contentLog; - const U32 divider = (params.searchLength==3) ? 3 : 4; - const size_t maxNbSeq = blockSize / divider; - const size_t tokenSpace = blockSize + 8*maxNbSeq; - const size_t tableSpace = ((1 << contentLog) + (1 << params.hashLog) + (1 << HASHLOG3)) * sizeof(U32); - const size_t optSpace = ((1<workSpaceSize; + ZSTD_freeCCtx(zc); + return size; } } @@ -187,24 +180,29 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, ZSTD_parameters params) { /* note : params considered validated here */ const size_t blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params.windowLog); - const U32 contentLog = (params.strategy == ZSTD_fast) ? 
1 : params.contentLog; const U32 divider = (params.searchLength==3) ? 3 : 4; const size_t maxNbSeq = blockSize / divider; const size_t tokenSpace = blockSize + 8*maxNbSeq; - const size_t tableSpace = ((1 << contentLog) + (1 << params.hashLog) + (1 << HASHLOG3)) * sizeof(U32); - const size_t optSpace = ((1<workSpaceSize < neededSpace) { - free(zc->workSpace); - zc->workSpace = malloc(neededSpace); - if (zc->workSpace == NULL) return ERROR(memory_allocation); - zc->workSpaceSize = neededSpace; + /* Check if workSpace is large enough, alloc a new one if needed */ + { size_t const optSpace = ((1<workSpaceSize < neededSpace) { + free(zc->workSpace); + zc->workSpace = malloc(neededSpace); + if (zc->workSpace == NULL) return ERROR(memory_allocation); + zc->workSpaceSize = neededSpace; + } } + memset(zc->workSpace, 0, tableSpace ); /* reset only tables */ zc->hashTable3 = (U32*)(zc->workSpace); - zc->hashTable = zc->hashTable3 + ((size_t)1 << HASHLOG3); + zc->hashTable = zc->hashTable3 + h3Size; zc->contentTable = zc->hashTable + ((size_t)1 << params.hashLog); zc->seqStore.buffer = zc->contentTable + ((size_t)1 << contentLog); zc->hufTable = (HUF_CElt*)zc->seqStore.buffer; @@ -251,7 +249,8 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx) { const U32 contentLog = (srcCCtx->params.strategy == ZSTD_fast) ? 1 : srcCCtx->params.contentLog; - const size_t tableSpace = ((1 << contentLog) + (1 << srcCCtx->params.hashLog) + (1 << HASHLOG3)) * sizeof(U32); + const size_t h3Size = (srcCCtx->params.searchLength == 3) ? 
(1 << HASHLOG3) : 0; + const size_t tableSpace = ((1 << contentLog) + (1 << srcCCtx->params.hashLog) + h3Size) * sizeof(U32); if (srcCCtx->stage!=0) return ERROR(stage_wrong); From c6eea2b2f2ed0a9c8a47fd57aab3873f3024ff20 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 19 Mar 2016 17:18:00 +0100 Subject: [PATCH 118/247] Improved speed for small files --- lib/zstd_compress.c | 51 ++++++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index bdf5569e..87e54fa5 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -157,12 +157,14 @@ void ZSTD_validateParams(ZSTD_parameters* params) /* correct params, to use less memory */ if ((params->srcSize > 0) && (params->srcSize < (1<srcSize)-1) + 1; + U32 const srcLog = ZSTD_highbit((U32)(params->srcSize)-1) + 1; if (params->windowLog > srcLog) params->windowLog = srcLog; } - if (params->windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) params->windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */ + if (params->hashLog > params->windowLog) params->hashLog = params->windowLog; { U32 const btPlus = (params->strategy == ZSTD_btlazy2) || (params->strategy == ZSTD_btopt); if (params->contentLog > params->windowLog+btPlus) params->contentLog = params->windowLog+btPlus; } /* <= ZSTD_CONTENTLOG_MAX */ + + if (params->windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) params->windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */ } @@ -170,9 +172,9 @@ size_t ZSTD_sizeofCCtx(ZSTD_parameters params) /* hidden interface, for parama { ZSTD_CCtx* zc = ZSTD_createCCtx(); ZSTD_compressBegin_advanced(zc, NULL, 0, params); - { size_t size = sizeof(*zc) + zc->workSpaceSize; - ZSTD_freeCCtx(zc); - return size; } + { size_t const size = sizeof(*zc) + zc->workSpaceSize; + ZSTD_freeCCtx(zc); + return size; } } @@ -183,9 +185,10 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, const U32 divider = (params.searchLength==3) ? 
3 : 4; const size_t maxNbSeq = blockSize / divider; const size_t tokenSpace = blockSize + 8*maxNbSeq; - const U32 contentLog = (params.strategy == ZSTD_fast) ? 1 : params.contentLog; + const size_t contentSize = (params.strategy == ZSTD_fast) ? 0 : (1 << params.contentLog); + const size_t hSize = 1 << params.hashLog; const size_t h3Size = (params.searchLength==3) ? (1 << HASHLOG3) : 0; - const size_t tableSpace = ((1 << contentLog) + (1 << params.hashLog) + h3Size) * sizeof(U32); + const size_t tableSpace = (contentSize + hSize + h3Size) * sizeof(U32); /* Check if workSpace is large enough, alloc a new one if needed */ { size_t const optSpace = ((1<workSpace = malloc(neededSpace); if (zc->workSpace == NULL) return ERROR(memory_allocation); zc->workSpaceSize = neededSpace; - } - } + } } memset(zc->workSpace, 0, tableSpace ); /* reset only tables */ zc->hashTable3 = (U32*)(zc->workSpace); zc->hashTable = zc->hashTable3 + h3Size; - zc->contentTable = zc->hashTable + ((size_t)1 << params.hashLog); - zc->seqStore.buffer = zc->contentTable + ((size_t)1 << contentLog); + zc->contentTable = zc->hashTable + hSize; + zc->seqStore.buffer = zc->contentTable + contentSize; zc->hufTable = (HUF_CElt*)zc->seqStore.buffer; zc->flagStaticTables = 0; zc->seqStore.buffer = (U32*)(zc->seqStore.buffer) + 256; @@ -248,30 +250,31 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, * @return : 0, or an error code */ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx) { - const U32 contentLog = (srcCCtx->params.strategy == ZSTD_fast) ? 1 : srcCCtx->params.contentLog; - const size_t h3Size = (srcCCtx->params.searchLength == 3) ? 
(1 << HASHLOG3) : 0; - const size_t tableSpace = ((1 << contentLog) + (1 << srcCCtx->params.hashLog) + h3Size) * sizeof(U32); - if (srcCCtx->stage!=0) return ERROR(stage_wrong); ZSTD_resetCCtx_advanced(dstCCtx, srcCCtx->params); /* copy tables */ - memcpy(dstCCtx->workSpace, srcCCtx->workSpace, tableSpace); + { const size_t contentSize = (srcCCtx->params.strategy == ZSTD_fast) ? 0 : (1 << srcCCtx->params.contentLog); + const size_t hSize = 1 << srcCCtx->params.hashLog; + const size_t h3Size = (srcCCtx->params.searchLength == 3) ? (1 << HASHLOG3) : 0; + const size_t tableSpace = (contentSize + hSize + h3Size) * sizeof(U32); + memcpy(dstCCtx->workSpace, srcCCtx->workSpace, tableSpace); + } /* copy frame header */ dstCCtx->hbSize = srcCCtx->hbSize; memcpy(dstCCtx->headerBuffer , srcCCtx->headerBuffer, srcCCtx->hbSize); /* copy dictionary pointers */ - dstCCtx->nextToUpdate= srcCCtx->nextToUpdate; - dstCCtx->nextToUpdate3 = srcCCtx->nextToUpdate3; - dstCCtx->nextSrc = srcCCtx->nextSrc; - dstCCtx->base = srcCCtx->base; - dstCCtx->dictBase = srcCCtx->dictBase; - dstCCtx->dictLimit = srcCCtx->dictLimit; - dstCCtx->lowLimit = srcCCtx->lowLimit; - dstCCtx->loadedDictEnd = srcCCtx->loadedDictEnd; + dstCCtx->nextToUpdate = srcCCtx->nextToUpdate; + dstCCtx->nextToUpdate3= srcCCtx->nextToUpdate3; + dstCCtx->nextSrc = srcCCtx->nextSrc; + dstCCtx->base = srcCCtx->base; + dstCCtx->dictBase = srcCCtx->dictBase; + dstCCtx->dictLimit = srcCCtx->dictLimit; + dstCCtx->lowLimit = srcCCtx->lowLimit; + dstCCtx->loadedDictEnd= srcCCtx->loadedDictEnd; /* copy entropy tables */ dstCCtx->flagStaticTables = srcCCtx->flagStaticTables; From 70e45771791b4f6363dcc934725a5ad6722ba9fd Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 19 Mar 2016 18:08:32 +0100 Subject: [PATCH 119/247] minor code refactoring --- lib/zstd_compress.c | 142 ++++++++++++++++++++------------------------ programs/bench.c | 2 +- 2 files changed, 66 insertions(+), 78 deletions(-) diff --git a/lib/zstd_compress.c 
b/lib/zstd_compress.c index 87e54fa5..c075d50e 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -155,7 +155,7 @@ void ZSTD_validateParams(ZSTD_parameters* params) CLAMP(params->targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX); if ((U32)params->strategy>(U32)ZSTD_btopt) params->strategy = ZSTD_btopt; - /* correct params, to use less memory */ + /* resize params, to use less memory when necessary */ if ((params->srcSize > 0) && (params->srcSize < (1<srcSize)-1) + 1; if (params->windowLog > srcLog) params->windowLog = srcLog; @@ -703,8 +703,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, zc->flagStaticTables = 0; /* Encoding Sequences */ - { - BIT_CStream_t blockStream; + { BIT_CStream_t blockStream; FSE_CState_t stateMatchLength; FSE_CState_t stateOffsetBits; FSE_CState_t stateLitLength; @@ -739,14 +738,13 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, FSE_flushCState(&blockStream, &stateLitLength); { size_t const streamSize = BIT_closeCStream(&blockStream); - if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */ - op += streamSize; } + if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */ + op += streamSize; } } /* check compressibility */ _check_compressibility: - { - size_t const minGain = ZSTD_minGain(srcSize); + { size_t const minGain = ZSTD_minGain(srcSize); size_t const maxCSize = srcSize - minGain; if ((size_t)(op-ostart) >= maxCSize) return 0; } @@ -1028,8 +1026,8 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc, while ( (ip <= ilimit) && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { /* store sequence */ - size_t rlCode = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend); - size_t tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ + size_t const rlCode = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend); + { size_t const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ hashTable[ZSTD_hashPtr(ip, 
hBits, mls)] = (U32)(ip-base); ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rlCode); ip += rlCode+MINMATCH; @@ -1037,8 +1035,8 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc, continue; /* faster when present ... (?) */ } } } - { /* Last Literals */ - size_t lastLLSize = iend - anchor; + /* Last Literals */ + { size_t const lastLLSize = iend - anchor; memcpy(seqStorePtr->lit, anchor, lastLLSize); seqStorePtr->lit += lastLLSize; } @@ -1156,8 +1154,7 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, } } } /* Last Literals */ - { - size_t lastLLSize = iend - anchor; + { size_t const lastLLSize = iend - anchor; memcpy(seqStorePtr->lit, anchor, lastLLSize); seqStorePtr->lit += lastLLSize; } @@ -1600,10 +1597,9 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, if (depth==0) goto _storeSequence; } - { - /* first search (depth 0) */ - size_t offsetFound = 99999999; - size_t ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); + /* first search (depth 0) */ + { size_t offsetFound = 99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); if (ml2 > matchLength) matchLength = ml2, start = ip, offset=offsetFound; } @@ -1618,17 +1614,16 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, while (ip= MINMATCH) && (gain2 > gain1)) matchLength = mlRep, offset = 0, start = ip; } - { - size_t offset2=999999; - size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); - int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ - int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4); + { size_t offset2=99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); + int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4); if ((ml2 >= MINMATCH) && (gain2 > gain1)) { matchLength = ml2, offset = offset2, start = ip; continue; /* search a better one */ @@ 
-1638,17 +1633,16 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, if ((depth==2) && (ip= MINMATCH) && (gain2 > gain1)) matchLength = ml2, offset = 0, start = ip; } - { - size_t offset2=999999; - size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); - int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ - int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7); + { size_t offset2=99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); + int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7); if ((ml2 >= MINMATCH) && (gain2 > gain1)) { matchLength = ml2, offset = offset2, start = ip; continue; @@ -1665,8 +1659,7 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, /* store sequence */ _storeSequence: - { - size_t litLength = start - anchor; + { size_t const litLength = start - anchor; ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH); anchor = ip = start + matchLength; } @@ -1686,8 +1679,7 @@ _storeSequence: } } /* Last Literals */ - { - size_t lastLLSize = iend - anchor; + { size_t const lastLLSize = iend - anchor; memcpy(seqStorePtr->lit, anchor, lastLLSize); seqStorePtr->lit += lastLLSize; } @@ -1771,10 +1763,9 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, if (depth==0) goto _storeSequence; } } - { - /* first search (depth 0) */ - size_t offsetFound = 99999999; - size_t ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); + /* first search (depth 0) */ + { size_t offsetFound = 99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); if (ml2 > matchLength) matchLength = ml2, start = ip, offset=offsetFound; } @@ -1798,19 +1789,18 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected */ const BYTE* const repEnd = repIndex < 
dictLimit ? dictEnd : iend; - size_t repLength = ZSTD_count_2segments(ip+MINMATCH, repMatch+MINMATCH, iend, repEnd, prefixStart) + MINMATCH; - int gain2 = (int)(repLength * 3); - int gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1); + size_t const repLength = ZSTD_count_2segments(ip+MINMATCH, repMatch+MINMATCH, iend, repEnd, prefixStart) + MINMATCH; + int const gain2 = (int)(repLength * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1); if ((repLength >= MINMATCH) && (gain2 > gain1)) matchLength = repLength, offset = 0, start = ip; } } /* search match, depth 1 */ - { - size_t offset2=999999; - size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); - int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ - int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4); + { size_t offset2=99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); + int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4); if ((ml2 >= MINMATCH) && (gain2 > gain1)) { matchLength = ml2, offset = offset2, start = ip; continue; /* search a better one */ @@ -1837,11 +1827,10 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, } } /* search match, depth 2 */ - { - size_t offset2=999999; - size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); - int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ - int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7); + { size_t offset2=99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); + int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7); if ((ml2 >= MINMATCH) && (gain2 > gain1)) { matchLength = ml2, offset = offset2, start = ip; continue; @@ -1964,8 +1953,9 @@ 
static size_t ZSTD_compress_generic (ZSTD_CCtx* zc, if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE) return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */ if (remaining < blockSize) blockSize = remaining; - if ((U32)(ip+blockSize - zc->base) > zc->loadedDictEnd + maxDist) { /* enforce maxDist */ - U32 newLowLimit = (U32)(ip+blockSize - zc->base) - maxDist; + if ((U32)(ip+blockSize - zc->base) > zc->loadedDictEnd + maxDist) { + /* enforce maxDist */ + U32 const newLowLimit = (U32)(ip+blockSize - zc->base) - maxDist; if (zc->lowLimit < newLowLimit) zc->lowLimit = newLowLimit; if (zc->dictLimit < zc->lowLimit) zc->dictLimit = zc->lowLimit; } @@ -2020,7 +2010,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* zc, /* Check if blocks follow each other */ if (src != zc->nextSrc) { /* not contiguous */ - size_t delta = zc->nextSrc - ip; + size_t const delta = zc->nextSrc - ip; zc->lowLimit = zc->dictLimit; zc->dictLimit = (U32)(zc->nextSrc - zc->base); zc->dictBase = zc->base; @@ -2031,10 +2021,10 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* zc, /* preemptive overflow correction */ if (zc->lowLimit > (1<<30)) { - U32 btplus = (zc->params.strategy == ZSTD_btlazy2) || (zc->params.strategy == ZSTD_btopt); - U32 contentMask = (1 << (zc->params.contentLog - btplus)) - 1; - U32 newLowLimit = zc->lowLimit & contentMask; /* preserve position % contentSize */ - U32 correction = zc->lowLimit - newLowLimit; + U32 const btplus = (zc->params.strategy == ZSTD_btlazy2) || (zc->params.strategy == ZSTD_btopt); + U32 const contentMask = (1 << (zc->params.contentLog - btplus)) - 1; + U32 const newLowLimit = zc->lowLimit & contentMask; /* preserve position % contentSize */ + U32 const correction = zc->lowLimit - newLowLimit; ZSTD_reduceIndex(zc, correction); zc->base += correction; zc->dictBase += correction; @@ -2051,10 +2041,9 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* zc, } zc->nextSrc = ip + srcSize; - { - 
size_t cSize; - if (frame) cSize = ZSTD_compress_generic (zc, dst, dstSize, src, srcSize); - else cSize = ZSTD_compressBlock_internal (zc, dst, dstSize, src, srcSize); + { size_t const cSize = frame ? + ZSTD_compress_generic (zc, dst, dstSize, src, srcSize) : + ZSTD_compressBlock_internal (zc, dst, dstSize, src, srcSize); if (ZSTD_isError(cSize)) return cSize; return cSize + hbSize; } @@ -2138,7 +2127,7 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* zc, const void* dict, size_t short litlengthNCount[MaxLL+1]; unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog; - const size_t hufHeaderSize = HUF_readCTable(zc->hufTable, 255, dict, dictSize); + size_t const hufHeaderSize = HUF_readCTable(zc->hufTable, 255, dict, dictSize); if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted); zc->flagStaticTables = 1; dict = (const char*)dict + hufHeaderSize; @@ -2241,7 +2230,7 @@ size_t ZSTD_compressEnd(ZSTD_CCtx* zc, void* dst, size_t dstCapacity) BYTE* op = (BYTE*)dst; size_t hbSize = 0; - /* empty frame */ + /* special case : empty frame : header still within internal buffer */ if (zc->stage==0) { hbSize = zc->hbSize; if (dstCapacity <= hbSize) return ERROR(dstSize_tooSmall); @@ -2265,16 +2254,15 @@ size_t ZSTD_compress_usingPreparedCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* prepare void* dst, size_t dstCapacity, const void* src, size_t srcSize) { - size_t outSize; - size_t errorCode = ZSTD_copyCCtx(cctx, preparedCCtx); - if (ZSTD_isError(errorCode)) return errorCode; - errorCode = ZSTD_compressContinue(cctx, dst, dstCapacity, src, srcSize); - if (ZSTD_isError(errorCode)) return errorCode; - outSize = errorCode; - errorCode = ZSTD_compressEnd(cctx, (char*)dst+outSize, dstCapacity-outSize); - if (ZSTD_isError(errorCode)) return errorCode; - outSize += errorCode; - return outSize; + { size_t const errorCode = ZSTD_copyCCtx(cctx, preparedCCtx); + if (ZSTD_isError(errorCode)) return errorCode; + } + { size_t const cSize = ZSTD_compressContinue(cctx, dst, 
dstCapacity, src, srcSize); + if (ZSTD_isError(cSize)) return cSize; + { size_t const endSize = ZSTD_compressEnd(cctx, (char*)dst+cSize, dstCapacity-cSize); + if (ZSTD_isError(endSize)) return endSize; + return cSize + endSize; + } } } diff --git a/programs/bench.c b/programs/bench.c index d1341fe1..40be13f5 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -347,7 +347,7 @@ _findError: #endif } - DISPLAY("%2i-\n", cLevel); + DISPLAY("%2i#\n", cLevel); } /* clean up */ From a5c2c08c68a232d951fbd720880a1409370c1030 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 20 Mar 2016 01:09:18 +0100 Subject: [PATCH 120/247] code refactoring --- lib/zstd_compress.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index c075d50e..7147a329 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -501,7 +501,7 @@ static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, cons } -size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; } +static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; } static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc, void* dst, size_t dstCapacity, @@ -514,8 +514,14 @@ static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc, U32 hType = IS_HUF; size_t cLitSize; - if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */ + /* small ? don't even attempt compression (speed opt) */ +# define LITERAL_NOENTROPY 63 + { size_t const minLitSize = zc->flagStaticTables ? 
6 : LITERAL_NOENTROPY; + if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + } + + if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */ if (zc->flagStaticTables && (lhSize==3)) { hType = IS_PCH; singleStream = 1; @@ -581,14 +587,9 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, BYTE* seqHead; /* Compress literals */ -# define LITERAL_NOENTROPY 63 /* don't even attempt to compress literals below this threshold (cheap heuristic) */ - { - const BYTE* const literals = seqStorePtr->litStart; + { const BYTE* const literals = seqStorePtr->litStart; size_t const litSize = seqStorePtr->lit - literals; - size_t const minLitSize = zc->flagStaticTables ? 6 : LITERAL_NOENTROPY; - size_t const cSize = (litSize <= minLitSize) ? - ZSTD_noCompressLiterals(op, dstCapacity, literals, litSize) : - ZSTD_compressLiterals(zc, op, dstCapacity, literals, litSize); + size_t const cSize = ZSTD_compressLiterals(zc, op, dstCapacity, literals, litSize); if (ZSTD_isError(cSize)) return cSize; op += cSize; } @@ -604,8 +605,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, if ((oend-op) < 3 /* dumps */ + 1 /*seqHead*/) return ERROR(dstSize_tooSmall); seqHead = op; - { - size_t const dumpsLength = seqStorePtr->dumps - seqStorePtr->dumpsStart; + { size_t const dumpsLength = seqStorePtr->dumps - seqStorePtr->dumpsStart; if (dumpsLength < 512) { op[0] = (BYTE)(dumpsLength >> 8); op[1] = (BYTE)(dumpsLength); From 516ba880223d756b9d16ded3e12b3989b20ecd0e Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 20 Mar 2016 05:40:39 +0100 Subject: [PATCH 121/247] updated huff0 --- lib/huff0.c | 231 ++++++++++++++++++++------------------------- lib/huff0.h | 22 ++--- lib/huff0_static.h | 4 +- 3 files changed, 114 insertions(+), 143 deletions(-) diff --git a/lib/huff0.c b/lib/huff0.c index 7afb1337..505adcec 100644 --- a/lib/huff0.c +++ b/lib/huff0.c @@ -1,6 +1,6 @@ /* 
****************************************************************** Huff0 : Huffman coder, part of New Generation Entropy library - Copyright (C) 2013-2015, Yann Collet. + Copyright (C) 2013-2016, Yann Collet. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) @@ -103,8 +103,7 @@ typedef struct nodeElt_s { } nodeElt; /*! HUF_writeCTable() : - @dst : destination buffer - @CTable : huffman tree to save, using huff0 representation + `CTable` : huffman tree to save, using huff0 representation. @return : size of saved CTable */ size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog) @@ -181,66 +180,58 @@ size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, si BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1]; U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */ U32 tableLog = 0; - size_t iSize; + size_t readSize; U32 nbSymbols = 0; - U32 n; - U32 nextRankStart; //memset(huffWeight, 0, sizeof(huffWeight)); /* is not necessary, even though some analyzer complain ... 
*/ /* get symbol weights */ - iSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE+1, rankVal, &nbSymbols, &tableLog, src, srcSize); - if (HUF_isError(iSize)) return iSize; + readSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE+1, rankVal, &nbSymbols, &tableLog, src, srcSize); + if (HUF_isError(readSize)) return readSize; /* check result */ if (tableLog > HUF_MAX_TABLELOG) return ERROR(tableLog_tooLarge); if (nbSymbols > maxSymbolValue+1) return ERROR(maxSymbolValue_tooSmall); /* Prepare base value per rank */ - nextRankStart = 0; - for (n=1; n<=tableLog; n++) { - U32 current = nextRankStart; - nextRankStart += (rankVal[n] << (n-1)); - rankVal[n] = current; - } + { U32 n, nextRankStart = 0; + for (n=1; n<=tableLog; n++) { + U32 current = nextRankStart; + nextRankStart += (rankVal[n] << (n-1)); + rankVal[n] = current; + } } /* fill nbBits */ - for (n=0; n0; n--) { + { U32 n; for (n=0; n0; n--) { valPerRank[n] = min; /* get starting value within each rank */ min += nbPerRank[n]; min >>= 1; } } - for (n=0; n<=maxSymbolValue; n++) - CTable[n].val = valPerRank[CTable[n].nbBits]++; /* assign value within rank, symbol order */ + /* assign value within rank, symbol order */ + { U32 n; for (n=0; n<=maxSymbolValue; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; } } - return iSize; + return readSize; } static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) { - int totalCost = 0; const U32 largestBits = huffNode[lastNonNull].nbBits; - - /* early exit : all is fine */ - if (largestBits <= maxNbBits) return largestBits; + if (largestBits <= maxNbBits) return largestBits; /* early exit : no elt > maxNbBits */ /* there are several too large elements (at least >= 2) */ - { + { int totalCost = 0; const U32 baseCost = 1 << (largestBits - maxNbBits); U32 n = lastNonNull; @@ -248,26 +239,25 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); 
huffNode[n].nbBits = (BYTE)maxNbBits; n --; - } /* n stops at huffNode[n].nbBits <= maxNbBits */ - while (huffNode[n].nbBits == maxNbBits) n--; /* n end at index of smallest symbol using (maxNbBits-1) */ + } /* n stops at huffNode[n].nbBits <= maxNbBits */ + while (huffNode[n].nbBits == maxNbBits) n--; /* n end at index of smallest symbol using < maxNbBits */ /* renorm totalCost */ totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */ /* repay normalized cost */ - { - const U32 noSymbol = 0xF0F0F0F0; + { U32 const noSymbol = 0xF0F0F0F0; U32 rankLast[HUF_MAX_TABLELOG+1]; - U32 currentNbBits = maxNbBits; int pos; /* Get pos of last (smallest) symbol per rank */ memset(rankLast, 0xF0, sizeof(rankLast)); - for (pos=n ; pos >= 0; pos--) { - if (huffNode[pos].nbBits >= currentNbBits) continue; - currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */ - rankLast[maxNbBits-currentNbBits] = pos; - } + { U32 currentNbBits = maxNbBits; + for (pos=n ; pos >= 0; pos--) { + if (huffNode[pos].nbBits >= currentNbBits) continue; + currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */ + rankLast[maxNbBits-currentNbBits] = pos; + } } while (totalCost > 0) { U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1; @@ -276,9 +266,8 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) U32 lowPos = rankLast[nBitsToDecrease-1]; if (highPos == noSymbol) continue; if (lowPos == noSymbol) break; - { - U32 highTotal = huffNode[highPos].count; - U32 lowTotal = 2 * huffNode[lowPos].count; + { U32 const highTotal = huffNode[highPos].count; + U32 const lowTotal = 2 * huffNode[lowPos].count; if (highTotal <= lowTotal) break; } } /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) 
*/ @@ -294,7 +283,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) rankLast[nBitsToDecrease]--; if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease) rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */ - } } + } } /* while (totalCost > 0) */ while (totalCost < 0) { /* Sometimes, cost correction overshoot */ if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */ @@ -307,7 +296,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) huffNode[ rankLast[1] + 1 ].nbBits--; rankLast[1]++; totalCost ++; - } } } + } } } /* there are several too large elements (at least >= 2) */ return maxNbBits; } @@ -331,8 +320,8 @@ static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue) for (n=30; n>0; n--) rank[n-1].base += rank[n].base; for (n=0; n<32; n++) rank[n].current = rank[n].base; for (n=0; n<=maxSymbolValue; n++) { - U32 c = count[n]; - U32 r = BIT_highbit32(c+1) + 1; + U32 const c = count[n]; + U32 const r = BIT_highbit32(c+1) + 1; U32 pos = rank[r].current++; while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) huffNode[pos]=huffNode[pos-1], pos--; huffNode[pos].count = c; @@ -389,21 +378,18 @@ size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U3 maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits); /* fill result into tree (val, nbBits) */ - { - U16 nbPerRank[HUF_MAX_TABLELOG+1] = {0}; + { U16 nbPerRank[HUF_MAX_TABLELOG+1] = {0}; U16 valPerRank[HUF_MAX_TABLELOG+1] = {0}; if (maxNbBits > HUF_MAX_TABLELOG) return ERROR(GENERIC); /* check fit into table */ for (n=0; n<=nonNullRank; n++) nbPerRank[huffNode[n].nbBits]++; - { - /* determine stating value per rank */ - U16 min = 0; + /* determine stating value per rank */ + { U16 min = 0; for (n=maxNbBits; n>0; n--) { valPerRank[n] = min; /* get starting value within each rank 
*/ min += nbPerRank[n]; min >>= 1; - } - } + } } for (n=0; n<=maxSymbolValue; n++) tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */ for (n=0; n<=maxSymbolValue; n++) @@ -432,17 +418,16 @@ size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, si { const BYTE* ip = (const BYTE*) src; BYTE* const ostart = (BYTE*)dst; - BYTE* op = ostart; BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; size_t n; const unsigned fast = (dstSize >= HUF_BLOCKBOUND(srcSize)); - size_t errorCode; BIT_CStream_t bitC; /* init */ if (dstSize < 8) return 0; /* not enough space to compress */ - errorCode = BIT_initCStream(&bitC, op, oend-op); - if (HUF_isError(errorCode)) return 0; + { size_t const errorCode = BIT_initCStream(&bitC, op, oend-op); + if (HUF_isError(errorCode)) return 0; } n = srcSize & ~3; /* join to mod 4 */ switch (srcSize & 3) @@ -475,12 +460,12 @@ size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, si size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) { size_t segmentSize = (srcSize+3)/4; /* first 3 segments */ - size_t errorCode; const BYTE* ip = (const BYTE*) src; const BYTE* const iend = ip + srcSize; BYTE* const ostart = (BYTE*) dst; - BYTE* op = ostart; BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + size_t errorCode; if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */ if (srcSize < 12) return 0; /* no saving possible : too small input */ @@ -523,8 +508,8 @@ static size_t HUF_compress_internal ( unsigned singleStream) { BYTE* const ostart = (BYTE*)dst; - BYTE* op = ostart; BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; U32 count[HUF_MAX_SYMBOL_VALUE+1]; HUF_CElt CTable[HUF_MAX_SYMBOL_VALUE+1]; @@ -573,8 +558,8 @@ static size_t HUF_compress_internal ( size_t HUF_compress1X (void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned 
maxSymbolValue, unsigned huffLog) + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog) { return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1); } @@ -602,9 +587,9 @@ typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* doubl typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; -/*! HUF_readStats - Read compact Huffman tree, saved by HUF_writeCTable - @huffWeight : destination buffer +/*! HUF_readStats() : + Read compact Huffman tree, saved by HUF_writeCTable(). + `huffWeight` is destination buffer. @return : size read from `src` */ static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, @@ -616,13 +601,12 @@ static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, const BYTE* ip = (const BYTE*) src; size_t iSize = ip[0]; size_t oSize; - U32 n; //memset(huffWeight, 0, hwSize); /* is not necessary, even though some analyzer complain ... */ if (iSize >= 128) { /* special header */ if (iSize >= (242)) { /* RLE */ - static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 }; + static U32 l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 }; oSize = l[iSize-242]; memset(huffWeight, 1, hwSize); iSize = 0; @@ -633,10 +617,11 @@ static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, if (iSize+1 > srcSize) return ERROR(srcSize_wrong); if (oSize >= hwSize) return ERROR(corruption_detected); ip += 1; - for (n=0; n> 4; - huffWeight[n+1] = ip[n/2] & 15; - } } } + { U32 n; + for (n=0; n> 4; + huffWeight[n+1] = ip[n/2] & 15; + } } } } else { /* header compressed with FSE (normal case) */ if (iSize+1 > srcSize) return ERROR(srcSize_wrong); oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize); /* max (hwSize-1) values decoded, as last one is implied */ @@ -646,20 +631,20 @@ static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, /* collect weight stats */ memset(rankStats, 0, 
(HUF_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32)); weightTotal = 0; - for (n=0; n= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected); rankStats[huffWeight[n]]++; weightTotal += (1 << huffWeight[n]) >> 1; - } + }} /* get last non-null symbol weight (implied, total must be 2^n) */ tableLog = BIT_highbit32(weightTotal) + 1; if (tableLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected); - { /* determine last weight */ - U32 total = 1 << tableLog; - U32 rest = total - weightTotal; - U32 verif = 1 << BIT_highbit32(rest); - U32 lastWeight = BIT_highbit32(rest) + 1; + /* determine last weight */ + { U32 const total = 1 << tableLog; + U32 const rest = total - weightTotal; + U32 const verif = 1 << BIT_highbit32(rest); + U32 const lastWeight = BIT_highbit32(rest) + 1; if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */ huffWeight[oSize] = (BYTE)lastWeight; rankStats[lastWeight]++; @@ -724,12 +709,13 @@ size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize) return iSize; } + static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog) { - const size_t val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ - const BYTE c = dt[val].byte; - BIT_skipBits(Dstream, dt[val].nbBits); - return c; + const size_t val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ + const BYTE c = dt[val].byte; + BIT_skipBits(Dstream, dt[val].nbBits); + return c; } #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ @@ -773,13 +759,13 @@ size_t HUF_decompress1X2_usingDTable( { BYTE* op = (BYTE*)dst; BYTE* const oend = op + dstSize; - size_t errorCode; const U32 dtLog = DTable[0]; const void* dtPtr = DTable; const HUF_DEltX2* const dt = ((const HUF_DEltX2*)dtPtr)+1; BIT_DStream_t bitD; - errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize); - if (HUF_isError(errorCode)) return errorCode; + + { size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize); + if 
(HUF_isError(errorCode)) return errorCode; } HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog); @@ -793,9 +779,8 @@ size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cS { HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG); const BYTE* ip = (const BYTE*) cSrc; - size_t errorCode; - errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize); + size_t const errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize); if (HUF_isError(errorCode)) return errorCode; if (errorCode >= cSrcSize) return ERROR(srcSize_wrong); ip += errorCode; @@ -812,8 +797,8 @@ size_t HUF_decompress4X2_usingDTable( { /* Check */ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ - { - const BYTE* const istart = (const BYTE*) cSrc; + + { const BYTE* const istart = (const BYTE*) cSrc; BYTE* const ostart = (BYTE*) dst; BYTE* const oend = ostart + dstSize; const void* const dtPtr = DTable; @@ -903,9 +888,8 @@ size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cS { HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG); const BYTE* ip = (const BYTE*) cSrc; - size_t errorCode; - errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize); + size_t const errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize); if (HUF_isError(errorCode)) return errorCode; if (errorCode >= cSrcSize) return ERROR(srcSize_wrong); ip += errorCode; @@ -926,7 +910,6 @@ static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 co { HUF_DEltX4 DElt; U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; - U32 s; /* get pre-calculated rankVal */ memcpy(rankVal, rankValOrigin, sizeof(rankVal)); @@ -942,7 +925,7 @@ static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 co } /* fill DTable */ - for (s=0; s= 1 */ rankVal[weight] += length; - } + }} } typedef U32 rankVal_t[HUF_ABSOLUTEMAX_TABLELOG][HUF_ABSOLUTEMAX_TABLELOG + 1]; @@ -992,16 +975,14 @@ static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 
targetLog, sortedList+sortedRank, sortedListSize-sortedRank, nbBitsBaseline, symbol); } else { - U32 i; - const U32 end = start + length; HUF_DEltX4 DElt; - MEM_writeLE16(&(DElt.sequence), symbol); - DElt.nbBits = (BYTE)(nbBits); - DElt.length = 1; - for (i = start; i < end; i++) - DTable[i] = DElt; - } + DElt.nbBits = (BYTE)(nbBits); + DElt.length = 1; + { U32 u; + const U32 end = start + length; + for (u = start; u < end; u++) DTable[u] = DElt; + } } rankVal[weight] += length; } } @@ -1034,8 +1015,7 @@ size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize) for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */ /* Get start index of each weight */ - { - U32 w, nextRankStart = 0; + { U32 w, nextRankStart = 0; for (w=1; w<=maxW; w++) { U32 current = nextRankStart; nextRankStart += rankStats[w]; @@ -1046,8 +1026,7 @@ size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize) } /* sort symbols by weight */ - { - U32 s; + { U32 s; for (s=0; s= cSrcSize) return ERROR(srcSize_wrong); ip += hSize; @@ -1194,8 +1171,7 @@ size_t HUF_decompress4X4_usingDTable( { if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ - { - const BYTE* const istart = (const BYTE*) cSrc; + { const BYTE* const istart = (const BYTE*) cSrc; BYTE* const ostart = (BYTE*) dst; BYTE* const oend = ostart + dstSize; const void* const dtPtr = DTable; @@ -1385,8 +1361,7 @@ size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize) for (maxW = tableLog; maxW && rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */ /* Get start index of each weight */ - { - U32 w, nextRankStart = 0; + { U32 w, nextRankStart = 0; for (w=1; w<=maxW; w++) { U32 current = nextRankStart; nextRankStart += rankStats[w]; @@ -1397,8 +1372,7 @@ size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize) } /* sort symbols by weight */ - { - U32 s; + { U32 s; for (s=0; s= 
cSrcSize) return ERROR(srcSize_wrong); ip += hSize; diff --git a/lib/huff0.h b/lib/huff0.h index fe28d7be..9d6e11f7 100644 --- a/lib/huff0.h +++ b/lib/huff0.h @@ -48,24 +48,24 @@ extern "C" { /* **************************************** * Huff0 simple functions ******************************************/ -size_t HUF_compress(void* dst, size_t maxDstSize, +size_t HUF_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize); size_t HUF_decompress(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); -/*! -HUF_compress(): +/* +HUF_compress() : Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'. - 'dst' buffer must be already allocated. Compression runs faster if maxDstSize >= HUF_compressBound(srcSize). + 'dst' buffer must be already allocated. Compression runs faster if dstCapacity >= HUF_compressBound(srcSize). Note : srcSize must be <= 128 KB - @return : size of compressed data (<= maxDstSize) + @return : size of compressed data (<= dstCapacity) Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! - if return == 1, srcData is a single repeated byte symbol (RLE compression) + if return == 1, srcData is a single repeated byte symbol (RLE compression). if HUF_isError(return), compression failed (more details using HUF_getErrorName()) -HUF_decompress(): +HUF_decompress() : Decompress Huff0 data from buffer 'cSrc', of size 'cSrcSize', into already allocated destination buffer 'dst', of size 'dstSize'. - @dstSize : must be the **exact** size of original (uncompressed) data. + `dstSize` : must be the **exact** size of original (uncompressed) data. Note : in contrast with FSE, HUF_decompress can regenerate RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, because it knows size to regenerate. 
@@ -77,11 +77,11 @@ HUF_decompress(): /* **************************************** * Tool functions ******************************************/ -size_t HUF_compressBound(size_t size); /* maximum compressed size */ +size_t HUF_compressBound(size_t size); /**< maximum compressed size */ /* Error Management */ -unsigned HUF_isError(size_t code); /* tells if a return value is an error code */ -const char* HUF_getErrorName(size_t code); /* provides error code string (useful for debugging) */ +unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */ +const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */ /* **************************************** diff --git a/lib/huff0_static.h b/lib/huff0_static.h index 84033964..32d66d35 100644 --- a/lib/huff0_static.h +++ b/lib/huff0_static.h @@ -85,7 +85,7 @@ HUF_compress() does the following: 1. count symbol occurrence from source[] into table count[] using FSE_count() 2. build Huffman table from count using HUF_buildCTable() 3. save Huffman table to memory buffer using HUF_writeCTable() -4. encode the data stream using HUF_compress_usingCTable() +4. encode the data stream using HUF_compress4X_usingCTable() The following API allows targeting specific sub-functions for advanced tasks. For example, it's possible to compress several blocks using the same 'CTable', @@ -95,7 +95,7 @@ or to save and regenerate 'CTable' using external methods. 
typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */ size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); -size_t HUF_compress4X_into4Segments(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); +size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); /*! From de406eebcd2330ada599d3c8d6e3192fff1abe0f Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 20 Mar 2016 15:46:10 +0100 Subject: [PATCH 122/247] minor code refactor --- lib/error_private.h | 12 +++---- lib/error_public.h | 5 ++- lib/zstd_compress.c | 8 ++--- lib/zstd_static.h | 5 ++- programs/bench.c | 77 +++++++++++++++++++++------------------------ 5 files changed, 49 insertions(+), 58 deletions(-) diff --git a/lib/error_private.h b/lib/error_private.h index c0c3f490..ff0b829f 100644 --- a/lib/error_private.h +++ b/lib/error_private.h @@ -28,7 +28,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
You can contact the author at : - - Source repository : https://github.com/Cyan4973/zstd + - Homepage : http://www.zstd.net ****************************************************************** */ /* Note : this module is expected to remain private, do not expose it */ @@ -62,7 +62,7 @@ extern "C" { /*-**************************************** -* Customization +* Customization (error_public.h) ******************************************/ typedef ZSTD_ErrorCode ERR_enum; #define PREFIX(name) ZSTD_error_##name @@ -74,7 +74,7 @@ typedef ZSTD_ErrorCode ERR_enum; #ifdef ERROR # undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */ #endif -#define ERROR(name) (size_t)-PREFIX(name) +#define ERROR(name) ((size_t)-PREFIX(name)) ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } @@ -101,12 +101,12 @@ ERR_STATIC const char* ERR_getErrorName(size_t code) case PREFIX(dstSize_tooSmall): return "Destination buffer is too small"; case PREFIX(srcSize_wrong): return "Src size incorrect"; case PREFIX(corruption_detected): return "Corrupted block detected"; - case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory"; - case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max possible Symbol Value : too large"; + case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported"; + case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large"; case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small"; case PREFIX(dictionary_corrupted): return "Dictionary is corrupted"; case PREFIX(maxCode): - default: return notErrorCode; /* should be impossible, due to ERR_getError() */ + default: return notErrorCode; /* impossible, due to ERR_getError() */ } } diff --git a/lib/error_public.h b/lib/error_public.h index 655e28e0..073b8c6a 100644 --- a/lib/error_public.h +++ b/lib/error_public.h @@ -28,7 +28,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY 
OF SUCH DAMAGE. You can contact the author at : - - Source repository : https://github.com/Cyan4973/zstd + - Homepage : http://www.zstd.net ****************************************************************** */ #ifndef ERROR_PUBLIC_H_MODULE #define ERROR_PUBLIC_H_MODULE @@ -60,8 +60,7 @@ typedef enum { ZSTD_error_maxCode } ZSTD_ErrorCode; -/* note : functions provide error codes in reverse negative order, - so compare with (size_t)(0-enum) */ +/* note : compare with size_t function results using ZSTD_getError() */ #if defined (__cplusplus) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 7147a329..689d48fe 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -2431,18 +2431,18 @@ static const ZSTD_parameters ZSTD_defaultParameters[4][ZSTD_MAX_CLEVEL+1] = { /*! ZSTD_getParams() : * @return ZSTD_parameters structure for a selected compression level and srcSize. -* `srcSizeHint` value is optional, select 0 if not known */ -ZSTD_parameters ZSTD_getParams(int compressionLevel, U64 srcSizeHint) +* `srcSize` value is optional, select 0 if not known */ +ZSTD_parameters ZSTD_getParams(int compressionLevel, U64 srcSize) { ZSTD_parameters result; - int tableID = ((srcSizeHint-1) <= 256 KB) + ((srcSizeHint-1) <= 128 KB) + ((srcSizeHint-1) <= 16 KB); /* intentional underflow for srcSizeHint == 0 */ + int tableID = ((srcSize-1) <= 256 KB) + ((srcSize-1) <= 128 KB) + ((srcSize-1) <= 16 KB); /* intentional underflow for srcSizeHint == 0 */ if (compressionLevel<=0) compressionLevel = 1; if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL; #if ZSTD_OPT_DEBUG >= 1 tableID=0; #endif result = ZSTD_defaultParameters[tableID][compressionLevel]; - result.srcSize = srcSizeHint; + result.srcSize = srcSize; return result; } diff --git a/lib/zstd_static.h b/lib/zstd_static.h index cb140a49..4ae771fd 100644 --- a/lib/zstd_static.h +++ b/lib/zstd_static.h @@ -73,8 +73,7 @@ extern "C" { /* from faster to stronger */ typedef enum { ZSTD_fast, ZSTD_greedy, 
ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_btopt } ZSTD_strategy; -typedef struct -{ +typedef struct { U64 srcSize; /* optional : tells how much bytes are present in the frame. Use 0 if not known. */ U32 windowLog; /* largest match distance : larger == more compression, more memory needed during decompression */ U32 contentLog; /* full search segment : larger == more compression, slower, more memory (useless for fast) */ @@ -245,7 +244,7 @@ size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, cons ***************************************/ #include "error_public.h" /*! ZSTD_getErrorCode() : - convert a `size_t` function result into a `ZSTD_error_code` enum type, + convert a `size_t` function result into a `ZSTD_ErrorCode` enum type, which can be used to compare directly with enum list published into "error_public.h" */ ZSTD_ErrorCode ZSTD_getError(size_t code); diff --git a/programs/bench.c b/programs/bench.c index 40be13f5..c74c03df 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -190,7 +190,6 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, { const size_t blockSize = (g_blockSize ? 
g_blockSize : srcSize) + (!srcSize); /* avoid div by 0 */ const U32 maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles; - size_t largestBlockSize = 0; blockParam_t* const blockTable = (blockParam_t*) malloc(maxNbBlocks * sizeof(blockParam_t)); const size_t maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); /* add some room for safety */ void* const compressedBuffer = malloc(maxCompressedSize); @@ -199,28 +198,27 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, ZSTD_CCtx* ctx = ZSTD_createCCtx(); ZSTD_DCtx* refDCtx = ZSTD_createDCtx(); ZSTD_DCtx* dctx = ZSTD_createDCtx(); - U64 crcOrig = XXH64(srcBuffer, srcSize, 0); - U32 nbBlocks = 0; + U64 const crcOrig = XXH64(srcBuffer, srcSize, 0); + U32 nbBlocks; + + /* checks */ + if (!compressedBuffer || !resultBuffer || !blockTable || !refCtx || !ctx || !refDCtx || !dctx) + EXM_THROW(31, "not enough memory"); /* init */ if (strlen(displayName)>17) displayName += strlen(displayName)-17; /* can only display 17 characters */ - /* Memory allocation & restrictions */ - if (!compressedBuffer || !resultBuffer || !blockTable || !refCtx || !ctx || !refDCtx || !dctx) - EXM_THROW(31, "not enough memory"); - /* Init blockTable data */ - { - const char* srcPtr = (const char*)srcBuffer; + { const char* srcPtr = (const char*)srcBuffer; char* cPtr = (char*)compressedBuffer; char* resPtr = (char*)resultBuffer; U32 fileNb; - for (fileNb=0; fileNb largestBlockSize) largestBlockSize = thisBlockSize; } } } /* warmimg up memory */ RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.50, 1); /* Bench */ - { - U32 loopNb; - size_t cSize = 0; + { size_t cSize = 0; double fastestC = 100000000., fastestD = 100000000.; double ratio = 0.; U64 crcCheck = 0; clock_t coolTime = clock(); + U32 testNb; DISPLAY("\r%79s\r", ""); - for (loopNb = 1; loopNb <= (g_nbIterations + !g_nbIterations); loopNb++) { + for (testNb = 1; testNb <= (g_nbIterations + !g_nbIterations); testNb++) { int nbLoops; - 
U32 blockNb; clock_t clockStart, clockSpan; clock_t const clockLoop = g_nbIterations ? TIMELOOP_S * CLOCKS_PER_SEC : 10; @@ -260,45 +255,43 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, } /* Compression */ - DISPLAY("%2i-%-17.17s :%10u ->\r", loopNb, displayName, (U32)srcSize); + DISPLAY("%2i-%-17.17s :%10u ->\r", testNb, displayName, (U32)srcSize); memset(compressedBuffer, 0xE5, maxCompressedSize); /* warm up and erase result buffer */ - nbLoops = 0; clockStart = clock(); while (clock() == clockStart); clockStart = clock(); - while (BMK_clockSpan(clockStart) < clockLoop) { - ZSTD_compressBegin_advanced(refCtx, dictBuffer, dictBufferSize, ZSTD_getParams(cLevel, MAX(dictBufferSize, largestBlockSize))); + + for (nbLoops = 0 ; BMK_clockSpan(clockStart) < clockLoop ; nbLoops++) { + U32 blockNb; + ZSTD_compressBegin_usingDict(refCtx, dictBuffer, dictBufferSize, cLevel); for (blockNb=0; blockNb%10u (%5.3f),%6.1f MB/s\r", - loopNb, displayName, (U32)srcSize, (U32)cSize, ratio, + testNb, displayName, (U32)srcSize, (U32)cSize, ratio, (double)srcSize / 1000000. / (fastestC / CLOCKS_PER_SEC) ); #if 1 /* Decompression */ memset(resultBuffer, 0xD6, srcSize); /* warm result buffer */ - nbLoops = 0; clockStart = clock(); while (clock() == clockStart); clockStart = clock(); - for ( ; BMK_clockSpan(clockStart) < clockLoop; nbLoops++) { + for (nbLoops = 0 ; BMK_clockSpan(clockStart) < clockLoop ; nbLoops++) { + U32 blockNb; ZSTD_decompressBegin_usingDict(refDCtx, dictBuffer, dictBufferSize); for (blockNb=0; blockNb%10u (%5.3f),%6.1f MB/s ,%6.1f MB/s\r", - loopNb, displayName, (U32)srcSize, (U32)cSize, ratio, + testNb, displayName, (U32)srcSize, (U32)cSize, ratio, (double)srcSize / 1000000. / (fastestC / CLOCKS_PER_SEC), (double)srcSize / 1000000. 
/ (fastestD / CLOCKS_PER_SEC) ); @@ -343,12 +336,11 @@ _findError: printf("no difference detected\n"); } } break; - } + } /* if (crcOrig!=crcCheck) */ #endif - } - + } /* for (testNb = 1; testNb <= (g_nbIterations + !g_nbIterations); testNb++) */ DISPLAY("%2i#\n", cLevel); - } + } /* Bench */ /* clean up */ free(compressedBuffer); @@ -363,19 +355,20 @@ _findError: static size_t BMK_findMaxMem(U64 requiredMem) { - size_t step = 64 MB; + size_t const step = 64 MB; BYTE* testmem = NULL; requiredMem = (((requiredMem >> 26) + 1) << 26); - requiredMem += 2 * step; + requiredMem += step; if (requiredMem > maxMemory) requiredMem = maxMemory; - while (!testmem) { - requiredMem -= step; + do { testmem = (BYTE*)malloc((size_t)requiredMem); - } + requiredMem -= step; + } while (!testmem); + free(testmem); - return (size_t)(requiredMem - step); + return (size_t)(requiredMem); } static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize, From 524473804667495da0263108644bc2c60feb9e3b Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 20 Mar 2016 16:00:00 +0100 Subject: [PATCH 123/247] minor code refactoring --- lib/zstd_compress.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 689d48fe..a699f3e4 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -1105,7 +1105,7 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, U32 offset; hashTable[h] = current; /* update hash table */ - if ( ((repIndex <= dictLimit-4) || (repIndex >= dictLimit)) + if ( ((repIndex >= dictLimit) || (repIndex <= dictLimit-4)) && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { const BYTE* repMatchEnd = repIndex < dictLimit ? 
dictEnd : iend; mlCode = ZSTD_count_2segments(ip+1+MINMATCH, repMatch+MINMATCH, iend, repMatchEnd, lowPrefixPtr); @@ -1113,10 +1113,11 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, offset = 0; } else { if ( (matchIndex < lowLimit) || - (MEM_read32(match) != MEM_read32(ip)) ) - { ip += ((ip-anchor) >> g_searchStrength) + 1; continue; } - { - const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend; + (MEM_read32(match) != MEM_read32(ip)) ) { + ip += ((ip-anchor) >> g_searchStrength) + 1; + continue; + } + { const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend; const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr; mlCode = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iend, matchEnd, lowPrefixPtr); while ((ip>anchor) && (match>lowMatchPtr) && (ip[-1] == match[-1])) { ip--; match--; mlCode++; } /* catch up */ @@ -1373,7 +1374,7 @@ static void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* con idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 0); } -/** Tree updater, providing best match */ +/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */ static size_t ZSTD_BtFindBestMatch ( ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iLimit, @@ -1462,7 +1463,7 @@ U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls) U32 idx = zc->nextToUpdate; while(idx < target) { - size_t h = ZSTD_hashPtr(base+idx, hashLog, mls); + size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; hashTable[h] = idx; idx++; @@ -1499,9 +1500,8 @@ size_t ZSTD_HcFindBestMatch_generic ( /* HC4 match finder */ matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls); - while ((matchIndex>lowLimit) && (nbAttempts)) { + for ( ; (matchIndex>lowLimit) && (nbAttempts) ; nbAttempts--) { size_t currentMl=0; - nbAttempts--; if ((!extDict) || matchIndex >= dictLimit) { match = base + matchIndex; if (match[ml] == ip[ml]) /* potentially 
better */ @@ -1849,8 +1849,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, /* store sequence */ _storeSequence: - { - size_t litLength = start - anchor; + { size_t const litLength = start - anchor; ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH); anchor = ip = start + matchLength; } @@ -1875,8 +1874,7 @@ _storeSequence: } } /* Last Literals */ - { - size_t lastLLSize = iend - anchor; + { size_t const lastLLSize = iend - anchor; memcpy(seqStorePtr->lit, anchor, lastLLSize); seqStorePtr->lit += lastLLSize; } From ecabfe3777a08e007ed6c5cb660f6174dc75c783 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 20 Mar 2016 16:20:06 +0100 Subject: [PATCH 124/247] Improved index overflow protection --- lib/zstd_compress.c | 33 +++++++++++++++++++++------------ programs/playTests.sh | 2 +- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index a699f3e4..cfb2519a 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -289,22 +289,31 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx) } -/*! ZSTD_reduceIndex() : -* rescale indexes to avoid future overflow (indexes are U32) */ -static void ZSTD_reduceIndex (ZSTD_CCtx* zc, - const U32 reducerValue) +/*! ZSTD_reduceTable() : +* rescale indexes from a table (indexes are U32) */ +static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reducerValue) { - const U32 contentLog = (zc->params.strategy == ZSTD_fast) ? 1 : zc->params.contentLog; - const U32 tableSpaceU32 = (1 << contentLog) + (1 << zc->params.hashLog); - U32* table32 = zc->hashTable; - U32 index; - - for (index=0 ; index < tableSpaceU32 ; index++) { - if (table32[index] < reducerValue) table32[index] = 0; - else table32[index] -= reducerValue; + U32 u; + for (u=0 ; u < size ; u++) { + if (table[u] < reducerValue) table[u] = 0; + else table[u] -= reducerValue; } } +/*! 
ZSTD_reduceIndex() : +* rescale all indexes to avoid future overflow (indexes are U32) */ +static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue) +{ + { const U32 hSize = 1 << zc->params.hashLog; + ZSTD_reduceTable(zc->hashTable, hSize, reducerValue); } + + { const U32 contentSize = (zc->params.strategy == ZSTD_fast) ? 0 : (1 << zc->params.contentLog); + ZSTD_reduceTable(zc->contentTable, contentSize, reducerValue); } + + { const U32 h3Size = (zc->params.searchLength == 3) ? (1 << HASHLOG3) : 0; + ZSTD_reduceTable(zc->hashTable3, h3Size, reducerValue); } +} + /*-******************************************************* * Block entropic compression diff --git a/programs/playTests.sh b/programs/playTests.sh index 891ab9d4..444d91eb 100755 --- a/programs/playTests.sh +++ b/programs/playTests.sh @@ -143,7 +143,7 @@ roundTripTest -g50000000 -P94 18 roundTripTest -g50000000 -P94 19 roundTripTest -g99000000 -P99 20 -roundTripTest -g6000000000 -P99 q +roundTripTest -g6000000000 -P99 1 rm tmp* From 597847a2ae9b6c1c0872b000f85e4cfee1627a2d Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 20 Mar 2016 19:14:22 +0100 Subject: [PATCH 125/247] first emulation --- lib/zdict.c | 3 +- lib/zstd_compress.c | 97 +++++++++++++++++++++++++++++++++++++++++---- lib/zstd_internal.h | 6 +-- programs/bench.c | 2 +- 4 files changed, 94 insertions(+), 14 deletions(-) diff --git a/lib/zdict.c b/lib/zdict.c index a7c8090a..4c1ffb08 100644 --- a/lib/zdict.c +++ b/lib/zdict.c @@ -284,8 +284,7 @@ static dictItem ZDICT_analyzePos( return solution; } - { - int i; + { int i; U32 searchLength; U32 refinedStart = start; U32 refinedEnd = end; diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index cfb2519a..ba84fde7 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -184,7 +184,7 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, const size_t blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params.windowLog); const U32 divider = (params.searchLength==3) ? 
3 : 4; const size_t maxNbSeq = blockSize / divider; - const size_t tokenSpace = blockSize + 8*maxNbSeq; + const size_t tokenSpace = blockSize + 10*maxNbSeq; const size_t contentSize = (params.strategy == ZSTD_fast) ? 0 : (1 << params.contentLog); const size_t hSize = 1 << params.hashLog; const size_t h3Size = (params.searchLength==3) ? (1 << HASHLOG3) : 0; @@ -209,7 +209,7 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, zc->seqStore.buffer = zc->contentTable + contentSize; zc->hufTable = (HUF_CElt*)zc->seqStore.buffer; zc->flagStaticTables = 0; - zc->seqStore.buffer = (U32*)(zc->seqStore.buffer) + 256; + zc->seqStore.buffer = ((U32*)(zc->seqStore.buffer)) + 256; zc->nextToUpdate = 1; zc->nextSrc = NULL; @@ -221,10 +221,11 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, zc->blockSize = blockSize; zc->seqStore.offsetStart = (U32*) (zc->seqStore.buffer); - zc->seqStore.offCodeStart = (BYTE*) (zc->seqStore.offsetStart + maxNbSeq); + zc->seqStore.litLengthStart = (U16*) (void*)(zc->seqStore.offsetStart + maxNbSeq); + zc->seqStore.llCodeStart = (BYTE*) (zc->seqStore.litLengthStart + maxNbSeq); + zc->seqStore.offCodeStart = zc->seqStore.llCodeStart + maxNbSeq; zc->seqStore.litStart = zc->seqStore.offCodeStart + maxNbSeq; - zc->seqStore.litLengthStart = zc->seqStore.litStart + blockSize; - zc->seqStore.matchLengthStart = zc->seqStore.litLengthStart + maxNbSeq; + zc->seqStore.matchLengthStart = zc->seqStore.litStart + blockSize; zc->seqStore.dumpsStart = zc->seqStore.matchLengthStart + maxNbSeq; if (params.strategy == ZSTD_btopt) { zc->seqStore.litFreq = (U32*)((void*)(zc->seqStore.dumpsStart + maxNbSeq)); @@ -584,11 +585,12 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, FSE_CTable* CTable_OffsetBits = zc->offcodeCTable; FSE_CTable* CTable_MatchLength = zc->matchlengthCTable; U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ - const BYTE* const llTable = seqStorePtr->litLengthStart; - const BYTE* const llPtr = seqStorePtr->litLength; + const U16* 
const llTable = seqStorePtr->litLengthStart; + const U16* const llPtr = seqStorePtr->litLength; const BYTE* const mlTable = seqStorePtr->matchLengthStart; const U32* const offsetTable = seqStorePtr->offsetStart; BYTE* const offCodeTable = seqStorePtr->offCodeStart; + BYTE* const llCodeTable = seqStorePtr->llCodeStart; BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + dstCapacity; BYTE* op = ostart; @@ -633,7 +635,49 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, #define MIN_SEQ_FOR_DYNAMIC_FSE 64 #define MAX_SEQ_FOR_STATIC_FSE 1000 + /* LL codes */ +static const BYTE llCode[64] = { 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 16, 17, 17, 18, 18, 19, 19, + 20, 20, 20, 20, 21, 21, 21, 21, + 22, 22, 22, 22, 22, 22, 22, 22, + 23, 23, 23, 23, 23, 23, 23, 23, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24 }; +static const BYTE deltaCode = 18; + + { size_t i; + for (i=0; i63) ? ZSTD_highbit(ll) + deltaCode : llCode[ll]; + } } + /* CTable for Literal Lengths */ +#if 1 + { U32 max = 36; + size_t const mostFrequent = FSE_countFast(count, &max, llCodeTable, nbSeq); + if ((mostFrequent == nbSeq) && (nbSeq > 2)) { + *op++ = llCodeTable[0]; + FSE_buildCTable_rle(CTable_LitLength, (BYTE)max); + LLtype = FSE_ENCODING_RLE; + } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { + LLtype = FSE_ENCODING_STATIC; + } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LLbits-1)))) { + FSE_buildCTable_raw(CTable_LitLength, LLbits); + LLtype = FSE_ENCODING_RAW; + } else { + size_t NCountSize; + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max); + if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; } + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); + NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; + 
FSE_buildCTable(CTable_LitLength, norm, max, tableLog); + LLtype = FSE_ENCODING_DYNAMIC; + }} +#else { U32 max = MaxLL; size_t const mostFrequent = FSE_countFast(count, &max, llTable, nbSeq); if ((mostFrequent == nbSeq) && (nbSeq > 2)) { @@ -657,6 +701,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, FSE_buildCTable(CTable_LitLength, norm, max, tableLog); LLtype = FSE_ENCODING_DYNAMIC; }} +#endif // 0 /* Offset codes */ { size_t i; for (i=0; ilit += litLength; /* literal Length */ +#if 1 + *seqStorePtr->litLength++ = (U16)litLength; /* take care of litLength >= 65535 ! */ +#else if (litLength >= MaxLL) { *(seqStorePtr->litLength++) = MaxLL; if (litLength<255 + MaxLL) { @@ -802,6 +882,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const B seqStorePtr->dumps += 3; } } } else *(seqStorePtr->litLength++) = (BYTE)litLength; +#endif // 0 /* match offset */ *(seqStorePtr->offset++) = (U32)offsetCode; diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index ba350c4f..7a3f2134 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -184,11 +184,11 @@ typedef struct { U32* offsetStart; U32* offset; BYTE* offCodeStart; - BYTE* offCode; BYTE* litStart; BYTE* lit; - BYTE* litLengthStart; - BYTE* litLength; + U16* litLengthStart; + U16* litLength; + BYTE* llCodeStart; BYTE* matchLengthStart; BYTE* matchLength; BYTE* dumpsStart; diff --git a/programs/bench.c b/programs/bench.c index c74c03df..e5b231d3 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -282,7 +282,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, testNb, displayName, (U32)srcSize, (U32)cSize, ratio, (double)srcSize / 1000000. 
/ (fastestC / CLOCKS_PER_SEC) ); -#if 1 +#if 0 /* Decompression */ memset(resultBuffer, 0xD6, srcSize); /* warm result buffer */ From d64f435f637fc876ac4f9aba4b474856f86cf63c Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 21 Mar 2016 00:07:42 +0100 Subject: [PATCH 126/247] handles litLength >= 65535 --- lib/bitstream.h | 4 ++-- lib/zstd_compress.c | 23 +++++++++++++---------- lib/zstd_internal.h | 3 ++- programs/bench.c | 4 ++-- 4 files changed, 19 insertions(+), 15 deletions(-) diff --git a/lib/bitstream.h b/lib/bitstream.h index e1237930..0fe36eae 100644 --- a/lib/bitstream.h +++ b/lib/bitstream.h @@ -206,7 +206,7 @@ MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBi * unsafe version; does not check buffer overflow */ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) { - size_t nbBytes = bitC->bitPos >> 3; + size_t const nbBytes = bitC->bitPos >> 3; MEM_writeLEST(bitC->ptr, bitC->bitContainer); bitC->ptr += nbBytes; bitC->bitPos &= 7; @@ -218,7 +218,7 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) * note : does not signal buffer overflow. 
This will be revealed later on using BIT_closeCStream() */ MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) { - size_t nbBytes = bitC->bitPos >> 3; + size_t const nbBytes = bitC->bitPos >> 3; MEM_writeLEST(bitC->ptr, bitC->bitContainer); bitC->ptr += nbBytes; if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index ba84fde7..937cce85 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -172,9 +172,9 @@ size_t ZSTD_sizeofCCtx(ZSTD_parameters params) /* hidden interface, for parama { ZSTD_CCtx* zc = ZSTD_createCCtx(); ZSTD_compressBegin_advanced(zc, NULL, 0, params); - { size_t const size = sizeof(*zc) + zc->workSpaceSize; + { size_t const ccsize = sizeof(*zc) + zc->workSpaceSize; ZSTD_freeCCtx(zc); - return size; } + return ccsize; } } @@ -291,7 +291,7 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx) /*! ZSTD_reduceTable() : -* rescale indexes from a table (indexes are U32) */ +* reduce table indexes by `reducerValue` */ static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reducerValue) { U32 u; @@ -586,15 +586,15 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, FSE_CTable* CTable_MatchLength = zc->matchlengthCTable; U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ const U16* const llTable = seqStorePtr->litLengthStart; - const U16* const llPtr = seqStorePtr->litLength; const BYTE* const mlTable = seqStorePtr->matchLengthStart; const U32* const offsetTable = seqStorePtr->offsetStart; + const U32* const offsetTableEnd = seqStorePtr->offset; BYTE* const offCodeTable = seqStorePtr->offCodeStart; BYTE* const llCodeTable = seqStorePtr->llCodeStart; BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + dstCapacity; BYTE* op = ostart; - size_t const nbSeq = llPtr - llTable; + size_t const nbSeq = offsetTableEnd - offsetTable; BYTE* seqHead; /* Compress literals */ @@ -648,7 +648,8 @@ static const BYTE deltaCode = 18; { size_t i; for (i=0; 
ilitLengthLong; llCodeTable[i] = (ll>63) ? ZSTD_highbit(ll) + deltaCode : llCode[ll]; } } @@ -788,10 +789,11 @@ static const U32 llBits[36] = { 0, 0, 0, 0, 0, 0, 0, 0, const BYTE LLCode = llCodeTable[n]; /* (7)*/ /* (7)*/ FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 17 */ /* 17 */ if (MEM_32bits()) BIT_flushBits(&blockStream); /* 7 */ - FSE_encodeSymbol(&blockStream, &stateLitLength, LLCode); /* 17 */ /* 27 */ - FSE_encodeSymbol(&blockStream, &stateOffsetBits, offCode); /* 26 */ /* 36 */ + FSE_encodeSymbol(&blockStream, &stateLitLength, LLCode); /* 16 */ /* 26 */ + FSE_encodeSymbol(&blockStream, &stateOffsetBits, offCode); /* 25 */ /* 35 */ if (MEM_32bits()) BIT_flushBits(&blockStream); /* 7 */ - BIT_addBits(&blockStream, offset, nbBits); /* 31 */ /* 62 */ /* 24 bits max in 32-bits mode */ + //BIT_flushBits(&blockStream); /* 7 */ /* 7 */ + BIT_addBits(&blockStream, offset, nbBits); /* 31 */ /* 61 */ /* 24 bits max in 32-bits mode */ BIT_addBits(&blockStream, llTable[n], llBits[LLCode]); BIT_flushBits(&blockStream); /* 7 */ /* 7 */ } } @@ -866,7 +868,8 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const B /* literal Length */ #if 1 - *seqStorePtr->litLength++ = (U16)litLength; /* take care of litLength >= 65535 ! 
*/ + if (litLength>=65535) { *(seqStorePtr->litLength++) = 65535; seqStorePtr->litLengthLong = (U32)litLength; } + else *seqStorePtr->litLength++ = (U16)litLength; #else if (litLength >= MaxLL) { *(seqStorePtr->litLength++) = MaxLL; diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index 7a3f2134..2b830e31 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -110,7 +110,7 @@ typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t; #define MaxLL ((1< Date: Mon, 21 Mar 2016 00:39:19 +0100 Subject: [PATCH 127/247] support default LL distribution --- lib/fse.c | 113 ++++++++++++++++++++---------------------- lib/zstd_compress.c | 10 +++- lib/zstd_decompress.c | 2 +- programs/bench.c | 1 + 4 files changed, 65 insertions(+), 61 deletions(-) diff --git a/lib/fse.c b/lib/fse.c index 291e6419..dabe2830 100644 --- a/lib/fse.c +++ b/lib/fse.c @@ -145,21 +145,18 @@ static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) { - const unsigned tableSize = 1 << tableLog; - const unsigned tableMask = tableSize - 1; + U32 const tableSize = 1 << tableLog; + U32 const tableMask = tableSize - 1; void* const ptr = ct; U16* const tableU16 = ( (U16*) ptr) + 2; void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? 
tableSize>>1 : 1) ; FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); - const unsigned step = FSE_tableStep(tableSize); - unsigned cumul[FSE_MAX_SYMBOL_VALUE+2]; - U32 position = 0; + U32 const step = FSE_tableStep(tableSize); + U32 cumul[FSE_MAX_SYMBOL_VALUE+2]; FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */ U32 highThreshold = tableSize-1; - unsigned symbol; - unsigned i; - /* header */ + /* CTable header */ tableU16[-2] = (U16) tableLog; tableU16[-1] = (U16) maxSymbolValue; @@ -167,42 +164,44 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ /* symbol start positions */ - cumul[0] = 0; - for (i=1; i<=maxSymbolValue+1; i++) { - if (normalizedCounter[i-1]==-1) { /* Low proba symbol */ - cumul[i] = cumul[i-1] + 1; - tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(i-1); - } else { - cumul[i] = cumul[i-1] + normalizedCounter[i-1]; - } } - cumul[maxSymbolValue+1] = tableSize+1; - - /* Spread symbols */ - for (symbol=0; symbol<=maxSymbolValue; symbol++) { - int nbOccurences; - for (nbOccurences=0; nbOccurences highThreshold) position = (position + step) & tableMask; /* Low proba area */ - } } - - if (position!=0) return ERROR(GENERIC); /* Must have gone through all positions */ - - /* Build table */ - for (i=0; i highThreshold) position = (position + step) & tableMask; /* Low proba area */ + } } + if (position!=0) return ERROR(GENERIC); /* Must have gone through all positions */ + } + + /* Build table */ + { U32 u; for (u=0; u highThreshold) position = (position + step) & tableMask; /* lowprob area */ - } } - - if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + { U32 position = 0; + for (s=0; s<=maxSymbolValue; s++) { + int i; + for (i=0; i highThreshold) 
position = (position + step) & tableMask; /* lowprob area */ + } } + if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + } /* Build Decoding table */ - { - U32 i; - for (i=0; i>= 2; } - { - short const max = (short)((2*threshold-1)-remaining); + { short const max = (short)((2*threshold-1)-remaining); short count; if ((bitStream & (threshold-1)) < (U32)max) { diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 937cce85..c4cd7952 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -655,7 +655,7 @@ static const BYTE deltaCode = 18; /* CTable for Literal Lengths */ #if 1 - { U32 max = 36; + { U32 max = 35; size_t const mostFrequent = FSE_countFast(count, &max, llCodeTable, nbSeq); if ((mostFrequent == nbSeq) && (nbSeq > 2)) { *op++ = llCodeTable[0]; @@ -664,7 +664,13 @@ static const BYTE deltaCode = 18; } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { LLtype = FSE_ENCODING_STATIC; } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LLbits-1)))) { - FSE_buildCTable_raw(CTable_LitLength, LLbits); + static const S16 LL_defaultNorm[36] = { 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 1, 1, 1, 1, + 1, 1, 1, 1 }; + static const U32 LL_defaultNormLog = 6; + FSE_buildCTable(CTable_LitLength, LL_defaultNorm, 35, LL_defaultNormLog); LLtype = FSE_ENCODING_RAW; } else { size_t NCountSize; diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 88734829..3a6887e9 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -325,7 +325,7 @@ size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* zc, const void* src, size_t srcSize) { - size_t result = ZSTD_getFrameParams(&(zc->fParams), src, srcSize); + size_t const result = 
ZSTD_getFrameParams(&(zc->fParams), src, srcSize); if ((MEM_32bits()) && (zc->fParams.windowLog > 25)) return ERROR(frameParameter_unsupportedBy32bits); return result; } diff --git a/programs/bench.c b/programs/bench.c index 7acfcca3..2ee3c417 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -282,6 +282,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, testNb, displayName, (U32)srcSize, (U32)cSize, ratio, (double)srcSize / 1000000. / (fastestC / CLOCKS_PER_SEC) ); + (void)crcCheck; (void)fastestD; (void)crcOrig; /* unused when decompression disabled */ #if 0 /* Decompression */ memset(resultBuffer, 0xD6, srcSize); /* warm result buffer */ From 5c71491a4614f6e2045b83863529fe08056daa7d Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 21 Mar 2016 02:23:34 +0100 Subject: [PATCH 128/247] first working version with alternate LL codes --- programs/bench.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/bench.c b/programs/bench.c index 2ee3c417..b46dd10c 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -283,7 +283,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, (double)srcSize / 1000000. / (fastestC / CLOCKS_PER_SEC) ); (void)crcCheck; (void)fastestD; (void)crcOrig; /* unused when decompression disabled */ -#if 0 +#if 1 /* Decompression */ memset(resultBuffer, 0xD6, srcSize); /* warm result buffer */ From b0aec17a90210db47ef67a521580e1f23eb656fd Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 21 Mar 2016 13:24:16 +0100 Subject: [PATCH 129/247] code simplification (but reduce decompression speed ...) 
--- lib/fse.c | 49 ++++++++++--------------- lib/zstd_compress.c | 85 ++++++++++++------------------------------- lib/zstd_decompress.c | 80 ++++++++++++++++++++++++++-------------- lib/zstd_internal.h | 11 +++++- lib/zstd_opt.h | 16 ++++---- 5 files changed, 113 insertions(+), 128 deletions(-) diff --git a/lib/fse.c b/lib/fse.c index dabe2830..63898ab1 100644 --- a/lib/fse.c +++ b/lib/fse.c @@ -363,8 +363,7 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize, bitStream >>= 16; bitCount -= 16; } } - { - short count = normalizedCounter[charnum++]; + { short count = normalizedCounter[charnum++]; const short max = (short)((2*threshold-1)-remaining); remaining -= FSE_abs(count); if (remaining<1) return ERROR(GENERIC); @@ -506,11 +505,11 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t * Counting histogram ****************************************************************/ /*! FSE_count_simple - This function just counts byte values within @src, - and store the histogram into @count. - This function is unsafe : it doesn't check that all values within @src can fit into @count. - For this reason, prefer using a table @count with 256 elements. - @return : highest count for a single element + This function just counts byte values within `src`, + and store the histogram into table `count`. + This function is unsafe : it doesn't check that all values within `src` can fit into `count`. + For this reason, prefer using a table `count` with 256 elements. 
+ @return : count of most numerous element */ static size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize) @@ -519,7 +518,6 @@ static size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, const BYTE* const end = ip + srcSize; unsigned maxSymbolValue = *maxSymbolValuePtr; unsigned max=0; - U32 s; memset(count, 0, (maxSymbolValue+1)*sizeof(*count)); if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } @@ -529,7 +527,7 @@ static size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, while (!count[maxSymbolValue]) maxSymbolValue--; *maxSymbolValuePtr = maxSymbolValue; - for (s=0; s<=maxSymbolValue; s++) if (count[s] > max) max = count[s]; + { U32 s; for (s=0; s<=maxSymbolValue; s++) if (count[s] > max) max = count[s]; } return (size_t)max; } @@ -543,7 +541,6 @@ static size_t FSE_count_parallel(unsigned* count, unsigned* maxSymbolValuePtr, const BYTE* const iend = ip+sourceSize; unsigned maxSymbolValue = *maxSymbolValuePtr; unsigned max=0; - U32 s; U32 Counting1[256] = { 0 }; U32 Counting2[256] = { 0 }; @@ -558,8 +555,8 @@ static size_t FSE_count_parallel(unsigned* count, unsigned* maxSymbolValuePtr, } if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */ - { /* by stripes of 16 bytes */ - U32 cached = MEM_read32(ip); ip += 4; + /* by stripes of 16 bytes */ + { U32 cached = MEM_read32(ip); ip += 4; while (ip < iend-15) { U32 c = cached; cached = MEM_read32(ip); ip += 4; Counting1[(BYTE) c ]++; @@ -589,15 +586,15 @@ static size_t FSE_count_parallel(unsigned* count, unsigned* maxSymbolValuePtr, while (ipmaxSymbolValue; s--) { + U32 s; for (s=255; s>maxSymbolValue; s--) { Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall); } } - for (s=0; s<=maxSymbolValue; s++) { + { U32 s; for (s=0; s<=maxSymbolValue; s++) { count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; if (count[s] > max) max = count[s]; - } + 
}} while (!count[maxSymbolValue]) maxSymbolValue--; *maxSymbolValuePtr = maxSymbolValue; @@ -631,7 +628,7 @@ size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, `U16 maxSymbolValue;` `U16 nextStateNumber[1 << tableLog];` // This size is variable `FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];` // This size is variable -Allocation is manual, since C standard does not support variable-size structures. +Allocation is manual (C standard does not support variable-size structures). */ size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog) @@ -727,7 +724,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, /* all values are pretty poor; probably incompressible data (should have already been detected); find max, then give all remaining points to max */ - U32 maxV = 0, maxC =0; + U32 maxV = 0, maxC = 0; for (s=0; s<=maxSymbolValue; s++) if (count[s] > maxC) maxV=s, maxC=count[s]; norm[maxV] += (short)ToDistribute; @@ -765,8 +762,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */ if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ - { - U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 }; + { U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 }; U64 const scale = 62 - tableLog; U64 const step = ((U64)1<<62) / total; /* <== here, one division ! 
*/ U64 const vStep = 1ULL<<(scale-20); @@ -842,13 +838,11 @@ size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits) tableU16[s] = (U16)(tableSize + s); /* Build Symbol Transformation Table */ - { - const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits); + { const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits); for (s=0; s<=maxSymbolValue; s++) { symbolTT[s].deltaNbBits = deltaNbBits; symbolTT[s].deltaFindState = s-1; - } - } + } } return 0; } @@ -884,15 +878,13 @@ static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize, const BYTE* const istart = (const BYTE*) src; const BYTE* const iend = istart + srcSize; const BYTE* ip=iend; - - size_t errorCode; BIT_CStream_t bitC; FSE_CState_t CState1, CState2; /* init */ if (srcSize <= 2) return 0; - errorCode = BIT_initCStream(&bitC, dst, dstSize); - if (FSE_isError(errorCode)) return 0; + { size_t const errorCode = BIT_initCStream(&bitC, dst, dstSize); + if (FSE_isError(errorCode)) return 0; } #define FSE_FLUSHBITS(s) (fast ? 
BIT_flushBitsFast(s) : BIT_flushBits(s)) @@ -915,8 +907,7 @@ static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize, } /* 2 or 4 encoding per loop */ - for ( ; ip>istart ; ) - { + for ( ; ip>istart ; ) { FSE_encodeSymbol(&bitC, &CState2, *--ip); if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */ diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index c4cd7952..f098c02a 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -191,7 +191,7 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, const size_t tableSpace = (contentSize + hSize + h3Size) * sizeof(U32); /* Check if workSpace is large enough, alloc a new one if needed */ - { size_t const optSpace = ((1<seqStore.litFreq = (U32*)((void*)(zc->seqStore.dumpsStart + maxNbSeq)); zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (1<seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1); zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (1<seqStore.matchTable = (ZSTD_match_t*)((void*)(zc->seqStore.offCodeFreq + (1<seqStore.priceTable = (ZSTD_optimal_t*)((void*)(zc->seqStore.matchTable + ZSTD_OPT_NUM+1)); @@ -585,7 +585,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, FSE_CTable* CTable_OffsetBits = zc->offcodeCTable; FSE_CTable* CTable_MatchLength = zc->matchlengthCTable; U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ - const U16* const llTable = seqStorePtr->litLengthStart; + U16* const llTable = seqStorePtr->litLengthStart; const BYTE* const mlTable = seqStorePtr->matchLengthStart; const U32* const offsetTable = seqStorePtr->offsetStart; const U32* const offsetTableEnd = seqStorePtr->offset; @@ -636,26 +636,24 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, #define MAX_SEQ_FOR_STATIC_FSE 1000 /* LL codes */ -static const BYTE llCode[64] = { 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 16, 17, 17, 18, 18, 19, 19, - 20, 20, 20, 20, 21, 21, 
21, 21, - 22, 22, 22, 22, 22, 22, 22, 22, - 23, 23, 23, 23, 23, 23, 23, 23, - 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24 }; -static const BYTE deltaCode = 18; - - { size_t i; + { static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 16, 17, 17, 18, 18, 19, 19, + 20, 20, 20, 20, 21, 21, 21, 21, + 22, 22, 22, 22, 22, 22, 22, 22, + 23, 23, 23, 23, 23, 23, 23, 23, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24 }; + const BYTE deltaCode = 19; + size_t i; for (i=0; ilitLengthLong; - llCodeTable[i] = (ll>63) ? ZSTD_highbit(ll) + deltaCode : llCode[ll]; + if (llTable[i] == 65535) { ll = seqStorePtr->litLengthLong; llTable[i] = (U16)ll; } + llCodeTable[i] = (ll>63) ? ZSTD_highbit(ll) + deltaCode : LL_Code[ll]; } } /* CTable for Literal Lengths */ -#if 1 - { U32 max = 35; + { U32 max = MaxLL; size_t const mostFrequent = FSE_countFast(count, &max, llCodeTable, nbSeq); if ((mostFrequent == nbSeq) && (nbSeq > 2)) { *op++ = llCodeTable[0]; @@ -663,14 +661,8 @@ static const BYTE deltaCode = 18; LLtype = FSE_ENCODING_RLE; } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { LLtype = FSE_ENCODING_STATIC; - } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LLbits-1)))) { - static const S16 LL_defaultNorm[36] = { 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 1, 1, 1, 1, - 1, 1, 1, 1 }; - static const U32 LL_defaultNormLog = 6; - FSE_buildCTable(CTable_LitLength, LL_defaultNorm, 35, LL_defaultNormLog); + } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) { + FSE_buildCTable(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog); LLtype = FSE_ENCODING_RAW; } else { size_t NCountSize; @@ -684,31 +676,6 @@ static const BYTE deltaCode = 18; FSE_buildCTable(CTable_LitLength, norm, max, tableLog); LLtype = FSE_ENCODING_DYNAMIC; }} -#else - { U32 max = MaxLL; - size_t 
const mostFrequent = FSE_countFast(count, &max, llTable, nbSeq); - if ((mostFrequent == nbSeq) && (nbSeq > 2)) { - *op++ = llTable[0]; - FSE_buildCTable_rle(CTable_LitLength, (BYTE)max); - LLtype = FSE_ENCODING_RLE; - } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { - LLtype = FSE_ENCODING_STATIC; - } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LLbits-1)))) { - FSE_buildCTable_raw(CTable_LitLength, LLbits); - LLtype = FSE_ENCODING_RAW; - } else { - size_t NCountSize; - size_t nbSeq_1 = nbSeq; - const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max); - if (count[llTable[nbSeq-1]]>1) { count[llTable[nbSeq-1]]--; nbSeq_1--; } - FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); - NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ - if (FSE_isError(NCountSize)) return ERROR(GENERIC); - op += NCountSize; - FSE_buildCTable(CTable_LitLength, norm, max, tableLog); - LLtype = FSE_ENCODING_DYNAMIC; - }} -#endif // 0 /* Offset codes */ { size_t i; for (i=0; i 198618400) && (pos < 198618500)) printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n", - (U32)(literals - g_start), (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); + pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); #endif #if ZSTD_OPT_DEBUG == 3 if (offsetCode == 0) seqStorePtr->realRepSum++; @@ -2278,7 +2241,7 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* zc, ZSTD_validateParams(¶ms); { size_t const errorCode = ZSTD_resetCCtx_advanced(zc, params); - if (ZSTD_isError(errorCode)) return errorCode; } + if (ZSTD_isError(errorCode)) return errorCode; } /* Write Frame Header into ctx headerBuffer */ MEM_writeLE32(zc->headerBuffer, ZSTD_MAGICNUMBER); diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 3a6887e9..2c1acab5 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -531,6 +531,34 @@ FORCE_INLINE size_t ZSTD_buildSeqTable(FSE_DTable* DTable, U32 
type, U32 rawBits } +FORCE_INLINE size_t ZSTD_buildSeqTableLL(FSE_DTable* DTable, U32 type, U32 max, U32 maxLog, + const void* src, size_t srcSize) +{ + switch(type) + { + case FSE_ENCODING_RLE : + if (!srcSize) return ERROR(srcSize_wrong); + if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected); + FSE_buildDTable_rle(DTable, *(const BYTE*)src); /* if *src > max, data is corrupted */ + return 1; + case FSE_ENCODING_RAW : + FSE_buildDTable(DTable, LL_defaultNorm, max, LL_defaultNormLog); + return 0; + case FSE_ENCODING_STATIC: + return 0; + default : /* impossible */ + case FSE_ENCODING_DYNAMIC : + { U32 tableLog; + S16 norm[MaxSeq+1]; + size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); + if (FSE_isError(headerSize)) return ERROR(corruption_detected); + if (tableLog > maxLog) return ERROR(corruption_detected); + FSE_buildDTable(DTable, norm, max, tableLog); + return headerSize; + } } +} + + size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr, FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, const void* src, size_t srcSize) @@ -576,7 +604,7 @@ size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLen if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */ /* Build DTables */ - { size_t const bhSize = ZSTD_buildSeqTable(DTableLL, LLtype, LLbits, LLFSELog, ip, iend-ip); + { size_t const bhSize = ZSTD_buildSeqTableLL(DTableLL, LLtype, 35, LLFSELog, ip, iend-ip); if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); ip += bhSize; } @@ -612,21 +640,13 @@ typedef struct { static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) { - const BYTE* dumps = seqState->dumps; - const BYTE* const de = seqState->dumpsEnd; - size_t litLength, offset; - /* Literal length */ - litLength = FSE_peakSymbol(&(seqState->stateLL)); - if (litLength == MaxLL) { - const U32 add = 
*dumps++; - if (add < 255) litLength += add; - else { - litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no risk : dumps is always followed by seq tables > 1 byte */ - if (litLength&1) litLength>>=1, dumps += 3; - else litLength = (U16)(litLength)>>1, dumps += 2; - } - if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */ + U32 const litCode = FSE_peakSymbol(&(seqState->stateLL)); + { static const U32 LL_base[MaxLL+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, + 0x2000, 0x4000, 0x8000, 0x10000 }; + seq->litLength = LL_base[litCode] + BIT_readBits(&(seqState->DStream), LL_bits[litCode]); } /* Offset */ @@ -637,11 +657,12 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) 0x800000, 0x1000000, 0x2000000, 0x4000000, /*fake*/ 1, 1, 1, 1 }; U32 const offsetCode = FSE_peakSymbol(&(seqState->stateOffb)); /* <= maxOff, by table construction */ U32 const nbBits = offsetCode ? offsetCode-1 : 0; - offset = offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits); + size_t const offset = offsetCode ? offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits) : + litCode ? seq->offset : seqState->prevOffset; + if (offsetCode | !litCode) seqState->prevOffset = seq->offset; /* cmove */ + seq->offset = offset; if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); - if (offsetCode==0) offset = litLength ? 
seq->offset : seqState->prevOffset; - if (offsetCode | !litLength) seqState->prevOffset = seq->offset; /* cmove */ - FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* update */ + FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* update */ } /* Literal length update */ @@ -650,7 +671,9 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) /* MatchLength */ { size_t matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); + const BYTE* dumps = seqState->dumps; if (matchLength == MaxML) { + const BYTE* const de = seqState->dumpsEnd; const U32 add = *dumps++; if (add < 255) matchLength += add; else { @@ -662,13 +685,9 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) } matchLength += mls; seq->matchLength = matchLength; + seqState->dumps = dumps; } - /* save result */ - seq->litLength = litLength; - seq->offset = offset; - seqState->dumps = dumps; - #if 0 /* debug */ { static U64 totalDecoded = 0; @@ -799,13 +818,18 @@ static size_t ZSTD_decompressSequences( FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb); FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML); - for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) { + for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; nbSeq--) { size_t oneSeqSize; - nbSeq--; ZSTD_decodeSequence(&sequence, &seqState, mls); +#if 0 /* for debug */ + { U32 pos = (U32)(op-base); + if ((pos > 198618400) && (pos < 198618500)) + printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n", + pos, (U32)sequence.litLength, (U32)sequence.matchLength, (U32)sequence.offset); + } +#endif oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd); - if (ZSTD_isError(oneSeqSize)) - return oneSeqSize; + if (ZSTD_isError(oneSeqSize)) return oneSeqSize; op += oneSeqSize; } diff --git 
a/lib/zstd_internal.h b/lib/zstd_internal.h index 2b830e31..51784833 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -103,11 +103,10 @@ typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t; #define Litbits 8 #define MLbits 7 -#define LLbits 6 #define Offbits 5 #define MaxLit ((1<litLengthSum == 0) { ssPtr->litSum = (2<litLengthSum = (1<litLengthSum = MaxLL+1; ssPtr->matchLengthSum = (1<offCodeSum = (1<matchSum = (2<offCodeSum += ssPtr->offCodeFreq[u]; } } - + ZSTD_setLog2Prices(ssPtr); } @@ -243,7 +243,7 @@ static U32 ZSTD_insertBtAndGetAllMatches ( if (minMatch == 3) { /* HC3 match finder */ U32 matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip); - + if (matchIndex3>windowLow && (current - matchIndex3 < (1<<18))) { const BYTE* match; size_t currentMl=0; @@ -408,7 +408,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, const BYTE* const ilimit = iend - 8; const BYTE* const base = ctx->base; const BYTE* const prefixStart = base + ctx->dictLimit; - + U32 rep_2=REPCODE_STARTVALUE, rep_1=REPCODE_STARTVALUE; const U32 maxSearches = 1U << ctx->params.searchLog; const U32 sufficient_len = ctx->params.targetLength; @@ -733,7 +733,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, const BYTE* const dictBase = ctx->dictBase; const BYTE* const dictEnd = dictBase + dictLimit; const U32 lowLimit = ctx->lowLimit; - + U32 rep_2=REPCODE_STARTVALUE, rep_1=REPCODE_STARTVALUE; const U32 maxSearches = 1U << ctx->params.searchLog; const U32 sufficient_len = ctx->params.targetLength; @@ -1044,12 +1044,12 @@ _storeSequence: // cur, last_pos, best_mlen, best_off have to be set break; } else { const BYTE* repMatch = dictBase + ((anchor-base) - rep_2); - if ((repMatch + minMatch <= dictEnd) && (MEM_readMINMATCH(anchor, minMatch) == MEM_readMINMATCH(repMatch, minMatch))) + if ((repMatch + minMatch <= dictEnd) && (MEM_readMINMATCH(anchor, minMatch) == MEM_readMINMATCH(repMatch, minMatch))) mlen = (U32)ZSTD_count_2segments(anchor+minMatch, 
repMatch+minMatch, iend, dictEnd, prefixStart) + minMatch; else break; } - + offset = rep_2; rep_2 = rep_1; rep_1 = offset; /* swap offset history */ ZSTD_LOG_ENCODE("%d/%d: ENCODE REP literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(anchor-base), (int)(iend-base), (int)(0), (int)best_mlen, (int)(0), (int)rep_1, (int)rep_2); ZSTD_updatePrice(seqStorePtr, 0, anchor, 0, mlen-minMatch); From e91477c1716987479e10201c5f50d0f2f31fa1d4 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 21 Mar 2016 14:22:38 +0100 Subject: [PATCH 130/247] minor variable isolation --- lib/zstd_decompress.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 88734829..c90b8855 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -762,7 +762,7 @@ static size_t ZSTD_decompressSequences( BYTE* const ostart = (BYTE* const)dst; BYTE* op = ostart; BYTE* const oend = ostart + maxDstSize; - size_t errorCode, dumpsLength; + size_t dumpsLength; const BYTE* litPtr = dctx->litPtr; const BYTE* const litLimit_8 = litPtr + dctx->litBufSize - 8; const BYTE* const litEnd = litPtr + dctx->litSize; @@ -777,11 +777,12 @@ static size_t ZSTD_decompressSequences( const U32 mls = dctx->fParams.mml; /* Build Decoding Tables */ - errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength, + { size_t const errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength, DTableLL, DTableML, DTableOffb, ip, seqSize); - if (ZSTD_isError(errorCode)) return errorCode; - ip += errorCode; + if (ZSTD_isError(errorCode)) return errorCode; + ip += errorCode; + } /* Regen sequences */ if (nbSeq) { @@ -793,8 +794,8 @@ static size_t ZSTD_decompressSequences( seqState.dumps = dumps; seqState.dumpsEnd = dumps + dumpsLength; seqState.prevOffset = REPCODE_STARTVALUE; - errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip); - if (ERR_isError(errorCode)) return ERROR(corruption_detected); + { size_t const errorCode = 
BIT_initDStream(&(seqState.DStream), ip, iend-ip); + if (ERR_isError(errorCode)) return ERROR(corruption_detected); } FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL); FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb); FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML); @@ -804,8 +805,7 @@ static size_t ZSTD_decompressSequences( nbSeq--; ZSTD_decodeSequence(&sequence, &seqState, mls); oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd); - if (ZSTD_isError(oneSeqSize)) - return oneSeqSize; + if (ZSTD_isError(oneSeqSize)) return oneSeqSize; op += oneSeqSize; } From be4605d851ba2e9a2be5bb27999c2018f4c0db6b Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 21 Mar 2016 14:29:26 +0100 Subject: [PATCH 131/247] improved decompression speed --- lib/zstd_decompress.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 2c1acab5..2c3f560f 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -818,8 +818,9 @@ static size_t ZSTD_decompressSequences( FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb); FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML); - for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; nbSeq--) { + for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) { size_t oneSeqSize; + nbSeq--; ZSTD_decodeSequence(&sequence, &seqState, mls); #if 0 /* for debug */ { U32 pos = (U32)(op-base); From e7ec8f2b9b8ea06f45c54b167d808ead3d3b05db Mon Sep 17 00:00:00 2001 From: inikep Date: Mon, 21 Mar 2016 17:23:02 +0100 Subject: [PATCH 132/247] improved ratio --- lib/zstd_decompress.c | 33 ++- lib/zstd_internal.h | 4 +- lib/zstd_opt.h | 470 +++++++----------------------------------- 3 files changed, 89 insertions(+), 418 deletions(-) diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 
374e6371..74f84add 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -160,7 +160,7 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) dctx->hufTableX4[0] = HufLog; dctx->flagStaticTables = 0; dctx->fParams.mml = MINMATCH; /* overwritten by frame but forces ZSTD_btopt to MINMATCH in block mode */ - ZSTD_LOG_BLOCK("%p: ZSTD_decompressBegin searchLength=%d\n", dctx->base, dctx->params.searchLength); + ZSTD_LOG_BLOCK("%p: ZSTD_decompressBegin searchLength=%d\n", dctx->base, dctx->fParams.mml); return 0; } @@ -683,38 +683,35 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) if (offsetCode==0) offset = 0; if (offset < ZSTD_REP_NUM) { +#if 0 if (!litLength && offset <= 1) offset = 1-offset; - if (offset != 0) - { - #if 1 +#endif + if (offset != 0) { size_t temp = seqState->prevOffset[offset]; if (offset != 1) { if (offset == 3) seqState->prevOffset[3] = seqState->prevOffset[2]; seqState->prevOffset[2] = seqState->prevOffset[1]; } offset = temp; - #else - offset = seqState->prevOffset[1]; - #endif seqState->prevOffset[1] = seqState->prevOffset[0]; seqState->prevOffset[0] = offset; - } - else + } else { offset = seqState->prevOffset[0]; + } + + if (litLength == 0) { + size_t temp = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = temp; + // offset = seqState->prevOffset[0]; + } } else { offset -= ZSTD_REP_MOVE; - #if 0 - // seqState->prevOffset[3] = seqState->prevOffset[2]; - // seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset; - #else if (kSlotNew < 3) seqState->prevOffset[3] = seqState->prevOffset[2]; if (kSlotNew < 2) seqState->prevOffset[2] = seqState->prevOffset[1]; if (kSlotNew < 1) seqState->prevOffset[1] = seqState->prevOffset[0]; seqState->prevOffset[kSlotNew] = offset; - #endif } #else // ZSTD_REP_NUM == 1 #if 0 @@ -944,7 +941,7 @@ static size_t 
ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); - ZSTD_LOG_BLOCK("%p: ZSTD_decompressBlock_internal searchLength=%d\n", dctx->base, dctx->params.searchLength); + ZSTD_LOG_BLOCK("%p: ZSTD_decompressBlock_internal searchLength=%d\n", dctx->base, dctx->fParams.mml); /* Decode literals sub-block */ litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); @@ -1055,7 +1052,7 @@ size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) { ZSTD_decompressBegin_usingDict(dctx, dict, dictSize); - ZSTD_LOG_BLOCK("%p: ZSTD_decompressBegin_usingDict searchLength=%d\n", dctx->base, dctx->params.searchLength); + ZSTD_LOG_BLOCK("%p: ZSTD_decompressBegin_usingDict searchLength=%d\n", dctx->base, dctx->fParams.mml); ZSTD_checkContinuity(dctx, dst); return ZSTD_decompressFrame(dctx, dst, maxDstSize, src, srcSize); } diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index c4052ba8..0abdcdfa 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -67,7 +67,7 @@ #if 1 #define ZSTD_REP_NUM 4 #define ZSTD_REP_INIT 4 - #define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) + #define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) #else #define ZSTD_REP_NUM 1 #define ZSTD_REP_INIT 2 @@ -183,7 +183,7 @@ typedef struct { U32 off; U32 mlen; U32 litlen; - U32 rep[ZSTD_REP_NUM]; + U32 rep[ZSTD_REP_INIT]; } ZSTD_optimal_t; typedef struct { diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index a978c72f..1074cdfc 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -444,22 +444,21 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, opt[0].litlen = (U32)(ip - litstart); /* check repCode */ - for (int i=0; i sufficient_len || mlen >= ZSTD_OPT_NUM) { - best_mlen = mlen; best_off = i; cur = 0; last_pos = 1; + ip+=1; best_mlen = mlen; best_off = 0; cur = 0; last_pos = 1; goto _storeSequence; } - litlen = opt[0].litlen; + litlen = opt[0].litlen + 1; do { - price = ZSTD_getPrice(seqStorePtr, litlen, litstart, i, mlen - minMatch); - if (mlen 
> last_pos || price < opt[mlen].price) - SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */ + price = ZSTD_getPrice(seqStorePtr, litlen, litstart, 0, mlen - minMatch); + if (mlen + 1 > last_pos || price < opt[mlen + 1].price) + SET_PRICE(mlen + 1, mlen, 0, litlen, price); /* note : macro modifies last_pos */ mlen--; } while (mlen >= minMatch); } @@ -469,10 +468,8 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos); if (!last_pos && !match_num) { ip++; continue; } - opt[0].rep[0] = rep[0]; - opt[0].rep[1] = rep[1]; - opt[0].rep[2] = rep[2]; - opt[0].rep[3] = rep[3]; + for (int i=0; i sufficient_len) { @@ -525,7 +522,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, mlen = opt[cur].mlen; - if (opt[cur].off >= ZSTD_REP_NUM) { + if (opt[cur].off >= ZSTD_REP_NUM) { opt[cur].rep[3] = (kSlotNew < 3) ? opt[cur-mlen].rep[2] : opt[cur-mlen].rep[3]; opt[cur].rep[2] = (kSlotNew < 2) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; opt[cur].rep[1] = (kSlotNew < 1) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; @@ -533,11 +530,34 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, opt[cur].rep[kSlotNew] = opt[cur].off - ZSTD_REP_MOVE; ZSTD_LOG_ENCODE("%d: COPYREP_OFF cur=%d mlen=%d rep=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]); } else { +#if 1 opt[cur].rep[3] = (opt[cur].off > 2) ? opt[cur-mlen].rep[2] : opt[cur-mlen].rep[3]; opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; opt[cur].rep[1] = (opt[cur].off > 0) ? 
opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; opt[cur].rep[0] = opt[cur-mlen].rep[opt[cur].off]; - ZSTD_LOG_ENCODE("%d: COPYREP_NOR cur=%d mlen=%d rep=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]); + +#if 1 + if (cur!=mlen && opt[cur].litlen == 0) { + U32 temp = opt[cur].rep[0]; + opt[cur].rep[0] = opt[cur].rep[1]; + opt[cur].rep[1] = temp; + } +#endif +#else + if (cur!=mlen && opt[cur].litlen == 0) { + opt[cur].rep[3] = (opt[cur].off > 2) ? opt[cur-mlen].rep[2] : opt[cur-mlen].rep[3]; + opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; + opt[cur].rep[0] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; + opt[cur].rep[1] = opt[cur-mlen].rep[opt[cur].off]; + ZSTD_LOG_ENCODE("%d: COPYREP_SWI cur=%d mlen=%d rep=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]); + } else { + opt[cur].rep[3] = (opt[cur].off > 2) ? opt[cur-mlen].rep[2] : opt[cur-mlen].rep[3]; + opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; + opt[cur].rep[1] = (opt[cur].off > 0) ? 
opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; + opt[cur].rep[0] = opt[cur-mlen].rep[opt[cur].off]; + ZSTD_LOG_ENCODE("%d: COPYREP_NOR cur=%d mlen=%d rep=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]); + } +#endif } ZSTD_LOG_PARSER("%d: CURRENT_NoExt price[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep[1]=%d\n", (int)(inr-base), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep[0], opt[cur].rep[1]); @@ -547,11 +567,12 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, for (int i=0; i sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { best_mlen = mlen; - best_off = i; ZSTD_LOG_PARSER("%d: REP sufficient_len=%d best_mlen=%d best_off=%d last_pos=%d\n", (int)(inr-base), sufficient_len, best_mlen, best_off, last_pos); last_pos = cur + 1; goto _storeSequence; @@ -560,24 +581,25 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, if (opt[cur].mlen == 1) { litlen = opt[cur].litlen; if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, i, mlen - minMatch); + price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - minMatch); } else - price = ZSTD_getPrice(seqStorePtr, litlen, litstart, i, mlen - minMatch); + price = ZSTD_getPrice(seqStorePtr, litlen, litstart, best_off, mlen - minMatch); } else { litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, i, mlen - minMatch); + price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - minMatch); } best_mlen = mlen; - ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, i, price, litlen); + ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, best_off, price, litlen); do { if (cur + mlen > last_pos || price <= opt[cur + mlen].price) - SET_PRICE(cur + mlen, mlen, i, litlen, price); + SET_PRICE(cur + mlen, mlen, best_off, litlen, price); mlen--; } while 
(mlen >= minMatch); } + match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches); ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num); @@ -624,8 +646,8 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, /* store sequence */ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ for (u = 1; u <= last_pos; u++) - ZSTD_LOG_PARSER("%d: price[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep[1]=%d\n", (int)(ip-base+u), u, last_pos, opt[u].price, opt[u].off, opt[u].mlen, opt[u].litlen, opt[u].rep, opt[u].rep[1]); - ZSTD_LOG_PARSER("%d: cur=%d/%d best_mlen=%d best_off=%d rep=%d\n", (int)(ip-base+cur), (int)cur, (int)last_pos, (int)best_mlen, (int)best_off, opt[cur].rep); + ZSTD_LOG_PARSER("%d: price[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep[1]=%d\n", (int)(ip-base+u), u, last_pos, opt[u].price, opt[u].off, opt[u].mlen, opt[u].litlen, opt[u].rep[0], opt[u].rep[1]); + ZSTD_LOG_PARSER("%d: cur=%d/%d best_mlen=%d best_off=%d rep=%d\n", (int)(ip-base+cur), (int)cur, (int)last_pos, (int)best_mlen, (int)best_off, opt[cur].rep[0]); opt[0].mlen = 1; U32 offset; @@ -642,12 +664,12 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ } for (u = 0; u <= last_pos;) { - ZSTD_LOG_PARSER("%d: price2[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep[1]=%d\n", (int)(ip-base+u), u, last_pos, opt[u].price, opt[u].off, opt[u].mlen, opt[u].litlen, opt[u].rep, opt[u].rep[1]); + ZSTD_LOG_PARSER("%d: price2[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep[1]=%d\n", (int)(ip-base+u), u, last_pos, opt[u].price, opt[u].off, opt[u].mlen, opt[u].litlen, opt[u].rep[0], opt[u].rep[1]); u += opt[u].mlen; } for (cur=0; cur < last_pos; ) { - ZSTD_LOG_PARSER("%d: price3[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep[1]=%d\n", (int)(ip-base+cur), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep, opt[cur].rep[1]); + ZSTD_LOG_PARSER("%d: price3[%d/%d]=%d off=%d mlen=%d 
litlen=%d rep=%d rep[1]=%d\n", (int)(ip-base+cur), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep[0], opt[cur].rep[1]); mlen = opt[cur].mlen; if (mlen == 1) { ip++; cur++; continue; } offset = opt[cur].off; @@ -662,7 +684,22 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ if (kSlotNew < 1) rep[1] = rep[0]; rep[kSlotNew] = offset - ZSTD_REP_MOVE; } else { - if (offset != 0) { + U32 temp = rep[offset]; + rep[3] = (offset > 2) ? rep[2] : rep[3]; + rep[2] = (offset > 1) ? rep[1] : rep[2]; + rep[1] = (offset > 0) ? rep[0] : rep[1]; + rep[0] = temp; + +#if 1 + if (litLength == 0) { + temp = rep[0]; + rep[0] = rep[1]; + rep[1] = temp; + } +#endif + + +/* if (offset != 0) { size_t temp = rep[offset]; if (offset != 1) { if (offset == 3) rep[3] = rep[2]; @@ -672,7 +709,11 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ rep[0] = temp; } - if (offset<=1 && litLength==0) offset = 1-offset; + if (litLength == 0) { + best_off = rep[1]; + rep[1] = rep[0]; + rep[0] = best_off; + }*/ } // ZSTD_LOG_ENCODE("%d/%d: ENCODE2 literals=%d mlen=%d off=%d rep1=%d rep[1]=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[1], (int)rep_2); @@ -711,374 +752,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, const void* src, size_t srcSize, const U32 depth) { - seqStore_t* seqStorePtr = &(ctx->seqStore); - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const BYTE* litstart; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - const BYTE* const base = ctx->base; - const U32 dictLimit = ctx->dictLimit; - const BYTE* const prefixStart = base + dictLimit; - const BYTE* const dictBase = ctx->dictBase; - const BYTE* const dictEnd = dictBase + dictLimit; - const U32 lowLimit = ctx->lowLimit; - - const U32 maxSearches = 1U << ctx->params.searchLog; - const U32 
sufficient_len = ctx->params.targetLength; - const U32 mls = ctx->params.searchLength; - const U32 minMatch = (ctx->params.searchLength == 3) ? 3 : 4; - - ZSTD_optimal_t* opt = seqStorePtr->priceTable; - ZSTD_match_t* matches = seqStorePtr->matchTable; - const BYTE* inr; - U32 cur, match_num, last_pos, litlen, price; - - /* init */ - U32 rep[ZSTD_REP_INIT]; - for (int i=0; inextToUpdate3 = ctx->nextToUpdate; - ZSTD_resetSeqStore(seqStorePtr); - ZSTD_rescaleFreqs(seqStorePtr); - if ((ip - prefixStart) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE; - - ZSTD_LOG_BLOCK("%d: COMPBLOCK_OPT_EXTDICT srcSz=%d maxSrch=%d mls=%d sufLen=%d\n", (int)(ip-base), (int)srcSize, maxSearches, mls, sufficient_len); - - /* Match Loop */ - while (ip < ilimit) { - U32 u, offset, best_off=0; - U32 mlen=0, best_mlen=0; - U32 current = (U32)(ip-base); - memset(opt, 0, sizeof(ZSTD_optimal_t)); - last_pos = 0; - inr = ip; - litstart = ((U32)(ip - anchor) > 128) ? ip - 128 : anchor; - opt[0].litlen = (U32)(ip - litstart); - - /* check repCode */ - { - const U32 repIndex = (U32)(current+1 - rep[0]); - const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; - const BYTE* const repMatch = repBase + repIndex; - if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ - && (MEM_readMINMATCH(ip+1, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) { - /* repcode detected we should take it */ - const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; - mlen = (U32)ZSTD_count_2segments(ip+1+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; - - ZSTD_LOG_PARSER("%d: start try REP rep=%d mlen=%d\n", (int)(ip-base), (int)rep[0], (int)mlen); - if (depth==0 || mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { - ip+=1; best_mlen = mlen; best_off = 0; cur = 0; last_pos = 1; - goto _storeSequence; - } - - litlen = opt[0].litlen + 1; - do { - price = ZSTD_getPrice(seqStorePtr, litlen, litstart, 0, mlen - minMatch); - if (mlen + 1 > last_pos || price < opt[mlen + 1].price) - SET_PRICE(mlen + 1, mlen, 0, litlen, price); - mlen--; - } while (mlen >= minMatch); - } } - - best_mlen = (last_pos) ? last_pos : minMatch; - - match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches); /* first search (depth 0) */ - - ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos); - if (!last_pos && !match_num) { ip++; continue; } - - opt[0].rep[0] = rep[0]; - opt[0].rep[1] = rep[1]; - opt[0].mlen = 1; - - if (match_num && matches[match_num-1].len > sufficient_len) { - best_mlen = matches[match_num-1].len; - best_off = matches[match_num-1].off; - cur = 0; - last_pos = 1; - goto _storeSequence; - } - - // set prices using matches at position = 0 - for (u = 0; u < match_num; u++) { - mlen = (u>0) ? matches[u-1].len+1 : best_mlen; - best_mlen = (matches[u].len < ZSTD_OPT_NUM) ? 
matches[u].len : ZSTD_OPT_NUM; - ZSTD_LOG_PARSER("%d: start Found mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(ip-base), matches[u].len, matches[u].off, (int)best_mlen, (int)last_pos); - litlen = opt[0].litlen; - while (mlen <= best_mlen) { - price = ZSTD_getPrice(seqStorePtr, litlen, litstart, matches[u].off, mlen - minMatch); - if (mlen > last_pos || price < opt[mlen].price) - SET_PRICE(mlen, mlen, matches[u].off, litlen, price); - mlen++; - } } - - if (last_pos < minMatch) { - // ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ - ip++; continue; - } - - /* check further positions */ - for (cur = 1; cur <= last_pos; cur++) { - size_t cur_rep; - inr = ip + cur; - - if (opt[cur-1].mlen == 1) { - litlen = opt[cur-1].litlen + 1; - if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-litlen); - } else - price = ZSTD_getLiteralPrice(seqStorePtr, litlen, litstart); - } else { - litlen = 1; - price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1); - } - - if (cur > last_pos || price <= opt[cur].price) // || ((price == opt[cur].price) && (opt[cur-1].mlen == 1) && (cur != litlen))) - SET_PRICE(cur, 1, 0, litlen, price); - - if (cur == last_pos) break; - - if (inr > ilimit) // last match must start at a minimum distance of 8 from oend - continue; - - mlen = opt[cur].mlen; - - if (opt[cur].off) { - opt[cur].rep[1] = opt[cur-mlen].rep[0]; - opt[cur].rep[0]= opt[cur].off; - ZSTD_LOG_ENCODE("%d: COPYREP_OFF cur=%d mlen=%d rep=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]); - } else { - if (cur!=mlen && opt[cur].litlen == 0) { - opt[cur].rep[1] = opt[cur-mlen].rep[0]; - opt[cur].rep[0] = opt[cur-mlen].rep[1]; - ZSTD_LOG_ENCODE("%d: COPYREP_SWI cur=%d mlen=%d rep=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]); - } else { - opt[cur].rep[1] = opt[cur-mlen].rep[1]; - opt[cur].rep[0] = 
opt[cur-mlen].rep[0]; - ZSTD_LOG_ENCODE("%d: COPYREP_NOR cur=%d mlen=%d rep=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]); - } } - - ZSTD_LOG_PARSER("%d: CURRENT_Ext price[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep[1]=%d\n", (int)(inr-base), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep[0], opt[cur].rep[1]); - - best_mlen = 0; - - if (opt[cur].mlen != 1) { - cur_rep = opt[cur].rep[1]; - ZSTD_LOG_PARSER("%d: tryExt REP2 rep[1]=%u mlen=%u\n", (int)(inr-base), (U32)cur_rep, mlen); - } else { - cur_rep = opt[cur].rep[0]; - ZSTD_LOG_PARSER("%d: tryExt REP1 rep=%u mlen=%u\n", (int)(inr-base), (U32)cur_rep, mlen); - } - - const U32 repIndex = (U32)(current+cur - cur_rep); - const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; - const BYTE* const repMatch = repBase + repIndex; - if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ - && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) { - /* repcode detected */ - const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; - mlen = (U32)ZSTD_count_2segments(inr+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; - ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d rep=%d opt[%d].off=%d\n", (int)(inr-base), mlen, 0, opt[cur].rep, cur, opt[cur].off); - - if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { - best_mlen = mlen; - best_off = 0; - ZSTD_LOG_PARSER("%d: REP sufficient_len=%d best_mlen=%d best_off=%d last_pos=%d\n", (int)(inr-base), sufficient_len, best_mlen, best_off, last_pos); - last_pos = cur + 1; - goto _storeSequence; - } - - if (opt[cur].mlen == 1) { - litlen = opt[cur].litlen; - if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, 0, mlen - minMatch); - } else - price = ZSTD_getPrice(seqStorePtr, litlen, litstart, 0, mlen - minMatch); - } else { - litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, 0, mlen - minMatch); - } - - best_mlen = mlen; - - ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, 0, price, litlen); - - do { - if (cur + mlen > last_pos || price <= opt[cur + mlen].price) // || ((price == opt[cur + mlen].price) && (opt[cur].mlen == 1) && (cur != litlen))) // at equal price prefer REP instead of MATCH - SET_PRICE(cur + mlen, mlen, 0, litlen, price); - mlen--; - } while (mlen >= minMatch); - } - - best_mlen = (best_mlen > minMatch) ? best_mlen : minMatch; - - match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches); - ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num); - - if (match_num > 0 && matches[match_num-1].len > sufficient_len) { - best_mlen = matches[match_num-1].len; - best_off = matches[match_num-1].off; - last_pos = cur + 1; - goto _storeSequence; - } - - // set prices using matches at position = cur - for (u = 0; u < match_num; u++) { - mlen = (u>0) ? 
matches[u-1].len+1 : best_mlen; - best_mlen = (cur + matches[u].len < ZSTD_OPT_NUM) ? matches[u].len : ZSTD_OPT_NUM - cur; - - // ZSTD_LOG_PARSER("%d: Found1 cur=%d mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(inr-base), cur, matches[u].len, matches[u].off, best_mlen, last_pos); - - while (mlen <= best_mlen) { - if (opt[cur].mlen == 1) { - litlen = opt[cur].litlen; - if (cur > litlen) - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off, mlen - minMatch); - else - price = ZSTD_getPrice(seqStorePtr, litlen, litstart, matches[u].off, mlen - minMatch); - } else { - litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off, mlen - minMatch); - } - - // ZSTD_LOG_PARSER("%d: Found2 mlen=%d best_mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, best_mlen, matches[u].off, price, litlen); - - if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) - SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price); - - mlen++; - } } } // for (cur = 1; cur <= last_pos; cur++) - - best_mlen = opt[last_pos].mlen; - best_off = opt[last_pos].off; - cur = last_pos - best_mlen; - - /* store sequence */ -_storeSequence: // cur, last_pos, best_mlen, best_off have to be set - for (u = 1; u <= last_pos; u++) - ZSTD_LOG_PARSER("%d: price[%u/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep[1]=%d\n", (int)(ip-base+u), u, last_pos, opt[u].price, opt[u].off, opt[u].mlen, opt[u].litlen, opt[u].rep, opt[u].rep[1]); - ZSTD_LOG_PARSER("%d: cur=%d/%d best_mlen=%d best_off=%d rep=%d\n", (int)(ip-base+cur), (int)cur, (int)last_pos, (int)best_mlen, (int)best_off, opt[cur].rep); - - opt[0].mlen = 1; - - while (1) { - mlen = opt[cur].mlen; - offset = opt[cur].off; - opt[cur].mlen = best_mlen; - opt[cur].off = best_off; - best_mlen = mlen; - best_off = offset; - if (mlen > cur) break; - cur -= mlen; - } - - for (u = 0; u <= last_pos; ) { - ZSTD_LOG_PARSER("%d: price2[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d 
rep[1]=%d\n", (int)(ip-base+u), u, last_pos, opt[u].price, opt[u].off, opt[u].mlen, opt[u].litlen, opt[u].rep, opt[u].rep[1]); - u += opt[u].mlen; - } - - for (cur=0; cur < last_pos; ) { - U32 litLength; - ZSTD_LOG_PARSER("%d: price3[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep[1]=%d\n", (int)(ip-base+cur), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep, opt[cur].rep[1]); - mlen = opt[cur].mlen; - if (mlen == 1) { ip++; cur++; continue; } - offset = opt[cur].off; - cur += mlen; - - litLength = (U32)(ip - anchor); - ZSTD_LOG_ENCODE("%d/%d: ENCODE1 literals=%d mlen=%d off=%d rep1=%d rep[1]=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[0], (int)rep[1]); - - if (offset) { - rep[1] = rep[0]; - rep[0] = offset; - } else { - if (litLength == 0) { - best_off = rep[1]; - rep[1] = rep[0]; - rep[0] = best_off; - } } - - ZSTD_LOG_ENCODE("%d/%d: ENCODE2 literals=%d mlen=%d off=%d rep1=%d rep[1]=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[0], (int)rep[1]); - -#if ZSTD_OPT_DEBUG >= 5 - U32 ml2; - if (offset) { - if (offset > (size_t)(ip - prefixStart)) { - const BYTE* match = dictEnd - (offset - (ip - prefixStart)); - ml2 = ZSTD_count_2segments(ip, match, iend, dictEnd, prefixStart); - ZSTD_LOG_PARSER("%d: ZSTD_count_2segments=%d offset=%d dictBase=%p dictEnd=%p prefixStart=%p ip=%p match=%p\n", (int)current, (int)ml2, (int)offset, dictBase, dictEnd, prefixStart, ip, match); - } - else ml2 = (U32)ZSTD_count(ip, ip-offset, iend); - } - else ml2 = (U32)ZSTD_count(ip, ip-rep[0], iend); - if ((offset >= 8) && (ml2 < mlen || ml2 < minMatch)) { - printf("%d: ERROR_Ext iend=%d mlen=%d offset=%d ml2=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset, (int)ml2); exit(0); } - if (ip < anchor) { - printf("%d: ERROR_Ext ip < anchor iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } - if 
(ip + mlen > iend) { - printf("%d: ERROR_Ext ip + mlen >= iend iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } -#endif - - ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-minMatch); - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset ? offset + ZSTD_REP_MOVE : 0, mlen-minMatch); - anchor = ip = ip + mlen; - } - -#if 0 - /* check immediate repcode */ - while ((anchor >= base + lowLimit + rep[1]) && (anchor <= ilimit)) { - if ((anchor - rep[1]) >= prefixStart) { - if (MEM_readMINMATCH(anchor, minMatch) == MEM_readMINMATCH(anchor - rep[1], minMatch)) - mlen = (U32)ZSTD_count(anchor+minMatch, anchor - rep[1] + minMatch, iend) + minMatch; - else - break; - } else { - const BYTE* repMatch = dictBase + ((anchor-base) - rep[1]); - if ((repMatch + minMatch <= dictEnd) && (MEM_readMINMATCH(anchor, minMatch) == MEM_readMINMATCH(repMatch, minMatch))) - mlen = (U32)ZSTD_count_2segments(anchor+minMatch, repMatch+minMatch, iend, dictEnd, prefixStart) + minMatch; - else - break; - } - - offset = rep[1]; rep[1] = rep[0]; rep[0] = offset; /* swap offset history */ - ZSTD_LOG_ENCODE("%d/%d: ENCODE REP literals=%d mlen=%d off=%d rep1=%d rep[1]=%d\n", (int)(anchor-base), (int)(iend-base), (int)(0), (int)best_mlen, (int)(0), (int)rep[0], (int)rep[1]); - ZSTD_updatePrice(seqStorePtr, 0, anchor, 0, mlen-minMatch); - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, mlen-minMatch); - anchor += mlen; - } -#else - /* check immediate repcode */ - /* minimal correctness condition = while ((anchor >= prefixStart + REPCODE_STARTVALUE) && (anchor <= ilimit)) { */ - while ((anchor >= base + lowLimit + rep[1]) && (anchor <= ilimit)) { - const U32 repIndex = (U32)((anchor-base) - rep[1]); - const BYTE* const repBase = repIndex < dictLimit ? 
dictBase : base; - const BYTE* const repMatch = repBase + repIndex; - if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ - && (MEM_readMINMATCH(anchor, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) { - /* repcode detected, let's take it */ - const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - mlen = (U32)ZSTD_count_2segments(anchor+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; - offset = rep[1]; rep[1] = rep[0]; rep[0] = offset; /* swap offset history */ - ZSTD_LOG_ENCODE("%d/%d: ENCODE REP literals=%d mlen=%d off=%d rep1=%d rep[1]=%d\n", (int)(anchor-base), (int)(iend-base), (int)(0), (int)best_mlen, (int)(0), (int)rep[0], (int)rep[1]); - ZSTD_updatePrice(seqStorePtr, 0, anchor, 0, mlen-minMatch); - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, mlen-minMatch); - anchor += mlen; - continue; /* faster when present ... (?) */ - } - break; - } -#endif - if (anchor > ip) ip = anchor; - } - - { /* Last Literals */ - size_t lastLLSize = iend - anchor; - ZSTD_LOG_ENCODE("%d: lastLLSize literals=%u\n", (int)(ip-base), (U32)(lastLLSize)); - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } + (void)ctx; (void)src; (void)srcSize; (void)depth; + (void)ZSTD_BtGetAllMatches_selectMLS_extDict; + printf("ZSTD_compressBlock_opt_extDict_generic\n"), exit(0); } From fc1d7bd93bdea912fbbb54c5bb0ebf655a12bc01 Mon Sep 17 00:00:00 2001 From: inikep Date: Mon, 21 Mar 2016 19:02:16 +0100 Subject: [PATCH 133/247] changed update logic of repcodes --- lib/zstd_decompress.c | 18 ++++++------ lib/zstd_opt.h | 66 +++++++++++++++++++------------------------ 2 files changed, 37 insertions(+), 47 deletions(-) diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 74f84add..500157ef 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -683,10 +683,14 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) if (offsetCode==0) offset = 0; if (offset < 
ZSTD_REP_NUM) { -#if 0 - if (!litLength && offset <= 1) - offset = 1-offset; -#endif + if (litLength == 0) { + if (offset <= 1) offset = 1-offset; + offset = seqState->prevOffset[offset]; + size_t temp = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = temp; + } + else if (offset != 0) { size_t temp = seqState->prevOffset[offset]; if (offset != 1) { @@ -700,12 +704,6 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) offset = seqState->prevOffset[0]; } - if (litLength == 0) { - size_t temp = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = temp; - // offset = seqState->prevOffset[0]; - } } else { offset -= ZSTD_REP_MOVE; if (kSlotNew < 3) seqState->prevOffset[3] = seqState->prevOffset[2]; diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index 1074cdfc..df65d690 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -530,19 +530,18 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, opt[cur].rep[kSlotNew] = opt[cur].off - ZSTD_REP_MOVE; ZSTD_LOG_ENCODE("%d: COPYREP_OFF cur=%d mlen=%d rep=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]); } else { -#if 1 - opt[cur].rep[3] = (opt[cur].off > 2) ? opt[cur-mlen].rep[2] : opt[cur-mlen].rep[3]; - opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; - opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; - opt[cur].rep[0] = opt[cur-mlen].rep[opt[cur].off]; - #if 1 if (cur!=mlen && opt[cur].litlen == 0) { - U32 temp = opt[cur].rep[0]; - opt[cur].rep[0] = opt[cur].rep[1]; - opt[cur].rep[1] = temp; + opt[cur].rep[0] = opt[cur-mlen].rep[1]; + opt[cur].rep[1] = opt[cur-mlen].rep[0]; + opt[cur].rep[2] = opt[cur-mlen].rep[2]; + opt[cur].rep[3] = opt[cur-mlen].rep[3]; + } else { + opt[cur].rep[3] = (opt[cur].off > 2) ? opt[cur-mlen].rep[2] : opt[cur-mlen].rep[3]; + opt[cur].rep[2] = (opt[cur].off > 1) ? 
opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; + opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; + opt[cur].rep[0] = opt[cur-mlen].rep[opt[cur].off]; } -#endif #else if (cur!=mlen && opt[cur].litlen == 0) { opt[cur].rep[3] = (opt[cur].off > 2) ? opt[cur-mlen].rep[2] : opt[cur-mlen].rep[3]; @@ -569,10 +568,13 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - opt[cur].rep[i], iend) + minMatch; ZSTD_LOG_PARSER("%d: Found REP %d/%d mlen=%d off=%d rep=%d opt[%d].off=%d\n", (int)(inr-base), i, ZSTD_REP_NUM, mlen, i, opt[cur].rep[i], cur, opt[cur].off); - best_off = i;//(i<=1 && opt[cur].mlen != 1) ? 1-i : i; + best_off = (i<=1 && opt[cur].mlen != 1) ? 1-i : i; if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { best_mlen = mlen; +#if 1 + best_off = i; +#endif ZSTD_LOG_PARSER("%d: REP sufficient_len=%d best_mlen=%d best_off=%d last_pos=%d\n", (int)(inr-base), sufficient_len, best_mlen, best_off, last_pos); last_pos = cur + 1; goto _storeSequence; @@ -594,7 +596,11 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, do { if (cur + mlen > last_pos || price <= opt[cur + mlen].price) +#if 1 + SET_PRICE(cur + mlen, mlen, i, litlen, price); +#else SET_PRICE(cur + mlen, mlen, best_off, litlen, price); +#endif mlen--; } while (mlen >= minMatch); } @@ -684,37 +690,23 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ if (kSlotNew < 1) rep[1] = rep[0]; rep[kSlotNew] = offset - ZSTD_REP_MOVE; } else { - U32 temp = rep[offset]; - rep[3] = (offset > 2) ? rep[2] : rep[3]; - rep[2] = (offset > 1) ? rep[1] : rep[2]; - rep[1] = (offset > 0) ? 
rep[0] : rep[1]; - rep[0] = temp; - -#if 1 if (litLength == 0) { - temp = rep[0]; + U32 temp = rep[0]; rep[0] = rep[1]; rep[1] = temp; - } -#endif - - -/* if (offset != 0) { - size_t temp = rep[offset]; - if (offset != 1) { - if (offset == 3) rep[3] = rep[2]; - rep[2] = rep[1]; + if (offset<=1) offset = 1-offset; + } else { + if (offset != 0) { + size_t temp = rep[offset]; + if (offset != 1) { + if (offset == 3) rep[3] = rep[2]; + rep[2] = rep[1]; + } + rep[1] = rep[0]; + rep[0] = temp; } - rep[1] = rep[0]; - rep[0] = temp; } - - if (litLength == 0) { - best_off = rep[1]; - rep[1] = rep[0]; - rep[0] = best_off; - }*/ - } + } // ZSTD_LOG_ENCODE("%d/%d: ENCODE2 literals=%d mlen=%d off=%d rep1=%d rep[1]=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[1], (int)rep_2); From 378be62a9e4e055c53eaa6a7956f0be49f8bae96 Mon Sep 17 00:00:00 2001 From: inikep Date: Mon, 21 Mar 2016 19:48:54 +0100 Subject: [PATCH 134/247] better repcodes --- lib/zstd_opt.h | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index df65d690..cd1ef187 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -444,21 +444,23 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, opt[0].litlen = (U32)(ip - litstart); /* check repCode */ - if (MEM_readMINMATCH(ip+1, minMatch) == MEM_readMINMATCH(ip+1 - rep[0], minMatch)) { + for (int i=0; i sufficient_len || mlen >= ZSTD_OPT_NUM) { - ip+=1; best_mlen = mlen; best_off = 0; cur = 0; last_pos = 1; + best_mlen = mlen; best_off = i; cur = 0; last_pos = 1; goto _storeSequence; } - litlen = opt[0].litlen + 1; + litlen = opt[0].litlen; do { - price = ZSTD_getPrice(seqStorePtr, litlen, litstart, 0, mlen - minMatch); - if (mlen + 1 > last_pos || price < opt[mlen + 1].price) - SET_PRICE(mlen + 1, mlen, 0, litlen, price); /* note : macro modifies last_pos */ + price = ZSTD_getPrice(seqStorePtr, litlen, litstart, best_off, mlen - minMatch); + if 
(mlen > last_pos || price < opt[mlen].price) + SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */ mlen--; } while (mlen >= minMatch); } @@ -531,7 +533,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, ZSTD_LOG_ENCODE("%d: COPYREP_OFF cur=%d mlen=%d rep=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]); } else { #if 1 - if (cur!=mlen && opt[cur].litlen == 0) { + if (opt[cur].litlen == 0) { opt[cur].rep[0] = opt[cur-mlen].rep[1]; opt[cur].rep[1] = opt[cur-mlen].rep[0]; opt[cur].rep[2] = opt[cur-mlen].rep[2]; @@ -571,10 +573,8 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, best_off = (i<=1 && opt[cur].mlen != 1) ? 1-i : i; if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { - best_mlen = mlen; -#if 1 + best_mlen = mlen; best_off = i; -#endif ZSTD_LOG_PARSER("%d: REP sufficient_len=%d best_mlen=%d best_off=%d last_pos=%d\n", (int)(inr-base), sufficient_len, best_mlen, best_off, last_pos); last_pos = cur + 1; goto _storeSequence; @@ -596,11 +596,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, do { if (cur + mlen > last_pos || price <= opt[cur + mlen].price) -#if 1 SET_PRICE(cur + mlen, mlen, i, litlen, price); -#else - SET_PRICE(cur + mlen, mlen, best_off, litlen, price); -#endif mlen--; } while (mlen >= minMatch); } From f2fa0e11982e9c38aca83c6fa3931c1b18a30c7d Mon Sep 17 00:00:00 2001 From: inikep Date: Mon, 21 Mar 2016 20:14:03 +0100 Subject: [PATCH 135/247] final repcodes for ZSTD_btopt --- lib/zstd_decompress.c | 10 ++------ lib/zstd_opt.h | 55 +++++++++++-------------------------------- 2 files changed, 16 insertions(+), 49 deletions(-) diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 500157ef..f0475bf2 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -683,14 +683,8 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) if (offsetCode==0) offset = 0; if (offset < ZSTD_REP_NUM) { - if (litLength == 0) { - if (offset <= 
1) offset = 1-offset; - offset = seqState->prevOffset[offset]; - size_t temp = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = temp; - } - else + if (litLength == 0 && offset <= 1) offset = 1-offset; + if (offset != 0) { size_t temp = seqState->prevOffset[offset]; if (offset != 1) { diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index cd1ef187..c41c8c88 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -532,33 +532,11 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, opt[cur].rep[kSlotNew] = opt[cur].off - ZSTD_REP_MOVE; ZSTD_LOG_ENCODE("%d: COPYREP_OFF cur=%d mlen=%d rep=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]); } else { -#if 1 - if (opt[cur].litlen == 0) { - opt[cur].rep[0] = opt[cur-mlen].rep[1]; - opt[cur].rep[1] = opt[cur-mlen].rep[0]; - opt[cur].rep[2] = opt[cur-mlen].rep[2]; - opt[cur].rep[3] = opt[cur-mlen].rep[3]; - } else { - opt[cur].rep[3] = (opt[cur].off > 2) ? opt[cur-mlen].rep[2] : opt[cur-mlen].rep[3]; - opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; - opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; - opt[cur].rep[0] = opt[cur-mlen].rep[opt[cur].off]; - } -#else - if (cur!=mlen && opt[cur].litlen == 0) { - opt[cur].rep[3] = (opt[cur].off > 2) ? opt[cur-mlen].rep[2] : opt[cur-mlen].rep[3]; - opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; - opt[cur].rep[0] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; - opt[cur].rep[1] = opt[cur-mlen].rep[opt[cur].off]; - ZSTD_LOG_ENCODE("%d: COPYREP_SWI cur=%d mlen=%d rep=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]); - } else { - opt[cur].rep[3] = (opt[cur].off > 2) ? opt[cur-mlen].rep[2] : opt[cur-mlen].rep[3]; - opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; - opt[cur].rep[1] = (opt[cur].off > 0) ? 
opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; - opt[cur].rep[0] = opt[cur-mlen].rep[opt[cur].off]; - ZSTD_LOG_ENCODE("%d: COPYREP_NOR cur=%d mlen=%d rep=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]); - } -#endif + opt[cur].rep[3] = (opt[cur].off > 2) ? opt[cur-mlen].rep[2] : opt[cur-mlen].rep[3]; + opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; + opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; + opt[cur].rep[0] = opt[cur-mlen].rep[opt[cur].off]; + ZSTD_LOG_ENCODE("%d: COPYREP_NOR cur=%d mlen=%d rep=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]); } ZSTD_LOG_PARSER("%d: CURRENT_NoExt price[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep[1]=%d\n", (int)(inr-base), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep[0], opt[cur].rep[1]); @@ -686,22 +664,17 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ if (kSlotNew < 1) rep[1] = rep[0]; rep[kSlotNew] = offset - ZSTD_REP_MOVE; } else { - if (litLength == 0) { - U32 temp = rep[0]; - rep[0] = rep[1]; - rep[1] = temp; - if (offset<=1) offset = 1-offset; - } else { - if (offset != 0) { - size_t temp = rep[offset]; - if (offset != 1) { - if (offset == 3) rep[3] = rep[2]; - rep[2] = rep[1]; - } - rep[1] = rep[0]; - rep[0] = temp; + if (offset != 0) { + size_t temp = rep[offset]; + if (offset != 1) { + if (offset == 3) rep[3] = rep[2]; + rep[2] = rep[1]; } + rep[1] = rep[0]; + rep[0] = temp; } + + if (litLength == 0 && offset<=1) offset = 1-offset; } // ZSTD_LOG_ENCODE("%d/%d: ENCODE2 literals=%d mlen=%d off=%d rep1=%d rep[1]=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[1], (int)rep_2); From 5a854af006bf2ab28133c4cf3ec3d30c53feb73a Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 22 Mar 2016 00:22:50 +0100 Subject: [PATCH 136/247] Fixed #153, reported by @thatsafunnyname --- 
programs/Makefile | 22 +++++++++++----------- programs/zstdcli.c | 30 +++++++++++++++--------------- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/programs/Makefile b/programs/Makefile index ca2ba99f..6aca9768 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -215,19 +215,19 @@ test-zbuff: zbufftest test-zbuff32: zbufftest32 ./zbufftest32 $(ZBUFFTEST) +valgrindTest: VALGRIND = valgrind --leak-check=full --error-exitcode=1 valgrindTest: zstd datagen fuzzer fullbench zbufftest @echo "\n ---- valgrind tests : memory analyzer ----" - valgrind --leak-check=yes --error-exitcode=1 ./datagen -g50M > $(VOID) - ./datagen -g16KB > tmp - valgrind --leak-check=yes --error-exitcode=1 ./zstd -vf tmp -o $(VOID) - ./datagen -g2930KB > tmp - valgrind --leak-check=yes --error-exitcode=1 ./zstd -5 -vf tmp -o tmp2 - valgrind --leak-check=yes --error-exitcode=1 ./zstd -vdf tmp2 -o $(VOID) - ./datagen -g64MB > tmp - valgrind --leak-check=yes --error-exitcode=1 ./zstd -vf tmp -o $(VOID) + $(VALGRIND) ./datagen -g50M > $(VOID) + $(VALGRIND) ./zstd ; if [ $$? 
-eq 0 ] ; then echo "zstd without argument should have failed"; false; fi + ./datagen -g80 | $(VALGRIND) ./zstd - -c > $(VOID) + ./datagen -g16KB | $(VALGRIND) ./zstd -vf - -o $(VOID) + ./datagen -g2930KB | $(VALGRIND) ./zstd -5 -vf - -o tmp + $(VALGRIND) ./zstd -vdf tmp -o $(VOID) + ./datagen -g64MB | $(VALGRIND) ./zstd -vf - -o $(VOID) @rm tmp - valgrind --leak-check=yes --error-exitcode=1 ./fuzzer -T1mn -t1 - valgrind --leak-check=yes --error-exitcode=1 ./fullbench -i1 - valgrind --leak-check=yes --error-exitcode=1 ./zbufftest -T1mn + $(VALGRIND) ./fuzzer -T1mn -t1 + $(VALGRIND) ./fullbench -i1 + $(VALGRIND) ./zbufftest -T1mn endif diff --git a/programs/zstdcli.c b/programs/zstdcli.c index adfaa793..86d6bff7 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -167,6 +167,8 @@ static void waitEnter(void) } +#define CLEAN_RETURN(i) { operationResult = (i); goto _end; } + int main(int argCount, const char** argv) { int i, @@ -211,8 +213,8 @@ int main(int argCount, const char** argv) /* long commands (--long-word) */ if (!strcmp(argument, "--decompress")) { decode=1; continue; } if (!strcmp(argument, "--force")) { FIO_overwriteMode(); continue; } - if (!strcmp(argument, "--version")) { displayOut=stdout; DISPLAY(WELCOME_MESSAGE); return 0; } - if (!strcmp(argument, "--help")) { displayOut=stdout; return usage_advanced(programName); } + if (!strcmp(argument, "--version")) { displayOut=stdout; DISPLAY(WELCOME_MESSAGE); CLEAN_RETURN(0); } + if (!strcmp(argument, "--help")) { displayOut=stdout; CLEAN_RETURN(usage_advanced(programName)); } if (!strcmp(argument, "--verbose")) { displayLevel=4; continue; } if (!strcmp(argument, "--quiet")) { displayLevel--; continue; } if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; displayLevel=1; continue; } @@ -244,16 +246,16 @@ int main(int argCount, const char** argv) } dictCLevel = cLevel; if (dictCLevel > ZSTD_maxCLevel()) - return badusage(programName); + CLEAN_RETURN(badusage(programName)); 
continue; } switch(argument[0]) { /* Display help */ - case 'V': displayOut=stdout; DISPLAY(WELCOME_MESSAGE); return 0; /* Version Only */ + case 'V': displayOut=stdout; DISPLAY(WELCOME_MESSAGE); CLEAN_RETURN(0); /* Version Only */ case 'H': - case 'h': displayOut=stdout; return usage_advanced(programName); + case 'h': displayOut=stdout; CLEAN_RETURN(usage_advanced(programName)); /* Decoding */ case 'd': decode=1; argument++; break; @@ -288,8 +290,7 @@ int main(int argCount, const char** argv) /* Modify Nb Iterations (benchmark only) */ case 'i': - { - int iters= 0; + { U32 iters= 0; argument++; while ((*argument >='0') && (*argument <='9')) iters *= 10, iters += *argument++ - '0'; @@ -299,8 +300,7 @@ int main(int argCount, const char** argv) /* cut input into blocks (benchmark only) */ case 'B': - { - size_t bSize = 0; + { size_t bSize = 0; argument++; while ((*argument >='0') && (*argument <='9')) bSize *= 10, bSize += *argument++ - '0'; @@ -329,11 +329,11 @@ int main(int argCount, const char** argv) case 'p': main_pause=1; argument++; break; /* unknown command */ - default : return badusage(programName); + default : CLEAN_RETURN(badusage(programName)); } } continue; - } + } /* if (argument[0]=='-') */ if (nextEntryIsDictionary) { nextEntryIsDictionary = 0; @@ -389,17 +389,17 @@ int main(int argCount, const char** argv) if(!filenameIdx) filenameIdx=1, filenameTable[0]=stdinmark, outFileName=stdoutmark; /* Check if input/output defined as console; trigger an error in this case */ - if (!strcmp(filenameTable[0], stdinmark) && IS_CONSOLE(stdin) ) return badusage(programName); - if (outFileName && !strcmp(outFileName, stdoutmark) && IS_CONSOLE(stdout) && !forceStdout) return badusage(programName); + if (!strcmp(filenameTable[0], stdinmark) && IS_CONSOLE(stdin) ) CLEAN_RETURN(badusage(programName)); + if (outFileName && !strcmp(outFileName, stdoutmark) && IS_CONSOLE(stdout) && !forceStdout) CLEAN_RETURN(badusage(programName)); /* user-selected output filename, only 
possible with a single file */ if (outFileName && strcmp(outFileName,stdoutmark) && strcmp(outFileName,nulmark) && (filenameIdx>1)) { DISPLAY("Too many files (%u) on the command line. \n", filenameIdx); - return filenameIdx; + CLEAN_RETURN(filenameIdx); } /* No warning message in pipe mode (stdin + stdout) or multiple mode */ - if (!strcmp(filenameTable[0], stdinmark) && !strcmp(outFileName,stdoutmark) && (displayLevel==2)) displayLevel=1; + if (!strcmp(filenameTable[0], stdinmark) && outFileName && !strcmp(outFileName,stdoutmark) && (displayLevel==2)) displayLevel=1; if ((filenameIdx>1) && (displayLevel==2)) displayLevel=1; /* IO Stream/File */ From 2b942881ac258727cbc9870e27c7d60206350a17 Mon Sep 17 00:00:00 2001 From: inikep Date: Tue, 22 Mar 2016 11:56:22 +0100 Subject: [PATCH 137/247] improved 4reps in ZSTD_compressBlock_lazy_generic kSlotNew = 0 --- lib/zstd_compress.c | 14 +++++++------- lib/zstd_decompress.c | 11 +++++++++-- lib/zstd_internal.h | 2 +- programs/Makefile | 2 +- programs/bench.c | 16 ++++++++-------- 5 files changed, 26 insertions(+), 19 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 397e7618..e9b9d4a6 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -1617,8 +1617,8 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, if (depth==0) goto _storeSequence; } else { size_t mlRep = ZSTD_count(ip+MINMATCH, ip+MINMATCH-rep[i], iend) + MINMATCH; - int gain2 = (int)(mlRep * 3 /*- ZSTD_highbit((U32)i+1)*/); - int gain1 = (int)(matchLength*3 - /*ZSTD_highbit((U32)offset+1)*/ + 1); + int gain2 = (int)(mlRep * 3 /*- ZSTD_highbit((U32)i+1)*/ + (i==1)); + int gain1 = (int)(matchLength*3 - /*ZSTD_highbit((U32)offset+1)*/ + 1 + (offset==1)); if (gain2 > gain1) matchLength = mlRep, offset = i; } @@ -1642,10 +1642,10 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, while (ip= ZSTD_REP_NUM) && (MEM_read32(ip) == MEM_read32(ip - rep[i]))) { + if (MEM_read32(ip) == MEM_read32(ip - rep[i])) { size_t mlRep = 
ZSTD_count(ip+MINMATCH, ip+MINMATCH-rep[i], iend) + MINMATCH; int gain2 = (int)(mlRep * 3); - int gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1); + int gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1 + (offset= MINMATCH) && (gain2 > gain1)) matchLength = mlRep, offset = i, start = ip; } @@ -1663,10 +1663,10 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, if ((depth==2) && (ip= ZSTD_REP_NUM) && (MEM_read32(ip) == MEM_read32(ip - rep[i]))) { + if (MEM_read32(ip) == MEM_read32(ip - rep[i])) { size_t ml2 = ZSTD_count(ip+MINMATCH, ip+MINMATCH-rep[i], iend) + MINMATCH; int gain2 = (int)(ml2 * 4); - int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 1); + int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 1 + (offset= MINMATCH) && (gain2 > gain1)) matchLength = ml2, offset = i, start = ip; } @@ -1693,7 +1693,7 @@ _storeSequence: { #if ZSTD_REP_NUM == 4 if (offset >= ZSTD_REP_NUM) { -#if 0 +#if 1 rep[3] = rep[2]; rep[2] = rep[1]; rep[1] = rep[0]; diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index f0475bf2..651087c4 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -691,19 +691,26 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) if (offset == 3) seqState->prevOffset[3] = seqState->prevOffset[2]; seqState->prevOffset[2] = seqState->prevOffset[1]; } - offset = temp; seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset; + seqState->prevOffset[0] = offset = temp; + } else { offset = seqState->prevOffset[0]; } } else { offset -= ZSTD_REP_MOVE; +#if 1 // faster without kSlotNew + seqState->prevOffset[3] = seqState->prevOffset[2]; + seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; +#else if (kSlotNew < 3) seqState->prevOffset[3] = seqState->prevOffset[2]; if (kSlotNew < 2) seqState->prevOffset[2] = seqState->prevOffset[1]; if 
(kSlotNew < 1) seqState->prevOffset[1] = seqState->prevOffset[0]; seqState->prevOffset[kSlotNew] = offset; +#endif } #else // ZSTD_REP_NUM == 1 #if 0 diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index 0abdcdfa..5809f875 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -71,7 +71,7 @@ #else #define ZSTD_REP_NUM 1 #define ZSTD_REP_INIT 2 - #define ZSTD_REP_MOVE 0//(ZSTD_REP_NUM-1) + #define ZSTD_REP_MOVE 0 //(ZSTD_REP_NUM-1) #endif #define KB *(1 <<10) diff --git a/programs/Makefile b/programs/Makefile index ca2ba99f..cd432f79 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -46,7 +46,7 @@ DESTDIR?= PREFIX ?= /usr/local CPPFLAGS= -I../lib -DZSTD_VERSION=\"$(VERSION)\" CFLAGS ?= -O3 # -falign-loops=32 # not always beneficial -CFLAGS += -std=c99 -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wstrict-prototypes -Wundef +CFLAGS += -std=gnu99 -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wstrict-prototypes -Wundef FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MOREFLAGS) BINDIR = $(PREFIX)/bin diff --git a/programs/bench.c b/programs/bench.c index 5808c2ec..20f2ee0a 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -52,14 +52,14 @@ #include /* fprintf, fopen, ftello64 */ #include /* stat64 */ #include /* stat64 */ -#include /* clock_t, clock, CLOCKS_PER_SEC */ +#include /* clock_t, clock, nanosleep, CLOCKS_PER_SEC */ #ifdef WINDOWS - #define mili_sleep(mili) Sleep(mili) + #define mili_sleep(mili) Sleep(mili) #else - #define mili_sleep(mili) usleep(mili*1000) #include /* setpriority */ + #define mili_sleep(mili) { struct timespec t; t.tv_sec=0; t.tv_nsec=mili*1000000L; nanosleep(&t, NULL); } #endif - + /* sleep : posix - windows - others */ #if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__))) # include @@ -379,11 +379,11 @@ _findError: if (crcOrig == crcCheck) { - DISPLAYLEVEL(2, "%2i-%-17.17s :%10i ->%10i 
(%5.3f),%6.1f MB/s ,%6.1f MB/s \n", cLevel, displayName, (int)srcSize, (int)cSize, ratio, (double)srcSize / fastestC / 1000., (double)srcSize / fastestD / 1000.); result->ratio = ratio; result->cSize = cSize; - result->cSpeed = (double)srcSize / fastestC / 1000.; - result->dSpeed = (double)srcSize / fastestD / 1000.; + result->cSpeed = (double)srcSize / 1000000. / (fastestC / CLOCKS_PER_SEC); + result->dSpeed = (double)srcSize / 1000000. / (fastestD / CLOCKS_PER_SEC); + DISPLAYLEVEL(2, "%2i-%-17.17s :%10i ->%10i (%5.3f),%6.1f MB/s ,%6.1f MB/s \n", cLevel, displayName, (int)srcSize, (int)cSize, ratio, result->cSpeed, result->dSpeed); } else DISPLAYLEVEL(2, "%2i-\n", cLevel); @@ -449,7 +449,7 @@ static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize, dictBuffer, dictBufferSize, &result); if (g_displayLevel == 1) { if (additionalParam) - DISPLAY("%-3i%11i (%5.3f) %6.1f MB/s %6.1f MB/s %s (p=%d)\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName, additionalParam); + DISPLAY("%-3i%11i (%5.3f) %6.1f MB/s %6.1f MB/s %s (param=%d)\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName, additionalParam); else DISPLAY("%-3i%11i (%5.3f) %6.1f MB/s %6.1f MB/s %s\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName); total.cSize += result.cSize; From fadda6c875f5eb20a16c1f97d8589c8e850e99f2 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 22 Mar 2016 12:14:26 +0100 Subject: [PATCH 138/247] first prototype with ML encoding scheme (but not yet decoding scheme) --- lib/zstd_compress.c | 289 ++++++++++++++++++++------------------------ lib/zstd_internal.h | 19 ++- programs/bench.c | 2 +- 3 files changed, 148 insertions(+), 162 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index f098c02a..d0cd50ea 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -184,7 +184,7 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, const size_t blockSize = 
MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params.windowLog); const U32 divider = (params.searchLength==3) ? 3 : 4; const size_t maxNbSeq = blockSize / divider; - const size_t tokenSpace = blockSize + 10*maxNbSeq; + const size_t tokenSpace = blockSize + 12*maxNbSeq; const size_t contentSize = (params.strategy == ZSTD_fast) ? 0 : (1 << params.contentLog); const size_t hSize = 1 << params.hashLog; const size_t h3Size = (params.searchLength==3) ? (1 << HASHLOG3) : 0; @@ -222,11 +222,12 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, zc->seqStore.offsetStart = (U32*) (zc->seqStore.buffer); zc->seqStore.litLengthStart = (U16*) (void*)(zc->seqStore.offsetStart + maxNbSeq); - zc->seqStore.llCodeStart = (BYTE*) (zc->seqStore.litLengthStart + maxNbSeq); - zc->seqStore.offCodeStart = zc->seqStore.llCodeStart + maxNbSeq; + zc->seqStore.matchLengthStart = (U16*) (void*)(zc->seqStore.litLengthStart + maxNbSeq); + zc->seqStore.llCodeStart = (BYTE*) (zc->seqStore.matchLengthStart + maxNbSeq); + zc->seqStore.mlCodeStart = zc->seqStore.llCodeStart + maxNbSeq; + zc->seqStore.offCodeStart = zc->seqStore.mlCodeStart + maxNbSeq; zc->seqStore.litStart = zc->seqStore.offCodeStart + maxNbSeq; - zc->seqStore.matchLengthStart = zc->seqStore.litStart + blockSize; - zc->seqStore.dumpsStart = zc->seqStore.matchLengthStart + maxNbSeq; + zc->seqStore.dumpsStart = zc->seqStore.litStart + maxNbSeq; if (params.strategy == ZSTD_btopt) { zc->seqStore.litFreq = (U32*)((void*)(zc->seqStore.dumpsStart + maxNbSeq)); zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<matchlengthCTable; U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ U16* const llTable = seqStorePtr->litLengthStart; - const BYTE* const mlTable = seqStorePtr->matchLengthStart; + U16* const mlTable = seqStorePtr->matchLengthStart; const U32* const offsetTable = seqStorePtr->offsetStart; const U32* const offsetTableEnd = seqStorePtr->offset; BYTE* const offCodeTable = seqStorePtr->offCodeStart; BYTE* const llCodeTable = 
seqStorePtr->llCodeStart; + BYTE* const mlCodeTable = seqStorePtr->mlCodeStart; BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + dstCapacity; BYTE* op = ostart; size_t const nbSeq = offsetTableEnd - offsetTable; BYTE* seqHead; + static U32 blockNb = 0; + blockNb++; + + if (blockNb==79) + blockNb += !nbSeq; + /* Compress literals */ { const BYTE* const literals = seqStorePtr->litStart; size_t const litSize = seqStorePtr->lit - literals; @@ -644,88 +652,106 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 }; - const BYTE deltaCode = 19; - size_t i; - for (i=0; ilitLengthLong; llTable[i] = (U16)ll; } - llCodeTable[i] = (ll>63) ? ZSTD_highbit(ll) + deltaCode : LL_Code[ll]; + const BYTE LL_deltaCode = 19; + size_t u; + for (u=0; ulongLength; llTable[u] = (U16)ll; } + llCodeTable[u] = (ll>63) ? ZSTD_highbit(ll) + LL_deltaCode : LL_Code[ll]; } } /* CTable for Literal Lengths */ - { U32 max = MaxLL; - size_t const mostFrequent = FSE_countFast(count, &max, llCodeTable, nbSeq); - if ((mostFrequent == nbSeq) && (nbSeq > 2)) { - *op++ = llCodeTable[0]; - FSE_buildCTable_rle(CTable_LitLength, (BYTE)max); - LLtype = FSE_ENCODING_RLE; - } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { - LLtype = FSE_ENCODING_STATIC; - } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) { - FSE_buildCTable(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog); - LLtype = FSE_ENCODING_RAW; - } else { - size_t NCountSize; - size_t nbSeq_1 = nbSeq; - const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max); - if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; } - FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); - NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ - if (FSE_isError(NCountSize)) return ERROR(GENERIC); - op += NCountSize; - 
FSE_buildCTable(CTable_LitLength, norm, max, tableLog); - LLtype = FSE_ENCODING_DYNAMIC; - }} + { U32 max = MaxLL; + size_t const mostFrequent = FSE_countFast(count, &max, llCodeTable, nbSeq); + if ((mostFrequent == nbSeq) && (nbSeq > 2)) { + *op++ = llCodeTable[0]; + FSE_buildCTable_rle(CTable_LitLength, (BYTE)max); + LLtype = FSE_ENCODING_RLE; + } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { + LLtype = FSE_ENCODING_STATIC; + } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) { + FSE_buildCTable(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog); + LLtype = FSE_ENCODING_RAW; + } else { + size_t NCountSize; + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max); + if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; } + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); + NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; + FSE_buildCTable(CTable_LitLength, norm, max, tableLog); + LLtype = FSE_ENCODING_DYNAMIC; + } } /* Offset codes */ { size_t i; for (i=0; i 2)) { - *op++ = offCodeTable[0]; - FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max); - Offtype = FSE_ENCODING_RLE; - } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { - Offtype = FSE_ENCODING_STATIC; - } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (Offbits-1)))) { - FSE_buildCTable_raw(CTable_OffsetBits, Offbits); - Offtype = FSE_ENCODING_RAW; - } else { - size_t NCountSize; - size_t nbSeq_1 = nbSeq; - const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max); - if (count[offCodeTable[nbSeq-1]]>1) { count[offCodeTable[nbSeq-1]]--; nbSeq_1--; } - FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); - NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ - if 
(FSE_isError(NCountSize)) return ERROR(GENERIC); - op += NCountSize; - FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog); - Offtype = FSE_ENCODING_DYNAMIC; - }} + { U32 max = MaxOff; + size_t const mostFrequent = FSE_countFast(count, &max, offCodeTable, nbSeq); + if ((mostFrequent == nbSeq) && (nbSeq > 2)) { + *op++ = offCodeTable[0]; + FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max); + Offtype = FSE_ENCODING_RLE; + } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { + Offtype = FSE_ENCODING_STATIC; + } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (Offbits-1)))) { + FSE_buildCTable_raw(CTable_OffsetBits, Offbits); + Offtype = FSE_ENCODING_RAW; + } else { + size_t NCountSize; + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max); + if (count[offCodeTable[nbSeq-1]]>1) { count[offCodeTable[nbSeq-1]]--; nbSeq_1--; } + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); + NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; + FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog); + Offtype = FSE_ENCODING_DYNAMIC; + } } + + /* ML codes */ + { static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 20, 31, + 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, + 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 }; + const BYTE ML_deltaCode = 36; + size_t u; + for (u=0; ulongLength; mlTable[u] = (U16)ml; } + mlCodeTable[u] = (ml>127) ? 
ZSTD_highbit(ml) + ML_deltaCode : ML_Code[ml]; + } } /* CTable for MatchLengths */ - { U32 max = MaxML; - size_t const mostFrequent = FSE_countFast(count, &max, mlTable, nbSeq); - if ((mostFrequent == nbSeq) && (nbSeq > 2)) { - *op++ = *mlTable; - FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max); - MLtype = FSE_ENCODING_RLE; - } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { - MLtype = FSE_ENCODING_STATIC; - } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (MLbits-1)))) { - FSE_buildCTable_raw(CTable_MatchLength, MLbits); - MLtype = FSE_ENCODING_RAW; - } else { - size_t NCountSize; - const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max); - FSE_normalizeCount(norm, tableLog, count, nbSeq, max); - NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ - if (FSE_isError(NCountSize)) return ERROR(GENERIC); - op += NCountSize; - FSE_buildCTable(CTable_MatchLength, norm, max, tableLog); - MLtype = FSE_ENCODING_DYNAMIC; - }} + { U32 max = MaxML; + size_t const mostFrequent = FSE_countFast(count, &max, mlCodeTable, nbSeq); + if ((mostFrequent == nbSeq) && (nbSeq > 2)) { + *op++ = *mlTable; + FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max); + MLtype = FSE_ENCODING_RLE; + } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { + MLtype = FSE_ENCODING_STATIC; + } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog-1)))) { + FSE_buildCTable(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog); + MLtype = FSE_ENCODING_RAW; + } else { + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max); + if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; } + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); + { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return 
ERROR(GENERIC); + op += NCountSize; } + FSE_buildCTable(CTable_MatchLength, norm, max, tableLog); + MLtype = FSE_ENCODING_DYNAMIC; + } } seqHead[0] += (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); zc->flagStaticTables = 0; @@ -739,55 +765,34 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, { size_t const errorCode = BIT_initCStream(&blockStream, op, oend-op); if (ERR_isError(errorCode)) return ERROR(dstSize_tooSmall); } /* not enough space remaining */ -#if 1 /* first symbols */ - FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlTable[nbSeq-1]); + FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, offCodeTable[nbSeq-1]); FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); + BIT_addBits(&blockStream, mlTable[nbSeq-1], ML_bits[mlCodeTable[nbSeq-1]]); BIT_addBits(&blockStream, offsetTable[nbSeq-1], offCodeTable[nbSeq-1] ? (offCodeTable[nbSeq-1]-1) : 0); BIT_addBits(&blockStream, llTable[nbSeq-1], LL_bits[llCodeTable[nbSeq-1]]); BIT_flushBits(&blockStream); - { size_t n; - for (n=nbSeq-2; n=0; i--) { - const BYTE mlCode = mlTable[i]; - const U32 offset = offsetTable[i]; - const BYTE offCode = offCodeTable[i]; /* 32b*/ /* 64b*/ - const U32 nbBits = (offCode-1) + (!offCode); - const BYTE litLength = llTable[i]; /* (7)*/ /* (7)*/ - FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 17 */ /* 17 */ - if (MEM_32bits()) BIT_flushBits(&blockStream); /* 7 */ - FSE_encodeSymbol(&blockStream, &stateLitLength, litLength); /* 17 */ /* 27 */ - FSE_encodeSymbol(&blockStream, &stateOffsetBits, offCode); /* 26 */ /* 36 */ - if (MEM_32bits()) BIT_flushBits(&blockStream); /* 7 */ - BIT_addBits(&blockStream, offset, nbBits); /* 31 */ /* 62 */ /* 24 bits max in 32-bits mode */ - BIT_flushBits(&blockStream); /* 7 */ /* 7 */ - }} -#endif // 0 + { size_t n; + for (n=nbSeq-2 ; n= maxCSize) return 0; - } + { size_t const minGain = ZSTD_minGain(srcSize); + size_t const 
maxCSize = srcSize - minGain; + if ((size_t)(op-ostart) >= maxCSize) return 0; } return op - ostart; } @@ -836,44 +840,15 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const B seqStorePtr->lit += litLength; /* literal Length */ -#if 1 - if (litLength>=65535) { *(seqStorePtr->litLength++) = 65535; seqStorePtr->litLengthLong = (U32)litLength; } + if (litLength>=65535) { *(seqStorePtr->litLength++) = 65535; seqStorePtr->longLength = (U32)litLength; } else *seqStorePtr->litLength++ = (U16)litLength; -#else - if (litLength >= MaxLL) { - *(seqStorePtr->litLength++) = MaxLL; - if (litLength<255 + MaxLL) { - *(seqStorePtr->dumps++) = (BYTE)(litLength - MaxLL); - } else { - *(seqStorePtr->dumps++) = 255; - if (litLength < (1<<15)) { - MEM_writeLE16(seqStorePtr->dumps, (U16)(litLength<<1)); - seqStorePtr->dumps += 2; - } else { - MEM_writeLE32(seqStorePtr->dumps, (U32)((litLength<<1)+1)); - seqStorePtr->dumps += 3; - } } } - else *(seqStorePtr->litLength++) = (BYTE)litLength; -#endif // 0 /* match offset */ *(seqStorePtr->offset++) = (U32)offsetCode; /* match Length */ - if (matchCode >= MaxML) { - *(seqStorePtr->matchLength++) = MaxML; - if (matchCode < 255+MaxML) { - *(seqStorePtr->dumps++) = (BYTE)(matchCode - MaxML); - } else { - *(seqStorePtr->dumps++) = 255; - if (matchCode < (1<<15)) { - MEM_writeLE16(seqStorePtr->dumps, (U16)(matchCode<<1)); - seqStorePtr->dumps += 2; - } else { - MEM_writeLE32(seqStorePtr->dumps, (U32)((matchCode<<1)+1)); - seqStorePtr->dumps += 3; - } } } - else *(seqStorePtr->matchLength++) = (BYTE)matchCode; + if (matchCode>=65535) { *(seqStorePtr->matchLength++) = 65535; seqStorePtr->longLength = (U32)matchCode; } + else *seqStorePtr->matchLength++ = (U16)matchCode; } diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index 51784833..3b83059b 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -105,7 +105,7 @@ typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t; #define MLbits 7 #define 
Offbits 5 #define MaxLit ((1< Date: Tue, 22 Mar 2016 13:05:26 +0100 Subject: [PATCH 139/247] #define _POSIX_C_SOURCE 199309L --- programs/Makefile | 2 +- programs/bench.c | 25 +++++++++++-------------- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/programs/Makefile b/programs/Makefile index cd432f79..ca2ba99f 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -46,7 +46,7 @@ DESTDIR?= PREFIX ?= /usr/local CPPFLAGS= -I../lib -DZSTD_VERSION=\"$(VERSION)\" CFLAGS ?= -O3 # -falign-loops=32 # not always beneficial -CFLAGS += -std=gnu99 -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wstrict-prototypes -Wundef +CFLAGS += -std=c99 -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wstrict-prototypes -Wundef FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MOREFLAGS) BINDIR = $(PREFIX)/bin diff --git a/programs/bench.c b/programs/bench.c index 20f2ee0a..a7b4a87a 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -44,31 +44,28 @@ /* ************************************* * Includes ***************************************/ -#if defined(WIN32) || defined(_WIN32) || defined(__WIN32__) || defined(WIN64) || defined(_WIN64) - #define WINDOWS -#endif +#define _POSIX_C_SOURCE 199309L /* before time.h */ #include /* malloc, free */ #include /* memset */ #include /* fprintf, fopen, ftello64 */ #include /* stat64 */ #include /* stat64 */ #include /* clock_t, clock, nanosleep, CLOCKS_PER_SEC */ -#ifdef WINDOWS - #define mili_sleep(mili) Sleep(mili) -#else - #include /* setpriority */ - #define mili_sleep(mili) { struct timespec t; t.tv_sec=0; t.tv_nsec=mili*1000000L; nanosleep(&t, NULL); } -#endif /* sleep : posix - windows - others */ #if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__))) # include +# include /* setpriority */ # define BMK_sleep(s) sleep(s) +# define mili_sleep(mili) { struct timespec t; t.tv_sec=0; t.tv_nsec=mili*1000000L; 
nanosleep(&t, NULL); } #elif defined(_WIN32) # include # define BMK_sleep(s) Sleep(1000*s) +# define mili_sleep(mili) Sleep(mili) #else # define BMK_sleep(s) /* disabled */ +# define mili_sleep(mili) /* disabled */ +#error "disabled" #endif #include "mem.h" @@ -424,7 +421,7 @@ static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize, { benchResult_t result, total; int l; -#ifdef WINDOWS +#ifdef _WIN32 SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS); #else setpriority(PRIO_PROCESS, 0, -20); @@ -437,8 +434,8 @@ static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize, memset(&result, 0, sizeof(result)); memset(&total, 0, sizeof(total)); - if (g_displayLevel == 1 && !additionalParam) - DISPLAY("bench %s: input %u bytes, %i iterations, %u KB blocks\n", ZSTD_VERSION, (U32)benchedSize, g_nbIterations, (U32)(g_blockSize>>10)); + // if (g_displayLevel == 1 && !additionalParam) + // DISPLAY("bench %s: input %u bytes, %i iterations, %u KB blocks\n", ZSTD_VERSION, (U32)benchedSize, g_nbIterations, (U32)(g_blockSize>>10)); if (cLevelLast < cLevel) cLevelLast = cLevel; @@ -448,8 +445,8 @@ static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize, fileSizes, nbFiles, dictBuffer, dictBufferSize, &result); if (g_displayLevel == 1) { - if (additionalParam) - DISPLAY("%-3i%11i (%5.3f) %6.1f MB/s %6.1f MB/s %s (param=%d)\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName, additionalParam); + if (1)// && additionalParam) + DISPLAY("%-3i%11i (%5.3f) %6.1f MB/s %6.1f MB/s %s (kSlotNew=%d)\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName, additionalParam); else DISPLAY("%-3i%11i (%5.3f) %6.1f MB/s %6.1f MB/s %s\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName); total.cSize += result.cSize; From be391438ff62d5d41088db1770093cea9c33ee7c Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 22 Mar 2016 23:19:28 +0100 Subject: [PATCH 140/247] first working 
version with both encoder and decode alternate LL + ML coding scheme. decompression speed highly impacted --- lib/zstd_compress.c | 44 ++++------------- lib/zstd_decompress.c | 107 ++++++++++++++++++++++-------------------- lib/zstd_internal.h | 7 +-- lib/zstd_opt.h | 2 +- programs/bench.c | 6 +-- 5 files changed, 71 insertions(+), 95 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index d0cd50ea..81db040c 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -80,7 +80,6 @@ static void ZSTD_resetSeqStore(seqStore_t* ssPtr) ssPtr->lit = ssPtr->litStart; ssPtr->litLength = ssPtr->litLengthStart; ssPtr->matchLength = ssPtr->matchLengthStart; - ssPtr->dumps = ssPtr->dumpsStart; } @@ -184,14 +183,14 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, const size_t blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params.windowLog); const U32 divider = (params.searchLength==3) ? 3 : 4; const size_t maxNbSeq = blockSize / divider; - const size_t tokenSpace = blockSize + 12*maxNbSeq; + const size_t tokenSpace = blockSize + 11*maxNbSeq; const size_t contentSize = (params.strategy == ZSTD_fast) ? 0 : (1 << params.contentLog); const size_t hSize = 1 << params.hashLog; const size_t h3Size = (params.searchLength==3) ? 
(1 << HASHLOG3) : 0; const size_t tableSpace = (contentSize + hSize + h3Size) * sizeof(U32); /* Check if workSpace is large enough, alloc a new one if needed */ - { size_t const optSpace = ((1<seqStore.mlCodeStart = zc->seqStore.llCodeStart + maxNbSeq; zc->seqStore.offCodeStart = zc->seqStore.mlCodeStart + maxNbSeq; zc->seqStore.litStart = zc->seqStore.offCodeStart + maxNbSeq; - zc->seqStore.dumpsStart = zc->seqStore.litStart + maxNbSeq; if (params.strategy == ZSTD_btopt) { - zc->seqStore.litFreq = (U32*)((void*)(zc->seqStore.dumpsStart + maxNbSeq)); + zc->seqStore.litFreq = (U32*)((void*)(zc->seqStore.litStart + blockSize)); zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1); - zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (1<seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML+1); zc->seqStore.matchTable = (ZSTD_match_t*)((void*)(zc->seqStore.offCodeFreq + (1<seqStore.priceTable = (ZSTD_optimal_t*)((void*)(zc->seqStore.matchTable + ZSTD_OPT_NUM+1)); zc->seqStore.litLengthSum = 0; @@ -599,12 +597,6 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, size_t const nbSeq = offsetTableEnd - offsetTable; BYTE* seqHead; - static U32 blockNb = 0; - blockNb++; - - if (blockNb==79) - blockNb += !nbSeq; - /* Compress literals */ { const BYTE* const literals = seqStorePtr->litStart; size_t const litSize = seqStorePtr->lit - literals; @@ -620,25 +612,8 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; if (nbSeq==0) goto _check_compressibility; - /* dumps : contains rests of large lengths */ - if ((oend-op) < 3 /* dumps */ + 1 /*seqHead*/) - return ERROR(dstSize_tooSmall); - seqHead = op; - { size_t const dumpsLength = seqStorePtr->dumps - seqStorePtr->dumpsStart; - if (dumpsLength < 512) { - op[0] = (BYTE)(dumpsLength >> 8); - op[1] = (BYTE)(dumpsLength); - op += 2; - } else { - op[0] = 2; - op[1] = (BYTE)(dumpsLength>>8); 
- op[2] = (BYTE)(dumpsLength); - op += 3; - } - if ((size_t)(oend-op) < dumpsLength+6) return ERROR(dstSize_tooSmall); - memcpy(op, seqStorePtr->dumpsStart, dumpsLength); - op += dumpsLength; - } + /* seqHead : flags for FSE encoding type */ + seqHead = op++; #define MIN_SEQ_FOR_DYNAMIC_FSE 64 #define MAX_SEQ_FOR_STATIC_FSE 1000 @@ -714,7 +689,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, /* ML codes */ { static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 20, 31, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, @@ -753,7 +728,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, MLtype = FSE_ENCODING_DYNAMIC; } } - seqHead[0] += (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); + *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); zc->flagStaticTables = 0; /* Encoding Sequences */ @@ -791,6 +766,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, BIT_addBits(&blockStream, mlTable[n], ML_bits[MLCode]); BIT_addBits(&blockStream, offset, nbBits); /* 31 */ /* 61 */ /* 24 bits max in 32-bits mode */ BIT_addBits(&blockStream, llTable[n], LL_bits[LLCode]); + //if (blockStream.bitPos > 63) printf("pb : blockStream.bitPos == %u > 63 \n", blockStream.bitPos); BIT_flushBits(&blockStream); /* 7 */ /* 7 */ } } @@ -824,7 +800,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const B static const BYTE* g_start = NULL; const U32 pos = (U32)(literals - g_start); if (g_start==NULL) g_start = literals; - if ((pos > 198618400) && (pos < 198618500)) + if ((pos > 10354000) && (pos < 10355000)) printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n", pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); #endif diff --git 
a/lib/zstd_decompress.c b/lib/zstd_decompress.c index b1f51561..96b8846c 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -559,9 +559,37 @@ FORCE_INLINE size_t ZSTD_buildSeqTableLL(FSE_DTable* DTable, U32 type, U32 max, } -size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr, - FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, - const void* src, size_t srcSize) +FORCE_INLINE size_t ZSTD_buildSeqTableML(FSE_DTable* DTable, U32 type, U32 max, U32 maxLog, + const void* src, size_t srcSize) +{ + switch(type) + { + case FSE_ENCODING_RLE : + if (!srcSize) return ERROR(srcSize_wrong); + if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected); + FSE_buildDTable_rle(DTable, *(const BYTE*)src); /* if *src > max, data is corrupted */ + return 1; + case FSE_ENCODING_RAW : + FSE_buildDTable(DTable, ML_defaultNorm, max, ML_defaultNormLog); + return 0; + case FSE_ENCODING_STATIC: + return 0; + default : /* impossible */ + case FSE_ENCODING_DYNAMIC : + { U32 tableLog; + S16 norm[MaxSeq+1]; + size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); + if (FSE_isError(headerSize)) return ERROR(corruption_detected); + if (tableLog > maxLog) return ERROR(corruption_detected); + FSE_buildDTable(DTable, norm, max, tableLog); + return headerSize; + } } +} + + +size_t ZSTD_decodeSeqHeaders(int* nbSeq, + FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, + const void* src, size_t srcSize) { const BYTE* const istart = (const BYTE* const)src; const BYTE* ip = istart; @@ -585,26 +613,13 @@ size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLen LLtype = *ip >> 6; Offtype = (*ip >> 4) & 3; MLtype = (*ip >> 2) & 3; - { size_t dumpsLength; - if (*ip & 2) { - dumpsLength = ip[2]; - dumpsLength += ip[1] << 8; - ip += 3; - } else { - dumpsLength = ip[1]; - dumpsLength += (ip[0] & 1) << 8; - ip += 2; - } - *dumpsPtr = ip; - ip += dumpsLength; - *dumpsLengthPtr 
= dumpsLength; - } + ip++; /* check */ if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */ /* Build DTables */ - { size_t const bhSize = ZSTD_buildSeqTableLL(DTableLL, LLtype, 35, LLFSELog, ip, iend-ip); + { size_t const bhSize = ZSTD_buildSeqTableLL(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip); if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); ip += bhSize; } @@ -612,7 +627,7 @@ size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLen if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); ip += bhSize; } - { size_t const bhSize = ZSTD_buildSeqTable(DTableML, MLtype, MLbits, MLFSELog, ip, iend-ip); + { size_t const bhSize = ZSTD_buildSeqTableML(DTableML, MLtype, MaxML, MLFSELog, ip, iend-ip); if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); ip += bhSize; } @@ -633,8 +648,6 @@ typedef struct { FSE_DState_t stateOffb; FSE_DState_t stateML; size_t prevOffset; - const BYTE* dumps; - const BYTE* dumpsEnd; } seqState_t; @@ -662,31 +675,26 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) if (offsetCode | !litCode) seqState->prevOffset = seq->offset; /* cmove */ seq->offset = offset; if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); - FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* update */ } - /* Literal length update */ + { static const U32 ML_base[MaxML+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 34, 36, 38, 40, 44, 48, 56, 64, 80, 96, 0x80, 0x100, 0x200, 0x400, 0x800, + 0x1000, 0x2000, 0x4000, 0x8000, 0x10000 }; + U32 const mlCode = FSE_peakSymbol(&(seqState->stateML)); + seq->matchLength = ML_base[mlCode] + BIT_readBits(&(seqState->DStream), ML_bits[mlCode]) + mls; + } + + /* ANS update */ FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream)); /* update */ if (MEM_32bits()) 
BIT_reloadDStream(&(seqState->DStream)); - /* MatchLength */ - { size_t matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); - const BYTE* dumps = seqState->dumps; - if (matchLength == MaxML) { - const BYTE* const de = seqState->dumpsEnd; - const U32 add = *dumps++; - if (add < 255) matchLength += add; - else { - matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */ - if (matchLength&1) matchLength>>=1, dumps += 3; - else matchLength = (U16)(matchLength)>>1, dumps += 2; - } - if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */ - } - matchLength += mls; - seq->matchLength = matchLength; - seqState->dumps = dumps; - } + FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* update */ + if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); + + FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); /* update */ + if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); #if 0 /* debug */ { @@ -781,12 +789,10 @@ static size_t ZSTD_decompressSequences( BYTE* const ostart = (BYTE* const)dst; BYTE* op = ostart; BYTE* const oend = ostart + maxDstSize; - size_t dumpsLength; const BYTE* litPtr = dctx->litPtr; const BYTE* const litLimit_8 = litPtr + dctx->litBufSize - 8; const BYTE* const litEnd = litPtr + dctx->litSize; int nbSeq; - const BYTE* dumps; U32* DTableLL = dctx->LLTable; U32* DTableML = dctx->MLTable; U32* DTableOffb = dctx->OffTable; @@ -796,7 +802,7 @@ static size_t ZSTD_decompressSequences( const U32 mls = dctx->fParams.mml; /* Build Decoding Tables */ - { size_t const errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength, + { size_t const errorCode = ZSTD_decodeSeqHeaders(&nbSeq, DTableLL, DTableML, DTableOffb, ip, seqSize); if (ZSTD_isError(errorCode)) return errorCode; @@ -810,8 +816,6 @@ static size_t ZSTD_decompressSequences( memset(&sequence, 0, sizeof(sequence)); sequence.offset = 
REPCODE_STARTVALUE; - seqState.dumps = dumps; - seqState.dumpsEnd = dumps + dumpsLength; seqState.prevOffset = REPCODE_STARTVALUE; { size_t const errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip); if (ERR_isError(errorCode)) return ERROR(corruption_detected); } @@ -825,7 +829,7 @@ static size_t ZSTD_decompressSequences( ZSTD_decodeSequence(&sequence, &seqState, mls); #if 0 /* for debug */ { U32 pos = (U32)(op-base); - if ((pos > 198618400) && (pos < 198618500)) + if ((pos > 10354000) && (pos < 10355000)) printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n", pos, (U32)sequence.litLength, (U32)sequence.matchLength, (U32)sequence.offset); } @@ -867,17 +871,16 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, const void* src, size_t srcSize) { /* blockType == blockCompressed */ const BYTE* ip = (const BYTE*)src; - size_t litCSize; if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); ZSTD_LOG_BLOCK("%p: ZSTD_decompressBlock_internal searchLength=%d\n", dctx->base, dctx->params.searchLength); /* Decode literals sub-block */ - litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); - if (ZSTD_isError(litCSize)) return litCSize; - ip += litCSize; - srcSize -= litCSize; + { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); + if (ZSTD_isError(litCSize)) return litCSize; + ip += litCSize; + srcSize -= litCSize; } return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize); } diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index 3b83059b..fa6c93ca 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -102,16 +102,15 @@ typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t; #define HASHLOG3 17 #define Litbits 8 -#define MLbits 7 #define Offbits 5 #define MaxLit ((1<litLengthSum == 0) { ssPtr->litSum = (2<litLengthSum = MaxLL+1; - ssPtr->matchLengthSum = (1<matchLengthSum = MaxML+1; ssPtr->offCodeSum = (1<matchSum = (2< /* fprintf, fopen, ftello64 */ #include /* stat64 
*/ #include /* stat64 */ -#include /* clock_t, clock, CLOCKS_PER_SEC */ +#include /* clock_t, clock, CLOCKS_PER_SEC */ /* sleep : posix - windows - others */ #if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__))) @@ -65,7 +65,7 @@ #include "mem.h" #include "zstd_static.h" #include "xxhash.h" -#include "datagen.h" /* RDG_genBuffer */ +#include "datagen.h" /* RDG_genBuffer */ /* ************************************* @@ -283,7 +283,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, (double)srcSize / 1000000. / (fastestC / CLOCKS_PER_SEC) ); (void)crcCheck; (void)fastestD; (void)crcOrig; /* unused when decompression disabled */ -#if 0 +#if 1 /* Decompression */ memset(resultBuffer, 0xD6, srcSize); /* warm result buffer */ From add08d6f61c69a5ea27ff4ced268585d2baca654 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 23 Mar 2016 01:32:41 +0100 Subject: [PATCH 141/247] minor variation - DSpeed at 640 --- lib/bitstream.h | 7 ++-- lib/fse_static.h | 33 +++++++++-------- lib/zdict.c | 3 ++ lib/zstd_compress.c | 24 +++++-------- lib/zstd_decompress.c | 83 +++++++++++++------------------------------ 5 files changed, 58 insertions(+), 92 deletions(-) diff --git a/lib/bitstream.h b/lib/bitstream.h index 0fe36eae..d90c9b24 100644 --- a/lib/bitstream.h +++ b/lib/bitstream.h @@ -246,8 +246,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) /*! BIT_initDStream() : * Initialize a BIT_DStream_t. * `bitD` : a pointer to an already allocated BIT_DStream_t structure. -* `srcBuffer` must point at the beginning of a bitStream. -* `srcSize` must be the exact size of the bitStream, in bytes. +* `srcSize` must be the *exact* size of the bitStream, in bytes. 
* @return : size of stream (== srcSize) or an errorCode if a problem is detected */ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) @@ -293,7 +292,7 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si * On 64-bits, maxNbBits==56. * @return : value extracted */ -MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits) +MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) { U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1; return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask); @@ -301,7 +300,7 @@ MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits) /*! BIT_lookBitsFast() : * unsafe version; only works only if nbBits >= 1 */ -MEM_STATIC size_t BIT_lookBitsFast(BIT_DStream_t* bitD, U32 nbBits) +MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) { U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1; return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask); diff --git a/lib/fse_static.h b/lib/fse_static.h index ca303db8..f3c3d44e 100644 --- a/lib/fse_static.h +++ b/lib/fse_static.h @@ -267,7 +267,7 @@ MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePt BIT_flushBits(bitC); } -/* decompression */ +/*<===== Decompression =====>*/ typedef struct { U16 tableLog; @@ -290,34 +290,39 @@ MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, con DStatePtr->table = dt + 1; } -MEM_STATIC size_t FSE_getStateValue(FSE_DState_t* DStatePtr) +MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr) { - return DStatePtr->state; + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + return DInfo.symbol; } -MEM_STATIC BYTE FSE_peakSymbol(FSE_DState_t* DStatePtr) +MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) { - const FSE_decode_t DInfo = 
((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; - return DInfo.symbol; + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.newState + lowBits; } MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) { - const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; - const U32 nbBits = DInfo.nbBits; - BYTE symbol = DInfo.symbol; - size_t lowBits = BIT_readBits(bitD, nbBits); + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBits(bitD, nbBits); DStatePtr->state = DInfo.newState + lowBits; return symbol; } +/*! FSE_decodeSymbolFast() : + unsafe, only works if no symbol has a probability > 50% */ MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) { - const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; - const U32 nbBits = DInfo.nbBits; - BYTE symbol = DInfo.symbol; - size_t lowBits = BIT_readBitsFast(bitD, nbBits); + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBitsFast(bitD, nbBits); DStatePtr->state = DInfo.newState + lowBits; return symbol; diff --git a/lib/zdict.c b/lib/zdict.c index 4c1ffb08..c99cabe1 100644 --- a/lib/zdict.c +++ b/lib/zdict.c @@ -611,10 +611,13 @@ static void ZDICT_countEStats(EStats_ress_t esr, if (*u32Ptr==0) offcode=0; offsetcodeCount[offcode]++; } + (void)matchlengthCount; (void)litlengthCount; + /* for(bytePtr = seqStore.matchLengthStart; bytePtr < seqStore.matchLength; bytePtr++) matchlengthCount[*bytePtr]++; for(bytePtr = seqStore.litLengthStart; bytePtr < 
seqStore.litLength; bytePtr++) litlengthCount[*bytePtr]++; + */ } static size_t ZDICT_maxSampleSize(const size_t* fileSizes, unsigned nbFiles) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 81db040c..5ae6d37d 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -648,14 +648,13 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, FSE_buildCTable(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog); LLtype = FSE_ENCODING_RAW; } else { - size_t NCountSize; size_t nbSeq_1 = nbSeq; const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max); if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; } FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); - NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ - if (FSE_isError(NCountSize)) return ERROR(GENERIC); - op += NCountSize; + { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; } FSE_buildCTable(CTable_LitLength, norm, max, tableLog); LLtype = FSE_ENCODING_DYNAMIC; } } @@ -675,14 +674,13 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, FSE_buildCTable_raw(CTable_OffsetBits, Offbits); Offtype = FSE_ENCODING_RAW; } else { - size_t NCountSize; size_t nbSeq_1 = nbSeq; const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max); if (count[offCodeTable[nbSeq-1]]>1) { count[offCodeTable[nbSeq-1]]--; nbSeq_1--; } FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); - NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ - if (FSE_isError(NCountSize)) return ERROR(GENERIC); - op += NCountSize; + { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; } FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog); Offtype = FSE_ENCODING_DYNAMIC; } } @@ -744,8 
+742,8 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, offCodeTable[nbSeq-1]); FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); - BIT_addBits(&blockStream, mlTable[nbSeq-1], ML_bits[mlCodeTable[nbSeq-1]]); BIT_addBits(&blockStream, offsetTable[nbSeq-1], offCodeTable[nbSeq-1] ? (offCodeTable[nbSeq-1]-1) : 0); + BIT_addBits(&blockStream, mlTable[nbSeq-1], ML_bits[mlCodeTable[nbSeq-1]]); BIT_addBits(&blockStream, llTable[nbSeq-1], LL_bits[llCodeTable[nbSeq-1]]); BIT_flushBits(&blockStream); @@ -757,16 +755,12 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, const U32 nbBits = (offCode-1) + (!offCode); const BYTE LLCode = llCodeTable[n]; /* (7)*/ /* (7)*/ - FSE_encodeSymbol(&blockStream, &stateMatchLength, MLCode); /* 17 */ /* 17 */ - if (MEM_32bits()) BIT_flushBits(&blockStream); /* 7 */ FSE_encodeSymbol(&blockStream, &stateOffsetBits, offCode); /* 25 */ /* 35 */ + FSE_encodeSymbol(&blockStream, &stateMatchLength, MLCode); /* 17 */ /* 17 */ FSE_encodeSymbol(&blockStream, &stateLitLength, LLCode); /* 16 */ /* 26 */ - if (MEM_32bits()) BIT_flushBits(&blockStream); /* 7 */ - //BIT_flushBits(&blockStream); /* 7 */ /* 7 */ - BIT_addBits(&blockStream, mlTable[n], ML_bits[MLCode]); BIT_addBits(&blockStream, offset, nbBits); /* 31 */ /* 61 */ /* 24 bits max in 32-bits mode */ + BIT_addBits(&blockStream, mlTable[n], ML_bits[MLCode]); BIT_addBits(&blockStream, llTable[n], LL_bits[LLCode]); - //if (blockStream.bitPos > 63) printf("pb : blockStream.bitPos == %u > 63 \n", blockStream.bitPos); BIT_flushBits(&blockStream); /* 7 */ /* 7 */ } } diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 96b8846c..ee57b853 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -503,7 +503,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, @return : nb bytes read from src, or an error code if it fails, testable with 
ZSTD_isError() */ -FORCE_INLINE size_t ZSTD_buildSeqTable(FSE_DTable* DTable, U32 type, U32 rawBits, U32 maxLog, +FORCE_INLINE size_t ZSTD_buildSeqTableOff(FSE_DTable* DTable, U32 type, U32 rawBits, U32 maxLog, const void* src, size_t srcSize) { U32 max = (1< max, data is corrupted */ return 1; case FSE_ENCODING_RAW : - FSE_buildDTable(DTable, LL_defaultNorm, max, LL_defaultNormLog); - return 0; - case FSE_ENCODING_STATIC: - return 0; - default : /* impossible */ - case FSE_ENCODING_DYNAMIC : - { U32 tableLog; - S16 norm[MaxSeq+1]; - size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); - if (FSE_isError(headerSize)) return ERROR(corruption_detected); - if (tableLog > maxLog) return ERROR(corruption_detected); - FSE_buildDTable(DTable, norm, max, tableLog); - return headerSize; - } } -} - - -FORCE_INLINE size_t ZSTD_buildSeqTableML(FSE_DTable* DTable, U32 type, U32 max, U32 maxLog, - const void* src, size_t srcSize) -{ - switch(type) - { - case FSE_ENCODING_RLE : - if (!srcSize) return ERROR(srcSize_wrong); - if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected); - FSE_buildDTable_rle(DTable, *(const BYTE*)src); /* if *src > max, data is corrupted */ - return 1; - case FSE_ENCODING_RAW : - FSE_buildDTable(DTable, ML_defaultNorm, max, ML_defaultNormLog); + FSE_buildDTable(DTable, defaultNorm, max, defaultLog); return 0; case FSE_ENCODING_STATIC: return 0; @@ -619,15 +592,15 @@ size_t ZSTD_decodeSeqHeaders(int* nbSeq, if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */ /* Build DTables */ - { size_t const bhSize = ZSTD_buildSeqTableLL(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip); + { size_t const bhSize = ZSTD_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog); if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); ip += bhSize; } - { size_t const bhSize = ZSTD_buildSeqTable(DTableOffb, Offtype, Offbits, 
OffFSELog, ip, iend-ip); + { size_t const bhSize = ZSTD_buildSeqTableOff(DTableOffb, Offtype, Offbits, OffFSELog, ip, iend-ip); if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); ip += bhSize; } - { size_t const bhSize = ZSTD_buildSeqTableML(DTableML, MLtype, MaxML, MLFSELog, ip, iend-ip); + { size_t const bhSize = ZSTD_buildSeqTable(DTableML, MLtype, MaxML, MLFSELog, ip, iend-ip, ML_defaultNorm, ML_defaultNormLog); if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); ip += bhSize; } @@ -654,7 +627,7 @@ typedef struct { static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) { /* Literal length */ - U32 const litCode = FSE_peakSymbol(&(seqState->stateLL)); + U32 const litCode = FSE_peekSymbol(&(seqState->stateLL)); { static const U32 LL_base[MaxLL+1] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, @@ -662,13 +635,23 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) seq->litLength = LL_base[litCode] + BIT_readBits(&(seqState->DStream), LL_bits[litCode]); } + /* MatchLength */ + { static const U32 ML_base[MaxML+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 34, 36, 38, 40, 44, 48, 56, 64, 80, 96, 0x80, 0x100, 0x200, 0x400, 0x800, + 0x1000, 0x2000, 0x4000, 0x8000, 0x10000 }; + U32 const mlCode = FSE_peekSymbol(&(seqState->stateML)); + seq->matchLength = ML_base[mlCode] + BIT_readBits(&(seqState->DStream), ML_bits[mlCode]) + mls; + } + /* Offset */ { static const U32 offsetPrefix[MaxOff+1] = { 1 /*fake*/, 1, 2, 4, 8, 0x10, 0x20, 0x40, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000, 0x20000, 0x40000, 0x80000, 0x100000, 0x200000, 0x400000, 0x800000, 0x1000000, 0x2000000, 0x4000000, /*fake*/ 1, 1, 1, 1 }; - U32 const offsetCode = FSE_peakSymbol(&(seqState->stateOffb)); /* <= maxOff, by table 
construction */ + U32 const offsetCode = FSE_peekSymbol(&(seqState->stateOffb)); /* <= maxOff, by table construction */ U32 const nbBits = offsetCode ? offsetCode-1 : 0; size_t const offset = offsetCode ? offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits) : litCode ? seq->offset : seqState->prevOffset; @@ -677,33 +660,15 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); } - { static const U32 ML_base[MaxML+1] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 34, 36, 38, 40, 44, 48, 56, 64, 80, 96, 0x80, 0x100, 0x200, 0x400, 0x800, - 0x1000, 0x2000, 0x4000, 0x8000, 0x10000 }; - U32 const mlCode = FSE_peakSymbol(&(seqState->stateML)); - seq->matchLength = ML_base[mlCode] + BIT_readBits(&(seqState->DStream), ML_bits[mlCode]) + mls; - } - - /* ANS update */ - FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream)); /* update */ + /* ANS state update */ + FSE_updateState(&(seqState->stateLL), &(seqState->DStream)); if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); - FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* update */ + FSE_updateState(&(seqState->stateML), &(seqState->DStream)); if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); - FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); /* update */ + FSE_updateState(&(seqState->stateOffb), &(seqState->DStream)); if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); - -#if 0 /* debug */ - { - static U64 totalDecoded = 0; - printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n", - (U32)(totalDecoded), (U32)litLength, (U32)matchLength, (U32)offset); - totalDecoded += litLength + matchLength; - } -#endif } From 9c34df93b686531449df97ed78a8eb2b39e2c251 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 23 Mar 2016 01:54:25 +0100 Subject: [PATCH 142/247] new 
decodeSequence, merging parts --- lib/zstd_decompress.c | 41 +++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index ee57b853..a854aaf7 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -627,48 +627,45 @@ typedef struct { static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) { /* Literal length */ - U32 const litCode = FSE_peekSymbol(&(seqState->stateLL)); - { static const U32 LL_base[MaxLL+1] = { + U32 const llCode = FSE_peekSymbol(&(seqState->stateLL)); + U32 const mlCode = FSE_peekSymbol(&(seqState->stateML)); + U32 const ofCode = FSE_peekSymbol(&(seqState->stateOffb)); /* <= maxOff, by table construction */ + + U32 const llBits = LL_bits[llCode]; + U32 const mlBits = ML_bits[mlCode]; + U32 const ofBits = ofCode ? ofCode-1 : 0; + + static const U32 LL_base[MaxLL+1] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000 }; - seq->litLength = LL_base[litCode] + BIT_readBits(&(seqState->DStream), LL_bits[litCode]); - } - /* MatchLength */ - { static const U32 ML_base[MaxML+1] = { + static const U32 ML_base[MaxML+1] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 36, 38, 40, 44, 48, 56, 64, 80, 96, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000 }; - U32 const mlCode = FSE_peekSymbol(&(seqState->stateML)); - seq->matchLength = ML_base[mlCode] + BIT_readBits(&(seqState->DStream), ML_bits[mlCode]) + mls; - } - /* Offset */ - { static const U32 offsetPrefix[MaxOff+1] = { + static const U32 OF_base[MaxOff+1] = { 1 /*fake*/, 1, 2, 4, 8, 0x10, 0x20, 0x40, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000, 0x20000, 0x40000, 0x80000, 0x100000, 0x200000, 0x400000, 0x800000, 0x1000000, 
0x2000000, 0x4000000, /*fake*/ 1, 1, 1, 1 }; - U32 const offsetCode = FSE_peekSymbol(&(seqState->stateOffb)); /* <= maxOff, by table construction */ - U32 const nbBits = offsetCode ? offsetCode-1 : 0; - size_t const offset = offsetCode ? offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits) : - litCode ? seq->offset : seqState->prevOffset; - if (offsetCode | !litCode) seqState->prevOffset = seq->offset; /* cmove */ + + seq->litLength = LL_base[llCode] + BIT_readBits(&(seqState->DStream), llBits); + seq->matchLength = ML_base[mlCode] + BIT_readBits(&(seqState->DStream), mlBits) + mls; + + /* Offset */ + { size_t const offset = ofCode ? OF_base[ofCode] + BIT_readBits(&(seqState->DStream), ofBits) : + llCode ? seq->offset : seqState->prevOffset; + if (ofCode | !llCode) seqState->prevOffset = seq->offset; /* cmove */ seq->offset = offset; - if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); } /* ANS state update */ FSE_updateState(&(seqState->stateLL), &(seqState->DStream)); - if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); - FSE_updateState(&(seqState->stateML), &(seqState->DStream)); - if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); - FSE_updateState(&(seqState->stateOffb), &(seqState->DStream)); - if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); } From fd6922508695c64d850aafe676e2f94ca12f1501 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 23 Mar 2016 02:47:33 +0100 Subject: [PATCH 143/247] bench : added HIGH_PRIORTY --- programs/bench.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/programs/bench.c b/programs/bench.c index bc28e460..905f84ed 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -53,13 +53,17 @@ /* sleep : posix - windows - others */ #if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__))) -# include +# include /* sleep */ +# include /* setpriority */ # define BMK_sleep(s) sleep(s) +# define HIGH_PRIORITY 
setpriority(PRIO_PROCESS, 0, -20) #elif defined(_WIN32) # include # define BMK_sleep(s) Sleep(1000*s) +# define HIGH_PRIORITY SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS); #else # define BMK_sleep(s) /* disabled */ +# define HIGH_PRIORITY #endif #include "mem.h" @@ -207,6 +211,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, /* init */ if (strlen(displayName)>17) displayName += strlen(displayName)-17; /* can only display 17 characters */ + HIGH_PRIORITY; /* Init blockTable data */ { const char* srcPtr = (const char*)srcBuffer; From 472638c8617fb4cb6485d09dfbce9a90bdcdc6f2 Mon Sep 17 00:00:00 2001 From: inikep Date: Wed, 23 Mar 2016 12:28:28 +0100 Subject: [PATCH 144/247] added mili_sleep and setHighPriority --- lib/zstd_compress.c | 5 ----- lib/zstd_internal.h | 2 -- programs/bench.c | 20 +++++++++++++++----- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 266999ca..28a7a1c3 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -98,7 +98,6 @@ struct ZSTD_CCtx_s U32 nextToUpdate3; /* index from which to continue dictionary update */ U32 loadedDictEnd; U32 stage; - U32 additionalParam; ZSTD_parameters params; void* workSpace; size_t workSpaceSize; @@ -276,7 +275,6 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx) dstCCtx->dictLimit = srcCCtx->dictLimit; dstCCtx->lowLimit = srcCCtx->lowLimit; dstCCtx->loadedDictEnd= srcCCtx->loadedDictEnd; - dstCCtx->additionalParam = srcCCtx->additionalParam; /* copy entropy tables */ dstCCtx->flagStaticTables = srcCCtx->flagStaticTables; @@ -2454,6 +2452,3 @@ ZSTD_parameters ZSTD_getParams(int compressionLevel, U64 srcSize) result.srcSize = srcSize; return result; } - - -void ZSTD_setAdditionalParam(ZSTD_CCtx* ctx, int additionalParam) { ctx->additionalParam = additionalParam; }; diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index 39258b21..ba350c4f 100644 --- a/lib/zstd_internal.h +++ 
b/lib/zstd_internal.h @@ -222,7 +222,5 @@ typedef struct { seqStore_t ZSTD_copySeqStore(const ZSTD_CCtx* ctx); -void ZSTD_setAdditionalParam(ZSTD_CCtx* ctx, int additionalParam); - #endif /* ZSTD_CCOMMON_H_MODULE */ diff --git a/programs/bench.c b/programs/bench.c index 5b2e5644..84380400 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -44,27 +44,34 @@ /* ************************************* * Includes ***************************************/ +#define _POSIX_C_SOURCE 199309L /* before - needed for nanosleep() */ #include /* malloc, free */ #include /* memset */ #include /* fprintf, fopen, ftello64 */ #include /* stat64 */ #include /* stat64 */ -#include /* clock_t, clock, CLOCKS_PER_SEC */ +#include /* clock_t, nanosleep, clock, CLOCKS_PER_SEC */ /* sleep : posix - windows - others */ #if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__))) # include +# include /* setpriority */ # define BMK_sleep(s) sleep(s) +# define mili_sleep(mili) { struct timespec t; t.tv_sec=0; t.tv_nsec=mili*1000000L; nanosleep(&t, NULL); } +# define setHighPriority() setpriority(PRIO_PROCESS, 0, -20) #elif defined(_WIN32) # include # define BMK_sleep(s) Sleep(1000*s) +# define mili_sleep(mili) Sleep(mili) +# define setHighPriority() SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS) #else # define BMK_sleep(s) /* disabled */ +# define mili_sleep(mili) /* disabled */ +# define setHighPriority() /* disabled */ #endif #include "mem.h" #include "zstd_static.h" -#include "zstd_internal.h" /* ZSTD_setAdditionalParam */ #include "xxhash.h" #include "datagen.h" /* RDG_genBuffer */ @@ -199,7 +206,7 @@ typedef struct #define MAX(a,b) ((a)>(b) ? 
(a) : (b)) static int BMK_benchMem(const void* srcBuffer, size_t srcSize, - const char* displayName, int cLevel, int additionalParam, + const char* displayName, int cLevel, const size_t* fileSizes, U32 nbFiles, const void* dictBuffer, size_t dictBufferSize, benchResult_t *result) { @@ -247,7 +254,6 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, } } } /* warmimg up memory */ - ZSTD_setAdditionalParam(refCtx, additionalParam); RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.50, 1); /* Bench */ @@ -275,6 +281,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, DISPLAYLEVEL(2, "%2i-%-17.17s :%10u ->\r", testNb, displayName, (U32)srcSize); memset(compressedBuffer, 0xE5, maxCompressedSize); /* warm up and erase result buffer */ + mili_sleep(1); /* give processor time to other processes */ clockStart = clock(); while (clock() == clockStart); clockStart = clock(); @@ -303,6 +310,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, /* Decompression */ memset(resultBuffer, 0xD6, srcSize); /* warm result buffer */ + mili_sleep(1); /* give processor time to other processes */ clockStart = clock(); while (clock() == clockStart); clockStart = clock(); @@ -403,6 +411,8 @@ static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize, benchResult_t result, total; int l; + setHighPriority(); + const char* pch = strrchr(displayName, '\\'); /* Windows */ if (!pch) pch = strrchr(displayName, '/'); /* Linux */ if (pch) displayName = pch+1; @@ -417,7 +427,7 @@ static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize, for (l=cLevel; l <= cLevelLast; l++) { BMK_benchMem(srcBuffer, benchedSize, - displayName, l, additionalParam, + displayName, l, fileSizes, nbFiles, dictBuffer, dictBufferSize, &result); if (g_displayLevel == 1) { From afab02098ac3c25ed7c689336dbd2b5d2faeaf97 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 23 Mar 2016 13:57:49 +0100 Subject: [PATCH 145/247] improved decoding speed (660) --- 
lib/bitstream.h | 12 ++++++++++-- lib/zstd_decompress.c | 8 +++++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/lib/bitstream.h b/lib/bitstream.h index d90c9b24..af9151ad 100644 --- a/lib/bitstream.h +++ b/lib/bitstream.h @@ -285,6 +285,14 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si return srcSize; } +MEM_STATIC size_t BIT_consumeFirstBits(size_t* bitDPtr, U32 const nbBits) +{ + static const unsigned mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF }; /* up to 26 bits */ + size_t const result = *bitDPtr & mask[nbBits]; + *bitDPtr >>= nbBits; + return result; +} + /*! BIT_lookBits() : * Provides next n bits from local register. * local register is not modified (bits are still present for next read/look). @@ -318,7 +326,7 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) */ MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) { - size_t value = BIT_lookBits(bitD, nbBits); + size_t const value = BIT_lookBits(bitD, nbBits); BIT_skipBits(bitD, nbBits); return value; } @@ -327,7 +335,7 @@ MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) * unsafe version; only works only if nbBits >= 1 */ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits) { - size_t value = BIT_lookBitsFast(bitD, nbBits); + size_t const value = BIT_lookBitsFast(bitD, nbBits); BIT_skipBits(bitD, nbBits); return value; } diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index a854aaf7..92af2b35 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -652,16 +652,18 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) 0x8000, 0x10000, 0x20000, 0x40000, 0x80000, 0x100000, 0x200000, 0x400000, 0x800000, 0x1000000, 0x2000000, 0x4000000, /*fake*/ 1, 1, 1, 1 }; - seq->litLength = 
LL_base[llCode] + BIT_readBits(&(seqState->DStream), llBits); - seq->matchLength = ML_base[mlCode] + BIT_readBits(&(seqState->DStream), mlBits) + mls; + size_t allBits = BIT_readBits(&(seqState->DStream), llBits+mlBits+ofBits); /* Offset */ - { size_t const offset = ofCode ? OF_base[ofCode] + BIT_readBits(&(seqState->DStream), ofBits) : + { size_t const offset = ofCode ? OF_base[ofCode] + BIT_consumeFirstBits(&allBits, ofBits) : llCode ? seq->offset : seqState->prevOffset; if (ofCode | !llCode) seqState->prevOffset = seq->offset; /* cmove */ seq->offset = offset; } + seq->matchLength = ML_base[mlCode] + BIT_consumeFirstBits(&allBits, mlBits) + mls; + seq->litLength = LL_base[llCode] + BIT_consumeFirstBits(&allBits, llBits); + /* ANS state update */ FSE_updateState(&(seqState->stateLL), &(seqState->DStream)); FSE_updateState(&(seqState->stateML), &(seqState->DStream)); From 2512597576d4a27608d1135bad2d49da122b3141 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 23 Mar 2016 14:00:09 +0100 Subject: [PATCH 146/247] minor reordering (DSpeed 665) --- lib/zstd_compress.c | 8 ++++---- lib/zstd_decompress.c | 11 +++++------ 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 5ae6d37d..e2d42faf 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -742,9 +742,9 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, offCodeTable[nbSeq-1]); FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); - BIT_addBits(&blockStream, offsetTable[nbSeq-1], offCodeTable[nbSeq-1] ? 
(offCodeTable[nbSeq-1]-1) : 0); - BIT_addBits(&blockStream, mlTable[nbSeq-1], ML_bits[mlCodeTable[nbSeq-1]]); BIT_addBits(&blockStream, llTable[nbSeq-1], LL_bits[llCodeTable[nbSeq-1]]); + BIT_addBits(&blockStream, mlTable[nbSeq-1], ML_bits[mlCodeTable[nbSeq-1]]); + BIT_addBits(&blockStream, offsetTable[nbSeq-1], offCodeTable[nbSeq-1] ? (offCodeTable[nbSeq-1]-1) : 0); BIT_flushBits(&blockStream); { size_t n; @@ -758,9 +758,9 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, FSE_encodeSymbol(&blockStream, &stateOffsetBits, offCode); /* 25 */ /* 35 */ FSE_encodeSymbol(&blockStream, &stateMatchLength, MLCode); /* 17 */ /* 17 */ FSE_encodeSymbol(&blockStream, &stateLitLength, LLCode); /* 16 */ /* 26 */ - BIT_addBits(&blockStream, offset, nbBits); /* 31 */ /* 61 */ /* 24 bits max in 32-bits mode */ - BIT_addBits(&blockStream, mlTable[n], ML_bits[MLCode]); BIT_addBits(&blockStream, llTable[n], LL_bits[LLCode]); + BIT_addBits(&blockStream, mlTable[n], ML_bits[MLCode]); + BIT_addBits(&blockStream, offset, nbBits); /* 31 */ /* 61 */ /* 24 bits max in 32-bits mode */ BIT_flushBits(&blockStream); /* 7 */ /* 7 */ } } diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 92af2b35..1de3da1e 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -635,6 +635,8 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) U32 const mlBits = ML_bits[mlCode]; U32 const ofBits = ofCode ? 
ofCode-1 : 0; + size_t allBits = BIT_readBits(&(seqState->DStream), llBits+mlBits+ofBits); + static const U32 LL_base[MaxLL+1] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, @@ -652,18 +654,15 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) 0x8000, 0x10000, 0x20000, 0x40000, 0x80000, 0x100000, 0x200000, 0x400000, 0x800000, 0x1000000, 0x2000000, 0x4000000, /*fake*/ 1, 1, 1, 1 }; - size_t allBits = BIT_readBits(&(seqState->DStream), llBits+mlBits+ofBits); - - /* Offset */ + /* sequence */ + seq->litLength = LL_base[llCode] + BIT_consumeFirstBits(&allBits, llBits); + seq->matchLength = ML_base[mlCode] + BIT_consumeFirstBits(&allBits, mlBits) + mls; { size_t const offset = ofCode ? OF_base[ofCode] + BIT_consumeFirstBits(&allBits, ofBits) : llCode ? seq->offset : seqState->prevOffset; if (ofCode | !llCode) seqState->prevOffset = seq->offset; /* cmove */ seq->offset = offset; } - seq->matchLength = ML_base[mlCode] + BIT_consumeFirstBits(&allBits, mlBits) + mls; - seq->litLength = LL_base[llCode] + BIT_consumeFirstBits(&allBits, llBits); - /* ANS state update */ FSE_updateState(&(seqState->stateLL), &(seqState->DStream)); FSE_updateState(&(seqState->stateML), &(seqState->DStream)); From 3c017867decd1dcedfc9d8699bb6daa89e4d1434 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 23 Mar 2016 14:09:51 +0100 Subject: [PATCH 147/247] minor optimization (DSpeed 665) --- lib/bitstream.h | 11 +++++++++++ lib/zstd_decompress.c | 8 ++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/lib/bitstream.h b/lib/bitstream.h index af9151ad..f2ed51bc 100644 --- a/lib/bitstream.h +++ b/lib/bitstream.h @@ -285,6 +285,17 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si return srcSize; } +MEM_STATIC size_t BIT_getUpperBits(size_t bitD, U32 const start) +{ + return bitD >> start; +} + +MEM_STATIC size_t 
BIT_getNBits(size_t bitD, U32 const nbBits, U32 const start) +{ + static const unsigned mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF }; /* up to 26 bits */ + return (bitD >> start) & mask[nbBits]; +} + MEM_STATIC size_t BIT_consumeFirstBits(size_t* bitDPtr, U32 const nbBits) { static const unsigned mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF }; /* up to 26 bits */ diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 1de3da1e..3abd6bbc 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -635,7 +635,7 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) U32 const mlBits = ML_bits[mlCode]; U32 const ofBits = ofCode ? ofCode-1 : 0; - size_t allBits = BIT_readBits(&(seqState->DStream), llBits+mlBits+ofBits); + size_t const allBits = BIT_readBits(&(seqState->DStream), llBits+mlBits+ofBits); static const U32 LL_base[MaxLL+1] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, @@ -655,9 +655,9 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) 0x800000, 0x1000000, 0x2000000, 0x4000000, /*fake*/ 1, 1, 1, 1 }; /* sequence */ - seq->litLength = LL_base[llCode] + BIT_consumeFirstBits(&allBits, llBits); - seq->matchLength = ML_base[mlCode] + BIT_consumeFirstBits(&allBits, mlBits) + mls; - { size_t const offset = ofCode ? OF_base[ofCode] + BIT_consumeFirstBits(&allBits, ofBits) : + seq->litLength = LL_base[llCode] + BIT_getNBits(allBits, llBits, 0); + seq->matchLength = ML_base[mlCode] + BIT_getNBits(allBits, mlBits, llBits) + mls; + { size_t const offset = ofCode ? OF_base[ofCode] + BIT_getUpperBits(allBits, llBits+mlBits) : llCode ? 
seq->offset : seqState->prevOffset; if (ofCode | !llCode) seqState->prevOffset = seq->offset; /* cmove */ seq->offset = offset; From 6cf45dac81a7e1c2168d6effdfab7bcc7995c1bb Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 23 Mar 2016 14:18:37 +0100 Subject: [PATCH 148/247] shared const BIT_mask --- lib/bitstream.h | 20 +++++++++----------- lib/zstd_decompress.c | 4 ++-- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/lib/bitstream.h b/lib/bitstream.h index f2ed51bc..62d2cb2b 100644 --- a/lib/bitstream.h +++ b/lib/bitstream.h @@ -141,7 +141,7 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); /*-************************************************************** -* Helper functions +* Internal functions ****************************************************************/ MEM_STATIC unsigned BIT_highbit32 (register U32 val) { @@ -165,6 +165,9 @@ MEM_STATIC unsigned BIT_highbit32 (register U32 val) # endif } +/*===== Local Constants =====*/ +static const unsigned BIT_mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF }; /* up to 26 bits */ + /*-************************************************************** * bitStream encoding @@ -189,8 +192,7 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* startPtr, size_t ds Does not check for register overflow ! 
*/ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits) { - static const unsigned mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF }; /* up to 26 bits */ - bitC->bitContainer |= (value & mask[nbBits]) << bitC->bitPos; + bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; bitC->bitPos += nbBits; } @@ -290,18 +292,14 @@ MEM_STATIC size_t BIT_getUpperBits(size_t bitD, U32 const start) return bitD >> start; } -MEM_STATIC size_t BIT_getNBits(size_t bitD, U32 const nbBits, U32 const start) +MEM_STATIC size_t BIT_getMiddleBits(size_t bitD, U32 const nbBits, U32 const start) { - static const unsigned mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF }; /* up to 26 bits */ - return (bitD >> start) & mask[nbBits]; + return (bitD >> start) & BIT_mask[nbBits]; } -MEM_STATIC size_t BIT_consumeFirstBits(size_t* bitDPtr, U32 const nbBits) +MEM_STATIC size_t BIT_getLowerBits(size_t bitD, U32 const nbBits) { - static const unsigned mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF }; /* up to 26 bits */ - size_t const result = *bitDPtr & mask[nbBits]; - *bitDPtr >>= nbBits; - return result; + return bitD & BIT_mask[nbBits]; } /*! 
BIT_lookBits() : diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 3abd6bbc..7749ffd3 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -655,8 +655,8 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) 0x800000, 0x1000000, 0x2000000, 0x4000000, /*fake*/ 1, 1, 1, 1 }; /* sequence */ - seq->litLength = LL_base[llCode] + BIT_getNBits(allBits, llBits, 0); - seq->matchLength = ML_base[mlCode] + BIT_getNBits(allBits, mlBits, llBits) + mls; + seq->litLength = LL_base[llCode] + BIT_getLowerBits(allBits, llBits); + seq->matchLength = ML_base[mlCode] + mls + BIT_getMiddleBits(allBits, mlBits, llBits); { size_t const offset = ofCode ? OF_base[ofCode] + BIT_getUpperBits(allBits, llBits+mlBits) : llCode ? seq->offset : seqState->prevOffset; if (ofCode | !llCode) seqState->prevOffset = seq->offset; /* cmove */ From 7adceef9748ddb1e4a6b956dbb35b6fc66ddb8c9 Mon Sep 17 00:00:00 2001 From: inikep Date: Wed, 23 Mar 2016 15:53:38 +0100 Subject: [PATCH 149/247] hashLog3 added to ZSTD_CCtx --- lib/zstd_compress.c | 12 +++++++++--- lib/zstd_decompress.c | 6 +++--- lib/zstd_internal.h | 5 +---- lib/zstd_opt.h | 5 +++-- lib/zstd_static.h | 1 + programs/bench.c | 8 ++++++++ 6 files changed, 25 insertions(+), 12 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 28a7a1c3..2e62ec2c 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -96,6 +96,7 @@ struct ZSTD_CCtx_s U32 lowLimit; /* below that point, no more data */ U32 nextToUpdate; /* index from which to continue dictionary update */ U32 nextToUpdate3; /* index from which to continue dictionary update */ + U32 hashLog3; /* dispatch table : larger == faster, more memory */ U32 loadedDictEnd; U32 stage; ZSTD_parameters params; @@ -187,7 +188,7 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, const size_t tokenSpace = blockSize + 8*maxNbSeq; const size_t contentSize = (params.strategy == ZSTD_fast) ? 
0 : (1 << params.contentLog); const size_t hSize = 1 << params.hashLog; - const size_t h3Size = (params.searchLength==3) ? (1 << HASHLOG3) : 0; + const size_t h3Size = (zc->hashLog3) ? 1 << zc->hashLog3 : 0; const size_t tableSpace = (contentSize + hSize + h3Size) * sizeof(U32); /* Check if workSpace is large enough, alloc a new one if needed */ @@ -252,12 +253,13 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx) { if (srcCCtx->stage!=0) return ERROR(stage_wrong); + dstCCtx->hashLog3 = srcCCtx->hashLog3; /* must be before ZSTD_resetCCtx_advanced */ ZSTD_resetCCtx_advanced(dstCCtx, srcCCtx->params); /* copy tables */ { const size_t contentSize = (srcCCtx->params.strategy == ZSTD_fast) ? 0 : (1 << srcCCtx->params.contentLog); const size_t hSize = 1 << srcCCtx->params.hashLog; - const size_t h3Size = (srcCCtx->params.searchLength == 3) ? (1 << HASHLOG3) : 0; + const size_t h3Size = (srcCCtx->hashLog3) ? 1 << srcCCtx->hashLog3 : 0; const size_t tableSpace = (contentSize + hSize + h3Size) * sizeof(U32); memcpy(dstCCtx->workSpace, srcCCtx->workSpace, tableSpace); } @@ -310,7 +312,7 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue) { const U32 contentSize = (zc->params.strategy == ZSTD_fast) ? 0 : (1 << zc->params.contentLog); ZSTD_reduceTable(zc->contentTable, contentSize, reducerValue); } - { const U32 h3Size = (zc->params.searchLength == 3) ? (1 << HASHLOG3) : 0; + { const U32 h3Size = (zc->hashLog3) ? 1 << zc->hashLog3 : 0; ZSTD_reduceTable(zc->hashTable3, h3Size, reducerValue); } } @@ -2185,7 +2187,11 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* zc, const void* dict, size_t dictSize, ZSTD_parameters params) { +// printf("windowLog=%d hashLog=%d\n", params.windowLog, params.hashLog); ZSTD_validateParams(¶ms); + zc->hashLog3 = (params.searchLength==3) ? 
ZSTD_HASHLOG3 : 0; +// if (zc->hashLog3 > params.windowLog) zc->hashLog3 = params.windowLog; +// printf("windowLog=%d hashLog=%d hashLog3=%d \n", params.windowLog, params.hashLog, zc->hashLog3); { size_t const errorCode = ZSTD_resetCCtx_advanced(zc, params); if (ZSTD_isError(errorCode)) return errorCode; } diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index c90b8855..ef99175e 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -149,7 +149,7 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) dctx->hufTableX4[0] = HufLog; dctx->flagStaticTables = 0; dctx->fParams.mml = MINMATCH; /* overwritten by frame but forces ZSTD_btopt to MINMATCH in block mode */ - ZSTD_LOG_BLOCK("%p: ZSTD_decompressBegin searchLength=%d\n", dctx->base, dctx->params.searchLength); + ZSTD_LOG_BLOCK("%p: ZSTD_decompressBegin searchLength=%d\n", dctx->base, dctx->fParams.mml); return 0; } @@ -845,7 +845,7 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); - ZSTD_LOG_BLOCK("%p: ZSTD_decompressBlock_internal searchLength=%d\n", dctx->base, dctx->params.searchLength); + ZSTD_LOG_BLOCK("%p: ZSTD_decompressBlock_internal searchLength=%d\n", dctx->base, dctx->fParams.mml); /* Decode literals sub-block */ litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); @@ -953,7 +953,7 @@ size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) { ZSTD_decompressBegin_usingDict(dctx, dict, dictSize); - ZSTD_LOG_BLOCK("%p: ZSTD_decompressBegin_usingDict searchLength=%d\n", dctx->base, dctx->params.searchLength); + ZSTD_LOG_BLOCK("%p: ZSTD_decompressBegin_usingDict searchLength=%d\n", dctx->base, dctx->fParams.mml); ZSTD_checkContinuity(dctx, dst); return ZSTD_decompressFrame(dctx, dst, dstCapacity, src, srcSize); } diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index ba350c4f..3b68654e 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -51,9 +51,7 @@ * Common constants 
***************************************/ #define ZSTD_OPT_DEBUG 0 // 1 = tableID=0; 3 = price func tests; 5 = check encoded sequences; 9 = full logs -#if defined(ZSTD_OPT_DEBUG) && ZSTD_OPT_DEBUG>0 - #include -#endif +#include #if defined(ZSTD_OPT_DEBUG) && ZSTD_OPT_DEBUG>=9 #define ZSTD_LOG_PARSER(...) printf(__VA_ARGS__) #define ZSTD_LOG_ENCODE(...) printf(__VA_ARGS__) @@ -99,7 +97,6 @@ typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t; #define MINMATCH 4 #define REPCODE_STARTVALUE 1 -#define HASHLOG3 17 #define Litbits 8 #define MLbits 7 diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index b8c9d67a..e20379c1 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -196,17 +196,18 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip) { U32* const hashTable3 = zc->hashTable3; + U32 const hashLog3 = zc->hashLog3; const BYTE* const base = zc->base; const U32 target = (U32)(ip - base); U32 idx = zc->nextToUpdate3; while(idx < target) { - hashTable3[ZSTD_hash3Ptr(base+idx, HASHLOG3)] = idx; + hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx; idx++; } zc->nextToUpdate3 = target; - return hashTable3[ZSTD_hash3Ptr(ip, HASHLOG3)]; + return hashTable3[ZSTD_hash3Ptr(ip, hashLog3)]; } diff --git a/lib/zstd_static.h b/lib/zstd_static.h index 4ae771fd..d41bc841 100644 --- a/lib/zstd_static.h +++ b/lib/zstd_static.h @@ -63,6 +63,7 @@ extern "C" { #define ZSTD_CONTENTLOG_MIN 4 #define ZSTD_HASHLOG_MAX 28 #define ZSTD_HASHLOG_MIN 12 +#define ZSTD_HASHLOG3 17 #define ZSTD_SEARCHLOG_MAX (ZSTD_CONTENTLOG_MAX-1) #define ZSTD_SEARCHLOG_MIN 1 #define ZSTD_SEARCHLENGTH_MAX 7 diff --git a/programs/bench.c b/programs/bench.c index 84380400..d3e9c1f1 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -288,7 +288,15 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, for (nbLoops = 0 ; BMK_clockSpan(clockStart) < clockLoop ; nbLoops++) { U32 blockNb; 
+#if 0 ZSTD_compressBegin_usingDict(refCtx, dictBuffer, dictBufferSize, cLevel); +#else + ZSTD_parameters params = ZSTD_getParams(cLevel, dictBufferSize ? dictBufferSize : blockSize); + // printf("cLevel=%d dictBufferSize=%d srcSize=%d params.srcSize=%d \n", cLevel, (int)dictBufferSize, (int)blockTable[0].srcSize, (int)params.srcSize); + params.srcSize = 0; + ZSTD_compressBegin_advanced(refCtx, dictBuffer, dictBufferSize, params); +#endif + for (blockNb=0; blockNb Date: Wed, 23 Mar 2016 18:45:23 +0100 Subject: [PATCH 150/247] Added BMI instructions --- lib/bitstream.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/lib/bitstream.h b/lib/bitstream.h index 62d2cb2b..749dc02f 100644 --- a/lib/bitstream.h +++ b/lib/bitstream.h @@ -292,9 +292,14 @@ MEM_STATIC size_t BIT_getUpperBits(size_t bitD, U32 const start) return bitD >> start; } +#include MEM_STATIC size_t BIT_getMiddleBits(size_t bitD, U32 const nbBits, U32 const start) { +#if defined(__BMI__) && defined(__GNUC__) + return __builtin_ia32_bextr_u64(bitD, (nbBits<<8) | start ); +#else return (bitD >> start) & BIT_mask[nbBits]; +#endif } MEM_STATIC size_t BIT_getLowerBits(size_t bitD, U32 const nbBits) @@ -309,10 +314,15 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitD, U32 const nbBits) * On 64-bits, maxNbBits==56. * @return : value extracted */ -MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) + MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) { +#if defined(__BMI__) && defined(__GNUC__) + return __builtin_ia32_bextr_u64(bitD->bitContainer, (nbBits<<8) | (64 - bitD->bitsConsumed - nbBits) ); +#else U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1; return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask); + //return (bitD->bitContainer >> (64 - bitD->bitsConsumed - nbBits)) & BIT_mask[nbBits]; +#endif } /*! 
BIT_lookBitsFast() : From eaba91a161b44f7b85c54469d25dd2f82f67fb17 Mon Sep 17 00:00:00 2001 From: inikep Date: Wed, 23 Mar 2016 20:30:26 +0100 Subject: [PATCH 151/247] added ZSTD_compressBegin_targetSrcSize --- lib/zstd_compress.c | 21 ++++++++++++++++++--- lib/zstd_internal.h | 2 +- lib/zstd_static.h | 3 ++- programs/bench.c | 38 +++++++++++++++++--------------------- programs/bench.h | 3 ++- programs/zstdcli.c | 14 +++++++------- 6 files changed, 47 insertions(+), 34 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 2e62ec2c..43e6e663 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -97,6 +97,7 @@ struct ZSTD_CCtx_s U32 nextToUpdate; /* index from which to continue dictionary update */ U32 nextToUpdate3; /* index from which to continue dictionary update */ U32 hashLog3; /* dispatch table : larger == faster, more memory */ + U32 targetSrcSize; /* optimize compression for this source size */ U32 loadedDictEnd; U32 stage; ZSTD_parameters params; @@ -240,6 +241,7 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, zc->hbSize = 0; zc->stage = 0; zc->loadedDictEnd = 0; + zc->targetSrcSize = 0; return 0; } @@ -277,6 +279,7 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx) dstCCtx->dictLimit = srcCCtx->dictLimit; dstCCtx->lowLimit = srcCCtx->lowLimit; dstCCtx->loadedDictEnd= srcCCtx->loadedDictEnd; + dstCCtx->targetSrcSize= srcCCtx->targetSrcSize; /* copy entropy tables */ dstCCtx->flagStaticTables = srcCCtx->flagStaticTables; @@ -2180,6 +2183,7 @@ static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, si } } +extern int g_additionalParam; /*! 
ZSTD_compressBegin_advanced() : * @return : 0, or an error code */ @@ -2187,10 +2191,10 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* zc, const void* dict, size_t dictSize, ZSTD_parameters params) { -// printf("windowLog=%d hashLog=%d\n", params.windowLog, params.hashLog); +// printf("windowLog=%d hashLog=%d targetSrcSize=%d\n", params.windowLog, params.hashLog, zc->targetSrcSize); ZSTD_validateParams(¶ms); - zc->hashLog3 = (params.searchLength==3) ? ZSTD_HASHLOG3 : 0; -// if (zc->hashLog3 > params.windowLog) zc->hashLog3 = params.windowLog; + U32 hashLog3 = (!zc->targetSrcSize || zc->targetSrcSize >= 8192) ? ZSTD_HASHLOG3_MAX : ((zc->targetSrcSize >= 2048) ? ZSTD_HASHLOG3_MIN + 1 : ZSTD_HASHLOG3_MIN); + zc->hashLog3 = (params.searchLength==3) ? hashLog3 : 0; // printf("windowLog=%d hashLog=%d hashLog3=%d \n", params.windowLog, params.hashLog, zc->hashLog3); { size_t const errorCode = ZSTD_resetCCtx_advanced(zc, params); @@ -2220,6 +2224,15 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* zc, } +size_t ZSTD_compressBegin_targetSrcSize(ZSTD_CCtx* zc, const void* dict, size_t dictSize, size_t targetSrcSize, int compressionLevel) +{ + zc->targetSrcSize = dictSize ? 
dictSize : targetSrcSize; + ZSTD_parameters params = ZSTD_getParams(compressionLevel, zc->targetSrcSize); + params.srcSize = 0; + ZSTD_LOG_BLOCK("%p: ZSTD_compressBegin_targetSrcSize compressionLevel=%d\n", zc->base, compressionLevel); + return ZSTD_compressBegin_advanced(zc, dict, dictSize, params); +} + size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* zc, const void* dict, size_t dictSize, int compressionLevel) { ZSTD_parameters params = ZSTD_getParams(compressionLevel, dictSize); @@ -2309,12 +2322,14 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel) { ZSTD_LOG_BLOCK("%p: ZSTD_compress_usingDict srcSize=%d dictSize=%d compressionLevel=%d\n", ctx->base, (int)srcSize, (int)dictSize, compressionLevel); + ctx->targetSrcSize = srcSize; return ZSTD_compress_advanced(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, ZSTD_getParams(compressionLevel, srcSize)); } size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel) { ZSTD_LOG_BLOCK("%p: ZSTD_compressCCtx srcSize=%d compressionLevel=%d\n", ctx->base, (int)srcSize, compressionLevel); + ctx->targetSrcSize = srcSize; return ZSTD_compress_advanced(ctx, dst, dstCapacity, src, srcSize, NULL, 0, ZSTD_getParams(compressionLevel, srcSize)); } diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index 3b68654e..1358eb8a 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -218,6 +218,6 @@ typedef struct { } seqStore_t; seqStore_t ZSTD_copySeqStore(const ZSTD_CCtx* ctx); - +size_t ZSTD_compressBegin_targetSrcSize(ZSTD_CCtx* zc, const void* dict, size_t dictSize, size_t targetSrcSize, int compressionLevel); #endif /* ZSTD_CCOMMON_H_MODULE */ diff --git a/lib/zstd_static.h b/lib/zstd_static.h index d41bc841..d88fbce8 100644 --- a/lib/zstd_static.h +++ b/lib/zstd_static.h @@ -63,7 +63,8 
@@ extern "C" { #define ZSTD_CONTENTLOG_MIN 4 #define ZSTD_HASHLOG_MAX 28 #define ZSTD_HASHLOG_MIN 12 -#define ZSTD_HASHLOG3 17 +#define ZSTD_HASHLOG3_MAX 17 +#define ZSTD_HASHLOG3_MIN 15 #define ZSTD_SEARCHLOG_MAX (ZSTD_CONTENTLOG_MAX-1) #define ZSTD_SEARCHLOG_MIN 1 #define ZSTD_SEARCHLENGTH_MAX 7 diff --git a/programs/bench.c b/programs/bench.c index d3e9c1f1..1468bcb2 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -72,8 +72,9 @@ #include "mem.h" #include "zstd_static.h" +#include "zstd_internal.h" /* ZSTD_compressBegin_targetSrcSize */ +#include "datagen.h" /* RDG_genBuffer */ #include "xxhash.h" -#include "datagen.h" /* RDG_genBuffer */ /* ************************************* @@ -139,9 +140,12 @@ static U32 g_displayLevel = 2; /* 0 : no display; 1: errors; 2 : + result ***************************************/ static U32 g_nbIterations = NBLOOPS; static size_t g_blockSize = 0; +int g_additionalParam = 0; void BMK_setNotificationLevel(unsigned level) { g_displayLevel=level; } +void BMK_setAdditionalParam(int additionalParam) { g_additionalParam=additionalParam; } + void BMK_SetNbIterations(unsigned nbLoops) { g_nbIterations = nbLoops; @@ -288,15 +292,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, for (nbLoops = 0 ; BMK_clockSpan(clockStart) < clockLoop ; nbLoops++) { U32 blockNb; -#if 0 - ZSTD_compressBegin_usingDict(refCtx, dictBuffer, dictBufferSize, cLevel); -#else - ZSTD_parameters params = ZSTD_getParams(cLevel, dictBufferSize ? 
dictBufferSize : blockSize); - // printf("cLevel=%d dictBufferSize=%d srcSize=%d params.srcSize=%d \n", cLevel, (int)dictBufferSize, (int)blockTable[0].srcSize, (int)params.srcSize); - params.srcSize = 0; - ZSTD_compressBegin_advanced(refCtx, dictBuffer, dictBufferSize, params); -#endif - + ZSTD_compressBegin_targetSrcSize(refCtx, dictBuffer, dictBufferSize, blockSize, cLevel); for (blockNb=0; blockNb>10)); if (cLevelLast < cLevel) cLevelLast = cLevel; @@ -439,8 +435,8 @@ static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize, fileSizes, nbFiles, dictBuffer, dictBufferSize, &result); if (g_displayLevel == 1) { - if (additionalParam) - DISPLAY("%-3i%11i (%5.3f) %6.1f MB/s %6.1f MB/s %s (p=%d)\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName, additionalParam); + if (g_additionalParam) + DISPLAY("%-3i%11i (%5.3f) %6.1f MB/s %6.1f MB/s %s (param=%d)\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName, g_additionalParam); else DISPLAY("%-3i%11i (%5.3f) %6.1f MB/s %6.1f MB/s %s\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName); total.cSize += result.cSize; @@ -491,7 +487,7 @@ static void BMK_loadFiles(void* buffer, size_t bufferSize, } static void BMK_benchFileTable(const char** fileNamesTable, unsigned nbFiles, - const char* dictFileName, int cLevel, int cLevelLast, int additionalParam) + const char* dictFileName, int cLevel, int cLevelLast) { void* srcBuffer; size_t benchedSize; @@ -531,7 +527,7 @@ static void BMK_benchFileTable(const char** fileNamesTable, unsigned nbFiles, else displayName = fileNamesTable[0]; BMK_benchCLevel(srcBuffer, benchedSize, - displayName, cLevel, cLevelLast, additionalParam, + displayName, cLevel, cLevelLast, fileSizes, nbFiles, dictBuffer, dictBufferSize); @@ -542,7 +538,7 @@ static void BMK_benchFileTable(const char** fileNamesTable, unsigned nbFiles, } -static void BMK_syntheticTest(int cLevel, int cLevelLast, int additionalParam, 
double compressibility) +static void BMK_syntheticTest(int cLevel, int cLevelLast, double compressibility) { char name[20] = {0}; size_t benchedSize = 10000000; @@ -556,7 +552,7 @@ static void BMK_syntheticTest(int cLevel, int cLevelLast, int additionalParam, d /* Bench */ snprintf (name, sizeof(name), "Synthetic %2u%%", (unsigned)(compressibility*100)); - BMK_benchCLevel(srcBuffer, benchedSize, name, cLevel, cLevelLast, additionalParam, &benchedSize, 1, NULL, 0); + BMK_benchCLevel(srcBuffer, benchedSize, name, cLevel, cLevelLast, &benchedSize, 1, NULL, 0); /* clean up */ free(srcBuffer); @@ -564,14 +560,14 @@ static void BMK_syntheticTest(int cLevel, int cLevelLast, int additionalParam, d int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles, - const char* dictFileName, int cLevel, int cLevelLast, int additionalParam) + const char* dictFileName, int cLevel, int cLevelLast) { double const compressibility = (double)g_compressibilityDefault / 100; if (nbFiles == 0) - BMK_syntheticTest(cLevel, cLevelLast, additionalParam, compressibility); + BMK_syntheticTest(cLevel, cLevelLast, compressibility); else - BMK_benchFileTable(fileNamesTable, nbFiles, dictFileName, cLevel, cLevelLast, additionalParam); + BMK_benchFileTable(fileNamesTable, nbFiles, dictFileName, cLevel, cLevelLast); return 0; } diff --git a/programs/bench.h b/programs/bench.h index bc5ffa42..3a1ca3a2 100644 --- a/programs/bench.h +++ b/programs/bench.h @@ -27,10 +27,11 @@ /* Main function */ int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles, - const char* dictFileName, int cLevel, int cLevelLast, int additionalParam); + const char* dictFileName, int cLevel, int cLevelLast); /* Set Parameters */ void BMK_SetNbIterations(unsigned nbLoops); void BMK_SetBlockSize(size_t blockSize); +void BMK_setAdditionalParam(int additionalParam); void BMK_setNotificationLevel(unsigned level); diff --git a/programs/zstdcli.c b/programs/zstdcli.c index a14fe26d..0c060bdb 100644 --- a/programs/zstdcli.c 
+++ b/programs/zstdcli.c @@ -183,7 +183,6 @@ int main(int argCount, const char** argv) nextArgumentIsMaxDict=0; unsigned cLevel = 1; unsigned cLevelLast = 1; - int additionalParam = 0; const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*)); /* argCount >= 1 */ unsigned filenameIdx = 0; const char* programName = argv[0]; @@ -195,7 +194,7 @@ int main(int argCount, const char** argv) unsigned dictSelect = g_defaultSelectivityLevel; /* init */ - (void)additionalParam; (void)cLevelLast; (void)dictCLevel; /* not used when ZSTD_NOBENCH / ZSTD_NODICT set */ + (void)cLevelLast; (void)dictCLevel; /* not used when ZSTD_NOBENCH / ZSTD_NODICT set */ if (filenameTable==NULL) { DISPLAY("not enough memory\n"); exit(1); } displayOut = stderr; /* Pick out program name from path. Don't rely on stdlib because of conflicting behavior */ @@ -321,7 +320,6 @@ int main(int argCount, const char** argv) cLevelLast = 0; while ((*argument >= '0') && (*argument <= '9')) cLevelLast *= 10, cLevelLast += *argument++ - '0'; - continue; } break; #endif /* ZSTD_NOBENCH */ @@ -336,12 +334,14 @@ int main(int argCount, const char** argv) /* Pause at the end (-p) or set an additional param (-p#) (hidden option) */ case 'p': argument++; if ((*argument>='0') && (*argument<='9')) { - additionalParam = 0; + int additionalParam = 0; while ((*argument >= '0') && (*argument <= '9')) additionalParam *= 10, additionalParam += *argument++ - '0'; - continue; + BMK_setAdditionalParam(additionalParam); + } else { + main_pause=1; } - main_pause=1; break; + break; /* unknown command */ default : CLEAN_RETURN(badusage(programName)); } @@ -383,7 +383,7 @@ int main(int argCount, const char** argv) if (bench) { #ifndef ZSTD_NOBENCH BMK_setNotificationLevel(displayLevel); - BMK_benchFiles(filenameTable, filenameIdx, dictFileName, cLevel, cLevelLast, additionalParam); + BMK_benchFiles(filenameTable, filenameIdx, dictFileName, cLevel, cLevelLast); #endif goto _end; } From 
72d706a020a3d3a1ed1ad214783e732ccad00ed9 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 23 Mar 2016 20:44:12 +0100 Subject: [PATCH 152/247] fixed crash at -O3 with customized block size (wrong alignment) --- lib/zstd_compress.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index e2d42faf..121a9fcd 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -219,6 +219,16 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, zc->params = params; zc->blockSize = blockSize; + if (params.strategy == ZSTD_btopt) { + zc->seqStore.litFreq = (U32*)(zc->seqStore.buffer); + zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1); + zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML+1); + zc->seqStore.matchTable = (ZSTD_match_t*)((void*)(zc->seqStore.offCodeFreq + (1<seqStore.priceTable = (ZSTD_optimal_t*)((void*)(zc->seqStore.matchTable + ZSTD_OPT_NUM+1)); + zc->seqStore.buffer = zc->seqStore.priceTable + ZSTD_OPT_NUM+1; + zc->seqStore.litLengthSum = 0; + } zc->seqStore.offsetStart = (U32*) (zc->seqStore.buffer); zc->seqStore.litLengthStart = (U16*) (void*)(zc->seqStore.offsetStart + maxNbSeq); zc->seqStore.matchLengthStart = (U16*) (void*)(zc->seqStore.litLengthStart + maxNbSeq); @@ -226,15 +236,6 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, zc->seqStore.mlCodeStart = zc->seqStore.llCodeStart + maxNbSeq; zc->seqStore.offCodeStart = zc->seqStore.mlCodeStart + maxNbSeq; zc->seqStore.litStart = zc->seqStore.offCodeStart + maxNbSeq; - if (params.strategy == ZSTD_btopt) { - zc->seqStore.litFreq = (U32*)((void*)(zc->seqStore.litStart + blockSize)); - zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1); - zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML+1); - zc->seqStore.matchTable = 
(ZSTD_match_t*)((void*)(zc->seqStore.offCodeFreq + (1<seqStore.priceTable = (ZSTD_optimal_t*)((void*)(zc->seqStore.matchTable + ZSTD_OPT_NUM+1)); - zc->seqStore.litLengthSum = 0; - } zc->hbSize = 0; zc->stage = 0; @@ -632,7 +633,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, for (u=0; ulongLength; llTable[u] = (U16)ll; } - llCodeTable[u] = (ll>63) ? ZSTD_highbit(ll) + LL_deltaCode : LL_Code[ll]; + llCodeTable[u] = (ll>63) ? (BYTE)ZSTD_highbit(ll) + LL_deltaCode : LL_Code[ll]; } } /* CTable for Literal Lengths */ @@ -699,14 +700,14 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, for (u=0; ulongLength; mlTable[u] = (U16)ml; } - mlCodeTable[u] = (ml>127) ? ZSTD_highbit(ml) + ML_deltaCode : ML_Code[ml]; + mlCodeTable[u] = (ml>127) ? (BYTE)ZSTD_highbit(ml) + ML_deltaCode : ML_Code[ml]; } } /* CTable for MatchLengths */ { U32 max = MaxML; size_t const mostFrequent = FSE_countFast(count, &max, mlCodeTable, nbSeq); if ((mostFrequent == nbSeq) && (nbSeq > 2)) { - *op++ = *mlTable; + *op++ = *mlCodeTable; FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max); MLtype = FSE_ENCODING_RLE; } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { From 7cbe79ab25cb3381fb2e249de8d6422d8b62565f Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 23 Mar 2016 22:31:57 +0100 Subject: [PATCH 153/247] fixed write overflow found by fuzzer --- lib/zstd_compress.c | 67 +++++++++++++++++++++++---------------------- programs/fuzzer.c | 4 +-- 2 files changed, 37 insertions(+), 34 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 121a9fcd..8d2eff0b 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -589,7 +589,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, U16* const mlTable = seqStorePtr->matchLengthStart; const U32* const offsetTable = seqStorePtr->offsetStart; const U32* const offsetTableEnd = seqStorePtr->offset; - BYTE* const offCodeTable = seqStorePtr->offCodeStart; + BYTE* const ofCodeTable = seqStorePtr->offCodeStart; BYTE* const 
llCodeTable = seqStorePtr->llCodeStart; BYTE* const mlCodeTable = seqStorePtr->mlCodeStart; BYTE* const ostart = (BYTE*)dst; @@ -607,7 +607,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, } /* Sequences Header */ - if ((oend-op) < MIN_SEQUENCES_SIZE) return ERROR(dstSize_tooSmall); + if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall); if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq; else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; @@ -661,12 +661,12 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, } } /* Offset codes */ - { size_t i; for (i=0; i 2)) { - *op++ = offCodeTable[0]; + *op++ = ofCodeTable[0]; FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max); Offtype = FSE_ENCODING_RLE; } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { @@ -677,7 +677,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, } else { size_t nbSeq_1 = nbSeq; const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max); - if (count[offCodeTable[nbSeq-1]]>1) { count[offCodeTable[nbSeq-1]]--; nbSeq_1--; } + if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; } FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ if (FSE_isError(NCountSize)) return ERROR(GENERIC); @@ -741,27 +741,30 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, /* first symbols */ FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); - FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, offCodeTable[nbSeq-1]); + FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); BIT_addBits(&blockStream, llTable[nbSeq-1], LL_bits[llCodeTable[nbSeq-1]]); BIT_addBits(&blockStream, mlTable[nbSeq-1], ML_bits[mlCodeTable[nbSeq-1]]); - 
BIT_addBits(&blockStream, offsetTable[nbSeq-1], offCodeTable[nbSeq-1] ? (offCodeTable[nbSeq-1]-1) : 0); + BIT_addBits(&blockStream, offsetTable[nbSeq-1], ofCodeTable[nbSeq-1] ? (ofCodeTable[nbSeq-1]-1) : 0); BIT_flushBits(&blockStream); { size_t n; for (n=nbSeq-2 ; n 64 - 7 - 27) + BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, llTable[n], llBits); + BIT_addBits(&blockStream, mlTable[n], mlBits); + BIT_addBits(&blockStream, offsetTable[n], ofBits); /* 31 */ /* 61 */ /* 24 bits max in 32-bits mode */ BIT_flushBits(&blockStream); /* 7 */ /* 7 */ } } @@ -1997,7 +2000,7 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* zc, static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* zc, - void* dst, size_t dstSize, + void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 frame) { @@ -2006,10 +2009,10 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* zc, if (frame && (zc->stage==0)) { hbSize = zc->hbSize; - if (dstSize <= hbSize) return ERROR(dstSize_tooSmall); + if (dstCapacity <= hbSize) return ERROR(dstSize_tooSmall); zc->stage = 1; memcpy(dst, zc->headerBuffer, hbSize); - dstSize -= hbSize; + dstCapacity -= hbSize; dst = (char*)dst + hbSize; } @@ -2048,8 +2051,8 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* zc, zc->nextSrc = ip + srcSize; { size_t const cSize = frame ? 
- ZSTD_compress_generic (zc, dst, dstSize, src, srcSize) : - ZSTD_compressBlock_internal (zc, dst, dstSize, src, srcSize); + ZSTD_compress_generic (zc, dst, dstCapacity, src, srcSize) : + ZSTD_compressBlock_internal (zc, dst, dstCapacity, src, srcSize); if (ZSTD_isError(cSize)) return cSize; return cSize + hbSize; } @@ -2057,10 +2060,10 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* zc, size_t ZSTD_compressContinue (ZSTD_CCtx* zc, - void* dst, size_t dstSize, + void* dst, size_t dstCapacity, const void* src, size_t srcSize) { - return ZSTD_compressContinue_internal(zc, dst, dstSize, src, srcSize, 1); + return ZSTD_compressContinue_internal(zc, dst, dstCapacity, src, srcSize, 1); } @@ -2283,18 +2286,18 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, /* Init */ { size_t const errorCode = ZSTD_compressBegin_advanced(ctx, dict, dictSize, params); - if(ZSTD_isError(errorCode)) return errorCode; } + if(ZSTD_isError(errorCode)) return errorCode; } /* body (compression) */ { size_t const oSize = ZSTD_compressContinue (ctx, op, dstCapacity, src, srcSize); - if(ZSTD_isError(oSize)) return oSize; - op += oSize; - dstCapacity -= oSize; } + if(ZSTD_isError(oSize)) return oSize; + op += oSize; + dstCapacity -= oSize; } /* Close frame */ { size_t const oSize = ZSTD_compressEnd(ctx, op, dstCapacity); - if(ZSTD_isError(oSize)) return oSize; - op += oSize; } + if(ZSTD_isError(oSize)) return oSize; + op += oSize; } return (op - ostart); } diff --git a/programs/fuzzer.c b/programs/fuzzer.c index d53586c1..29bf4861 100644 --- a/programs/fuzzer.c +++ b/programs/fuzzer.c @@ -488,12 +488,12 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 maxDuration, doub if (cSize > 3) { const size_t missing = (FUZ_rand(&lseed) % (cSize-2)) + 1; /* no problem, as cSize > 4 (frameHeaderSizer) */ const size_t tooSmallSize = cSize - missing; - static const U32 endMark = 0x4DC2B1A9; + const U32 endMark = 0x4DC2B1A9; memcpy(dstBuffer+tooSmallSize, &endMark, 4); errorCode = 
ZSTD_compressCCtx(ctx, dstBuffer, tooSmallSize, sampleBuffer, sampleSize, cLevel); CHECK(!ZSTD_isError(errorCode), "ZSTD_compressCCtx should have failed ! (buffer too small : %u < %u)", (U32)tooSmallSize, (U32)cSize); { U32 endCheck; memcpy(&endCheck, dstBuffer+tooSmallSize, 4); - CHECK(endCheck != endMark, "ZSTD_compressCCtx : dst buffer overflow"); } + CHECK(endCheck != endMark, "ZSTD_compressCCtx : dst buffer overflow"); } } /* frame header decompression test */ From b21ce15efe3175564ddda8bbdf287fd73474645c Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 24 Mar 2016 01:27:55 +0100 Subject: [PATCH 154/247] minor variables isolation --- lib/bitstream.h | 25 +++++------- lib/zstd_decompress.c | 95 ++++++++++++++++++++----------------------- 2 files changed, 56 insertions(+), 64 deletions(-) diff --git a/lib/bitstream.h b/lib/bitstream.h index 749dc02f..40400680 100644 --- a/lib/bitstream.h +++ b/lib/bitstream.h @@ -122,7 +122,7 @@ MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); * Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. * A reload guarantee a minimum of ((8*sizeof(size_t))-7) bits when its result is BIT_DStream_unfinished. * Otherwise, it can be less than that, so proceed accordingly. -* Checking if DStream has reached its end can be performed with BIT_endOfDStream() +* Checking if DStream has reached its end can be performed with BIT_endOfDStream(). 
*/ @@ -256,15 +256,13 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } if (srcSize >= sizeof(size_t)) { /* normal case */ - U32 contain32; bitD->start = (const char*)srcBuffer; bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t); bitD->bitContainer = MEM_readLEST(bitD->ptr); - contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; - if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */ - bitD->bitsConsumed = 8 - BIT_highbit32(contain32); + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ + bitD->bitsConsumed = 8 - BIT_highbit32(lastByte); } } else { - U32 contain32; bitD->start = (const char*)srcBuffer; bitD->ptr = bitD->start; bitD->bitContainer = *(const BYTE*)(bitD->start); @@ -278,9 +276,9 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8; default:; } - contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; - if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */ - bitD->bitsConsumed = 8 - BIT_highbit32(contain32); + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ + bitD->bitsConsumed = 8 - BIT_highbit32(lastByte); } bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8; } @@ -295,7 +293,7 @@ MEM_STATIC size_t BIT_getUpperBits(size_t bitD, U32 const start) #include MEM_STATIC size_t BIT_getMiddleBits(size_t bitD, U32 const nbBits, U32 const start) { -#if defined(__BMI__) && defined(__GNUC__) +#if defined(__BMI__) && defined(__GNUC__) /* experimental */ return __builtin_ia32_bextr_u64(bitD, (nbBits<<8) | start ); #else return (bitD >> start) & BIT_mask[nbBits]; @@ -316,12 +314,11 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitD, U32 const nbBits) 
*/ MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) { -#if defined(__BMI__) && defined(__GNUC__) +#if defined(__BMI__) && defined(__GNUC__) /* experimental */ return __builtin_ia32_bextr_u64(bitD->bitContainer, (nbBits<<8) | (64 - bitD->bitsConsumed - nbBits) ); #else U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1; return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask); - //return (bitD->bitContainer >> (64 - bitD->bitsConsumed - nbBits)) & BIT_mask[nbBits]; #endif } @@ -339,8 +336,8 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) } /*! BIT_readBits() : - * Read next n bits from local register. - * pay attention to not read more than nbBits contained into local register. + * Read (consume) next n bits from local register and update. + * Pay attention to not read more than nbBits contained into local register. * @return : extracted value. */ MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 7749ffd3..b4a2184e 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -560,50 +560,51 @@ FORCE_INLINE size_t ZSTD_buildSeqTable(FSE_DTable* DTable, U32 type, U32 max, U3 } -size_t ZSTD_decodeSeqHeaders(int* nbSeq, +size_t ZSTD_decodeSeqHeaders(int* nbSeqPtr, FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, const void* src, size_t srcSize) { const BYTE* const istart = (const BYTE* const)src; - const BYTE* ip = istart; const BYTE* const iend = istart + srcSize; - U32 LLtype, Offtype, MLtype; + const BYTE* ip = istart; /* check */ if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong); /* SeqHead */ - *nbSeq = *ip++; - if (*nbSeq==0) return 1; - if (*nbSeq >= 0x7F) { - if (*nbSeq == 0xFF) - *nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; - else - *nbSeq = ((nbSeq[0]-0x80)<<8) + *ip++; + { int nbSeq = *ip++; + if (!nbSeq) { *nbSeqPtr=0; return 1; } + if (nbSeq >= 0x7F) { + if 
(nbSeq == 0xFF) + nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; + else + nbSeq = ((nbSeq-0x80)<<8) + *ip++; + } + *nbSeqPtr = nbSeq; } /* FSE table descriptors */ - LLtype = *ip >> 6; - Offtype = (*ip >> 4) & 3; - MLtype = (*ip >> 2) & 3; - ip++; + { U32 const LLtype = *ip >> 6; + U32 const Offtype = (*ip >> 4) & 3; + U32 const MLtype = (*ip >> 2) & 3; + ip++; - /* check */ - if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */ + /* check */ + if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */ - /* Build DTables */ - { size_t const bhSize = ZSTD_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog); - if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); - ip += bhSize; - } - { size_t const bhSize = ZSTD_buildSeqTableOff(DTableOffb, Offtype, Offbits, OffFSELog, ip, iend-ip); - if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); - ip += bhSize; - } - { size_t const bhSize = ZSTD_buildSeqTable(DTableML, MLtype, MaxML, MLFSELog, ip, iend-ip, ML_defaultNorm, ML_defaultNormLog); - if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); - ip += bhSize; - } + /* Build DTables */ + { size_t const bhSize = ZSTD_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog); + if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); + ip += bhSize; + } + { size_t const bhSize = ZSTD_buildSeqTableOff(DTableOffb, Offtype, Offbits, OffFSELog, ip, iend-ip); + if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); + ip += bhSize; + } + { size_t const bhSize = ZSTD_buildSeqTable(DTableML, MLtype, MaxML, MLFSELog, ip, iend-ip, ML_defaultNorm, ML_defaultNormLog); + if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); + ip += bhSize; + } } return ip-istart; } @@ -675,10 +676,8 @@ FORCE_INLINE size_t ZSTD_execSequence(BYTE* op, 
const BYTE** litPtr, const BYTE* const litLimit_8, const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd) { - static const int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ - static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* substracted */ BYTE* const oLitEnd = op + sequence.litLength; - const size_t sequenceLength = sequence.litLength + sequence.matchLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ BYTE* const oend_8 = oend-8; const BYTE* const litEnd = *litPtr + sequence.litLength; @@ -687,7 +686,7 @@ FORCE_INLINE size_t ZSTD_execSequence(BYTE* op, /* check */ if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of 8 from oend */ if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */ - if (litEnd > litLimit_8) return ERROR(corruption_detected); /* risk read beyond lit buffer */ + if (litEnd > litLimit_8) return ERROR(corruption_detected); /* over-read beyond lit buffer */ /* copy Literals */ ZSTD_wildcopy(op, *litPtr, sequence.litLength); /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */ @@ -697,8 +696,7 @@ FORCE_INLINE size_t ZSTD_execSequence(BYTE* op, /* copy Match */ if (sequence.offset > (size_t)(oLitEnd - base)) { /* offset beyond prefix */ - if (sequence.offset > (size_t)(oLitEnd - vBase)) - return ERROR(corruption_detected); + if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected); match = dictEnd - (base-match); if (match + sequence.matchLength <= dictEnd) { memmove(oLitEnd, match, sequence.matchLength); @@ -715,7 +713,9 @@ FORCE_INLINE size_t ZSTD_execSequence(BYTE* op, /* match within prefix */ if (sequence.offset < 8) { /* close range match, overlap */ - const int sub2 = dec64table[sequence.offset]; + static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* 
added */ + static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* substracted */ + int const sub2 = dec64table[sequence.offset]; op[0] = match[0]; op[1] = match[1]; op[2] = match[2]; @@ -892,7 +892,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, /* Loop on each block */ while (1) { size_t decodedSize=0; - size_t cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties); + size_t const cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties); if (ZSTD_isError(cBlockSize)) return cBlockSize; ip += ZSTD_blockHeaderSize; @@ -992,7 +992,6 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, co { case ZSTDds_getFrameHeaderSize : { - /* get frame header size */ if (srcSize != ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong); /* impossible */ dctx->headerSize = ZSTD_frameHeaderSize(src, ZSTD_frameHeaderSize_min); if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize; @@ -1006,7 +1005,6 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, co } case ZSTDds_decodeFrameHeader: { - /* get frame header */ size_t result; memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_min, src, dctx->expected); result = ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize); @@ -1017,16 +1015,14 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, co } case ZSTDds_decodeBlockHeader: { - /* Decode block header */ blockProperties_t bp; - size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); - if (ZSTD_isError(blockSize)) return blockSize; + size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); + if (ZSTD_isError(cBlockSize)) return cBlockSize; if (bp.blockType == bt_end) { dctx->expected = 0; dctx->stage = ZSTDds_getFrameHeaderSize; - } - else { - dctx->expected = blockSize; + } else { + dctx->expected = cBlockSize; dctx->bType = bp.blockType; dctx->stage = ZSTDds_decompressBlock; } @@ -1113,7 +1109,7 @@ static size_t 
ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* dict, size_t dictSiz static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) { size_t eSize; - U32 magic = MEM_readLE32(dict); + U32 const magic = MEM_readLE32(dict); if (magic != ZSTD_DICT_MAGIC) { /* pure content mode */ ZSTD_refDictContent(dctx, dict, dictSize); @@ -1136,12 +1132,11 @@ static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) { - size_t errorCode; - errorCode = ZSTD_decompressBegin(dctx); - if (ZSTD_isError(errorCode)) return errorCode; + { size_t const errorCode = ZSTD_decompressBegin(dctx); + if (ZSTD_isError(errorCode)) return errorCode; } if (dict && dictSize) { - errorCode = ZSTD_decompress_insertDictionary(dctx, dict, dictSize); + size_t const errorCode = ZSTD_decompress_insertDictionary(dctx, dict, dictSize); if (ZSTD_isError(errorCode)) return ERROR(dictionary_corrupted); } From 646693e3be39ffeb40451499b7689edbd2ff8d1d Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 24 Mar 2016 02:31:27 +0100 Subject: [PATCH 155/247] support for alternate offset (fusion) --- lib/zstd_compress.c | 14 +++++++------- lib/zstd_decompress.c | 19 +++++++++++-------- lib/zstd_internal.h | 2 +- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 8d2eff0b..f79109a1 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -661,7 +661,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, } } /* Offset codes */ - { size_t i; for (i=0; i 64 - 7 - 27) + if (ofBits + mlBits + llBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) BIT_flushBits(&blockStream); BIT_addBits(&blockStream, llTable[n], llBits); BIT_addBits(&blockStream, mlTable[n], mlBits); @@ -798,8 +798,8 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const B static const BYTE* g_start = NULL; const U32 pos = 
(U32)(literals - g_start); if (g_start==NULL) g_start = literals; - if ((pos > 10354000) && (pos < 10355000)) - printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n", + if ((pos > 23945000) && (pos < 23946800)) + printf("Cpos %6u :%4u literals & match %3u bytes at distance %6u \n", pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); #endif #if ZSTD_OPT_DEBUG == 3 @@ -818,7 +818,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const B else *seqStorePtr->litLength++ = (U16)litLength; /* match offset */ - *(seqStorePtr->offset++) = (U32)offsetCode; + *(seqStorePtr->offset++) = (U32)offsetCode + 1; /* match Length */ if (matchCode>=65535) { *(seqStorePtr->matchLength++) = 65535; seqStorePtr->longLength = (U32)matchCode; } diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index b4a2184e..48614c35 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -634,9 +634,12 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) U32 const llBits = LL_bits[llCode]; U32 const mlBits = ML_bits[mlCode]; - U32 const ofBits = ofCode ? 
ofCode-1 : 0; + U32 const ofBits = ofCode; + U32 const totalBits = llBits+mlBits+ofBits; - size_t const allBits = BIT_readBits(&(seqState->DStream), llBits+mlBits+ofBits); + size_t const allBits = BIT_readBits(&(seqState->DStream), totalBits); + + if (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) BIT_reloadDStream(&(seqState->DStream)); static const U32 LL_base[MaxLL+1] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, @@ -650,10 +653,10 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) 0x1000, 0x2000, 0x4000, 0x8000, 0x10000 }; static const U32 OF_base[MaxOff+1] = { - 1 /*fake*/, 1, 2, 4, 8, 0x10, 0x20, 0x40, - 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, - 0x8000, 0x10000, 0x20000, 0x40000, 0x80000, 0x100000, 0x200000, 0x400000, - 0x800000, 0x1000000, 0x2000000, 0x4000000, /*fake*/ 1, 1, 1, 1 }; + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, + 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, + 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, + 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, /*fake*/ 1, 1, 1, 1, 1 }; /* sequence */ seq->litLength = LL_base[llCode] + BIT_getLowerBits(allBits, llBits); @@ -792,8 +795,8 @@ static size_t ZSTD_decompressSequences( ZSTD_decodeSequence(&sequence, &seqState, mls); #if 0 /* for debug */ { U32 pos = (U32)(op-base); - if ((pos > 10354000) && (pos < 10355000)) - printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n", + if ((pos > 23945280) && (pos < 23946797)) + printf("Dpos %6u : %3u literals & match %3u bytes at distance %6u \n", pos, (U32)sequence.litLength, (U32)sequence.matchLength, (U32)sequence.offset); } #endif diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index fa6c93ca..4ce44399 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -110,7 +110,7 @@ typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t; #define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */ #define MLFSELog 9 #define 
LLFSELog 9 -#define OffFSELog 9 +#define OffFSELog 8 #define FSE_ENCODING_RAW 0 #define FSE_ENCODING_RLE 1 From 5cc4efdaf8531399242d37d8a2ebc3df1e9364f6 Mon Sep 17 00:00:00 2001 From: inikep Date: Fri, 25 Mar 2016 10:52:25 +0100 Subject: [PATCH 156/247] created zstd_stats.h --- lib/zstd_compress.c | 32 +++------ lib/zstd_internal.h | 20 +++--- lib/zstd_opt.h | 4 +- lib/zstd_stats.h | 164 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 188 insertions(+), 32 deletions(-) create mode 100644 lib/zstd_stats.h diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 43e6e663..ddb816b1 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -97,11 +97,11 @@ struct ZSTD_CCtx_s U32 nextToUpdate; /* index from which to continue dictionary update */ U32 nextToUpdate3; /* index from which to continue dictionary update */ U32 hashLog3; /* dispatch table : larger == faster, more memory */ - U32 targetSrcSize; /* optimize compression for this source size */ U32 loadedDictEnd; U32 stage; ZSTD_parameters params; void* workSpace; + size_t targetSrcSize; /* optimize compression for this source size */ size_t workSpaceSize; size_t blockSize; size_t hbSize; @@ -781,12 +781,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const B printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n", (U32)(literals - g_start), (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); #endif -#if ZSTD_OPT_DEBUG == 3 - if (offsetCode == 0) seqStorePtr->realRepSum++; - seqStorePtr->realSeqSum++; - seqStorePtr->realMatchSum += matchCode; - seqStorePtr->realLitSum += litLength; -#endif + ZSTD_statsUpdatePrices(&seqStorePtr->stats, litLength, literals, offsetCode, matchCode); /* copy Literals */ ZSTD_wildcopy(seqStorePtr->lit, literals, litLength); @@ -1696,6 +1691,7 @@ _storeSequence: { size_t const lastLLSize = iend - anchor; memcpy(seqStorePtr->lit, anchor, lastLLSize); seqStorePtr->lit += lastLLSize; + 
ZSTD_statsUpdatePrices(&seqStorePtr->stats, lastLLSize, anchor, 0, 0); } } @@ -1942,6 +1938,8 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCa } + + static size_t ZSTD_compress_generic (ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize) @@ -1952,15 +1950,13 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* zc, BYTE* const ostart = (BYTE*)dst; BYTE* op = ostart; const U32 maxDist = 1 << zc->params.windowLog; -#if ZSTD_OPT_DEBUG == 3 - seqStore_t* ssPtr = &zc->seqStore; - static U32 priceFunc = 0; - ssPtr->realMatchSum = ssPtr->realLitSum = ssPtr->realSeqSum = ssPtr->realRepSum = 1; - ssPtr->priceFunc = priceFunc; -#endif + ZSTD_stats_t* stats = &zc->seqStore.stats; + + ZSTD_statsInit(stats); while (remaining) { size_t cSize; + ZSTD_statsResetFreqs(stats); if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE) return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */ if (remaining < blockSize) blockSize = remaining; @@ -1992,12 +1988,7 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* zc, op += cSize; } -#if ZSTD_OPT_DEBUG == 3 - ssPtr->realMatchSum += ssPtr->realSeqSum * ((zc->params.searchLength == 3) ? 
3 : 4); - printf("avgMatchL=%.2f avgLitL=%.2f match=%.1f%% lit=%.1f%% reps=%d seq=%d priceFunc=%d\n", (float)ssPtr->realMatchSum/ssPtr->realSeqSum, (float)ssPtr->realLitSum/ssPtr->realSeqSum, 100.0*ssPtr->realMatchSum/(ssPtr->realMatchSum+ssPtr->realLitSum), 100.0*ssPtr->realLitSum/(ssPtr->realMatchSum+ssPtr->realLitSum), ssPtr->realRepSum, ssPtr->realSeqSum, ssPtr->priceFunc); - priceFunc++; -#endif - + ZSTD_statsPrint(stats, zc->params.searchLength); return op-ostart; } @@ -2466,9 +2457,6 @@ ZSTD_parameters ZSTD_getParams(int compressionLevel, U64 srcSize) int tableID = ((srcSize-1) <= 256 KB) + ((srcSize-1) <= 128 KB) + ((srcSize-1) <= 16 KB); /* intentional underflow for srcSizeHint == 0 */ if (compressionLevel<=0) compressionLevel = 1; if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL; -#if ZSTD_OPT_DEBUG >= 1 - tableID=0; -#endif result = ZSTD_defaultParameters[tableID][compressionLevel]; result.srcSize = srcSize; return result; diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index 1358eb8a..3561291f 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -50,7 +50,7 @@ /*-************************************* * Common constants ***************************************/ -#define ZSTD_OPT_DEBUG 0 // 1 = tableID=0; 3 = price func tests; 5 = check encoded sequences; 9 = full logs +#define ZSTD_OPT_DEBUG 0 // 3 = compression stats; 5 = check encoded sequences; 9 = full logs #include #if defined(ZSTD_OPT_DEBUG) && ZSTD_OPT_DEBUG>=9 #define ZSTD_LOG_PARSER(...) 
printf(__VA_ARGS__) @@ -176,6 +176,16 @@ typedef struct { U32 rep2; } ZSTD_optimal_t; +#if ZSTD_OPT_DEBUG == 3 + #include "zstd_stats.h" +#else + typedef struct { U32 unused; } ZSTD_stats_t; + MEM_STATIC void ZSTD_statsPrint(ZSTD_stats_t* stats, U32 searchLength) { (void)stats; (void)searchLength; }; + MEM_STATIC void ZSTD_statsInit(ZSTD_stats_t* stats) { (void)stats; }; + MEM_STATIC void ZSTD_statsResetFreqs(ZSTD_stats_t* stats) { (void)stats; }; + MEM_STATIC void ZSTD_statsUpdatePrices(ZSTD_stats_t* stats, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) { (void)stats; (void)litLength; (void)literals; (void)offset; (void)matchLength; }; +#endif + typedef struct { void* buffer; U32* offsetStart; @@ -208,13 +218,7 @@ typedef struct { U32 log2litSum; U32 log2offCodeSum; U32 factor; -#if ZSTD_OPT_DEBUG == 3 - U32 realMatchSum; - U32 realLitSum; - U32 realSeqSum; - U32 realRepSum; - U32 priceFunc; -#endif + ZSTD_stats_t stats; } seqStore_t; seqStore_t ZSTD_copySeqStore(const ZSTD_CCtx* ctx); diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index e20379c1..bbfb11ca 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -130,8 +130,8 @@ FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYT if (matchLength >= MaxML) matchLength = MaxML; price += ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + seqStorePtr->log2matchLengthSum - ZSTD_highbit(seqStorePtr->matchLengthFreq[matchLength]+1); -#if ZSTD_OPT_DEBUG == 3 - switch (seqStorePtr->priceFunc) { +#if ZSTD_OPT_DEBUG == 333 + switch (seqStorePtr->) { default: case 0: return 1 + price + ((seqStorePtr->litSum>>5) / seqStorePtr->litLengthSum) + ((seqStorePtr->litSum<<1) / (seqStorePtr->litSum + seqStorePtr->matchSum)); diff --git a/lib/zstd_stats.h b/lib/zstd_stats.h new file mode 100644 index 00000000..8d70191c --- /dev/null +++ b/lib/zstd_stats.h @@ -0,0 +1,164 @@ +/* + zstd - standard compression library + Header File for static linking only + Copyright (C) 2014-2016, Yann 
Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - zstd homepage : http://www.zstd.net +*/ +#ifndef ZSTD_STATS_H +#define ZSTD_STATS_H + + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-************************************* +* Dependencies +***************************************/ +//#include "zstd.h" +//#include "mem.h" + + +/*-************************************* +* Constants +***************************************/ +//#define ZSTD_MAGICNUMBER 0xFD2FB526 /* v0.6 */ + + +/*-************************************* +* Types +***************************************/ +typedef struct { + U32 priceOffset, priceOffCode, priceMatchLength, priceLiteral, priceLitLength, priceDumpsLength; + U32 totalMatchSum, totalLitSum, totalSeqSum, totalRepSum; + U32 litSum, matchLengthSum, litLengthSum, offCodeSum; + U32 matchLengthFreq[1<totalMatchSum += stats->totalSeqSum * ((searchLength == 3) ? 3 : 4); + printf("avgMatchL=%.2f avgLitL=%.2f match=%.1f%% lit=%.1f%% reps=%d seq=%d\n", (float)stats->totalMatchSum/stats->totalSeqSum, (float)stats->totalLitSum/stats->totalSeqSum, 100.0*stats->totalMatchSum/(stats->totalMatchSum+stats->totalLitSum), 100.0*stats->totalLitSum/(stats->totalMatchSum+stats->totalLitSum), stats->totalRepSum, stats->totalSeqSum); + printf("SumBytes=%d Offset=%d OffCode=%d Match=%d Literal=%d LitLength=%d DumpsLength=%d\n", (stats->priceOffset+stats->priceOffCode+stats->priceMatchLength+stats->priceLiteral+stats->priceLitLength+stats->priceDumpsLength)/8, stats->priceOffset/8, stats->priceOffCode/8, stats->priceMatchLength/8, stats->priceLiteral/8, stats->priceLitLength/8, stats->priceDumpsLength/8); +} + +MEM_STATIC void ZSTD_statsInit(ZSTD_stats_t* stats) +{ + stats->totalLitSum = stats->totalMatchSum = stats->totalSeqSum = stats->totalRepSum = 1; + stats->priceOffset = stats->priceOffCode = stats->priceMatchLength = stats->priceLiteral = stats->priceLitLength = stats->priceDumpsLength = 0; +} + +MEM_STATIC void ZSTD_statsResetFreqs(ZSTD_stats_t* stats) +{ + 
unsigned u; + + stats->litSum = (1<litLengthSum = (1<matchLengthSum = (1<offCodeSum = (1<litFreq[u] = 1; + for (u=0; u<=MaxLL; u++) + stats->litLengthFreq[u] = 1; + for (u=0; u<=MaxML; u++) + stats->matchLengthFreq[u] = 1; + for (u=0; u<=MaxOff; u++) + stats->offCodeFreq[u] = 1; +} + +MEM_STATIC void ZSTD_statsUpdatePrices(ZSTD_stats_t* stats, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) +{ + /* offset */ + BYTE offCode = offset ? (BYTE)ZSTD_highbit(offset+1) + 1 : 0; + stats->priceOffCode += ZSTD_highbit(stats->offCodeSum+1) - ZSTD_highbit(stats->offCodeFreq[offCode]+1); + stats->priceOffset += (offCode-1) + (!offCode); + + /* match Length */ + stats->priceDumpsLength += ((matchLength >= MaxML)<<3) + ((matchLength >= 255+MaxML)<<4) + ((matchLength>=(1<<15))<<3); + stats->priceMatchLength += ZSTD_highbit(stats->matchLengthSum+1) - ZSTD_highbit(stats->matchLengthFreq[(matchLength >= MaxML) ? MaxML : matchLength]+1); + + if (litLength) { + /* literals */ + U32 u; + stats->priceLiteral += litLength * ZSTD_highbit(stats->litSum+1); + for (u=0; u < litLength; u++) + stats->priceLiteral -= ZSTD_highbit(stats->litFreq[literals[u]]+1); + + /* literal Length */ + stats->priceDumpsLength += ((litLength >= MaxLL)<<3) + ((litLength >= 255+MaxLL)<<4) + ((litLength>=(1<<15))<<3); + stats->priceLitLength += ZSTD_highbit(stats->litLengthSum+1) - ZSTD_highbit(stats->litLengthFreq[(litLength >= MaxLL) ? 
MaxLL : litLength]+1); + } else { + stats->priceLitLength += ZSTD_highbit(stats->litLengthSum+1) - ZSTD_highbit(stats->litLengthFreq[0]+1); + } + + + if (offset == 0) stats->totalRepSum++; + stats->totalSeqSum++; + stats->totalMatchSum += matchLength; + stats->totalLitSum += litLength; + + U32 u; + /* literals */ + stats->litSum += litLength; + for (u=0; u < litLength; u++) + stats->litFreq[literals[u]]++; + + /* literal Length */ + stats->litLengthSum++; + if (litLength >= MaxLL) + stats->litLengthFreq[MaxLL]++; + else + stats->litLengthFreq[litLength]++; + + /* match offset */ + stats->offCodeSum++; + stats->offCodeFreq[offCode]++; + + /* match Length */ + stats->matchLengthSum++; + if (matchLength >= MaxML) + stats->matchLengthFreq[MaxML]++; + else + stats->matchLengthFreq[matchLength]++; +} + + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_STATIC_H */ From 433a5cce7ec75f421da8bbb677d07e36dd55e467 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 25 Mar 2016 11:43:48 +0100 Subject: [PATCH 157/247] improved decompression speed (680) --- lib/zstd_compress.c | 6 +++--- lib/zstd_decompress.c | 14 +++++++------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index f79109a1..1c4d83c9 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -798,9 +798,9 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const B static const BYTE* g_start = NULL; const U32 pos = (U32)(literals - g_start); if (g_start==NULL) g_start = literals; - if ((pos > 23945000) && (pos < 23946800)) - printf("Cpos %6u :%4u literals & match %3u bytes at distance %6u \n", - pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); + if ((pos > 15181500) && (pos < 15183150)) + printf("Cpos %6u :%4u literals & match %3u bytes at distance %6u \n", + pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); #endif #if ZSTD_OPT_DEBUG == 3 if (offsetCode == 0) seqStorePtr->realRepSum++; diff 
--git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 48614c35..f0b695a5 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -637,9 +637,7 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) U32 const ofBits = ofCode; U32 const totalBits = llBits+mlBits+ofBits; - size_t const allBits = BIT_readBits(&(seqState->DStream), totalBits); - - if (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) BIT_reloadDStream(&(seqState->DStream)); + //size_t const allBits = BIT_readBits(&(seqState->DStream), totalBits); static const U32 LL_base[MaxLL+1] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, @@ -659,13 +657,15 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, /*fake*/ 1, 1, 1, 1, 1 }; /* sequence */ - seq->litLength = LL_base[llCode] + BIT_getLowerBits(allBits, llBits); - seq->matchLength = ML_base[mlCode] + mls + BIT_getMiddleBits(allBits, mlBits, llBits); - { size_t const offset = ofCode ? OF_base[ofCode] + BIT_getUpperBits(allBits, llBits+mlBits) : + { size_t const offset = ofCode ? OF_base[ofCode] + BIT_readBits(&(seqState->DStream), ofBits) : llCode ? seq->offset : seqState->prevOffset; if (ofCode | !llCode) seqState->prevOffset = seq->offset; /* cmove */ seq->offset = offset; } + seq->matchLength = ML_base[mlCode] + mls + ((mlCode>31) ? BIT_readBits(&(seqState->DStream), mlBits) : 0); + seq->litLength = LL_base[llCode] + ((llCode>15) ? 
BIT_readBits(&(seqState->DStream), llBits) : 0); + + if (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) BIT_reloadDStream(&(seqState->DStream)); /* ANS state update */ FSE_updateState(&(seqState->stateLL), &(seqState->DStream)); @@ -795,7 +795,7 @@ static size_t ZSTD_decompressSequences( ZSTD_decodeSequence(&sequence, &seqState, mls); #if 0 /* for debug */ { U32 pos = (U32)(op-base); - if ((pos > 23945280) && (pos < 23946797)) + if ((pos > 15181500) && (pos < 15183150)) printf("Dpos %6u : %3u literals & match %3u bytes at distance %6u \n", pos, (U32)sequence.litLength, (U32)sequence.matchLength, (U32)sequence.offset); } From 5553442376fd3c3d86ffa1cb7ba2899b7b013729 Mon Sep 17 00:00:00 2001 From: inikep Date: Fri, 25 Mar 2016 13:35:33 +0100 Subject: [PATCH 158/247] fixed compilation with ZSTD_NOBENCH --- programs/zstdcli.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 0c060bdb..022d7513 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -333,14 +333,15 @@ int main(int argCount, const char** argv) /* Pause at the end (-p) or set an additional param (-p#) (hidden option) */ case 'p': argument++; +#ifndef ZSTD_NOBENCH if ((*argument>='0') && (*argument<='9')) { int additionalParam = 0; while ((*argument >= '0') && (*argument <= '9')) additionalParam *= 10, additionalParam += *argument++ - '0'; BMK_setAdditionalParam(additionalParam); - } else { + } else +#endif main_pause=1; - } break; /* unknown command */ default : CLEAN_RETURN(badusage(programName)); From 97c88e716cbb1f3d63692160be53ed2f5845b8da Mon Sep 17 00:00:00 2001 From: inikep Date: Fri, 25 Mar 2016 13:39:14 +0100 Subject: [PATCH 159/247] fix for -Wshorten-64-to-32 warning in ZSTD_statsUpdatePrices --- lib/zstd_internal.h | 2 +- lib/zstd_stats.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index 3561291f..c0ff68ea 100644 --- a/lib/zstd_internal.h +++ 
b/lib/zstd_internal.h @@ -183,7 +183,7 @@ typedef struct { MEM_STATIC void ZSTD_statsPrint(ZSTD_stats_t* stats, U32 searchLength) { (void)stats; (void)searchLength; }; MEM_STATIC void ZSTD_statsInit(ZSTD_stats_t* stats) { (void)stats; }; MEM_STATIC void ZSTD_statsResetFreqs(ZSTD_stats_t* stats) { (void)stats; }; - MEM_STATIC void ZSTD_statsUpdatePrices(ZSTD_stats_t* stats, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) { (void)stats; (void)litLength; (void)literals; (void)offset; (void)matchLength; }; + MEM_STATIC void ZSTD_statsUpdatePrices(ZSTD_stats_t* stats, size_t litLength, const BYTE* literals, size_t offset, size_t matchLength) { (void)stats; (void)litLength; (void)literals; (void)offset; (void)matchLength; }; #endif typedef struct { diff --git a/lib/zstd_stats.h b/lib/zstd_stats.h index 8d70191c..d0189f87 100644 --- a/lib/zstd_stats.h +++ b/lib/zstd_stats.h @@ -99,7 +99,7 @@ MEM_STATIC void ZSTD_statsResetFreqs(ZSTD_stats_t* stats) stats->offCodeFreq[u] = 1; } -MEM_STATIC void ZSTD_statsUpdatePrices(ZSTD_stats_t* stats, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) +MEM_STATIC void ZSTD_statsUpdatePrices(ZSTD_stats_t* stats, size_t litLength, const BYTE* literals, size_t offset, size_t matchLength) { /* offset */ BYTE offCode = offset ? 
(BYTE)ZSTD_highbit(offset+1) + 1 : 0; From b58c685cf783268f34e010d36e138a41f4466aec Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 25 Mar 2016 20:29:35 +0100 Subject: [PATCH 160/247] fixed : fullbench link to deprecate function (wrong prototype) --- programs/fullbench.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/programs/fullbench.c b/programs/fullbench.c index b4afcf11..c5c56496 100644 --- a/programs/fullbench.c +++ b/programs/fullbench.c @@ -119,7 +119,7 @@ static clock_t BMK_clockSpan( clock_t clockStart ) static size_t BMK_findMaxMem(U64 requiredMem) { - const size_t step = 64 MB; + size_t const step = 64 MB; void* testmem = NULL; requiredMem = (((requiredMem >> 26) + 1) << 26); @@ -183,15 +183,13 @@ size_t local_ZSTD_decodeLiteralsBlock(void* dst, size_t dstSize, void* buff2, co } extern size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr); -extern size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr, FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, const void* src, size_t srcSize); +extern size_t ZSTD_decodeSeqHeaders(int* nbSeq, FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, const void* src, size_t srcSize); size_t local_ZSTD_decodeSeqHeaders(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize) { U32 DTableML[FSE_DTABLE_SIZE_U32(10)], DTableLL[FSE_DTABLE_SIZE_U32(10)], DTableOffb[FSE_DTABLE_SIZE_U32(9)]; /* MLFSELog, LLFSELog and OffFSELog are not public values */ - const BYTE* dumps; - size_t length; int nbSeq; (void)src; (void)srcSize; (void)dst; (void)dstSize; - return ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &length, DTableLL, DTableML, DTableOffb, buff2, g_cSize); + return ZSTD_decodeSeqHeaders(&nbSeq, DTableLL, DTableML, DTableOffb, buff2, g_cSize); } From a5b66e34c7c9601a56c2db127946db7da39fa700 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 26 Mar 2016 01:48:27 +0100 Subject: [PATCH 
161/247] minor variable isolation and remove a goto --- programs/bench.c | 130 ++++++++++++++++++++++++----------------------- 1 file changed, 66 insertions(+), 64 deletions(-) diff --git a/programs/bench.c b/programs/bench.c index 905f84ed..2bde4c03 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -239,17 +239,15 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.50, 1); /* Bench */ - { size_t cSize = 0; - double fastestC = 100000000., fastestD = 100000000.; - double ratio = 0.; - U64 crcCheck = 0; + { double fastestC = 100000000., fastestD = 100000000.; clock_t coolTime = clock(); U32 testNb; DISPLAY("\r%79s\r", ""); for (testNb = 1; testNb <= (g_nbIterations + !g_nbIterations); testNb++) { - int nbLoops; - clock_t clockStart, clockSpan; + size_t cSize; + double ratio = 0.; + clock_t clockStart; clock_t const clockLoop = g_nbIterations ? TIMELOOP_S * CLOCKS_PER_SEC : 10; /* overheat protection */ @@ -266,20 +264,21 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, clockStart = clock(); while (clock() == clockStart); clockStart = clock(); - - for (nbLoops = 0 ; BMK_clockSpan(clockStart) < clockLoop ; nbLoops++) { - U32 blockNb; - ZSTD_compressBegin_usingDict(refCtx, dictBuffer, dictBufferSize, cLevel); - for (blockNb=0; blockNb%10u (%5.3f),%6.1f MB/s ,%6.1f MB/s\r", testNb, displayName, (U32)srcSize, (U32)cSize, ratio, (double)srcSize / 1000000. / (fastestC / CLOCKS_PER_SEC), (double)srcSize / 1000000. / (fastestD / CLOCKS_PER_SEC) ); /* CRC Checking */ -_findError: - crcCheck = XXH64(resultBuffer, srcSize, 0); - if (crcOrig!=crcCheck) { - size_t u; - DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", displayName, (unsigned)crcOrig, (unsigned)crcCheck); - for (u=0; u u) break; - bacc += blockTable[segNb].srcSize; + { U64 const crcCheck = XXH64(resultBuffer, srcSize, 0); + if (crcOrig!=crcCheck) { + size_t u; + DISPLAY("!!! WARNING !!! 
%14s : Invalid Checksum : %x != %x \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck); + for (u=0; u u) break; + bacc += blockTable[segNb].srcSize; + } + pos = (U32)(u - bacc); + bNb = pos / (128 KB); + DISPLAY("(block %u, sub %u, pos %u) \n", segNb, bNb, pos); + break; } - pos = (U32)(u - bacc); - bNb = pos / (128 KB); - printf("(block %u, sub %u, pos %u) \n", segNb, bNb, pos); - break; - } - if (u==srcSize-1) { /* should never happen */ - printf("no difference detected\n"); - } } - break; - } /* if (crcOrig!=crcCheck) */ + if (u==srcSize-1) { /* should never happen */ + DISPLAY("no difference detected\n"); + } } + break; + } } /* CRC Checking */ #endif } /* for (testNb = 1; testNb <= (g_nbIterations + !g_nbIterations); testNb++) */ DISPLAY("%2i#\n", cLevel); @@ -407,23 +407,25 @@ static U64 BMK_getTotalFileSize(const char** fileNamesTable, unsigned nbFiles) return total; } +/*! BMK_loadFiles() : + Loads `buffer` with content of files listed within `fileNamesTable`. + At most, fills `buffer` entirely */ static void BMK_loadFiles(void* buffer, size_t bufferSize, size_t* fileSizes, - const char** fileNamesTable, unsigned const nbFiles) + const char** fileNamesTable, unsigned nbFiles) { size_t pos = 0; unsigned n; for (n=0; n bufferSize-pos) fileSize = bufferSize-pos; - readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f); - if (readSize != (size_t)fileSize) EXM_THROW(11, "could not read %s", fileNamesTable[n]); - pos += readSize; + if (fileSize > bufferSize-pos) fileSize = bufferSize-pos, nbFiles=n; /* buffer too small - stop after this file */ + { size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f); + if (readSize != (size_t)fileSize) EXM_THROW(11, "could not read %s", fileNamesTable[n]); + pos += readSize; } fileSizes[n] = (size_t)fileSize; fclose(f); } From b9151406dece85691c52b09ce49b22e8477ee2a9 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 26 Mar 2016 17:18:11 +0100 Subject: [PATCH 162/247] fixed 32-bits 
compatibility --- lib/zstd_compress.c | 35 ++++++++++++++++++++--------------- lib/zstd_decompress.c | 25 ++++++++++++++----------- programs/bench.c | 5 +++-- 3 files changed, 37 insertions(+), 28 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 1c4d83c9..90857a82 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -744,38 +744,43 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); BIT_addBits(&blockStream, llTable[nbSeq-1], LL_bits[llCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); BIT_addBits(&blockStream, mlTable[nbSeq-1], ML_bits[mlCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); BIT_addBits(&blockStream, offsetTable[nbSeq-1], ofCodeTable[nbSeq-1]); BIT_flushBits(&blockStream); { size_t n; for (n=nbSeq-2 ; n 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) - BIT_flushBits(&blockStream); + FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */ + FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */ + if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ + FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */ + if (MEM_32bits() || (ofBits+mlBits+llBits > 64-7-(LLFSELog+MLFSELog+OffFSELog))) + BIT_flushBits(&blockStream); /* (7)*/ BIT_addBits(&blockStream, llTable[n], llBits); + if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); BIT_addBits(&blockStream, mlTable[n], mlBits); - BIT_addBits(&blockStream, offsetTable[n], ofBits); /* 31 */ /* 61 */ /* 24 bits max in 32-bits mode */ - BIT_flushBits(&blockStream); /* 7 */ /* 7 */ + if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ + BIT_addBits(&blockStream, offsetTable[n], ofBits); /* 31 */ + BIT_flushBits(&blockStream); /* (7)*/ } } FSE_flushCState(&blockStream, &stateMatchLength); FSE_flushCState(&blockStream, 
&stateOffsetBits); FSE_flushCState(&blockStream, &stateLitLength); - { size_t const streamSize = BIT_closeCStream(&blockStream); - if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */ - op += streamSize; } - } + { size_t const streamSize = BIT_closeCStream(&blockStream); + if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */ + op += streamSize; + } } /* check compressibility */ _check_compressibility: @@ -798,8 +803,8 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const B static const BYTE* g_start = NULL; const U32 pos = (U32)(literals - g_start); if (g_start==NULL) g_start = literals; - if ((pos > 15181500) && (pos < 15183150)) - printf("Cpos %6u :%4u literals & match %3u bytes at distance %6u \n", + if ((pos > 200000000) && (pos < 200900000)) + printf("Cpos %6u :%5u literals & match %3u bytes at distance %6u \n", pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); #endif #if ZSTD_OPT_DEBUG == 3 diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index f0b695a5..6457e4c8 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -637,8 +637,6 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) U32 const ofBits = ofCode; U32 const totalBits = llBits+mlBits+ofBits; - //size_t const allBits = BIT_readBits(&(seqState->DStream), totalBits); - static const U32 LL_base[MaxLL+1] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, @@ -657,20 +655,25 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, /*fake*/ 1, 1, 1, 1, 1 }; /* sequence */ - { size_t const offset = ofCode ? OF_base[ofCode] + BIT_readBits(&(seqState->DStream), ofBits) : + { size_t const offset = ofCode ? OF_base[ofCode] + BIT_readBits(&(seqState->DStream), ofBits) : /* <= 26 bits */ llCode ? 
seq->offset : seqState->prevOffset; + if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); if (ofCode | !llCode) seqState->prevOffset = seq->offset; /* cmove */ seq->offset = offset; } - seq->matchLength = ML_base[mlCode] + mls + ((mlCode>31) ? BIT_readBits(&(seqState->DStream), mlBits) : 0); - seq->litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBits(&(seqState->DStream), llBits) : 0); - if (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) BIT_reloadDStream(&(seqState->DStream)); + seq->matchLength = ML_base[mlCode] + mls + ((mlCode>31) ? BIT_readBits(&(seqState->DStream), mlBits) : 0); /* <= 16 bits */ + if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&(seqState->DStream)); + + seq->litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBits(&(seqState->DStream), llBits) : 0); /* <= 16 bits */ + if (MEM_32bits() || + (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&(seqState->DStream)); /* ANS state update */ - FSE_updateState(&(seqState->stateLL), &(seqState->DStream)); - FSE_updateState(&(seqState->stateML), &(seqState->DStream)); - FSE_updateState(&(seqState->stateOffb), &(seqState->DStream)); + FSE_updateState(&(seqState->stateLL), &(seqState->DStream)); /* <= 9 bits */ + FSE_updateState(&(seqState->stateML), &(seqState->DStream)); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); /* <= 18 bits */ + FSE_updateState(&(seqState->stateOffb), &(seqState->DStream)); /* <= 8 bits */ } @@ -795,8 +798,8 @@ static size_t ZSTD_decompressSequences( ZSTD_decodeSequence(&sequence, &seqState, mls); #if 0 /* for debug */ { U32 pos = (U32)(op-base); - if ((pos > 15181500) && (pos < 15183150)) - printf("Dpos %6u : %3u literals & match %3u bytes at distance %6u \n", + if ((pos > 200802300) && (pos < 200802400)) + printf("Dpos %6u :%5u literals & match %3u bytes at distance %6u \n", pos, (U32)sequence.litLength, (U32)sequence.matchLength, (U32)sequence.offset); } #endif diff --git a/programs/bench.c 
b/programs/bench.c index 2bde4c03..0d18ea64 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -195,14 +195,13 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, size_t const blockSize = (g_blockSize ? g_blockSize : srcSize) + (!srcSize); /* avoid div by 0 */ U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles; blockParam_t* const blockTable = (blockParam_t*) malloc(maxNbBlocks * sizeof(blockParam_t)); - const size_t maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); /* add some room for safety */ + size_t const maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); /* add some room for safety */ void* const compressedBuffer = malloc(maxCompressedSize); void* const resultBuffer = malloc(srcSize); ZSTD_CCtx* refCtx = ZSTD_createCCtx(); ZSTD_CCtx* ctx = ZSTD_createCCtx(); ZSTD_DCtx* refDCtx = ZSTD_createDCtx(); ZSTD_DCtx* dctx = ZSTD_createDCtx(); - U64 const crcOrig = XXH64(srcBuffer, srcSize, 0); U32 nbBlocks; /* checks */ @@ -240,6 +239,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, /* Bench */ { double fastestC = 100000000., fastestD = 100000000.; + U64 const crcOrig = XXH64(srcBuffer, srcSize, 0); clock_t coolTime = clock(); U32 testNb; @@ -306,6 +306,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, if (ZSTD_isError(regenSize)) { DISPLAY("ZSTD_decompress_usingPreparedDCtx() failed on block %u : %s \n", blockNb, ZSTD_getErrorName(regenSize)); + clockStart -= clockLoop+1; /* force immediate test end */ break; } blockTable[blockNb].resSize = regenSize; From 74bd11954b922d2b32f6840b371c15959ba5b6ea Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 26 Mar 2016 17:50:26 +0100 Subject: [PATCH 163/247] fixed ARM compatibility --- Makefile | 5 ++--- lib/bitstream.h | 9 ++++++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 93d5e052..494f59dc 100644 --- a/Makefile +++ b/Makefile @@ -89,9 +89,8 @@ 
gpptest: clean $(MAKE) all CC=g++ CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror" armtest: clean -# $(MAKE) -C $(ZSTDDIR) all CC=arm-linux-gnueabi-gcc MOREFLAGS="-Werror" $(MAKE) -C $(PRGDIR) datagen # use native, faster - $(MAKE) -C $(PRGDIR) test CC=arm-linux-gnueabi-gcc ZSTDRTTEST= MOREFLAGS=-static # MOREFLAGS="-Werror -static" + $(MAKE) -C $(PRGDIR) test CC=arm-linux-gnueabi-gcc ZSTDRTTEST= MOREFLAGS="-Werror -static" # for Travis CI arminstall: clean @@ -105,7 +104,7 @@ armtest-w-install: clean arminstall armtest ppctest: clean $(MAKE) -C $(PRGDIR) datagen # use native, faster - $(MAKE) -C $(PRGDIR) test CC=powerpc-linux-gnu-gcc ZSTDRTTEST= MOREFLAGS=-static # MOREFLAGS="-Werror -static" + $(MAKE) -C $(PRGDIR) test CC=powerpc-linux-gnu-gcc ZSTDRTTEST= MOREFLAGS="-Werror -static" # for Travis CI ppcinstall: clean diff --git a/lib/bitstream.h b/lib/bitstream.h index 40400680..fd114e55 100644 --- a/lib/bitstream.h +++ b/lib/bitstream.h @@ -53,6 +53,14 @@ extern "C" { #include "error_private.h" /* error codes and messages */ +/*========================================= +* Target specific +=========================================*/ +#if defined(__BMI__) && defined(__GNUC__) +# include /* support for bextr (experimental) */ +#endif + + /*-****************************************** * bitStream encoding API (write forward) ********************************************/ @@ -290,7 +298,6 @@ MEM_STATIC size_t BIT_getUpperBits(size_t bitD, U32 const start) return bitD >> start; } -#include MEM_STATIC size_t BIT_getMiddleBits(size_t bitD, U32 const nbBits, U32 const start) { #if defined(__BMI__) && defined(__GNUC__) /* experimental */ From b44be742447f258b4ecec2753abc9f8c373ba55c Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 26 Mar 2016 20:52:14 +0100 Subject: [PATCH 164/247] Fixed zdict more dictionary compression tests --- lib/zdict.c | 96 ++++++++++++++++++++++++------------------- lib/zstd_compress.c | 93 
+++++++++++++++++++++++------------------ lib/zstd_internal.h | 3 +- programs/fileio.c | 27 ++++++------ programs/playTests.sh | 7 +++- 5 files changed, 130 insertions(+), 96 deletions(-) diff --git a/lib/zdict.c b/lib/zdict.c index c99cabe1..a643f4f5 100644 --- a/lib/zdict.c +++ b/lib/zdict.c @@ -574,7 +574,6 @@ static void ZDICT_fillNoise(void* buffer, size_t length) { unsigned acc = PRIME1; size_t p=0;; - for (p=0; p> 21); @@ -594,30 +593,37 @@ static void ZDICT_countEStats(EStats_ress_t esr, U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, const void* src, size_t srcSize) { - const BYTE* bytePtr; - const U32* u32Ptr; - seqStore_t seqStore; + const seqStore_t* seqStorePtr; if (srcSize > ZSTD_BLOCKSIZE_MAX) srcSize = ZSTD_BLOCKSIZE_MAX; /* protection vs large samples */ ZSTD_copyCCtx(esr.zc, esr.ref); ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize); - seqStore = ZSTD_copySeqStore(esr.zc); + seqStorePtr = ZSTD_getSeqStore(esr.zc); - /* count stats */ - for(bytePtr = seqStore.litStart; bytePtr < seqStore.lit; bytePtr++) - countLit[*bytePtr]++; - for(u32Ptr = seqStore.offsetStart; u32Ptr < seqStore.offset; u32Ptr++) { - BYTE offcode = (BYTE)ZSTD_highbit(*u32Ptr) + 1; - if (*u32Ptr==0) offcode=0; - offsetcodeCount[offcode]++; + /* literals stats */ + { const BYTE* bytePtr; + for(bytePtr = seqStorePtr->litStart; bytePtr < seqStorePtr->lit; bytePtr++) + countLit[*bytePtr]++; } - (void)matchlengthCount; (void)litlengthCount; - /* - for(bytePtr = seqStore.matchLengthStart; bytePtr < seqStore.matchLength; bytePtr++) - matchlengthCount[*bytePtr]++; - for(bytePtr = seqStore.litLengthStart; bytePtr < seqStore.litLength; bytePtr++) - litlengthCount[*bytePtr]++; - */ + + /* seqStats */ + { size_t const nbSeq = (size_t)(seqStorePtr->offset - seqStorePtr->offsetStart); + ZSTD_seqToCodes(seqStorePtr, nbSeq); + + { const BYTE* codePtr = seqStorePtr->offCodeStart; + size_t u; + for (u=0; umlCodeStart; + size_t u; + for 
(u=0; ullCodeStart; + size_t u; + for (u=0; u= 3) { - const U32 nb = 25; + U32 const nb = 25; + U32 const dictContentSize = ZDICT_dictSize(dictList); U32 u; - U32 dictContentSize = ZDICT_dictSize(dictList); DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize); DISPLAYLEVEL(3, "list %u best segments \n", nb); for (u=1; u<=nb; u++) { @@ -850,8 +862,7 @@ size_t ZDICT_trainFromBuffer_unsafe( } } } /* create dictionary */ - { - U32 dictContentSize = ZDICT_dictSize(dictList); + { U32 dictContentSize = ZDICT_dictSize(dictList); size_t hSize; BYTE* ptr; U32 u; @@ -896,31 +907,32 @@ size_t ZDICT_trainFromBuffer_unsafe( } +/* issue : samplesBuffer need to be followed by a noisy guard band. +* work around : duplicate the buffer, and add the noise */ size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, ZDICT_params_t params) { - size_t sBuffSize; void* newBuff; - size_t result; + size_t sBuffSize; { unsigned u; for (u=0, sBuffSize=0; u no dictionary */ newBuff = malloc(sBuffSize + NOISELENGTH); if (!newBuff) return ERROR(memory_allocation); memcpy(newBuff, samplesBuffer, sBuffSize); ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH); /* guard band, for end of buffer condition */ - result = ZDICT_trainFromBuffer_unsafe(dictBuffer, dictBufferCapacity, + { size_t const result = ZDICT_trainFromBuffer_unsafe( + dictBuffer, dictBufferCapacity, newBuff, samplesSizes, nbSamples, params); - free(newBuff); - return result; + free(newBuff); + return result; } } -/* issue : samplesBuffer need to be followed by a noisy guard band. -* work around : duplicate the buffer, and add the noise ? 
*/ size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples) { diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 90857a82..9f2a28f5 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -127,9 +127,9 @@ size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) return 0; /* reserved as a potential error code in the future */ } -seqStore_t ZSTD_copySeqStore(const ZSTD_CCtx* ctx) /* hidden interface */ +const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) /* hidden interface */ { - return ctx->seqStore; + return &(ctx->seqStore); } @@ -569,11 +569,59 @@ static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc, ostart[4] = (BYTE)(cLitSize); break; } - return lhSize+cLitSize; } +void ZSTD_seqToCodes(const seqStore_t* seqStorePtr, size_t const nbSeq) +{ + /* LL codes */ + { static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 16, 17, 17, 18, 18, 19, 19, + 20, 20, 20, 20, 21, 21, 21, 21, + 22, 22, 22, 22, 22, 22, 22, 22, + 23, 23, 23, 23, 23, 23, 23, 23, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24 }; + const BYTE LL_deltaCode = 19; + U16* const llTable = seqStorePtr->litLengthStart; + BYTE* const llCodeTable = seqStorePtr->llCodeStart; + size_t u; + for (u=0; ulongLength; llTable[u] = (U16)ll; } + llCodeTable[u] = (ll>63) ? (BYTE)ZSTD_highbit(ll) + LL_deltaCode : LL_Code[ll]; + } } + + /* Offset codes */ + { const U32* const offsetTable = seqStorePtr->offsetStart; + BYTE* const ofCodeTable = seqStorePtr->offCodeStart; + size_t u; + for (u=0; umatchLengthStart; + BYTE* const mlCodeTable = seqStorePtr->mlCodeStart; + size_t u; + for (u=0; ulongLength; mlTable[u] = (U16)ml; } + mlCodeTable[u] = (ml>127) ? 
(BYTE)ZSTD_highbit(ml) + ML_deltaCode : ML_Code[ml]; + } } +} + + size_t ZSTD_compressSequences(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, size_t srcSize) @@ -619,22 +667,8 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, #define MIN_SEQ_FOR_DYNAMIC_FSE 64 #define MAX_SEQ_FOR_STATIC_FSE 1000 - /* LL codes */ - { static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 16, 17, 17, 18, 18, 19, 19, - 20, 20, 20, 20, 21, 21, 21, 21, - 22, 22, 22, 22, 22, 22, 22, 22, - 23, 23, 23, 23, 23, 23, 23, 23, - 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24 }; - const BYTE LL_deltaCode = 19; - size_t u; - for (u=0; ulongLength; llTable[u] = (U16)ll; } - llCodeTable[u] = (ll>63) ? (BYTE)ZSTD_highbit(ll) + LL_deltaCode : LL_Code[ll]; - } } + /* convert length/distances into codes */ + ZSTD_seqToCodes(seqStorePtr, nbSeq); /* CTable for Literal Lengths */ { U32 max = MaxLL; @@ -660,9 +694,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, LLtype = FSE_ENCODING_DYNAMIC; } } - /* Offset codes */ - { size_t i; for (i=0; i 2)) { @@ -686,23 +718,6 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, Offtype = FSE_ENCODING_DYNAMIC; } } - /* ML codes */ - { static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, - 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, - 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, - 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 }; - const BYTE ML_deltaCode = 36; - size_t u; - for (u=0; ulongLength; mlTable[u] = (U16)ml; } - mlCodeTable[u] = (ml>127) ? 
(BYTE)ZSTD_highbit(ml) + ML_deltaCode : ML_Code[ml]; - } } - /* CTable for MatchLengths */ { U32 max = MaxML; size_t const mostFrequent = FSE_countFast(count, &max, mlCodeTable, nbSeq); diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index 4ce44399..ff271340 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -236,7 +236,8 @@ typedef struct { #endif } seqStore_t; -seqStore_t ZSTD_copySeqStore(const ZSTD_CCtx* ctx); +const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); +void ZSTD_seqToCodes(const seqStore_t* seqStorePtr, size_t const nbSeq); #endif /* ZSTD_CCOMMON_H_MODULE */ diff --git a/programs/fileio.c b/programs/fileio.c index 907d990a..ff77a8a9 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -333,28 +333,30 @@ static int FIO_compressFilename_internal(cRess_t ress, { FILE* srcFile = ress.srcFile; FILE* dstFile = ress.dstFile; - U64 filesize = 0; + U64 readsize = 0; U64 compressedfilesize = 0; size_t dictSize = ress.dictBufferSize; size_t sizeCheck, errorCode; ZSTD_parameters params; /* init */ - filesize = MAX(FIO_getFileSize(srcFileName),dictSize); - params = ZSTD_getParams(cLevel, filesize); - params.srcSize = filesize; + { U64 const filesize = FIO_getFileSize(srcFileName); + U64 const levelsize = MAX(FIO_getFileSize(srcFileName), dictSize); + params = ZSTD_getParams(cLevel, levelsize); + params.srcSize = filesize; + } if (g_maxWLog) if (params.windowLog > g_maxWLog) params.windowLog = g_maxWLog; errorCode = ZBUFF_compressInit_advanced(ress.ctx, ress.dictBuffer, ress.dictBufferSize, params); if (ZBUFF_isError(errorCode)) EXM_THROW(21, "Error initializing compression : %s", ZBUFF_getErrorName(errorCode)); /* Main compression loop */ - filesize = 0; + readsize = 0; while (1) { /* Fill input Buffer */ - size_t inSize = fread(ress.srcBuffer, (size_t)1, ress.srcBufferSize, srcFile); + size_t const inSize = fread(ress.srcBuffer, (size_t)1, ress.srcBufferSize, srcFile); if (inSize==0) break; - filesize += inSize; - DISPLAYUPDATE(2, 
"\rRead : %u MB ", (U32)(filesize>>20)); + readsize += inSize; + DISPLAYUPDATE(2, "\rRead : %u MB ", (U32)(readsize>>20)); { /* Compress using buffered streaming */ size_t usedInSize = inSize; @@ -371,13 +373,12 @@ static int FIO_compressFilename_internal(cRess_t ress, if (sizeCheck!=cSize) EXM_THROW(25, "Write error : cannot write compressed block into %s", dstFileName); compressedfilesize += cSize; } - DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%% ", (U32)(filesize>>20), (double)compressedfilesize/filesize*100); + DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%% ", (U32)(readsize>>20), (double)compressedfilesize/readsize*100); } /* End of Frame */ - { - size_t cSize = ress.dstBufferSize; - size_t result = ZBUFF_compressEnd(ress.ctx, ress.dstBuffer, &cSize); + { size_t cSize = ress.dstBufferSize; + size_t const result = ZBUFF_compressEnd(ress.ctx, ress.dstBuffer, &cSize); if (result!=0) EXM_THROW(26, "Compression error : cannot create frame end"); sizeCheck = fwrite(ress.dstBuffer, 1, cSize, dstFile); @@ -388,7 +389,7 @@ static int FIO_compressFilename_internal(cRess_t ress, /* Status */ DISPLAYLEVEL(2, "\r%79s\r", ""); DISPLAYLEVEL(2,"Compressed %llu bytes into %llu bytes ==> %.2f%%\n", - (unsigned long long) filesize, (unsigned long long) compressedfilesize, (double)compressedfilesize/filesize*100); + (unsigned long long)readsize, (unsigned long long) compressedfilesize, (double)compressedfilesize/readsize*100); return 0; } diff --git a/programs/playTests.sh b/programs/playTests.sh index 444d91eb..aa0ffc3a 100755 --- a/programs/playTests.sh +++ b/programs/playTests.sh @@ -25,7 +25,7 @@ roundTripTest() { echo "\n**** simple tests **** " ./datagen > tmp -$ZSTD tmp +$ZSTD -f tmp $ZSTD -99 tmp && die "too large compression level undetected" $ZSTD tmp -c > tmpCompressed $ZSTD tmp --stdout > tmpCompressed @@ -71,6 +71,11 @@ echo "\n**** dictionary tests **** " ./datagen -g1M | md5sum > tmp1 ./datagen -g1M | $ZSTD -D tmpDict | $ZSTD -D tmpDict -dvq | md5sum > tmp2 diff -q 
tmp1 tmp2 +$ZSTD --train *.c *.h -o tmpDict +$ZSTD xxhash.c -D tmpDict -of tmp +$ZSTD -d tmp -D tmpDict -of result +diff xxhash.c result + echo "\n**** multiple files tests **** " From 06f793a3abd63744118b7d80beaa7df99dd43498 Mon Sep 17 00:00:00 2001 From: inikep Date: Tue, 29 Mar 2016 11:17:58 +0200 Subject: [PATCH 165/247] fix for BMK_clockSpan on Windows --- programs/bench.c | 45 +++++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/programs/bench.c b/programs/bench.c index 1468bcb2..8cdb8051 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -162,9 +162,17 @@ void BMK_SetBlockSize(size_t blockSize) /* ******************************************************** * Private functions **********************************************************/ -static clock_t BMK_clockSpan( clock_t clockStart ) +typedef clock_t BMK_time_t; + +static BMK_time_t BMK_getTime() { - return clock() - clockStart; /* works even if overflow, span limited to <= ~30mn */ + return clock(); +} + +/* returns time span in nanoseconds */ +static U64 BMK_clockSpan( BMK_time_t clockStart ) +{ + return 1000000ULL * (BMK_getTime() - clockStart) / CLOCKS_PER_SEC; } @@ -265,20 +273,21 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, double fastestC = 100000000., fastestD = 100000000.; double ratio = 0.; U64 crcCheck = 0; - clock_t coolTime = clock(); + BMK_time_t coolTime = BMK_getTime(); U32 testNb; DISPLAYLEVEL(2, "\r%79s\r", ""); for (testNb = 1; testNb <= (g_nbIterations + !g_nbIterations); testNb++) { int nbLoops; - clock_t clockStart, clockSpan; - clock_t const clockLoop = g_nbIterations ? TIMELOOP_S * CLOCKS_PER_SEC : 10; + BMK_time_t clockStart; + U64 clockSpan; + U64 const clockLoop = g_nbIterations ? TIMELOOP_S*1000000ULL : 10; /* overheat protection */ - if (BMK_clockSpan(coolTime) > ACTIVEPERIOD_S * CLOCKS_PER_SEC) { + if (BMK_clockSpan(coolTime) > ACTIVEPERIOD_S*1000000ULL) { DISPLAY("\rcooling down ... 
\r"); BMK_sleep(COOLPERIOD_S); - coolTime = clock(); + coolTime = BMK_getTime(); } /* Compression */ @@ -286,9 +295,9 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, memset(compressedBuffer, 0xE5, maxCompressedSize); /* warm up and erase result buffer */ mili_sleep(1); /* give processor time to other processes */ - clockStart = clock(); - while (clock() == clockStart); - clockStart = clock(); + clockStart = BMK_getTime(); + while (BMK_getTime() == clockStart); + clockStart = BMK_getTime(); for (nbLoops = 0 ; BMK_clockSpan(clockStart) < clockLoop ; nbLoops++) { U32 blockNb; @@ -308,16 +317,16 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, ratio = (double)srcSize / (double)cSize; DISPLAYLEVEL(2, "%2i-%-17.17s :%10u ->%10u (%5.3f),%6.1f MB/s\r", testNb, displayName, (U32)srcSize, (U32)cSize, ratio, - (double)srcSize / 1000000. / (fastestC / CLOCKS_PER_SEC) ); + (double)srcSize / fastestC ); #if 1 /* Decompression */ memset(resultBuffer, 0xD6, srcSize); /* warm result buffer */ mili_sleep(1); /* give processor time to other processes */ - clockStart = clock(); - while (clock() == clockStart); - clockStart = clock(); + clockStart = BMK_getTime(); + while (BMK_getTime() == clockStart); + clockStart = BMK_getTime(); for (nbLoops = 0 ; BMK_clockSpan(clockStart) < clockLoop ; nbLoops++) { U32 blockNb; @@ -338,8 +347,8 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, if ((double)clockSpan < fastestD*nbLoops) fastestD = (double)clockSpan / nbLoops; DISPLAYLEVEL(2, "%2i-%-17.17s :%10u ->%10u (%5.3f),%6.1f MB/s ,%6.1f MB/s\r", testNb, displayName, (U32)srcSize, (U32)cSize, ratio, - (double)srcSize / 1000000. / (fastestC / CLOCKS_PER_SEC), - (double)srcSize / 1000000. 
/ (fastestD / CLOCKS_PER_SEC) ); + (double)srcSize / fastestC, + (double)srcSize / fastestD ); /* CRC Checking */ _findError: @@ -372,8 +381,8 @@ _findError: if (crcOrig == crcCheck) { result->ratio = ratio; result->cSize = cSize; - result->cSpeed = (double)srcSize / 1000000. / (fastestC / CLOCKS_PER_SEC); - result->dSpeed = (double)srcSize / 1000000. / (fastestD / CLOCKS_PER_SEC); + result->cSpeed = (double)srcSize / fastestC; + result->dSpeed = (double)srcSize / fastestD; } DISPLAYLEVEL(2, "%2i#\n", cLevel); } /* Bench */ From 4c12f232ec0c55d9b77d7f49826a8eaa47d14262 Mon Sep 17 00:00:00 2001 From: inikep Date: Tue, 29 Mar 2016 14:52:13 +0200 Subject: [PATCH 166/247] support for QueryPerformanceCounter and clock_gettime --- programs/bench.c | 107 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 81 insertions(+), 26 deletions(-) diff --git a/programs/bench.c b/programs/bench.c index 8cdb8051..0de58238 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -57,7 +57,7 @@ # include # include /* setpriority */ # define BMK_sleep(s) sleep(s) -# define mili_sleep(mili) { struct timespec t; t.tv_sec=0; t.tv_nsec=mili*1000000L; nanosleep(&t, NULL); } +# define mili_sleep(mili) { struct timespec t; t.tv_sec=0; t.tv_nsec=mili*1000000ULL; nanosleep(&t, NULL); } # define setHighPriority() setpriority(PRIO_PROCESS, 0, -20) #elif defined(_WIN32) # include @@ -70,6 +70,49 @@ # define setHighPriority() /* disabled */ #endif +/* +Windows QueryPerformanceCounter resolution = 410 nanosec +Windows clock() resolution = 1000000 nanosec +VirtualBox Ubuntu clock() resolution = 1000 nanosec +VirtualBox Ubuntu clock_gettime() resolution = 100-280 nanosec +fizzle clock() resolution = 1000 nanosec +fizzle clock_gettime() resolution = 100-280 nanosec +*/ + +#if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__))) + +#if 0 + typedef clock_t BMK_time_t; +# define BMK_TIME_FUNCTION "clock()" +# define 
BMK_initTimer(ticksPerSecond) (void)ticksPerSecond +# define BMK_getTime(x) x = clock() +# define BMK_getSpanTimeMicro(ticksPerSecond, clockStart, clockEnd) (1000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC) +# define BMK_getSpanTimeNano(ticksPerSecond, clockStart, clockEnd) (1000000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC) +#else + typedef struct timespec BMK_time_t; +# define BMK_TIME_FUNCTION "clock_gettime" +# define BMK_initTimer(ticksPerSecond) (void)ticksPerSecond +# define BMK_getTime(x) if (clock_gettime(CLOCK_MONOTONIC, &x) == -1 ){ fprintf(stderr, "ERROR: clock_gettime error\n"); } +# define BMK_getSpanTimeMicro(ticksPerSecond, clockStart, clockEnd) (1000000ULL*( clockEnd.tv_sec - clockStart.tv_sec ) + ( clockEnd.tv_nsec - clockStart.tv_nsec ) / 1000) +# define BMK_getSpanTimeNano(ticksPerSecond, clockStart, clockEnd) (1000000000ULL*( clockEnd.tv_sec - clockStart.tv_sec ) + ( clockEnd.tv_nsec - clockStart.tv_nsec )) +#endif + +#elif defined(_WIN32) + typedef LARGE_INTEGER BMK_time_t; +# define BMK_TIME_FUNCTION "QueryPerformanceFrequency" +# define BMK_initTimer(x) if (!QueryPerformanceFrequency(&x)) { fprintf(stderr, "ERROR: QueryPerformance not present\n"); } +# define BMK_getTime(x) QueryPerformanceCounter(&x) +# define BMK_getSpanTimeMicro(ticksPerSecond, clockStart, clockEnd) (1000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart) +# define BMK_getSpanTimeNano(ticksPerSecond, clockStart, clockEnd) (1000000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart) +#else + typedef int BMK_time_t; +# define BMK_TIME_FUNCTION "None" +# define BMK_initTimer(ticksPerSecond) (void)ticksPerSecond +# define BMK_getTimeMicro(clockStart) clockStart=1 +# define BMK_getSpanTimeMicro(ticksPerSecond, clockStart, clockEnd) (TIMELOOP_S*1000000ULL+clockEnd-clockStart) +# define BMK_getSpanTimeNano(ticksPerSecond, clockStart, clockEnd) (TIMELOOP_S*1000000000ULL+clockEnd-clockStart) +#endif + #include "mem.h" 
#include "zstd_static.h" #include "zstd_internal.h" /* ZSTD_compressBegin_targetSrcSize */ @@ -162,20 +205,16 @@ void BMK_SetBlockSize(size_t blockSize) /* ******************************************************** * Private functions **********************************************************/ -typedef clock_t BMK_time_t; - -static BMK_time_t BMK_getTime() +/* returns time span in microseconds */ +static U64 BMK_clockSpan( BMK_time_t clockStart, BMK_time_t ticksPerSecond ) { - return clock(); + BMK_time_t clockEnd; + + (void)ticksPerSecond; + BMK_getTime(clockEnd); + return BMK_getSpanTimeMicro(ticksPerSecond, clockStart, clockEnd); } -/* returns time span in nanoseconds */ -static U64 BMK_clockSpan( BMK_time_t clockStart ) -{ - return 1000000ULL * (BMK_getTime() - clockStart) / CLOCKS_PER_SEC; -} - - static U64 BMK_getFileSize(const char* infilename) { int r; @@ -235,6 +274,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, U64 const crcOrig = XXH64(srcBuffer, srcSize, 0); U32 nbBlocks; + BMK_time_t ticksPerSecond; /* checks */ if (!compressedBuffer || !resultBuffer || !blockTable || !refCtx || !ctx || !refDCtx || !dctx) @@ -242,6 +282,18 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, /* init */ if (strlen(displayName)>17) displayName += strlen(displayName)-17; /* can only display 17 characters */ + BMK_initTimer(ticksPerSecond); + + { + BMK_time_t clockStart, clockEnd; + + BMK_getTime(clockStart); + do { BMK_getTime(clockEnd); } + while (BMK_getSpanTimeNano(ticksPerSecond, clockStart, clockEnd) == 0); + + printf(BMK_TIME_FUNCTION " resolution = %d nanosec\n", (int)BMK_getSpanTimeNano(ticksPerSecond, clockStart, clockEnd)); + } + /* Init blockTable data */ { const char* srcPtr = (const char*)srcBuffer; @@ -273,21 +325,22 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, double fastestC = 100000000., fastestD = 100000000.; double ratio = 0.; U64 crcCheck = 0; - BMK_time_t coolTime = BMK_getTime(); + BMK_time_t 
coolTime; U32 testNb; + BMK_getTime(coolTime); DISPLAYLEVEL(2, "\r%79s\r", ""); for (testNb = 1; testNb <= (g_nbIterations + !g_nbIterations); testNb++) { int nbLoops; - BMK_time_t clockStart; + BMK_time_t clockStart, clockEnd; U64 clockSpan; U64 const clockLoop = g_nbIterations ? TIMELOOP_S*1000000ULL : 10; /* overheat protection */ - if (BMK_clockSpan(coolTime) > ACTIVEPERIOD_S*1000000ULL) { + if (BMK_clockSpan(coolTime, ticksPerSecond) > ACTIVEPERIOD_S*1000000ULL) { DISPLAY("\rcooling down ... \r"); BMK_sleep(COOLPERIOD_S); - coolTime = BMK_getTime(); + BMK_getTime(coolTime); } /* Compression */ @@ -295,11 +348,12 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, memset(compressedBuffer, 0xE5, maxCompressedSize); /* warm up and erase result buffer */ mili_sleep(1); /* give processor time to other processes */ - clockStart = BMK_getTime(); - while (BMK_getTime() == clockStart); - clockStart = BMK_getTime(); + BMK_getTime(clockStart); + do { BMK_getTime(clockEnd); } + while (BMK_getSpanTimeNano(ticksPerSecond, clockStart, clockEnd) == 0); + BMK_getTime(clockStart); - for (nbLoops = 0 ; BMK_clockSpan(clockStart) < clockLoop ; nbLoops++) { + for (nbLoops = 0 ; BMK_clockSpan(clockStart, ticksPerSecond) < clockLoop ; nbLoops++) { U32 blockNb; ZSTD_compressBegin_targetSrcSize(refCtx, dictBuffer, dictBufferSize, blockSize, cLevel); for (blockNb=0; blockNb%10u (%5.3f),%6.1f MB/s ,%6.1f MB/s\r", testNb, displayName, (U32)srcSize, (U32)cSize, ratio, From 4611d11fea2402e45edd7093a73b6e7f15c6f986 Mon Sep 17 00:00:00 2001 From: inikep Date: Tue, 29 Mar 2016 15:52:38 +0200 Subject: [PATCH 167/247] added -lrt required by clock_gettime --- programs/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/programs/Makefile b/programs/Makefile index 6aca9768..b81bfeba 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -48,6 +48,7 @@ CPPFLAGS= -I../lib -DZSTD_VERSION=\"$(VERSION)\" CFLAGS ?= -O3 # -falign-loops=32 # not always 
beneficial CFLAGS += -std=c99 -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wstrict-prototypes -Wundef FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MOREFLAGS) +LDFLAGS ?= -lrt BINDIR = $(PREFIX)/bin MANDIR = $(PREFIX)/share/man/man1 @@ -86,11 +87,11 @@ all: zstd zstd32 fullbench fullbench32 fuzzer fuzzer32 zbufftest zbufftest32 par zstd : $(ZSTD_FILES) $(ZSTD_FILES_LEGACY) $(ZSTDDIR)/zbuff.c $(ZSTDDIR)/zdict.c $(ZSTDDIR)/divsufsort.c \ zstdcli.c fileio.c bench.c xxhash.c datagen.c dibio.c - $(CC) $(FLAGS) -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) $^ -o $@$(EXT) + $(CC) $(FLAGS) -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) $^ $(LDFLAGS) -o $@$(EXT) zstd32: $(ZSTD_FILES) $(ZSTD_FILES_LEGACY) $(ZSTDDIR)/zbuff.c $(ZSTDDIR)/zdict.c $(ZSTDDIR)/divsufsort.c \ zstdcli.c fileio.c bench.c xxhash.c datagen.c dibio.c - $(CC) -m32 $(FLAGS) -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) $^ -o $@$(EXT) + $(CC) -m32 $(FLAGS) -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) $^ $(LDFLAGS) -o $@$(EXT) zstd_nolegacy : $(MAKE) zstd ZSTD_LEGACY_SUPPORT=0 From 33ad2512368bac6c06d4242e9df2c921c2049e63 Mon Sep 17 00:00:00 2001 From: inikep Date: Wed, 30 Mar 2016 09:55:37 +0200 Subject: [PATCH 168/247] removed clock_gettime (not portable) --- programs/Makefile | 5 ++--- programs/bench.c | 34 ---------------------------------- 2 files changed, 2 insertions(+), 37 deletions(-) diff --git a/programs/Makefile b/programs/Makefile index b81bfeba..6aca9768 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -48,7 +48,6 @@ CPPFLAGS= -I../lib -DZSTD_VERSION=\"$(VERSION)\" CFLAGS ?= -O3 # -falign-loops=32 # not always beneficial CFLAGS += -std=c99 -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wstrict-prototypes -Wundef FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MOREFLAGS) -LDFLAGS ?= -lrt BINDIR = $(PREFIX)/bin MANDIR = $(PREFIX)/share/man/man1 @@ -87,11 +86,11 @@ all: zstd zstd32 fullbench fullbench32 fuzzer fuzzer32 
zbufftest zbufftest32 par zstd : $(ZSTD_FILES) $(ZSTD_FILES_LEGACY) $(ZSTDDIR)/zbuff.c $(ZSTDDIR)/zdict.c $(ZSTDDIR)/divsufsort.c \ zstdcli.c fileio.c bench.c xxhash.c datagen.c dibio.c - $(CC) $(FLAGS) -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) $^ $(LDFLAGS) -o $@$(EXT) + $(CC) $(FLAGS) -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) $^ -o $@$(EXT) zstd32: $(ZSTD_FILES) $(ZSTD_FILES_LEGACY) $(ZSTDDIR)/zbuff.c $(ZSTDDIR)/zdict.c $(ZSTDDIR)/divsufsort.c \ zstdcli.c fileio.c bench.c xxhash.c datagen.c dibio.c - $(CC) -m32 $(FLAGS) -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) $^ $(LDFLAGS) -o $@$(EXT) + $(CC) -m32 $(FLAGS) -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) $^ -o $@$(EXT) zstd_nolegacy : $(MAKE) zstd ZSTD_LEGACY_SUPPORT=0 diff --git a/programs/bench.c b/programs/bench.c index 0de58238..28027f4a 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -70,43 +70,20 @@ # define setHighPriority() /* disabled */ #endif -/* -Windows QueryPerformanceCounter resolution = 410 nanosec -Windows clock() resolution = 1000000 nanosec -VirtualBox Ubuntu clock() resolution = 1000 nanosec -VirtualBox Ubuntu clock_gettime() resolution = 100-280 nanosec -fizzle clock() resolution = 1000 nanosec -fizzle clock_gettime() resolution = 100-280 nanosec -*/ - #if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__))) - -#if 0 typedef clock_t BMK_time_t; -# define BMK_TIME_FUNCTION "clock()" # define BMK_initTimer(ticksPerSecond) (void)ticksPerSecond # define BMK_getTime(x) x = clock() # define BMK_getSpanTimeMicro(ticksPerSecond, clockStart, clockEnd) (1000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC) # define BMK_getSpanTimeNano(ticksPerSecond, clockStart, clockEnd) (1000000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC) -#else - typedef struct timespec BMK_time_t; -# define BMK_TIME_FUNCTION "clock_gettime" -# define BMK_initTimer(ticksPerSecond) (void)ticksPerSecond -# define BMK_getTime(x) if (clock_gettime(CLOCK_MONOTONIC, 
&x) == -1 ){ fprintf(stderr, "ERROR: clock_gettime error\n"); } -# define BMK_getSpanTimeMicro(ticksPerSecond, clockStart, clockEnd) (1000000ULL*( clockEnd.tv_sec - clockStart.tv_sec ) + ( clockEnd.tv_nsec - clockStart.tv_nsec ) / 1000) -# define BMK_getSpanTimeNano(ticksPerSecond, clockStart, clockEnd) (1000000000ULL*( clockEnd.tv_sec - clockStart.tv_sec ) + ( clockEnd.tv_nsec - clockStart.tv_nsec )) -#endif - #elif defined(_WIN32) typedef LARGE_INTEGER BMK_time_t; -# define BMK_TIME_FUNCTION "QueryPerformanceFrequency" # define BMK_initTimer(x) if (!QueryPerformanceFrequency(&x)) { fprintf(stderr, "ERROR: QueryPerformance not present\n"); } # define BMK_getTime(x) QueryPerformanceCounter(&x) # define BMK_getSpanTimeMicro(ticksPerSecond, clockStart, clockEnd) (1000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart) # define BMK_getSpanTimeNano(ticksPerSecond, clockStart, clockEnd) (1000000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart) #else typedef int BMK_time_t; -# define BMK_TIME_FUNCTION "None" # define BMK_initTimer(ticksPerSecond) (void)ticksPerSecond # define BMK_getTimeMicro(clockStart) clockStart=1 # define BMK_getSpanTimeMicro(ticksPerSecond, clockStart, clockEnd) (TIMELOOP_S*1000000ULL+clockEnd-clockStart) @@ -284,17 +261,6 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, if (strlen(displayName)>17) displayName += strlen(displayName)-17; /* can only display 17 characters */ BMK_initTimer(ticksPerSecond); - { - BMK_time_t clockStart, clockEnd; - - BMK_getTime(clockStart); - do { BMK_getTime(clockEnd); } - while (BMK_getSpanTimeNano(ticksPerSecond, clockStart, clockEnd) == 0); - - printf(BMK_TIME_FUNCTION " resolution = %d nanosec\n", (int)BMK_getSpanTimeNano(ticksPerSecond, clockStart, clockEnd)); - } - - /* Init blockTable data */ { const char* srcPtr = (const char*)srcBuffer; char* cPtr = (char*)compressedBuffer; From 1c556a3838ec6f32d1574f49787eb1fb46ac8db7 Mon Sep 17 00:00:00 2001 
From: inikep Date: Wed, 30 Mar 2016 10:59:48 +0200 Subject: [PATCH 169/247] ticksPerSecond=0 --- programs/bench.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/programs/bench.c b/programs/bench.c index 28027f4a..dec57589 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -72,7 +72,7 @@ #if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__))) typedef clock_t BMK_time_t; -# define BMK_initTimer(ticksPerSecond) (void)ticksPerSecond +# define BMK_initTimer(ticksPerSecond) ticksPerSecond=0 # define BMK_getTime(x) x = clock() # define BMK_getSpanTimeMicro(ticksPerSecond, clockStart, clockEnd) (1000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC) # define BMK_getSpanTimeNano(ticksPerSecond, clockStart, clockEnd) (1000000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC) @@ -84,7 +84,7 @@ # define BMK_getSpanTimeNano(ticksPerSecond, clockStart, clockEnd) (1000000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart) #else typedef int BMK_time_t; -# define BMK_initTimer(ticksPerSecond) (void)ticksPerSecond +# define BMK_initTimer(ticksPerSecond) ticksPerSecond=0 # define BMK_getTimeMicro(clockStart) clockStart=1 # define BMK_getSpanTimeMicro(ticksPerSecond, clockStart, clockEnd) (TIMELOOP_S*1000000ULL+clockEnd-clockStart) # define BMK_getSpanTimeNano(ticksPerSecond, clockStart, clockEnd) (TIMELOOP_S*1000000000ULL+clockEnd-clockStart) From 21588e370be06d5a3e2292e75a8fb940d72e69e2 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 30 Mar 2016 16:50:44 +0200 Subject: [PATCH 170/247] changed validateParams() into checkParams() + adjustParams() --- lib/error_private.h | 1 + lib/error_public.h | 1 + lib/zbuff.c | 5 ++- lib/zstd_compress.c | 93 ++++++++++++++++++++++++++++++--------------- lib/zstd_static.h | 11 ++++-- 5 files changed, 77 insertions(+), 34 deletions(-) diff --git a/lib/error_private.h b/lib/error_private.h index ff0b829f..7bd03065 100644 --- 
a/lib/error_private.h +++ b/lib/error_private.h @@ -95,6 +95,7 @@ ERR_STATIC const char* ERR_getErrorName(size_t code) case PREFIX(prefix_unknown): return "Unknown frame descriptor"; case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter"; case PREFIX(frameParameter_unsupportedBy32bits): return "Frame parameter unsupported in 32-bits mode"; + case PREFIX(compressionParameter_unsupported): return "Compression parameter is out of bound"; case PREFIX(init_missing): return "Context should be init first"; case PREFIX(memory_allocation): return "Allocation error : not enough memory"; case PREFIX(stage_wrong): return "Operation not authorized at current processing stage"; diff --git a/lib/error_public.h b/lib/error_public.h index 073b8c6a..6fcf802e 100644 --- a/lib/error_public.h +++ b/lib/error_public.h @@ -47,6 +47,7 @@ typedef enum { ZSTD_error_prefix_unknown, ZSTD_error_frameParameter_unsupported, ZSTD_error_frameParameter_unsupportedBy32bits, + ZSTD_error_compressionParameter_unsupported, ZSTD_error_init_missing, ZSTD_error_memory_allocation, ZSTD_error_stage_wrong, diff --git a/lib/zbuff.c b/lib/zbuff.c index 386b47d5..6c06b543 100644 --- a/lib/zbuff.c +++ b/lib/zbuff.c @@ -128,7 +128,9 @@ size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, const void* dict, size_t dic { size_t neededInBuffSize; - ZSTD_validateParams(&params); + { size_t const errorCode = ZSTD_checkParams(params); + if (ZSTD_isError(errorCode)) return errorCode; } + ZSTD_adjustParams(&params, 0, dictSize); neededInBuffSize = (size_t)1 << params.windowLog; /* allocate buffers */ @@ -157,6 +159,7 @@ size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, const void* dict, size_t dic return 0; /* ready to go */ } + size_t ZBUFF_compressInit(ZBUFF_CCtx* zbc, int compressionLevel) { return ZBUFF_compressInit_advanced(zbc, NULL, 0, ZSTD_getParams(compressionLevel, 0)); diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 9f2a28f5..9cac406d 100644 --- a/lib/zstd_compress.c +++ 
b/lib/zstd_compress.c @@ -133,35 +133,51 @@ const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) /* hidden interface * } +#define CLAMP(val,min,max) { if (val<min) val=min; else if (val>max) val=max; } +#define CLAMPCHECK(val,min,max) { if ((val<min) || (val>max)) return ERROR(compressionParameter_unsupported); } + +/** ZSTD_checkParams() : + ensure param values remain within authorized range. + @return : 0, or an error code if one value is beyond authorized range */ +size_t ZSTD_checkParams(ZSTD_parameters params) +{ + { U32 const windowLog_max = MEM_32bits() ? 25 : ZSTD_WINDOWLOG_MAX; /* 32 bits mode cannot flush > 24 bits */ + CLAMPCHECK(params.windowLog, ZSTD_WINDOWLOG_MIN, windowLog_max); } + CLAMPCHECK(params.contentLog, ZSTD_CONTENTLOG_MIN, ZSTD_CONTENTLOG_MAX); + CLAMPCHECK(params.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); + CLAMPCHECK(params.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); + { U32 const searchLengthMin = (params.strategy == ZSTD_btopt) ? ZSTD_SEARCHLENGTH_MIN : ZSTD_SEARCHLENGTH_MIN+1; + U32 const searchLengthMax = (params.strategy == ZSTD_fast) ? ZSTD_SEARCHLENGTH_MAX : ZSTD_SEARCHLENGTH_MAX-1; + CLAMPCHECK(params.searchLength, searchLengthMin, searchLengthMax); } + CLAMPCHECK(params.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX); + CLAMPCHECK((U32)(params.strategy), 0, (U32)ZSTD_btopt); + return 0; +} + + static unsigned ZSTD_highbit(U32 val); -#define CLAMP(val,min,max) { if (val<min) val=min; else if (val>max) val=max; } - -/** ZSTD_validateParams() : - correct params value to remain within authorized range, - optimize for `srcSize` if srcSize > 0 */ -void ZSTD_validateParams(ZSTD_parameters* params) +/** ZSTD_adjustParams() : + optimize params for a given input (`srcSize` and `dictSize`). + mostly downsizing to reduce memory consumption and initialization. + Both `srcSize` and `dictSize` are optional (use 0 if unknown), + but if both are 0, no optimization can be done. + Note : params is considered validated at this stage. Use ZSTD_checkParams() to ensure that. 
*/ +void ZSTD_adjustParams(ZSTD_parameters* params, size_t srcSize, size_t dictSize) { - /* validate params */ - if (MEM_32bits()) if (params->windowLog > 25) params->windowLog = 25; /* 32 bits mode cannot flush > 24 bits */ - CLAMP(params->windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); - CLAMP(params->contentLog, ZSTD_CONTENTLOG_MIN, ZSTD_CONTENTLOG_MAX); - CLAMP(params->hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); - CLAMP(params->searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); - { U32 const searchLengthMin = (params->strategy == ZSTD_btopt) ? ZSTD_SEARCHLENGTH_MIN : ZSTD_SEARCHLENGTH_MIN+1; - U32 const searchLengthMax = (params->strategy == ZSTD_fast) ? ZSTD_SEARCHLENGTH_MAX : ZSTD_SEARCHLENGTH_MAX-1; - CLAMP(params->searchLength, searchLengthMin, searchLengthMax); } - CLAMP(params->targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX); - if ((U32)params->strategy>(U32)ZSTD_btopt) params->strategy = ZSTD_btopt; + if (srcSize+dictSize == 0) return; /* no size information available : no adjustment */ /* resize params, to use less memory when necessary */ - if ((params->srcSize > 0) && (params->srcSize < (1<<ZSTD_WINDOWLOG_MAX))) { - U32 const srcLog = ZSTD_highbit((U32)(params->srcSize)-1) + 1; - if (params->windowLog > srcLog) params->windowLog = srcLog; - } + { size_t const minSrcSize = (srcSize==0) ? 
500 : 0; + size_t const rSize = srcSize + dictSize + minSrcSize; + if (rSize < (1<<ZSTD_WINDOWLOG_MAX)) { + U32 const srcLog = ZSTD_highbit((U32)(rSize)-1) + 1; + if (params->windowLog > srcLog) params->windowLog = srcLog; + } } if (params->hashLog > params->windowLog) params->hashLog = params->windowLog; - { U32 const btPlus = (params->strategy == ZSTD_btlazy2) || (params->strategy == ZSTD_btopt); - if (params->contentLog > params->windowLog+btPlus) params->contentLog = params->windowLog+btPlus; } /* <= ZSTD_CONTENTLOG_MAX */ + { U32 const btPlus = (params->strategy == ZSTD_btlazy2) || (params->strategy == ZSTD_btopt); + U32 const maxContentLog = params->windowLog+btPlus; + if (params->contentLog > maxContentLog) params->contentLog = maxContentLog; } /* <= ZSTD_CONTENTLOG_MAX */ if (params->windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) params->windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */ } @@ -2194,9 +2210,9 @@ static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, si if (MEM_readLE32(dict) != ZSTD_DICT_MAGIC) return ZSTD_loadDictionaryContent(zc, dict, dictSize); /* known magic number : dict is parsed for entropy stats and content */ - { size_t const eSize = ZSTD_loadDictEntropyStats(zc, (const char*)dict+4 /* skip magic */, dictSize-4) + 4; - if (ZSTD_isError(eSize)) return eSize; - return ZSTD_loadDictionaryContent(zc, (const char*)dict+eSize, dictSize-eSize); + { size_t const eSize = ZSTD_loadDictEntropyStats(zc, (const char*)dict+4 /* skip magic */, dictSize-4) + 4; + if (ZSTD_isError(eSize)) return eSize; + return ZSTD_loadDictionaryContent(zc, (const char*)dict+eSize, dictSize-eSize); } } @@ -2207,7 +2223,11 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* zc, const void* dict, size_t dictSize, ZSTD_parameters params) { - ZSTD_validateParams(&params); + /* compression parameters verification and optimization */ + { size_t const errorCode = ZSTD_checkParams(params); + if (ZSTD_isError(errorCode)) return errorCode; } + + ZSTD_adjustParams(&params, 0, dictSize); { size_t const errorCode = ZSTD_resetCCtx_advanced(zc, 
params); if (ZSTD_isError(errorCode)) return errorCode; } @@ -2295,7 +2315,7 @@ size_t ZSTD_compress_usingPreparedCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* prepare } -size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, +static size_t ZSTD_compress_internal (ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict,size_t dictSize, @@ -2322,16 +2342,29 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, return (op - ostart); } +size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params) +{ + size_t const errorCode = ZSTD_checkParams(params); + if (ZSTD_isError(errorCode)) return errorCode; + return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params); +} + size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel) { + ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize+dictSize); ZSTD_LOG_BLOCK("%p: ZSTD_compress_usingDict srcSize=%d dictSize=%d compressionLevel=%d\n", ctx->base, (int)srcSize, (int)dictSize, compressionLevel); - return ZSTD_compress_advanced(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, ZSTD_getParams(compressionLevel, srcSize)); + ZSTD_adjustParams(&params, srcSize, dictSize); + return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params); } size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel) { ZSTD_LOG_BLOCK("%p: ZSTD_compressCCtx srcSize=%d compressionLevel=%d\n", ctx->base, (int)srcSize, compressionLevel); - return ZSTD_compress_advanced(ctx, dst, dstCapacity, src, srcSize, NULL, 0, ZSTD_getParams(compressionLevel, srcSize)); + return ZSTD_compress_usingDict(ctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel); } size_t 
ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel) diff --git a/lib/zstd_static.h b/lib/zstd_static.h index 4ae771fd..db962405 100644 --- a/lib/zstd_static.h +++ b/lib/zstd_static.h @@ -95,9 +95,14 @@ ZSTDLIB_API unsigned ZSTD_maxCLevel (void); * `srcSize` value is optional, select 0 if not known */ ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, U64 srcSize); -/*! ZSTD_validateParams() : -* correct params value to remain within authorized range */ -ZSTDLIB_API void ZSTD_validateParams(ZSTD_parameters* params); +/*! ZSTD_checkParams() : +* Ensure param values remain within authorized range */ +ZSTDLIB_API size_t ZSTD_checkParams(ZSTD_parameters params); + +/*! ZSTD_adjustParams() : +* optimize params for a given `srcSize` and `dictSize`. +* both values are optional, select `0` if unknown. */ +ZSTDLIB_API void ZSTD_adjustParams(ZSTD_parameters* params, size_t srcSize, size_t dictSize); /*! ZSTD_compress_advanced() : * Same as ZSTD_compress_usingDict(), with fine-tune control of each compression parameter */ From 3b71925c2db586b18cacc0f77aa0e9ab76603be2 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 30 Mar 2016 19:48:05 +0200 Subject: [PATCH 171/247] separate params into compressionParams and frameParams --- lib/zbuff.c | 28 ++-- lib/zbuff_static.h | 4 +- lib/zdict.c | 19 ++- lib/zstd_compress.c | 355 ++++++++++++++++++++++-------------------- lib/zstd_opt.h | 20 +-- lib/zstd_static.h | 22 ++- programs/fileio.c | 16 +- programs/playTests.sh | 6 + 8 files changed, 257 insertions(+), 213 deletions(-) diff --git a/lib/zbuff.c b/lib/zbuff.c index 6c06b543..e26072d8 100644 --- a/lib/zbuff.c +++ b/lib/zbuff.c @@ -124,14 +124,14 @@ size_t ZBUFF_freeCCtx(ZBUFF_CCtx* zbc) /* *** Initialization *** */ -size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, const void* dict, size_t dictSize, ZSTD_parameters params) +size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, + const void* dict, size_t dictSize, + 
ZSTD_parameters params, U64 pledgedSrcSize) { size_t neededInBuffSize; - { size_t const errorCode = ZSTD_checkParams(params); - if (ZSTD_isError(errorCode)) return errorCode; } - ZSTD_adjustParams(&params, 0, dictSize); - neededInBuffSize = (size_t)1 << params.windowLog; + ZSTD_adjustCParams(&params.cParams, pledgedSrcSize, dictSize); + neededInBuffSize = (size_t)1 << params.cParams.windowLog; /* allocate buffers */ if (zbc->inBuffSize < neededInBuffSize) { @@ -148,7 +148,7 @@ size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, const void* dict, size_t dic if (zbc->outBuff == NULL) return ERROR(memory_allocation); } - zbc->outBuffContentSize = ZSTD_compressBegin_advanced(zbc->zc, dict, dictSize, params); + zbc->outBuffContentSize = ZSTD_compressBegin_advanced(zbc->zc, dict, dictSize, params, pledgedSrcSize); if (ZSTD_isError(zbc->outBuffContentSize)) return zbc->outBuffContentSize; zbc->inToCompress = 0; @@ -160,15 +160,17 @@ size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, const void* dict, size_t dic } -size_t ZBUFF_compressInit(ZBUFF_CCtx* zbc, int compressionLevel) -{ - return ZBUFF_compressInit_advanced(zbc, NULL, 0, ZSTD_getParams(compressionLevel, 0)); -} - - ZSTDLIB_API size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* zbc, const void* dict, size_t dictSize, int compressionLevel) { - return ZBUFF_compressInit_advanced(zbc, dict, dictSize, ZSTD_getParams(compressionLevel, 0)); + ZSTD_parameters params; + params.cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); + params.fParams.contentSizeFlag = 0; + return ZBUFF_compressInit_advanced(zbc, dict, dictSize, params, 0); +} + +size_t ZBUFF_compressInit(ZBUFF_CCtx* zbc, int compressionLevel) +{ + return ZBUFF_compressInitDictionary(zbc, NULL, 0, compressionLevel); } diff --git a/lib/zbuff_static.h b/lib/zbuff_static.h index 40550890..9fb522e5 100644 --- a/lib/zbuff_static.h +++ b/lib/zbuff_static.h @@ -51,7 +51,9 @@ extern "C" { /* ************************************* * Advanced Streaming functions 
***************************************/ -ZSTDLIB_API size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params); +ZSTDLIB_API size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_parameters params, U64 pledgedSrcSize); #if defined (__cplusplus) diff --git a/lib/zdict.c b/lib/zdict.c index a643f4f5..122ac8cb 100644 --- a/lib/zdict.c +++ b/lib/zdict.c @@ -626,6 +626,7 @@ static void ZDICT_countEStats(EStats_ress_t esr, } } } +/* static size_t ZDICT_maxSampleSize(const size_t* fileSizes, unsigned nbFiles) { unsigned u; @@ -634,6 +635,15 @@ static size_t ZDICT_maxSampleSize(const size_t* fileSizes, unsigned nbFiles) if (max < fileSizes[u]) max = fileSizes[u]; return max; } +*/ + +static size_t ZDICT_totalSampleSize(const size_t* fileSizes, unsigned nbFiles) +{ + size_t total; + unsigned u; + for (u=0, total=0; u