lazy strategy

This commit is contained in:
Yann Collet 2015-10-31 12:57:14 +01:00
parent 092a4f123e
commit be2010ea1b
6 changed files with 165 additions and 42 deletions

View File

@ -49,6 +49,8 @@ extern "C" {
****************************************/
/* Loads sizeof(size_t) bytes starting at p into a size_t.
 * Going through memcpy keeps the read valid for any alignment of p;
 * the bytes are interpreted in the host's native byte order. */
static size_t ZSTD_read_ARCH(const void* p)
{
    size_t word;
    memcpy(&word, p, sizeof(word));
    return word;
}
/* MIN(a,b) : yields the smaller of a and b (b when they compare equal).
 * Each argument appears twice in the expansion, so an argument with side
 * effects (e.g. MIN(i++, j)) may be evaluated more than once. */
#define MIN(a,b) ((a)<(b) ? (a) : (b))
static unsigned ZSTD_highbit(U32 val)
{
# if defined(_MSC_VER) /* Visual */

View File

@ -191,7 +191,7 @@ static size_t ZSTD_HC_hashPtr(const void* p, U32 h, U32 mls)
* HC Compression
***************************************/
/* Update chains up to ip (excluded) */
static void ZSTD_HC_insert (ZSTD_HC_CCtx* zc, const BYTE* ip, U32 mls)
static U32 ZSTD_HC_insertAndFindFirstIndex (ZSTD_HC_CCtx* zc, const BYTE* ip, U32 mls)
{
U32* const hashTable = zc->hashTable;
const U32 hashLog = zc->params.hashLog;
@ -210,6 +210,7 @@ static void ZSTD_HC_insert (ZSTD_HC_CCtx* zc, const BYTE* ip, U32 mls)
}
zc->nextToUpdate = target;
return hashTable[ZSTD_HC_hashPtr(ip, hashLog, mls)];
}
@ -220,8 +221,6 @@ size_t ZSTD_HC_insertAndFindBestMatch (
const BYTE** matchpos,
const U32 maxNbAttempts, const U32 matchLengthSearch)
{
U32* const hashTable = zc->hashTable;
const U32 hashLog = zc->params.hashLog;
U32* const chainTable = zc->chainTable;
const U32 chainSize = (1 << zc->params.chainLog);
const U32 chainMask = chainSize-1;
@ -236,8 +235,7 @@ size_t ZSTD_HC_insertAndFindBestMatch (
size_t ml=0;
/* HC4 match finder */
ZSTD_HC_insert(zc, ip, matchLengthSearch);
matchIndex = hashTable[ZSTD_HC_hashPtr(ip, hashLog, matchLengthSearch)];
matchIndex = ZSTD_HC_insertAndFindFirstIndex(zc, ip, matchLengthSearch);
while ((matchIndex>=lowLimit) && (nbAttempts))
{
@ -291,7 +289,7 @@ static size_t ZSTD_HC_insertAndFindBestMatch_selectMLS (
}
static size_t ZSTD_HC_compressBlock(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
size_t ZSTD_HC_compressBlock_greedy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
seqStore_t* seqStorePtr = &(ctx->seqStore);
const BYTE* const istart = (const BYTE*)src;
@ -367,6 +365,114 @@ static size_t ZSTD_HC_compressBlock(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstS
seqStorePtr, srcSize);
}
/* ZSTD_HC_compressBlock_lazy() :
 * Compresses one block with a "lazy" parser : once a match is found at a
 * position, the following positions are also probed, and the candidate with
 * the better estimated gain is kept before the sequence is stored.
 * ctx        : compression context; provides the sequence store, the search
 *              parameters (searchLog, searchLength), base and dictLimit
 * dst        : output buffer, maxDstSize : its capacity
 * src        : block to compress, srcSize : its size
 * @return : the value returned by ZSTD_compressSequences()
 *           (presumably compressed size or an error code - confirm there) */
size_t ZSTD_HC_compressBlock_lazy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
seqStore_t* seqStorePtr = &(ctx->seqStore);
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
/* anchor : start of the literals not yet stored in a sequence */
const BYTE* anchor = istart;
const BYTE* const iend = istart + srcSize;
/* ilimit : last position from which the main loop starts a search */
const BYTE* const ilimit = iend - 8;
const BYTE* match = istart;
/* offset_1 : offset of the last explicitly stored match;
 * offset_2 : candidate checked for a repeat match at the current position */
size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE;
/* search budget and minimum match length selector handed to the match finder */
const U32 maxSearches = 1 << ctx->params.searchLog;
const U32 mls = ctx->params.searchLength;
/* init */
ZSTD_resetSeqStore(seqStorePtr);
/* skip REPCODE_STARTVALUE bytes when the block starts closer than that to dictLimit
 * (presumably so that the initial `ip - offset` reads stay in valid memory - confirm) */
if (((ip-ctx->base) - ctx->dictLimit) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE;
/* Match Loop */
while (ip <= ilimit)
{
size_t matchLength;
size_t offset;
const BYTE* start;
/* try to find a first match */
if (MEM_read32(ip) == MEM_read32(ip - offset_2))
{
/* repcode : taken immediately, no lazy evaluation */
size_t offtmp = offset_2;
size_t litLength = ip - anchor;
/* here matchLength counts only the bytes beyond the first MINMATCH */
matchLength = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend);
/* swap the two offsets : the one just used becomes offset_1 */
offset_2 = offset_1;
offset_1 = offtmp;
/* 0 is passed in the offset argument for a repeat match */
ZSTD_storeSeq(seqStorePtr, litLength, anchor, 0, matchLength);
ip += matchLength+MINMATCH;
anchor = ip;
continue;
}
/* repeat check failed : from now on both offsets hold offset_1 */
offset_2 = offset_1;
matchLength = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &match, maxSearches, mls);
/* no match at this position : move one byte forward, it stays a literal */
if (!matchLength) { ip++; continue; }
/* let's try to find a better solution */
/* from here on, matchLength is the full match length (MINMATCH included);
 * start remembers where the currently preferred match begins */
offset = ip - match;
start = ip;
while (ip<ilimit)
{
ip ++;
/* candidate 1 : repeat match at the next position, using offset_1 */
if (MEM_read32(ip) == MEM_read32(ip - offset_1))
{
size_t ml2 = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_1, iend) + MINMATCH;
/* current match is penalized by an estimate of its offset cost */
int gain1 = matchLength - (ZSTD_highbit((U32)offset) / 3);
if ((int)ml2 > gain1)
{
/* offset = 0 marks the selected match as a repeat match */
matchLength = ml2, offset = 0, start = ip;
break;
}
}
/* candidate 2 : regular search at the next position */
{
size_t ml2 = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &match, maxSearches, mls);
size_t offset2 = ip - match;
int gain2 = ml2 - (ZSTD_highbit((U32)offset2) / 4); /* raw approx */
int gain1 = matchLength - (ZSTD_highbit((U32)offset) / 4);
if (gain2 > gain1)
{
matchLength = ml2, offset = offset2, start = ip;
continue; /* search a better one */
}
}
break; /* nothing found : store previous one */
}
/* store sequence */
{
/* literals run from anchor up to the start of the selected match */
size_t litLength = start - anchor;
/* a repeat match (offset == 0) leaves offset_1 unchanged */
if (offset) offset_1 = offset;
ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH);
/* resume right after the selected match, wherever the probing left ip */
ip = start + matchLength;
anchor = ip;
}
}
/* Last Literals */
{
/* bytes after the last stored sequence are appended to the literal buffer */
size_t lastLLSize = iend - anchor;
memcpy(seqStorePtr->lit, anchor, lastLLSize);
seqStorePtr->lit += lastLLSize;
}
/* Final compression stage */
return ZSTD_compressSequences((BYTE*)dst, maxDstSize,
seqStorePtr, srcSize);
}
/* ZSTD_HC_compressBlock() :
 * Block compression entry point. Reads ctx->params.strategy and forwards
 * every argument, unchanged, to the matching block compressor.
 * @return : whatever the selected block compressor returns */
size_t ZSTD_HC_compressBlock(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
    const int useGreedy = (ctx->params.strategy == ZSTD_HC_greedy);

    if (!useGreedy) {
        /* any strategy value other than greedy goes through the lazy parser */
        return ZSTD_HC_compressBlock_lazy(ctx, dst, maxDstSize, src, srcSize);
    }
    return ZSTD_HC_compressBlock_greedy(ctx, dst, maxDstSize, src, srcSize);
}
static size_t ZSTD_HC_compress_generic (ZSTD_HC_CCtx* ctxPtr,
void* dst, size_t maxDstSize,
const void* src, size_t srcSize)

View File

@ -45,6 +45,7 @@ extern "C" {
/* *************************************
* Types
***************************************/
/* Match selection strategy, listed from fastest to strongest */
typedef enum {
    ZSTD_HC_greedy,   /* greedy match selection */
    ZSTD_HC_lazy      /* lazy match selection (stronger, slower) */
} ZSTD_HC_strategy;
typedef struct
{
U32 windowLog; /* largest match distance : impact decompression buffer size */
@ -52,6 +53,7 @@ typedef struct
U32 hashLog; /* dispatch table : larger == more memory, faster*/
U32 searchLog; /* nb of searches : larger == more compression, slower*/
U32 searchLength; /* size of matches : larger == faster decompression */
ZSTD_HC_strategy strategy; /* greedy, lazy (stronger, slower) */
} ZSTD_HC_parameters;
/* parameters boundaries */
@ -92,33 +94,33 @@ size_t ZSTD_HC_compressEnd(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize);
#define ZSTD_HC_MAX_CLEVEL 26
static const ZSTD_HC_parameters ZSTD_HC_defaultParameters[ZSTD_HC_MAX_CLEVEL+1] = {
/* W, C, H, S, L, strat */
{ 18, 12, 14, 1, 4 }, /* level 0 - never used */
{ 18, 12, 14, 1, 4 }, /* level 1 - in fact redirected towards zstd fast */
{ 18, 12, 15, 2, 4 }, /* level 2 */
{ 19, 14, 16, 3, 4 }, /* level 3 */
{ 20, 15, 17, 4, 5 }, /* level 4 */
{ 20, 17, 19, 4, 5 }, /* level 5 */
{ 20, 19, 19, 4, 5 }, /* level 6 */
{ 20, 19, 19, 5, 5 }, /* level 7 */
{ 20, 20, 20, 5, 5 }, /* level 8 */
{ 20, 20, 20, 6, 5 }, /* level 9 */
{ 21, 21, 20, 5, 5 }, /* level 10 */
{ 22, 21, 22, 6, 5 }, /* level 11 */
{ 23, 21, 22, 6, 5 }, /* level 12 */
{ 23, 21, 22, 7, 5 }, /* level 13 */
{ 22, 22, 23, 7, 5 }, /* level 14 */
{ 22, 22, 23, 7, 5 }, /* level 15 */
{ 22, 22, 23, 8, 5 }, /* level 16 */
{ 22, 22, 23, 8, 5 }, /* level 17 */
{ 22, 22, 23, 9, 5 }, /* level 18 */
{ 22, 22, 23, 9, 5 }, /* level 19 */
{ 23, 23, 23, 9, 5 }, /* level 20 */
{ 23, 23, 23, 9, 5 }, /* level 21 */
{ 23, 23, 23, 10, 5 }, /* level 22 */
{ 23, 23, 23, 10, 5 }, /* level 23 */
{ 23, 23, 23, 11, 5 }, /* level 24 */
{ 23, 23, 23, 12, 5 }, /* level 25 */
{ 23, 23, 23, 13, 5 }, /* level 26 */ /* ZSTD_HC_MAX_CLEVEL */
{ 18, 12, 14, 1, 4, ZSTD_HC_greedy }, /* level 0 - never used */
{ 18, 12, 14, 1, 4, ZSTD_HC_greedy }, /* level 1 - in fact redirected towards zstd fast */
{ 18, 12, 15, 2, 4, ZSTD_HC_greedy }, /* level 2 */
{ 19, 13, 17, 3, 5, ZSTD_HC_greedy }, /* level 3 */
{ 20, 18, 19, 2, 5, ZSTD_HC_greedy }, /* level 4 */
{ 20, 19, 19, 3, 5, ZSTD_HC_greedy }, /* level 5 */
{ 20, 19, 19, 4, 5, ZSTD_HC_greedy }, /* level 6 */
{ 20, 19, 19, 5, 5, ZSTD_HC_greedy }, /* level 7 */
{ 20, 20, 20, 5, 5, ZSTD_HC_greedy }, /* level 8 */
{ 20, 20, 20, 6, 5, ZSTD_HC_greedy }, /* level 9 */
{ 21, 20, 21, 6, 5, ZSTD_HC_greedy }, /* level 10 */
{ 22, 21, 22, 6, 5, ZSTD_HC_greedy }, /* level 11 */
{ 23, 21, 22, 6, 5, ZSTD_HC_greedy }, /* level 12 */
{ 22, 22, 22, 6, 5, ZSTD_HC_greedy }, /* level 13 */
{ 22, 22, 23, 7, 5, ZSTD_HC_greedy }, /* level 14 */
{ 22, 22, 23, 7, 5, ZSTD_HC_greedy }, /* level 15 */
{ 22, 22, 23, 8, 5, ZSTD_HC_greedy }, /* level 16 */
{ 22, 22, 23, 8, 5, ZSTD_HC_greedy }, /* level 17 */
{ 22, 22, 23, 9, 5, ZSTD_HC_greedy }, /* level 18 */
{ 22, 22, 23, 10, 5, ZSTD_HC_greedy }, /* level 19 */
{ 23, 23, 23, 9, 5, ZSTD_HC_greedy }, /* level 20 */
{ 23, 23, 23, 9, 5, ZSTD_HC_greedy }, /* level 21 */
{ 23, 23, 23, 10, 5, ZSTD_HC_greedy }, /* level 22 */
{ 23, 23, 23, 11, 5, ZSTD_HC_greedy }, /* level 23 */
{ 23, 23, 23, 11, 5, ZSTD_HC_greedy }, /* level 24 */
{ 24, 24, 23, 11, 5, ZSTD_HC_greedy }, /* level 25 */
{ 24, 24, 24, 12, 5, ZSTD_HC_greedy }, /* level 26 */ /* ZSTD_HC_MAX_CLEVEL */
};
#if defined (__cplusplus)

View File

@ -237,7 +237,7 @@ static size_t local_compress_fast (void* dst, size_t maxDstSize, const void* src
return ZSTD_compress(dst, maxDstSize, src, srcSize);
}
#define MIN(a,b) (a<b ? a : b)
#define MIN(a,b) ((a)<(b) ? (a) : (b))
static int BMK_benchMem(void* srcBuffer, size_t srcSize, const char* fileName, int cLevel)
{

View File

@ -122,7 +122,7 @@ static U32 g_rand = 1;
static U32 g_singleRun = 0;
static U32 g_target = 0;
static U32 g_noSeed = 0;
static ZSTD_HC_parameters g_params = { 0, 0, 0, 0, 0 };
static ZSTD_HC_parameters g_params = { 0, 0, 0, 0, 0, 0 };
void BMK_SetNbIterations(int nbLoops)
{
@ -283,9 +283,12 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
U32 Hlog = params.hashLog;
U32 Slog = params.searchLog;
U32 Slength = params.searchLength;
ZSTD_HC_strategy strat = params.strategy;
char name[30] = { 0 };
U64 crcOrig;
/* Memory allocation & restrictions */
snprintf(name, 30, "W%02uC%02uH%02uS%02uL%1ust%1u", Wlog, Clog, Hlog, Slog, Slength, strat);
if (!compressedBuffer || !resultBuffer || !blockTable)
{
DISPLAY("\nError: not enough memory!\n");
@ -344,7 +347,7 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
if (totalTime > g_maxParamTime) break;
/* Compression */
DISPLAY("%1u-W%02uC%02uH%02uS%02uL%02u : %9u ->\r", loopNb, Wlog, Clog, Hlog, Slog, Slength, (U32)srcSize);
DISPLAY("%1u-%s : %9u ->\r", loopNb, name, (U32)srcSize);
memset(compressedBuffer, 0xE5, maxCompressedSize);
nbLoops = 0;
@ -367,7 +370,7 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
cSize += blockTable[blockNb].cSize;
if ((double)milliTime < fastestC*nbLoops) fastestC = (double)milliTime / nbLoops;
ratio = (double)srcSize / (double)cSize;
DISPLAY("%1u-W%02uC%02uH%02uS%02uL%02u : %9u ->", loopNb, Wlog, Clog, Hlog, Slog, Slength, (U32)srcSize);
DISPLAY("%1u-%s : %9u ->", loopNb, name, (U32)srcSize);
DISPLAY(" %9u (%4.3f),%7.1f MB/s\r", (U32)cSize, ratio, (double)srcSize / fastestC / 1000.);
resultPtr->cSize = cSize;
resultPtr->cSpeed = (U32)((double)srcSize / fastestC);
@ -389,7 +392,7 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
milliTime = BMK_GetMilliSpan(milliTime);
if ((double)milliTime < fastestD*nbLoops) fastestD = (double)milliTime / nbLoops;
DISPLAY("%1u-W%02uC%02uH%02uS%02uL%02u : %9u -> ", loopNb, Wlog, Clog, Hlog, Slog, Slength, (U32)srcSize);
DISPLAY("%1u-%s : %9u -> ", loopNb, name, (U32)srcSize);
DISPLAY("%9u (%4.3f),%7.1f MB/s, ", (U32)cSize, ratio, (double)srcSize / fastestC / 1000.);
DISPLAY("%7.1f MB/s\r", (double)srcSize / fastestD / 1000.);
resultPtr->dSpeed = (U32)((double)srcSize / fastestD);
@ -421,11 +424,14 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
return 0;
}
/* Printable names of the ZSTD_HC_strategy values, indexed by the enum value.
 * The trailing padding in the second name is part of the printed text
 * (presumably to keep output columns aligned). */
const char* g_stratName[2] = {
    "ZSTD_HC_greedy",
    "ZSTD_HC_lazy "
};
static void BMK_printWinner(FILE* f, U32 cLevel, BMK_result_t result, ZSTD_HC_parameters params, size_t srcSize)
{
DISPLAY("\r%79s\r", "");
fprintf(f," {%3u,%3u,%3u,%3u,%3u }, ", params.windowLog, params.chainLog, params.hashLog, params.searchLog, params.searchLength);
fprintf(f," {%3u,%3u,%3u,%3u,%3u, %s }, ",
params.windowLog, params.chainLog, params.hashLog, params.searchLog, params.searchLength,
g_stratName[params.strategy]);
fprintf(f,
"/* level %2u */ /* R:%5.3f at %5.1f MB/s - %5.1f MB/s */ \n",
cLevel, (double)srcSize / result.cSize, (double)result.cSpeed / 1000., (double)result.dSpeed / 1000.);
@ -446,7 +452,7 @@ static void BMK_printWinners2(FILE* f, const winnerInfo_t* winners, size_t srcSi
fprintf(f, "\n /* Selected configurations : */ \n");
fprintf(f, "#define ZSTD_HC_MAX_CLEVEL %2u \n", ZSTD_HC_MAX_CLEVEL);
fprintf(f, "static const ZSTD_HC_parameters ZSTD_HC_defaultParameters[ZSTD_HC_MAX_CLEVEL+1] = {\n");
fprintf(f, " /* W, C, H, S, L */ \n");
fprintf(f, " /* W, C, H, S, L, strat */ \n");
for (cLevel=0; cLevel <= ZSTD_HC_MAX_CLEVEL; cLevel++)
BMK_printWinner(f, cLevel, winners[cLevel].result, winners[cLevel].params, srcSize);
@ -593,7 +599,7 @@ static void playAround(FILE* f, winnerInfo_t* winners,
for (; nbChanges; nbChanges--)
{
const U32 changeID = FUZ_rand(&g_rand) % 9;
const U32 changeID = FUZ_rand(&g_rand) % 12;
switch(changeID)
{
case 0:
@ -616,6 +622,10 @@ static void playAround(FILE* f, winnerInfo_t* winners,
p.searchLength++; break;
case 9:
p.searchLength--; break;
case 10:
p.strategy++; break;
case 11:
p.strategy--; break;
}
}
@ -631,6 +641,8 @@ static void playAround(FILE* f, winnerInfo_t* winners,
if (p.searchLog < ZSTD_HC_SEARCHLOG_MIN) continue;
if (p.searchLength > ZSTD_HC_SEARCHLENGTH_MAX) continue;
if (p.searchLength < ZSTD_HC_SEARCHLENGTH_MIN) continue;
if (p.strategy < ZSTD_HC_greedy) continue;
if (p.strategy > ZSTD_HC_lazy) continue;
/* exclude faster if already played params */
if (FUZ_rand(&g_rand) & ((1 << NB_TESTS_PLAYED(p))-1)) continue;
@ -662,6 +674,7 @@ static void BMK_selectRandomStart(
p.searchLog = FUZ_rand(&g_rand) % (ZSTD_HC_SEARCHLOG_MAX+1 - ZSTD_HC_SEARCHLOG_MIN) + ZSTD_HC_SEARCHLOG_MIN;
p.windowLog = FUZ_rand(&g_rand) % (ZSTD_HC_WINDOWLOG_MAX+1 - ZSTD_HC_WINDOWLOG_MIN) + ZSTD_HC_WINDOWLOG_MIN;
p.searchLength=FUZ_rand(&g_rand) % (ZSTD_HC_SEARCHLENGTH_MAX+1 - ZSTD_HC_SEARCHLENGTH_MIN) + ZSTD_HC_SEARCHLENGTH_MIN;
p.strategy = FUZ_rand(&g_rand) & 1;
playAround(f, winners, p, srcBuffer, srcSize, ctx);
}
else

View File

@ -70,7 +70,7 @@
**************************************/
#define COMPRESSOR_NAME "zstd command line interface"
#ifndef ZSTD_VERSION
# define ZSTD_VERSION "v0.0.1"
# define ZSTD_VERSION "v0.3.0"
#endif
#define AUTHOR "Yann Collet"
#define WELCOME_MESSAGE "*** %s %i-bits %s, by %s (%s) ***\n", COMPRESSOR_NAME, (int)(sizeof(void*)*8), ZSTD_VERSION, AUTHOR, __DATE__