Add long distance matcher

Move last literals section to ZSTD_block_internal
This commit is contained in:
Stella Lau 2017-07-28 15:51:33 -07:00
parent d01cd4ea47
commit 6a546efb8c
12 changed files with 901 additions and 124 deletions

View File

@ -274,6 +274,20 @@ typedef struct {
const BYTE* cachedLiterals;
} optState_t;
typedef struct {
U32 offset;
U32 checksum;
} ldmEntry_t;
typedef struct {
ldmEntry_t* hashTable;
BYTE* bucketOffsets;
U32 ldmEnable; /* 1 if enable long distance matching */
U32 hashLog; /* log size of hashTable */
U32 bucketLog; /* log number of buckets, at most 4 */
U32 hashEveryLog;
} ldmState_t;
typedef struct {
U32 hufCTable[HUF_CTABLE_SIZE_U32(255)];
FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];

File diff suppressed because it is too large Load Diff

View File

@ -413,8 +413,9 @@ static U32 ZSTD_BtGetAllMatches_selectMLS_extDict (
* Optimal parser
*********************************/
FORCE_INLINE
void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
const void* src, size_t srcSize, const int ultra)
size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
const void* src, size_t srcSize,
const int ultra)
{
seqStore_t* seqStorePtr = &(ctx->seqStore);
optState_t* optStatePtr = &(ctx->optState);
@ -654,17 +655,15 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
/* Save reps for next block */
{ int i; for (i=0; i<ZSTD_REP_NUM; i++) seqStorePtr->repToConfirm[i] = rep[i]; }
/* Last Literals */
{ size_t const lastLLSize = iend - anchor;
memcpy(seqStorePtr->lit, anchor, lastLLSize);
seqStorePtr->lit += lastLLSize;
}
/* Return the last literals size */
return iend - anchor;
}
FORCE_INLINE
void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
const void* src, size_t srcSize, const int ultra)
size_t ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
const void* src, size_t srcSize,
const int ultra)
{
seqStore_t* seqStorePtr = &(ctx->seqStore);
optState_t* optStatePtr = &(ctx->optState);
@ -928,11 +927,8 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
/* Save reps for next block */
{ int i; for (i=0; i<ZSTD_REP_NUM; i++) seqStorePtr->repToConfirm[i] = rep[i]; }
/* Last Literals */
{ size_t lastLLSize = iend - anchor;
memcpy(seqStorePtr->lit, anchor, lastLLSize);
seqStorePtr->lit += lastLLSize;
}
/* Return the last literals size */
return iend - anchor;
}
#endif /* ZSTD_OPT_H_91842398743 */

View File

@ -978,7 +978,9 @@ typedef enum {
/* advanced parameters - may not remain available after API update */
ZSTD_p_forceMaxWindow=1100, /* Force back-reference distances to remain < windowSize,
* even when referencing into Dictionary content (default:0) */
ZSTD_p_longDistanceMatching, /* Enable long distance matching.
* This increases the memory usage as well as the
* window size. */
} ZSTD_cParameter;

View File

@ -129,7 +129,10 @@ void BMK_setNbThreads(unsigned nbThreads) {
#endif
g_nbThreads = nbThreads;
}
static U32 g_ldmFlag = 0;
void BMK_setLdmFlag(unsigned ldmFlag) {
g_ldmFlag = ldmFlag;
}
/* ********************************************************
* Bench functions
@ -271,6 +274,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
ZSTD_CCtx_setParameter(ctx, ZSTD_p_searchLog, comprParams->searchLog);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_minMatch, comprParams->searchLength);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_targetLength, comprParams->targetLength);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_longDistanceMatching, g_ldmFlag);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionStrategy, comprParams->strategy);
ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize);
#else

View File

@ -25,5 +25,6 @@ void BMK_setNbThreads(unsigned nbThreads);
void BMK_setNotificationLevel(unsigned level);
void BMK_setAdditionalParam(int additionalParam);
void BMK_setDecodeOnlyMode(unsigned decodeFlag);
void BMK_setLdmFlag(unsigned ldmFlag);
#endif /* BENCH_H_121279284357 */

View File

@ -213,6 +213,10 @@ void FIO_setOverlapLog(unsigned overlapLog){
DISPLAYLEVEL(2, "Setting overlapLog is useless in single-thread mode \n");
g_overlapLog = overlapLog;
}
static U32 g_ldmFlag = 0;
void FIO_setLdmFlag(unsigned ldmFlag) {
g_ldmFlag = (ldmFlag>0);
}
/*-*************************************
@ -407,6 +411,8 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel,
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_minMatch, comprParams->searchLength) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_targetLength, comprParams->targetLength) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionStrategy, (U32)comprParams->strategy) );
/* long distance matching */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_longDistanceMatching, g_ldmFlag) );
/* multi-threading */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_nbThreads, g_nbThreads) );
/* dictionary */

View File

@ -56,6 +56,7 @@ void FIO_setMemLimit(unsigned memLimit);
void FIO_setNbThreads(unsigned nbThreads);
void FIO_setBlockSize(unsigned blockSize);
void FIO_setOverlapLog(unsigned overlapLog);
void FIO_setLdmFlag(unsigned ldmFlag);
/*-*************************************

View File

@ -152,6 +152,7 @@ static int usage_advanced(const char* programName)
#endif
DISPLAY( " -M# : Set a memory usage limit for decompression \n");
DISPLAY( "--list : list information about a zstd compressed file \n");
DISPLAY( "--long : enable long distance matching\n");
DISPLAY( "-- : All arguments after \"--\" are treated as files \n");
#ifndef ZSTD_NODICT
DISPLAY( "\n");
@ -333,7 +334,8 @@ int main(int argCount, const char* argv[])
ultra=0,
lastCommand = 0,
nbThreads = 1,
setRealTimePrio = 0;
setRealTimePrio = 0,
ldmFlag = 0;
unsigned bench_nbSeconds = 3; /* would be better if this value was synchronized from bench */
size_t blockSize = 0;
zstd_operation_mode operation = zom_compress;
@ -440,6 +442,7 @@ int main(int argCount, const char* argv[])
#ifdef ZSTD_LZ4COMPRESS
if (!strcmp(argument, "--format=lz4")) { suffix = LZ4_EXTENSION; FIO_setCompressionType(FIO_lz4Compression); continue; }
#endif
if (!strcmp(argument, "--long")) { ldmFlag = 1; continue; }
/* long commands with arguments */
#ifndef ZSTD_NODICT
@ -690,6 +693,7 @@ int main(int argCount, const char* argv[])
BMK_setBlockSize(blockSize);
BMK_setNbThreads(nbThreads);
BMK_setNbSeconds(bench_nbSeconds);
BMK_setLdmFlag(ldmFlag);
BMK_benchFiles(filenameTable, filenameIdx, dictFileName, cLevel, cLevelLast, &compressionParams, setRealTimePrio);
#endif
(void)bench_nbSeconds;
@ -757,6 +761,7 @@ int main(int argCount, const char* argv[])
#ifndef ZSTD_NOCOMPRESS
FIO_setNbThreads(nbThreads);
FIO_setBlockSize((U32)blockSize);
FIO_setLdmFlag(ldmFlag);
if (g_overlapLog!=OVERLAP_LOG_DEFAULT) FIO_setOverlapLog(g_overlapLog);
if ((filenameIdx==1) && outFileName)
operationResult = FIO_compressFilename(outFileName, filenameTable[0], dictFileName, cLevel, &compressionParams);

View File

@ -440,8 +440,6 @@ static int basicUnitTests(U32 seed, double compressibility)
free(staticDCtxBuffer);
}
/* ZSTDMT simple MT compression test */
DISPLAYLEVEL(4, "test%3i : create ZSTDMT CCtx : ", testNb++);
{ ZSTDMT_CCtx* mtctx = ZSTDMT_createCCtx(2);
@ -1342,6 +1340,7 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD
dictSize = FUZ_rLogLength(&lseed, dictLog); /* needed also for decompression */
dict = srcBuffer + (FUZ_rand(&lseed) % (srcBufferSize - dictSize));
if (FUZ_rand(&lseed) & 0xF) {
CHECK_Z ( ZSTD_compressBegin_usingDict(refCtx, dict, dictSize, cLevel) );
} else {
@ -1350,6 +1349,7 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD
!(FUZ_rand(&lseed)&3) /* contentChecksumFlag*/,
0 /*NodictID*/ }; /* note : since dictionary is fake, dictIDflag has no impact */
ZSTD_parameters const p = FUZ_makeParams(cPar, fPar);
CHECK_Z ( ZSTD_compressBegin_advanced(refCtx, dict, dictSize, p, 0) );
}
CHECK_Z( ZSTD_copyCCtx(ctx, refCtx, 0) );

View File

@ -544,6 +544,15 @@ roundTripTest -g516K 19 # btopt
fileRoundTripTest -g500K
$ECHO "\n**** zstd long distance matching round-trip tests **** "
roundTripTest -g0 "2 --long"
roundTripTest -g1000K "1 --long"
roundTripTest -g517K "6 --long"
roundTripTest -g516K "16 --long"
roundTripTest -g518K "19 --long"
fileRoundTripTest -g5M "3 --long"
if [ -n "$hasMT" ]
then
$ECHO "\n**** zstdmt round-trip tests **** "
@ -551,6 +560,9 @@ then
roundTripTest -g8M "3 -T2"
roundTripTest -g8000K "2 --threads=2"
fileRoundTripTest -g4M "19 -T2 -B1M"
$ECHO "\n**** zstdmt long distance matching round-trip tests **** "
roundTripTest -g8M "3 --long -T2"
else
$ECHO "\n**** no multithreading, skipping zstdmt tests **** "
fi
@ -639,6 +651,15 @@ roundTripTest -g6000000000 -P99 1
fileRoundTripTest -g4193M -P99 1
$ECHO "\n**** zstd long, long distance matching round-trip tests **** "
roundTripTest -g0 "2 --long"
roundTripTest -g270000000 "1 --long"
roundTripTest -g140000000 -P60 "5 --long"
roundTripTest -g70000000 -P70 "8 --long"
roundTripTest -g18000001 -P80 "18 --long"
fileRoundTripTest -g4100M -P99 "1 --long"
if [ -n "$hasMT" ]
then
$ECHO "\n**** zstdmt long round-trip tests **** "
@ -646,6 +667,7 @@ then
roundTripTest -g6000000000 -P99 "1 -T2"
roundTripTest -g1500000000 -P97 "1 -T999"
fileRoundTripTest -g4195M -P98 " -T0"
roundTripTest -g1500000000 -P97 "1 --long -T999"
else
$ECHO "\n**** no multithreading, skipping zstdmt tests **** "
fi

View File

@ -1380,6 +1380,8 @@ static int fuzzerTests_newAPI(U32 seed, U32 nbTests, unsigned startTest, double
if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_minMatch, cParams.searchLength, useOpaqueAPI) );
if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_targetLength, cParams.targetLength, useOpaqueAPI) );
if (FUZ_rand(&lseed) & 1) CHECK_Z( ZSTD_CCtx_setParameter(zc, ZSTD_p_longDistanceMatching, FUZ_rand(&lseed) & 63) );
/* unconditionally set, to be sync with decoder */
/* mess with frame parameters */
if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_checksumFlag, FUZ_rand(&lseed) & 1, useOpaqueAPI) );