bounds for --adapt mode

can supply min and max compression level through advanced command :
--adapt=min=#,max=#
This commit is contained in:
Yann Collet 2018-09-24 18:16:08 -07:00
parent 04f47bbdd2
commit 6c51bf420c
5 changed files with 90 additions and 18 deletions

View File

@ -292,6 +292,20 @@ void FIO_setAdaptiveMode(unsigned adapt) {
EXM_THROW(1, "Adaptive mode is not compatible with single thread mode \n");
g_adaptiveMode = adapt;
}
static int g_minAdaptLevel = -50; /* initializing this value requires a constant, so ZSTD_minCLevel() doesn't work */
void FIO_setAdaptMin(int minCLevel)
{
#ifndef ZSTD_NOCOMPRESS
assert(minCLevel >= ZSTD_minCLevel());
#endif
g_minAdaptLevel = minCLevel;
}
static int g_maxAdaptLevel = 22; /* initializing this value requires a constant, so ZSTD_maxCLevel() doesn't work */
void FIO_setAdaptMax(int maxCLevel)
{
g_maxAdaptLevel = maxCLevel;
}
static U32 g_ldmFlag = 0;
void FIO_setLdmFlag(unsigned ldmFlag) {
g_ldmFlag = (ldmFlag>0);
@ -954,13 +968,15 @@ FIO_compressZstdFrame(const cRess_t* ressPtr,
if (speedChange == slower) {
DISPLAYLEVEL(6, "slower speed , higher compression \n")
compressionLevel ++;
compressionLevel += (compressionLevel == 0); /* skip 0 */
if (compressionLevel > ZSTD_maxCLevel()) compressionLevel = ZSTD_maxCLevel();
if (compressionLevel > g_maxAdaptLevel) compressionLevel = g_maxAdaptLevel;
compressionLevel += (compressionLevel == 0); /* skip 0 */
ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionLevel, (unsigned)compressionLevel);
}
if (speedChange == faster) {
DISPLAYLEVEL(6, "faster speed , lighter compression \n")
compressionLevel --;
if (compressionLevel < g_minAdaptLevel) compressionLevel = g_minAdaptLevel;
compressionLevel -= (compressionLevel == 0); /* skip 0 */
ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionLevel, (unsigned)compressionLevel);
}

View File

@ -48,21 +48,23 @@ typedef enum { FIO_zstdCompression, FIO_gzipCompression, FIO_xzCompression, FIO_
***************************************/
void FIO_setCompressionType(FIO_compressionType_t compressionType);
void FIO_overwriteMode(void);
void FIO_setNotificationLevel(unsigned level);
void FIO_setSparseWrite(unsigned sparse); /**< 0: no sparse; 1: disable on stdout; 2: always enabled */
void FIO_setDictIDFlag(unsigned dictIDFlag);
void FIO_setChecksumFlag(unsigned checksumFlag);
void FIO_setRemoveSrcFile(unsigned flag);
void FIO_setMemLimit(unsigned memLimit);
void FIO_setNbWorkers(unsigned nbWorkers);
void FIO_setBlockSize(unsigned blockSize);
void FIO_setOverlapLog(unsigned overlapLog);
void FIO_setAdaptiveMode(unsigned adapt);
void FIO_setAdaptMin(int minCLevel);
void FIO_setAdaptMax(int maxCLevel);
void FIO_setBlockSize(unsigned blockSize);
void FIO_setChecksumFlag(unsigned checksumFlag);
void FIO_setDictIDFlag(unsigned dictIDFlag);
void FIO_setLdmBucketSizeLog(unsigned ldmBucketSizeLog);
void FIO_setLdmFlag(unsigned ldmFlag);
void FIO_setLdmHashEveryLog(unsigned ldmHashEveryLog);
void FIO_setLdmHashLog(unsigned ldmHashLog);
void FIO_setLdmMinMatch(unsigned ldmMinMatch);
void FIO_setLdmBucketSizeLog(unsigned ldmBucketSizeLog);
void FIO_setLdmHashEveryLog(unsigned ldmHashEveryLog);
void FIO_setMemLimit(unsigned memLimit);
void FIO_setNbWorkers(unsigned nbWorkers);
void FIO_setNotificationLevel(unsigned level);
void FIO_setOverlapLog(unsigned overlapLog);
void FIO_setRemoveSrcFile(unsigned flag);
void FIO_setSparseWrite(unsigned sparse); /**< 0: no sparse; 1: disable on stdout; 2: always enabled */
/*-*************************************
@ -79,6 +81,7 @@ int FIO_decompressFilename (const char* outfilename, const char* infilename, con
int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int displayLevel);
/*-*************************************
* Multiple File functions
***************************************/
@ -96,9 +99,15 @@ int FIO_decompressMultipleFilenames(const char** srcNamesTable, unsigned nbFiles
const char* dictFileName);
/*-*************************************
* Advanced stuff (should actually be hosted elsewhere)
***************************************/
/* custom crash signal handler */
void FIO_addAbortHandler(void);
#if defined (__cplusplus)
}
#endif

View File

@ -134,9 +134,10 @@ the last one takes effect.
This mode is the only one available when multithread support is disabled.
Single-thread mode features lower memory usage.
Final compressed result is slightly different from `-T1`.
* `--adapt` :
* `--adapt[=min=#,max=#]` :
`zstd` will dynamically adapt compression level to perceived I/O conditions.
Compression level adaptation can be observed live by using command `-v`.
Adaptation can be constrained between supplied `min` and `max` levels.
The feature works when combined with multi-threading and `--long` mode.
It does not work with `--single-thread`.
It sets window size to 8 MB by default (can be changed manually, see `wlog`).

View File

@ -138,8 +138,8 @@ static int usage_advanced(const char* programName)
#ifndef ZSTD_NOCOMPRESS
DISPLAY( "--ultra : enable levels beyond %i, up to %i (requires more memory)\n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel());
DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog);
DISPLAY( "--adapt : automatically adapt compression level to I/O conditions \n");
DISPLAY( "--fast[=#]: switch to ultra fast compression level (default: %u)\n", 1);
DISPLAY( "--adapt : dynamically adapt compression level to I/O conditions \n");
#ifdef ZSTD_MULTITHREAD
DISPLAY( " -T# : spawns # compression threads (default: 1, 0==# cores) \n");
DISPLAY( " -B# : select size of each job (default: 0==automatic) \n");
@ -366,6 +366,30 @@ static ZDICT_fastCover_params_t defaultFastCoverParams(void)
#endif
/** parseAdaptParameters() :
* reads adapt parameters from *stringPtr (e.g. "--zstd=min=1,max=19) and store them into adaptMinPtr and adaptMaxPtr.
* Both adaptMinPtr and adaptMaxPtr must be already allocated and correctly initialized.
* There is no guarantee that any of these values will be updated.
* @return 1 means that parsing was successful,
* @return 0 in case of malformed parameters
*/
static unsigned parseAdaptParameters(const char* stringPtr, int* adaptMinPtr, int* adaptMaxPtr)
{
for ( ; ;) {
if (longCommandWArg(&stringPtr, "min=")) { *adaptMinPtr = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
if (longCommandWArg(&stringPtr, "max=")) { *adaptMaxPtr = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
DISPLAYLEVEL(4, "invalid compression parameter \n");
return 0;
}
if (stringPtr[0] != 0) return 0; /* check the end of string */
if (*adaptMinPtr > *adaptMaxPtr) {
DISPLAYLEVEL(4, "incoherent adaptation limits \n");
return 0;
}
return 1;
}
/** parseCompressionParameters() :
* reads compression parameters from *stringPtr (e.g. "--zstd=wlog=23,clog=23,hlog=22,slog=6,slen=3,tlen=48,strat=6") into *params
* @return 1 means that compression parameters were correct
@ -430,6 +454,15 @@ typedef enum { zom_compress, zom_decompress, zom_test, zom_bench, zom_train, zom
#define CLEAN_RETURN(i) { operationResult = (i); goto _end; }
#ifdef ZSTD_NOCOMPRESS
/* symbols from compression library are not defined and should not be invoked */
# define MINCLEVEL -50
# define MAXCLEVEL 22
#else
# define MINCLEVEL ZSTD_minCLevel()
# define MAXCLEVEL ZSTD_maxCLevel()
#endif
int main(int argCount, const char* argv[])
{
int argNb,
@ -440,6 +473,8 @@ int main(int argCount, const char* argv[])
main_pause = 0,
nbWorkers = 0,
adapt = 0,
adaptMin = MINCLEVEL,
adaptMax = MAXCLEVEL,
nextArgumentIsOutFileName = 0,
nextArgumentIsMaxDict = 0,
nextArgumentIsDictID = 0,
@ -559,6 +594,7 @@ int main(int argCount, const char* argv[])
if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(1); continue; }
if (!strcmp(argument, "--priority=rt")) { setRealTimePrio = 1; continue; }
if (!strcmp(argument, "--adapt")) { adapt = 1; continue; }
if (longCommandWArg(&argument, "--adapt=")) { adapt = 1; if (!parseAdaptParameters(argument, &adaptMin, &adaptMax)) CLEAN_RETURN(badusage(programName)); continue; }
if (!strcmp(argument, "--single-thread")) { nbWorkers = 0; singleThread = 1; continue; }
if (!strcmp(argument, "--format=zstd")) { suffix = ZSTD_EXTENSION; FIO_setCompressionType(FIO_zstdCompression); continue; }
#ifdef ZSTD_GZCOMPRESS
@ -1014,6 +1050,10 @@ int main(int argCount, const char* argv[])
if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) FIO_setLdmBucketSizeLog(g_ldmBucketSizeLog);
if (g_ldmHashEveryLog != LDM_PARAM_DEFAULT) FIO_setLdmHashEveryLog(g_ldmHashEveryLog);
FIO_setAdaptiveMode(adapt);
FIO_setAdaptMin(adaptMin);
FIO_setAdaptMax(adaptMax);
if (adaptMin > cLevel) cLevel = adaptMin;
if (adaptMax < cLevel) cLevel = adaptMax;
if ((filenameIdx==1) && outFileName)
operationResult = FIO_compressFilename(outFileName, filenameTable[0], dictFileName, cLevel, compressionParams);

View File

@ -103,6 +103,7 @@ else
fi
$ECHO "\n===> simple tests "
./datagen > tmp
@ -811,11 +812,20 @@ roundTripTest -g1M -P50 "1 --single-thread --long=29" " --long=28 --memory=512MB
roundTripTest -g1M -P50 "1 --single-thread --long=29" " --zstd=wlog=28 --memory=512MB"
$ECHO "\n===> adaptive mode "
roundTripTest -g270000000 " --adapt"
roundTripTest -g27000000 " --adapt=min=1,max=4"
./datagen > tmp
$ZSTD -f -vv --adapt=min=10,max=9 tmp && die "--adapt must fail on incoherent bounds"
if [ "$1" != "--test-large-data" ]; then
$ECHO "Skipping large data tests"
exit 0
fi
$ECHO "\n===> large files tests "
roundTripTest -g270000000 1
@ -858,10 +868,6 @@ roundTripTest -g700M -P50 "1 --single-thread --long=29"
roundTripTest -g600M -P50 "1 --single-thread --long --zstd=wlog=29,clog=28"
$ECHO "\n===> adaptive mode "
roundTripTest -g270000000 " --adapt"
if [ -n "$hasMT" ]
then
$ECHO "\n===> zstdmt long round-trip tests "