[zstdcli] Add a flag to control literals compression

This commit is contained in:
Nick Terrell 2019-02-15 14:15:36 -08:00
parent 5261a288d1
commit 0c53c5ad4a
6 changed files with 42 additions and 8 deletions

View File

@ -296,6 +296,7 @@ struct FIO_prefs_s {
int ldmMinMatch;
int ldmBucketSizeLog;
int ldmHashRateLog;
ZSTD_literalCompressionMode_e literalCompressionMode;
/* IO preferences */
U32 removeSrcFile;
@ -339,6 +340,7 @@ FIO_prefs_t* FIO_createPreferences(void)
ret->ldmMinMatch = 0;
ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET;
ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET;
ret->literalCompressionMode = ZSTD_lcm_auto;
return ret;
}
@ -406,6 +408,12 @@ void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable) {
prefs->rsyncable = rsyncable;
}
/* FIO_setLiteralCompressionMode() :
 * Records `mode` in prefs->literalCompressionMode.
 * FIO_createCResources() later hands this field to the compression context
 * through the ZSTD_c_literalCompressionMode parameter. */
void FIO_setLiteralCompressionMode(FIO_prefs_t* const prefs, ZSTD_literalCompressionMode_e mode)
{
    prefs->literalCompressionMode = mode;
}
void FIO_setAdaptMin(FIO_prefs_t* const prefs, int minCLevel)
{
#ifndef ZSTD_NOCOMPRESS
@ -674,6 +682,7 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_minMatch, (int)comprParams.minMatch) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetLength, (int)comprParams.targetLength) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_strategy, comprParams.strategy) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_literalCompressionMode, (int)prefs->literalCompressionMode) );
/* multi-threading */
#ifdef ZSTD_MULTITHREAD
DISPLAYLEVEL(5,"set nb workers = %u \n", prefs->nbWorkers);

View File

@ -71,6 +71,9 @@ void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog);
void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag);
void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse); /**< 0: no sparse; 1: disable on stdout; 2: always enabled */
void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable);
/** Stores `mode` as the literal compression mode of `prefs`.
 *  Preferences returned by FIO_createPreferences() start at ZSTD_lcm_auto. */
void FIO_setLiteralCompressionMode(
FIO_prefs_t* const prefs,
ZSTD_literalCompressionMode_e mode);
void FIO_setNoProgress(unsigned noProgress);
void FIO_setNotificationLevel(int level);

View File

@ -148,6 +148,7 @@ static int usage_advanced(const char* programName)
#endif
DISPLAY( "--no-dictID : don't write dictID into header (dictionary compression)\n");
DISPLAY( "--[no-]check : integrity check (default: enabled) \n");
DISPLAY( "--[no-]compress-literals : force (un)compressed literals \n");
#endif
#ifdef UTIL_HAS_CREATEFILELIST
DISPLAY( " -r : operate recursively on directories \n");
@ -659,6 +660,8 @@ int main(int argCount, const char* argv[])
if (!strcmp(argument, "--format=lz4")) { suffix = LZ4_EXTENSION; FIO_setCompressionType(prefs, FIO_lz4Compression); continue; }
#endif
if (!strcmp(argument, "--rsyncable")) { rsyncable = 1; continue; }
if (!strcmp(argument, "--compress-literals")) { FIO_setLiteralCompressionMode(prefs, ZSTD_lcm_huffman); continue; }
if (!strcmp(argument, "--no-compress-literals")) { FIO_setLiteralCompressionMode(prefs, ZSTD_lcm_uncompressed); continue; }
if (!strcmp(argument, "--no-progress")) { FIO_setNoProgress(1); continue; }
/* long commands with arguments */

View File

@ -200,6 +200,13 @@ $ZSTD tmp -fo tmp && die "zstd compression overwrote the input file"
$ZSTD tmp.zst -dfo tmp.zst && die "zstd decompression overwrote the input file"
$ECHO "test: detect that input file does not exist"
$ZSTD nothere && die "zstd hasn't detected that input file does not exist"
# Round-trip check for --[no-]compress-literals: for each flag, compress `tmp`
# to stdout at level 1, --fast=1 and level 19, and feed the stream to `$ZSTD -t`.
# NOTE(review): a pipeline reports only the status of its last command unless
# pipefail is enabled, so a failure of the compressing side may go unnoticed
# here — confirm against the script's shell options.
$ECHO "test: --[no-]compress-literals"
$ZSTD tmp -c --no-compress-literals -1 | $ZSTD -t
$ZSTD tmp -c --no-compress-literals --fast=1 | $ZSTD -t
$ZSTD tmp -c --no-compress-literals -19 | $ZSTD -t
$ZSTD tmp -c --compress-literals -1 | $ZSTD -t
$ZSTD tmp -c --compress-literals --fast=1 | $ZSTD -t
$ZSTD tmp -c --compress-literals -19 | $ZSTD -t
$ECHO "test : file removal"
$ZSTD -f --rm tmp

View File

@ -129,6 +129,7 @@ static param_value_t const uncompressed_literals_param_values[] = {
/* Regression config "uncompressed literals".
 * Advanced parameters come from uncompressed_literals_param_values;
 * cli_args is presumably the equivalent flag set for the zstdcli method
 * (TODO confirm against the method implementation). */
static config_t uncompressed_literals = {
    .name = "uncompressed literals",
    .param_values = PARAM_VALUES(uncompressed_literals_param_values),
    .cli_args = "-3 --no-compress-literals",
};
@ -139,6 +140,7 @@ static param_value_t const uncompressed_literals_opt_param_values[] = {
/* Regression config "uncompressed literals optimal".
 * Advanced parameters come from uncompressed_literals_opt_param_values;
 * cli_args is presumably the equivalent flag set for the zstdcli method
 * (TODO confirm against the method implementation). */
static config_t uncompressed_literals_opt = {
    .name = "uncompressed literals optimal",
    .param_values = PARAM_VALUES(uncompressed_literals_opt_param_values),
    .cli_args = "-19 --no-compress-literals",
};
@ -149,6 +151,7 @@ static param_value_t const huffman_literals_param_values[] = {
/* Regression config "huffman literals".
 * Advanced parameters come from huffman_literals_param_values;
 * cli_args is presumably the equivalent flag set for the zstdcli method
 * (TODO confirm against the method implementation). */
static config_t huffman_literals = {
    .name = "huffman literals",
    .param_values = PARAM_VALUES(huffman_literals_param_values),
    .cli_args = "--fast=1 --compress-literals",
};

View File

@ -68,13 +68,13 @@ github, level 16, compress cct
github, level 16 with dict, compress cctx, 37568
github, level 19, compress cctx, 133717
github, level 19 with dict, compress cctx, 37567
github, long distance mode, compress cctx, decompression error
github, multithreaded, compress cctx, decompression error
github, multithreaded long distance mode, compress cctx, decompression error
github, small window log, compress cctx, decompression error
github, small hash log, compress cctx, decompression error
github, small chain log, compress cctx, decompression error
github, explicit params, compress cctx, decompression error
github, long distance mode, compress cctx, 141473
github, multithreaded, compress cctx, 141473
github, multithreaded long distance mode, compress cctx, 141473
github, small window log, compress cctx, 141473
github, small hash log, compress cctx, 138943
github, small chain log, compress cctx, 139239
github, explicit params, compress cctx, 140924
github, uncompressed literals, compress cctx, 136397
github, uncompressed literals optimal, compress cctx, 133717
github, huffman literals, compress cctx, 176575
@ -99,6 +99,9 @@ silesia, small window log, zstdcli,
silesia, small hash log, zstdcli, 6554946
silesia, small chain log, zstdcli, 4931141
silesia, explicit params, zstdcli, 4815380
silesia, uncompressed literals, zstdcli, 5155472
silesia, uncompressed literals optimal, zstdcli, 4325475
silesia, huffman literals, zstdcli, 5341405
silesia.tar, level -5, zstdcli, 7161160
silesia.tar, level -3, zstdcli, 6789865
silesia.tar, level -1, zstdcli, 6196433
@ -121,6 +124,9 @@ silesia.tar, small window log, zstdcli,
silesia.tar, small hash log, zstdcli, 6587841
silesia.tar, small chain log, zstdcli, 4943259
silesia.tar, explicit params, zstdcli, 4839202
silesia.tar, uncompressed literals, zstdcli, 5158134
silesia.tar, uncompressed literals optimal, zstdcli, 4321098
silesia.tar, huffman literals, zstdcli, 5358479
github, level -5, zstdcli, 234744
github, level -5 with dict, zstdcli, 48718
github, level -3, zstdcli, 222611
@ -156,6 +162,9 @@ github, small window log, zstdcli,
github, small hash log, zstdcli, 137467
github, small chain log, zstdcli, 138314
github, explicit params, zstdcli, 136140
github, uncompressed literals, zstdcli, 169004
github, uncompressed literals optimal, zstdcli, 158824
github, huffman literals, zstdcli, 145457
silesia, level -5, advanced one pass, 7152294
silesia, level -3, advanced one pass, 6789969
silesia, level -1, advanced one pass, 6191548

1 Data Config Method Total compressed size
68 github level 16 with dict compress cctx 37568
69 github level 19 compress cctx 133717
70 github level 19 with dict compress cctx 37567
71 github long distance mode compress cctx decompression error 141473
72 github multithreaded compress cctx decompression error 141473
73 github multithreaded long distance mode compress cctx decompression error 141473
74 github small window log compress cctx decompression error 141473
75 github small hash log compress cctx decompression error 138943
76 github small chain log compress cctx decompression error 139239
77 github explicit params compress cctx decompression error 140924
78 github uncompressed literals compress cctx 136397
79 github uncompressed literals optimal compress cctx 133717
80 github huffman literals compress cctx 176575
99 silesia small hash log zstdcli 6554946
100 silesia small chain log zstdcli 4931141
101 silesia explicit params zstdcli 4815380
102 silesia uncompressed literals zstdcli 5155472
103 silesia uncompressed literals optimal zstdcli 4325475
104 silesia huffman literals zstdcli 5341405
105 silesia.tar level -5 zstdcli 7161160
106 silesia.tar level -3 zstdcli 6789865
107 silesia.tar level -1 zstdcli 6196433
124 silesia.tar small hash log zstdcli 6587841
125 silesia.tar small chain log zstdcli 4943259
126 silesia.tar explicit params zstdcli 4839202
127 silesia.tar uncompressed literals zstdcli 5158134
128 silesia.tar uncompressed literals optimal zstdcli 4321098
129 silesia.tar huffman literals zstdcli 5358479
130 github level -5 zstdcli 234744
131 github level -5 with dict zstdcli 48718
132 github level -3 zstdcli 222611
162 github small hash log zstdcli 137467
163 github small chain log zstdcli 138314
164 github explicit params zstdcli 136140
165 github uncompressed literals zstdcli 169004
166 github uncompressed literals optimal zstdcli 158824
167 github huffman literals zstdcli 145457
168 silesia level -5 advanced one pass 7152294
169 silesia level -3 advanced one pass 6789969
170 silesia level -1 advanced one pass 6191548