Merge pull request #1230 from terrelln/train-out

zstdcli: Allow -o before --train
This commit is contained in:
Yann Collet 2018-07-18 16:34:10 +02:00 committed by GitHub
commit effa84c8d1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 29 additions and 9 deletions

View File

@@ -502,7 +502,7 @@ int main(int argCount, const char* argv[])
if (!strcmp(argument, "--sparse")) { FIO_setSparseWrite(2); continue; }
if (!strcmp(argument, "--no-sparse")) { FIO_setSparseWrite(0); continue; }
if (!strcmp(argument, "--test")) { operation=zom_test; continue; }
if (!strcmp(argument, "--train")) { operation=zom_train; outFileName=g_defaultDictName; continue; }
if (!strcmp(argument, "--train")) { operation=zom_train; if (outFileName==NULL) outFileName=g_defaultDictName; continue; }
if (!strcmp(argument, "--maxdict")) { nextArgumentIsMaxDict=1; lastCommand=1; continue; } /* kept available for compatibility with old syntax ; will be removed one day */
if (!strcmp(argument, "--dictID")) { nextArgumentIsDictID=1; lastCommand=1; continue; } /* kept available for compatibility with old syntax ; will be removed one day */
if (!strcmp(argument, "--no-dictID")) { FIO_setDictIDFlag(0); continue; }
@@ -526,7 +526,8 @@ int main(int argCount, const char* argv[])
#ifndef ZSTD_NODICT
if (longCommandWArg(&argument, "--train-cover")) {
operation = zom_train;
outFileName = g_defaultDictName;
if (outFileName == NULL)
outFileName = g_defaultDictName;
cover = 1;
/* Allow optional arguments following an = */
if (*argument == 0) { memset(&coverParams, 0, sizeof(coverParams)); }
@@ -536,7 +537,8 @@ int main(int argCount, const char* argv[])
}
if (longCommandWArg(&argument, "--train-legacy")) {
operation = zom_train;
outFileName = g_defaultDictName;
if (outFileName == NULL)
outFileName = g_defaultDictName;
cover = 0;
/* Allow optional arguments following an = */
if (*argument == 0) { continue; }
@@ -718,7 +720,7 @@ int main(int argCount, const char* argv[])
break;
/* Select compressibility of synthetic sample */
case 'P':
case 'P':
{ argument++;
compressibility = (double)readU32FromChar(&argument) / 100;
}
@@ -841,7 +843,7 @@ int main(int argCount, const char* argv[])
if (cLevel > ZSTD_maxCLevel()) cLevel = ZSTD_maxCLevel();
if (cLevelLast > ZSTD_maxCLevel()) cLevelLast = ZSTD_maxCLevel();
if (cLevelLast < cLevel) cLevelLast = cLevel;
if (cLevelLast > cLevel)
if (cLevelLast > cLevel)
DISPLAYLEVEL(2, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast);
if(filenameIdx) {
if(separateFiles) {
@@ -856,7 +858,7 @@ int main(int argCount, const char* argv[])
} else {
for(; cLevel <= cLevelLast; cLevel++) {
BMK_benchFilesAdvanced(filenameTable, filenameIdx, dictFileName, cLevel, &compressionParams, g_displayLevel, &adv);
}
}
}
} else {
for(; cLevel <= cLevelLast; cLevel++) {

View File

@@ -404,7 +404,13 @@ $ECHO "Hello World" > tmp
$ZSTD --train-legacy -q tmp && die "Dictionary training should fail : not enough input source"
./datagen -P0 -g10M > tmp
$ZSTD --train-legacy -q tmp && die "Dictionary training should fail : source is pure noise"
rm tmp*
$ECHO "- Test -o before --train"
rm -f tmpDict dictionary
$ZSTD -o tmpDict --train *.c ../programs/*.c
test -f tmpDict
$ZSTD --train *.c ../programs/*.c
test -f dictionary
rm tmp* dictionary
$ECHO "\n===> cover dictionary builder : advanced options "
@@ -425,12 +431,18 @@ $ZSTD --train-cover=k=46,d=8 *.c ../programs/*.c --dictID=1 -o tmpDict1
cmp tmpDict tmpDict1 && die "dictionaries should have different ID !"
$ECHO "- Create dictionary with size limit"
$ZSTD --train-cover=steps=8 *.c ../programs/*.c -o tmpDict2 --maxdict=4K
rm tmp*
$ECHO "- Compare size of dictionary from 90% training samples with 80% training samples"
$ZSTD --train-cover=split=90 -r *.c ../programs/*.c
$ZSTD --train-cover=split=80 -r *.c ../programs/*.c
$ECHO "- Create dictionary using all samples for both training and testing"
$ZSTD --train-cover=split=100 -r *.c ../programs/*.c
$ECHO "- Test -o before --train-cover"
rm -f tmpDict dictionary
$ZSTD -o tmpDict --train-cover *.c ../programs/*.c
test -f tmpDict
$ZSTD --train-cover *.c ../programs/*.c
test -f dictionary
rm tmp* dictionary
$ECHO "\n===> legacy dictionary builder "
@@ -450,7 +462,13 @@ $ZSTD --train-legacy -s5 *.c ../programs/*.c --dictID=1 -o tmpDict1
cmp tmpDict tmpDict1 && die "dictionaries should have different ID !"
$ECHO "- Create dictionary with size limit"
$ZSTD --train-legacy -s9 *.c ../programs/*.c -o tmpDict2 --maxdict=4K
rm tmp*
$ECHO "- Test -o before --train-legacy"
rm -f tmpDict dictionary
$ZSTD -o tmpDict --train-legacy *.c ../programs/*.c
test -f tmpDict
$ZSTD --train-legacy *.c ../programs/*.c
test -f dictionary
rm tmp* dictionary
$ECHO "\n===> integrity tests "