Merge pull request #1230 from terrelln/train-out

zstdcli: Allow -o before --train
This commit is contained in:
Yann Collet 2018-07-18 16:34:10 +02:00 committed by GitHub
commit effa84c8d1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 29 additions and 9 deletions

View File

@ -502,7 +502,7 @@ int main(int argCount, const char* argv[])
if (!strcmp(argument, "--sparse")) { FIO_setSparseWrite(2); continue; } if (!strcmp(argument, "--sparse")) { FIO_setSparseWrite(2); continue; }
if (!strcmp(argument, "--no-sparse")) { FIO_setSparseWrite(0); continue; } if (!strcmp(argument, "--no-sparse")) { FIO_setSparseWrite(0); continue; }
if (!strcmp(argument, "--test")) { operation=zom_test; continue; } if (!strcmp(argument, "--test")) { operation=zom_test; continue; }
if (!strcmp(argument, "--train")) { operation=zom_train; outFileName=g_defaultDictName; continue; } if (!strcmp(argument, "--train")) { operation=zom_train; if (outFileName==NULL) outFileName=g_defaultDictName; continue; }
if (!strcmp(argument, "--maxdict")) { nextArgumentIsMaxDict=1; lastCommand=1; continue; } /* kept available for compatibility with old syntax ; will be removed one day */ if (!strcmp(argument, "--maxdict")) { nextArgumentIsMaxDict=1; lastCommand=1; continue; } /* kept available for compatibility with old syntax ; will be removed one day */
if (!strcmp(argument, "--dictID")) { nextArgumentIsDictID=1; lastCommand=1; continue; } /* kept available for compatibility with old syntax ; will be removed one day */ if (!strcmp(argument, "--dictID")) { nextArgumentIsDictID=1; lastCommand=1; continue; } /* kept available for compatibility with old syntax ; will be removed one day */
if (!strcmp(argument, "--no-dictID")) { FIO_setDictIDFlag(0); continue; } if (!strcmp(argument, "--no-dictID")) { FIO_setDictIDFlag(0); continue; }
@ -526,7 +526,8 @@ int main(int argCount, const char* argv[])
#ifndef ZSTD_NODICT #ifndef ZSTD_NODICT
if (longCommandWArg(&argument, "--train-cover")) { if (longCommandWArg(&argument, "--train-cover")) {
operation = zom_train; operation = zom_train;
outFileName = g_defaultDictName; if (outFileName == NULL)
outFileName = g_defaultDictName;
cover = 1; cover = 1;
/* Allow optional arguments following an = */ /* Allow optional arguments following an = */
if (*argument == 0) { memset(&coverParams, 0, sizeof(coverParams)); } if (*argument == 0) { memset(&coverParams, 0, sizeof(coverParams)); }
@ -536,7 +537,8 @@ int main(int argCount, const char* argv[])
} }
if (longCommandWArg(&argument, "--train-legacy")) { if (longCommandWArg(&argument, "--train-legacy")) {
operation = zom_train; operation = zom_train;
outFileName = g_defaultDictName; if (outFileName == NULL)
outFileName = g_defaultDictName;
cover = 0; cover = 0;
/* Allow optional arguments following an = */ /* Allow optional arguments following an = */
if (*argument == 0) { continue; } if (*argument == 0) { continue; }

View File

@ -404,7 +404,13 @@ $ECHO "Hello World" > tmp
$ZSTD --train-legacy -q tmp && die "Dictionary training should fail : not enough input source" $ZSTD --train-legacy -q tmp && die "Dictionary training should fail : not enough input source"
./datagen -P0 -g10M > tmp ./datagen -P0 -g10M > tmp
$ZSTD --train-legacy -q tmp && die "Dictionary training should fail : source is pure noise" $ZSTD --train-legacy -q tmp && die "Dictionary training should fail : source is pure noise"
rm tmp* $ECHO "- Test -o before --train"
rm -f tmpDict dictionary
$ZSTD -o tmpDict --train *.c ../programs/*.c
test -f tmpDict
$ZSTD --train *.c ../programs/*.c
test -f dictionary
rm tmp* dictionary
$ECHO "\n===> cover dictionary builder : advanced options " $ECHO "\n===> cover dictionary builder : advanced options "
@ -425,12 +431,18 @@ $ZSTD --train-cover=k=46,d=8 *.c ../programs/*.c --dictID=1 -o tmpDict1
cmp tmpDict tmpDict1 && die "dictionaries should have different ID !" cmp tmpDict tmpDict1 && die "dictionaries should have different ID !"
$ECHO "- Create dictionary with size limit" $ECHO "- Create dictionary with size limit"
$ZSTD --train-cover=steps=8 *.c ../programs/*.c -o tmpDict2 --maxdict=4K $ZSTD --train-cover=steps=8 *.c ../programs/*.c -o tmpDict2 --maxdict=4K
rm tmp*
$ECHO "- Compare size of dictionary from 90% training samples with 80% training samples" $ECHO "- Compare size of dictionary from 90% training samples with 80% training samples"
$ZSTD --train-cover=split=90 -r *.c ../programs/*.c $ZSTD --train-cover=split=90 -r *.c ../programs/*.c
$ZSTD --train-cover=split=80 -r *.c ../programs/*.c $ZSTD --train-cover=split=80 -r *.c ../programs/*.c
$ECHO "- Create dictionary using all samples for both training and testing" $ECHO "- Create dictionary using all samples for both training and testing"
$ZSTD --train-cover=split=100 -r *.c ../programs/*.c $ZSTD --train-cover=split=100 -r *.c ../programs/*.c
$ECHO "- Test -o before --train-cover"
rm -f tmpDict dictionary
$ZSTD -o tmpDict --train-cover *.c ../programs/*.c
test -f tmpDict
$ZSTD --train-cover *.c ../programs/*.c
test -f dictionary
rm tmp* dictionary
$ECHO "\n===> legacy dictionary builder " $ECHO "\n===> legacy dictionary builder "
@ -450,7 +462,13 @@ $ZSTD --train-legacy -s5 *.c ../programs/*.c --dictID=1 -o tmpDict1
cmp tmpDict tmpDict1 && die "dictionaries should have different ID !" cmp tmpDict tmpDict1 && die "dictionaries should have different ID !"
$ECHO "- Create dictionary with size limit" $ECHO "- Create dictionary with size limit"
$ZSTD --train-legacy -s9 *.c ../programs/*.c -o tmpDict2 --maxdict=4K $ZSTD --train-legacy -s9 *.c ../programs/*.c -o tmpDict2 --maxdict=4K
rm tmp* $ECHO "- Test -o before --train-legacy"
rm -f tmpDict dictionary
$ZSTD -o tmpDict --train-legacy *.c ../programs/*.c
test -f tmpDict
$ZSTD --train-legacy *.c ../programs/*.c
test -f dictionary
rm tmp* dictionary
$ECHO "\n===> integrity tests " $ECHO "\n===> integrity tests "