Merge pull request #1230 from terrelln/train-out
zstdcli: Allow -o before --train
This commit is contained in:
commit
effa84c8d1
@ -502,7 +502,7 @@ int main(int argCount, const char* argv[])
|
|||||||
if (!strcmp(argument, "--sparse")) { FIO_setSparseWrite(2); continue; }
|
if (!strcmp(argument, "--sparse")) { FIO_setSparseWrite(2); continue; }
|
||||||
if (!strcmp(argument, "--no-sparse")) { FIO_setSparseWrite(0); continue; }
|
if (!strcmp(argument, "--no-sparse")) { FIO_setSparseWrite(0); continue; }
|
||||||
if (!strcmp(argument, "--test")) { operation=zom_test; continue; }
|
if (!strcmp(argument, "--test")) { operation=zom_test; continue; }
|
||||||
if (!strcmp(argument, "--train")) { operation=zom_train; outFileName=g_defaultDictName; continue; }
|
if (!strcmp(argument, "--train")) { operation=zom_train; if (outFileName==NULL) outFileName=g_defaultDictName; continue; }
|
||||||
if (!strcmp(argument, "--maxdict")) { nextArgumentIsMaxDict=1; lastCommand=1; continue; } /* kept available for compatibility with old syntax ; will be removed one day */
|
if (!strcmp(argument, "--maxdict")) { nextArgumentIsMaxDict=1; lastCommand=1; continue; } /* kept available for compatibility with old syntax ; will be removed one day */
|
||||||
if (!strcmp(argument, "--dictID")) { nextArgumentIsDictID=1; lastCommand=1; continue; } /* kept available for compatibility with old syntax ; will be removed one day */
|
if (!strcmp(argument, "--dictID")) { nextArgumentIsDictID=1; lastCommand=1; continue; } /* kept available for compatibility with old syntax ; will be removed one day */
|
||||||
if (!strcmp(argument, "--no-dictID")) { FIO_setDictIDFlag(0); continue; }
|
if (!strcmp(argument, "--no-dictID")) { FIO_setDictIDFlag(0); continue; }
|
||||||
@ -526,6 +526,7 @@ int main(int argCount, const char* argv[])
|
|||||||
#ifndef ZSTD_NODICT
|
#ifndef ZSTD_NODICT
|
||||||
if (longCommandWArg(&argument, "--train-cover")) {
|
if (longCommandWArg(&argument, "--train-cover")) {
|
||||||
operation = zom_train;
|
operation = zom_train;
|
||||||
|
if (outFileName == NULL)
|
||||||
outFileName = g_defaultDictName;
|
outFileName = g_defaultDictName;
|
||||||
cover = 1;
|
cover = 1;
|
||||||
/* Allow optional arguments following an = */
|
/* Allow optional arguments following an = */
|
||||||
@ -536,6 +537,7 @@ int main(int argCount, const char* argv[])
|
|||||||
}
|
}
|
||||||
if (longCommandWArg(&argument, "--train-legacy")) {
|
if (longCommandWArg(&argument, "--train-legacy")) {
|
||||||
operation = zom_train;
|
operation = zom_train;
|
||||||
|
if (outFileName == NULL)
|
||||||
outFileName = g_defaultDictName;
|
outFileName = g_defaultDictName;
|
||||||
cover = 0;
|
cover = 0;
|
||||||
/* Allow optional arguments following an = */
|
/* Allow optional arguments following an = */
|
||||||
|
@ -404,7 +404,13 @@ $ECHO "Hello World" > tmp
|
|||||||
$ZSTD --train-legacy -q tmp && die "Dictionary training should fail : not enough input source"
|
$ZSTD --train-legacy -q tmp && die "Dictionary training should fail : not enough input source"
|
||||||
./datagen -P0 -g10M > tmp
|
./datagen -P0 -g10M > tmp
|
||||||
$ZSTD --train-legacy -q tmp && die "Dictionary training should fail : source is pure noise"
|
$ZSTD --train-legacy -q tmp && die "Dictionary training should fail : source is pure noise"
|
||||||
rm tmp*
|
$ECHO "- Test -o before --train"
|
||||||
|
rm -f tmpDict dictionary
|
||||||
|
$ZSTD -o tmpDict --train *.c ../programs/*.c
|
||||||
|
test -f tmpDict
|
||||||
|
$ZSTD --train *.c ../programs/*.c
|
||||||
|
test -f dictionary
|
||||||
|
rm tmp* dictionary
|
||||||
|
|
||||||
|
|
||||||
$ECHO "\n===> cover dictionary builder : advanced options "
|
$ECHO "\n===> cover dictionary builder : advanced options "
|
||||||
@ -425,12 +431,18 @@ $ZSTD --train-cover=k=46,d=8 *.c ../programs/*.c --dictID=1 -o tmpDict1
|
|||||||
cmp tmpDict tmpDict1 && die "dictionaries should have different ID !"
|
cmp tmpDict tmpDict1 && die "dictionaries should have different ID !"
|
||||||
$ECHO "- Create dictionary with size limit"
|
$ECHO "- Create dictionary with size limit"
|
||||||
$ZSTD --train-cover=steps=8 *.c ../programs/*.c -o tmpDict2 --maxdict=4K
|
$ZSTD --train-cover=steps=8 *.c ../programs/*.c -o tmpDict2 --maxdict=4K
|
||||||
rm tmp*
|
|
||||||
$ECHO "- Compare size of dictionary from 90% training samples with 80% training samples"
|
$ECHO "- Compare size of dictionary from 90% training samples with 80% training samples"
|
||||||
$ZSTD --train-cover=split=90 -r *.c ../programs/*.c
|
$ZSTD --train-cover=split=90 -r *.c ../programs/*.c
|
||||||
$ZSTD --train-cover=split=80 -r *.c ../programs/*.c
|
$ZSTD --train-cover=split=80 -r *.c ../programs/*.c
|
||||||
$ECHO "- Create dictionary using all samples for both training and testing"
|
$ECHO "- Create dictionary using all samples for both training and testing"
|
||||||
$ZSTD --train-cover=split=100 -r *.c ../programs/*.c
|
$ZSTD --train-cover=split=100 -r *.c ../programs/*.c
|
||||||
|
$ECHO "- Test -o before --train-cover"
|
||||||
|
rm -f tmpDict dictionary
|
||||||
|
$ZSTD -o tmpDict --train-cover *.c ../programs/*.c
|
||||||
|
test -f tmpDict
|
||||||
|
$ZSTD --train-cover *.c ../programs/*.c
|
||||||
|
test -f dictionary
|
||||||
|
rm tmp* dictionary
|
||||||
|
|
||||||
$ECHO "\n===> legacy dictionary builder "
|
$ECHO "\n===> legacy dictionary builder "
|
||||||
|
|
||||||
@ -450,7 +462,13 @@ $ZSTD --train-legacy -s5 *.c ../programs/*.c --dictID=1 -o tmpDict1
|
|||||||
cmp tmpDict tmpDict1 && die "dictionaries should have different ID !"
|
cmp tmpDict tmpDict1 && die "dictionaries should have different ID !"
|
||||||
$ECHO "- Create dictionary with size limit"
|
$ECHO "- Create dictionary with size limit"
|
||||||
$ZSTD --train-legacy -s9 *.c ../programs/*.c -o tmpDict2 --maxdict=4K
|
$ZSTD --train-legacy -s9 *.c ../programs/*.c -o tmpDict2 --maxdict=4K
|
||||||
rm tmp*
|
$ECHO "- Test -o before --train-legacy"
|
||||||
|
rm -f tmpDict dictionary
|
||||||
|
$ZSTD -o tmpDict --train-legacy *.c ../programs/*.c
|
||||||
|
test -f tmpDict
|
||||||
|
$ZSTD --train-legacy *.c ../programs/*.c
|
||||||
|
test -f dictionary
|
||||||
|
rm tmp* dictionary
|
||||||
|
|
||||||
|
|
||||||
$ECHO "\n===> integrity tests "
|
$ECHO "\n===> integrity tests "
|
||||||
|
Loading…
Reference in New Issue
Block a user