Add split=# to cli
This commit is contained in:
parent
52fbbbcb6b
commit
348e5f77a9
@ -558,15 +558,15 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|||||||
/* Check if there's training sample */
|
/* Check if there's training sample */
|
||||||
if (nbTrainSamples < 1) {
|
if (nbTrainSamples < 1) {
|
||||||
DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
|
DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
|
||||||
DISPLAYLEVEL(1, "splitPoint is %i", (int)(splitPoint*100));
|
|
||||||
DISPLAYLEVEL(1, "nbSamples is %u", nbSamples);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
/* Check if there's testing sample when splitPoint is nonzero */
|
/* Check if there's testing sample when splitPoint is nonzero */
|
||||||
if (nbTestSamples < 1 && splitPoint < 1.0) {
|
if (nbTestSamples < 1 && splitPoint < 1.0) {
|
||||||
DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
|
DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
|
||||||
DISPLAYLEVEL(1, "splitPoint is %i", (int)(splitPoint*100));
|
return 0;
|
||||||
DISPLAYLEVEL(1, "nbSamples is %u", nbSamples);
|
}
|
||||||
|
if (nbTrainSamples + nbTestSamples != nbSamples) {
|
||||||
|
DISPLAYLEVEL(1, "nbTrainSamples plus nbTestSamples don't add up to nbSamples");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
/* Zero the context */
|
/* Zero the context */
|
||||||
|
@ -223,11 +223,12 @@ Compression of small files similar to the sample set will be greatly improved.
|
|||||||
This compares favorably to 4 bytes default.
|
This compares favorably to 4 bytes default.
|
||||||
However, it's up to the dictionary manager not to assign the same ID twice to
|
However, it's up to the dictionary manager not to assign the same ID twice to
|
||||||
2 different dictionaries.
|
2 different dictionaries.
|
||||||
* `--train-cover[=k=#,d=#,steps=#]`:
|
* `--train-cover[=k=#,d=#,steps=#,split=#]`:
|
||||||
Select parameters for the default dictionary builder algorithm named cover.
|
Select parameters for the default dictionary builder algorithm named cover.
|
||||||
If _d_ is not specified, then it tries _d_ = 6 and _d_ = 8.
|
If _d_ is not specified, then it tries _d_ = 6 and _d_ = 8.
|
||||||
If _k_ is not specified, then it tries _steps_ values in the range [50, 2000].
|
If _k_ is not specified, then it tries _steps_ values in the range [50, 2000].
|
||||||
If _steps_ is not specified, then the default value of 40 is used.
|
If _steps_ is not specified, then the default value of 40 is used.
|
||||||
|
If _split_ is not specified, then the default value of 80 is used.
|
||||||
Requires that _d_ <= _k_.
|
Requires that _d_ <= _k_.
|
||||||
|
|
||||||
Selects segments of size _k_ with highest score to put in the dictionary.
|
Selects segments of size _k_ with highest score to put in the dictionary.
|
||||||
@ -249,6 +250,8 @@ Compression of small files similar to the sample set will be greatly improved.
|
|||||||
|
|
||||||
`zstd --train-cover=k=50 FILEs`
|
`zstd --train-cover=k=50 FILEs`
|
||||||
|
|
||||||
|
`zstd --train-cover=k=50,split=60 FILEs`
|
||||||
|
|
||||||
* `--train-legacy[=selectivity=#]`:
|
* `--train-legacy[=selectivity=#]`:
|
||||||
Use legacy dictionary builder algorithm with the given dictionary
|
Use legacy dictionary builder algorithm with the given dictionary
|
||||||
_selectivity_ (default: 9).
|
_selectivity_ (default: 9).
|
||||||
|
@ -278,14 +278,20 @@ static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
|
|||||||
static unsigned parseCoverParameters(const char* stringPtr, ZDICT_cover_params_t* params)
|
static unsigned parseCoverParameters(const char* stringPtr, ZDICT_cover_params_t* params)
|
||||||
{
|
{
|
||||||
memset(params, 0, sizeof(*params));
|
memset(params, 0, sizeof(*params));
|
||||||
|
unsigned splitPercentage = 100;
|
||||||
for (; ;) {
|
for (; ;) {
|
||||||
if (longCommandWArg(&stringPtr, "k=")) { params->k = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
|
if (longCommandWArg(&stringPtr, "k=")) { params->k = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
|
||||||
if (longCommandWArg(&stringPtr, "d=")) { params->d = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
|
if (longCommandWArg(&stringPtr, "d=")) { params->d = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
|
||||||
if (longCommandWArg(&stringPtr, "steps=")) { params->steps = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
|
if (longCommandWArg(&stringPtr, "steps=")) { params->steps = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
|
||||||
|
if (longCommandWArg(&stringPtr, "split=")) {
|
||||||
|
splitPercentage = readU32FromChar(&stringPtr);
|
||||||
|
params->splitPoint = (double)splitPercentage / 100.0;
|
||||||
|
if (stringPtr[0]==',') { stringPtr++; continue; } else break;
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
if (stringPtr[0] != 0) return 0;
|
if (stringPtr[0] != 0) return 0;
|
||||||
DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nsteps=%u\n", params->k, params->d, params->steps);
|
DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nsteps=%u\nsplitPoint=%d\n", params->k, params->d, params->steps, splitPercentage);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user