Merge pull request #1302 from jennifermliu/splitpoint
Always use splitPoint=1.0 for non-optimize cover and fastcover and some minor changes on comments
This commit is contained in:
commit
7a02df8dbe
@ -684,7 +684,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
||||
BYTE* const dict = (BYTE*)dictBuffer;
|
||||
COVER_ctx_t ctx;
|
||||
COVER_map_t activeDmers;
|
||||
parameters.splitPoint = parameters.splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters.splitPoint;
|
||||
parameters.splitPoint = 1.0;
|
||||
/* Initialize global data */
|
||||
g_displayLevel = parameters.zParams.notificationLevel;
|
||||
/* Checks */
|
||||
|
@ -510,7 +510,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(
|
||||
/* Initialize global data */
|
||||
g_displayLevel = parameters.zParams.notificationLevel;
|
||||
/* Assign splitPoint and f if not provided */
|
||||
parameters.splitPoint = parameters.splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters.splitPoint;
|
||||
parameters.splitPoint = 1.0;
|
||||
parameters.f = parameters.f == 0 ? DEFAULT_F : parameters.f;
|
||||
parameters.accel = parameters.accel == 0 ? DEFAULT_ACCEL : parameters.accel;
|
||||
/* Convert to cover parameter */
|
||||
|
@ -85,9 +85,9 @@ typedef struct {
|
||||
typedef struct {
|
||||
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
|
||||
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
|
||||
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
|
||||
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
|
||||
unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
|
||||
double splitPoint; /* Percentage of samples used for training: the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */
|
||||
double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used for training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 means all samples are used for both training and testing */
|
||||
ZDICT_params_t zParams;
|
||||
} ZDICT_cover_params_t;
|
||||
|
||||
@ -95,9 +95,9 @@ typedef struct {
|
||||
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
|
||||
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
|
||||
unsigned f; /* log of size of frequency array : constraint: 0 < f <= 31 : 0 means default (18) */
|
||||
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
|
||||
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
|
||||
unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
|
||||
double splitPoint; /* Percentage of samples used for training: the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (0.75), 1.0 when all samples are used for both training and testing */
|
||||
double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used for training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (0.75), 1.0 means all samples are used for both training and testing */
|
||||
unsigned accel; /* Acceleration level: constraint: 0 < accel <= 10, higher means faster and less accurate, 0 means default(1) */
|
||||
ZDICT_params_t zParams;
|
||||
} ZDICT_fastCover_params_t;
|
||||
|
@ -194,7 +194,7 @@ All arguments after \fB\-\-\fR are treated as files
|
||||
Use FILEs as training set to create a dictionary\. The training set should contain a lot of small files (> 100), and weigh typically 100x the target dictionary size (for example, 10 MB for a 100 KB dictionary)\.
|
||||
.
|
||||
.IP
|
||||
Supports multithreading if \fBzstd\fR is compiled with threading support\. Additional parameters can be specified with \fB\-\-train\-cover\fR\ or \fB\-\-train\-fastcover\fR\. The legacy dictionary builder can be accessed with \fB\-\-train\-legacy\fR\. Equivalent to \fB\-\-train\-fastCover=d=8,steps=4\fR\.
|
||||
Supports multithreading if \fBzstd\fR is compiled with threading support\. Additional parameters can be specified with \fB\-\-train\-fastcover\fR\. The legacy dictionary builder can be accessed with \fB\-\-train\-legacy\fR\. The cover dictionary builder can be accessed with \fB\-\-train\-cover\fR\. Equivalent to \fB\-\-train\-fastcover=d=8,steps=4\fR\.
|
||||
.
|
||||
.TP
|
||||
\fB\-o file\fR
|
||||
|
@ -200,9 +200,10 @@ Compression of small files similar to the sample set will be greatly improved.
|
||||
(for example, 10 MB for a 100 KB dictionary).
|
||||
|
||||
Supports multithreading if `zstd` is compiled with threading support.
|
||||
Additional parameters can be specified with `--train-cover`.
|
||||
Additional parameters can be specified with `--train-fastcover`.
|
||||
The legacy dictionary builder can be accessed with `--train-legacy`.
|
||||
Equivalent to `--train-cover=d=8,steps=4`.
|
||||
The cover dictionary builder can be accessed with `--train-cover`.
|
||||
Equivalent to `--train-fastcover=d=8,steps=4`.
|
||||
* `-o file`:
|
||||
Dictionary saved into `file` (default name: dictionary).
|
||||
* `--maxdict=#`:
|
||||
|
Loading…
Reference in New Issue
Block a user