Merge pull request #736 from terrelln/cover-default-api
[zdict] Make COVER the default algorithm
This commit is contained in:
commit
811deaea6f
@ -398,7 +398,8 @@ typedef struct {
|
|||||||
*/
|
*/
|
||||||
static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
|
static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
|
||||||
COVER_map_t *activeDmers, U32 begin,
|
COVER_map_t *activeDmers, U32 begin,
|
||||||
U32 end, COVER_params_t parameters) {
|
U32 end,
|
||||||
|
ZDICT_cover_params_t parameters) {
|
||||||
/* Constants */
|
/* Constants */
|
||||||
const U32 k = parameters.k;
|
const U32 k = parameters.k;
|
||||||
const U32 d = parameters.d;
|
const U32 d = parameters.d;
|
||||||
@ -478,7 +479,7 @@ static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
|
|||||||
* Check the validity of the parameters.
|
* Check the validity of the parameters.
|
||||||
* Returns non-zero if the parameters are valid and 0 otherwise.
|
* Returns non-zero if the parameters are valid and 0 otherwise.
|
||||||
*/
|
*/
|
||||||
static int COVER_checkParameters(COVER_params_t parameters) {
|
static int COVER_checkParameters(ZDICT_cover_params_t parameters) {
|
||||||
/* k and d are required parameters */
|
/* k and d are required parameters */
|
||||||
if (parameters.d == 0 || parameters.k == 0) {
|
if (parameters.d == 0 || parameters.k == 0) {
|
||||||
return 0;
|
return 0;
|
||||||
@ -600,7 +601,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|||||||
static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
|
static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
|
||||||
COVER_map_t *activeDmers, void *dictBuffer,
|
COVER_map_t *activeDmers, void *dictBuffer,
|
||||||
size_t dictBufferCapacity,
|
size_t dictBufferCapacity,
|
||||||
COVER_params_t parameters) {
|
ZDICT_cover_params_t parameters) {
|
||||||
BYTE *const dict = (BYTE *)dictBuffer;
|
BYTE *const dict = (BYTE *)dictBuffer;
|
||||||
size_t tail = dictBufferCapacity;
|
size_t tail = dictBufferCapacity;
|
||||||
/* Divide the data up into epochs of equal size.
|
/* Divide the data up into epochs of equal size.
|
||||||
@ -639,22 +640,10 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
|
|||||||
return tail;
|
return tail;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
||||||
* Translate from COVER_params_t to ZDICT_params_t required for finalizing the
|
|
||||||
* dictionary.
|
|
||||||
*/
|
|
||||||
static ZDICT_params_t COVER_translateParams(COVER_params_t parameters) {
|
|
||||||
ZDICT_params_t zdictParams;
|
|
||||||
memset(&zdictParams, 0, sizeof(zdictParams));
|
|
||||||
zdictParams.notificationLevel = 1;
|
|
||||||
zdictParams.dictID = parameters.dictID;
|
|
||||||
zdictParams.compressionLevel = parameters.compressionLevel;
|
|
||||||
return zdictParams;
|
|
||||||
}
|
|
||||||
|
|
||||||
ZDICTLIB_API size_t COVER_trainFromBuffer(
|
|
||||||
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
||||||
const size_t *samplesSizes, unsigned nbSamples, COVER_params_t parameters) {
|
const size_t *samplesSizes, unsigned nbSamples,
|
||||||
|
ZDICT_cover_params_t parameters) {
|
||||||
BYTE *const dict = (BYTE *)dictBuffer;
|
BYTE *const dict = (BYTE *)dictBuffer;
|
||||||
COVER_ctx_t ctx;
|
COVER_ctx_t ctx;
|
||||||
COVER_map_t activeDmers;
|
COVER_map_t activeDmers;
|
||||||
@ -673,7 +662,7 @@ ZDICTLIB_API size_t COVER_trainFromBuffer(
|
|||||||
return ERROR(dstSize_tooSmall);
|
return ERROR(dstSize_tooSmall);
|
||||||
}
|
}
|
||||||
/* Initialize global data */
|
/* Initialize global data */
|
||||||
g_displayLevel = parameters.notificationLevel;
|
g_displayLevel = parameters.zParams.notificationLevel;
|
||||||
/* Initialize context and activeDmers */
|
/* Initialize context and activeDmers */
|
||||||
if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
|
if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
|
||||||
parameters.d)) {
|
parameters.d)) {
|
||||||
@ -690,10 +679,9 @@ ZDICTLIB_API size_t COVER_trainFromBuffer(
|
|||||||
const size_t tail =
|
const size_t tail =
|
||||||
COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer,
|
COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer,
|
||||||
dictBufferCapacity, parameters);
|
dictBufferCapacity, parameters);
|
||||||
ZDICT_params_t zdictParams = COVER_translateParams(parameters);
|
|
||||||
const size_t dictionarySize = ZDICT_finalizeDictionary(
|
const size_t dictionarySize = ZDICT_finalizeDictionary(
|
||||||
dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
|
dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
|
||||||
samplesBuffer, samplesSizes, nbSamples, zdictParams);
|
samplesBuffer, samplesSizes, nbSamples, parameters.zParams);
|
||||||
if (!ZSTD_isError(dictionarySize)) {
|
if (!ZSTD_isError(dictionarySize)) {
|
||||||
DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
|
DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
|
||||||
(U32)dictionarySize);
|
(U32)dictionarySize);
|
||||||
@ -718,7 +706,7 @@ typedef struct COVER_best_s {
|
|||||||
size_t liveJobs;
|
size_t liveJobs;
|
||||||
void *dict;
|
void *dict;
|
||||||
size_t dictSize;
|
size_t dictSize;
|
||||||
COVER_params_t parameters;
|
ZDICT_cover_params_t parameters;
|
||||||
size_t compressedSize;
|
size_t compressedSize;
|
||||||
} COVER_best_t;
|
} COVER_best_t;
|
||||||
|
|
||||||
@ -786,7 +774,7 @@ static void COVER_best_start(COVER_best_t *best) {
|
|||||||
* If this dictionary is the best so far save it and its parameters.
|
* If this dictionary is the best so far save it and its parameters.
|
||||||
*/
|
*/
|
||||||
static void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
|
static void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
|
||||||
COVER_params_t parameters, void *dict,
|
ZDICT_cover_params_t parameters, void *dict,
|
||||||
size_t dictSize) {
|
size_t dictSize) {
|
||||||
if (!best) {
|
if (!best) {
|
||||||
return;
|
return;
|
||||||
@ -830,7 +818,7 @@ typedef struct COVER_tryParameters_data_s {
|
|||||||
const COVER_ctx_t *ctx;
|
const COVER_ctx_t *ctx;
|
||||||
COVER_best_t *best;
|
COVER_best_t *best;
|
||||||
size_t dictBufferCapacity;
|
size_t dictBufferCapacity;
|
||||||
COVER_params_t parameters;
|
ZDICT_cover_params_t parameters;
|
||||||
} COVER_tryParameters_data_t;
|
} COVER_tryParameters_data_t;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -842,7 +830,7 @@ static void COVER_tryParameters(void *opaque) {
|
|||||||
/* Save parameters as local variables */
|
/* Save parameters as local variables */
|
||||||
COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
|
COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
|
||||||
const COVER_ctx_t *const ctx = data->ctx;
|
const COVER_ctx_t *const ctx = data->ctx;
|
||||||
const COVER_params_t parameters = data->parameters;
|
const ZDICT_cover_params_t parameters = data->parameters;
|
||||||
size_t dictBufferCapacity = data->dictBufferCapacity;
|
size_t dictBufferCapacity = data->dictBufferCapacity;
|
||||||
size_t totalCompressedSize = ERROR(GENERIC);
|
size_t totalCompressedSize = ERROR(GENERIC);
|
||||||
/* Allocate space for hash table, dict, and freqs */
|
/* Allocate space for hash table, dict, and freqs */
|
||||||
@ -863,10 +851,10 @@ static void COVER_tryParameters(void *opaque) {
|
|||||||
{
|
{
|
||||||
const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
|
const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
|
||||||
dictBufferCapacity, parameters);
|
dictBufferCapacity, parameters);
|
||||||
const ZDICT_params_t zdictParams = COVER_translateParams(parameters);
|
|
||||||
dictBufferCapacity = ZDICT_finalizeDictionary(
|
dictBufferCapacity = ZDICT_finalizeDictionary(
|
||||||
dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
|
dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
|
||||||
ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples, zdictParams);
|
ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples,
|
||||||
|
parameters.zParams);
|
||||||
if (ZDICT_isError(dictBufferCapacity)) {
|
if (ZDICT_isError(dictBufferCapacity)) {
|
||||||
DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
|
DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
|
||||||
goto _cleanup;
|
goto _cleanup;
|
||||||
@ -892,8 +880,8 @@ static void COVER_tryParameters(void *opaque) {
|
|||||||
}
|
}
|
||||||
/* Create the cctx and cdict */
|
/* Create the cctx and cdict */
|
||||||
cctx = ZSTD_createCCtx();
|
cctx = ZSTD_createCCtx();
|
||||||
cdict =
|
cdict = ZSTD_createCDict(dict, dictBufferCapacity,
|
||||||
ZSTD_createCDict(dict, dictBufferCapacity, parameters.compressionLevel);
|
parameters.zParams.compressionLevel);
|
||||||
if (!dst || !cctx || !cdict) {
|
if (!dst || !cctx || !cdict) {
|
||||||
goto _compressCleanup;
|
goto _compressCleanup;
|
||||||
}
|
}
|
||||||
@ -930,12 +918,10 @@ _cleanup:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
|
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
||||||
size_t dictBufferCapacity,
|
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
||||||
const void *samplesBuffer,
|
const size_t *samplesSizes, unsigned nbSamples,
|
||||||
const size_t *samplesSizes,
|
ZDICT_cover_params_t *parameters) {
|
||||||
unsigned nbSamples,
|
|
||||||
COVER_params_t *parameters) {
|
|
||||||
/* constants */
|
/* constants */
|
||||||
const unsigned nbThreads = parameters->nbThreads;
|
const unsigned nbThreads = parameters->nbThreads;
|
||||||
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
|
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
|
||||||
@ -947,7 +933,7 @@ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
|
|||||||
const unsigned kIterations =
|
const unsigned kIterations =
|
||||||
(1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
|
(1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
|
||||||
/* Local variables */
|
/* Local variables */
|
||||||
const int displayLevel = parameters->notificationLevel;
|
const int displayLevel = parameters->zParams.notificationLevel;
|
||||||
unsigned iteration = 1;
|
unsigned iteration = 1;
|
||||||
unsigned d;
|
unsigned d;
|
||||||
unsigned k;
|
unsigned k;
|
||||||
@ -976,7 +962,7 @@ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
|
|||||||
/* Initialization */
|
/* Initialization */
|
||||||
COVER_best_init(&best);
|
COVER_best_init(&best);
|
||||||
/* Turn down global display level to clean up display at level 2 and below */
|
/* Turn down global display level to clean up display at level 2 and below */
|
||||||
g_displayLevel = parameters->notificationLevel - 1;
|
g_displayLevel = parameters->zParams.notificationLevel - 1;
|
||||||
/* Loop through d first because each new value needs a new context */
|
/* Loop through d first because each new value needs a new context */
|
||||||
LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
|
LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
|
||||||
kIterations);
|
kIterations);
|
||||||
|
@ -487,7 +487,7 @@ static U32 ZDICT_dictSize(const dictItem* dictList)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
|
static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
|
||||||
const void* const buffer, size_t bufferSize, /* buffer must end with noisy guard band */
|
const void* const buffer, size_t bufferSize, /* buffer must end with noisy guard band */
|
||||||
const size_t* fileSizes, unsigned nbFiles,
|
const size_t* fileSizes, unsigned nbFiles,
|
||||||
U32 minRatio, U32 notificationLevel)
|
U32 minRatio, U32 notificationLevel)
|
||||||
@ -634,17 +634,6 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
|
|||||||
} } }
|
} } }
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
static size_t ZDICT_maxSampleSize(const size_t* fileSizes, unsigned nbFiles)
|
|
||||||
{
|
|
||||||
unsigned u;
|
|
||||||
size_t max=0;
|
|
||||||
for (u=0; u<nbFiles; u++)
|
|
||||||
if (max < fileSizes[u]) max = fileSizes[u];
|
|
||||||
return max;
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
static size_t ZDICT_totalSampleSize(const size_t* fileSizes, unsigned nbFiles)
|
static size_t ZDICT_totalSampleSize(const size_t* fileSizes, unsigned nbFiles)
|
||||||
{
|
{
|
||||||
size_t total=0;
|
size_t total=0;
|
||||||
@ -930,14 +919,14 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*! ZDICT_trainFromBuffer_unsafe() :
|
/*! ZDICT_trainFromBuffer_unsafe_legacy() :
|
||||||
* Warning : `samplesBuffer` must be followed by noisy guard band.
|
* Warning : `samplesBuffer` must be followed by noisy guard band.
|
||||||
* @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
|
* @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
|
||||||
*/
|
*/
|
||||||
size_t ZDICT_trainFromBuffer_unsafe(
|
size_t ZDICT_trainFromBuffer_unsafe_legacy(
|
||||||
void* dictBuffer, size_t maxDictSize,
|
void* dictBuffer, size_t maxDictSize,
|
||||||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
||||||
ZDICT_params_t params)
|
ZDICT_legacy_params_t params)
|
||||||
{
|
{
|
||||||
U32 const dictListSize = MAX(MAX(DICTLISTSIZE_DEFAULT, nbSamples), (U32)(maxDictSize/16));
|
U32 const dictListSize = MAX(MAX(DICTLISTSIZE_DEFAULT, nbSamples), (U32)(maxDictSize/16));
|
||||||
dictItem* const dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList));
|
dictItem* const dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList));
|
||||||
@ -946,7 +935,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
|
|||||||
size_t const targetDictSize = maxDictSize;
|
size_t const targetDictSize = maxDictSize;
|
||||||
size_t const samplesBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples);
|
size_t const samplesBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples);
|
||||||
size_t dictSize = 0;
|
size_t dictSize = 0;
|
||||||
U32 const notificationLevel = params.notificationLevel;
|
U32 const notificationLevel = params.zParams.notificationLevel;
|
||||||
|
|
||||||
/* checks */
|
/* checks */
|
||||||
if (!dictList) return ERROR(memory_allocation);
|
if (!dictList) return ERROR(memory_allocation);
|
||||||
@ -957,13 +946,13 @@ size_t ZDICT_trainFromBuffer_unsafe(
|
|||||||
ZDICT_initDictItem(dictList);
|
ZDICT_initDictItem(dictList);
|
||||||
|
|
||||||
/* build dictionary */
|
/* build dictionary */
|
||||||
ZDICT_trainBuffer(dictList, dictListSize,
|
ZDICT_trainBuffer_legacy(dictList, dictListSize,
|
||||||
samplesBuffer, samplesBuffSize,
|
samplesBuffer, samplesBuffSize,
|
||||||
samplesSizes, nbSamples,
|
samplesSizes, nbSamples,
|
||||||
minRep, notificationLevel);
|
minRep, notificationLevel);
|
||||||
|
|
||||||
/* display best matches */
|
/* display best matches */
|
||||||
if (params.notificationLevel>= 3) {
|
if (params.zParams.notificationLevel>= 3) {
|
||||||
U32 const nb = MIN(25, dictList[0].pos);
|
U32 const nb = MIN(25, dictList[0].pos);
|
||||||
U32 const dictContentSize = ZDICT_dictSize(dictList);
|
U32 const dictContentSize = ZDICT_dictSize(dictList);
|
||||||
U32 u;
|
U32 u;
|
||||||
@ -1026,7 +1015,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
|
|||||||
|
|
||||||
dictSize = ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, maxDictSize,
|
dictSize = ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, maxDictSize,
|
||||||
samplesBuffer, samplesSizes, nbSamples,
|
samplesBuffer, samplesSizes, nbSamples,
|
||||||
params);
|
params.zParams);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* clean up */
|
/* clean up */
|
||||||
@ -1037,9 +1026,9 @@ size_t ZDICT_trainFromBuffer_unsafe(
|
|||||||
|
|
||||||
/* issue : samplesBuffer need to be followed by a noisy guard band.
|
/* issue : samplesBuffer need to be followed by a noisy guard band.
|
||||||
* work around : duplicate the buffer, and add the noise */
|
* work around : duplicate the buffer, and add the noise */
|
||||||
size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
|
size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity,
|
||||||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
||||||
ZDICT_params_t params)
|
ZDICT_legacy_params_t params)
|
||||||
{
|
{
|
||||||
size_t result;
|
size_t result;
|
||||||
void* newBuff;
|
void* newBuff;
|
||||||
@ -1052,10 +1041,9 @@ size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacit
|
|||||||
memcpy(newBuff, samplesBuffer, sBuffSize);
|
memcpy(newBuff, samplesBuffer, sBuffSize);
|
||||||
ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH); /* guard band, for end of buffer condition */
|
ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH); /* guard band, for end of buffer condition */
|
||||||
|
|
||||||
result = ZDICT_trainFromBuffer_unsafe(
|
result =
|
||||||
dictBuffer, dictBufferCapacity,
|
ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, dictBufferCapacity, newBuff,
|
||||||
newBuff, samplesSizes, nbSamples,
|
samplesSizes, nbSamples, params);
|
||||||
params);
|
|
||||||
free(newBuff);
|
free(newBuff);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@ -1064,11 +1052,13 @@ size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacit
|
|||||||
size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
||||||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
|
||||||
{
|
{
|
||||||
ZDICT_params_t params;
|
ZDICT_cover_params_t params;
|
||||||
memset(&params, 0, sizeof(params));
|
memset(&params, 0, sizeof(params));
|
||||||
return ZDICT_trainFromBuffer_advanced(dictBuffer, dictBufferCapacity,
|
params.d = 8;
|
||||||
samplesBuffer, samplesSizes, nbSamples,
|
params.steps = 4;
|
||||||
params);
|
return ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, dictBufferCapacity,
|
||||||
|
samplesBuffer, samplesSizes,
|
||||||
|
nbSamples, &params);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
|
size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
|
||||||
|
@ -36,18 +36,20 @@ extern "C" {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
/*! ZDICT_trainFromBuffer() :
|
/*! ZDICT_trainFromBuffer():
|
||||||
Train a dictionary from an array of samples.
|
* Train a dictionary from an array of samples.
|
||||||
Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
* Uses ZDICT_optimizeTrainFromBuffer_cover() single-threaded, with d=8 and steps=4.
|
||||||
supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
||||||
The resulting dictionary will be saved into `dictBuffer`.
|
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
||||||
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
* The resulting dictionary will be saved into `dictBuffer`.
|
||||||
or an error code, which can be tested with ZDICT_isError().
|
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||||
Tips : In general, a reasonable dictionary has a size of ~ 100 KB.
|
* or an error code, which can be tested with ZDICT_isError().
|
||||||
It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
|
* Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte.
|
||||||
In general, it's recommended to provide a few thousands samples, but this can vary a lot.
|
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
||||||
It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
|
* It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
|
||||||
*/
|
* In general, it's recommended to provide a few thousands samples, but this can vary a lot.
|
||||||
|
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
|
||||||
|
*/
|
||||||
ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
||||||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
|
||||||
|
|
||||||
@ -69,94 +71,78 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
|
|||||||
* ==================================================================================== */
|
* ==================================================================================== */
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */
|
|
||||||
int compressionLevel; /* 0 means default; target a specific zstd compression level */
|
int compressionLevel; /* 0 means default; target a specific zstd compression level */
|
||||||
unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
|
unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
|
||||||
unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */
|
unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */
|
||||||
unsigned reserved[2]; /* reserved space for future parameters */
|
|
||||||
} ZDICT_params_t;
|
} ZDICT_params_t;
|
||||||
|
|
||||||
|
/*! ZDICT_cover_params_t:
|
||||||
/*! ZDICT_trainFromBuffer_advanced() :
|
* For all values 0 means default.
|
||||||
Same as ZDICT_trainFromBuffer() with control over more parameters.
|
* k and d are the only required parameters.
|
||||||
`parameters` is optional and can be provided with values set to 0 to mean "default".
|
*/
|
||||||
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferSize`),
|
|
||||||
or an error code, which can be tested by ZDICT_isError().
|
|
||||||
note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using notificationLevel>0.
|
|
||||||
*/
|
|
||||||
ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
|
|
||||||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
|
||||||
ZDICT_params_t parameters);
|
|
||||||
|
|
||||||
/*! COVER_params_t :
|
|
||||||
For all values 0 means default.
|
|
||||||
k and d are the only required parameters.
|
|
||||||
*/
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
|
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
|
||||||
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
|
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
|
||||||
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
|
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
|
||||||
|
|
||||||
unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
|
unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
|
||||||
unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
|
ZDICT_params_t zParams;
|
||||||
unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */
|
} ZDICT_cover_params_t;
|
||||||
int compressionLevel; /* 0 means default; target a specific zstd compression level */
|
|
||||||
} COVER_params_t;
|
|
||||||
|
|
||||||
|
|
||||||
/*! COVER_trainFromBuffer() :
|
/*! ZDICT_trainFromBuffer_cover():
|
||||||
Train a dictionary from an array of samples using the COVER algorithm.
|
* Train a dictionary from an array of samples using the COVER algorithm.
|
||||||
Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
||||||
supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
||||||
The resulting dictionary will be saved into `dictBuffer`.
|
* The resulting dictionary will be saved into `dictBuffer`.
|
||||||
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||||
or an error code, which can be tested with ZDICT_isError().
|
* or an error code, which can be tested with ZDICT_isError().
|
||||||
Note : COVER_trainFromBuffer() requires about 9 bytes of memory for each input byte.
|
* Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
|
||||||
Tips : In general, a reasonable dictionary has a size of ~ 100 KB.
|
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
||||||
It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
|
* It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
|
||||||
In general, it's recommended to provide a few thousands samples, but this can vary a lot.
|
* In general, it's recommended to provide a few thousands samples, but this can vary a lot.
|
||||||
It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
|
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
|
||||||
*/
|
*/
|
||||||
ZDICTLIB_API size_t COVER_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
||||||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
||||||
COVER_params_t parameters);
|
const size_t *samplesSizes, unsigned nbSamples,
|
||||||
|
ZDICT_cover_params_t parameters);
|
||||||
|
|
||||||
/*! COVER_optimizeTrainFromBuffer() :
|
/*! ZDICT_optimizeTrainFromBuffer_cover():
|
||||||
The same requirements as above hold for all the parameters except `parameters`.
|
* The same requirements as above hold for all the parameters except `parameters`.
|
||||||
This function tries many parameter combinations and picks the best parameters.
|
* This function tries many parameter combinations and picks the best parameters.
|
||||||
`*parameters` is filled with the best parameters found, and the dictionary
|
* `*parameters` is filled with the best parameters found, and the dictionary
|
||||||
constructed with those parameters is stored in `dictBuffer`.
|
* constructed with those parameters is stored in `dictBuffer`.
|
||||||
|
*
|
||||||
|
* All of the parameters d, k, steps are optional.
|
||||||
|
* If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
|
||||||
|
* if steps is zero it defaults to its default value.
|
||||||
|
* If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048].
|
||||||
|
*
|
||||||
|
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||||
|
* or an error code, which can be tested with ZDICT_isError().
|
||||||
|
* On success `*parameters` contains the parameters selected.
|
||||||
|
* Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
|
||||||
|
*/
|
||||||
|
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
||||||
|
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
||||||
|
const size_t *samplesSizes, unsigned nbSamples,
|
||||||
|
ZDICT_cover_params_t *parameters);
|
||||||
|
|
||||||
All of the parameters d, k, steps are optional.
|
/*! ZDICT_finalizeDictionary():
|
||||||
If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
|
* Given a custom content as a basis for dictionary, and a set of samples,
|
||||||
if steps is zero it defaults to its default value.
|
* finalize dictionary by adding headers and statistics.
|
||||||
If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048].
|
*
|
||||||
|
* Samples must be stored concatenated in a flat buffer `samplesBuffer`,
|
||||||
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
* supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
|
||||||
or an error code, which can be tested with ZDICT_isError().
|
*
|
||||||
On success `*parameters` contains the parameters selected.
|
* dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
|
||||||
Note : COVER_optimizeTrainFromBuffer() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
|
* maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
|
||||||
*/
|
*
|
||||||
ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
|
||||||
const void* samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
|
* or an error code, which can be tested by ZDICT_isError().
|
||||||
COVER_params_t *parameters);
|
* Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
|
||||||
|
* Note 2: dictBuffer and dictContent can overlap
|
||||||
/*! ZDICT_finalizeDictionary() :
|
*/
|
||||||
|
|
||||||
Given a custom content as a basis for dictionary, and a set of samples,
|
|
||||||
finalize dictionary by adding headers and statistics.
|
|
||||||
|
|
||||||
Samples must be stored concatenated in a flat buffer `samplesBuffer`,
|
|
||||||
supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
|
|
||||||
|
|
||||||
dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
|
|
||||||
maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
|
|
||||||
|
|
||||||
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
|
|
||||||
or an error code, which can be tested by ZDICT_isError().
|
|
||||||
note : ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
|
|
||||||
note 2 : dictBuffer and dictContent can overlap
|
|
||||||
*/
|
|
||||||
#define ZDICT_CONTENTSIZE_MIN 128
|
#define ZDICT_CONTENTSIZE_MIN 128
|
||||||
#define ZDICT_DICTSIZE_MIN 256
|
#define ZDICT_DICTSIZE_MIN 256
|
||||||
ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
||||||
@ -164,7 +150,28 @@ ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBuffer
|
|||||||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
||||||
ZDICT_params_t parameters);
|
ZDICT_params_t parameters);
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */
|
||||||
|
ZDICT_params_t zParams;
|
||||||
|
} ZDICT_legacy_params_t;
|
||||||
|
|
||||||
|
/*! ZDICT_trainFromBuffer_legacy():
|
||||||
|
* Train a dictionary from an array of samples.
|
||||||
|
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
||||||
|
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
||||||
|
* The resulting dictionary will be saved into `dictBuffer`.
|
||||||
|
* `parameters` is optional and can be provided with values set to 0 to mean "default".
|
||||||
|
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||||
|
* or an error code, which can be tested with ZDICT_isError().
|
||||||
|
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
||||||
|
* It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
|
||||||
|
* In general, it's recommended to provide a few thousands samples, but this can vary a lot.
|
||||||
|
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
|
||||||
|
* Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
|
||||||
|
*/
|
||||||
|
ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
|
||||||
|
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
||||||
|
const size_t *samplesSizes, unsigned nbSamples, ZDICT_legacy_params_t parameters);
|
||||||
|
|
||||||
/* Deprecation warnings */
|
/* Deprecation warnings */
|
||||||
/* It is generally possible to disable deprecation warnings from compiler,
|
/* It is generally possible to disable deprecation warnings from compiler,
|
||||||
|
@ -216,21 +216,21 @@ static U64 DiB_getTotalCappedFileSize(const char** fileNamesTable, unsigned nbFi
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*! ZDICT_trainFromBuffer_unsafe() :
|
/*! ZDICT_trainFromBuffer_unsafe_legacy() :
|
||||||
Strictly Internal use only !!
|
Strictly Internal use only !!
|
||||||
Same as ZDICT_trainFromBuffer_advanced(), but does not control `samplesBuffer`.
|
Same as ZDICT_trainFromBuffer_legacy(), but does not control `samplesBuffer`.
|
||||||
`samplesBuffer` must be followed by noisy guard band to avoid out-of-buffer reads.
|
`samplesBuffer` must be followed by noisy guard band to avoid out-of-buffer reads.
|
||||||
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||||
or an error code.
|
or an error code.
|
||||||
*/
|
*/
|
||||||
size_t ZDICT_trainFromBuffer_unsafe(void* dictBuffer, size_t dictBufferCapacity,
|
size_t ZDICT_trainFromBuffer_unsafe_legacy(void* dictBuffer, size_t dictBufferCapacity,
|
||||||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
||||||
ZDICT_params_t parameters);
|
ZDICT_legacy_params_t parameters);
|
||||||
|
|
||||||
|
|
||||||
int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
|
int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
|
||||||
const char** fileNamesTable, unsigned nbFiles,
|
const char** fileNamesTable, unsigned nbFiles,
|
||||||
ZDICT_params_t *params, COVER_params_t *coverParams,
|
ZDICT_legacy_params_t *params, ZDICT_cover_params_t *coverParams,
|
||||||
int optimizeCover)
|
int optimizeCover)
|
||||||
{
|
{
|
||||||
void* const dictBuffer = malloc(maxDictSize);
|
void* const dictBuffer = malloc(maxDictSize);
|
||||||
@ -243,8 +243,8 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
|
|||||||
int result = 0;
|
int result = 0;
|
||||||
|
|
||||||
/* Checks */
|
/* Checks */
|
||||||
if (params) g_displayLevel = params->notificationLevel;
|
if (params) g_displayLevel = params->zParams.notificationLevel;
|
||||||
else if (coverParams) g_displayLevel = coverParams->notificationLevel;
|
else if (coverParams) g_displayLevel = coverParams->zParams.notificationLevel;
|
||||||
else EXM_THROW(13, "Neither dictionary algorith selected"); /* should not happen */
|
else EXM_THROW(13, "Neither dictionary algorith selected"); /* should not happen */
|
||||||
if ((!fileSizes) || (!srcBuffer) || (!dictBuffer)) EXM_THROW(12, "not enough memory for DiB_trainFiles"); /* should not happen */
|
if ((!fileSizes) || (!srcBuffer) || (!dictBuffer)) EXM_THROW(12, "not enough memory for DiB_trainFiles"); /* should not happen */
|
||||||
if (g_tooLargeSamples) {
|
if (g_tooLargeSamples) {
|
||||||
@ -273,20 +273,20 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
|
|||||||
size_t dictSize;
|
size_t dictSize;
|
||||||
if (params) {
|
if (params) {
|
||||||
DiB_fillNoise((char*)srcBuffer + benchedSize, NOISELENGTH); /* guard band, for end of buffer condition */
|
DiB_fillNoise((char*)srcBuffer + benchedSize, NOISELENGTH); /* guard band, for end of buffer condition */
|
||||||
dictSize = ZDICT_trainFromBuffer_unsafe(dictBuffer, maxDictSize,
|
dictSize = ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, maxDictSize,
|
||||||
srcBuffer, fileSizes, nbFiles,
|
srcBuffer, fileSizes, nbFiles,
|
||||||
*params);
|
*params);
|
||||||
} else if (optimizeCover) {
|
} else if (optimizeCover) {
|
||||||
dictSize = COVER_optimizeTrainFromBuffer(
|
dictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, maxDictSize,
|
||||||
dictBuffer, maxDictSize, srcBuffer, fileSizes, nbFiles,
|
srcBuffer, fileSizes, nbFiles,
|
||||||
coverParams);
|
coverParams);
|
||||||
if (!ZDICT_isError(dictSize)) {
|
if (!ZDICT_isError(dictSize)) {
|
||||||
DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\n", coverParams->k, coverParams->d, coverParams->steps);
|
DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\n", coverParams->k, coverParams->d, coverParams->steps);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
dictSize = COVER_trainFromBuffer(dictBuffer, maxDictSize,
|
dictSize =
|
||||||
srcBuffer, fileSizes, nbFiles,
|
ZDICT_trainFromBuffer_cover(dictBuffer, maxDictSize, srcBuffer,
|
||||||
*coverParams);
|
fileSizes, nbFiles, *coverParams);
|
||||||
}
|
}
|
||||||
if (ZDICT_isError(dictSize)) {
|
if (ZDICT_isError(dictSize)) {
|
||||||
DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */
|
DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */
|
||||||
|
@ -32,7 +32,7 @@
|
|||||||
*/
|
*/
|
||||||
int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
|
int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
|
||||||
const char** fileNamesTable, unsigned nbFiles,
|
const char** fileNamesTable, unsigned nbFiles,
|
||||||
ZDICT_params_t *params, COVER_params_t *coverParams,
|
ZDICT_legacy_params_t *params, ZDICT_cover_params_t *coverParams,
|
||||||
int optimizeCover);
|
int optimizeCover);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -248,7 +248,7 @@ static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
|
|||||||
* @return 1 means that cover parameters were correct
|
* @return 1 means that cover parameters were correct
|
||||||
* @return 0 in case of malformed parameters
|
* @return 0 in case of malformed parameters
|
||||||
*/
|
*/
|
||||||
static unsigned parseCoverParameters(const char* stringPtr, COVER_params_t* params)
|
static unsigned parseCoverParameters(const char* stringPtr, ZDICT_cover_params_t* params)
|
||||||
{
|
{
|
||||||
memset(params, 0, sizeof(*params));
|
memset(params, 0, sizeof(*params));
|
||||||
for (; ;) {
|
for (; ;) {
|
||||||
@ -277,9 +277,9 @@ static unsigned parseLegacyParameters(const char* stringPtr, unsigned* selectivi
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static COVER_params_t defaultCoverParams(void)
|
static ZDICT_cover_params_t defaultCoverParams(void)
|
||||||
{
|
{
|
||||||
COVER_params_t params;
|
ZDICT_cover_params_t params;
|
||||||
memset(&params, 0, sizeof(params));
|
memset(&params, 0, sizeof(params));
|
||||||
params.d = 8;
|
params.d = 8;
|
||||||
params.steps = 4;
|
params.steps = 4;
|
||||||
@ -358,7 +358,7 @@ int main(int argCount, const char* argv[])
|
|||||||
unsigned fileNamesNb;
|
unsigned fileNamesNb;
|
||||||
#endif
|
#endif
|
||||||
#ifndef ZSTD_NODICT
|
#ifndef ZSTD_NODICT
|
||||||
COVER_params_t coverParams = defaultCoverParams();
|
ZDICT_cover_params_t coverParams = defaultCoverParams();
|
||||||
int cover = 1;
|
int cover = 1;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -699,20 +699,20 @@ int main(int argCount, const char* argv[])
|
|||||||
/* Check if dictionary builder is selected */
|
/* Check if dictionary builder is selected */
|
||||||
if (operation==zom_train) {
|
if (operation==zom_train) {
|
||||||
#ifndef ZSTD_NODICT
|
#ifndef ZSTD_NODICT
|
||||||
|
ZDICT_params_t zParams;
|
||||||
|
zParams.compressionLevel = dictCLevel;
|
||||||
|
zParams.notificationLevel = g_displayLevel;
|
||||||
|
zParams.dictID = dictID;
|
||||||
if (cover) {
|
if (cover) {
|
||||||
int const optimize = !coverParams.k || !coverParams.d;
|
int const optimize = !coverParams.k || !coverParams.d;
|
||||||
coverParams.nbThreads = nbThreads;
|
coverParams.nbThreads = nbThreads;
|
||||||
coverParams.compressionLevel = dictCLevel;
|
coverParams.zParams = zParams;
|
||||||
coverParams.notificationLevel = g_displayLevel;
|
|
||||||
coverParams.dictID = dictID;
|
|
||||||
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, NULL, &coverParams, optimize);
|
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, NULL, &coverParams, optimize);
|
||||||
} else {
|
} else {
|
||||||
ZDICT_params_t dictParams;
|
ZDICT_legacy_params_t dictParams;
|
||||||
memset(&dictParams, 0, sizeof(dictParams));
|
memset(&dictParams, 0, sizeof(dictParams));
|
||||||
dictParams.compressionLevel = dictCLevel;
|
|
||||||
dictParams.selectivityLevel = dictSelect;
|
dictParams.selectivityLevel = dictSelect;
|
||||||
dictParams.notificationLevel = g_displayLevel;
|
dictParams.zParams = zParams;
|
||||||
dictParams.dictID = dictID;
|
|
||||||
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, &dictParams, NULL, 0);
|
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, &dictParams, NULL, 0);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -638,7 +638,7 @@ static int basicUnitTests(U32 seed, double compressibility)
|
|||||||
size_t const sampleUnitSize = 8 KB;
|
size_t const sampleUnitSize = 8 KB;
|
||||||
U32 const nbSamples = (U32)(totalSampleSize / sampleUnitSize);
|
U32 const nbSamples = (U32)(totalSampleSize / sampleUnitSize);
|
||||||
size_t* const samplesSizes = (size_t*) malloc(nbSamples * sizeof(size_t));
|
size_t* const samplesSizes = (size_t*) malloc(nbSamples * sizeof(size_t));
|
||||||
COVER_params_t params;
|
ZDICT_cover_params_t params;
|
||||||
U32 dictID;
|
U32 dictID;
|
||||||
|
|
||||||
if (dictBuffer==NULL || samplesSizes==NULL) {
|
if (dictBuffer==NULL || samplesSizes==NULL) {
|
||||||
@ -647,14 +647,14 @@ static int basicUnitTests(U32 seed, double compressibility)
|
|||||||
goto _output_error;
|
goto _output_error;
|
||||||
}
|
}
|
||||||
|
|
||||||
DISPLAYLEVEL(4, "test%3i : COVER_trainFromBuffer : ", testNb++);
|
DISPLAYLEVEL(4, "test%3i : ZDICT_trainFromBuffer_cover : ", testNb++);
|
||||||
{ U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
|
{ U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
|
||||||
memset(&params, 0, sizeof(params));
|
memset(&params, 0, sizeof(params));
|
||||||
params.d = 1 + (FUZ_rand(&seed) % 16);
|
params.d = 1 + (FUZ_rand(&seed) % 16);
|
||||||
params.k = params.d + (FUZ_rand(&seed) % 256);
|
params.k = params.d + (FUZ_rand(&seed) % 256);
|
||||||
dictSize = COVER_trainFromBuffer(dictBuffer, dictSize,
|
dictSize = ZDICT_trainFromBuffer_cover(dictBuffer, dictSize,
|
||||||
CNBuffer, samplesSizes, nbSamples,
|
CNBuffer, samplesSizes, nbSamples,
|
||||||
params);
|
params);
|
||||||
if (ZDICT_isError(dictSize)) goto _output_error;
|
if (ZDICT_isError(dictSize)) goto _output_error;
|
||||||
DISPLAYLEVEL(4, "OK, created dictionary of size %u \n", (U32)dictSize);
|
DISPLAYLEVEL(4, "OK, created dictionary of size %u \n", (U32)dictSize);
|
||||||
|
|
||||||
@ -663,12 +663,12 @@ static int basicUnitTests(U32 seed, double compressibility)
|
|||||||
if (dictID==0) goto _output_error;
|
if (dictID==0) goto _output_error;
|
||||||
DISPLAYLEVEL(4, "OK : %u \n", dictID);
|
DISPLAYLEVEL(4, "OK : %u \n", dictID);
|
||||||
|
|
||||||
DISPLAYLEVEL(4, "test%3i : COVER_optimizeTrainFromBuffer : ", testNb++);
|
DISPLAYLEVEL(4, "test%3i : ZDICT_optimizeTrainFromBuffer_cover : ", testNb++);
|
||||||
memset(&params, 0, sizeof(params));
|
memset(&params, 0, sizeof(params));
|
||||||
params.steps = 4;
|
params.steps = 4;
|
||||||
optDictSize = COVER_optimizeTrainFromBuffer(dictBuffer, optDictSize,
|
optDictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, optDictSize,
|
||||||
CNBuffer, samplesSizes, nbSamples / 4,
|
CNBuffer, samplesSizes,
|
||||||
&params);
|
nbSamples / 4, &params);
|
||||||
if (ZDICT_isError(optDictSize)) goto _output_error;
|
if (ZDICT_isError(optDictSize)) goto _output_error;
|
||||||
DISPLAYLEVEL(4, "OK, created dictionary of size %u \n", (U32)optDictSize);
|
DISPLAYLEVEL(4, "OK, created dictionary of size %u \n", (U32)optDictSize);
|
||||||
|
|
||||||
|
@ -131,7 +131,10 @@ static const void *symbols[] = {
|
|||||||
&ZDICT_isError,
|
&ZDICT_isError,
|
||||||
&ZDICT_getErrorName,
|
&ZDICT_getErrorName,
|
||||||
/* zdict.h: advanced functions */
|
/* zdict.h: advanced functions */
|
||||||
&ZDICT_trainFromBuffer_advanced,
|
&ZDICT_trainFromBuffer_cover,
|
||||||
|
&ZDICT_optimizeTrainFromBuffer_cover,
|
||||||
|
&ZDICT_finalizeDictionary,
|
||||||
|
&ZDICT_trainFromBuffer_legacy,
|
||||||
&ZDICT_addEntropyTablesFromBuffer,
|
&ZDICT_addEntropyTablesFromBuffer,
|
||||||
NULL,
|
NULL,
|
||||||
};
|
};
|
||||||
|
Loading…
Reference in New Issue
Block a user