Merge pull request #509 from terrelln/dict-builder-32
Handle cover dictionary builder maximum input size for 32-bit mode
commit b8cdc16969
@@ -28,7 +28,7 @@
 /*-*************************************
 * Constants
 ***************************************/
-#define COVER_MAX_SAMPLES_SIZE ((U32)-1)
+#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
 
 /*-*************************************
 * Console display
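As a quick illustration of the new cap, here is a minimal standalone sketch (not zstd source) of how the same expression evaluates on 64-bit versus 32-bit builds. The `GB` unit macro below is an assumption modeled on zstd's `KB`/`MB`/`GB` style macros; only the ternary itself comes from the diff.

```c
#include <stdio.h>
#include <stdint.h>

typedef uint32_t U32;
#define GB *(1U << 30)   /* assumed unit macro: "1 GB" expands to 1 * (1U << 30) */
#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))

int main(void) {
    /* Prints 4294967295 (U32 max) on a 64-bit build, 1073741824 (1 GiB) on a 32-bit build. */
    printf("max samples size: %u bytes\n", (unsigned)COVER_MAX_SAMPLES_SIZE);
    return 0;
}
```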
@@ -500,7 +500,9 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
   const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
   /* Checks */
   if (totalSamplesSize < d ||
-      totalSamplesSize > (size_t)COVER_MAX_SAMPLES_SIZE) {
+      totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
+    DISPLAYLEVEL(1, "Total samples size is too large, maximum size is %u MB\n",
+                 (COVER_MAX_SAMPLES_SIZE >> 20));
     return 0;
   }
   /* Zero the context */
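For context, here is a hedged standalone sketch of the acceptance test this hunk tightens (illustrative code, not the zstd implementation): the total sample size is summed, and the new `>=` rejects a total equal to the cap as well as anything above it.

```c
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

typedef uint32_t U32;
/* Same cap as above: U32 max on 64-bit builds, 1 GiB on 32-bit builds. */
#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 << 30))

/* Sum all sample sizes (the role COVER_sum plays in the hunk above). */
static size_t sum_sizes(const size_t *samplesSizes, unsigned nbSamples) {
    size_t total = 0;
    unsigned i;
    for (i = 0; i < nbSamples; ++i) total += samplesSizes[i];
    return total;
}

/* Returns 1 when training may proceed for dmer length d, 0 otherwise:
 * the negation of the rejection test in COVER_ctx_init. */
static int samples_acceptable(const size_t *samplesSizes, unsigned nbSamples, unsigned d) {
    const size_t totalSamplesSize = sum_sizes(samplesSizes, nbSamples);
    return totalSamplesSize >= d && totalSamplesSize < (size_t)COVER_MAX_SAMPLES_SIZE;
}

int main(void) {
    const size_t sizes[3] = { 4096, 65536, 123 };
    printf("acceptable: %d\n", samples_acceptable(sizes, 3, 8));
    return 0;
}
```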
@@ -518,6 +520,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
   /* The offsets of each file */
   ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t));
   if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) {
+    DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
     COVER_ctx_destroy(ctx);
     return 0;
   }
@@ -651,7 +654,6 @@ ZDICTLIB_API size_t COVER_trainFromBuffer(
   /* Initialize context and activeDmers */
   if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
                       parameters.d)) {
-    DISPLAYLEVEL(1, "Failed to initialize context\n");
     return ERROR(GENERIC);
   }
   if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
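These two hunks appear to move error reporting into the context initializer: the specific failure (oversized samples, failed allocation) is reported where it happens, and the caller only propagates a generic error. A minimal sketch of that pattern with hypothetical names (not zstd source):

```c
#include <stdio.h>
#include <stdlib.h>

typedef struct {
    unsigned *suffix;   /* scratch buffer */
    size_t   *offsets;  /* per-sample offsets */
} ctx_t;

/* Frees whatever was allocated; safe on a partially-built context. */
static void ctx_destroy(ctx_t *ctx) {
    free(ctx->suffix);
    free(ctx->offsets);
    ctx->suffix = NULL;
    ctx->offsets = NULL;
}

/* Reports its own failure reason, so the caller only needs a generic error path. */
static int ctx_init(ctx_t *ctx, size_t suffixSize, unsigned nbSamples) {
    ctx->suffix  = (unsigned *)malloc((suffixSize + 1) * sizeof(unsigned));
    ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t));
    if (!ctx->suffix || !ctx->offsets) {
        fprintf(stderr, "Failed to allocate scratch buffers\n");
        ctx_destroy(ctx);   /* frees whichever allocations succeeded */
        return 0;
    }
    return 1;
}

int main(void) {
    ctx_t ctx;
    if (!ctx_init(&ctx, (size_t)1 << 20, 1000)) return 1;   /* caller just propagates */
    /* ... training would happen here ... */
    ctx_destroy(&ctx);
    return 0;
}
```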
@@ -108,6 +108,7 @@ typedef struct {
     The resulting dictionary will be saved into `dictBuffer`.
     @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
               or an error code, which can be tested with ZDICT_isError().
+    Note : COVER_trainFromBuffer() requires about 9 bytes of memory for each input byte.
     Tips : In general, a reasonable dictionary has a size of ~ 100 KB.
            It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
            In general, it's recommended to provide a few thousands samples, but this can vary a lot.
@@ -131,6 +132,7 @@ ZDICTLIB_API size_t COVER_trainFromBuffer(void* dictBuffer, size_t dictBufferCap
     @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
               or an error code, which can be tested with ZDICT_isError().
               On success `*parameters` contains the parameters selected.
+    Note : COVER_optimizeTrainFromBuffer() requires about 9 bytes of memory for each input byte.
 */
 ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
                           const void* samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
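The new documentation notes pin down the trainer's memory cost. A hedged back-of-the-envelope sketch (not zstd source): the 9x factor comes from the note above, while the 2 GiB figure is an illustrative budget for a 32-bit process.

```c
#include <stdio.h>
#include <stdint.h>

#define BYTES_PER_INPUT_BYTE 9ULL   /* from the note above: ~9 bytes of working memory per input byte */

int main(void) {
    const uint64_t totalSamplesSize = 1ULL << 30;   /* e.g. 1 GiB of training samples */
    const uint64_t needed = totalSamplesSize * BYTES_PER_INPUT_BYTE;
    const uint64_t budget = 2ULL << 30;             /* illustrative budget for a 32-bit process */

    printf("samples: %llu MB, estimated working memory: %llu MB\n",
           (unsigned long long)(totalSamplesSize >> 20),
           (unsigned long long)(needed >> 20));
    if (needed > budget)
        printf("too large for a 32-bit process; reduce the sample set\n");
    return 0;
}
```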
@@ -235,7 +235,8 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
     void* const dictBuffer = malloc(maxDictSize);
     size_t* const fileSizes = (size_t*)malloc(nbFiles * sizeof(size_t));
     unsigned long long const totalSizeToLoad = DiB_getTotalCappedFileSize(fileNamesTable, nbFiles);
-    size_t const maxMem = DiB_findMaxMem(totalSizeToLoad * MEMMULT) / MEMMULT;
+    size_t const memMult = params ? MEMMULT : COVER_MEMMULT;
+    size_t const maxMem = DiB_findMaxMem(totalSizeToLoad * memMult) / memMult;
     size_t benchedSize = (size_t) MIN ((unsigned long long)maxMem, totalSizeToLoad);
     void* const srcBuffer = malloc(benchedSize+NOISELENGTH);
     int result = 0;
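Finally, the dibio change sizes the load buffer with a trainer-specific multiplier instead of the fixed MEMMULT. A minimal sketch of the same selection logic (illustrative names; the multiplier values 11 and 9 are assumptions, not taken from the diff):

```c
#include <stdio.h>
#include <stddef.h>

#define MEMMULT       11   /* assumed value: legacy trainer's rough memory cost per input byte */
#define COVER_MEMMULT  9   /* assumed value: cover trainer's ~9 bytes per input byte */

/* Cap how much sample data to load so that (loaded * memMult) fits in maxMemory,
 * mirroring the memMult selection in DiB_trainFromFiles above. */
static size_t capped_load_size(size_t maxMemory, int useLegacyTrainer,
                               unsigned long long totalSizeToLoad) {
    const size_t memMult = useLegacyTrainer ? MEMMULT : COVER_MEMMULT;
    const size_t maxMem  = maxMemory / memMult;
    return (size_t)(totalSizeToLoad < maxMem ? totalSizeToLoad : maxMem);
}

int main(void) {
    /* e.g. a 512 MiB budget, cover trainer selected, 1 GiB of files on disk */
    printf("load %zu bytes\n", capped_load_size((size_t)512 << 20, 0, 1ULL << 30));
    return 0;
}
```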