diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c index c8bee4e2..089f077c 100644 --- a/lib/dictBuilder/cover.c +++ b/lib/dictBuilder/cover.c @@ -28,7 +28,7 @@ /*-************************************* * Constants ***************************************/ -#define COVER_MAX_SAMPLES_SIZE ((U32)-1) +#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB)) /*-************************************* * Console display @@ -500,7 +500,9 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples); /* Checks */ if (totalSamplesSize < d || - totalSamplesSize > (size_t)COVER_MAX_SAMPLES_SIZE) { + totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) { + DISPLAYLEVEL(1, "Total samples size is too large, maximum size is %u MB\n", + (COVER_MAX_SAMPLES_SIZE >> 20)); return 0; } /* Zero the context */ @@ -518,6 +520,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, /* The offsets of each file */ ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t)); if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) { + DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n"); COVER_ctx_destroy(ctx); return 0; } @@ -651,7 +654,6 @@ ZDICTLIB_API size_t COVER_trainFromBuffer( /* Initialize context and activeDmers */ if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, parameters.d)) { - DISPLAYLEVEL(1, "Failed to initialize context\n"); return ERROR(GENERIC); } if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { diff --git a/lib/dictBuilder/zdict.h b/lib/dictBuilder/zdict.h index b99990c3..1b3bcb5b 100644 --- a/lib/dictBuilder/zdict.h +++ b/lib/dictBuilder/zdict.h @@ -108,6 +108,7 @@ typedef struct { The resulting dictionary will be saved into `dictBuffer`. @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) or an error code, which can be tested with ZDICT_isError(). + Note : COVER_trainFromBuffer() requires about 9 bytes of memory for each input byte. Tips : In general, a reasonable dictionary has a size of ~ 100 KB. It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`. In general, it's recommended to provide a few thousands samples, but this can vary a lot. @@ -131,6 +132,7 @@ ZDICTLIB_API size_t COVER_trainFromBuffer(void* dictBuffer, size_t dictBufferCap @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) or an error code, which can be tested with ZDICT_isError(). On success `*parameters` contains the parameters selected. + Note : COVER_optimizeTrainFromBuffer() requires about 9 bytes of memory for each input byte. */ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, diff --git a/programs/dibio.c b/programs/dibio.c index 33c0250c..5ef202c8 100644 --- a/programs/dibio.c +++ b/programs/dibio.c @@ -235,7 +235,8 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize, void* const dictBuffer = malloc(maxDictSize); size_t* const fileSizes = (size_t*)malloc(nbFiles * sizeof(size_t)); unsigned long long const totalSizeToLoad = DiB_getTotalCappedFileSize(fileNamesTable, nbFiles); - size_t const maxMem = DiB_findMaxMem(totalSizeToLoad * MEMMULT) / MEMMULT; + size_t const memMult = params ? MEMMULT : COVER_MEMMULT; + size_t const maxMem = DiB_findMaxMem(totalSizeToLoad * memMult) / memMult; size_t benchedSize = (size_t) MIN ((unsigned long long)maxMem, totalSizeToLoad); void* const srcBuffer = malloc(benchedSize+NOISELENGTH); int result = 0;