Refactoring and benchmark without dictionary

2018-07-20 17:03:47 -07:00 · 2018-07-20 17:03:47 -07:00 · 71e767ac09
commit 71e767ac09
parent 470c8d42f4
15 changed files with 232 additions and 313 deletions
--- a/contrib/benchmarkDictBuilder/README.md
+++ b/contrib/benchmarkDictBuilder/README.md
@ -1,43 +0,0 @@
-Benchmarking Dictionary Builder
-
-### Permitted Argument:
-Input File/Directory (in=fileName): required; file/directory used to build dictionary; if directory, will operate recursively for files inside directory; can include multiple files/directories, each following "in="
-
-###Running Test:
-make test
-
-###Usage:
-Benchmark given input files: make ARG= followed by permitted arguments
-
-### Examples:
-make ARG="in=../../lib/dictBuilder in=../../lib/compress"
-
-###Benchmarking Result:
-
-github:
-| Algorithm     | Speed(sec)    | Compression Ratio  |
-| ------------- |:-------------:| ------------------:|
-| random        | 0.182254      |  8.786957          |
-| cover         | 34.821007     |  10.430999         |
-| legacy        | 1.125494      |  8.989482          |
-
-hg-commands
-| Algorithm     | Speed(sec)    | Compression Ratio  |
-| ------------- |:-------------:| ------------------:|
-| random        | 0.089231      |  3.489515          |
-| cover         | 32.342462     |  4.030274          |
-| legacy        | 1.066594      |  3.911896          |
-
-hg-manifest
-| Algorithm     | Speed(sec)    | Compression Ratio  |
-| ------------- |:-------------:| ------------------:|
-| random        | 1.095083      |  2.309485          |
-| cover         | 517.999132    |  2.575331          |
-| legacy        | 10.789509     |  2.506775          |
-
-hg-changelog
-| Algorithm     | Speed(sec)    | Compression Ratio  |
-| ------------- |:-------------:| ------------------:|
-| random        | 0.639630      |  2.096785          |
-| cover         | 121.398023    |  2.175706          |
-| legacy        | 3.050893      |  2.058273          |
--- a/contrib/benchmarkDictBuilder/dictBuilder.h
+++ b/contrib/benchmarkDictBuilder/dictBuilder.h
@ -1,10 +0,0 @@
-/*! ZDICT_trainFromBuffer_unsafe_legacy() :
-    Strictly Internal use only !!
-    Same as ZDICT_trainFromBuffer_legacy(), but does not control `samplesBuffer`.
-    `samplesBuffer` must be followed by noisy guard band to avoid out-of-buffer reads.
-    @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
-              or an error code.
-*/
-size_t ZDICT_trainFromBuffer_unsafe_legacy(void* dictBuffer, size_t dictBufferCapacity,
-                                           const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
-                                           ZDICT_legacy_params_t parameters);
--- a/contrib/experimental_dict_builders/benchmarkDictBuilder/Makefile
+++ b/contrib/experimental_dict_builders/benchmarkDictBuilder/Makefile
@ -2,7 +2,7 @@ ARG :=

 CC ?= gcc
 CFLAGS ?= -O3
-INCLUDES := -I ../randomDictBuilder -I ../../programs -I ../../lib/common -I ../../lib -I ../../lib/dictBuilder
+INCLUDES := -I ../randomDictBuilder -I ../../../programs -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder

 RANDOM_FILE := ../randomDictBuilder/random.c
 IO_FILE := ../randomDictBuilder/io.c
@ -34,11 +34,11 @@ io.o: $(IO_FILE)
 	$(CC) $(CFLAGS) $(INCLUDES) -c $(IO_FILE)

 libzstd.a:
-	$(MAKE) -C ../../lib libzstd.a
-	mv ../../lib/libzstd.a .
+	$(MAKE) -C ../../../lib libzstd.a
+	mv ../../../lib/libzstd.a .

 .PHONY: clean
 clean:
 	rm -f *.o benchmark libzstd.a
-	$(MAKE) -C ../../lib clean
+	$(MAKE) -C ../../../lib clean
 	echo "Cleaning is completed"
--- a/contrib/experimental_dict_builders/benchmarkDictBuilder/README.md
+++ b/contrib/experimental_dict_builders/benchmarkDictBuilder/README.md
@ -0,0 +1,47 @@
+Benchmarking Dictionary Builder
+
+### Permitted Argument:
+Input File/Directory (in=fileName): required; file/directory used to build dictionary; if directory, will operate recursively for files inside directory; can include multiple files/directories, each following "in="
+
+### Running Test:
+make test
+
+### Usage:
+Benchmark given input files: make ARG= followed by permitted arguments
+
+### Examples:
+make ARG="in=../../../lib/dictBuilder in=../../../lib/compress"
+
+### Benchmarking Result:
+
+github:
+| Algorithm     | Speed(sec)    | Compression Ratio  |
+| ------------- |:-------------:| ------------------:|
+| nodict        | 0.000004      |  2.999642          |
+| random        | 0.180238      |  8.786957          |
+| cover         | 33.891987     |  10.430999         |
+| legacy        | 1.077569      |  8.989482          |
+
+hg-commands
+| Algorithm     | Speed(sec)    | Compression Ratio  |
+| ------------- |:-------------:| ------------------:|
+| nodict        | 0.000006      |  2.425291          |
+| random        | 0.088735      |  3.489515          |
+| cover         | 35.447300     |  4.030274          |
+| legacy        | 1.048509      |  3.911896          |
+
+hg-manifest
+| Algorithm     | Speed(sec)    | Compression Ratio  |
+| ------------- |:-------------:| ------------------:|
+| nodict        | 0.000005      |  1.866385          |
+| random        | 1.148231      |  2.309485          |
+| cover         | 509.685257    |  2.575331          |
+| legacy        | 10.705866     |  2.506775          |
+
+hg-changelog
+| Algorithm     | Speed(sec)    | Compression Ratio  |
+| ------------- |:-------------:| ------------------:|
+| nodict        | 0.000005      |  1.377613          |
+| random        | 0.706434      |  2.096785          |
+| cover         | 122.815783    |  2.175706          |
+| legacy        | 3.010318      |  2.058273          |
--- a/contrib/experimental_dict_builders/benchmarkDictBuilder/benchmark.c
+++ b/contrib/experimental_dict_builders/benchmarkDictBuilder/benchmark.c
@ -44,12 +44,14 @@ static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
    exit(error);                                                          \
 }

+
 /*-*************************************
 *  Constants
 ***************************************/
 static const unsigned g_defaultMaxDictSize = 110 KB;
-#define MEMMULT 11
-#define NOISELENGTH 32
+#define DEFAULT_CLEVEL 3
+#define DEFAULT_DISPLAYLEVEL 2
+

 /*-*************************************
 *  Struct
@ -60,57 +62,6 @@ typedef struct {
 } dictInfo;


-/*-*************************************
-*  Commandline related functions
-***************************************/
-static unsigned readU32FromChar(const char** stringPtr){
-    const char errorMsg[] = "error: numeric value too large";
-    unsigned result = 0;
-    while ((**stringPtr >='0') && (**stringPtr <='9')) {
-        unsigned const max = (((unsigned)(-1)) / 10) - 1;
-        if (result > max) exit(1);
-        result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
-    }
-    if ((**stringPtr=='K') || (**stringPtr=='M')) {
-        unsigned const maxK = ((unsigned)(-1)) >> 10;
-        if (result > maxK) exit(1);
-        result <<= 10;
-        if (**stringPtr=='M') {
-            if (result > maxK) exit(1);
-            result <<= 10;
-        }
-        (*stringPtr)++;  /* skip `K` or `M` */
-        if (**stringPtr=='i') (*stringPtr)++;
-        if (**stringPtr=='B') (*stringPtr)++;
-    }
-    return result;
-}
-
-/** longCommandWArg() :
- *  check if *stringPtr is the same as longCommand.
- *  If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
- * @return 0 and doesn't modify *stringPtr otherwise.
- */
-static unsigned longCommandWArg(const char** stringPtr, const char* longCommand){
-    size_t const comSize = strlen(longCommand);
-    int const result = !strncmp(*stringPtr, longCommand, comSize);
-    if (result) *stringPtr += comSize;
-    return result;
-}
-
-static void fillNoise(void* buffer, size_t length)
-{
-    unsigned const prime1 = 2654435761U;
-    unsigned const prime2 = 2246822519U;
-    unsigned acc = prime1;
-    size_t p=0;;
-
-    for (p=0; p<length; p++) {
-        acc *= prime2;
-        ((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21);
-    }
-}
-
 /*-*************************************
 * Dictionary related operations
 ***************************************/
@ -122,9 +73,9 @@ dictInfo* createDictFromFiles(sampleInfo *info, unsigned maxDictSize,
                  ZDICT_random_params_t *randomParams, ZDICT_cover_params_t *coverParams,
                  ZDICT_legacy_params_t *legacyParams) {
    unsigned const displayLevel = randomParams ? randomParams->zParams.notificationLevel :
-                        coverParams ? coverParams->zParams.notificationLevel :
-                        legacyParams ? legacyParams->zParams.notificationLevel :
-                        0;   /* should never happen */
+                                  coverParams ? coverParams->zParams.notificationLevel :
+                                  legacyParams ? legacyParams->zParams.notificationLevel :
+                                  DEFAULT_DISPLAYLEVEL;   /* no dict */
    void* const dictBuffer = malloc(maxDictSize);

    dictInfo* dInfo;
@ -140,21 +91,15 @@ dictInfo* createDictFromFiles(sampleInfo *info, unsigned maxDictSize,
        }else if(coverParams) {
          dictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, maxDictSize, info->srcBuffer,
                                                info->samplesSizes, info->nbSamples, coverParams);
-        } else {
-          size_t totalSize= 0;
-          for (int i = 0; i < info->nbSamples; i++) {
-            totalSize += info->samplesSizes[i];
-          }
-          size_t const maxMem = findMaxMem(totalSize * MEMMULT) / MEMMULT;
-          size_t loadedSize = (size_t) MIN ((unsigned long long)maxMem, totalSize);
-          fillNoise((char*)(info->srcBuffer) + loadedSize, NOISELENGTH);
-          dictSize = ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, maxDictSize, info->srcBuffer,
+        } else if(legacyParams) {
+          dictSize = ZDICT_trainFromBuffer_legacy(dictBuffer, maxDictSize, info->srcBuffer,
                                               info->samplesSizes, info->nbSamples, *legacyParams);
+        } else {
+          dictSize = 0;
        }
        if (ZDICT_isError(dictSize)) {
            DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize));   /* should not happen */
            free(dictBuffer);
-            freeSampleInfo(info);
            return dInfo;
        }
        dInfo = (dictInfo *)malloc(sizeof(dictInfo));
@ -173,6 +118,7 @@ double compressWithDict(sampleInfo *srcInfo, dictInfo* dInfo, int compressionLev
  /* Local variables */
  size_t totalCompressedSize = 0;
  size_t totalOriginalSize = 0;
+  unsigned hasDict = dInfo->dictSize > 0 ? 1 : 0;
  double cRatio;
  size_t dstCapacity;
  int i;
@ -193,15 +139,6 @@ double compressWithDict(sampleInfo *srcInfo, dictInfo* dInfo, int compressionLev
    dst = malloc(dstCapacity);
  }

-  /* Create the cctx and cdict */
-  cctx = ZSTD_createCCtx();
-  cdict = ZSTD_createCDict(dInfo->dictBuffer, dInfo->dictSize, compressionLevel);
-
-  if(!cctx || !cdict || !dst) {
-    cRatio = -1;
-    goto _cleanup;
-  }
-
  /* Calculate offset for each sample */
  offsets = (size_t *)malloc((srcInfo->nbSamples + 1) * sizeof(size_t));
  offsets[0] = 0;
@ -209,13 +146,35 @@ double compressWithDict(sampleInfo *srcInfo, dictInfo* dInfo, int compressionLev
    offsets[i] = offsets[i - 1] + srcInfo->samplesSizes[i - 1];
  }

+  /* Create the cctx */
+  cctx = ZSTD_createCCtx();
+  if(!cctx || !dst) {
+    cRatio = -1;
+    goto _nodictCleanup;
+  }
+
+  /* Create CDict if there's a dictionary stored on buffer */
+  if (hasDict) {
+    cdict = ZSTD_createCDict(dInfo->dictBuffer, dInfo->dictSize, compressionLevel);
+    if(!cdict) {
+      cRatio = -1;
+      goto _dictCleanup;
+    }
+  }
+
  /* Compress each sample and sum their sizes*/
  const BYTE *const samples = (const BYTE *)srcInfo->srcBuffer;
  for (i = 0; i < srcInfo->nbSamples; i++) {
-    const size_t compressedSize = ZSTD_compress_usingCDict(cctx, dst, dstCapacity, samples + offsets[i], srcInfo->samplesSizes[i], cdict);
+    size_t compressedSize;
+    if(hasDict) {
+      compressedSize = ZSTD_compress_usingCDict(cctx, dst, dstCapacity, samples + offsets[i], srcInfo->samplesSizes[i], cdict);
+    } else {
+      compressedSize = ZSTD_compressCCtx(cctx, dst, dstCapacity,samples + offsets[i], srcInfo->samplesSizes[i], compressionLevel);
+    }
    if (ZSTD_isError(compressedSize)) {
      cRatio = -1;
-      goto _cleanup;
+      if(hasDict) goto _dictCleanup;
+      else goto _nodictCleanup;
    }
    totalCompressedSize += compressedSize;
  }
@ -230,15 +189,14 @@ double compressWithDict(sampleInfo *srcInfo, dictInfo* dInfo, int compressionLev
  DISPLAYLEVEL(2, "compressed size is %lu\n", totalCompressedSize);
  cRatio = (double)totalOriginalSize/(double)totalCompressedSize;

-_cleanup:
-  if(dst) {
-    free(dst);
-  }
-  if(offsets) {
-    free(offsets);
-  }
-  ZSTD_freeCCtx(cctx);
+_dictCleanup:
  ZSTD_freeCDict(cdict);
+
+_nodictCleanup:
+  free(dst);
+  free(offsets);
+  ZSTD_freeCCtx(cctx);
+
  return cRatio;
 }

@ -257,102 +215,48 @@ void freeDictInfo(dictInfo* info) {
 /*-********************************************************
  *  Benchmarking functions
 **********************************************************/
-/** benchmarkRandom() :
- *  Measure how long random dictionary builder takes and compression ratio with the random dictionary
+/** benchmarkDictBuilder() :
+ *  Measure how long a dictionary builder takes and the compression ratio obtained with the dictionary it builds (no dictionary is used when all three params are NULL)
 *  @return 0 if benchmark successfully, 1 otherwise
 */
-int benchmarkRandom(sampleInfo *srcInfo, unsigned maxDictSize, ZDICT_random_params_t *randomParam) {
-  const int displayLevel = randomParam->zParams.notificationLevel;
+int benchmarkDictBuilder(sampleInfo *srcInfo, unsigned maxDictSize, ZDICT_random_params_t *randomParam,
+                        ZDICT_cover_params_t *coverParam, ZDICT_legacy_params_t *legacyParam) {
+  /* Local variables */
+  const unsigned displayLevel = randomParam ? randomParam->zParams.notificationLevel :
+                                coverParam ? coverParam->zParams.notificationLevel :
+                                legacyParam ? legacyParam->zParams.notificationLevel :
+                                DEFAULT_DISPLAYLEVEL;   /* no dict */
+  const char* name = randomParam ? "RANDOM" :
+                    coverParam ? "COVER" :
+                    legacyParam ? "LEGACY" :
+                    "NODICT";    /* no dict */
+  const unsigned cLevel = randomParam ? randomParam->zParams.compressionLevel :
+                          coverParam ? coverParam->zParams.compressionLevel :
+                          legacyParam ? legacyParam->zParams.compressionLevel :
+                          DEFAULT_CLEVEL;   /* no dict */
  int result = 0;
-  clock_t t;
-  t = clock();
-  dictInfo* dInfo = createDictFromFiles(srcInfo, maxDictSize, randomParam, NULL, NULL);
-  t = clock() - t;
-  double time_taken = ((double)t)/CLOCKS_PER_SEC;
+
+  /* Calculate speed */
+  const UTIL_time_t begin = UTIL_getTime();
+  dictInfo* dInfo = createDictFromFiles(srcInfo, maxDictSize, randomParam, coverParam, legacyParam);
+  const U64 timeMicro = UTIL_clockSpanMicro(begin);
+  const double timeSec = timeMicro / (double)SEC_TO_MICRO;
  if (!dInfo) {
-    DISPLAYLEVEL(1, "RANDOM does not train successfully\n");
+    DISPLAYLEVEL(1, "%s does not train successfully\n", name);
    result = 1;
    goto _cleanup;
  }
-  DISPLAYLEVEL(2, "RANDOM took %f seconds to execute \n", time_taken);
+  DISPLAYLEVEL(2, "%s took %f seconds to execute \n", name, timeSec);

-  double cRatio = compressWithDict(srcInfo, dInfo, randomParam->zParams.compressionLevel, displayLevel);
+  /* Calculate compression ratio */
+  double cRatio = compressWithDict(srcInfo, dInfo, cLevel, displayLevel);
  if (cRatio < 0) {
-    DISPLAYLEVEL(1, "Compressing with RANDOM dictionary does not work\n");
-    result = 1;
-    goto _cleanup;
-  }
-  DISPLAYLEVEL(2, "Compression ratio with random dictionary is %f\n", cRatio);
-
-
-_cleanup:
-  freeDictInfo(dInfo);
-  return result;
-}
-
-/** benchmarkCover() :
- *  Measure how long random dictionary builder takes and compression ratio with the cover dictionary
- *  @return 0 if benchmark successfully, 1 otherwise
- */
-int benchmarkCover(sampleInfo *srcInfo, unsigned maxDictSize,
-                ZDICT_cover_params_t *coverParam) {
-  const int displayLevel = coverParam->zParams.notificationLevel;
-  int result = 0;
-  clock_t t;
-  t = clock();
-  dictInfo* dInfo = createDictFromFiles(srcInfo, maxDictSize, NULL, coverParam, NULL);
-  t = clock() - t;
-  double time_taken = ((double)t)/CLOCKS_PER_SEC;
-  if (!dInfo) {
-    DISPLAYLEVEL(1, "COVER does not train successfully\n");
-    result = 1;
-    goto _cleanup;
-  }
-  DISPLAYLEVEL(2, "COVER took %f seconds to execute \n", time_taken);
-
-  double cRatio = compressWithDict(srcInfo, dInfo, coverParam->zParams.compressionLevel, displayLevel);
-  if (cRatio < 0) {
-    DISPLAYLEVEL(1, "Compressing with COVER dictionary does not work\n");
-    result = 1;
-    goto _cleanup;
-  }
-  DISPLAYLEVEL(2, "Compression ratio with cover dictionary is %f\n", cRatio);
-
-_cleanup:
-  freeDictInfo(dInfo);
-  return result;
-}
-
-
-
-/** benchmarkLegacy() :
- *  Measure how long legacy dictionary builder takes and compression ratio with the legacy dictionary
- *  @return 0 if benchmark successfully, 1 otherwise
- */
-int benchmarkLegacy(sampleInfo *srcInfo, unsigned maxDictSize, ZDICT_legacy_params_t *legacyParam) {
-  const int displayLevel = legacyParam->zParams.notificationLevel;
-  int result = 0;
-  clock_t t;
-  t = clock();
-  dictInfo* dInfo = createDictFromFiles(srcInfo, maxDictSize, NULL, NULL, legacyParam);
-  t = clock() - t;
-  double time_taken = ((double)t)/CLOCKS_PER_SEC;
-  if (!dInfo) {
-    DISPLAYLEVEL(1, "LEGACY does not train successfully\n");
+    DISPLAYLEVEL(1, "Compressing with %s dictionary does not work\n", name);
    result = 1;
    goto _cleanup;

  }
-  DISPLAYLEVEL(2, "LEGACY took %f seconds to execute \n", time_taken);
-
-  double cRatio = compressWithDict(srcInfo, dInfo, legacyParam->zParams.compressionLevel, displayLevel);
-  if (cRatio < 0) {
-    DISPLAYLEVEL(1, "Compressing with LEGACY dictionary does not work\n");
-    result = 1;
-    goto _cleanup;
-
-  }
-  DISPLAYLEVEL(2, "Compression ratio with legacy dictionary is %f\n", cRatio);
+  DISPLAYLEVEL(2, "Compression ratio with %s dictionary is %f\n", name, cRatio);

 _cleanup:
  freeDictInfo(dInfo);
@ -363,15 +267,16 @@ _cleanup:

 int main(int argCount, const char* argv[])
 {
-  int displayLevel = 2;
+  const int displayLevel = DEFAULT_DISPLAYLEVEL;
  const char* programName = argv[0];
  int result = 0;
+
  /* Initialize arguments to default values */
-  unsigned k = 200;
-  unsigned d = 6;
-  unsigned cLevel = 3;
-  unsigned dictID = 0;
-  unsigned maxDictSize = g_defaultMaxDictSize;
+  const unsigned k = 200;
+  const unsigned d = 6;
+  const unsigned cLevel = DEFAULT_CLEVEL;
+  const unsigned dictID = 0;
+  const unsigned maxDictSize = g_defaultMaxDictSize;

  /* Initialize table to store input files */
  const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*));
@ -379,7 +284,7 @@ int main(int argCount, const char* argv[])

  char* fileNamesBuf = NULL;
  unsigned fileNamesNb = filenameIdx;
-  int followLinks = 0;
+  const int followLinks = 0;
  const char** extendedFileList = NULL;

  /* Parse arguments */
@ -394,7 +299,6 @@ int main(int argCount, const char* argv[])
    return 1;
  }

-
  /* Get the list of all files recursively (because followLinks==0)*/
  extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf,
                                        &fileNamesNb, followLinks);
@ -406,6 +310,7 @@ int main(int argCount, const char* argv[])
    filenameIdx = fileNamesNb;
  }

+  /* get sampleInfo */
  size_t blockSize = 0;
  sampleInfo* srcInfo= getSampleInfo(filenameTable,
                    filenameIdx, blockSize, maxDictSize, displayLevel);
@ -416,38 +321,53 @@ int main(int argCount, const char* argv[])
  zParams.notificationLevel = displayLevel;
  zParams.dictID = dictID;

+  /* with no dict */
+  {
+    const int noDictResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL);
+    if(noDictResult) {
+      result = 1;
+      goto _cleanup;
+    }
+  }
+
  /* for random */
-  ZDICT_random_params_t randomParam;
-  randomParam.zParams = zParams;
-  randomParam.k = k;
-  int randomResult = benchmarkRandom(srcInfo, maxDictSize, &randomParam);
-  if(randomResult) {
-    result = 1;
-    goto _cleanup;
+  {
+    ZDICT_random_params_t randomParam;
+    randomParam.zParams = zParams;
+    randomParam.k = k;
+    const int randomResult = benchmarkDictBuilder(srcInfo, maxDictSize, &randomParam, NULL, NULL);
+    if(randomResult) {
+      result = 1;
+      goto _cleanup;
+    }
  }

  /* for cover */
-  ZDICT_cover_params_t coverParam;
-  memset(&coverParam, 0, sizeof(coverParam));
-  coverParam.zParams = zParams;
-  coverParam.splitPoint = 1.0;
-  coverParam.d = d;
-  coverParam.steps = 40;
-  coverParam.nbThreads = 1;
-  int coverOptResult = benchmarkCover(srcInfo, maxDictSize, &coverParam);
-  if(coverOptResult) {
-    result = 1;
-    goto _cleanup;
+  {
+    ZDICT_cover_params_t coverParam;
+    memset(&coverParam, 0, sizeof(coverParam));
+    coverParam.zParams = zParams;
+    coverParam.splitPoint = 1.0;
+    coverParam.d = d;
+    coverParam.steps = 40;
+    coverParam.nbThreads = 1;
+    const int coverOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, &coverParam, NULL);
+    if(coverOptResult) {
+      result = 1;
+      goto _cleanup;
+    }
  }

  /* for legacy */
-  ZDICT_legacy_params_t legacyParam;
-  legacyParam.zParams = zParams;
-  legacyParam.selectivityLevel = 9;
-  int legacyResult = benchmarkLegacy(srcInfo, maxDictSize, &legacyParam);
-  if(legacyResult) {
-    result = 1;
-    goto _cleanup;
+  {
+    ZDICT_legacy_params_t legacyParam;
+    legacyParam.zParams = zParams;
+    legacyParam.selectivityLevel = 9;
+    const int legacyResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, &legacyParam);
+    if(legacyResult) {
+      result = 1;
+      goto _cleanup;
+    }
  }

  /* Free allocated memory */
--- a/contrib/experimental_dict_builders/benchmarkDictBuilder/dictBuilder.h
+++ b/contrib/experimental_dict_builders/benchmarkDictBuilder/dictBuilder.h
@ -0,0 +1,6 @@
+/* ZDICT_trainFromBuffer_legacy() :
+ * issue : samplesBuffer needs to be followed by a noisy guard band.
+ * workaround : duplicate the buffer, and add the noise */
+size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity,
+                                    const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
+                                    ZDICT_legacy_params_t params);
--- a/contrib/experimental_dict_builders/benchmarkDictBuilder/test.sh
+++ b/contrib/experimental_dict_builders/benchmarkDictBuilder/test.sh
@ -1,2 +1,2 @@
 echo "Benchmark with in=../../lib/common"
-./benchmark in=../../lib/common
+./benchmark in=../../../lib/common
--- a/contrib/experimental_dict_builders/randomDictBuilder/Makefile
+++ b/contrib/experimental_dict_builders/randomDictBuilder/Makefile
@ -2,9 +2,9 @@ ARG :=

 CC ?= gcc
 CFLAGS ?= -O3
-INCLUDES := -I ../../programs -I ../../lib/common -I ../../lib -I ../../lib/dictBuilder
+INCLUDES := -I ../../../programs -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder

-TEST_INPUT := ../../lib
+TEST_INPUT := ../../../lib
 TEST_OUTPUT := randomDict

 all: main run clean
@ -30,8 +30,8 @@ io.o: io.c
 	$(CC) $(CFLAGS) $(INCLUDES) -c io.c

 libzstd.a:
-	$(MAKE) -C ../../lib libzstd.a
-	mv ../../lib/libzstd.a .
+	$(MAKE) -C ../../../lib libzstd.a
+	mv ../../../lib/libzstd.a .

 .PHONY: testrun
 testrun: main
@ -48,5 +48,5 @@ testshell: test.sh
 .PHONY: clean
 clean:
 	rm -f *.o main libzstd.a
-	$(MAKE) -C ../../lib clean
+	$(MAKE) -C ../../../lib clean
 	echo "Cleaning is completed"
--- a/contrib/experimental_dict_builders/randomDictBuilder/README.md
+++ b/contrib/experimental_dict_builders/randomDictBuilder/README.md
@ -16,5 +16,5 @@ To build a random dictionary with the provided arguments: make ARG= followed by


 ### Examples:
-make ARG="in=../../lib/dictBuilder out=dict100 dictID=520"
-make ARG="in=../../lib/dictBuilder in=../../lib/compress"
+make ARG="in=../../../lib/dictBuilder out=dict100 dictID=520"
+make ARG="in=../../../lib/dictBuilder in=../../../lib/compress"
--- a/contrib/experimental_dict_builders/randomDictBuilder/io.c
+++ b/contrib/experimental_dict_builders/randomDictBuilder/io.c
@ -53,6 +53,39 @@ static const size_t g_maxMemory = (sizeof(size_t) == 4) ?
 #define NOISELENGTH 32


+/*-*************************************
+*  Commandline related functions
+***************************************/
+unsigned readU32FromChar(const char** stringPtr){   /* parse an unsigned decimal at *stringPtr, with optional K/M suffix; advances *stringPtr past what it consumed */
+    const char errorMsg[] = "error: numeric value too large";   /* NOTE(review): never referenced below; the overflow paths call exit(1) without printing it */
+    unsigned result = 0;
+    while ((**stringPtr >='0') && (**stringPtr <='9')) {   /* accumulate leading decimal digits */
+        unsigned const max = (((unsigned)(-1)) / 10) - 1;   /* largest value accepted before multiplying by 10 */
+        if (result > max) exit(1);   /* value too large : terminate the process */
+        result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
+    }
+    if ((**stringPtr=='K') || (**stringPtr=='M')) {   /* optional binary-multiple suffix */
+        unsigned const maxK = ((unsigned)(-1)) >> 10;   /* largest value that can be shifted left by 10 without losing bits */
+        if (result > maxK) exit(1);
+        result <<= 10;   /* K : multiply by 1024 */
+        if (**stringPtr=='M') {
+            if (result > maxK) exit(1);
+            result <<= 10;   /* M : multiply by 1024 a second time */
+        }
+        (*stringPtr)++;  /* skip `K` or `M` */
+        if (**stringPtr=='i') (*stringPtr)++;   /* accept an optional `i` ... */
+        if (**stringPtr=='B') (*stringPtr)++;   /* ... and an optional `B` (so K, Ki, KB, KiB are all consumed) */
+    }
+    return result;   /* 0 when *stringPtr does not start with a digit */
+}
+
+unsigned longCommandWArg(const char** stringPtr, const char* longCommand){   /* prefix test : does *stringPtr begin with longCommand ? */
+    size_t const comSize = strlen(longCommand);
+    int const result = !strncmp(*stringPtr, longCommand, comSize);   /* 1 when the first comSize characters match, 0 otherwise */
+    if (result) *stringPtr += comSize;   /* on a match, step *stringPtr to just after the command text */
+    return result;
+}
+

 /* ********************************************************
 *  File related operations
--- a/contrib/experimental_dict_builders/randomDictBuilder/io.h
+++ b/contrib/experimental_dict_builders/randomDictBuilder/io.h
@ -50,5 +50,11 @@ void freeSampleInfo(sampleInfo *info);
 void saveDict(const char* dictFileName, const void* buff, size_t buffSize);


+unsigned readU32FromChar(const char** stringPtr);   /* reads an unsigned decimal (optional K/M suffix) at *stringPtr and advances *stringPtr past it; calls exit(1) if the value is too large */

-size_t findMaxMem(unsigned long long requiredMem);
+/** longCommandWArg() :
+ *  check if *stringPtr is the same as longCommand.
+ *  If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
+ * @return 0 and doesn't modify *stringPtr otherwise.
+ */
+unsigned longCommandWArg(const char** stringPtr, const char* longCommand);
--- a/contrib/experimental_dict_builders/randomDictBuilder/main.c
+++ b/contrib/experimental_dict_builders/randomDictBuilder/main.c
@ -52,46 +52,6 @@ static const unsigned g_defaultMaxDictSize = 110 KB;



-/*-*************************************
-*  Commandline related functions
-***************************************/
-static unsigned readU32FromChar(const char** stringPtr){
-    const char errorMsg[] = "error: numeric value too large";
-    unsigned result = 0;
-    while ((**stringPtr >='0') && (**stringPtr <='9')) {
-        unsigned const max = (((unsigned)(-1)) / 10) - 1;
-        if (result > max) exit(1);
-        result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
-    }
-    if ((**stringPtr=='K') || (**stringPtr=='M')) {
-        unsigned const maxK = ((unsigned)(-1)) >> 10;
-        if (result > maxK) exit(1);
-        result <<= 10;
-        if (**stringPtr=='M') {
-            if (result > maxK) exit(1);
-            result <<= 10;
-        }
-        (*stringPtr)++;  /* skip `K` or `M` */
-        if (**stringPtr=='i') (*stringPtr)++;
-        if (**stringPtr=='B') (*stringPtr)++;
-    }
-    return result;
-}
-
-/** longCommandWArg() :
- *  check if *stringPtr is the same as longCommand.
- *  If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
- * @return 0 and doesn't modify *stringPtr otherwise.
- */
-static unsigned longCommandWArg(const char** stringPtr, const char* longCommand){
-    size_t const comSize = strlen(longCommand);
-    int const result = !strncmp(*stringPtr, longCommand, comSize);
-    if (result) *stringPtr += comSize;
-    return result;
-}
-
-
-
 /*-*************************************
 *  RANDOM
 ***************************************/
--- a/contrib/experimental_dict_builders/randomDictBuilder/random.c
+++ b/contrib/experimental_dict_builders/randomDictBuilder/random.c
--- a/contrib/experimental_dict_builders/randomDictBuilder/random.h
+++ b/contrib/experimental_dict_builders/randomDictBuilder/random.h
--- a/contrib/experimental_dict_builders/randomDictBuilder/test.sh
+++ b/contrib/experimental_dict_builders/randomDictBuilder/test.sh
@ -1,12 +1,12 @@
 echo "Building random dictionary with in=../../lib/common k=200 out=dict1"
-./main in=../../lib/common k=200 out=dict1
-zstd -be3 -D dict1 -r ../../lib/common -q
+./main in=../../../lib/common k=200 out=dict1
+zstd -be3 -D dict1 -r ../../../lib/common -q
 echo "Building random dictionary with in=../../lib/common k=500 out=dict2 dictID=100 maxdict=140000"
-./main in=../../lib/common k=500 out=dict2 dictID=100 maxdict=140000
-zstd -be3 -D dict2 -r ../../lib/common -q
+./main in=../../../lib/common k=500 out=dict2 dictID=100 maxdict=140000
+zstd -be3 -D dict2 -r ../../../lib/common -q
 echo "Building random dictionary with 2 sample sources"
-./main in=../../lib/common in=../../lib/compress out=dict3
-zstd -be3 -D dict3 -r ../../lib/common -q
+./main in=../../../lib/common in=../../../lib/compress out=dict3
+zstd -be3 -D dict3 -r ../../../lib/common -q
 echo "Removing dict1 dict2 dict3"
 rm -f dict1 dict2 dict3