diff --git a/tests/README.md b/tests/README.md index 442285db..c1128571 100644 --- a/tests/README.md +++ b/tests/README.md @@ -95,7 +95,6 @@ Full list of arguments ``` -T# : set level 1 speed objective -B# : cut input into blocks of size # (default : single block) - -i# : iteration loops -S : benchmarks a single run (example command: -Sl3w10h12) w# - windowLog h# - hashLog @@ -119,7 +118,7 @@ Full list of arguments speedRatio= (accepts decimals) : determines value of gains in speed vs gains in ratio when determining overall winner (default 5 (1% ratio = 5% speed)). - tries= : Maximum number of random restarts on a single strategy before switching (Default 3) + tries= : Maximum number of random restarts on a single strategy before switching (Default 5) Higher values will make optimizer run longer, more chances to find better solution. memLog : Limits the log of the size of each memotable (1 per strategy). Setting memLog = 0 turns off memoization --display= : specifiy which parameters are included in the output diff --git a/tests/paramgrill.c b/tests/paramgrill.c index 0d814bf7..e94eead0 100644 --- a/tests/paramgrill.c +++ b/tests/paramgrill.c @@ -37,9 +37,6 @@ #define WELCOME_MESSAGE "*** %s %s %i-bits, by %s ***\n", PROGRAM_DESCRIPTION, ZSTD_VERSION_STRING, (int)(sizeof(void*)*8), AUTHOR #define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */ - -#define NBLOOPS 2 -#define TIMELOOP (2 * SEC_TO_MICRO) #define NB_LEVELS_TRACKED 22 /* ensured being >= ZSTD_maxCLevel() in BMK_init_level_constraints() */ static const size_t maxMemory = (sizeof(size_t)==4) ? (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31)); @@ -54,12 +51,12 @@ static const int g_maxNbVariations = 64; **************************************/ #define DISPLAY(...) fprintf(stderr, __VA_ARGS__) #define DISPLAYLEVEL(n, ...) if(g_displayLevel >= n) { fprintf(stderr, __VA_ARGS__); } +#define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); } #define TIMED 0 #ifndef DEBUG # define DEBUG 0 #endif -#define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); } #undef MIN #undef MAX @@ -68,8 +65,6 @@ static const int g_maxNbVariations = 64; #define CUSTOM_LEVEL 99 #define BASE_CLEVEL 1 -#undef ZSTD_WINDOWLOG_MAX -#define ZSTD_WINDOWLOG_MAX 27 //no long range stuff for now. #define FADT_MIN 0 #define FADT_MAX ((U32)-1) @@ -223,21 +218,30 @@ static void displayParamVal(FILE* f, varInds_t param, U32 value, int width) { typedef BYTE U8; +/* General Utility */ static U32 g_timeLimit_s = 99999; /* about 27 hours */ -static U32 g_nbIterations = NBLOOPS; -static double g_compressibility = COMPRESSIBILITY_DEFAULT; +static UTIL_time_t g_time; /* to be used to compare solution finding speeds to compare to original */ static U32 g_blockSize = 0; static U32 g_rand = 1; + +/* Display */ +static int g_displayLevel = 3; +static BYTE g_silenceParams[NUM_PARAMS]; + +/* Mode Selection */ static U32 g_singleRun = 0; static U32 g_optimizer = 0; +static int g_optmode = 0; + +/* For cLevel Table generation */ static U32 g_target = 0; static U32 g_noSeed = 0; -static paramValues_t g_params; /* Initialized at the beginning of main w/ emptyParams() function */ -static UTIL_time_t g_time; /* to be used to compare solution finding speeds to compare to original */ -static U32 g_memoTableLog = PARAM_UNSET; -static int g_displayLevel = 3; -static BYTE g_silenceParams[NUM_PARAMS]; +/* For optimizer */ +static paramValues_t g_params; /* Initialized at the beginning of main w/ emptyParams() function */ +static double g_ratioMultiplier = 5.; +static U32 g_strictness = PARAM_UNSET; /* range 1 - 100, measure of how strict */ +static BMK_result_t g_lvltarget; typedef enum { directMap, @@ -258,11 +262,6 @@ typedef struct { paramValues_t params; } winnerInfo_t; -typedef struct { - BMK_result_t result; - ZSTD_compressionParameters params; -} oldWinnerInfo_t; - typedef struct { U32 cSpeed; /* bytes / sec */ U32 dSpeed; @@ -276,33 +275,33 @@ struct winner_ll_node { }; static winner_ll_node* g_winners; /* linked list sorted ascending by cSize & cSpeed */ -static BMK_result_t g_lvltarget; -static int g_optmode = 0; - -static double g_ratioMultiplier = 5.; - -/* g_mode? */ - -/* range 0 - 99, measure of how strict */ -static U32 g_strictness = PARAM_UNSET; - -void BMK_SetNbIterations(int nbLoops) -{ - g_nbIterations = nbLoops; - DISPLAY("- %u iterations -\n", g_nbIterations); -} /* * Additional Global Variables (Defined Above Use) * g_level_constraint * g_alreadyTested * g_maxTries + * g_clockGranularity */ /*-******************************************************* -* Private functions +* General Util Functions *********************************************************/ +/* nullified useless params, to ensure count stats */ +/* cleans up params for memoizing / display */ +static paramValues_t sanitizeParams(paramValues_t params) +{ + if (params.vals[strt_ind] == ZSTD_fast) + params.vals[clog_ind] = 0, params.vals[slog_ind] = 0; + if (params.vals[strt_ind] == ZSTD_dfast) + params.vals[slog_ind] = 0; + if (params.vals[strt_ind] != ZSTD_btopt && params.vals[strt_ind] != ZSTD_btultra && params.vals[strt_ind] != ZSTD_fast) + params.vals[tlen_ind] = 0; + + return params; +} + static ZSTD_compressionParameters pvalsToCParams(paramValues_t p) { ZSTD_compressionParameters c; memset(&c, 0, sizeof(ZSTD_compressionParameters)); @@ -340,7 +339,7 @@ static paramValues_t adjustParams(paramValues_t p, const size_t maxBlockSize, co paramValues_t ot = p; varInds_t i; p = cParamsToPVals(ZSTD_adjustCParams(pvalsToCParams(p), maxBlockSize, dictSize)); - + if(!dictSize) { p.vals[fadt_ind] = 0; } /* retain value of all other parameters */ for(i = strt_ind + 1; i < NUM_PARAMS; i++) { p.vals[i] = ot.vals[i]; @@ -348,9 +347,6 @@ static paramValues_t adjustParams(paramValues_t p, const size_t maxBlockSize, co return p; } -/* accuracy in seconds only, span can be multiple years */ -static U32 BMK_timeSpan(const UTIL_time_t tStart) { return (U32)(UTIL_clockSpanMicro(tStart) / 1000000ULL); } - static size_t BMK_findMaxMem(U64 requiredMem) { size_t const step = 64 MB; @@ -369,6 +365,8 @@ static size_t BMK_findMaxMem(U64 requiredMem) return (size_t) requiredMem; } +/* accuracy in seconds only, span can be multiple years */ +static U32 BMK_timeSpan(const UTIL_time_t tStart) { return (U32)(UTIL_clockSpanMicro(tStart) / 1000000ULL); } static U32 FUZ_rotl32(U32 x, U32 r) { @@ -387,42 +385,6 @@ U32 FUZ_rand(U32* src) return rand32 >> 5; } -/** longCommandWArg() : - * check if *stringPtr is the same as longCommand. - * If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand. - * @return 0 and doesn't modify *stringPtr otherwise. - * from zstdcli.c - */ -static unsigned longCommandWArg(const char** stringPtr, const char* longCommand) -{ - size_t const comSize = strlen(longCommand); - int const result = !strncmp(*stringPtr, longCommand, comSize); - if (result) *stringPtr += comSize; - return result; -} - -static U64 g_clockGranularity = 100000000ULL; - -static void findClockGranularity(void) { - UTIL_time_t clockStart = UTIL_getTime(); - U64 el1 = 0, el2 = 0; - int i = 0; - do { - el1 = el2; - el2 = UTIL_clockSpanNano(clockStart); - if(el1 < el2) { - U64 iv = el2 - el1; - if(g_clockGranularity > iv) { - g_clockGranularity = iv; - i = 0; - } else { - i++; - } - } - } while(i < 10); - DEBUGOUTPUT("Granularity: %llu\n", (unsigned long long)g_clockGranularity); -} - /* allows zeros */ #define CLAMPCHECK(val,min,max) { \ if (((val)<(min)) | ((val)>(max))) { \ @@ -448,38 +410,92 @@ static paramValues_t cParamUnsetMin(paramValues_t paramTarget) { return paramTarget; } -static void BMK_translateAdvancedParams(FILE* f, const paramValues_t params) { - varInds_t v; - int first = 1; - fprintf(f,"--zstd="); - for(v = 0; v < NUM_PARAMS; v++) { - if(g_silenceParams[v]) { continue; } - if(!first) { fprintf(f, ","); } - fprintf(f,"%s=", g_paramNames[v]); - - if(v == strt_ind) { fprintf(f,"%u", params.vals[v]); } - else { displayParamVal(f, v, params.vals[v], 0); } - first = 0; +static paramValues_t emptyParams(void) { + U32 i; + paramValues_t p; + for(i = 0; i < NUM_PARAMS; i++) { + p.vals[i] = PARAM_UNSET; } - fprintf(f, "\n"); + return p; } -static void BMK_displayOneResult(FILE* f, winnerInfo_t res, const size_t srcSize) { - varInds_t v; - int first = 1; - res.params = cParamUnsetMin(res.params); - fprintf(f," {"); - for(v = 0; v < NUM_PARAMS; v++) { - if(g_silenceParams[v]) { continue; } - if(!first) { fprintf(f, ","); } - displayParamVal(f, v, res.params.vals[v], 3); - first = 0; +static winnerInfo_t initWinnerInfo(const paramValues_t p) { + winnerInfo_t w1; + w1.result.cSpeed = 0.; + w1.result.dSpeed = 0.; + w1.result.cMem = (size_t)-1; + w1.result.cSize = (size_t)-1; + w1.params = p; + return w1; +} + +static paramValues_t overwriteParams(paramValues_t base, const paramValues_t mask) { + U32 i; + for(i = 0; i < NUM_PARAMS; i++) { + if(mask.vals[i] != PARAM_UNSET) { + base.vals[i] = mask.vals[i]; + } + } + return base; +} + +static void paramVaryOnce(const varInds_t paramIndex, const int amt, paramValues_t* ptr) { + ptr->vals[paramIndex] = rangeMap(paramIndex, invRangeMap(paramIndex, ptr->vals[paramIndex]) + amt); +} + +/* varies ptr by nbChanges respecting varyParams*/ +static void paramVariation(paramValues_t* ptr, memoTable_t* mtAll, const U32 nbChanges) +{ + paramValues_t p; + U32 validated = 0; + while (!validated) { + U32 i; + p = *ptr; + for (i = 0 ; i < nbChanges ; i++) { + const U32 changeID = (U32)FUZ_rand(&g_rand) % (mtAll[p.vals[strt_ind]].varLen << 1); + paramVaryOnce(mtAll[p.vals[strt_ind]].varArray[changeID >> 1], ((changeID & 1) << 1) - 1, &p); + } + validated = paramValid(p); + } + *ptr = p; +} + +/* Completely random parameter selection */ +static paramValues_t randomParams(void) +{ + varInds_t v; paramValues_t p; + for(v = 0; v <= NUM_PARAMS; v++) { + p.vals[v] = rangeMap(v, FUZ_rand(&g_rand) % rangetable[v]); + } + return p; +} + +static U64 g_clockGranularity = 100000000ULL; + +static void findClockGranularity(void) { + UTIL_time_t clockStart = UTIL_getTime(); + U64 el1 = 0, el2 = 0; + int i = 0; + do { + el1 = el2; + el2 = UTIL_clockSpanNano(clockStart); + if(el1 < el2) { + U64 iv = el2 - el1; + if(g_clockGranularity > iv) { + g_clockGranularity = iv; + i = 0; + } else { + i++; } - - fprintf(f, " }, /* R:%5.3f at %5.1f MB/s - %5.1f MB/s */\n", - (double)srcSize / res.result.cSize, (double)res.result.cSpeed / (1 MB), (double)res.result.dSpeed / (1 MB)); + } + } while(i < 10); + DEBUGOUTPUT("Granularity: %llu\n", (unsigned long long)g_clockGranularity); } +/*-************************************ +* Optimizer Util Functions +**************************************/ + /* checks results are feasible */ static int feasible(const BMK_result_t results, const constraint_t target) { return (results.cSpeed >= target.cSpeed) && (results.dSpeed >= target.dSpeed) && (results.cMem <= target.cMem) && (!g_optmode || results.cSize <= g_lvltarget.cSize); @@ -536,52 +552,322 @@ static constraint_t relaxTarget(constraint_t target) { return target; } -/*-******************************************************* -* Bench functions -*********************************************************/ - -static paramValues_t emptyParams(void) { - U32 i; - paramValues_t p; - for(i = 0; i < NUM_PARAMS; i++) { - p.vals[i] = PARAM_UNSET; +static void optimizerAdjustInput(paramValues_t* pc, const size_t maxBlockSize) { + varInds_t v; + for(v = 0; v < NUM_PARAMS; v++) { + if(pc->vals[v] != PARAM_UNSET) { + U32 newval = MIN(MAX(pc->vals[v], mintable[v]), maxtable[v]); + if(newval != pc->vals[v]) { + pc->vals[v] = newval; + DISPLAY("Warning: parameter %s not in valid range, adjusting to ", g_paramNames[v]); displayParamVal(stderr, v, newval, 0); DISPLAY("\n"); + } + } + } + + if(pc->vals[wlog_ind] != PARAM_UNSET) { + + U32 sshb = maxBlockSize > 1 ? ZSTD_highbit32((U32)(maxBlockSize-1)) + 1 : 1; + /* edge case of highBit not working for 0 */ + + if(maxBlockSize < (1ULL << 31) && sshb + 1 < pc->vals[wlog_ind]) { + U32 adjust = MAX(mintable[wlog_ind], sshb); + if(adjust != pc->vals[wlog_ind]) { + pc->vals[wlog_ind] = adjust; + DISPLAY("Warning: windowLog larger than src/block size, adjusted to %u\n", pc->vals[wlog_ind]); + } + } + } + + if(pc->vals[wlog_ind] != PARAM_UNSET && pc->vals[clog_ind] != PARAM_UNSET) { + U32 maxclog; + if(pc->vals[strt_ind] == PARAM_UNSET || pc->vals[strt_ind] >= (U32)ZSTD_btlazy2) { + maxclog = pc->vals[wlog_ind] + 1; + } else { + maxclog = pc->vals[wlog_ind]; + } + + if(pc->vals[clog_ind] > maxclog) { + pc->vals[clog_ind] = maxclog; + DISPLAY("Warning: chainlog too much larger than windowLog size, adjusted to %u\n", pc->vals[clog_ind]); + } + } + + if(pc->vals[wlog_ind] != PARAM_UNSET && pc->vals[hlog_ind] != PARAM_UNSET) { + if(pc->vals[wlog_ind] + 1 < pc->vals[hlog_ind]) { + pc->vals[hlog_ind] = pc->vals[wlog_ind] + 1; + DISPLAY("Warning: hashlog too much larger than windowLog size, adjusted to %u\n", pc->vals[hlog_ind]); + } + } + + if(pc->vals[slog_ind] != PARAM_UNSET && pc->vals[clog_ind] != PARAM_UNSET) { + if(pc->vals[slog_ind] > pc->vals[clog_ind]) { + pc->vals[clog_ind] = pc->vals[slog_ind]; + DISPLAY("Warning: searchLog larger than chainLog, adjusted to %u\n", pc->vals[slog_ind]); + } } - return p; } -static winnerInfo_t initWinnerInfo(const paramValues_t p) { - winnerInfo_t w1; - w1.result.cSpeed = 0.; - w1.result.dSpeed = 0.; - w1.result.cMem = (size_t)-1; - w1.result.cSize = (size_t)-1; - w1.params = p; - return w1; +static int redundantParams(const paramValues_t paramValues, const constraint_t target, const size_t maxBlockSize) { + return + (ZSTD_estimateCStreamSize_usingCParams(pvalsToCParams(paramValues)) > (size_t)target.cMem) /* Uses too much memory */ + || ((1ULL << (paramValues.vals[wlog_ind] - 1)) >= maxBlockSize && paramValues.vals[wlog_ind] != mintable[wlog_ind]) /* wlog too much bigger than src size */ + || (paramValues.vals[clog_ind] > (paramValues.vals[wlog_ind] + (paramValues.vals[strt_ind] > ZSTD_btlazy2))) /* chainLog larger than windowLog*/ + || (paramValues.vals[slog_ind] > paramValues.vals[clog_ind]) /* searchLog larger than chainLog */ + || (paramValues.vals[hlog_ind] > paramValues.vals[wlog_ind] + 1); /* hashLog larger than windowLog + 1 */ + } -typedef struct { - void* srcBuffer; - size_t srcSize; - const void** srcPtrs; - size_t* srcSizes; - void** dstPtrs; - size_t* dstCapacities; - size_t* dstSizes; - void** resPtrs; - size_t* resSizes; - size_t nbBlocks; - size_t maxBlockSize; -} buffers_t; +/*-************************************ +* Display Functions +**************************************/ + +static void BMK_translateAdvancedParams(FILE* f, const paramValues_t params) { + varInds_t v; + int first = 1; + fprintf(f,"--zstd="); + for(v = 0; v < NUM_PARAMS; v++) { + if(g_silenceParams[v]) { continue; } + if(!first) { fprintf(f, ","); } + fprintf(f,"%s=", g_paramNames[v]); + + if(v == strt_ind) { fprintf(f,"%u", params.vals[v]); } + else { displayParamVal(f, v, params.vals[v], 0); } + first = 0; + } + fprintf(f, "\n"); +} + +static void BMK_displayOneResult(FILE* f, winnerInfo_t res, const size_t srcSize) { + varInds_t v; + int first = 1; + res.params = cParamUnsetMin(res.params); + fprintf(f," {"); + for(v = 0; v < NUM_PARAMS; v++) { + if(g_silenceParams[v]) { continue; } + if(!first) { fprintf(f, ","); } + displayParamVal(f, v, res.params.vals[v], 3); + first = 0; + } + + fprintf(f, " }, /* R:%5.3f at %5.1f MB/s - %5.1f MB/s */\n", + (double)srcSize / res.result.cSize, (double)res.result.cSpeed / (1 MB), (double)res.result.dSpeed / (1 MB)); +} + +/* Writes to f the results of a parameter benchmark */ +/* when used with --optimize, will only print results better than previously discovered */ +static void BMK_printWinner(FILE* f, const U32 cLevel, const BMK_result_t result, const paramValues_t params, const size_t srcSize) +{ + char lvlstr[15] = "Custom Level"; + winnerInfo_t w; + w.params = params; + w.result = result; + + fprintf(f, "\r%79s\r", ""); + + if(cLevel != CUSTOM_LEVEL) { + snprintf(lvlstr, 15, " Level %2u ", cLevel); + } + + if(TIMED) { + const U64 time = UTIL_clockSpanNano(g_time); + const U64 minutes = time / (60ULL * TIMELOOP_NANOSEC); + fprintf(f, "%1lu:%2lu:%05.2f - ", (unsigned long) minutes / 60,(unsigned long) minutes % 60, (double)(time - minutes * TIMELOOP_NANOSEC * 60ULL)/TIMELOOP_NANOSEC); + } + + fprintf(f, "/* %s */ ", lvlstr); + BMK_displayOneResult(f, w, srcSize); +} + +/* comparison function: */ +/* strictly better, strictly worse, equal, speed-side adv, size-side adv */ +#define WORSE_RESULT 0 +#define BETTER_RESULT 1 +#define ERROR_RESULT 2 + +#define SPEED_RESULT 4 +#define SIZE_RESULT 5 +/* maybe have epsilon-eq to limit table size? */ +static int speedSizeCompare(const BMK_result_t r1, const BMK_result_t r2) { + if(r1.cSpeed < r2.cSpeed) { + if(r1.cSize >= r2.cSize) { + return BETTER_RESULT; + } + return SPEED_RESULT; /* r2 is smaller but not faster. */ + } else { + if(r1.cSize <= r2.cSize) { + return WORSE_RESULT; + } + return SIZE_RESULT; /* r2 is faster but not smaller */ + } +} + +/* 0 for insertion, 1 for no insert */ +/* maintain invariant speedSizeCompare(n, n->next) = SPEED_RESULT */ +static int insertWinner(const winnerInfo_t w, const constraint_t targetConstraints) { + BMK_result_t r = w.result; + winner_ll_node* cur_node = g_winners; + /* first node to insert */ + if(!feasible(r, targetConstraints)) { + return 1; + } + + if(g_winners == NULL) { + winner_ll_node* first_node = malloc(sizeof(winner_ll_node)); + if(first_node == NULL) { + return 1; + } + first_node->next = NULL; + first_node->res = w; + g_winners = first_node; + return 0; + } + + while(cur_node->next != NULL) { + switch(speedSizeCompare(cur_node->res.result, r)) { + case WORSE_RESULT: + { + return 1; /* never insert if better */ + } + case BETTER_RESULT: + { + winner_ll_node* tmp; + cur_node->res = cur_node->next->res; + tmp = cur_node->next; + cur_node->next = cur_node->next->next; + free(tmp); + break; + } + case SIZE_RESULT: + { + cur_node = cur_node->next; + break; + } + case SPEED_RESULT: /* insert after first size result, then return */ + { + winner_ll_node* newnode = malloc(sizeof(winner_ll_node)); + if(newnode == NULL) { + return 1; + } + newnode->res = cur_node->res; + cur_node->res = w; + newnode->next = cur_node->next; + cur_node->next = newnode; + return 0; + } + } + + } + + assert(cur_node->next == NULL); + switch(speedSizeCompare(cur_node->res.result, r)) { + case WORSE_RESULT: + { + return 1; /* never insert if better */ + } + case BETTER_RESULT: + { + cur_node->res = w; + return 0; + } + case SIZE_RESULT: + { + winner_ll_node* newnode = malloc(sizeof(winner_ll_node)); + if(newnode == NULL) { + return 1; + } + newnode->res = w; + newnode->next = NULL; + cur_node->next = newnode; + return 0; + } + case SPEED_RESULT: /* insert before first size result, then return */ + { + winner_ll_node* newnode = malloc(sizeof(winner_ll_node)); + if(newnode == NULL) { + return 1; + } + newnode->res = cur_node->res; + cur_node->res = w; + newnode->next = cur_node->next; + cur_node->next = newnode; + return 0; + } + default: + return 1; + } +} + +static void BMK_printWinnerOpt(FILE* f, const U32 cLevel, const BMK_result_t result, const paramValues_t params, const constraint_t targetConstraints, const size_t srcSize) +{ + /* global winner used for constraints */ + /* cSize, cSpeed, dSpeed, cMem */ + static winnerInfo_t g_winner = { { (size_t)-1LL, 0, 0, (size_t)-1LL }, { { PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET } } }; + if(DEBUG || compareResultLT(g_winner.result, result, targetConstraints, srcSize) || g_displayLevel >= 4) { + if(DEBUG && compareResultLT(g_winner.result, result, targetConstraints, srcSize)) { + DISPLAY("New Winner: \n"); + } + + if(g_displayLevel >= 2) { BMK_printWinner(f, cLevel, result, params, srcSize); } + + if(compareResultLT(g_winner.result, result, targetConstraints, srcSize)) { + if(g_displayLevel >= 1) { BMK_translateAdvancedParams(f, params); } + g_winner.result = result; + g_winner.params = params; + } + } + + if(g_optmode && g_optimizer && (DEBUG || g_displayLevel == 3)) { + winnerInfo_t w; + winner_ll_node* n; + w.result = result; + w.params = params; + insertWinner(w, targetConstraints); + + if(!DEBUG) { fprintf(f, "\033c"); } + fprintf(f, "\n"); + + /* the table */ + fprintf(f, "================================\n"); + for(n = g_winners; n != NULL; n = n->next) { + BMK_displayOneResult(f, n->res, srcSize); + } + fprintf(f, "================================\n"); + fprintf(f, "Level Bounds: R: > %.3f AND C: < %.1f MB/s \n\n", + (double)srcSize / g_lvltarget.cSize, (double)g_lvltarget.cSpeed / (1 MB)); + + + fprintf(f, "Overall Winner: \n"); + BMK_displayOneResult(f, g_winner, srcSize); + BMK_translateAdvancedParams(f, g_winner.params); + + fprintf(f, "Latest BMK: \n");\ + BMK_displayOneResult(f, w, srcSize); + } +} + +static void BMK_printWinners2(FILE* f, const winnerInfo_t* winners, const size_t srcSize) +{ + int cLevel; + + fprintf(f, "\n /* Proposed configurations : */ \n"); + fprintf(f, " /* W, C, H, S, L, T, strat */ \n"); + + for (cLevel=0; cLevel <= NB_LEVELS_TRACKED; cLevel++) + BMK_printWinner(f, cLevel, winners[cLevel].result, winners[cLevel].params, srcSize); +} + + +static void BMK_printWinners(FILE* f, const winnerInfo_t* winners, const size_t srcSize) +{ + fseek(f, 0, SEEK_SET); + BMK_printWinners2(f, winners, srcSize); + fflush(f); + BMK_printWinners2(stdout, winners, srcSize); +} -typedef struct { - size_t dictSize; - void* dictBuffer; - ZSTD_CCtx* cctx; - ZSTD_DCtx* dctx; -} contexts_t; /*-******************************************************* -* From bench.c +* Functions to Benchmark *********************************************************/ typedef struct { @@ -678,75 +964,30 @@ static size_t local_defaultDecompress( } -/*-******************************************************* -* From bench.c End -*********************************************************/ +/*-************************************ +* Data Initialization Functions +**************************************/ -static void optimizerAdjustInput(paramValues_t* pc, const size_t maxBlockSize) { - varInds_t v; - for(v = 0; v < NUM_PARAMS; v++) { - if(pc->vals[v] != PARAM_UNSET) { - U32 newval = MIN(MAX(pc->vals[v], mintable[v]), maxtable[v]); - if(newval != pc->vals[v]) { - pc->vals[v] = newval; - DISPLAY("Warning: parameter %s not in valid range, adjusting to ", g_paramNames[v]); displayParamVal(stderr, v, newval, 0); DISPLAY("\n"); - } - } - } +typedef struct { + void* srcBuffer; + size_t srcSize; + const void** srcPtrs; + size_t* srcSizes; + void** dstPtrs; + size_t* dstCapacities; + size_t* dstSizes; + void** resPtrs; + size_t* resSizes; + size_t nbBlocks; + size_t maxBlockSize; +} buffers_t; - if(pc->vals[wlog_ind] != PARAM_UNSET) { - - U32 sshb = maxBlockSize > 1 ? ZSTD_highbit32((U32)(maxBlockSize-1)) + 1 : 1; - /* edge case of highBit not working for 0 */ - - if(maxBlockSize < (1ULL << 31) && sshb + 1 < pc->vals[wlog_ind]) { - U32 adjust = MAX(mintable[wlog_ind], sshb); - if(adjust != pc->vals[wlog_ind]) { - pc->vals[wlog_ind] = adjust; - DISPLAY("Warning: windowLog larger than src/block size, adjusted to %u\n", pc->vals[wlog_ind]); - } - } - } - - if(pc->vals[wlog_ind] != PARAM_UNSET && pc->vals[clog_ind] != PARAM_UNSET) { - U32 maxclog; - if(pc->vals[strt_ind] == PARAM_UNSET || pc->vals[strt_ind] >= (U32)ZSTD_btlazy2) { - maxclog = pc->vals[wlog_ind] + 1; - } else { - maxclog = pc->vals[wlog_ind]; - } - - if(pc->vals[clog_ind] > maxclog) { - pc->vals[clog_ind] = maxclog; - DISPLAY("Warning: chainlog too much larger than windowLog size, adjusted to %u\n", pc->vals[clog_ind]); - } - } - - if(pc->vals[wlog_ind] != PARAM_UNSET && pc->vals[hlog_ind] != PARAM_UNSET) { - if(pc->vals[wlog_ind] + 1 < pc->vals[hlog_ind]) { - pc->vals[hlog_ind] = pc->vals[wlog_ind] + 1; - DISPLAY("Warning: hashlog too much larger than windowLog size, adjusted to %u\n", pc->vals[hlog_ind]); - } - } - - if(pc->vals[slog_ind] != PARAM_UNSET && pc->vals[clog_ind] != PARAM_UNSET) { - if(pc->vals[slog_ind] > pc->vals[clog_ind]) { - pc->vals[clog_ind] = pc->vals[slog_ind]; - DISPLAY("Warning: searchLog larger than chainLog, adjusted to %u\n", pc->vals[slog_ind]); - } - } -} - -/* what about low something like clog vs hlog in lvl 1? */ -static int redundantParams(const paramValues_t paramValues, const constraint_t target, const size_t maxBlockSize) { - return - (ZSTD_estimateCStreamSize_usingCParams(pvalsToCParams(paramValues)) > (size_t)target.cMem) /* Uses too much memory */ - || ((1ULL << (paramValues.vals[wlog_ind] - 1)) >= maxBlockSize && paramValues.vals[wlog_ind] != mintable[wlog_ind]) /* wlog too much bigger than src size */ - || (paramValues.vals[clog_ind] > (paramValues.vals[wlog_ind] + (paramValues.vals[strt_ind] > ZSTD_btlazy2))) /* chainLog larger than windowLog*/ - || (paramValues.vals[slog_ind] > paramValues.vals[clog_ind]) /* searchLog larger than chainLog */ - || (paramValues.vals[hlog_ind] > paramValues.vals[wlog_ind] + 1); /* hashLog larger than windowLog + 1 */ - -} +typedef struct { + size_t dictSize; + void* dictBuffer; + ZSTD_CCtx* cctx; + ZSTD_DCtx* dctx; +} contexts_t; static void freeNonSrcBuffers(const buffers_t b) { free(b.srcPtrs); @@ -965,12 +1206,181 @@ static int createContexts(contexts_t* ctx, const char* dictFileName) { return 0; } +/*-************************************ +* Optimizer Memoization Functions +**************************************/ + +/* return: new length */ +/* keep old array, will need if iter over strategy. */ +/* prunes useless params */ +static size_t sanitizeVarArray(varInds_t* varNew, const size_t varLength, const varInds_t* varArray, const ZSTD_strategy strat) { + size_t i, j = 0; + for(i = 0; i < varLength; i++) { + if( !((varArray[i] == clog_ind && strat == ZSTD_fast) + || (varArray[i] == slog_ind && strat == ZSTD_fast) + || (varArray[i] == slog_ind && strat == ZSTD_dfast) + || (varArray[i] == tlen_ind && strat != ZSTD_btopt && strat != ZSTD_btultra && strat != ZSTD_fast))) { + varNew[j] = varArray[i]; + j++; + } + } + return j; +} + +/* res should be NUM_PARAMS size */ +/* constructs varArray from paramValues_t style parameter */ +/* pass in using dict. */ +static size_t variableParams(const paramValues_t paramConstraints, varInds_t* res, const int usingDictionary) { + varInds_t i; + size_t j = 0; + for(i = 0; i < NUM_PARAMS; i++) { + if(paramConstraints.vals[i] == PARAM_UNSET) { + if(i == fadt_ind && !usingDictionary) continue; /* don't use fadt if no dictionary */ + res[j] = i; j++; + } + } + return j; +} + +/* length of memo table given free variables */ +static size_t memoTableLen(const varInds_t* varyParams, const size_t varyLen) { + size_t arrayLen = 1; + size_t i; + for(i = 0; i < varyLen; i++) { + if(varyParams[i] == strt_ind) continue; /* strategy separated by table */ + arrayLen *= rangetable[varyParams[i]]; + } + return arrayLen; +} + +/* returns unique index in memotable of compression parameters */ +static unsigned memoTableIndDirect(const paramValues_t* ptr, const varInds_t* varyParams, const size_t varyLen) { + size_t i; + unsigned ind = 0; + for(i = 0; i < varyLen; i++) { + varInds_t v = varyParams[i]; + if(v == strt_ind) continue; /* exclude strategy from memotable */ + ind *= rangetable[v]; ind += (unsigned)invRangeMap(v, ptr->vals[v]); + } + return ind; +} + +static size_t memoTableGet(const memoTable_t* memoTableArray, const paramValues_t p) { + const memoTable_t mt = memoTableArray[p.vals[strt_ind]]; + switch(mt.tableType) { + case directMap: + return mt.table[memoTableIndDirect(&p, mt.varArray, mt.varLen)]; + case xxhashMap: + return mt.table[(XXH64(&p.vals, sizeof(U32) * NUM_PARAMS, 0) >> 3) % mt.tableLen]; + case noMemo: + return 0; + } + return 0; /* should never happen, stop compiler warnings */ +} + +static void memoTableSet(const memoTable_t* memoTableArray, const paramValues_t p, const BYTE value) { + const memoTable_t mt = memoTableArray[p.vals[strt_ind]]; + switch(mt.tableType) { + case directMap: + mt.table[memoTableIndDirect(&p, mt.varArray, mt.varLen)] = value; break; + case xxhashMap: + mt.table[(XXH64(&p.vals, sizeof(U32) * NUM_PARAMS, 0) >> 3) % mt.tableLen] = value; break; + case noMemo: + break; + } +} + +/* frees all allocated memotables */ +static void freeMemoTableArray(memoTable_t* const mtAll) { + int i; + if(mtAll == NULL) { return; } + for(i = 1; i <= (int)ZSTD_btultra; i++) { + free(mtAll[i].table); + } + free(mtAll); +} + +/* inits memotables for all (including mallocs), all strategies */ +/* takes unsanitized varyParams */ +static memoTable_t* createMemoTableArray(const paramValues_t p, const varInds_t* const varyParams, const size_t varyLen, const U32 memoTableLog) { + memoTable_t* mtAll = (memoTable_t*)calloc(sizeof(memoTable_t),(ZSTD_btultra + 1)); + ZSTD_strategy i, stratMin = ZSTD_fast, stratMax = ZSTD_btultra; + + if(mtAll == NULL) { + return NULL; + } + + for(i = 1; i <= (int)ZSTD_btultra; i++) { + mtAll[i].varLen = sanitizeVarArray(mtAll[i].varArray, varyLen, varyParams, i); + } + + /* no memoization */ + if(memoTableLog == 0) { + for(i = 1; i <= (int)ZSTD_btultra; i++) { + mtAll[i].tableType = noMemo; + mtAll[i].table = NULL; + mtAll[i].tableLen = 0; + } + return mtAll; + } + + + if(p.vals[strt_ind] != PARAM_UNSET) { + stratMin = p.vals[strt_ind]; + stratMax = p.vals[strt_ind]; + } + + + for(i = stratMin; i <= stratMax; i++) { + size_t mtl = memoTableLen(mtAll[i].varArray, mtAll[i].varLen); + mtAll[i].tableType = directMap; + + if(memoTableLog != PARAM_UNSET && mtl > (1ULL << memoTableLog)) { /* use hash table */ /* provide some option to only use hash tables? */ + mtAll[i].tableType = xxhashMap; + mtl = (1ULL << memoTableLog); + } + + mtAll[i].table = (BYTE*)calloc(sizeof(BYTE), mtl); + mtAll[i].tableLen = mtl; + + if(mtAll[i].table == NULL) { + freeMemoTableArray(mtAll); + return NULL; + } + } + + return mtAll; +} + +/* Sets pc to random unmeasured set of parameters */ +/* specifiy strategy */ +static void randomConstrainedParams(paramValues_t* pc, const memoTable_t* memoTableArray, const ZSTD_strategy st) +{ + size_t j; + const memoTable_t mt = memoTableArray[st]; + pc->vals[strt_ind] = st; + for(j = 0; j < mt.tableLen; j++) { + int i; + for(i = 0; i < NUM_PARAMS; i++) { + varInds_t v = mt.varArray[i]; + if(v == strt_ind) continue; + pc->vals[v] = rangeMap(v, FUZ_rand(&g_rand) % rangetable[v]); + } + + if(!(memoTableGet(memoTableArray, *pc))) break; /* only pick unpicked params. */ + } +} + +/*-************************************ +* Benchmarking Functions +**************************************/ + /* Replicate functionality of benchMemAdvanced, but with pre-split src / dst buffers */ /* The purpose is so that sufficient information is returned so that a decompression call to benchMemInvertible is possible */ /* BMK_benchMemAdvanced(srcBuffer,srcSize, dstBuffer, dstSize, fileSizes, nbFiles, 0, &cParams, dictBuffer, dictSize, ctx, dctx, 0, "File", &adv); */ /* nbSeconds used in same way as in BMK_advancedParams_t, as nbIters when in iterMode */ -/* if in decodeOnly, then srcPtr's will be compressed blocks, and uncompressedBlocks will be written to dstPtrs? */ +/* if in decodeOnly, then srcPtr's will be compressed blocks, and uncompressedBlocks will be written to dstPtrs */ /* dictionary nullable, nothing else though. */ static BMK_return_t BMK_benchMemInvertible(const buffers_t buf, const contexts_t ctx, const int cLevel, const paramValues_t* comprParams, @@ -1112,766 +1522,6 @@ static int BMK_benchParam(BMK_result_t* resultPtr, return res.error; } -/* comparison function: */ -/* strictly better, strictly worse, equal, speed-side adv, size-side adv */ -//Maybe use compress_only for benchmark first run? -#define WORSE_RESULT 0 -#define BETTER_RESULT 1 -#define ERROR_RESULT 2 - -#define SPEED_RESULT 4 -#define SIZE_RESULT 5 -/* maybe have epsilon-eq to limit table size? */ -static int speedSizeCompare(const BMK_result_t r1, const BMK_result_t r2) { - if(r1.cSpeed < r2.cSpeed) { - if(r1.cSize >= r2.cSize) { - return BETTER_RESULT; - } - return SPEED_RESULT; /* r2 is smaller but not faster. */ - } else { - if(r1.cSize <= r2.cSize) { - return WORSE_RESULT; - } - return SIZE_RESULT; /* r2 is faster but not smaller */ - } -} - -/* 0 for insertion, 1 for no insert */ -/* maintain invariant speedSizeCompare(n, n->next) = SPEED_RESULT */ -static int insertWinner(const winnerInfo_t w, const constraint_t targetConstraints) { - BMK_result_t r = w.result; - winner_ll_node* cur_node = g_winners; - /* first node to insert */ - if(!feasible(r, targetConstraints)) { - return 1; - } - - if(g_winners == NULL) { - winner_ll_node* first_node = malloc(sizeof(winner_ll_node)); - if(first_node == NULL) { - return 1; - } - first_node->next = NULL; - first_node->res = w; - g_winners = first_node; - return 0; - } - - while(cur_node->next != NULL) { - switch(speedSizeCompare(cur_node->res.result, r)) { - case WORSE_RESULT: - { - return 1; /* never insert if better */ - } - case BETTER_RESULT: - { - winner_ll_node* tmp; - cur_node->res = cur_node->next->res; - tmp = cur_node->next; - cur_node->next = cur_node->next->next; - free(tmp); - break; - } - case SIZE_RESULT: - { - cur_node = cur_node->next; - break; - } - case SPEED_RESULT: /* insert after first size result, then return */ - { - winner_ll_node* newnode = malloc(sizeof(winner_ll_node)); - if(newnode == NULL) { - return 1; - } - newnode->res = cur_node->res; - cur_node->res = w; - newnode->next = cur_node->next; - cur_node->next = newnode; - return 0; - } - } - - } - - assert(cur_node->next == NULL); - switch(speedSizeCompare(cur_node->res.result, r)) { - case WORSE_RESULT: - { - return 1; /* never insert if better */ - } - case BETTER_RESULT: - { - cur_node->res = w; - return 0; - } - case SIZE_RESULT: - { - winner_ll_node* newnode = malloc(sizeof(winner_ll_node)); - if(newnode == NULL) { - return 1; - } - newnode->res = w; - newnode->next = NULL; - cur_node->next = newnode; - return 0; - } - case SPEED_RESULT: /* insert before first size result, then return */ - { - winner_ll_node* newnode = malloc(sizeof(winner_ll_node)); - if(newnode == NULL) { - return 1; - } - newnode->res = cur_node->res; - cur_node->res = w; - newnode->next = cur_node->next; - cur_node->next = newnode; - return 0; - } - default: - return 1; - } -} - -/* Writes to f the results of a parameter benchmark */ -/* when used with --optimize, will only print results better than previously discovered */ -static void BMK_printWinner(FILE* f, const U32 cLevel, const BMK_result_t result, const paramValues_t params, const size_t srcSize) -{ - char lvlstr[15] = "Custom Level"; - winnerInfo_t w; - w.params = params; - w.result = result; - - fprintf(f, "\r%79s\r", ""); - - if(cLevel != CUSTOM_LEVEL) { - snprintf(lvlstr, 15, " Level %2u ", cLevel); - } - - if(TIMED) { - const U64 time = UTIL_clockSpanNano(g_time); - const U64 minutes = time / (60ULL * TIMELOOP_NANOSEC); - fprintf(f, "%1lu:%2lu:%05.2f - ", (unsigned long) minutes / 60,(unsigned long) minutes % 60, (double)(time - minutes * TIMELOOP_NANOSEC * 60ULL)/TIMELOOP_NANOSEC); - } - - fprintf(f, "/* %s */ ", lvlstr); - BMK_displayOneResult(f, w, srcSize); -} - -static void BMK_printWinnerOpt(FILE* f, const U32 cLevel, const BMK_result_t result, const paramValues_t params, const constraint_t targetConstraints, const size_t srcSize) -{ - /* global winner used for constraints */ - /* cSize, cSpeed, dSpeed, cMem */ - static winnerInfo_t g_winner = { { (size_t)-1LL, 0, 0, (size_t)-1LL }, { { PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET } } }; - if(DEBUG || compareResultLT(g_winner.result, result, targetConstraints, srcSize) || g_displayLevel >= 4) { - if(DEBUG && compareResultLT(g_winner.result, result, targetConstraints, srcSize)) { - DISPLAY("New Winner: \n"); - } - - if(g_displayLevel >= 2) { BMK_printWinner(f, cLevel, result, params, srcSize); } - - if(compareResultLT(g_winner.result, result, targetConstraints, srcSize)) { - if(g_displayLevel >= 1) { BMK_translateAdvancedParams(f, params); } - g_winner.result = result; - g_winner.params = params; - } - } - - //prints out tradeoff table if using lvloptimize - if(g_optmode && g_optimizer && (DEBUG || g_displayLevel == 3)) { - winnerInfo_t w; - winner_ll_node* n; - w.result = result; - w.params = params; - insertWinner(w, targetConstraints); - - if(!DEBUG) { fprintf(f, "\033c"); } - fprintf(f, "\n"); - - /* the table */ - fprintf(f, "================================\n"); - for(n = g_winners; n != NULL; n = n->next) { - BMK_displayOneResult(f, n->res, srcSize); - } - fprintf(f, "================================\n"); - fprintf(f, "Level Bounds: R: > %.3f AND C: < %.1f MB/s \n\n", - (double)srcSize / g_lvltarget.cSize, (double)g_lvltarget.cSpeed / (1 MB)); - - - fprintf(f, "Overall Winner: \n"); - BMK_displayOneResult(f, g_winner, srcSize); - BMK_translateAdvancedParams(f, g_winner.params); - - fprintf(f, "Latest BMK: \n");\ - BMK_displayOneResult(f, w, srcSize); - } -} - -static void BMK_printWinners2(FILE* f, const oldWinnerInfo_t* winners, const size_t srcSize) -{ - int cLevel; - - fprintf(f, "\n /* Proposed configurations : */ \n"); - fprintf(f, " /* W, C, H, S, L, T, strat */ \n"); - - for (cLevel=0; cLevel <= NB_LEVELS_TRACKED; cLevel++) - BMK_printWinner(f, cLevel, winners[cLevel].result, cParamsToPVals(winners[cLevel].params), srcSize); -} - - -static void BMK_printWinners(FILE* f, const oldWinnerInfo_t* winners, const size_t srcSize) -{ - fseek(f, 0, SEEK_SET); - BMK_printWinners2(f, winners, srcSize); - fflush(f); - BMK_printWinners2(stdout, winners, srcSize); -} - - -typedef struct { - U64 cSpeed_min; - U64 dSpeed_min; - U32 windowLog_max; - ZSTD_strategy strategy_max; -} level_constraints_t; - -static level_constraints_t g_level_constraint[NB_LEVELS_TRACKED+1]; - -static void BMK_init_level_constraints(int bytePerSec_level1) -{ - assert(NB_LEVELS_TRACKED >= ZSTD_maxCLevel()); - memset(g_level_constraint, 0, sizeof(g_level_constraint)); - g_level_constraint[1].cSpeed_min = bytePerSec_level1; - g_level_constraint[1].dSpeed_min = 0.; - g_level_constraint[1].windowLog_max = 19; - g_level_constraint[1].strategy_max = ZSTD_fast; - - /* establish speed objectives (relative to level 1) */ - { int l; - for (l=2; l<=NB_LEVELS_TRACKED; l++) { - g_level_constraint[l].cSpeed_min = (g_level_constraint[l-1].cSpeed_min * 49) / 64; - g_level_constraint[l].dSpeed_min = 0.; - g_level_constraint[l].windowLog_max = (l<20) ? 23 : l+5; /* only --ultra levels >= 20 can use windowlog > 23 */ - g_level_constraint[l].strategy_max = (l<19) ? ZSTD_btopt : ZSTD_btultra; /* level 19 is allowed to use btultra */ - } } -} - -static int BMK_seed(oldWinnerInfo_t* winners, const ZSTD_compressionParameters params, - const buffers_t buf, const contexts_t ctx) -{ - BMK_result_t testResult; - int better = 0; - int cLevel; - - BMK_benchParam(&testResult, buf, ctx, cParamsToPVals(params)); - - - for (cLevel = 1; cLevel <= NB_LEVELS_TRACKED; cLevel++) { - if (testResult.cSpeed < g_level_constraint[cLevel].cSpeed_min) - continue; /* not fast enough for this level */ - if (testResult.dSpeed < g_level_constraint[cLevel].dSpeed_min) - continue; /* not fast enough for this level */ - if (params.windowLog > g_level_constraint[cLevel].windowLog_max) - continue; /* too much memory for this level */ - if (params.strategy > g_level_constraint[cLevel].strategy_max) - continue; /* forbidden strategy for this level */ - if (winners[cLevel].result.cSize==0) { - /* first solution for this cLevel */ - winners[cLevel].result = testResult; - winners[cLevel].params = params; - BMK_printWinner(stdout, cLevel, testResult, cParamsToPVals(params), buf.srcSize); - better = 1; - continue; - } - - if ((double)testResult.cSize <= ((double)winners[cLevel].result.cSize * (1. + (0.02 / cLevel))) ) { - /* Validate solution is "good enough" */ - double W_ratio = (double)buf.srcSize / testResult.cSize; - double O_ratio = (double)buf.srcSize / winners[cLevel].result.cSize; - double W_ratioNote = log (W_ratio); - double O_ratioNote = log (O_ratio); - size_t W_DMemUsed = (1 << params.windowLog) + (16 KB); - size_t O_DMemUsed = (1 << winners[cLevel].params.windowLog) + (16 KB); - double W_DMemUsed_note = W_ratioNote * ( 40 + 9*cLevel) - log((double)W_DMemUsed); - double O_DMemUsed_note = O_ratioNote * ( 40 + 9*cLevel) - log((double)O_DMemUsed); - - size_t W_CMemUsed = (1 << params.windowLog) + ZSTD_estimateCCtxSize_usingCParams(params); - size_t O_CMemUsed = (1 << winners[cLevel].params.windowLog) + ZSTD_estimateCCtxSize_usingCParams(winners[cLevel].params); - double W_CMemUsed_note = W_ratioNote * ( 50 + 13*cLevel) - log((double)W_CMemUsed); - double O_CMemUsed_note = O_ratioNote * ( 50 + 13*cLevel) - log((double)O_CMemUsed); - - double W_CSpeed_note = W_ratioNote * ( 30 + 10*cLevel) + log(testResult.cSpeed); - double O_CSpeed_note = O_ratioNote * ( 30 + 10*cLevel) + log(winners[cLevel].result.cSpeed); - - double W_DSpeed_note = W_ratioNote * ( 20 + 2*cLevel) + log(testResult.dSpeed); - double O_DSpeed_note = O_ratioNote * ( 20 + 2*cLevel) + log(winners[cLevel].result.dSpeed); - - if (W_DMemUsed_note < O_DMemUsed_note) { - /* uses too much Decompression memory for too little benefit */ - if (W_ratio > O_ratio) - DISPLAY ("Decompression Memory : %5.3f @ %4.1f MB vs %5.3f @ %4.1f MB : not enough for level %i\n", - W_ratio, (double)(W_DMemUsed) / 1024 / 1024, - O_ratio, (double)(O_DMemUsed) / 1024 / 1024, cLevel); - continue; - } - if (W_CMemUsed_note < O_CMemUsed_note) { - /* uses too much memory for compression for too little benefit */ - if (W_ratio > O_ratio) - DISPLAY ("Compression Memory : %5.3f @ %4.1f MB vs %5.3f @ %4.1f MB : not enough for level %i\n", - W_ratio, (double)(W_CMemUsed) / 1024 / 1024, - O_ratio, (double)(O_CMemUsed) / 1024 / 1024, cLevel); - continue; - } - if (W_CSpeed_note < O_CSpeed_note ) { - /* too large compression speed difference for the compression benefit */ - if (W_ratio > O_ratio) - DISPLAY ("Compression Speed : %5.3f @ %4.1f MB/s vs %5.3f @ %4.1f MB/s : not enough for level %i\n", - W_ratio, (double)testResult.cSpeed / (1 MB), - O_ratio, (double)winners[cLevel].result.cSpeed / (1 MB), cLevel); - continue; - } - if (W_DSpeed_note < O_DSpeed_note ) { - /* too large decompression speed difference for the compression benefit */ - if (W_ratio > O_ratio) - DISPLAY ("Decompression Speed : %5.3f @ %4.1f MB/s vs %5.3f @ %4.1f MB/s : not enough for level %i\n", - W_ratio, (double)testResult.dSpeed / (1 MB), - O_ratio, (double)winners[cLevel].result.dSpeed / (1 MB), cLevel); - continue; - } - - if (W_ratio < O_ratio) - DISPLAY("Solution %4.3f selected over %4.3f at level %i, due to better secondary statistics \n", W_ratio, O_ratio, cLevel); - - winners[cLevel].result = testResult; - winners[cLevel].params = params; - BMK_printWinner(stdout, cLevel, testResult, cParamsToPVals(params), buf.srcSize); - - better = 1; - } } - - return better; -} - -/* bounds check in sanitize too? */ -#define CLAMP(var, lo, hi) { \ - var = MAX(MIN(var, hi), lo); \ -} - -/* nullified useless params, to ensure count stats */ -/* cleans up params for memoizing / display */ -static paramValues_t sanitizeParams(paramValues_t params) -{ - if (params.vals[strt_ind] == ZSTD_fast) - params.vals[clog_ind] = 0, params.vals[slog_ind] = 0; - if (params.vals[strt_ind] == ZSTD_dfast) - params.vals[slog_ind] = 0; - if (params.vals[strt_ind] != ZSTD_btopt && params.vals[strt_ind] != ZSTD_btultra && params.vals[strt_ind] != ZSTD_fast) - params.vals[tlen_ind] = 0; - - return params; -} - -/* return: new length */ -/* keep old array, will need if iter over strategy. */ -/* prunes useless params */ -static size_t sanitizeVarArray(varInds_t* varNew, const size_t varLength, const varInds_t* varArray, const ZSTD_strategy strat) { - size_t i, j = 0; - for(i = 0; i < varLength; i++) { - if( !((varArray[i] == clog_ind && strat == ZSTD_fast) - || (varArray[i] == slog_ind && strat == ZSTD_fast) - || (varArray[i] == slog_ind && strat == ZSTD_dfast) - || (varArray[i] == tlen_ind && strat != ZSTD_btopt && strat != ZSTD_btultra && strat != ZSTD_fast))) { - varNew[j] = varArray[i]; - j++; - } - } - return j; - -} - -/* res should be NUM_PARAMS size */ -/* constructs varArray from paramValues_t style parameter */ -/* pass in using dict. */ -static size_t variableParams(const paramValues_t paramConstraints, varInds_t* res, const int usingDictionary) { - varInds_t i; - size_t j = 0; - for(i = 0; i < NUM_PARAMS; i++) { - if(paramConstraints.vals[i] == PARAM_UNSET) { - if(i == fadt_ind && !usingDictionary) continue; /* don't use fadt if no dictionary */ - res[j] = i; j++; - } - } - return j; -} - -/* amt will probably always be \pm 1? */ -/* slight change from old paramVariation, targetLength can only take on powers of 2 now (999 ~= 1024?) */ -/* take max/min bounds into account as well? */ -static void paramVaryOnce(const varInds_t paramIndex, const int amt, paramValues_t* ptr) { - ptr->vals[paramIndex] = rangeMap(paramIndex, invRangeMap(paramIndex, ptr->vals[paramIndex]) + amt); -} - -/* varies ptr by nbChanges respecting varyParams*/ -static void paramVariation(paramValues_t* ptr, memoTable_t* mtAll, const U32 nbChanges) -{ - paramValues_t p; - U32 validated = 0; - while (!validated) { - U32 i; - p = *ptr; - for (i = 0 ; i < nbChanges ; i++) { - const U32 changeID = (U32)FUZ_rand(&g_rand) % (mtAll[p.vals[strt_ind]].varLen << 1); - paramVaryOnce(mtAll[p.vals[strt_ind]].varArray[changeID >> 1], ((changeID & 1) << 1) - 1, &p); - } - validated = paramValid(p); - } - *ptr = p; -} - -/* length of memo table given free variables */ -static size_t memoTableLen(const varInds_t* varyParams, const size_t varyLen) { - size_t arrayLen = 1; - size_t i; - for(i = 0; i < varyLen; i++) { - if(varyParams[i] == strt_ind) continue; /* strategy separated by table */ - arrayLen *= rangetable[varyParams[i]]; - } - return arrayLen; -} - -/* returns unique index in memotable of compression parameters */ -static unsigned memoTableIndDirect(const paramValues_t* ptr, const varInds_t* varyParams, const size_t varyLen) { - size_t i; - unsigned ind = 0; - for(i = 0; i < varyLen; i++) { - varInds_t v = varyParams[i]; - if(v == strt_ind) continue; /* exclude strategy from memotable */ - ind *= rangetable[v]; ind += (unsigned)invRangeMap(v, ptr->vals[v]); - } - return ind; -} - -static size_t memoTableGet(const memoTable_t* memoTableArray, const paramValues_t p) { - const memoTable_t mt = memoTableArray[p.vals[strt_ind]]; - switch(mt.tableType) { - case directMap: - return mt.table[memoTableIndDirect(&p, mt.varArray, mt.varLen)]; - case xxhashMap: - return mt.table[(XXH64(&p.vals, sizeof(U32) * NUM_PARAMS, 0) >> 3) % mt.tableLen]; - case noMemo: - return 0; - } - return 0; /* should never happen, stop compiler warnings */ -} - -static void memoTableSet(const memoTable_t* memoTableArray, const paramValues_t p, const BYTE value) { - const memoTable_t mt = memoTableArray[p.vals[strt_ind]]; - switch(mt.tableType) { - case directMap: - mt.table[memoTableIndDirect(&p, mt.varArray, mt.varLen)] = value; break; - case xxhashMap: - mt.table[(XXH64(&p.vals, sizeof(U32) * NUM_PARAMS, 0) >> 3) % mt.tableLen] = value; break; - case noMemo: - break; - } -} - -/* frees all allocated memotables */ -static void freeMemoTableArray(memoTable_t* const mtAll) { - int i; - if(mtAll == NULL) { return; } - for(i = 1; i <= (int)ZSTD_btultra; i++) { - free(mtAll[i].table); - } - free(mtAll); -} - -/* inits memotables for all (including mallocs), all strategies */ -/* takes unsanitized varyParams */ -static memoTable_t* createMemoTableArray(const varInds_t* const varyParams, const size_t varyLen) { - memoTable_t* mtAll = (memoTable_t*)calloc(sizeof(memoTable_t),(ZSTD_btultra + 1)); - int i; - - if(mtAll == NULL) { - return NULL; - } - - for(i = 1; i <= (int)ZSTD_btultra; i++) { - mtAll[i].varLen = sanitizeVarArray(mtAll[i].varArray, varyLen, varyParams, i); - } - - /* no memoization */ - if(g_memoTableLog == 0) { - for(i = 1; i <= (int)ZSTD_btultra; i++) { - mtAll[i].tableType = noMemo; - mtAll[i].table = NULL; - mtAll[i].tableLen = 0; - } - return mtAll; - } - - /* hash table if normal table is too big */ - for(i = 1; i <= (int)ZSTD_btultra; i++) { - size_t mtl = memoTableLen(mtAll[i].varArray, mtAll[i].varLen); - mtAll[i].tableType = directMap; - - if(g_memoTableLog != PARAM_UNSET && mtl > (1ULL << g_memoTableLog)) { /* use hash table */ /* provide some option to only use hash tables? */ - mtAll[i].tableType = xxhashMap; - mtl = (1ULL << g_memoTableLog); - } - - mtAll[i].table = (BYTE*)calloc(sizeof(BYTE), mtl); - mtAll[i].tableLen = mtl; - - if(mtAll[i].table == NULL) { - freeMemoTableArray(mtAll); - return NULL; - } - } - - return mtAll; -} - -static paramValues_t overwriteParams(paramValues_t base, const paramValues_t mask) { - U32 i; - for(i = 0; i < NUM_PARAMS; i++) { - if(mask.vals[i] != PARAM_UNSET) { - base.vals[i] = mask.vals[i]; - } - } - return base; -} - -#define PARAMTABLELOG 25 -#define PARAMTABLESIZE (1<> 3) & PARAMTABLEMASK]; -} - -static void playAround(FILE* f, oldWinnerInfo_t* winners, - ZSTD_compressionParameters params, - const buffers_t buf, const contexts_t ctx) -{ - int nbVariations = 0, i; - UTIL_time_t const clockStart = UTIL_getTime(); - - while (UTIL_clockSpanMicro(clockStart) < g_maxVariationTime) { - paramValues_t p = cParamsToPVals(params); - ZSTD_compressionParameters p2; - BYTE* b; - - if (nbVariations++ > g_maxNbVariations) break; - - do { for(i = 0; i < 4; i++) { paramVaryOnce(FUZ_rand(&g_rand) % (strt_ind + 1), ((FUZ_rand(&g_rand) & 1) << 1) - 1, &p); } } - while(!paramValid(p)); - - p2 = pvalsToCParams(p); - - /* exclude faster if already played params */ - if (FUZ_rand(&g_rand) & ((1 << *NB_TESTS_PLAYED(p2))-1)) - continue; - - /* test */ - b = NB_TESTS_PLAYED(p2); - (*b)++; - if (!BMK_seed(winners, p2, buf, ctx)) continue; - - /* improvement found => search more */ - BMK_printWinners(f, winners, buf.srcSize); - playAround(f, winners, p2, buf, ctx); - } - -} - -/* Sets pc to random unmeasured set of parameters */ -/* specifiy strategy */ -static void randomConstrainedParams(paramValues_t* pc, const memoTable_t* memoTableArray, const ZSTD_strategy st) -{ - size_t j; - const memoTable_t mt = memoTableArray[st]; - pc->vals[strt_ind] = st; - for(j = 0; j < MIN(1ULL << g_memoTableLog, memoTableLen(mt.varArray, mt.varLen)); j++) { - int i; - for(i = 0; i < NUM_PARAMS; i++) { - varInds_t v = mt.varArray[i]; - if(v == strt_ind) continue; //skip, already specified - pc->vals[v] = rangeMap(v, FUZ_rand(&g_rand) % rangetable[v]); - } - - if(!(memoTableGet(memoTableArray, *pc))) break; //only pick unpicked params. - } -} - -/* Completely random parameter selection */ -static ZSTD_compressionParameters randomParams(void) -{ - paramValues_t p; varInds_t v; - for(v = 0; v < NUM_PARAMS; v++) { - p.vals[v] = rangeMap(v, FUZ_rand(&g_rand) % rangetable[v]); - } - return pvalsToCParams(p); -} - -static void BMK_selectRandomStart( - FILE* f, oldWinnerInfo_t* winners, - const buffers_t buf, const contexts_t ctx) -{ - U32 const id = FUZ_rand(&g_rand) % (NB_LEVELS_TRACKED+1); - if ((id==0) || (winners[id].params.windowLog==0)) { - /* use some random entry */ - ZSTD_compressionParameters const p = ZSTD_adjustCParams(randomParams(), buf.srcSize, 0); - playAround(f, winners, p, buf, ctx); - } else { - playAround(f, winners, winners[id].params, buf, ctx); - } -} - -static void BMK_benchFullTable(const buffers_t buf, const contexts_t ctx) -{ - ZSTD_compressionParameters params; - oldWinnerInfo_t winners[NB_LEVELS_TRACKED+1]; - const char* const rfName = "grillResults.txt"; - FILE* const f = fopen(rfName, "w"); - - /* init */ - assert(g_singleRun==0); - memset(winners, 0, sizeof(winners)); - if (f==NULL) { DISPLAY("error opening %s \n", rfName); exit(1); } - - if (g_target) { - BMK_init_level_constraints(g_target * (1 MB)); - } else { - /* baseline config for level 1 */ - ZSTD_compressionParameters const l1params = ZSTD_getCParams(1, buf.maxBlockSize, ctx.dictSize); - BMK_result_t testResult; - BMK_benchParam(&testResult, buf, ctx, cParamsToPVals(l1params)); - BMK_init_level_constraints((int)((testResult.cSpeed * 31) / 32)); - } - - /* populate initial solution */ - { const int maxSeeds = g_noSeed ? 1 : ZSTD_maxCLevel(); - int i; - for (i=0; i<=maxSeeds; i++) { - params = ZSTD_getCParams(i, buf.maxBlockSize, 0); - BMK_seed(winners, params, buf, ctx); - } } - BMK_printWinners(f, winners, buf.srcSize); - - /* start tests */ - { const UTIL_time_t grillStart = UTIL_getTime(); - do { - BMK_selectRandomStart(f, winners, buf, ctx); - } while (BMK_timeSpan(grillStart) < g_timeLimit_s); - } - - /* end summary */ - BMK_printWinners(f, winners, buf.srcSize); - DISPLAY("grillParams operations completed \n"); - - /* clean up*/ - fclose(f); -} - -static int benchOnce(const buffers_t buf, const contexts_t ctx) { - BMK_result_t testResult; - - if(BMK_benchParam(&testResult, buf, ctx, g_params)) { - DISPLAY("Error during benchmarking\n"); - return 1; - } - - BMK_printWinner(stdout, CUSTOM_LEVEL, testResult, g_params, buf.srcSize); - - return 0; -} - -static int benchSample(void) -{ - const char* const name = "Sample 10MB"; - size_t const benchedSize = 10 MB; - void* srcBuffer = malloc(benchedSize); - int ret = 0; - - buffers_t buf; - contexts_t ctx; - - if(srcBuffer == NULL) { - DISPLAY("Out of Memory\n"); - return 2; - } - - RDG_genBuffer(srcBuffer, benchedSize, g_compressibility, 0.0, 0); - - if(createBuffersFromMemory(&buf, srcBuffer, 1, &benchedSize)) { - DISPLAY("Buffer Creation Error\n"); - free(srcBuffer); - return 3; - } - - if(createContexts(&ctx, NULL)) { - DISPLAY("Context Creation Error\n"); - freeBuffers(buf); - return 1; - } - - /* bench */ - DISPLAY("\r%79s\r", ""); - DISPLAY("using %s %i%%: \n", name, (int)(g_compressibility*100)); - - if(g_singleRun) { - ret = benchOnce(buf, ctx); - } else { - BMK_benchFullTable(buf, ctx); - } - - freeBuffers(buf); - freeContexts(ctx); - - return ret; -} - -/* benchFiles() : - * note: while this function takes a table of filenames, - * in practice, only the first filename will be used */ -int benchFiles(const char** fileNamesTable, int nbFiles, const char* dictFileName, int cLevel) -{ - buffers_t buf; - contexts_t ctx; - int ret = 0; - - if(createBuffers(&buf, fileNamesTable, nbFiles)) { - DISPLAY("unable to load files\n"); - return 1; - } - - if(createContexts(&ctx, dictFileName)) { - DISPLAY("unable to load dictionary\n"); - freeBuffers(buf); - return 2; - } - - DISPLAY("\r%79s\r", ""); - if(nbFiles == 1) { - DISPLAY("using %s : \n", fileNamesTable[0]); - } else { - DISPLAY("using %d Files : \n", nbFiles); - } - - g_params = adjustParams(overwriteParams(cParamsToPVals(ZSTD_getCParams(cLevel, buf.maxBlockSize, ctx.dictSize)), g_params), buf.maxBlockSize, ctx.dictSize); - - if(g_singleRun) { - ret = benchOnce(buf, ctx); - } else { - BMK_benchFullTable(buf, ctx); - } - - freeBuffers(buf); - freeContexts(ctx); - return ret; -} #define CBENCHMARK(conditional, resultvar, tmpret, mode, loopmode, sec) { \ if(conditional) { \ @@ -1981,6 +1631,343 @@ static int benchMemo(BMK_result_t* resultPtr, return res; } +typedef struct { + U64 cSpeed_min; + U64 dSpeed_min; + U32 windowLog_max; + ZSTD_strategy strategy_max; +} level_constraints_t; + +static level_constraints_t g_level_constraint[NB_LEVELS_TRACKED+1]; + +static void BMK_init_level_constraints(int bytePerSec_level1) +{ + assert(NB_LEVELS_TRACKED >= ZSTD_maxCLevel()); + memset(g_level_constraint, 0, sizeof(g_level_constraint)); + g_level_constraint[1].cSpeed_min = bytePerSec_level1; + g_level_constraint[1].dSpeed_min = 0.; + g_level_constraint[1].windowLog_max = 19; + g_level_constraint[1].strategy_max = ZSTD_fast; + + /* establish speed objectives (relative to level 1) */ + { int l; + for (l=2; l<=NB_LEVELS_TRACKED; l++) { + g_level_constraint[l].cSpeed_min = (g_level_constraint[l-1].cSpeed_min * 49) / 64; + g_level_constraint[l].dSpeed_min = 0.; + g_level_constraint[l].windowLog_max = (l<20) ? 23 : l+5; /* only --ultra levels >= 20 can use windowlog > 23 */ + g_level_constraint[l].strategy_max = (l<19) ? ZSTD_btopt : ZSTD_btultra; /* level 19 is allowed to use btultra */ + } } +} + +static int BMK_seed(winnerInfo_t* winners, const paramValues_t params, + const buffers_t buf, const contexts_t ctx) +{ + BMK_result_t testResult; + int better = 0; + int cLevel; + + BMK_benchParam(&testResult, buf, ctx, params); + + + for (cLevel = 1; cLevel <= NB_LEVELS_TRACKED; cLevel++) { + if (testResult.cSpeed < g_level_constraint[cLevel].cSpeed_min) + continue; /* not fast enough for this level */ + if (testResult.dSpeed < g_level_constraint[cLevel].dSpeed_min) + continue; /* not fast enough for this level */ + if (params.vals[wlog_ind] > g_level_constraint[cLevel].windowLog_max) + continue; /* too much memory for this level */ + if (params.vals[strt_ind] > g_level_constraint[cLevel].strategy_max) + continue; /* forbidden strategy for this level */ + if (winners[cLevel].result.cSize==0) { + /* first solution for this cLevel */ + winners[cLevel].result = testResult; + winners[cLevel].params = params; + BMK_printWinner(stdout, cLevel, testResult, params, buf.srcSize); + better = 1; + continue; + } + + if ((double)testResult.cSize <= ((double)winners[cLevel].result.cSize * (1. + (0.02 / cLevel))) ) { + /* Validate solution is "good enough" */ + double W_ratio = (double)buf.srcSize / testResult.cSize; + double O_ratio = (double)buf.srcSize / winners[cLevel].result.cSize; + double W_ratioNote = log (W_ratio); + double O_ratioNote = log (O_ratio); + size_t W_DMemUsed = (1 << params.vals[wlog_ind]) + (16 KB); + size_t O_DMemUsed = (1 << winners[cLevel].params.vals[wlog_ind]) + (16 KB); + double W_DMemUsed_note = W_ratioNote * ( 40 + 9*cLevel) - log((double)W_DMemUsed); + double O_DMemUsed_note = O_ratioNote * ( 40 + 9*cLevel) - log((double)O_DMemUsed); + + size_t W_CMemUsed = (1 << params.vals[wlog_ind]) + ZSTD_estimateCCtxSize_usingCParams(pvalsToCParams(params)); + size_t O_CMemUsed = (1 << winners[cLevel].params.vals[wlog_ind]) + ZSTD_estimateCCtxSize_usingCParams(pvalsToCParams(winners[cLevel].params)); + double W_CMemUsed_note = W_ratioNote * ( 50 + 13*cLevel) - log((double)W_CMemUsed); + double O_CMemUsed_note = O_ratioNote * ( 50 + 13*cLevel) - log((double)O_CMemUsed); + + double W_CSpeed_note = W_ratioNote * ( 30 + 10*cLevel) + log(testResult.cSpeed); + double O_CSpeed_note = O_ratioNote * ( 30 + 10*cLevel) + log(winners[cLevel].result.cSpeed); + + double W_DSpeed_note = W_ratioNote * ( 20 + 2*cLevel) + log(testResult.dSpeed); + double O_DSpeed_note = O_ratioNote * ( 20 + 2*cLevel) + log(winners[cLevel].result.dSpeed); + + if (W_DMemUsed_note < O_DMemUsed_note) { + /* uses too much Decompression memory for too little benefit */ + if (W_ratio > O_ratio) + DISPLAY ("Decompression Memory : %5.3f @ %4.1f MB vs %5.3f @ %4.1f MB : not enough for level %i\n", + W_ratio, (double)(W_DMemUsed) / 1024 / 1024, + O_ratio, (double)(O_DMemUsed) / 1024 / 1024, cLevel); + continue; + } + if (W_CMemUsed_note < O_CMemUsed_note) { + /* uses too much memory for compression for too little benefit */ + if (W_ratio > O_ratio) + DISPLAY ("Compression Memory : %5.3f @ %4.1f MB vs %5.3f @ %4.1f MB : not enough for level %i\n", + W_ratio, (double)(W_CMemUsed) / 1024 / 1024, + O_ratio, (double)(O_CMemUsed) / 1024 / 1024, cLevel); + continue; + } + if (W_CSpeed_note < O_CSpeed_note ) { + /* too large compression speed difference for the compression benefit */ + if (W_ratio > O_ratio) + DISPLAY ("Compression Speed : %5.3f @ %4.1f MB/s vs %5.3f @ %4.1f MB/s : not enough for level %i\n", + W_ratio, (double)testResult.cSpeed / (1 MB), + O_ratio, (double)winners[cLevel].result.cSpeed / (1 MB), cLevel); + continue; + } + if (W_DSpeed_note < O_DSpeed_note ) { + /* too large decompression speed difference for the compression benefit */ + if (W_ratio > O_ratio) + DISPLAY ("Decompression Speed : %5.3f @ %4.1f MB/s vs %5.3f @ %4.1f MB/s : not enough for level %i\n", + W_ratio, (double)testResult.dSpeed / (1 MB), + O_ratio, (double)winners[cLevel].result.dSpeed / (1 MB), cLevel); + continue; + } + + if (W_ratio < O_ratio) + DISPLAY("Solution %4.3f selected over %4.3f at level %i, due to better secondary statistics \n", W_ratio, O_ratio, cLevel); + + winners[cLevel].result = testResult; + winners[cLevel].params = params; + BMK_printWinner(stdout, cLevel, testResult, params, buf.srcSize); + + better = 1; + } } + + return better; +} + +/*-************************************ +* Compression Level Table Generation Functions +**************************************/ + +#define PARAMTABLELOG 25 +#define PARAMTABLESIZE (1<> 3) & PARAMTABLEMASK]; +} + +static void playAround(FILE* f, winnerInfo_t* winners, + paramValues_t p, + const buffers_t buf, const contexts_t ctx) +{ + int nbVariations = 0, i; + UTIL_time_t const clockStart = UTIL_getTime(); + + while (UTIL_clockSpanMicro(clockStart) < g_maxVariationTime) { + BYTE* b; + + if (nbVariations++ > g_maxNbVariations) break; + + do { for(i = 0; i < 4; i++) { paramVaryOnce(FUZ_rand(&g_rand) % (strt_ind + 1), ((FUZ_rand(&g_rand) & 1) << 1) - 1, &p); } } + while(!paramValid(p)); + + /* exclude faster if already played params */ + if (FUZ_rand(&g_rand) & ((1 << *NB_TESTS_PLAYED(p))-1)) + continue; + + /* test */ + b = NB_TESTS_PLAYED(p); + (*b)++; + if (!BMK_seed(winners, p, buf, ctx)) continue; + + /* improvement found => search more */ + BMK_printWinners(f, winners, buf.srcSize); + playAround(f, winners, p, buf, ctx); + } + +} + +static void BMK_selectRandomStart( + FILE* f, winnerInfo_t* winners, + const buffers_t buf, const contexts_t ctx) +{ + U32 const id = FUZ_rand(&g_rand) % (NB_LEVELS_TRACKED+1); + if ((id==0) || (winners[id].params.vals[wlog_ind]==0)) { + /* use some random entry */ + paramValues_t const p = adjustParams(cParamsToPVals(pvalsToCParams(randomParams())), /* defaults nonCompression parameters */ + buf.srcSize, 0); + playAround(f, winners, p, buf, ctx); + } else { + playAround(f, winners, winners[id].params, buf, ctx); + } +} + +static void BMK_benchFullTable(const buffers_t buf, const contexts_t ctx) +{ + paramValues_t params; + winnerInfo_t winners[NB_LEVELS_TRACKED+1]; + const char* const rfName = "grillResults.txt"; + FILE* const f = fopen(rfName, "w"); + + /* init */ + assert(g_singleRun==0); + memset(winners, 0, sizeof(winners)); + if (f==NULL) { DISPLAY("error opening %s \n", rfName); exit(1); } + + if (g_target) { + BMK_init_level_constraints(g_target * (1 MB)); + } else { + /* baseline config for level 1 */ + paramValues_t const l1params = cParamsToPVals(ZSTD_getCParams(1, buf.maxBlockSize, ctx.dictSize)); + BMK_result_t testResult; + BMK_benchParam(&testResult, buf, ctx, l1params); + BMK_init_level_constraints((int)((testResult.cSpeed * 31) / 32)); + } + + /* populate initial solution */ + { const int maxSeeds = g_noSeed ? 1 : ZSTD_maxCLevel(); + int i; + for (i=0; i<=maxSeeds; i++) { + params = cParamsToPVals(ZSTD_getCParams(i, buf.maxBlockSize, 0)); + BMK_seed(winners, params, buf, ctx); + } } + BMK_printWinners(f, winners, buf.srcSize); + + /* start tests */ + { const UTIL_time_t grillStart = UTIL_getTime(); + do { + BMK_selectRandomStart(f, winners, buf, ctx); + } while (BMK_timeSpan(grillStart) < g_timeLimit_s); + } + + /* end summary */ + BMK_printWinners(f, winners, buf.srcSize); + DISPLAY("grillParams operations completed \n"); + + /* clean up*/ + fclose(f); +} + +/*-************************************ +* Single Benchmark Functions +**************************************/ + +static int benchOnce(const buffers_t buf, const contexts_t ctx) { + BMK_result_t testResult; + + if(BMK_benchParam(&testResult, buf, ctx, g_params)) { + DISPLAY("Error during benchmarking\n"); + return 1; + } + + BMK_printWinner(stdout, CUSTOM_LEVEL, testResult, g_params, buf.srcSize); + + return 0; +} + +static int benchSample(double compressibility) +{ + const char* const name = "Sample 10MB"; + size_t const benchedSize = 10 MB; + void* srcBuffer = malloc(benchedSize); + int ret = 0; + + buffers_t buf; + contexts_t ctx; + + if(srcBuffer == NULL) { + DISPLAY("Out of Memory\n"); + return 2; + } + + RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0); + + if(createBuffersFromMemory(&buf, srcBuffer, 1, &benchedSize)) { + DISPLAY("Buffer Creation Error\n"); + free(srcBuffer); + return 3; + } + + if(createContexts(&ctx, NULL)) { + DISPLAY("Context Creation Error\n"); + freeBuffers(buf); + return 1; + } + + /* bench */ + DISPLAY("\r%79s\r", ""); + DISPLAY("using %s %i%%: \n", name, (int)(compressibility*100)); + + if(g_singleRun) { + ret = benchOnce(buf, ctx); + } else { + BMK_benchFullTable(buf, ctx); + } + + freeBuffers(buf); + freeContexts(ctx); + + return ret; +} + +/* benchFiles() : + * note: while this function takes a table of filenames, + * in practice, only the first filename will be used */ +int benchFiles(const char** fileNamesTable, int nbFiles, const char* dictFileName, int cLevel) +{ + buffers_t buf; + contexts_t ctx; + int ret = 0; + + if(createBuffers(&buf, fileNamesTable, nbFiles)) { + DISPLAY("unable to load files\n"); + return 1; + } + + if(createContexts(&ctx, dictFileName)) { + DISPLAY("unable to load dictionary\n"); + freeBuffers(buf); + return 2; + } + + DISPLAY("\r%79s\r", ""); + if(nbFiles == 1) { + DISPLAY("using %s : \n", fileNamesTable[0]); + } else { + DISPLAY("using %d Files : \n", nbFiles); + } + + g_params = adjustParams(overwriteParams(cParamsToPVals(ZSTD_getCParams(cLevel, buf.maxBlockSize, ctx.dictSize)), g_params), buf.maxBlockSize, ctx.dictSize); + + if(g_singleRun) { + ret = benchOnce(buf, ctx); + } else { + BMK_benchFullTable(buf, ctx); + } + + freeBuffers(buf); + freeContexts(ctx); + return ret; +} + + +/*-************************************ +* Local Optimization Functions +**************************************/ + /* One iteration of hill climbing. Specifically, it first tries all * valid parameter configurations w/ manhattan distance 1 and picks the best one * failing that, it progressively tries candidates further and further away (up to #dim + 2) @@ -2096,11 +2083,9 @@ static winnerInfo_t climbOnce(const constraint_t target, /* Optimizes for a fixed strategy */ -/* flexible parameters: iterations of (failed?) climbing (or if we do non-random, maybe this is when everything is close to visitied) +/* flexible parameters: iterations of failed climbing (or if we do non-random, maybe this is when everything is close to visitied) weight more on visit for bad results, less on good results/more on later results / ones with more failures. allocate memoTable here. - only real use for paramTarget is to get the fixed values, right? - maybe allow giving it a first init? */ static winnerInfo_t optimizeFixedStrategy( const buffers_t buf, const contexts_t ctx, @@ -2121,7 +2106,7 @@ static winnerInfo_t optimizeFixedStrategy( for(i = 0; i < tries; i++) { DEBUGOUTPUT("Restart\n"); - do { randomConstrainedParams(&init, memoTableArray, strat); } while(redundantParams(init, target, buf.maxBlockSize)); //only non-redundant params + do { randomConstrainedParams(&init, memoTableArray, strat); } while(redundantParams(init, target, buf.maxBlockSize)); candidateInfo = climbOnce(target, memoTableArray, buf, ctx, init); if(compareResultLT(winnerInfo.result, candidateInfo.result, target, buf.srcSize)) { winnerInfo = candidateInfo; @@ -2180,10 +2165,11 @@ static int nextStrategy(const int currentStrategy, const int bestStrategy) { * cLevel - compression level to exceed (all solutions must be > lvl in cSpeed + ratio) */ -static int g_maxTries = 3; +static int g_maxTries = 5; #define TRY_DECAY 1 -static int optimizeForSize(const char* const * const fileNamesTable, const size_t nbFiles, const char* dictFileName, constraint_t target, paramValues_t paramTarget, int cLevelOpt, int cLevelRun) +static int optimizeForSize(const char* const * const fileNamesTable, const size_t nbFiles, const char* dictFileName, constraint_t target, paramValues_t paramTarget, + const int cLevelOpt, const int cLevelRun, const U32 memoTableLog) { varInds_t varArray [NUM_PARAMS]; int ret = 0; @@ -2216,8 +2202,7 @@ static int optimizeForSize(const char* const * const fileNamesTable, const size_ optimizerAdjustInput(¶mTarget, buf.maxBlockSize); paramBase = cParamUnsetMin(paramTarget); - // TODO: if strategy is fixed, only init that row - allMT = createMemoTableArray(varArray, varLen); + allMT = createMemoTableArray(paramTarget, varArray, varLen, memoTableLog); if(!allMT) { DISPLAY("MemoTable Init Error\n"); @@ -2241,7 +2226,6 @@ static int optimizeForSize(const char* const * const fileNamesTable, const size_ } /* use level'ing mode instead of normal target mode */ - /* Should lvl be parameter-masked here? */ if(g_optmode) { winner.params = cParamsToPVals(ZSTD_getCParams(cLevelOpt, buf.maxBlockSize, ctx.dictSize)); if(BMK_benchParam(&winner.result, buf, ctx, winner.params)) { @@ -2255,7 +2239,7 @@ static int optimizeForSize(const char* const * const fileNamesTable, const size_ g_lvltarget.cSize /= ((double)g_strictness) / 100; target.cSpeed = (U32)g_lvltarget.cSpeed; - target.dSpeed = (U32)g_lvltarget.dSpeed; //See if this is reasonable. + target.dSpeed = (U32)g_lvltarget.dSpeed; BMK_printWinnerOpt(stdout, cLevelOpt, winner.result, winner.params, target, buf.srcSize); } @@ -2379,6 +2363,24 @@ _cleanUp: return ret; } +/*-************************************ +* CLI parsing functions +**************************************/ + +/** longCommandWArg() : + * check if *stringPtr is the same as longCommand. + * If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand. + * @return 0 and doesn't modify *stringPtr otherwise. + * from zstdcli.c + */ +static unsigned longCommandWArg(const char** stringPtr, const char* longCommand) +{ + size_t const comSize = strlen(longCommand); + int const result = !strncmp(*stringPtr, longCommand, comSize); + if (result) *stringPtr += comSize; + return result; +} + static void errorOut(const char* msg) { DISPLAY("%s \n", msg); exit(1); @@ -2446,7 +2448,6 @@ static int usage_advanced(void) DISPLAY( "\nAdvanced options :\n"); DISPLAY( " -T# : set level 1 speed objective \n"); DISPLAY( " -B# : cut input into blocks of size # (default : single block) \n"); - DISPLAY( " -i# : iteration loops (default : %i) \n", NBLOOPS); DISPLAY( " --optimize= : same as -O with more verbose syntax (see README.md)\n"); DISPLAY( " -S : Single run \n"); DISPLAY( " --zstd : Single run, parameter selection same as zstdcli \n"); @@ -2486,6 +2487,10 @@ static int parse_params(const char** argptr, paramValues_t* pv) { return matched; } +/*-************************************ +* Main +**************************************/ + int main(int argc, const char** argv) { int i, @@ -2497,6 +2502,8 @@ int main(int argc, const char** argv) U32 main_pause = 0; int cLevelOpt = 0, cLevelRun = 0; int seperateFiles = 0; + double compressibility = COMPRESSIBILITY_DEFAULT; + U32 memoTableLog = PARAM_UNSET; constraint_t target = { 0, 0, (U32)-1 }; paramValues_t paramTarget = emptyParams(); @@ -2520,7 +2527,7 @@ int main(int argc, const char** argv) PARSE_SUB_ARGS("compressionMemory=" , "cMem=", target.cMem); PARSE_SUB_ARGS("strict=", "stc=", g_strictness); PARSE_SUB_ARGS("maxTries=", "tries=", g_maxTries); - PARSE_SUB_ARGS("memoLimitLog=", "memLog=", g_memoTableLog); + PARSE_SUB_ARGS("memoLimitLog=", "memLog=", memoTableLog); if (longCommandWArg(&argument, "level=") || longCommandWArg(&argument, "lvl=")) { cLevelOpt = readU32FromChar(&argument); g_optmode = 1; if (argument[0]==',') { argument++; continue; } else break; } if (longCommandWArg(&argument, "speedForRatio=") || longCommandWArg(&argument, "speedRatio=")) { g_ratioMultiplier = readDoubleFromChar(&argument); if (argument[0]==',') { argument++; continue; } else break; } @@ -2600,18 +2607,12 @@ int main(int argc, const char** argv) /* Pause at the end (hidden option) */ case 'p': main_pause = 1; argument++; break; - /* Modify Nb Iterations */ - - case 'i': - argument++; - g_nbIterations = readU32FromChar(&argument); - break; /* Sample compressibility (when no file provided) */ case 'P': argument++; { U32 const proba32 = readU32FromChar(&argument); - g_compressibility = (double)proba32 / 100.; + compressibility = (double)proba32 / 100.; } break; @@ -2730,13 +2731,13 @@ int main(int argc, const char** argv) DISPLAY("Optimizer Expects File\n"); return 1; } else { - result = benchSample(); + result = benchSample(compressibility); } } else { if(seperateFiles) { for(i = 0; i < argc - filenamesStart; i++) { if (g_optimizer) { - result = optimizeForSize(argv+filenamesStart + i, 1, dictFileName, target, paramTarget, cLevelOpt, cLevelRun); + result = optimizeForSize(argv+filenamesStart + i, 1, dictFileName, target, paramTarget, cLevelOpt, cLevelRun, memoTableLog); if(result) { DISPLAY("Error on File %d", i); return result; } } else { result = benchFiles(argv+filenamesStart + i, 1, dictFileName, cLevelRun); @@ -2745,7 +2746,7 @@ int main(int argc, const char** argv) } } else { if (g_optimizer) { - result = optimizeForSize(argv+filenamesStart, argc-filenamesStart, dictFileName, target, paramTarget, cLevelOpt, cLevelRun); + result = optimizeForSize(argv+filenamesStart, argc-filenamesStart, dictFileName, target, paramTarget, cLevelOpt, cLevelRun, memoTableLog); } else { result = benchFiles(argv+filenamesStart, argc-filenamesStart, dictFileName, cLevelRun); }