From 5f49034520940af38deb5e13d9d624afc7ffe534 Mon Sep 17 00:00:00 2001 From: George Lu Date: Mon, 18 Jun 2018 11:59:45 -0700 Subject: [PATCH] Working V1 --- programs/bench.c | 95 ++-- programs/bench.h | 3 + tests/Makefile | 2 +- tests/paramgrill.c | 1256 ++++++++++++++++++++++++++++++++++++++++---- 4 files changed, 1213 insertions(+), 143 deletions(-) diff --git a/programs/bench.c b/programs/bench.c index 2fa9262c..0d35f3eb 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -85,7 +85,7 @@ static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; * Exceptions ***************************************/ #ifndef DEBUG -# define DEBUG 0 +# define DEBUG 1 #endif #define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); } @@ -281,12 +281,14 @@ static size_t local_defaultDecompress( } +volatile char g_touched; + /* initFn will be measured once, bench fn will be measured x times */ /* benchFn should return error value or out Size */ /* takes # of blocks and list of size & stuff for each. */ /* only does looping */ /* note time per loop could be zero if interval too short */ -BMK_customReturn_t BMK_benchFunction( +BMK_customReturn_t __attribute__((optimize("O0"))) BMK_benchFunction( BMK_benchFn_t benchFn, void* benchPayload, BMK_initFn_t initFn, void* initPayload, size_t blockCount, @@ -299,16 +301,6 @@ BMK_customReturn_t BMK_benchFunction( BMK_customReturn_t retval; UTIL_time_t clockStart; - { - unsigned i; - for(i = 0; i < blockCount; i++) { - memset(dstBlockBuffers[i], 0xE5, dstBlockCapacities[i]); /* warm up and erase result buffer */ - } - - UTIL_sleepMilli(5); /* give processor time to other processes */ - UTIL_waitForNextTick(); - } - if(!nbLoops) { EXM_THROW_ND(1, BMK_customReturn_t, "nbLoops must be nonzero \n"); } @@ -317,6 +309,22 @@ BMK_customReturn_t BMK_benchFunction( srcSize += srcBlockSizes[ind]; } + { + unsigned i, j; + for(i = 0; i < blockCount; i++) { + for(j = 0; j < srcBlockSizes[i]; j++) { + g_touched = ((const char*)srcBlockBuffers[i])[j]; /* touch */ + } + } + for(i = 0; i < blockCount; i++) { + memset(dstBlockBuffers[i], 0xE5, dstBlockCapacities[i]); /* warm up and erase result buffer */ + //this is written at end proc? where did compressed data get overwritten ny this? 
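            /* Why this warm-up block exists: reading every source byte into the volatile
             * g_touched sink faults the pages in and defeats dead-code elimination (a plain
             * local would let the compiler drop the loop), and the memset both warms and
             * poisons the output blocks, so the first timed iteration is not dominated by
             * cold pages.  A minimal standalone sketch of the same idea (illustrative names,
             * not part of this patch):
             *
             *   static volatile char sink;
             *   static void warmBlocks(const void* const* blocks, const size_t* sizes, size_t n)
             *   {
             *       size_t i, j;
             *       for (i = 0; i < n; i++)
             *           for (j = 0; j < sizes[i]; j++)
             *               sink = ((const char*)blocks[i])[j];   // load kept because sink is volatile
             *   }
             */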
+ } + + UTIL_sleepMilli(5); /* give processor time to other processes */ + UTIL_waitForNextTick(); + } + { unsigned i, j, firstIter = 1; clockStart = UTIL_getTime(); @@ -327,9 +335,9 @@ BMK_customReturn_t BMK_benchFunction( if(ZSTD_isError(res)) { EXM_THROW_ND(2, BMK_customReturn_t, "Function benchmarking failed on block %u of size %u : %s \n", j, (U32)dstBlockCapacities[j], ZSTD_getErrorName(res)); - } else if(firstIter) { + } else if(firstIter) { dstSize += res; - } + } } firstIter = 0; } @@ -393,7 +401,7 @@ BMK_customTimedReturn_t BMK_benchFunctionTimed( { U64 const loopDuration = r.result.result.nanoSecPerRun * cont->nbLoops; r.completed = (cont->timeRemaining <= loopDuration); cont->timeRemaining -= loopDuration; - if (loopDuration > 0) { + if (loopDuration > (TIMELOOP_NANOSEC / 100)) { fastest = MIN(fastest, r.result.result.nanoSecPerRun); if(loopDuration >= MINUSABLETIME) { r.result.result.nanoSecPerRun = fastest; @@ -420,7 +428,7 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( const void ** const srcPtrs, size_t* const srcSizes, void** const cPtrs, size_t* const cSizes, void** const resPtrs, size_t* const resSizes, - void* resultBuffer, void* compressedBuffer, + void** resultBufferPtr, void* compressedBuffer, const size_t maxCompressedSize, BMK_timedFnState_t* timeStateCompress, BMK_timedFnState_t* timeStateDecompress, @@ -432,7 +440,7 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( int displayLevel, const char* displayName, const BMK_advancedParams_t* adv) { size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? adv->blockSize : srcSize) + (!srcSize); /* avoid div by 0 */ - BMK_return_t results; + BMK_return_t results = { { 0, 0., 0., 0 }, 0 } ; size_t const loadedCompressedSize = srcSize; size_t cSize = 0; double ratio = 0.; @@ -454,13 +462,14 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( srcPtr += fileSizes[fileNb]; } { size_t const decodedSize = (size_t)totalDSize64; - free(resultBuffer); - resultBuffer = malloc(decodedSize); - if (!resultBuffer) { + free(*resultBufferPtr); + //TODO: decodedSize corrupted? 
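            /* resultBuffer is now reallocated through a pointer-to-pointer: in decode-only mode
             * this function frees and replaces the buffer, and the caller frees it again after
             * benchmarking, so the caller has to observe the new address.  Passing the pointer
             * by value would leave the caller holding freed memory; roughly (illustrative sketch):
             *
             *   static void grow(void** p, size_t n) { free(*p); *p = malloc(n); }  // caller sees the new buffer
             *   static void bad (void*  p, size_t n) { free(p);  p  = malloc(n); }  // caller keeps a dangling pointer, new buffer leaks
             */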
+ *resultBufferPtr = malloc(decodedSize); + if (!(*resultBufferPtr)) { EXM_THROW(33, BMK_return_t, "not enough memory"); } if (totalDSize64 > decodedSize) { - free(resultBuffer); + free(*resultBufferPtr); EXM_THROW(32, BMK_return_t, "original size is too large"); /* size_t overflow */ } cSize = srcSize; @@ -472,7 +481,7 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( /* Init data blocks */ { const char* srcPtr = (const char*)srcBuffer; char* cPtr = (char*)compressedBuffer; - char* resPtr = (char*)resultBuffer; + char* resPtr = (char*)(*resultBufferPtr); U32 fileNb; for (nbBlocks=0, fileNb=0; fileNbmode != BMK_decodeOnly) { + BMK_customReturn_t compressionResults = BMK_benchFunction(&local_defaultCompress, (void*)ctx, &local_initCCtx, (void*)&cctxprep, - nbBlocks, srcPtrs, srcSizes, cPtrs, cSizes, adv->nbSeconds); + nbBlocks, srcPtrs, srcSizes, cPtrs, cSizes, adv->nbSeconds); if(compressionResults.error) { results.error = compressionResults.error; return results; @@ -642,7 +652,8 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( } /* CRC Checking */ - { U64 const crcCheck = XXH64(resultBuffer, srcSize, 0); + { void* resultBuffer = *resultBufferPtr; + U64 const crcCheck = XXH64(resultBuffer, srcSize, 0); /* adv->mode == 0 -> compress + decompress */ if ((adv->mode == BMK_both) && (crcOrig!=crcCheck)) { size_t u; @@ -692,11 +703,13 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( } DISPLAYLEVEL(2, "%2i#\n", cLevel); } /* Bench */ - results.result.cMem = ZSTD_sizeof_CCtx(ctx); + results.result.cMem = (1 << (comprParams->windowLog)) + ZSTD_sizeof_CCtx(ctx); + results.error = 0; return results; } BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, + void* dstBuffer, size_t dstCapacity, const size_t* fileSizes, unsigned nbFiles, const int cLevel, const ZSTD_compressionParameters* comprParams, const void* dictBuffer, size_t dictBufferSize, @@ -717,26 +730,41 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, void ** const resPtrs = (void** const)malloc(maxNbBlocks * sizeof(void*)); size_t* const resSizes = (size_t* const)malloc(maxNbBlocks * sizeof(size_t)); - const size_t maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); /* add some room for safety */ - void* compressedBuffer = malloc(maxCompressedSize); - void* resultBuffer = malloc(srcSize); BMK_timedFnState_t* timeStateCompress = BMK_createTimeState(adv->nbSeconds); BMK_timedFnState_t* timeStateDecompress = BMK_createTimeState(adv->nbSeconds); + void* compressedBuffer; + const size_t maxCompressedSize = dstCapacity ? 
dstCapacity : ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); + void* resultBuffer = malloc(srcSize); + + BMK_return_t results; - int allocationincomplete = !compressedBuffer || !resultBuffer || + int allocationincomplete; + + if(!dstCapacity) { + compressedBuffer = malloc(maxCompressedSize); + } else { + compressedBuffer = dstBuffer; + } + + allocationincomplete = !compressedBuffer || !resultBuffer || !srcPtrs || !srcSizes || !cPtrs || !cSizes || !resPtrs || !resSizes; + if (!allocationincomplete) { results = BMK_benchMemAdvancedNoAlloc(srcPtrs, srcSizes, cPtrs, cSizes, - resPtrs, resSizes, resultBuffer, compressedBuffer, maxCompressedSize, timeStateCompress, timeStateDecompress, + resPtrs, resSizes, &resultBuffer, compressedBuffer, maxCompressedSize, timeStateCompress, timeStateDecompress, srcBuffer, srcSize, fileSizes, nbFiles, cLevel, comprParams, dictBuffer, dictBufferSize, ctx, dctx, displayLevel, displayName, adv); } + + /* clean up */ BMK_freeTimeState(timeStateCompress); BMK_freeTimeState(timeStateDecompress); - free(compressedBuffer); + if(!dstCapacity) { /* only free if not given */ + free(compressedBuffer); + } free(resultBuffer); free((void*)srcPtrs); @@ -749,7 +777,6 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, if(allocationincomplete) { EXM_THROW(31, BMK_return_t, "allocation error : not enough memory"); } - results.error = 0; return results; } @@ -762,6 +789,7 @@ BMK_return_t BMK_benchMem(const void* srcBuffer, size_t srcSize, const BMK_advancedParams_t adv = BMK_initAdvancedParams(); return BMK_benchMemAdvanced(srcBuffer, srcSize, + NULL, 0, fileSizes, nbFiles, cLevel, comprParams, dictBuffer, dictBufferSize, @@ -783,6 +811,7 @@ static BMK_return_t BMK_benchMemCtxless(const void* srcBuffer, size_t srcSize, EXM_THROW(12, BMK_return_t, "not enough memory for contexts"); } res = BMK_benchMemAdvanced(srcBuffer, srcSize, + NULL, 0, fileSizes, nbFiles, cLevel, comprParams, dictBuffer, dictBufferSize, diff --git a/programs/bench.h b/programs/bench.h index f1ac255f..625b6575 100644 --- a/programs/bench.h +++ b/programs/bench.h @@ -122,6 +122,8 @@ BMK_return_t BMK_syntheticTest(int cLevel, double compressibility, * (cLevel, comprParams + adv in advanced Mode) */ /* srcBuffer - data source, expected to be valid compressed data if in Decode Only Mode * srcSize - size of data in srcBuffer + * dstBuffer - destination buffer to write compressed output in, optional (NULL) + * dstCapacity - capacity of destination buffer, give 0 if dstBuffer = NULL * cLevel - compression level * comprParams - basic compression parameters * dictBuffer - a dictionary if used, null otherwise @@ -144,6 +146,7 @@ BMK_return_t BMK_benchMem(const void* srcBuffer, size_t srcSize, /* See benchMem for normal parameter uses and return, see advancedParams_t for adv */ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, + void* dstBuffer, size_t dstCapacity, const size_t* fileSizes, unsigned nbFiles, const int cLevel, const ZSTD_compressionParameters* comprParams, const void* dictBuffer, size_t dictBufferSize, diff --git a/tests/Makefile b/tests/Makefile index 81e68578..ecf7fe2a 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -200,7 +200,7 @@ zstreamtest-dll : $(ZSTDDIR)/common/xxhash.c # xxh symbols not exposed from dll zstreamtest-dll : $(ZSTREAM_LOCAL_FILES) $(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT) -paramgrill : DEBUGFLAGS = # turn off assert() for speed measurements +#paramgrill : DEBUGFLAGS = # turn off assert() for speed 
measurements paramgrill : $(ZSTD_FILES) $(PRGDIR)/bench.c $(PRGDIR)/datagen.c paramgrill.c $(CC) $(FLAGS) $^ -lm -o $@$(EXT) diff --git a/tests/paramgrill.c b/tests/paramgrill.c index 81c6dffc..44bcedef 100644 --- a/tests/paramgrill.c +++ b/tests/paramgrill.c @@ -41,6 +41,8 @@ #define MB *(1<<20) #define GB *(1ULL<<30) +#define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */ + #define NBLOOPS 2 #define TIMELOOP (2 * SEC_TO_MICRO) #define NB_LEVELS_TRACKED 22 /* ensured being >= ZSTD_maxCLevel() in BMK_init_level_constraints() */ @@ -70,13 +72,39 @@ static const int g_maxNbVariations = 64; #define SLOG_IND 3 #define SLEN_IND 4 #define TLEN_IND 5 -#define STRT_IND 6 -#define NUM_PARAMS 7 +//#define STRT_IND 6 +//#define NUM_PARAMS 7 +#define NUM_PARAMS 6 +//just don't use strategy as a param. +#undef ZSTD_WINDOWLOG_MAX +#define ZSTD_WINDOWLOG_MAX 27 //no long range stuff for now. + +//make 2^[0,10] w/ 999 +#define ZSTD_TARGETLENGTH_MIN 0 //actually targeLengthlog min +#define ZSTD_TARGETLENGTH_MAX 10 + +//#define ZSTD_TARGETLENGTH_MAX 1024 +#define WLOG_RANGE (ZSTD_WINDOWLOG_MAX - ZSTD_WINDOWLOG_MIN + 1) +#define CLOG_RANGE (ZSTD_CHAINLOG_MAX - ZSTD_CHAINLOG_MIN + 1) +#define HLOG_RANGE (ZSTD_HASHLOG_MAX - ZSTD_HASHLOG_MIN + 1) +#define SLOG_RANGE (ZSTD_SEARCHLOG_MAX - ZSTD_SEARCHLOG_MIN + 1) +#define SLEN_RANGE (ZSTD_SEARCHLENGTH_MAX - ZSTD_SEARCHLENGTH_MIN + 1) +#define TLEN_RANGE 11 +//hard coded since we only use powers of 2 (and 999 ~ 1024) + +static const int mintable[NUM_PARAMS] = { ZSTD_WINDOWLOG_MIN, ZSTD_CHAINLOG_MIN, ZSTD_HASHLOG_MIN, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLENGTH_MIN, ZSTD_TARGETLENGTH_MIN }; +static const int maxtable[NUM_PARAMS] = { ZSTD_WINDOWLOG_MAX, ZSTD_CHAINLOG_MAX, ZSTD_HASHLOG_MAX, ZSTD_SEARCHLOG_MAX, ZSTD_SEARCHLENGTH_MAX, ZSTD_TARGETLENGTH_MAX }; +static const int rangetable[NUM_PARAMS] = { WLOG_RANGE, CLOG_RANGE, HLOG_RANGE, SLOG_RANGE, SLEN_RANGE, TLEN_RANGE }; + +//use grid-search or something when space is small enough? +#define SMALL_SEARCH_SPACE 1000 /*-************************************ * Benchmark Parameters **************************************/ +typedef BYTE U8; + static double g_grillDuration_s = 99999; /* about 27 hours */ static U32 g_nbIterations = NBLOOPS; static double g_compressibility = COMPRESSIBILITY_DEFAULT; @@ -150,13 +178,74 @@ static unsigned longCommandWArg(const char** stringPtr, const char* longCommand) return result; } +//assume that clock can at least measure .01 second intervals? +//make this a settable global initialized with fn? +//#define CLOCK_GRANULARITY 100000000ULL +static U64 g_clockGranularity = 100000000ULL; + +static void findClockGranularity(void) { + UTIL_time_t clockStart = UTIL_getTime(); + U64 el1 = 0, el2 = 0; + int i = 0; + do { + el1 = el2; + el2 = UTIL_clockSpanNano(clockStart); + if(el1 < el2) { + U64 iv = el2 - el1; + if(g_clockGranularity > iv) { + g_clockGranularity = iv; + i = 0; + } else { + i++; + } + } + } while(i < 10); + DISPLAY("Granularity: %llu\n", (unsigned long long)g_clockGranularity); +} typedef struct { U32 cSpeed; /* bytes / sec */ U32 dSpeed; - U32 Mem; /* bytes */ + U32 cMem; /* bytes */ } constraint_t; +#define CLAMPCHECK(val,min,max) { \ + if (val && (((val)<(min)) | ((val)>(max)))) { \ + DISPLAY("INVALID PARAMETER CONSTRAINTS\n"); \ + return 0; \ +} } + + +/* Like ZSTD_checkCParams() but allows 0's */ +/* no check on targetLen? 
*/ +static int cParamValid(ZSTD_compressionParameters paramTarget) { + CLAMPCHECK(paramTarget.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); + CLAMPCHECK(paramTarget.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); + CLAMPCHECK(paramTarget.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX); + CLAMPCHECK(paramTarget.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); + CLAMPCHECK(paramTarget.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX); + if(paramTarget.strategy > ZSTD_btultra) { + DISPLAY("INVALID PARAMETER CONSTRAINTS\n"); + return 0; + } + return 1; +} + +static void cParamZeroMin(ZSTD_compressionParameters* paramTarget) { + paramTarget->windowLog = paramTarget->windowLog ? paramTarget->windowLog : ZSTD_WINDOWLOG_MIN; + paramTarget->searchLog = paramTarget->searchLog ? paramTarget->searchLog : ZSTD_SEARCHLOG_MIN; + paramTarget->chainLog = paramTarget->chainLog ? paramTarget->chainLog : ZSTD_CHAINLOG_MIN; + paramTarget->hashLog = paramTarget->hashLog ? paramTarget->hashLog : ZSTD_HASHLOG_MIN; + paramTarget->searchLength = paramTarget->searchLength ? paramTarget->searchLength : ZSTD_SEARCHLENGTH_MIN; + paramTarget->targetLength = paramTarget->targetLength ? paramTarget->targetLength : 1; +} + +static void BMK_translateAdvancedParams(ZSTD_compressionParameters params) +{ + DISPLAY("--zstd=windowLog=%u,chainLog=%u,hashLog=%u,searchLog=%u,searchLength=%u,targetLength=%u,strategy=%u \n", + params.windowLog, params.chainLog, params.hashLog, params.searchLog, params.searchLength, params.targetLength, (U32)(params.strategy)); +} + /*-******************************************************* * Bench functions *********************************************************/ @@ -357,20 +446,45 @@ static int BMK_seed(winnerInfo_t* winners, const ZSTD_compressionParameters para return better; } +/* bounds check in sanitize too? */ +#define CLAMP(var, lo, hi) { \ + var = MAX(MIN(var, hi), lo); \ +} + /* nullified useless params, to ensure count stats */ -static ZSTD_compressionParameters* sanitizeParams(ZSTD_compressionParameters params) +/* no point in windowLog < chainLog (no point 2x chainLog for bt) */ +/* now with built in bounds-checking */ +/* no longer does anything with sanitizeVarArray + clampcheck */ +static ZSTD_compressionParameters sanitizeParams(ZSTD_compressionParameters params) { - g_params = params; if (params.strategy == ZSTD_fast) g_params.chainLog = 0, g_params.searchLog = 0; if (params.strategy == ZSTD_dfast) g_params.searchLog = 0; - if (params.strategy != ZSTD_btopt && params.strategy != ZSTD_btultra) + if (params.strategy != ZSTD_btopt && params.strategy != ZSTD_btultra && params.strategy != ZSTD_fast) g_params.targetLength = 0; - return &g_params; + + return params; +} + +/* new length */ +/* keep old array, will need if iter over strategy. 
*/ +static int sanitizeVarArray(int varLength, U32* varArray, U32* varNew, ZSTD_strategy strat) { + int i, j = 0; + for(i = 0; i < varLength; i++) { + if( !((varArray[i] == CLOG_IND && strat == ZSTD_fast) + || (varArray[i] == SLOG_IND && strat == ZSTD_dfast) + || (varArray[i] == TLEN_IND && strat != ZSTD_btopt && strat != ZSTD_btultra && strat != ZSTD_fast))) { + varNew[j] = varArray[i]; + j++; + } + } + return j; + } /* res should be NUM_PARAMS size */ +/* constructs varArray from ZSTD_compressionParameters style parameter */ static int variableParams(const ZSTD_compressionParameters paramConstraints, U32* res) { int j = 0; if(!paramConstraints.windowLog) { @@ -397,10 +511,6 @@ static int variableParams(const ZSTD_compressionParameters paramConstraints, U32 res[j] = TLEN_IND; j++; } - if(!(U32)paramConstraints.strategy) { - res[j] = STRT_IND; - j++; - } return j; } @@ -417,43 +527,39 @@ static int inverseVariableParams(const ZSTD_compressionParameters paramConstrain res[j] = CLOG_IND; j++; } else { - res[WLOG_IND] = -1; + res[CLOG_IND] = -1; } if(!paramConstraints.hashLog) { res[j] = HLOG_IND; j++; } else { - res[WLOG_IND] = -1; + res[HLOG_IND] = -1; } if(!paramConstraints.searchLog) { res[j] = SLOG_IND; j++; } else { - res[WLOG_IND] = -1; + res[SLOG_IND] = -1; } if(!paramConstraints.searchLength) { res[j] = SLEN_IND; j++; } else { - res[WLOG_IND] = -1; + res[SLEN_IND] = -1; } if(!paramConstraints.targetLength) { res[j] = TLEN_IND; j++; } else { - res[WLOG_IND] = -1; - } - if(!(U32)paramConstraints.strategy) { - res[j] = STRT_IND; - j++; - } else { - res[WLOG_IND] = -1; + res[TLEN_IND] = -1; } + return j; } /* amt will probably always be \pm 1? */ /* slight change from old paramVariation, targetLength can only take on powers of 2 now (999 ~= 1024?) */ +/* take max/min bounds into account as well? */ static void paramVaryOnce(U32 paramIndex, int amt, ZSTD_compressionParameters* ptr) { switch(paramIndex) { @@ -463,13 +569,16 @@ static void paramVaryOnce(U32 paramIndex, int amt, ZSTD_compressionParameters* p case SLOG_IND: ptr->searchLog += amt; break; case SLEN_IND: ptr->searchLength += amt; break; case TLEN_IND: - if(amt > 0) { + if(amt >= 0) { ptr->targetLength <<= amt; + ptr->targetLength = MIN(ptr->targetLength, 999); } else { + if(ptr->targetLength == 999) { + ptr->targetLength = 1024; + } ptr->targetLength >>= -amt; } break; - case STRT_IND: ptr->strategy += amt; break; default: break; } } @@ -477,34 +586,143 @@ static void paramVaryOnce(U32 paramIndex, int amt, ZSTD_compressionParameters* p //Don't fuzz fixed variables. //turn pcs to pcs array with macro for params. //pass in variation array from variableParams -static void paramVariation(ZSTD_compressionParameters* ptr, const U32* varyParams, const int varyLen) +//take nbChanges as argument? +static void paramVariation(ZSTD_compressionParameters* ptr, const U32* varyParams, const int varyLen, U32 nbChanges) { ZSTD_compressionParameters p; U32 validated = 0; while (!validated) { - U32 nbChanges = (FUZ_rand(&g_rand) & 3) + 1; + U32 i; p = *ptr; - for ( ; nbChanges ; nbChanges--) { - const U32 changeID = FUZ_rand(&g_rand) % (2 * varyLen); + for (i = 0 ; i < nbChanges ; i++) { + const U32 changeID = FUZ_rand(&g_rand) % (varyLen << 1); paramVaryOnce(varyParams[changeID >> 1], ((changeID & 1) << 1) - 1, &p); } - validated = !ZSTD_isError(ZSTD_checkCParams(p)); - + //validated = !ZSTD_isError(ZSTD_checkCParams(p)); + validated = cParamValid(p); + //Make sure memory is at least close to feasible? //ZSTD_estimateCCtxSize thing. 
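        /* One way to act on the note above: reuse the estimate that memoTableInit already
         * applies, so candidates that obviously blow the memory budget never reach
         * benchmarking.  Sketch only (paramVariation would need the constraint_t target
         * passed in; 0 means unconstrained elsewhere in this file):
         *
         *   if (target.cMem) {
         *       validated = validated
         *                && (ZSTD_estimateCCtxSize_usingCParams(p) + ((size_t)1 << p.windowLog)
         *                    <= target.cMem);
         *   }
         */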
} - *ptr = p; + *ptr = sanitizeParams(p); } +//varyParams gives us table size? +//1 per strategy +//varyParams should always be sorted smallest to largest +//take arrayLen to allocate memotable +//should be ~10^7 unconstrained. +static size_t memoTableLen(const U32* varyParams, const int varyLen) { + size_t arrayLen = 1; + int i; + for(i = 0; i < varyLen; i++) { + arrayLen *= rangetable[varyParams[i]]; + } + return arrayLen; +} + +//sort of ~lg2 (replace 1024 w/ 999) for memoTableInd Tlen +static unsigned lg2(unsigned x) { + unsigned j = 0; + if(x == 999) { + return 10; + } + while(x >>= 1) { + j++; + } + return j; +} + +//indexes compressionParameters into memotable +//of form +static unsigned memoTableInd(const ZSTD_compressionParameters* ptr, const U32* varyParams, const int varyLen) { + int i; + unsigned ind = 0; + for(i = 0; i < varyLen; i++) { + switch(varyParams[i]) { + case WLOG_IND: ind *= WLOG_RANGE; ind += ptr->windowLog - ZSTD_WINDOWLOG_MIN ; break; + case CLOG_IND: ind *= CLOG_RANGE; ind += ptr->chainLog - ZSTD_CHAINLOG_MIN ; break; + case HLOG_IND: ind *= HLOG_RANGE; ind += ptr->hashLog - ZSTD_HASHLOG_MIN ; break; + case SLOG_IND: ind *= SLOG_RANGE; ind += ptr->searchLog - ZSTD_SEARCHLOG_MIN ; break; + case SLEN_IND: ind *= SLEN_RANGE; ind += ptr->searchLength - ZSTD_SEARCHLENGTH_MIN; break; + case TLEN_IND: ind *= TLEN_RANGE; ind += lg2(ptr->targetLength) - ZSTD_TARGETLENGTH_MIN; break; + } + } + return ind; +} + +/* presumably, the unfilled parameters are already at their correct value */ +/* inverse above function for varyParams */ +static void memoTableIndInv(ZSTD_compressionParameters* ptr, const U32* varyParams, const int varyLen, size_t ind) { + int i; + for(i = varyLen - 1; i >= 0; i--) { + switch(varyParams[i]) { + case WLOG_IND: ptr->windowLog = ind % WLOG_RANGE + ZSTD_WINDOWLOG_MIN; ind /= WLOG_RANGE; break; + case CLOG_IND: ptr->chainLog = ind % CLOG_RANGE + ZSTD_CHAINLOG_MIN; ind /= CLOG_RANGE; break; + case HLOG_IND: ptr->hashLog = ind % HLOG_RANGE + ZSTD_HASHLOG_MIN; ind /= HLOG_RANGE; break; + case SLOG_IND: ptr->searchLog = ind % SLOG_RANGE + ZSTD_SEARCHLOG_MIN; ind /= SLOG_RANGE; break; + case SLEN_IND: ptr->searchLength = ind % SLEN_RANGE + ZSTD_SEARCHLENGTH_MIN; ind /= SLEN_RANGE; break; + case TLEN_IND: ptr->targetLength = MIN(1 << (ind % TLEN_RANGE), 999); ind /= TLEN_RANGE; break; + } + } +} + +//initializing memoTable +/* */ +static void memoTableInit(U8* memoTable, ZSTD_compressionParameters paramConstraints, constraint_t target, const U32* varyParams, const int varyLen) { + size_t i; + size_t arrayLen = memoTableLen(varyParams, varyLen); + int cwFixed = !paramConstraints.chainLog || !paramConstraints.windowLog; + int scFixed = !paramConstraints.searchLog || !paramConstraints.chainLog; + int j = 0; + memset(memoTable, 0, arrayLen); + + + for(i = 0; i < arrayLen; i++) { + memoTableIndInv(¶mConstraints, varyParams, varyLen, i); + BMK_translateAdvancedParams(paramConstraints); + if(ZSTD_estimateCCtxSize_usingCParams(paramConstraints) + (1 << paramConstraints.windowLog) > target.cMem) { + //infeasible; + memoTable[i] = 255; + j++; + } + /* nil out parameter sets equivalent to others. */ + if(cwFixed/* at most least 1 param fixed. 
*/) { + if(paramConstraints.strategy == ZSTD_btlazy2 || paramConstraints.strategy == ZSTD_btopt || paramConstraints.strategy == ZSTD_btultra) { + if(paramConstraints.chainLog > paramConstraints.windowLog + 1) { + if(memoTable[i] != 255) { j++; } + memoTable[i] = 255; + } + } else { + if(paramConstraints.chainLog > paramConstraints.windowLog) { + if(memoTable[i] != 255) { j++; } + memoTable[i] = 255; + } + } + } + if(scFixed) { + if(paramConstraints.searchLog > paramConstraints.chainLog) { + if(memoTable[i] != 255) { j++; } + memoTable[i] = 255; + } + } + } + DISPLAY("%d / %d Invalid\n", j, (int)i); +} #define PARAMTABLELOG 25 #define PARAMTABLESIZE (1<> 3) & PARAMTABLEMASK] + g_alreadyTested[(XXH64(((void*)&sanitizeParams(p), sizeof(p), 0) >> 3) & PARAMTABLEMASK] */ +static BYTE* NB_TESTS_PLAYED(ZSTD_compressionParameters p) { + ZSTD_compressionParameters p2 = sanitizeParams(p); + return &g_alreadyTested[(XXH64((void*)&p2, sizeof(p2), 0) >> 3) & PARAMTABLEMASK]; +} static void playAround(FILE* f, winnerInfo_t* winners, ZSTD_compressionParameters params, @@ -513,21 +731,23 @@ static void playAround(FILE* f, winnerInfo_t* winners, { int nbVariations = 0; UTIL_time_t const clockStart = UTIL_getTime(); - const U32 unconstrained[NUM_PARAMS] = { 0, 1, 2, 3, 4, 5, 6 }; + const U32 unconstrained[NUM_PARAMS] = { 0, 1, 2, 3, 4, 5 }; while (UTIL_clockSpanMicro(clockStart) < g_maxVariationTime) { ZSTD_compressionParameters p = params; + BYTE* b; if (nbVariations++ > g_maxNbVariations) break; - paramVariation(&p, unconstrained, 7); + paramVariation(&p, unconstrained, 7, 4); /* exclude faster if already played params */ - if (FUZ_rand(&g_rand) & ((1 << NB_TESTS_PLAYED(p))-1)) + if (FUZ_rand(&g_rand) & ((1 << *NB_TESTS_PLAYED(p))-1)) continue; /* test */ - NB_TESTS_PLAYED(p)++; + b = NB_TESTS_PLAYED(p); + (*b)++; if (!BMK_seed(winners, p, srcBuffer, srcSize, ctx, dctx)) continue; /* improvement found => search more */ @@ -544,34 +764,37 @@ static ZSTD_compressionParameters randomParams(void) U32 validated = 0; while (!validated) { /* totally random entry */ - p.chainLog = (FUZ_rand(&g_rand) % (ZSTD_CHAINLOG_MAX+1 - ZSTD_CHAINLOG_MIN)) + ZSTD_CHAINLOG_MIN; - p.hashLog = (FUZ_rand(&g_rand) % (ZSTD_HASHLOG_MAX+1 - ZSTD_HASHLOG_MIN)) + ZSTD_HASHLOG_MIN; - p.searchLog = (FUZ_rand(&g_rand) % (ZSTD_SEARCHLOG_MAX+1 - ZSTD_SEARCHLOG_MIN)) + ZSTD_SEARCHLOG_MIN; - p.windowLog = (FUZ_rand(&g_rand) % (ZSTD_WINDOWLOG_MAX+1 - ZSTD_WINDOWLOG_MIN)) + ZSTD_WINDOWLOG_MIN; + p.chainLog = (FUZ_rand(&g_rand) % (ZSTD_CHAINLOG_MAX+1 - ZSTD_CHAINLOG_MIN)) + ZSTD_CHAINLOG_MIN; + p.hashLog = (FUZ_rand(&g_rand) % (ZSTD_HASHLOG_MAX+1 - ZSTD_HASHLOG_MIN)) + ZSTD_HASHLOG_MIN; + p.searchLog = (FUZ_rand(&g_rand) % (ZSTD_SEARCHLOG_MAX+1 - ZSTD_SEARCHLOG_MIN)) + ZSTD_SEARCHLOG_MIN; + p.windowLog = (FUZ_rand(&g_rand) % (ZSTD_WINDOWLOG_MAX+1 - ZSTD_WINDOWLOG_MIN)) + ZSTD_WINDOWLOG_MIN; p.searchLength=(FUZ_rand(&g_rand) % (ZSTD_SEARCHLENGTH_MAX+1 - ZSTD_SEARCHLENGTH_MIN)) + ZSTD_SEARCHLENGTH_MIN; p.targetLength=(FUZ_rand(&g_rand) % (512)); p.strategy = (ZSTD_strategy) (FUZ_rand(&g_rand) % (ZSTD_btultra +1)); - validated = !ZSTD_isError(ZSTD_checkCParams(p)); + //validated = !ZSTD_isError(ZSTD_checkCParams(p)); + validated = cParamValid(p); } return p; } -static ZSTD_compressionParameters randomConstrainedParams(ZSTD_compressionParameters pc) +//destructively modifies pc. +//Maybe if memoTable[ind] > 0 too often, count zeroes and explicitly choose from free stuff? 
+//^ maybe this doesn't matter, with |mt| size it has \approx 1-(1/e) of finding even single free spot in |mt| tries, not too bad. +//TODO: maybe memoTable pc before sanitization too so no repeats? +static void randomConstrainedParams(ZSTD_compressionParameters* pc, U32* varArray, int varLen, U8* memoTable) { - ZSTD_compressionParameters p; - U32 validated = 0; - while (!validated) { - /* totally random entry */ - if(!pc.chainLog) p.chainLog = (FUZ_rand(&g_rand) % (ZSTD_CHAINLOG_MAX+1 - ZSTD_CHAINLOG_MIN)) + ZSTD_CHAINLOG_MIN; - if(!pc.chainLog) p.hashLog = (FUZ_rand(&g_rand) % (ZSTD_HASHLOG_MAX+1 - ZSTD_HASHLOG_MIN)) + ZSTD_HASHLOG_MIN; - if(!pc.chainLog) p.searchLog = (FUZ_rand(&g_rand) % (ZSTD_SEARCHLOG_MAX+1 - ZSTD_SEARCHLOG_MIN)) + ZSTD_SEARCHLOG_MIN; - if(!pc.chainLog) p.windowLog = (FUZ_rand(&g_rand) % (ZSTD_WINDOWLOG_MAX+1 - ZSTD_WINDOWLOG_MIN)) + ZSTD_WINDOWLOG_MIN; - if(!pc.chainLog) p.searchLength=(FUZ_rand(&g_rand) % (ZSTD_SEARCHLENGTH_MAX+1 - ZSTD_SEARCHLENGTH_MIN)) + ZSTD_SEARCHLENGTH_MIN; - if(!pc.chainLog) p.targetLength=(FUZ_rand(&g_rand) % (512)) + 1; //ZSTD_TARGETLENGTH_MIN; //change to 2^[0,10?] - if(!pc.chainLog) p.strategy = (ZSTD_strategy) (FUZ_rand(&g_rand) % (ZSTD_btultra +1)); - validated = !ZSTD_isError(ZSTD_checkCParams(p)); - } - return p; + int tries = memoTableLen(varArray, varLen); //configurable, + const size_t maxSize = memoTableLen(varArray, varLen); + size_t ind; + do { + ind = (FUZ_rand(&g_rand)) % maxSize; + tries--; + } while(memoTable[ind] > 0 && tries > 0); + //&& FUZ_rand(&g_rand) % 256 > memoTable[ind]); get nd choosing? (helpful w/ distance) /* maybe > infeasible bound? */ + + /* memoTable[ind] == 0 -> unexplored */ + memoTableIndInv(pc, varArray, varLen, (unsigned)ind); + *pc = sanitizeParams(*pc); } static void BMK_selectRandomStart( @@ -746,22 +969,80 @@ int benchFiles(const char** fileNamesTable, int nbFiles) return 0; } - -static void BMK_translateAdvancedParams(ZSTD_compressionParameters params) -{ - DISPLAY("--zstd=windowLog=%u,chainLog=%u,hashLog=%u,searchLog=%u,searchLength=%u,targetLength=%u,strategy=%u \n", - params.windowLog, params.chainLog, params.hashLog, params.searchLog, params.searchLength, params.targetLength, (U32)(params.strategy)); -} - -//Results currently don't capture memory usage or anything. //parameter feasibility is not checked, should just be restricted from use. 
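/* Feasibility convention used throughout: a measured result satisfies the target when it meets
 * every constraint that was actually set, and a field left at 0 means "don't care" (cMem is
 * also treated as unconstrained when it is (U32)-1 in the optimizer below).  Illustrative use,
 * with made-up numbers:
 *
 *   constraint_t target = { 50 MB, 200 MB, 64 MB };  // >=50 MB/s compress, >=200 MB/s decompress, <=64 MB ctx memory
 *   if (feasible(results, target)) { ... candidate meets all three constraints ... }
 */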
static int feasible(BMK_result_t results, constraint_t target) { - return (results.cSpeed >= target.cSpeed) && (results.dSpeed >= target.dSpeed) && (results.cMem <= target.Mem || !target.Mem); + return (results.cSpeed >= target.cSpeed) && (results.dSpeed >= target.dSpeed) && (results.cMem <= target.cMem || !target.cMem); +} + +#define EPSILON 0.01 +static int epsilonEqual(double c1, double c2) { + return MAX(c1/c2,c2/c1) < 1 + EPSILON; +} + +//so the compiler stops warning +static int eqZero(double c1) { + return (U64)c1 == (U64)0.0 || (U64)c1 == (U64)-0.0; } /* returns 1 if result2 is strictly 'better' than result1 */ +/* strict comparison / cutoff based */ static int objective_lt(BMK_result_t result1, BMK_result_t result2) { - return (result1.cSize > result2.cSize) || (result1.cSize == result2.cSize && result2.cSpeed > result1.cSpeed); + return (result1.cSize > result2.cSize) || (epsilonEqual(result1.cSize, result2.cSize) && result2.cSpeed > result1.cSpeed) + || (epsilonEqual(result1.cSize,result2.cSize) && epsilonEqual(result2.cSpeed, result1.cSpeed) && result2.dSpeed > result1.dSpeed); +} + +//will probably be some linear combinartion of comp speed, decompSpeed, & ratio (maybe size), and memory? +//pretty arbitrary right now +//maybe better - higher coefficient when below threshold, lower when above +//need to normalize speed? or just use ratio speed / target? +//Maybe don't use ratio at all when looking for feasibility? + +/* maybe dynamically vary the coefficients for this around based on what's already been discovered. (maybe make a reversed ratio cutoff?) concave to pheaily penalize below ratio? */ +static double resultScore(BMK_result_t res, size_t srcSize, constraint_t target) { + double cs = 0., ds = 0., rt, cm = 0.; + const double r1 = 1, r2 = 0.1, rtr = 0.5; + double ret; + if(target.cSpeed) { cs = res.cSpeed / (double)target.cSpeed; } + if(target.dSpeed) { ds = res.dSpeed / (double)target.dSpeed; } + if(target.cMem != (U32)-1) { cm = (double)target.cMem / res.cMem; } + rt = ((double)srcSize / res.cSize); + + //(void)rt; + //(void)rtr; + + ret = (MIN(1, cs) + MIN(1, ds) + MIN(1, cm))*r1 + rt * rtr + + (MAX(0, log(cs))+ MAX(0, log(ds))+ MAX(0, log(cm))) * r2; + //DISPLAY("resultScore: %f\n", ret); + return ret; +} + +/* + double W_ratio = (double)srcSize / testResult.cSize; + double O_ratio = (double)srcSize / winners[cLevel].result.cSize; + double W_ratioNote = log (W_ratio); + double O_ratioNote = log (O_ratio); + size_t W_DMemUsed = (1 << params.windowLog) + (16 KB); + size_t O_DMemUsed = (1 << winners[cLevel].params.windowLog) + (16 KB); + double W_DMemUsed_note = W_ratioNote * ( 40 + 9*cLevel) - log((double)W_DMemUsed); + double O_DMemUsed_note = O_ratioNote * ( 40 + 9*cLevel) - log((double)O_DMemUsed); + + size_t W_CMemUsed = (1 << params.windowLog) + ZSTD_estimateCCtxSize_usingCParams(params); + size_t O_CMemUsed = (1 << winners[cLevel].params.windowLog) + ZSTD_estimateCCtxSize_usingCParams(winners[cLevel].params); + double W_CMemUsed_note = W_ratioNote * ( 50 + 13*cLevel) - log((double)W_CMemUsed); + double O_CMemUsed_note = O_ratioNote * ( 50 + 13*cLevel) - log((double)O_CMemUsed); + + double W_CSpeed_note = W_ratioNote * ( 30 + 10*cLevel) + log(testResult.cSpeed); + double O_CSpeed_note = O_ratioNote * ( 30 + 10*cLevel) + log(winners[cLevel].result.cSpeed); + + double W_DSpeed_note = W_ratioNote * ( 20 + 2*cLevel) + log(testResult.dSpeed); + double O_DSpeed_note = O_ratioNote * ( 20 + 2*cLevel) + log(winners[cLevel].result.dSpeed); + +*/ +//ratio tradeoffs, may be 
useful in guiding + +/* objective_lt, but based on scoring function */ +static int objective_lt2(BMK_result_t result1, BMK_result_t result2, size_t srcSize, constraint_t target) { + return resultScore(result1, srcSize, target) < resultScore(result2, srcSize, target); } /* res gives array dimensions, should be size NUM_PARAMS */ @@ -783,48 +1064,800 @@ static size_t computeStateSize(const ZSTD_compressionParameters paramConstraints static unsigned calcViolation(BMK_result_t results, constraint_t target) { int diffcSpeed = MAX(target.cSpeed - results.cSpeed, 0); int diffdSpeed = MAX(target.dSpeed - results.dSpeed, 0); - int diffcMem = MAX(results.cMem - target.Mem, 0); + int diffcMem = MAX(results.cMem - target.cMem, 0); return diffcSpeed + diffdSpeed + diffcMem; } -/* finds some set of parameters which fulfills req's - * Prioritize highest / try to locally minimize sum? - * Is it ever useful to go out of the param constraints? ? - * random / perturb when revisit? - * momentum? - */ -static ZSTD_compressionParameters findFeasible(constraint_t target, ZSTD_compressionParameters paramTarget) { - unsigned violation; - - ZSTD_compressionParameters winner = randomConstrainedParams(paramTarget); - //just use g_alreadyTested and xxhash? - BYTE* memotable; - do { - //prioritize memory - if(diffcMem >= diffcSpeed && diffcMem >= diffdSpeed) { - - //prioritize compression Speed - } else if (diffcSpeeed >= diffdSpeed && diffcSpeed >= diffcMem) { - - //prioritize decompressionSpeed - } else { - - } - violation = calcViolation(result, target); - } while(objective); - if(validate) { - DISPLAY("Feasible Point Found\n"); - return winner; +/* +uncertaintyConstant >= 1 +returns -1 = 'certainly' infeasible + 0 = unceratin + 1 = 'certainly' feasible +*/ +//paramTarget misnamed, should just be target +static int uncertainFeasibility(double const uncertaintyConstantC, double const uncertaintyConstantD, const constraint_t paramTarget, const BMK_result_t* const results) { + if((paramTarget.cSpeed != 0 && results->cSpeed * uncertaintyConstantC < paramTarget.cSpeed) || + (paramTarget.dSpeed != 0 && results->dSpeed * uncertaintyConstantD < paramTarget.dSpeed) || + (paramTarget.cMem != 0 && results->cMem > paramTarget.cMem)) { + return -1; + } else if((paramTarget.cSpeed == 0 || results->cSpeed / uncertaintyConstantC > paramTarget.cSpeed) && + (paramTarget.dSpeed == 0 || results->dSpeed / uncertaintyConstantD > paramTarget.dSpeed) && + (paramTarget.cMem == 0 || results->cMem <= paramTarget.cMem)) { + return 1; } else { - DISPLAY("No solution found\n"); - ZSTD_compressionParameters ret = { 0, 0, 0, 0, 0, 0, 0 }; - return ret; + return 0; } } +/* 1 - better than prev best + 0 - uncertain + -1 - worse + assume prev_best status is run fully? + but then we'd have to rerun any winners anyway */ +//presumably memory has already been compared, mostly worried about mem, cspeed, dspeed +//uncertainty only applies to speed. +//if using objective fn, this could be much easier since we could just scale that. +//difficult to make judgements about later parameters in prioritization type when there's +//uncertainty on the first. 
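/* The uncertainty factor comes straight from the clock: a single measured loop of duration t ns
 * can be off by roughly the clock granularity g at each end, so the true duration lies in about
 * [t - 2g, t + 2g] and a speed is only trusted up to a factor u = (t + 2g) / t, capped at 2
 * (see calcUncertainty()).  Illustrative use of the three-way answer below, numbers made up:
 *
 *   double u = calcUncertainty(candidate.cSpeed, srcSize);        // e.g. t = 20 ms, g = 0.1 ms -> u = 1.01
 *   switch (uncertainComparison(u, u, &candidate, &best)) {
 *       case  1: ... candidate certainly faster, adopt it ...              break;
 *       case -1: ... certainly slower, discard ...                         break;
 *       default: ... intervals overlap: re-run with BMK_timeMode ...       break;
 *   }
 */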
+static int uncertainComparison(double const uncertaintyConstantC, double const uncertaintyConstantD, BMK_result_t* candidate, BMK_result_t* prevBest) { + (void)uncertaintyConstantD; //unused for now + if(candidate->cSpeed > prevBest->cSpeed * uncertaintyConstantC) { + return 1; + } else if (candidate->cSpeed * uncertaintyConstantC < prevBest->cSpeed) { + return -1; + } else { + return 0; + } +} + +/* speed in b, srcSize in b/s loopDuration in ns */ +//TODO: simplify code in feasibleBench with this instead of writing it all out. +//only applicable for single loop +static double calcUncertainty(double speed, size_t srcSize) { + U64 loopDuration; + if(eqZero(speed)) { return 2; } + loopDuration = ((srcSize * TIMELOOP_NANOSEC) / speed); + return MIN((loopDuration + (double)2 * g_clockGranularity) / loopDuration, 2); +} + +//benchmarks and tests feasibility together +//1 = true = better +//0 = false = not better +//if true then resultPtr will give results. +//2+ on error? +//alt: error = 0 / infeasible as well; +//maybe use compress_only mode for ratio-finding benchmark? +//prioritize ratio > cSpeed > dSpeed > cMem +//Misnamed - should be worse, better, error +//alternative (to make work for feasible-pt searching as well) - only compare to winner, not to target +//but then we need to judge what better means in this context, which shouldn't be the same (strict ratio improvement) +#define INFEASIBLE_RESULT 0 +#define FEASIBLE_RESULT 1 +#define ERROR_RESULT 2 +static int feasibleBench(BMK_result_t* resultPtr, + const void* srcBuffer, size_t srcSize, + void* dstBuffer, size_t dstSize, + ZSTD_CCtx* ctx, ZSTD_DCtx* dctx, + const ZSTD_compressionParameters cParams, + const constraint_t target, + BMK_result_t* winnerResult) { + BMK_advancedParams_t adv = BMK_initAdvancedParams(); + BMK_return_t benchres; + U64 loopDurationC = 0, loopDurationD = 0; + double uncertaintyConstantC, uncertaintyConstantD; + adv.loopMode = BMK_iterMode; + adv.nbSeconds = 1; //get ratio and 2x approx speed? + + //alternative - test 1 iter for ratio, (possibility of error 3 which is fine), + //maybe iter this until 2x measurable for better guarantee? + DISPLAY("Feas:\n"); + benchres = BMK_benchMemAdvanced(srcBuffer,srcSize, dstBuffer, dstSize, &srcSize, 1, 0, &cParams, NULL, 0, ctx, dctx, 0, "File", &adv); + if(benchres.error) { + DISPLAY("ERROR %d !!\n", benchres.error); + } + BMK_printWinner(stdout, CUSTOM_LEVEL, benchres.result, cParams, srcSize); + + if(!benchres.error) { + *resultPtr = benchres.result; + /* if speed is 0 (only happens when time = 0) */ + if(eqZero(benchres.result.cSpeed)) { + loopDurationC = 0; + uncertaintyConstantC = 2; + } else { + loopDurationC = ((srcSize * TIMELOOP_NANOSEC) / benchres.result.cSpeed); + //problem - tested in fullbench, saw speed vary 3x between iters, maybe raise uncertaintyConstraint up? + //possibly has to do with initCCtx? or system stuff? + //asymmetric +/- constant needed? + uncertaintyConstantC = MIN((loopDurationC + (double)(2 * g_clockGranularity)/loopDurationC), 2); //.02 seconds + } + if(eqZero(benchres.result.dSpeed)) { + loopDurationD = 0; + uncertaintyConstantD = 2; + } else { + loopDurationD = ((srcSize * TIMELOOP_NANOSEC) / benchres.result.dSpeed); + //problem - tested in fullbench, saw speed vary 3x between iters, maybe raise uncertaintyConstraint up? + //possibly has to do with initCCtx? or system stuff? + //asymmetric +/- constant needed? 
+ uncertaintyConstantD = MIN((loopDurationD + (double)(2 * g_clockGranularity)/loopDurationD), 2); //.02 seconds + } + + + if(benchres.result.cSize < winnerResult->cSize) { //better compression ratio, just needs to be feasible + //optimistic assume speed + //incoporate some sort of tradeoff comparison with the winner's results? + int feas = uncertainFeasibility(uncertaintyConstantC, uncertaintyConstantD, target, &(benchres.result)); + if(feas == 0) { // uncertain feasibility + adv.loopMode = BMK_timeMode; + if(loopDurationC < TIMELOOP_NANOSEC) { + BMK_return_t benchres2; + adv.mode = BMK_compressOnly; + benchres2 = BMK_benchMemAdvanced(srcBuffer,srcSize, dstBuffer, dstSize, &srcSize, 1, 0, &cParams, NULL, 0, ctx, dctx, 0, "File", &adv); + if(benchres2.error) { + return ERROR_RESULT; + } else { + benchres.result.cSpeed = benchres2.result.cSpeed; + } + } + if(loopDurationD < TIMELOOP_NANOSEC) { + BMK_return_t benchres2; + adv.mode = BMK_decodeOnly; + benchres2 = BMK_benchMemAdvanced(dstBuffer,dstSize, NULL, 0, &srcSize, 1, 0, &cParams, NULL, 0, ctx, dctx, 0, "File", &adv); + if(benchres2.error) { + return ERROR_RESULT; + } else { + benchres.result.dSpeed = benchres2.result.dSpeed; + } + } + *resultPtr = benchres.result; + return feasible(benchres.result, target); + } else { //feas = 1 or -1 map to 1, 0 respectively + return (feas + 1) >> 1; //relies on INFEASIBLE_RESULT == 0, FEASIBLE_RESULT == 1 + } + } else if (benchres.result.cSize == winnerResult->cSize) { //equal ratio, needs to be better than winner in cSpeed/ dSpeed / cMem + int feas = uncertainFeasibility(uncertaintyConstantC, uncertaintyConstantD, target, &(benchres.result)); + if(feas == 0) { // uncertain feasibility + adv.loopMode = BMK_timeMode; + if(loopDurationC < TIMELOOP_NANOSEC) { + BMK_return_t benchres2; + adv.mode = BMK_compressOnly; + benchres2 = BMK_benchMemAdvanced(srcBuffer,srcSize, dstBuffer, dstSize, &srcSize, 1, 0, &cParams, NULL, 0, ctx, dctx, 0, "File", &adv); + if(benchres2.error) { + return ERROR_RESULT; + } else { + benchres.result.cSpeed = benchres2.result.cSpeed; + } + } + if(loopDurationD < TIMELOOP_NANOSEC) { + BMK_return_t benchres2; + adv.mode = BMK_decodeOnly; + benchres2 = BMK_benchMemAdvanced(dstBuffer,dstSize, NULL, 0, &srcSize, 1, 0, &cParams, NULL, 0, ctx, dctx, 0, "File", &adv); + if(benchres2.error) { + return ERROR_RESULT; + } else { + benchres.result.dSpeed = benchres2.result.dSpeed; + } + } + + *resultPtr = benchres.result; + return feasible(benchres.result, target) && objective_lt(*winnerResult, benchres.result); + } else if (feas == 1) { //no need to check feasibility compares (maybe only it is chosen as a winner) + int btw = uncertainComparison(uncertaintyConstantC, uncertaintyConstantD, &(benchres.result), winnerResult); + if(btw == -1) { + return INFEASIBLE_RESULT; + } else { //possibly better, benchmark and find out + adv.loopMode = BMK_timeMode; + benchres = BMK_benchMemAdvanced(srcBuffer, srcSize, dstBuffer, dstSize, &srcSize, 1, 0, &cParams, NULL, 0, ctx, dctx, 0, "File", &adv); + *resultPtr = benchres.result; + return objective_lt(*winnerResult, benchres.result); + } + } else { //feas == -1 + return INFEASIBLE_RESULT; //infeasible + } + } else { + return INFEASIBLE_RESULT; //infeasible + } + } else { + return ERROR_RESULT; //BMK error + } + +} +//sameas before, but +/-? +//alternative, just return comparison result, leave caller to worry about feasibility. +//have version of benchMemAdvanced which takes in dstBuffer/cap as well? 
+//(motivation: repeat tests (maybe just on decompress) don't need further compress runs) +static int infeasibleBench(BMK_result_t* resultPtr, + const void* srcBuffer, size_t srcSize, + void* dstBuffer, size_t dstSize, + ZSTD_CCtx* ctx, ZSTD_DCtx* dctx, + const ZSTD_compressionParameters cParams, + const constraint_t target, + BMK_result_t* winnerResult) { + BMK_advancedParams_t adv = BMK_initAdvancedParams(); + BMK_return_t benchres; + BMK_result_t resultMin, resultMax; + UTIL_time_t startTime; + U64 loopDurationC = 0, loopDurationD = 0; + double uncertaintyConstantC, uncertaintyConstantD; + double winnerRS = resultScore(*winnerResult, srcSize, target); + adv.loopMode = BMK_iterMode; //can only use this for ratio measurement then, super inaccurate timing + adv.nbSeconds = 1; //get ratio and 2x approx speed? //maybe run until twice MIN(minloopinterval * clockDuration) + + (void)startTime; //TODO: actually use this to adjust timing + DISPLAY("WinnerScore: %f\n ", winnerRS); + /* + adv.loopMode = BMK_timeMode; + adv.nbSeconds = 1; */ + benchres = BMK_benchMemAdvanced(srcBuffer,srcSize, dstBuffer, dstSize, &srcSize, 1, 0, &cParams, NULL, 0, ctx, dctx, 0, "File", &adv); + BMK_printWinner(stdout, CUSTOM_LEVEL, benchres.result, cParams, srcSize); + + if(!benchres.error) { + *resultPtr = benchres.result; + if(eqZero(benchres.result.cSpeed)) { + loopDurationC = 0; + uncertaintyConstantC = 2; + } else { + loopDurationC = ((srcSize * TIMELOOP_NANOSEC) / benchres.result.cSpeed); + //problem - tested in fullbench, saw speed vary 3x between iters, maybe raise uncertaintyConstraint up? + //possibly has to do with initCCtx? or system stuff? + uncertaintyConstantC = MIN((loopDurationC + (double)(2 * g_clockGranularity)/loopDurationC), 2); //.02 seconds + } + + if(eqZero(benchres.result.dSpeed)) { + loopDurationD = 0; + uncertaintyConstantD = 2; + } else { + loopDurationD = ((srcSize * TIMELOOP_NANOSEC) / benchres.result.dSpeed); + //problem - tested in fullbench, saw speed vary 3x between iters, maybe raise uncertaintyConstraint up? + //possibly has to do with initCCtx? or system stuff? + uncertaintyConstantD = MIN((loopDurationD + (double)(2 * g_clockGranularity)/loopDurationD), 2); //.02 seconds + } + + /* benchres's certainty range. */ + resultMax = benchres.result; + resultMin = benchres.result; + resultMax.cSpeed *= uncertaintyConstantC; + resultMax.dSpeed *= uncertaintyConstantD; + resultMin.cSpeed /= uncertaintyConstantC; + resultMin.dSpeed /= uncertaintyConstantD; + (void)resultMin; + //TODO: consider if resultMin is actually needed. + if (winnerRS > resultScore(resultMax, srcSize, target)) { + return INFEASIBLE_RESULT; + } else { + //do this w/o copying / stuff + adv.loopMode = BMK_timeMode; + if(loopDurationC < TIMELOOP_NANOSEC) { + BMK_return_t benchres2; + adv.mode = BMK_compressOnly; + benchres2 = BMK_benchMemAdvanced(srcBuffer,srcSize, dstBuffer, dstSize, &srcSize, 1, 0, &cParams, NULL, 0, ctx, dctx, 0, "File", &adv); + if(benchres2.error) { + return ERROR_RESULT; + } else { + benchres.result.cSpeed = benchres2.result.cSpeed; + } + } + if(loopDurationD < TIMELOOP_NANOSEC) { + BMK_return_t benchres2; + adv.mode = BMK_decodeOnly; + //TODO: dstBuffer corrupted sometime between top and now + //probably occuring in feasible bench too. 
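                /* One way to pin down the corruption suspected above: checksum the compressed
                 * buffer right after the ratio run and re-check it here, just before it is
                 * reused as the decode-only source (sketch; XXH64 is already linked in for the
                 * memo hash table):
                 *
                 *   U64 const cSum = XXH64(dstBuffer, benchres.result.cSize, 0);   // after the first bench
                 *   ...
                 *   assert(XXH64(dstBuffer, benchres.result.cSize, 0) == cSum);    // before this decode-only run
                 */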
+ benchres2 = BMK_benchMemAdvanced(dstBuffer, dstSize, NULL, 0, &benchres.result.cSize, 1, 0, &cParams, NULL, 0, ctx, dctx, 0, "File", &adv); + if(benchres2.error) { + return ERROR_RESULT; + } else { + benchres.result.dSpeed = benchres2.result.dSpeed; + } + } + *resultPtr = benchres.result; + return (resultScore(benchres.result, srcSize, target) > winnerRS); + } + + *resultPtr = benchres.result; + } else { + return ERROR_RESULT; //BMK error + } + +} + +/* wrap feasibleBench w/ memotable */ +//TODO: void sanitized and unsanitized ver's so input doesn't double-choose +#define INFEASIBLE_THRESHOLD 200 +static int feasibleBenchMemo(BMK_result_t* resultPtr, + const void* srcBuffer, size_t srcSize, + void* dstBuffer, size_t dstSize, + ZSTD_CCtx* ctx, ZSTD_DCtx* dctx, + const ZSTD_compressionParameters cParams, + const constraint_t target, + BMK_result_t* winnerResult, U8* memoTable, + U32* varyParams, const int varyLen) { + + size_t memind = memoTableInd(&cParams, varyParams, varyLen); + //BMK_translateAdvancedParams(cParams); + if(memoTable[memind] >= INFEASIBLE_THRESHOLD) { + return INFEASIBLE_RESULT; //probably pick a different code for already tested? + //maybe remove this if we incorporate nonrandom location picking? + //what is the intended behavior in this case? + //ignore? stop iterating completely? other? + } else { + int res = feasibleBench(resultPtr, srcBuffer, srcSize, dstBuffer, dstSize, ctx, dctx, + cParams, target, winnerResult); + memoTable[memind] = 255; //tested are all infeasible (other possible values for opti) + return res; + } +} + +//should infeasible stage searching also be memo-marked in the same way? +//don't actually memoize unless result is feasible/error? +static int infeasibleBenchMemo(BMK_result_t* resultPtr, + const void* srcBuffer, size_t srcSize, + void* dstBuffer, size_t dstSize, + ZSTD_CCtx* ctx, ZSTD_DCtx* dctx, + const ZSTD_compressionParameters cParams, + const constraint_t target, + BMK_result_t* winnerResult, U8* memoTable, + U32* varyParams, const int varyLen) { + size_t memind = memoTableInd(&cParams, varyParams, varyLen); + //BMK_translateAdvancedParams(cParams); + if(memoTable[memind] >= INFEASIBLE_THRESHOLD) { + return INFEASIBLE_RESULT; //see feasibleBenchMemo for concerns + } else { + int res = infeasibleBench(resultPtr, srcBuffer, srcSize, dstBuffer, dstSize, ctx, dctx, + cParams, target, winnerResult); + if(res == FEASIBLE_RESULT) { + memoTable[memind] = 255; //infeasible resultscores could still be normal feasible. + } + return res; + } +} + +/* specifically feasibleBenchMemo and infeasibleBenchMemo */ +//maybe not necessary +typedef int (*BMK_benchMemo_t)(BMK_result_t*, const void*, size_t, void*, size_t, ZSTD_CCtx*, ZSTD_DCtx*, + const ZSTD_compressionParameters, const constraint_t, BMK_result_t*, U8*, U32*, const int); + +//varArray should be sanitized when this is called. +//TODO: transition to simpler greedy method if evaluation time is too long? +//would it be better to start at best feasible via feasible or infeasible metric? both? +//possibility climb is infeasible, responsibility of caller to check that. but if something feasible is evaluated, it will be returned +// *actually if it performs too +//sanitize all params here. +//all generation after random should be sanitized. 
(maybe sanitize random) +//TODO: paramTarget uneeded at this point w/ varArray and init; +static winnerInfo_t climbOnce(constraint_t target, U32* varArray, const int varLen, U8* memoTable, + const void* srcBuffer, size_t srcSize, void* dstBuffer, size_t dstSize, ZSTD_CCtx* ctx, ZSTD_DCtx* dctx, ZSTD_compressionParameters init) { + //pick later initializations non-randomly? high dist from explored nodes. + //how to do this efficiently? (might not be too much of a problem, happens rarely, running time probably dominated by benchmarking) + //distance maximizing selection? + //cparam - currently considered center + //candidate - params to benchmark/results + //winner - best option found so far. + ZSTD_compressionParameters cparam = init; + winnerInfo_t candidateInfo, winnerInfo; + int better = 1; + + winnerInfo.params = init; + winnerInfo.result.cSpeed = 0; + winnerInfo.result.dSpeed = 0; + winnerInfo.result.cMem = (size_t)-1; + winnerInfo.result.cSize = (size_t)-1; + + /* ineasible -> (hopefully) feasible */ + /* when nothing is found, this garbages part 2. */ + { + //TODO: initialize these values! + winnerInfo_t bestFeasible1; /* uses feasibleBench Metric */ + winnerInfo_t bestFeasible2; /* uses resultScore Metric */ + + //init these params + bestFeasible1.params = cparam; + bestFeasible2.params = cparam; + bestFeasible1.result.cSpeed = 0; + bestFeasible1.result.dSpeed = 0; + bestFeasible1.result.cMem = (size_t)-1; + bestFeasible1.result.cSize = (size_t)-1; + bestFeasible2.result.cSpeed = 0; + bestFeasible2.result.dSpeed = 0; + bestFeasible2.result.cMem = (size_t)-1; + bestFeasible2.result.cSize = (size_t)-1; + DISPLAY("Climb Part 1\n"); + while(better) { + + //UTIL_time_t timestart = UTIL_getTime(); TODO: adjust sampling based on time + int i, d; + better = 0; + DISPLAY("Start\n"); + cparam = winnerInfo.params; + BMK_printWinner(stdout, CUSTOM_LEVEL, winnerInfo.result, winnerInfo.params, srcSize); + candidateInfo.params = cparam; + //all dist-1 targets + for(i = 0; i < varLen; i++) { + paramVaryOnce(varArray[i], 1, &candidateInfo.params); /* +1 */ + candidateInfo.params = sanitizeParams(candidateInfo.params); + //evaluate + //if(!ZSTD_isError(ZSTD_checkCParams(candidateInfo.params))) { + if(cParamValid(candidateInfo.params)) { + int res = infeasibleBenchMemo(&candidateInfo.result, + srcBuffer, srcSize, + dstBuffer, dstSize, + ctx, dctx, + candidateInfo.params, target, &winnerInfo.result, memoTable, + varArray, varLen); + if(res == FEASIBLE_RESULT) { /* synonymous with better when called w/ infeasibleBM */ + winnerInfo = candidateInfo; + //BMK_printWinner(stdout, CUSTOM_LEVEL, winnerInfo.result, winnerInfo.params, srcSize); + better = 1; + if(feasible(candidateInfo.result, target)) { + bestFeasible2 = winnerInfo; + if(objective_lt(bestFeasible1.result, bestFeasible2.result)) { + bestFeasible1 = bestFeasible2; /* using feasibleBench metric */ + } + } + } + } + candidateInfo.params = cparam; + paramVaryOnce(varArray[i], -1, &candidateInfo.params); /* -1 */ + candidateInfo.params = sanitizeParams(candidateInfo.params); + //evaluate + //if(!ZSTD_isError(ZSTD_checkCParams(candidateInfo.params))) { + if(cParamValid(candidateInfo.params)) { + int res = infeasibleBenchMemo(&candidateInfo.result, + srcBuffer, srcSize, + dstBuffer, dstSize, + ctx, dctx, + candidateInfo.params, target, &winnerInfo.result, memoTable, + varArray, varLen); + if(res == FEASIBLE_RESULT) { + winnerInfo = candidateInfo; + //BMK_printWinner(stdout, CUSTOM_LEVEL, winnerInfo.result, winnerInfo.params, srcSize); + better = 1; + 
if(feasible(candidateInfo.result, target)) { + bestFeasible2 = winnerInfo; + if(objective_lt(bestFeasible1.result, bestFeasible2.result)) { + bestFeasible1 = bestFeasible2; + } + } + } + } + } + + if(better) { + continue; + } + //if 'better' enough, skip further parameter search, center there? + //possible improvement - guide direction here w/ knowledge rather than completely random variation. + for(d = 2; d < varLen + 2; d++) { /* varLen is # dimensions */ + for(i = 0; i < 5; i++) { //make ? relative to # of free dimensions. + int res; + candidateInfo.params = cparam; + /* param error checking already done here */ + paramVariation(&candidateInfo.params, varArray, varLen, d); + res = infeasibleBenchMemo(&candidateInfo.result, + srcBuffer, srcSize, + dstBuffer, dstSize, + ctx, dctx, + candidateInfo.params, target, &winnerInfo.result, memoTable, + varArray, varLen); + if(res == FEASIBLE_RESULT) { /* synonymous with better in this case*/ + winnerInfo = candidateInfo; + //BMK_printWinner(stdout, CUSTOM_LEVEL, winnerInfo.result, winnerInfo.params, srcSize); + better = 1; + if(feasible(candidateInfo.result, target)) { + bestFeasible2 = winnerInfo; + if(objective_lt(bestFeasible1.result, bestFeasible2.result)) { + bestFeasible1 = bestFeasible2; + } + } + } + + } + if(better) { + continue; + } + } + //bias to test previous delta? + //change cparam -> candidate before restart + } + //TODO:Consider if this is best config. idea: explore from obj best keep rbest + cparam = bestFeasible2.params; + candidateInfo = bestFeasible2; + winnerInfo = bestFeasible1; + } + + //is it better to break here instead of bumbling about? + if(winnerInfo.result.cMem == (U32)-1) { + DISPLAY("No Feasible Found\n"); + return winnerInfo; + } + DISPLAY("Climb Part 2\n"); + + better = 1; + /* feasible -> best feasible (hopefully) */ + { + while(better) { + + //UTIL_time_t timestart = UTIL_getTime(); //TODO: if benchmarking is taking too long, be more greedy. 
+ int i, d; + better = 0; + BMK_printWinner(stdout, CUSTOM_LEVEL, winnerInfo.result, winnerInfo.params, srcSize); + //all dist-1 targets + cparam = winnerInfo.params; //TODO: this messes the taking bestFeasible1, bestFeasible2 + candidateInfo.params = cparam; + for(i = 0; i < varLen; i++) { + paramVaryOnce(varArray[i], 1, &candidateInfo.params); + candidateInfo.params = sanitizeParams(candidateInfo.params); + + //evaluate + //if(!ZSTD_isError(ZSTD_checkCParams(candidateInfo.params))) { + if(cParamValid(candidateInfo.params)) { + int res = feasibleBenchMemo(&candidateInfo.result, + srcBuffer, srcSize, + dstBuffer, dstSize, + ctx, dctx, + candidateInfo.params, target, &winnerInfo.result, memoTable, + varArray, varLen); + if(res == FEASIBLE_RESULT) { + winnerInfo = candidateInfo; + //BMK_printWinner(stdout, CUSTOM_LEVEL, winnerInfo.result, winnerInfo.params, srcSize); + better = 1; + } + } + candidateInfo.params = cparam; + paramVaryOnce(varArray[i], -1, &candidateInfo.params); + candidateInfo.params = sanitizeParams(candidateInfo.params); + //evaluate + //if(!ZSTD_isError(ZSTD_checkCParams(candidateInfo.params))) { + if(cParamValid(candidateInfo.params)) { + int res = feasibleBenchMemo(&candidateInfo.result, + srcBuffer, srcSize, + dstBuffer, dstSize, + ctx, dctx, + candidateInfo.params, target, &winnerInfo.result, memoTable, + varArray, varLen); + if(res == FEASIBLE_RESULT) { + winnerInfo = candidateInfo; + //BMK_printWinner(stdout, CUSTOM_LEVEL, winnerInfo.result, winnerInfo.params, srcSize); + better = 1; + } + } + } + //if 'better' enough, skip further parameter search, center there? + //possible improvement - guide direction here w/ knowledge rather than completely random variation. + for(d = 2; d < varLen + 2; d++) { /* varLen is # dimensions */ + for(i = 0; i < 5; i++) { //TODO: make ? relative to # of free dimensions. + int res; + candidateInfo.params = cparam; + /* param error checking already done here */ + paramVariation(&candidateInfo.params, varArray, varLen, d); //info candidateInfo.params is garbage, this is too. + res = feasibleBenchMemo(&candidateInfo.result, + srcBuffer, srcSize, + dstBuffer, dstSize, + ctx, dctx, + candidateInfo.params, target, &winnerInfo.result, memoTable, + varArray, varLen); + if(res == FEASIBLE_RESULT) { + winnerInfo = candidateInfo; + //BMK_printWinner(stdout, CUSTOM_LEVEL, winnerInfo.result, winnerInfo.params, srcSize); + better = 1; + } + } + if(better) { + continue; + } + } + //bias to test previous delta? + //change cparam -> candidate before restart + } + } + return winnerInfo; +} + +//optimizeForSize but with fixed strategy +//place to configure/filter out strategy specific parameters. +//need args for all buffers and parameter stuff +//sanitization here. + +//flexible parameters: iterations of (failed?) climbing (or if we do non-random, maybe this is when everything is close to visitied) +//weight more on visit for bad results, less on good results/more on later results / ones with more failures. +//allocate memoTable here. +//only real use for paramTarget is to get the fixed values, right? +static winnerInfo_t optimizeFixedStrategy( + const void* srcBuffer, const size_t srcSize, + void* dstBuffer, size_t dstSize, + constraint_t target, ZSTD_compressionParameters paramTarget, + ZSTD_strategy strat, U32* varArray, int varLen) { + int i = 0; //TODO: Temp fix 10 iters, check effects of changing this? 
+ U32* varNew = malloc(sizeof(U32) * varLen);
+ int varLenNew = sanitizeVarArray(varLen, varArray, varNew, strat);
+ size_t memoLen = memoTableLen(varNew, varLenNew);
+ U8* memoTable = malloc(sizeof(U8) * memoLen);
+ ZSTD_compressionParameters init;
+ ZSTD_CCtx* ctx = ZSTD_createCCtx();
+ ZSTD_DCtx* dctx = ZSTD_createDCtx();
+ winnerInfo_t winnerInfo, candidateInfo;
+ winnerInfo.result.cSpeed = 0;
+ winnerInfo.result.dSpeed = 0;
+ winnerInfo.result.cMem = (size_t)(-1);
+ winnerInfo.result.cSize = (size_t)(-1);
+ /* so climb is given the right fixed strategy */
+ paramTarget.strategy = strat;
+ /* to pass ZSTD_checkCParams */
+ cParamZeroMin(&paramTarget);
+
+ /* check allocations before the memo table is written to */
+ if(!ctx || !dctx || !memoTable || !varNew) {
+ DISPLAY("NOT ENOUGH MEMORY ! ! ! \n");
+ goto _cleanUp;
+ }
+
+ memoTableInit(memoTable, paramTarget, target, varNew, varLenNew);
+
+ init = paramTarget;
+
+ while(i < 10) {
+ DISPLAY("Restart\n");
+ randomConstrainedParams(&init, varNew, varLenNew, memoTable);
+ candidateInfo = climbOnce(target, varNew, varLenNew, memoTable, srcBuffer, srcSize, dstBuffer, dstSize, ctx, dctx, init);
+ if(objective_lt(winnerInfo.result, candidateInfo.result)) {
+ winnerInfo = candidateInfo;
+ DISPLAY("Climb Winner: ");
+ BMK_printWinner(stdout, CUSTOM_LEVEL, winnerInfo.result, winnerInfo.params, srcSize);
+ }
+ i++;
+ }
+
+_cleanUp:
+ ZSTD_freeCCtx(ctx);
+ ZSTD_freeDCtx(dctx);
+ free(memoTable);
+ free(varNew);
+ return winnerInfo;
+}
+
+// bigger and (hopefully) better* than optimizeForSize
+// TODO: Change level benchmarking to respect constraints.
+static int optimizeForSize2(const char* inFileName, constraint_t target, ZSTD_compressionParameters paramTarget)
+{
+ FILE* const inFile = fopen( inFileName, "rb" );
+ U64 const inFileSize = UTIL_getFileSize(inFileName);
+ size_t benchedSize = BMK_findMaxMem(inFileSize*3) / 3;
+ void* origBuff;
+ U32 varArray [NUM_PARAMS];
+ int varLen = variableParams(paramTarget, varArray);
+ /* Init */
+
+ if(!cParamValid(paramTarget)) {
+ return 10;
+ }
+
+ if (inFile==NULL) { DISPLAY( "Pb opening %s\n", inFileName); return 11; }
+ if (inFileSize == UTIL_FILESIZE_UNKNOWN) {
+ DISPLAY("Pb evaluating size of %s \n", inFileName);
+ fclose(inFile);
+ return 11;
+ }
+
+ /* Memory allocation & restrictions */
+ if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;
+ if (benchedSize < inFileSize) {
+ DISPLAY("Not enough memory for '%s' \n", inFileName);
+ fclose(inFile);
+ return 11;
+ }
+
+ /* Alloc */
+ origBuff = malloc(benchedSize);
+ if(!origBuff) {
+ DISPLAY("\nError: not enough memory!\n");
+ fclose(inFile);
+ return 12;
+ }
+
+ /* Fill input buffer */
+ DISPLAY("Loading %s... \r", inFileName);
+ { size_t const readSize = fread(origBuff, 1, benchedSize, inFile);
+ fclose(inFile);
+ if(readSize != benchedSize) {
+ DISPLAY("\nError: problem reading file '%s' !! \n", inFileName);
+ free(origBuff);
+ return 13;
+ } }
+
+ /* bench */
+ DISPLAY("\r%79s\r", "");
+ DISPLAY("optimizing for %s", inFileName);
+ if(target.cSpeed != 0) { DISPLAY(" - limit compression speed %u MB/s", target.cSpeed / 1000000); }
+ if(target.dSpeed != 0) { DISPLAY(" - limit decompression speed %u MB/s", target.dSpeed / 1000000); }
+ if(target.cMem != (U32)-1) { DISPLAY(" - limit memory %u MB", target.cMem / 1000000); }
+ DISPLAY("\n");
+ findClockGranularity();
+
+ { ZSTD_CCtx* const ctx = ZSTD_createCCtx();
+ ZSTD_DCtx* const dctx = ZSTD_createDCtx();
+ winnerInfo_t winner;
+ //BMK_result_t candidate;
+ const size_t blockSize = g_blockSize ? g_blockSize : benchedSize;
+ U32 const maxNbBlocks = (U32) ((benchedSize + (blockSize-1)) / blockSize) + 1;
+ const size_t maxCompressedSize = ZSTD_compressBound(benchedSize) + (maxNbBlocks * 1024);
+ void* compressedBuffer = malloc(maxCompressedSize);
+
+ /* init */
+ if (ctx==NULL) { DISPLAY("\n ZSTD_createCCtx error \n"); free(origBuff); return 14;}
+ if(compressedBuffer==NULL) { DISPLAY("\n Allocation Error \n"); free(origBuff); ZSTD_freeCCtx(ctx); return 15; }
+
+ memset(&winner, 0, sizeof(winner));
+ winner.result.cSize = (size_t)(-1);
+
+ /* find best solution from default params */
+ //Can't do this w/ cparameter constraints
+ //still useful though?
+ /*
+ { const int maxSeeds = g_noSeed ? 1 : ZSTD_maxCLevel();
+ int i;
+ for (i=1; i<=maxSeeds; i++) {
+ ZSTD_compressionParameters const CParams = ZSTD_getCParams(i, blockSize, 0);
+ BMK_benchParam(&candidate, origBuff, benchedSize, ctx, dctx, CParams);
+ if (!feasible(candidate, target) ) {
+ break;
+ }
+ if (feasible(candidate,target) && objective_lt(winner.result, candidate))
+ {
+ winner.params = CParams;
+ winner.result = candidate;
+ BMK_printWinner(stdout, i, winner.result, winner.params, benchedSize);
+ } }
+ }*/
+ BMK_printWinner(stdout, CUSTOM_LEVEL, winner.result, winner.params, benchedSize);
+
+ BMK_translateAdvancedParams(winner.params);
+
+ /* start real tests */
+ {
+ if(paramTarget.strategy == 0) {
+ int st;
+ for(st = 1; st <= 8; st++) {
+ winnerInfo_t wc = optimizeFixedStrategy(origBuff, benchedSize, compressedBuffer, maxCompressedSize,
+ target, paramTarget, st, varArray, varLen);
+ DISPLAY("StratNum %d\n", st);
+ if(objective_lt(winner.result, wc.result)) {
+ winner = wc;
+ }
+ }
+ } else {
+ winner = optimizeFixedStrategy(origBuff, benchedSize, compressedBuffer, maxCompressedSize,
+ target, paramTarget, paramTarget.strategy, varArray, varLen);
+ }
+
+ }
+
+ /* no solution found */
+ if(winner.result.cSize == (size_t)-1) {
+ DISPLAY("No feasible solution found\n");
+ return 1;
+ }
+ /* end summary */
+ BMK_printWinner(stdout, CUSTOM_LEVEL, winner.result, winner.params, benchedSize);
+ BMK_translateAdvancedParams(winner.params);
+ DISPLAY("grillParams size - optimizer completed \n");
+
+ /* clean up */
+ free(compressedBuffer);
+ ZSTD_freeCCtx(ctx);
+ ZSTD_freeDCtx(dctx);
+ }
+
+ free(origBuff);
+ return 0;
+}
+
+
 /* optimizeForSize():
 * targetSpeed : expressed in B/s */
-/* if state space is small (from paramTarget) */
+/* expresses targeted compression, decompression speeds and memory requirements */
+/* if state space is small (from paramTarget), exhaustive search? */
+//things to consider : if doing strategy-separate approach, what cutoffs to evaluate each strategy
+//or do all? can't be absolute, should be relative after some sort of calibration
+//(synthetic? test levels? we don't care about data specifics right now; scale?)
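The climb above leans on two predicates whose definitions are not part of this hunk: feasible(), which decides whether a measured result meets the speed and memory constraints, and objective_lt(), which orders two results. The minimal sketch below illustrates the semantics they appear to have, judging only from the call sites in this patch (constraint fields cSpeed/dSpeed/cMem, result fields cSize/cSpeed/dSpeed/cMem, and the "unset" defaults 0 and (U32)-1); the standalone types, the *Sketch names and the tie-break rule are assumptions for illustration, not the actual paramgrill implementation.

#include <stdio.h>
#include <stddef.h>

typedef unsigned int U32;

/* Stand-in types: field names mirror the call sites above; the exact widths
 * used in paramgrill.c may differ. */
typedef struct { U32 cSpeed; U32 dSpeed; U32 cMem; } constraintSketch_t;   /* B/s, B/s, bytes */
typedef struct { size_t cSize; double cSpeed; double dSpeed; size_t cMem; } resultSketch_t;

/* feasible(): all constraints met.  cSpeed/dSpeed of 0 and cMem of (U32)-1
 * mean "unconstrained" (cf. the default target { 0, 0, (U32)-1 } in main),
 * and these comparisons then hold trivially. */
static int feasibleSketch(resultSketch_t r, constraintSketch_t t)
{
    return (r.cSpeed >= t.cSpeed) && (r.dSpeed >= t.dSpeed) && (r.cMem <= t.cMem);
}

/* objective_lt(a, b): "a is a worse result than b".  Compressed size is the
 * primary objective; using compression speed as the tie-break is a guess. */
static int objective_ltSketch(resultSketch_t a, resultSketch_t b)
{
    if (a.cSize != b.cSize) return a.cSize > b.cSize;
    return a.cSpeed < b.cSpeed;
}

int main(void)
{
    constraintSketch_t limit = { 50000000, 200000000, (U32)-1 };  /* 50 MB/s, 200 MB/s, no memory cap */
    resultSketch_t a = { 1000, 60000000.0, 250000000.0, (size_t)1 << 20 };
    resultSketch_t b = {  900, 55000000.0, 240000000.0, (size_t)1 << 22 };
    printf("a feasible: %d  b feasible: %d  a worse than b: %d\n",
           feasibleSketch(a, limit), feasibleSketch(b, limit), objective_ltSketch(a, b));
    return 0;
}

Under this reading, the winnerInfo initialisation in optimizeFixedStrategy (cSize and cMem set to their maximum) makes any first measured result count as an improvement.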
 int optimizeForSize(const char* inFileName, constraint_t target, ZSTD_compressionParameters paramTarget)
 {
 FILE* const inFile = fopen( inFileName, "rb" );
@@ -872,7 +1905,7 @@ int optimizeForSize(const char* inFileName, constraint_t target, ZSTD_compressio
 DISPLAY("optimizing for %s", inFileName);
 if(target.cSpeed != 0) { DISPLAY(" - limit compression speed %u MB/s", target.cSpeed / 1000000); }
 if(target.dSpeed != 0) { DISPLAY(" - limit decompression speed %u MB/s", target.dSpeed / 1000000); }
- if(target.Mem != 0) { DISPLAY(" - limit memory %u MB", target.Mem / 1000000); }
+ if(target.cMem != 0) { DISPLAY(" - limit memory %u MB", target.cMem / 1000000); }
 DISPLAY("\n");
 { ZSTD_CCtx* const ctx = ZSTD_createCCtx();
 ZSTD_DCtx* const dctx = ZSTD_createDCtx();
@@ -881,12 +1914,13 @@ int optimizeForSize(const char* inFileName, constraint_t target, ZSTD_compressio
 const size_t blockSize = g_blockSize ? g_blockSize : benchedSize;
 /* init */
- if (ctx==NULL) { DISPLAY("\n ZSTD_createCCtx error \n"); free(origBuff); return 14;}
+ if (ctx==NULL) { DISPLAY("\n ZSTD_createCCtx error \n"); free(origBuff); return 14; }
+
 memset(&winner, 0, sizeof(winner));
 winner.result.cSize = (size_t)(-1);
 /* find best solution from default params */
- //Can't do this iteration normally w/ cparameter constraints
+ //Can't do this w/ cparameter constraints
 { const int maxSeeds = g_noSeed ? 1 : ZSTD_maxCLevel();
 int i;
 for (i=1; i<=maxSeeds; i++) {
@@ -910,15 +1944,17 @@ int optimizeForSize(const char* inFileName, constraint_t target, ZSTD_compressio
 { time_t const grillStart = time(NULL);
 do {
 ZSTD_compressionParameters params = winner.params;
- paramVariation(&params, paramVarArray, paramCount);
+ BYTE* b;
+ paramVariation(&params, paramVarArray, paramCount, 4);
 if ((FUZ_rand(&g_rand) & 31) == 3) params = randomParams(); /* totally random config to improve search space */
 params = ZSTD_adjustCParams(params, blockSize, 0);
 /* exclude faster if already played set of params */
- if (FUZ_rand(&g_rand) & ((1 << NB_TESTS_PLAYED(params))-1)) continue;
+ if (FUZ_rand(&g_rand) & ((1 << *NB_TESTS_PLAYED(params))-1)) continue;
 /* test */
- NB_TESTS_PLAYED(params)++;
+ b = NB_TESTS_PLAYED(params);
+ (*b)++;
 BMK_benchParam(&candidate, origBuff, benchedSize, ctx, dctx, params);
 /* improvement found => new winner */
@@ -1027,7 +2063,8 @@ int main(int argc, const char** argv)
 U32 optimizer = 0;
 U32 main_pause = 0;
- constraint_t target = { 0 , 0, 0 }; //0 for anything unset
+
+ constraint_t target = { 0, 0, (U32)-1 }; //0 for anything unset, except cMem which uses (U32)-1
 ZSTD_compressionParameters paramTarget = { 0, 0, 0, 0, 0, 0, 0 };
 assert(argc>=1); /* for exename */
@@ -1053,7 +2090,7 @@
 if (longCommandWArg(&argument, "strategy=") || longCommandWArg(&argument, "strat=")) { paramTarget.strategy = (ZSTD_strategy)(readU32FromChar(&argument)); if (argument[0]==',') { argument++; continue; } else break; }
 if (longCommandWArg(&argument, "compressionSpeed=") || longCommandWArg(&argument, "cSpeed=")) { target.cSpeed = readU32FromChar(&argument) * 1000000; if (argument[0]==',') { argument++; continue; } else break; }
 if (longCommandWArg(&argument, "decompressionSpeed=") || longCommandWArg(&argument, "dSpeed=")) { target.dSpeed = readU32FromChar(&argument) * 1000000; if (argument[0]==',') { argument++; continue; } else break; }
- if (longCommandWArg(&argument, "compressionMemory=") || longCommandWArg(&argument, "cMem=")) { target.Mem = readU32FromChar(&argument) * 1000000; if (argument[0]==',') { argument++; continue; } else break; }
+ if (longCommandWArg(&argument, "compressionMemory=") || longCommandWArg(&argument, "cMem=")) { target.cMem = readU32FromChar(&argument) * 1000000; if (argument[0]==',') { argument++; continue; } else break; }
 /* in MB or MB/s */
 DISPLAY("invalid optimization parameter \n");
 return 1;
@@ -1132,7 +2169,7 @@ int main(int argc, const char** argv)
 continue;
 case 'M':
 argument++;
- target.Mem = readU32FromChar(&argument) * 1000000;
+ target.cMem = readU32FromChar(&argument) * 1000000;
 continue;
 case 'w':
 argument++;
@@ -1255,7 +2292,8 @@ int main(int argc, const char** argv)
 }
 } else {
 if (optimizer) {
- result = optimizeForSize(input_filename, target, paramTarget);
+ result = optimizeForSize2(input_filename, target, paramTarget);
+ //optimizeForSize(input_filename, target, paramTarget);
 } else {
 result = benchFiles(argv+filenamesStart, argc-filenamesStart);
 }
 }
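Stepping back, the search structure this patch adds — up to 10 random restarts, each feeding a greedy climb that first reaches a feasible point and then improves the objective among feasible neighbours — is easier to see on a toy problem. The sketch below is purely illustrative: the parameter space, the feasibility rule, the cost function and the step size are made up and share nothing with paramgrill beyond the control flow.

#include <stdio.h>
#include <stdlib.h>

/* Toy stand-ins: the "parameter" is an int in [0,98], a point is feasible
 * when it is a multiple of 7, and the cost to minimise (the analogue of
 * compressed size) is the squared distance to 42. */
static int isFeasible(int x) { return x % 7 == 0; }
static int cost(int x)       { return (x - 42) * (x - 42); }

/* One climb: phase 1 walks forward until a feasible point is reached,
 * phase 2 moves between feasible neighbours (+/-7) while the cost improves. */
static int climbOnceToy(int start)
{
    int cur = start;
    while (!isFeasible(cur)) cur++;                    /* phase 1: reach feasibility */
    for (;;) {
        int best = cur;
        if (cur + 7 <= 98 && cost(cur + 7) < cost(best)) best = cur + 7;
        if (cur - 7 >= 0  && cost(cur - 7) < cost(best)) best = cur - 7;
        if (best == cur) return cur;                   /* no better feasible neighbour */
        cur = best;
    }
}

int main(void)
{
    int i, winner = -1;
    srand(0);
    for (i = 0; i < 10; i++) {                         /* 10 random restarts, as in optimizeFixedStrategy */
        int cand = climbOnceToy(rand() % 92);          /* keep phase 1 inside [0,98] */
        if (winner < 0 || cost(cand) < cost(winner)) winner = cand;
    }
    printf("best feasible point: %d (cost %d)\n", winner, cost(winner));
    return 0;
}

Every restart lands on 42 here because the toy cost is convex; the multiple restarts in the patch exist presumably because the real compression-parameter landscape is not that well-behaved.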