From fb98fd0bd4c7f95e790958e936aacff85abf71ee Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 10 Feb 2015 18:15:20 +0100 Subject: [PATCH] datagen refactoring --- lib/zstd.c | 4 +- programs/datagen.c | 123 +++++++++++++++++++++--------------------- programs/datagen.h | 15 +++++- programs/datagencli.c | 5 +- programs/fullbench.c | 20 +++---- 5 files changed, 91 insertions(+), 76 deletions(-) diff --git a/lib/zstd.c b/lib/zstd.c index b85df00a..38e7634a 100644 --- a/lib/zstd.c +++ b/lib/zstd.c @@ -861,13 +861,13 @@ static size_t ZSTD_compressSequences(BYTE* dst, size_t maxDstSize, } -static void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* anchor, size_t offset, size_t matchLength) +static void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, size_t offset, size_t matchLength) { BYTE* op_lit = seqStorePtr->lit; BYTE* const l_end = op_lit + litLength; /* copy Literals */ - while (op_litlit += litLength; /* literal Length */ diff --git a/programs/datagen.c b/programs/datagen.c index 1ac4313f..e5471727 100644 --- a/programs/datagen.c +++ b/programs/datagen.c @@ -122,84 +122,85 @@ static char RDG_genChar(U32* seed, const void* ltctx) return lt[id]; } +#define RDG_DICTSIZE (32 KB) #define RDG_RAND15BITS ((RDG_rand(seed) >> 3) & 32767) #define RDG_RANDLENGTH ( ((RDG_rand(seed) >> 7) & 7) ? (RDG_rand(seed) & 15) : (RDG_rand(seed) & 511) + 15) -#define RDG_DICTSIZE (32 KB) -void RDG_generate(U64 size, U32 seedInit, double matchProba, double litProba) +void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double matchProba, void* litTable, unsigned* seedPtr) { - BYTE fullbuff[RDG_DICTSIZE + 128 KB + 1]; - BYTE* buff = fullbuff + RDG_DICTSIZE; - U64 total=0; - U32 P32 = (U32)(32768 * matchProba); - U32 pos=1; - U32 genBlockSize = 128 KB; - void* ldctx = RDG_createLiteralDistrib(litProba); - FILE* fout = stdout; - U32* seed = &seedInit;; + BYTE* buffPtr = ((BYTE*)buffer) - prefixSize; + const U32 matchProba32 = (U32)(32768 * matchProba); + size_t pos = prefixSize; + void* ldctx = litTable; + U32* seed = seedPtr; /* init */ - SET_BINARY_MODE(stdout); - fullbuff[0] = RDG_genChar(seed, ldctx); - while (pos<32 KB) + if (pos==0) buffPtr[0] = RDG_genChar(seed, ldctx), pos=1; + + /* Generate compressible data */ + while (pos < buffSize) { /* Select : Literal (char) or Match (within 32K) */ - if (RDG_RAND15BITS < P32) + if (RDG_RAND15BITS < matchProba32) { - /* Copy (within 64K) */ + /* Copy (within 32K) */ + int match; U32 d; - int ref; int length = RDG_RANDLENGTH + 4; U32 offset = RDG_RAND15BITS + 1; if (offset > pos) offset = pos; - ref = pos - offset; + if (pos + length > buffSize) length = buffSize - pos; + match = pos - offset; d = pos + length; - while (pos < d) fullbuff[pos++] = fullbuff[ref++]; + while (pos < d) buffPtr[pos++] = buffPtr[match++]; } else { /* Literal (noise) */ - U32 d = pos + RDG_RANDLENGTH; - while (pos < d) fullbuff[pos++] = RDG_genChar(seed, ldctx); + U32 d; + int length = RDG_RANDLENGTH; + if (pos + length > buffSize) length = buffSize - pos; + d = pos + length; + while (pos < d) buffPtr[pos++] = RDG_genChar(seed, ldctx); } } - - /* Generate compressible data */ - pos = 0; - while (total < size) - { - if (size-total < 128 KB) genBlockSize = (U32)(size-total); - total += genBlockSize; - buff[genBlockSize] = 0; - pos = 0; - while (pos genBlockSize ) length = genBlockSize - pos; - ref = pos - offset; - d = pos + length; - while (pos < d) buff[pos++] = buff[ref++]; - } - else - { - /* Literal (noise) */ - U32 d; - int length = RDG_RANDLENGTH; - if (pos + length > genBlockSize) length = genBlockSize - pos; - d = pos + length; - while (pos < d) buff[pos++] = RDG_genChar(seed, ldctx); - } - } - - /* output generated data */ - fwrite(buff, 1, genBlockSize, fout); - /* Regenerate prefix */ - memcpy(fullbuff, buff + 96 KB, 32 KB); - } +} + + +void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed) +{ + void* ldctx; + if (litProba==0.0) litProba = matchProba / 3.6; + ldctx = RDG_createLiteralDistrib(litProba); + RDG_genBlock(buffer, size, 0, matchProba, ldctx, &seed); +} + + +#define RDG_BLOCKSIZE (128 KB) +void RDG_genOut(unsigned long long size, double matchProba, double litProba, unsigned seed) +{ + BYTE fullbuff[RDG_DICTSIZE + RDG_BLOCKSIZE + 1]; + BYTE* buff = fullbuff + RDG_DICTSIZE; + U64 total = 0; + U32 genBlockSize = RDG_BLOCKSIZE; + void* ldctx; + + /* init */ + if (litProba==0.0) litProba = matchProba / 3.6; + ldctx = RDG_createLiteralDistrib(litProba); + SET_BINARY_MODE(stdout); + + /* Generate dict */ + RDG_genBlock(fullbuff, RDG_DICTSIZE, 0, matchProba, ldctx, &seed); + + /* Generate compressible data */ + while (total < size) + { + RDG_genBlock(buff, RDG_BLOCKSIZE, RDG_DICTSIZE, matchProba, ldctx, &seed); + if (size-total < RDG_BLOCKSIZE) genBlockSize = (U32)(size-total); + total += genBlockSize; + buff[genBlockSize] = 0; + fwrite(buff, 1, genBlockSize, stdout); + /* update dict */ + memcpy(fullbuff, buff + (RDG_BLOCKSIZE - RDG_DICTSIZE), RDG_DICTSIZE); + } } diff --git a/programs/datagen.h b/programs/datagen.h index 6d07f204..631d1463 100644 --- a/programs/datagen.h +++ b/programs/datagen.h @@ -24,4 +24,17 @@ */ -void RDG_generate(unsigned long long size, unsigned seed, double matchProba, double litProba); +#include /* size_t */ + +void RDG_genOut(unsigned long long size, double matchProba, double litProba, unsigned seed); +void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed); +/* RDG_genOut + Generate 'size' bytes of compressible data into stdout. + Compressibility can be controlled using 'matchProba'. + 'LitProba' is optional, and affect variability of bytes. If litProba==0.0, default value is used. + Generated data can be selected using 'seed'. + If (matchProba, litProba and seed) are equal, the function always generate the same content. + + RDG_genBuffer + Same as RDG_genOut, but generate data into provided buffer +*/ diff --git a/programs/datagencli.c b/programs/datagencli.c index b6ca508b..801e1980 100644 --- a/programs/datagencli.c +++ b/programs/datagencli.c @@ -96,7 +96,7 @@ int main(int argc, char** argv) { int argNb; double proba = (double)COMPRESSIBILITY_DEFAULT / 100; - double litProba = proba / 3.6; + double litProba = 0.0; U64 size = SIZE_DEFAULT; U32 seed = SEED_DEFAULT; char* programName; @@ -154,7 +154,6 @@ int main(int argc, char** argv) } if (proba>100.) proba=100.; proba /= 100.; - litProba = proba / 3.6; break; case 'L': /* hidden argument : Literal distribution probability */ argument++; @@ -184,7 +183,7 @@ int main(int argc, char** argv) DISPLAYLEVEL(3, "Seed = %u \n", seed); if (proba!=COMPRESSIBILITY_DEFAULT) DISPLAYLEVEL(3, "Compressibility : %i%%\n", (U32)(proba*100)); - RDG_generate(size, seed, proba, litProba); + RDG_genOut(size, proba, litProba, seed); DISPLAYLEVEL(1, "\n"); return 0; diff --git a/programs/fullbench.c b/programs/fullbench.c index 1992df6e..99ac0b9e 100644 --- a/programs/fullbench.c +++ b/programs/fullbench.c @@ -62,6 +62,7 @@ #include "zstd.h" #include "fse_static.h" +#include "datagen.h" /************************************** @@ -215,6 +216,7 @@ static U64 BMK_GetFileSize(char* infilename) } +#if 1 static U32 BMK_rotl32(unsigned val32, unsigned nbBits) { return((val32 << nbBits) | (val32 >> (32 - nbBits))); } static U32 BMK_rand(U32* src) @@ -265,7 +267,7 @@ static void BMK_datagen(void* buffer, size_t bufferSize, double proba, U32 seed) } } } - +#endif /********************************************************* * Benchmark wrappers @@ -329,8 +331,8 @@ size_t local_conditionalNull(void* dst, size_t dstSize, void* buff2, const void* if (b==0) total = 0; // 825 //if (!b) total = 0; // 825 //total = b ? total : 0; // 622 - //total *= !!b; // 465 //total &= -!b; // 622 + //total *= !!b; // 465 } return total; } @@ -357,7 +359,7 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb) double bestTime = 100000000.; size_t errorCode = 0; - // Declaration + /* Selection */ switch(benchNb) { case 1: @@ -399,7 +401,7 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb) case 11: g_cSize = ZSTD_compress(buff2, dstBuffSize, src, srcSize); break; - case 31: // ZSTD_decodeLiteralsBlock + case 31: /* ZSTD_decodeLiteralsBlock */ { blockProperties_t bp; ZSTD_compress(dstBuff, dstBuffSize, src, srcSize); @@ -417,7 +419,7 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb) srcSize = srcSize > 128 KB ? 128 KB : srcSize; // relative to block break; } - case 32: // ZSTD_decodeSeqHeaders + case 32: /* ZSTD_decodeSeqHeaders */ { blockProperties_t bp; const BYTE* ip = dstBuff; @@ -444,15 +446,14 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb) /* test functions */ - case 101: // conditionalNull + case 101: /* conditionalNull */ { size_t i; - U32 seed = (U32)srcSize; for (i=0; i