datagen refactoring

This commit is contained in:
Yann Collet 2015-02-10 18:15:20 +01:00
parent 6610cb381c
commit fb98fd0bd4
5 changed files with 91 additions and 76 deletions

View File

@ -861,13 +861,13 @@ static size_t ZSTD_compressSequences(BYTE* dst, size_t maxDstSize,
}
static void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* anchor, size_t offset, size_t matchLength)
static void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, size_t offset, size_t matchLength)
{
BYTE* op_lit = seqStorePtr->lit;
BYTE* const l_end = op_lit + litLength;
/* copy Literals */
while (op_lit<l_end) COPY8(op_lit, anchor);
while (op_lit<l_end) COPY8(op_lit, literals);
seqStorePtr->lit += litLength;
/* literal Length */

View File

@ -122,84 +122,85 @@ static char RDG_genChar(U32* seed, const void* ltctx)
return lt[id];
}
#define RDG_DICTSIZE (32 KB)
#define RDG_RAND15BITS ((RDG_rand(seed) >> 3) & 32767)
#define RDG_RANDLENGTH ( ((RDG_rand(seed) >> 7) & 7) ? (RDG_rand(seed) & 15) : (RDG_rand(seed) & 511) + 15)
#define RDG_DICTSIZE (32 KB)
void RDG_generate(U64 size, U32 seedInit, double matchProba, double litProba)
void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double matchProba, void* litTable, unsigned* seedPtr)
{
BYTE fullbuff[RDG_DICTSIZE + 128 KB + 1];
BYTE* buff = fullbuff + RDG_DICTSIZE;
U64 total=0;
U32 P32 = (U32)(32768 * matchProba);
U32 pos=1;
U32 genBlockSize = 128 KB;
void* ldctx = RDG_createLiteralDistrib(litProba);
FILE* fout = stdout;
U32* seed = &seedInit;;
BYTE* buffPtr = ((BYTE*)buffer) - prefixSize;
const U32 matchProba32 = (U32)(32768 * matchProba);
size_t pos = prefixSize;
void* ldctx = litTable;
U32* seed = seedPtr;
/* init */
SET_BINARY_MODE(stdout);
fullbuff[0] = RDG_genChar(seed, ldctx);
while (pos<32 KB)
if (pos==0) buffPtr[0] = RDG_genChar(seed, ldctx), pos=1;
/* Generate compressible data */
while (pos < buffSize)
{
/* Select : Literal (char) or Match (within 32K) */
if (RDG_RAND15BITS < P32)
if (RDG_RAND15BITS < matchProba32)
{
/* Copy (within 64K) */
/* Copy (within 32K) */
int match;
U32 d;
int ref;
int length = RDG_RANDLENGTH + 4;
U32 offset = RDG_RAND15BITS + 1;
if (offset > pos) offset = pos;
ref = pos - offset;
if (pos + length > buffSize) length = buffSize - pos;
match = pos - offset;
d = pos + length;
while (pos < d) fullbuff[pos++] = fullbuff[ref++];
}
else
{
/* Literal (noise) */
U32 d = pos + RDG_RANDLENGTH;
while (pos < d) fullbuff[pos++] = RDG_genChar(seed, ldctx);
}
}
/* Generate compressible data */
pos = 0;
while (total < size)
{
if (size-total < 128 KB) genBlockSize = (U32)(size-total);
total += genBlockSize;
buff[genBlockSize] = 0;
pos = 0;
while (pos<genBlockSize)
{
/* Select : Literal (char) or Match (within 32K) */
if (RDG_RAND15BITS < P32)
{
/* Copy (within 64K) */
int ref;
U32 d;
int length = RDG_RANDLENGTH + 4;
U32 offset = RDG_RAND15BITS + 1;
if (pos + length > genBlockSize ) length = genBlockSize - pos;
ref = pos - offset;
d = pos + length;
while (pos < d) buff[pos++] = buff[ref++];
while (pos < d) buffPtr[pos++] = buffPtr[match++];
}
else
{
/* Literal (noise) */
U32 d;
int length = RDG_RANDLENGTH;
if (pos + length > genBlockSize) length = genBlockSize - pos;
if (pos + length > buffSize) length = buffSize - pos;
d = pos + length;
while (pos < d) buff[pos++] = RDG_genChar(seed, ldctx);
while (pos < d) buffPtr[pos++] = RDG_genChar(seed, ldctx);
}
}
/* output generated data */
fwrite(buff, 1, genBlockSize, fout);
/* Regenerate prefix */
memcpy(fullbuff, buff + 96 KB, 32 KB);
}
}
/* RDG_genBuffer :
   Fill 'buffer' with 'size' bytes of generated data.
   'matchProba' is forwarded to RDG_genBlock.
   'litProba' selects the literal distribution; when it is 0.0, matchProba / 3.6 is used instead.
   'seed' is the starting state of the generator; it is passed by value, so the caller's copy is untouched.
   Nothing is written when 'size' is 0. */
void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed)
{
    void* ldctx;

    /* RDG_genBlock stores its first byte before testing the size limit when it starts at position 0,
       so an empty request must be filtered out here to avoid writing past a zero-length buffer */
    if (size == 0) return;

    if (litProba==0.0) litProba = matchProba / 3.6;
    ldctx = RDG_createLiteralDistrib(litProba);
    /* NOTE(review): ldctx is never released here - confirm whether RDG_createLiteralDistrib allocates */
    RDG_genBlock(buffer, size, 0, matchProba, ldctx, &seed);
}
#define RDG_BLOCKSIZE (128 KB)
/* RDG_genOut :
   Write 'size' bytes of generated data to stdout, produced block by block.
   'matchProba' is forwarded to RDG_genBlock.
   'litProba' selects the literal distribution; when it is 0.0, matchProba / 3.6 is used instead.
   'seed' is the starting state of the generator.
   Each block is generated with the last RDG_DICTSIZE bytes of the previous one available as prefix. */
void RDG_genOut(unsigned long long size, double matchProba, double litProba, unsigned seed)
{
    /* layout : [ prefix : RDG_DICTSIZE ][ block : RDG_BLOCKSIZE ][ 1 spare byte ] */
    BYTE fullbuff[RDG_DICTSIZE + RDG_BLOCKSIZE + 1];
    BYTE* buff = fullbuff + RDG_DICTSIZE;
    U64 total = 0;
    U32 genBlockSize = RDG_BLOCKSIZE;
    void* ldctx;

    /* init */
    if (litProba==0.0) litProba = matchProba / 3.6;
    ldctx = RDG_createLiteralDistrib(litProba);
    /* NOTE(review): ldctx is never released here - confirm whether RDG_createLiteralDistrib allocates */
    SET_BINARY_MODE(stdout);

    /* Generate dict */
    RDG_genBlock(fullbuff, RDG_DICTSIZE, 0, matchProba, ldctx, &seed);

    /* Generate compressible data */
    while (total < size)
    {
        /* RDG_genBlock counts positions from (buffer - prefixSize) and stops at 'buffSize',
           so the limit must include the prefix; passing RDG_BLOCKSIZE alone would leave
           the last RDG_DICTSIZE bytes of the block ungenerated */
        RDG_genBlock(buff, RDG_DICTSIZE + RDG_BLOCKSIZE, RDG_DICTSIZE, matchProba, ldctx, &seed);
        if (size-total < RDG_BLOCKSIZE) genBlockSize = (U32)(size-total);   /* last, partial block */
        total += genBlockSize;
        buff[genBlockSize] = 0;   /* zero the byte right after the emitted data (fullbuff reserves one extra byte) */
        fwrite(buff, 1, genBlockSize, stdout);
        /* update dict : the tail of this block becomes the prefix of the next one */
        memcpy(fullbuff, buff + (RDG_BLOCKSIZE - RDG_DICTSIZE), RDG_DICTSIZE);
    }
}

View File

@ -24,4 +24,17 @@
*/
void RDG_generate(unsigned long long size, unsigned seed, double matchProba, double litProba);
#include <stddef.h> /* size_t */
void RDG_genOut(unsigned long long size, double matchProba, double litProba, unsigned seed);
void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed);
/* RDG_genOut
Generate 'size' bytes of compressible data into stdout.
Compressibility can be controlled using 'matchProba'.
'litProba' is optional, and affects variability of bytes. If litProba==0.0, default value is used.
Generated data can be selected using 'seed'.
If (matchProba, litProba and seed) are equal, the function always generates the same content.
RDG_genBuffer
Same as RDG_genOut, but generates data into provided buffer
*/

View File

@ -96,7 +96,7 @@ int main(int argc, char** argv)
{
int argNb;
double proba = (double)COMPRESSIBILITY_DEFAULT / 100;
double litProba = proba / 3.6;
double litProba = 0.0;
U64 size = SIZE_DEFAULT;
U32 seed = SEED_DEFAULT;
char* programName;
@ -154,7 +154,6 @@ int main(int argc, char** argv)
}
if (proba>100.) proba=100.;
proba /= 100.;
litProba = proba / 3.6;
break;
case 'L': /* hidden argument : Literal distribution probability */
argument++;
@ -184,7 +183,7 @@ int main(int argc, char** argv)
DISPLAYLEVEL(3, "Seed = %u \n", seed);
if (proba!=COMPRESSIBILITY_DEFAULT) DISPLAYLEVEL(3, "Compressibility : %i%%\n", (U32)(proba*100));
RDG_generate(size, seed, proba, litProba);
RDG_genOut(size, proba, litProba, seed);
DISPLAYLEVEL(1, "\n");
return 0;

View File

@ -62,6 +62,7 @@
#include "zstd.h"
#include "fse_static.h"
#include "datagen.h"
/**************************************
@ -215,6 +216,7 @@ static U64 BMK_GetFileSize(char* infilename)
}
#if 1
static U32 BMK_rotl32(unsigned val32, unsigned nbBits) { return((val32 << nbBits) | (val32 >> (32 - nbBits))); }
static U32 BMK_rand(U32* src)
@ -265,7 +267,7 @@ static void BMK_datagen(void* buffer, size_t bufferSize, double proba, U32 seed)
}
}
}
#endif
/*********************************************************
* Benchmark wrappers
@ -329,8 +331,8 @@ size_t local_conditionalNull(void* dst, size_t dstSize, void* buff2, const void*
if (b==0) total = 0; // 825
//if (!b) total = 0; // 825
//total = b ? total : 0; // 622
//total *= !!b; // 465
//total &= -!b; // 622
//total *= !!b; // 465
}
return total;
}
@ -357,7 +359,7 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb)
double bestTime = 100000000.;
size_t errorCode = 0;
// Declaration
/* Selection */
switch(benchNb)
{
case 1:
@ -399,7 +401,7 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb)
case 11:
g_cSize = ZSTD_compress(buff2, dstBuffSize, src, srcSize);
break;
case 31: // ZSTD_decodeLiteralsBlock
case 31: /* ZSTD_decodeLiteralsBlock */
{
blockProperties_t bp;
ZSTD_compress(dstBuff, dstBuffSize, src, srcSize);
@ -417,7 +419,7 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb)
srcSize = srcSize > 128 KB ? 128 KB : srcSize; // relative to block
break;
}
case 32: // ZSTD_decodeSeqHeaders
case 32: /* ZSTD_decodeSeqHeaders */
{
blockProperties_t bp;
const BYTE* ip = dstBuff;
@ -444,15 +446,14 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb)
/* test functions */
case 101: // conditionalNull
case 101: /* conditionalNull */
{
size_t i;
U32 seed = (U32)srcSize;
for (i=0; i<srcSize; i++)
buff2[i] = (BYTE)(BMK_rand(&seed) & 15);
buff2[i] = i & 15;
break;
}
case 102: //
case 102: /* local_decodeLiteralsForward */
{
blockProperties_t bp;
ZSTD_compress(dstBuff, dstBuffSize, src, srcSize);
@ -515,6 +516,7 @@ int benchSample(U32 benchNb)
/* Fill buffer */
BMK_datagen(origBuff, benchedSize, g_compressibilityDefault, 0);
//RDG_generate(benchedSize, 0, g_compressibilityDefault, g_compressibilityDefault / 3.6);
/* bench */
DISPLAY("\r%79s\r", "");