diff --git a/programs/Makefile b/programs/Makefile index 64044622..20205b90 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -59,7 +59,7 @@ endif default: zstd -all: zstd zstd32 fullbench fullbench32 fuzzer fuzzer32 +all: zstd zstd32 fullbench fullbench32 fuzzer fuzzer32 datagen zstd: $(ZSTDDIR)/zstd.c xxhash.c bench.c fileio.c zstdcli.c $(CC) $(FLAGS) $^ -o $@$(EXT) diff --git a/programs/datagen.c b/programs/datagen.c index 50daaa07..fa0e62a1 100644 --- a/programs/datagen.c +++ b/programs/datagen.c @@ -130,7 +130,6 @@ static BYTE RDG_genChar(U32* seed, const litDistribTable lt) } -#define RDG_DICTSIZE (32 KB) #define RDG_RAND15BITS ((RDG_rand(seed) >> 3) & 32767) #define RDG_RANDLENGTH ( ((RDG_rand(seed) >> 7) & 7) ? (RDG_rand(seed) & 15) : (RDG_rand(seed) & 511) + 15) void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double matchProba, litDistribTable lt, unsigned* seedPtr) @@ -140,7 +139,7 @@ void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double match size_t pos = prefixSize; U32* seed = seedPtr; - /* special case */ + /* special case : sparse content */ while (matchProba >= 1.0) { size_t size0 = RDG_rand(seed) & 3; @@ -154,6 +153,7 @@ void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double match memset(buffPtr+pos, 0, size0); pos += size0; buffPtr[pos-1] = RDG_genChar(seed, lt); + return; } /* init */ @@ -198,20 +198,22 @@ void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba } +#define RDG_DICTSIZE (32 KB) #define RDG_BLOCKSIZE (128 KB) void RDG_genOut(unsigned long long size, double matchProba, double litProba, unsigned seed) { - BYTE buff[RDG_DICTSIZE + RDG_BLOCKSIZE]; + BYTE* buff = (BYTE*)malloc(RDG_DICTSIZE + RDG_BLOCKSIZE); U64 total = 0; size_t genBlockSize = RDG_BLOCKSIZE; litDistribTable lt; /* init */ + if (buff==NULL) { fprintf(stderr, "not enough memory\n"); exit(1); } if (litProba==0.0) litProba = matchProba / 4.5; RDG_fillLiteralDistrib(lt, litProba); 
SET_BINARY_MODE(stdout); - /* Generate dict */ + /* Generate initial dict */ RDG_genBlock(buff, RDG_DICTSIZE, 0, matchProba, lt, &seed); /* Generate compressible data */ @@ -224,4 +226,7 @@ void RDG_genOut(unsigned long long size, double matchProba, double litProba, uns /* update dict */ memcpy(buff, buff + RDG_BLOCKSIZE, RDG_DICTSIZE); } + + /* cleanup */ + free(buff); } diff --git a/programs/datagen.h b/programs/datagen.h index 631d1463..89482dc2 100644 --- a/programs/datagen.h +++ b/programs/datagen.h @@ -28,13 +28,13 @@ void RDG_genOut(unsigned long long size, double matchProba, double litProba, unsigned seed); void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed); -/* RDG_genOut - Generate 'size' bytes of compressible data into stdout. +/* RDG_genBuffer + Generate 'size' bytes of compressible data into 'buffer'. Compressibility can be controlled using 'matchProba'. - 'LitProba' is optional, and affect variability of bytes. If litProba==0.0, default value is used. - Generated data can be selected using 'seed'. + 'LitProba' is optional, and affects variability of individual bytes. If litProba==0.0, default value is used. + Generated data pattern can be modified using a different 'seed'. If (matchProba, litProba and seed) are equal, the function always generate the same content. - RDG_genBuffer - Same as RDG_genOut, but generate data into provided buffer + RDG_genOut + Same as RDG_genBuffer, but generate data towards stdout */