diff --git a/programs/Makefile b/programs/Makefile index c3335a47..1949362b 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -30,7 +30,7 @@ # fullbench32: Same as fullbench, but forced to compile in 32-bits mode # ########################################################################## -RELEASE?= r0 +RELEASE?= r1 DESTDIR?= PREFIX ?= /usr @@ -78,7 +78,7 @@ fuzzer : $(ZSTDDIR)/zstd.c xxhash.c fuzzer.c fuzzer32: $(ZSTDDIR)/zstd.c xxhash.c fuzzer.c $(CC) -m32 $(FLAGS) $^ -o $@$(EXT) -datagen : datagen.c +datagen : datagen.c datagencli.c $(CC) $(FLAGS) $^ -o $@$(EXT) clean: diff --git a/programs/datagen.c b/programs/datagen.c index 09dfe71e..1ac4313f 100644 --- a/programs/datagen.c +++ b/programs/datagen.c @@ -23,18 +23,12 @@ - Public forum : https://groups.google.com/forum/#!forum/lz4c */ -/************************************** -* Remove Visual warning messages -**************************************/ -#define _CRT_SECURE_NO_WARNINGS /* fgets */ - - /************************************** * Includes **************************************/ #include /* malloc */ -#include /* fgets, sscanf */ -#include /* strcmp */ +#include /* FILE, fwrite */ +#include /* memcpy */ /************************************** @@ -62,60 +56,31 @@ #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__) # include /* _O_BINARY */ # include /* _setmode, _isatty */ -# ifdef __MINGW32__ - int _fileno(FILE *stream); /* MINGW somehow forgets to include this windows declaration into */ -# endif # define SET_BINARY_MODE(file) _setmode(_fileno(file), _O_BINARY) -# define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream)) #else -# include /* isatty */ # define SET_BINARY_MODE(file) -# define IS_CONSOLE(stdStream) isatty(fileno(stdStream)) #endif /************************************** * Constants **************************************/ -#ifndef ZSTD_VERSION -# define ZSTD_VERSION "r0" -#endif - #define KB *(1 <<10) -#define MB *(1 <<20) -#define GB 
*(1U<<30) -#define CDG_SIZE_DEFAULT (64 KB) -#define CDG_SEED_DEFAULT 0 -#define CDG_COMPRESSIBILITY_DEFAULT 50 #define PRIME1 2654435761U #define PRIME2 2246822519U -/************************************** -* Macros -**************************************/ -#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) -#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); } - - -/************************************** -* Local Parameters -**************************************/ -static unsigned no_prompt = 0; -static unsigned displayLevel = 2; - - /********************************************************* * Local Functions *********************************************************/ -#define CDG_rotl32(x,r) ((x << r) | (x >> (32 - r))) -static unsigned int CDG_rand(U32* src) +#define RDG_rotl32(x,r) ((x << r) | (x >> (32 - r))) +static unsigned int RDG_rand(U32* src) { U32 rand32 = *src; rand32 *= PRIME1; rand32 ^= PRIME2; - rand32 = CDG_rotl32(rand32, 13); + rand32 = RDG_rotl32(rand32, 13); *src = rand32; return rand32; } @@ -123,7 +88,7 @@ static unsigned int CDG_rand(U32* src) #define LTSIZE 8192 #define LTMASK (LTSIZE-1) -static void* CDG_createLiteralDistrib(double ld) +static void* RDG_createLiteralDistrib(double ld) { BYTE* lt = malloc(LTSIZE); U32 i = 0; @@ -150,40 +115,41 @@ static void* CDG_createLiteralDistrib(double ld) return lt; } -static char CDG_genChar(U32* seed, const void* ltctx) +static char RDG_genChar(U32* seed, const void* ltctx) { const BYTE* lt = ltctx; - U32 id = CDG_rand(seed) & LTMASK; + U32 id = RDG_rand(seed) & LTMASK; return lt[id]; } -#define CDG_RAND15BITS ((CDG_rand(seed) >> 3) & 32767) -#define CDG_RANDLENGTH ( ((CDG_rand(seed) >> 7) & 7) ? (CDG_rand(seed) & 15) : (CDG_rand(seed) & 511) + 15) -#define CDG_DICTSIZE (32 KB) -static void CDG_generate(U64 size, U32* seed, double matchProba, double litProba) +#define RDG_RAND15BITS ((RDG_rand(seed) >> 3) & 32767) +#define RDG_RANDLENGTH ( ((RDG_rand(seed) >> 7) & 7) ? 
(RDG_rand(seed) & 15) : (RDG_rand(seed) & 511) + 15) +#define RDG_DICTSIZE (32 KB) +void RDG_generate(U64 size, U32 seedInit, double matchProba, double litProba) { - BYTE fullbuff[CDG_DICTSIZE + 128 KB + 1]; - BYTE* buff = fullbuff + CDG_DICTSIZE; + BYTE fullbuff[RDG_DICTSIZE + 128 KB + 1]; + BYTE* buff = fullbuff + RDG_DICTSIZE; U64 total=0; U32 P32 = (U32)(32768 * matchProba); U32 pos=1; U32 genBlockSize = 128 KB; - void* ldctx = CDG_createLiteralDistrib(litProba); + void* ldctx = RDG_createLiteralDistrib(litProba); FILE* fout = stdout; + U32* seed = &seedInit;; /* init */ SET_BINARY_MODE(stdout); - fullbuff[0] = CDG_genChar(seed, ldctx); + fullbuff[0] = RDG_genChar(seed, ldctx); while (pos<32 KB) { /* Select : Literal (char) or Match (within 32K) */ - if (CDG_RAND15BITS < P32) + if (RDG_RAND15BITS < P32) { /* Copy (within 64K) */ U32 d; int ref; - int length = CDG_RANDLENGTH + 4; - U32 offset = CDG_RAND15BITS + 1; + int length = RDG_RANDLENGTH + 4; + U32 offset = RDG_RAND15BITS + 1; if (offset > pos) offset = pos; ref = pos - offset; d = pos + length; @@ -192,8 +158,8 @@ static void CDG_generate(U64 size, U32* seed, double matchProba, double litProba else { /* Literal (noise) */ - U32 d = pos + CDG_RANDLENGTH; - while (pos < d) fullbuff[pos++] = CDG_genChar(seed, ldctx); + U32 d = pos + RDG_RANDLENGTH; + while (pos < d) fullbuff[pos++] = RDG_genChar(seed, ldctx); } } @@ -208,13 +174,13 @@ static void CDG_generate(U64 size, U32* seed, double matchProba, double litProba while (pos genBlockSize ) length = genBlockSize - pos; ref = pos - offset; d = pos + length; @@ -224,10 +190,10 @@ static void CDG_generate(U64 size, U32* seed, double matchProba, double litProba { /* Literal (noise) */ U32 d; - int length = CDG_RANDLENGTH; + int length = RDG_RANDLENGTH; if (pos + length > genBlockSize) length = genBlockSize - pos; d = pos + length; - while (pos < d) buff[pos++] = CDG_genChar(seed, ldctx); + while (pos < d) buff[pos++] = RDG_genChar(seed, ldctx); } } @@ -237,121 
+203,3 @@ static void CDG_generate(U64 size, U32* seed, double matchProba, double litProba memcpy(fullbuff, buff + 96 KB, 32 KB); } } - - -/********************************************************* -* Command line -*********************************************************/ -static int CDG_usage(char* programName) -{ - DISPLAY( "Compressible data generator\n"); - DISPLAY( "Usage :\n"); - DISPLAY( " %s [size] [args]\n", programName); - DISPLAY( "\n"); - DISPLAY( "Arguments :\n"); - DISPLAY( " -g# : generate # data (default:%i)\n", CDG_SIZE_DEFAULT); - DISPLAY( " -s# : Select seed (default:%i)\n", CDG_SEED_DEFAULT); - DISPLAY( " -p# : Select compressibility in %% (default:%i%%)\n", CDG_COMPRESSIBILITY_DEFAULT); - DISPLAY( " -h : display help and exit\n"); - return 0; -} - - -int main(int argc, char** argv) -{ - int argNb; - double proba = (double)CDG_COMPRESSIBILITY_DEFAULT / 100; - double litProba = proba / 3.6; - U64 size = CDG_SIZE_DEFAULT; - U32 seed = CDG_SEED_DEFAULT; - char* programName; - - /* Check command line */ - programName = argv[0]; - for(argNb=1; argNb='0') && (*argument<='9')) - { - size *= 10; - size += *argument - '0'; - argument++; - } - if (*argument=='K') { size <<= 10; argument++; } - if (*argument=='M') { size <<= 20; argument++; } - if (*argument=='G') { size <<= 30; argument++; } - if (*argument=='B') { argument++; } - break; - case 's': - argument++; - seed=0; - while ((*argument>='0') && (*argument<='9')) - { - seed *= 10; - seed += *argument - '0'; - argument++; - } - break; - case 'P': - argument++; - proba=0.0; - while ((*argument>='0') && (*argument<='9')) - { - proba *= 10; - proba += *argument - '0'; - argument++; - } - if (proba>100.) proba=100.; - proba /= 100.; - litProba = proba / 4.; - break; - case 'L': - argument++; - litProba=0.; - while ((*argument>='0') && (*argument<='9')) - { - litProba *= 10; - litProba += *argument - '0'; - argument++; - } - if (litProba>100.) 
litProba=100.; - litProba /= 100.; - break; - case 'v': - displayLevel = 4; - argument++; - break; - default: - return CDG_usage(programName); - } - } - - } - } - - DISPLAYLEVEL(4, "Data Generator %s \n", ZSTD_VERSION); - DISPLAYLEVEL(3, "Seed = %u \n", seed); - if (proba!=CDG_COMPRESSIBILITY_DEFAULT) DISPLAYLEVEL(3, "Compressibility : %i%%\n", (U32)(proba*100)); - - CDG_generate(size, &seed, proba, litProba); - - return 0; -} diff --git a/programs/datagen.h b/programs/datagen.h new file mode 100644 index 00000000..6d07f204 --- /dev/null +++ b/programs/datagen.h @@ -0,0 +1,27 @@ +/* + datagen.h - compressible data generator header + Copyright (C) Yann Collet 2012-2015 + + GPL v2 License + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + You can contact the author at : + - ZSTD source repository : https://github.com/Cyan4973/zstd + - Public forum : https://groups.google.com/forum/#!forum/lz4c +*/ + + +void RDG_generate(unsigned long long size, unsigned seed, double matchProba, double litProba); diff --git a/programs/datagencli.c b/programs/datagencli.c new file mode 100644 index 00000000..b6ca508b --- /dev/null +++ b/programs/datagencli.c @@ -0,0 +1,191 @@ +/* + datagencli.c + compressible data command line generator + Copyright (C) Yann Collet 2012-2015 + + GPL v2 License + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + You can contact the author at : + - ZSTD source repository : https://github.com/Cyan4973/zstd + - Public forum : https://groups.google.com/forum/#!forum/lz4c +*/ + +/************************************** +* Includes +**************************************/ +#include /* fprintf, stderr */ +#include "datagen.h" /* RDG_generate */ + + +/************************************** +* Basic Types +**************************************/ +#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; +#else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; +#endif + + +/************************************** +* Constants +**************************************/ +#ifndef ZSTD_VERSION +# define ZSTD_VERSION "r1" +#endif + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define SIZE_DEFAULT (64 KB) +#define SEED_DEFAULT 0 +#define COMPRESSIBILITY_DEFAULT 50 + + +/************************************** +* Macros +**************************************/ +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) 
if (displayLevel>=l) { DISPLAY(__VA_ARGS__); } +static unsigned displayLevel = 2; + + +/********************************************************* +* Command line +*********************************************************/ +static int usage(char* programName) +{ + DISPLAY( "Compressible data generator\n"); + DISPLAY( "Usage :\n"); + DISPLAY( " %s [size] [args]\n", programName); + DISPLAY( "\n"); + DISPLAY( "Arguments :\n"); + DISPLAY( " -g# : generate # data (default:%i)\n", SIZE_DEFAULT); + DISPLAY( " -s# : Select seed (default:%i)\n", SEED_DEFAULT); + DISPLAY( " -P# : Select compressibility in %% (default:%i%%)\n", COMPRESSIBILITY_DEFAULT); + DISPLAY( " -h : display help and exit\n"); + return 0; +} + + +int main(int argc, char** argv) +{ + int argNb; + double proba = (double)COMPRESSIBILITY_DEFAULT / 100; + double litProba = proba / 3.6; + U64 size = SIZE_DEFAULT; + U32 seed = SEED_DEFAULT; + char* programName; + + /* Check command line */ + programName = argv[0]; + for(argNb=1; argNb='0') && (*argument<='9')) + { + size *= 10; + size += *argument - '0'; + argument++; + } + if (*argument=='K') { size <<= 10; argument++; } + if (*argument=='M') { size <<= 20; argument++; } + if (*argument=='G') { size <<= 30; argument++; } + if (*argument=='B') { argument++; } + break; + case 's': + argument++; + seed=0; + while ((*argument>='0') && (*argument<='9')) + { + seed *= 10; + seed += *argument - '0'; + argument++; + } + break; + case 'P': + argument++; + proba=0.0; + while ((*argument>='0') && (*argument<='9')) + { + proba *= 10; + proba += *argument - '0'; + argument++; + } + if (proba>100.) proba=100.; + proba /= 100.; + litProba = proba / 3.6; + break; + case 'L': /* hidden argument : Literal distribution probability */ + argument++; + litProba=0.; + while ((*argument>='0') && (*argument<='9')) + { + litProba *= 10; + litProba += *argument - '0'; + argument++; + } + if (litProba>100.) 
litProba=100.; + litProba /= 100.; + break; + case 'v': + displayLevel = 4; + argument++; + break; + default: + return usage(programName); + } + } + + } + } + + DISPLAYLEVEL(4, "Data Generator %s \n", ZSTD_VERSION); + DISPLAYLEVEL(3, "Seed = %u \n", seed); + if (proba != (double)COMPRESSIBILITY_DEFAULT / 100) DISPLAYLEVEL(3, "Compressibility : %i%%\n", (U32)(proba*100)); /* proba is a 0..1 fraction: compare it with the default expressed as a fraction, not as a percentage */ + + RDG_generate(size, seed, proba, litProba); + DISPLAYLEVEL(1, "\n"); + + return 0; +}