diff --git a/Makefile b/Makefile index 076fd55..76f3163 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,20 @@ +OS := $(shell uname) + +ifeq ($(OS),Linux) + OUTPUT32 = lz4demo32 + OUTPUT64 = lz4demo64 +else + OUTPUT32 = LZ4Demo32.exe + OUTPUT64 = LZ4Demo64.exe +endif + all: lz4demo64 lz4demo32 lz4demo64: lz4.c lz4.h bench.c lz4demo.c - gcc -O3 -I. -std=c99 -Wall -W -Wundef -Wno-implicit-function-declaration lz4.c bench.c lz4demo.c -o lz4demo64.exe + gcc -O3 -I. -std=c99 -Wall -W -Wundef -Wno-implicit-function-declaration lz4.c bench.c lz4demo.c -o $(OUTPUT64) lz4demo32: lz4.c lz4.h bench.c lz4demo.c - gcc -m32 -O3 -I. -std=c99 -Wall -W -Wundef -Wno-implicit-function-declaration lz4.c bench.c lz4demo.c -o lz4demo32.exe + gcc -m32 -O3 -I. -std=c99 -Wall -W -Wundef -Wno-implicit-function-declaration lz4.c bench.c lz4demo.c -o $(OUTPUT32) clean: - rm -f core *.o lz4demo32.exe lz4demo64.exe + rm -f core *.o $(OUTPUT32) $(OUTPUT64) diff --git a/bench.c b/bench.c index 6fe0d94..5139e8a 100644 --- a/bench.c +++ b/bench.c @@ -47,7 +47,10 @@ #include // timeb #include // stat64 #include // stat64 + #include "lz4.h" +#define DEFAULTCOMPRESSOR LZ4_compress + //************************************** @@ -107,16 +110,24 @@ struct compressionParameters //************************************** -// Private Parameters +// Benchmark Parameters //************************************** static int chunkSize = DEFAULT_CHUNKSIZE; +static int nbIterations = NBLOOPS; void BMK_SetBlocksize(int bsize) { chunkSize = bsize; - DISPLAY("Using Block Size of %i KB... ", chunkSize>>10); + DISPLAY("-Using Block Size of %i KB-", chunkSize>>10); } +void BMK_SetNbIterations(int nbLoops) +{ + nbIterations = nbLoops; + DISPLAY("- %i iterations-", nbIterations); +} + + //********************************************************* // Private functions //********************************************************* @@ -231,7 +242,7 @@ static U64 BMK_GetFileSize(char* infilename) // Public function //********************************************************* -int BMK_benchFile(char** fileNamesTable, int nbFiles) +int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel) { int fileIdx=0; FILE* fileIn; @@ -244,7 +255,7 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles) char* in_buff; char* out_buff; int out_buff_size; struct chunkParameters* chunkP; - U32 crcc, crcd; + U32 crcc, crcd=0; struct compressionParameters compP; U64 totals = 0; @@ -254,7 +265,13 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles) // Init - compP.compressionFunction = LZ4_compress; + switch (cLevel) + { +#ifdef COMPRESSOR0 + case 0 : compP.compressionFunction = COMPRESSOR0; break; +#endif + default : compP.compressionFunction = DEFAULTCOMPRESSOR; + } compP.decompressionFunction = LZ4_uncompress; // Loop for each file @@ -313,7 +330,7 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles) } // Fill input buffer - DISPLAY("Loading %s... \r", infilename); + DISPLAY("Loading %s... \r", infilename); readSize = fread(in_buff, 1, benchedsize, fileIn); fclose(fileIn); @@ -332,11 +349,13 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles) // Bench { int loopNb, nb_loops, chunkNb; - size_t cSize; + size_t cSize=0; int milliTime; double fastestC = 100000000., fastestD = 100000000.; + double ratio=0.; - for (loopNb = 1; loopNb <= NBLOOPS; loopNb++) + DISPLAY("\r%79s\r", ""); + for (loopNb = 1; loopNb <= nbIterations; loopNb++) { // Compression DISPLAY("%1i-%-14.14s : %9i ->\r", loopNb, infilename, (int)benchedsize); @@ -356,8 +375,9 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles) if ((double)milliTime < fastestC*nb_loops) fastestC = (double)milliTime/nb_loops; cSize=0; for (chunkNb=0; chunkNb %9i (%5.2f%%), %6.1f MB/s\r", loopNb, infilename, (int)benchedsize, (int)cSize, (double)cSize/(double)benchedsize*100., (double)benchedsize / fastestC / 1000.); + DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%), %6.1f MB/s\r", loopNb, infilename, (int)benchedsize, (int)cSize, ratio, (double)benchedsize / fastestC / 1000.); // Decompression { size_t i; for (i=0; i %9i (%5.2f%%), %6.1f MB/s , %6.1f MB/s\r", loopNb, infilename, (int)benchedsize, (int)cSize, (double)cSize/(double)benchedsize*100., (double)benchedsize / fastestC / 1000., (double)benchedsize / fastestD / 1000.); + DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%), %6.1f MB/s , %6.1f MB/s\r", loopNb, infilename, (int)benchedsize, (int)cSize, ratio, (double)benchedsize / fastestC / 1000., (double)benchedsize / fastestD / 1000.); // CRC Checking crcd = BMK_checksum_MMH3A(in_buff, benchedsize); if (crcc!=crcd) { DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", infilename, (unsigned)crcc, (unsigned)crcd); break; } } - DISPLAY("%-16.16s : %9i -> %9i (%5.2f%%), %6.1f MB/s , %6.1f MB/s\n", infilename, (int)benchedsize, (int)cSize, (double)cSize/(double)benchedsize*100., (double)benchedsize / fastestC / 1000., (double)benchedsize / fastestD / 1000.); + if (crcc==crcd) + { + if (ratio<100.) + DISPLAY("%-16.16s : %9i -> %9i (%5.2f%%), %6.1f MB/s , %6.1f MB/s\n", infilename, (int)benchedsize, (int)cSize, ratio, (double)benchedsize / fastestC / 1000., (double)benchedsize / fastestD / 1000.); + else + DISPLAY("%-16.16s : %9i -> %9i (%5.1f%%), %6.1f MB/s , %6.1f MB/s \n", infilename, (int)benchedsize, (int)cSize, ratio, (double)benchedsize / fastestC / 1000., (double)benchedsize / fastestD / 1000.); + } totals += benchedsize; totalz += cSize; totalc += fastestC; diff --git a/bench.h b/bench.h index 547a1bc..f97eb51 100644 --- a/bench.h +++ b/bench.h @@ -27,10 +27,11 @@ extern "C" { #endif -int BMK_benchFile(char** fileNamesTable, int nbFiles) ; +int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel); // Parameters void BMK_SetBlocksize(int bsize); +void BMK_SetNbIterations(int nbLoops); diff --git a/lz4.c b/lz4.c index 28a892e..df62be6 100644 --- a/lz4.c +++ b/lz4.c @@ -98,6 +98,8 @@ #define restrict // Disable restrict #endif +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + #ifdef _MSC_VER // Visual Studio #define inline __forceinline // Visual is not C99, but supports some kind of inline #include // _BitScanForward @@ -109,6 +111,15 @@ #define bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8))) #endif +#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) +# define expect(expr,value) (__builtin_expect ((expr),(value)) ) +#else +# define expect(expr,value) (expr) +#endif + +#define likely(expr) expect((expr) != 0, 1) +#define unlikely(expr) expect((expr) != 0, 0) + //************************************** // Includes @@ -210,14 +221,6 @@ typedef struct _U64_S { U64 v; } U64_S; #define LZ4_WRITE_LITTLEENDIAN_16(p,v) { A16(p) = v; p+=2; } #endif -#if __GNUC__ >= 3 -# define expect(expr,value) __builtin_expect ((expr),(value)) -#else -# define expect(expr,value) (expr) -#endif - -#define expect_true(expr) expect ((expr) != 0, 1) -#define expect_false(expr) expect ((expr) != 0, 0) //************************************** // Local structures @@ -249,7 +252,7 @@ inline static int LZ4_NbCommonBytes (register U64 val) unsigned long r = 0; _BitScanReverse64( &r, val ); return (int)(r>>3); - #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) return (__builtin_clzll(val) >> 3); #else int r; @@ -263,7 +266,7 @@ inline static int LZ4_NbCommonBytes (register U64 val) unsigned long r = 0; _BitScanForward64( &r, val ); return (int)(r>>3); - #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) return (__builtin_ctzll(val) >> 3); #else static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; @@ -281,7 +284,7 @@ inline static int LZ4_NbCommonBytes (register U32 val) unsigned long r = 0; _BitScanReverse( &r, val ); return (int)(r>>3); - #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) return (__builtin_clz(val) >> 3); #else int r; @@ -294,7 +297,7 @@ inline static int LZ4_NbCommonBytes (register U32 val) unsigned long r = 0; _BitScanForward( &r, val ); return (int)(r>>3); - #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) return (__builtin_ctz(val) >> 3); #else static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; @@ -381,7 +384,7 @@ int LZ4_compressCtx(void** ctx, ip = forwardIp; forwardIp = ip + step; - if (expect_false(forwardIp > mflimit)) { goto _last_literals; } + if unlikely(forwardIp > mflimit) { goto _last_literals; } forwardH = LZ4_HASH_VALUE(forwardIp); ref = base + HashTable[h]; @@ -390,7 +393,7 @@ int LZ4_compressCtx(void** ctx, } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip))); // Catch up - while ((expect_false(ip>anchor) && expect_false(ref>(BYTE*)source) && (ip[-1]==ref[-1]))) { ip--; ref--; } + while ((ip>anchor) && (ref>(BYTE*)source) && unlikely(ip[-1]==ref[-1])) { ip--; ref--; } // Encode Literal length length = ip - anchor; @@ -408,7 +411,7 @@ _next_match: // Start Counting ip+=MINMATCH; ref+=MINMATCH; // MinMatch verified anchor = ip; - while (expect_true(ipanchor) && expect_false(ref>(BYTE*)source) && (ip[-1]==ref[-1]))) { ip--; ref--; } + while ((ip>anchor) && (ref>(BYTE*)source) && (ip[-1]==ref[-1])) { ip--; ref--; } // Encode Literal length length = ip - anchor; @@ -576,7 +579,7 @@ _endCount: // Test next position ref = base + HashTable[LZ4_HASH64K_VALUE(ip)]; HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base; - if (expect_true(A32(ref) == A32(ip))) { token = op++; *token=0; goto _next_match; } + if (A32(ref) == A32(ip)) { token = op++; *token=0; goto _next_match; } // Prepare next loop anchor = ip++; @@ -658,7 +661,7 @@ int LZ4_uncompress(const char* source, // copy literals cpy = op+length; - if (expect_false(cpy>oend-COPYLENGTH)) + if unlikely(cpy>oend-COPYLENGTH) { if (cpy > oend) goto _output_error; memcpy(op, ip, length); @@ -675,7 +678,7 @@ int LZ4_uncompress(const char* source, if ((length=(token&ML_MASK)) == ML_MASK) { for (;*ip==255;length+=255) {ip++;} length += *ip++; } // copy repeated sequence - if (expect_false(op-ref Error if(!input_filename) { badusage(); return 1; } - if (bench) return BMK_benchFile(argv+filenamesStart, argc-filenamesStart); + if (bench) return BMK_benchFile(argv+filenamesStart, argc-filenamesStart, 0); // No output filename if (!output_filename) { badusage(); return 1; }