minor code refactoring, mostly around __builtin_expect

git-svn-id: https://lz4.googlecode.com/svn/trunk@59 650e7d94-2a16-8b24-b05c-7c0b3f6821cd
This commit is contained in:
yann.collet.73@gmail.com 2012-03-09 21:46:59 +00:00
parent 89767cc280
commit ad59ba1cfa
5 changed files with 80 additions and 38 deletions

View File

@ -1,10 +1,20 @@
OS := $(shell uname)
ifeq ($(OS),Linux)
OUTPUT32 = lz4demo32
OUTPUT64 = lz4demo64
else
OUTPUT32 = LZ4Demo32.exe
OUTPUT64 = LZ4Demo64.exe
endif
all: lz4demo64 lz4demo32
lz4demo64: lz4.c lz4.h bench.c lz4demo.c
gcc -O3 -I. -std=c99 -Wall -W -Wundef -Wno-implicit-function-declaration lz4.c bench.c lz4demo.c -o lz4demo64.exe
gcc -O3 -I. -std=c99 -Wall -W -Wundef -Wno-implicit-function-declaration lz4.c bench.c lz4demo.c -o $(OUTPUT64)
lz4demo32: lz4.c lz4.h bench.c lz4demo.c
gcc -m32 -O3 -I. -std=c99 -Wall -W -Wundef -Wno-implicit-function-declaration lz4.c bench.c lz4demo.c -o lz4demo32.exe
gcc -m32 -O3 -I. -std=c99 -Wall -W -Wundef -Wno-implicit-function-declaration lz4.c bench.c lz4demo.c -o $(OUTPUT32)
clean:
rm -f core *.o lz4demo32.exe lz4demo64.exe
rm -f core *.o $(OUTPUT32) $(OUTPUT64)

49
bench.c
View File

@ -47,7 +47,10 @@
#include <sys/timeb.h> // timeb
#include <sys/types.h> // stat64
#include <sys/stat.h> // stat64
#include "lz4.h"
#define DEFAULTCOMPRESSOR LZ4_compress
//**************************************
@ -107,16 +110,24 @@ struct compressionParameters
//**************************************
// Private Parameters
// Benchmark Parameters
//**************************************
static int chunkSize = DEFAULT_CHUNKSIZE;
static int nbIterations = NBLOOPS;
void BMK_SetBlocksize(int bsize)
{
chunkSize = bsize;
DISPLAY("Using Block Size of %i KB... ", chunkSize>>10);
DISPLAY("-Using Block Size of %i KB-", chunkSize>>10);
}
// BMK_SetNbIterations :
// Sets the number of benchmark iterations (stored in nbIterations) and
// reports the value actually used through DISPLAY.
// nbLoops : requested iteration count. Values below 1 are raised to 1 :
//           the benchmark loop counts from 1 up to nbIterations, so a zero or
//           negative value would skip every measurement.
void BMK_SetNbIterations(int nbLoops)
{
    if (nbLoops < 1) nbLoops = 1;   // e.g. a missing or non-digit option character yields a negative request
    nbIterations = nbLoops;
    DISPLAY("- %i iterations-", nbIterations);
}
//*********************************************************
// Private functions
//*********************************************************
@ -231,7 +242,7 @@ static U64 BMK_GetFileSize(char* infilename)
// Public function
//*********************************************************
int BMK_benchFile(char** fileNamesTable, int nbFiles)
int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel)
{
int fileIdx=0;
FILE* fileIn;
@ -244,7 +255,7 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles)
char* in_buff;
char* out_buff; int out_buff_size;
struct chunkParameters* chunkP;
U32 crcc, crcd;
U32 crcc, crcd=0;
struct compressionParameters compP;
U64 totals = 0;
@ -254,7 +265,13 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles)
// Init
compP.compressionFunction = LZ4_compress;
switch (cLevel)
{
#ifdef COMPRESSOR0
case 0 : compP.compressionFunction = COMPRESSOR0; break;
#endif
default : compP.compressionFunction = DEFAULTCOMPRESSOR;
}
compP.decompressionFunction = LZ4_uncompress;
// Loop for each file
@ -313,7 +330,7 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles)
}
// Fill input buffer
DISPLAY("Loading %s... \r", infilename);
DISPLAY("Loading %s... \r", infilename);
readSize = fread(in_buff, 1, benchedsize, fileIn);
fclose(fileIn);
@ -332,11 +349,13 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles)
// Bench
{
int loopNb, nb_loops, chunkNb;
size_t cSize;
size_t cSize=0;
int milliTime;
double fastestC = 100000000., fastestD = 100000000.;
double ratio=0.;
for (loopNb = 1; loopNb <= NBLOOPS; loopNb++)
DISPLAY("\r%79s\r", "");
for (loopNb = 1; loopNb <= nbIterations; loopNb++)
{
// Compression
DISPLAY("%1i-%-14.14s : %9i ->\r", loopNb, infilename, (int)benchedsize);
@ -356,8 +375,9 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles)
if ((double)milliTime < fastestC*nb_loops) fastestC = (double)milliTime/nb_loops;
cSize=0; for (chunkNb=0; chunkNb<nbChunks; chunkNb++) cSize += chunkP[chunkNb].outputSize;
ratio = (double)cSize/(double)benchedsize*100.;
DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%), %6.1f MB/s\r", loopNb, infilename, (int)benchedsize, (int)cSize, (double)cSize/(double)benchedsize*100., (double)benchedsize / fastestC / 1000.);
DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%), %6.1f MB/s\r", loopNb, infilename, (int)benchedsize, (int)cSize, ratio, (double)benchedsize / fastestC / 1000.);
// Decompression
{ size_t i; for (i=0; i<benchedsize; i++) in_buff[i]=0; } // zeroing area, for CRC checking
@ -370,20 +390,25 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles)
{
for (chunkNb=0; chunkNb<nbChunks; chunkNb++)
chunkP[chunkNb].outputSize = compP.decompressionFunction(chunkP[chunkNb].outputBuffer, chunkP[chunkNb].inputBuffer, chunkP[chunkNb].inputSize);
//LZ4_uncompress_unknownOutputSize(chunkP[chunkNb].outputBuffer, chunkP[chunkNb].inputBuffer, chunkP[chunkNb].outputSize, chunkP[chunkNb].inputSize); // For testing
nb_loops++;
}
milliTime = BMK_GetMilliSpan(milliTime);
if ((double)milliTime < fastestD*nb_loops) fastestD = (double)milliTime/nb_loops;
DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%), %6.1f MB/s , %6.1f MB/s\r", loopNb, infilename, (int)benchedsize, (int)cSize, (double)cSize/(double)benchedsize*100., (double)benchedsize / fastestC / 1000., (double)benchedsize / fastestD / 1000.);
DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%), %6.1f MB/s , %6.1f MB/s\r", loopNb, infilename, (int)benchedsize, (int)cSize, ratio, (double)benchedsize / fastestC / 1000., (double)benchedsize / fastestD / 1000.);
// CRC Checking
crcd = BMK_checksum_MMH3A(in_buff, benchedsize);
if (crcc!=crcd) { DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", infilename, (unsigned)crcc, (unsigned)crcd); break; }
}
DISPLAY("%-16.16s : %9i -> %9i (%5.2f%%), %6.1f MB/s , %6.1f MB/s\n", infilename, (int)benchedsize, (int)cSize, (double)cSize/(double)benchedsize*100., (double)benchedsize / fastestC / 1000., (double)benchedsize / fastestD / 1000.);
if (crcc==crcd)
{
if (ratio<100.)
DISPLAY("%-16.16s : %9i -> %9i (%5.2f%%), %6.1f MB/s , %6.1f MB/s\n", infilename, (int)benchedsize, (int)cSize, ratio, (double)benchedsize / fastestC / 1000., (double)benchedsize / fastestD / 1000.);
else
DISPLAY("%-16.16s : %9i -> %9i (%5.1f%%), %6.1f MB/s , %6.1f MB/s \n", infilename, (int)benchedsize, (int)cSize, ratio, (double)benchedsize / fastestC / 1000., (double)benchedsize / fastestD / 1000.);
}
totals += benchedsize;
totalz += cSize;
totalc += fastestC;

View File

@ -27,10 +27,11 @@ extern "C" {
#endif
int BMK_benchFile(char** fileNamesTable, int nbFiles) ;
int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel);
// Parameters
void BMK_SetBlocksize(int bsize);
void BMK_SetNbIterations(int nbLoops);

43
lz4.c
View File

@ -98,6 +98,8 @@
#define restrict // Disable restrict
#endif
#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
#ifdef _MSC_VER // Visual Studio
#define inline __forceinline // Visual is not C99, but supports some kind of inline
#include <intrin.h> // _BitScanForward
@ -109,6 +111,15 @@
#define bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))
#endif
// Branch-prediction hints.
// expect(expr,value) maps to __builtin_expect when the compiler check below succeeds,
// and degrades to the bare expression otherwise.
// __INTEL_COMPILER is tested with defined() first, so an undefined identifier
// is never evaluated inside the #if (the project builds with -Wundef).
#if (GCC_VERSION >= 302) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
#  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
#else
#  define expect(expr,value)    (expr)
#endif

// likely() / unlikely() normalize the condition to 0 or 1 before hinting.
// Both expansions of expect() are fully parenthesized, which is what allows
// the hints to follow a keyword directly :  if unlikely(cond) { ... }
#define likely(expr)     expect((expr) != 0, 1)
#define unlikely(expr)   expect((expr) != 0, 0)
//**************************************
// Includes
@ -210,14 +221,6 @@ typedef struct _U64_S { U64 v; } U64_S;
#define LZ4_WRITE_LITTLEENDIAN_16(p,v) { A16(p) = v; p+=2; }
#endif
#if __GNUC__ >= 3
# define expect(expr,value) __builtin_expect ((expr),(value))
#else
# define expect(expr,value) (expr)
#endif
#define expect_true(expr) expect ((expr) != 0, 1)
#define expect_false(expr) expect ((expr) != 0, 0)
//**************************************
// Local structures
@ -249,7 +252,7 @@ inline static int LZ4_NbCommonBytes (register U64 val)
unsigned long r = 0;
_BitScanReverse64( &r, val );
return (int)(r>>3);
#elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
return (__builtin_clzll(val) >> 3);
#else
int r;
@ -263,7 +266,7 @@ inline static int LZ4_NbCommonBytes (register U64 val)
unsigned long r = 0;
_BitScanForward64( &r, val );
return (int)(r>>3);
#elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
return (__builtin_ctzll(val) >> 3);
#else
static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
@ -281,7 +284,7 @@ inline static int LZ4_NbCommonBytes (register U32 val)
unsigned long r = 0;
_BitScanReverse( &r, val );
return (int)(r>>3);
#elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
return (__builtin_clz(val) >> 3);
#else
int r;
@ -294,7 +297,7 @@ inline static int LZ4_NbCommonBytes (register U32 val)
unsigned long r = 0;
_BitScanForward( &r, val );
return (int)(r>>3);
#elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
return (__builtin_ctz(val) >> 3);
#else
static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
@ -381,7 +384,7 @@ int LZ4_compressCtx(void** ctx,
ip = forwardIp;
forwardIp = ip + step;
if (expect_false(forwardIp > mflimit)) { goto _last_literals; }
if unlikely(forwardIp > mflimit) { goto _last_literals; }
forwardH = LZ4_HASH_VALUE(forwardIp);
ref = base + HashTable[h];
@ -390,7 +393,7 @@ int LZ4_compressCtx(void** ctx,
} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
// Catch up
while ((expect_false(ip>anchor) && expect_false(ref>(BYTE*)source) && (ip[-1]==ref[-1]))) { ip--; ref--; }
while ((ip>anchor) && (ref>(BYTE*)source) && unlikely(ip[-1]==ref[-1])) { ip--; ref--; }
// Encode Literal length
length = ip - anchor;
@ -408,7 +411,7 @@ _next_match:
// Start Counting
ip+=MINMATCH; ref+=MINMATCH; // MinMatch verified
anchor = ip;
while (expect_true(ip<matchlimit-(STEPSIZE-1)))
while likely(ip<matchlimit-(STEPSIZE-1))
{
UARCH diff = AARCH(ref) ^ AARCH(ip);
if (!diff) { ip+=STEPSIZE; ref+=STEPSIZE; continue; }
@ -532,7 +535,7 @@ int LZ4_compress64kCtx(void** ctx,
} while (A32(ref) != A32(ip));
// Catch up
while (((ip>anchor) && expect_false(ref>(BYTE*)source) && (ip[-1]==ref[-1]))) { ip--; ref--; }
while ((ip>anchor) && (ref>(BYTE*)source) && (ip[-1]==ref[-1])) { ip--; ref--; }
// Encode Literal length
length = ip - anchor;
@ -576,7 +579,7 @@ _endCount:
// Test next position
ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
if (expect_true(A32(ref) == A32(ip))) { token = op++; *token=0; goto _next_match; }
if (A32(ref) == A32(ip)) { token = op++; *token=0; goto _next_match; }
// Prepare next loop
anchor = ip++;
@ -658,7 +661,7 @@ int LZ4_uncompress(const char* source,
// copy literals
cpy = op+length;
if (expect_false(cpy>oend-COPYLENGTH))
if unlikely(cpy>oend-COPYLENGTH)
{
if (cpy > oend) goto _output_error;
memcpy(op, ip, length);
@ -675,7 +678,7 @@ int LZ4_uncompress(const char* source,
if ((length=(token&ML_MASK)) == ML_MASK) { for (;*ip==255;length+=255) {ip++;} length += *ip++; }
// copy repeated sequence
if (expect_false(op-ref<STEPSIZE))
if unlikely(op-ref<STEPSIZE)
{
#if LZ4_ARCH64
size_t dec2table[]={0, 0, 0, -1, 0, 1, 2, 3};
@ -764,7 +767,7 @@ int LZ4_uncompress_unknownOutputSize(
if ((length=(token&ML_MASK)) == ML_MASK) { while (ip<iend) { int s = *ip++; length +=s; if (s==255) continue; break; } }
// copy repeated sequence
if (expect_false(op-ref<STEPSIZE))
if unlikely(op-ref<STEPSIZE)
{
#if LZ4_ARCH64
size_t dec2table[]={0, 0, 0, -1, 0, 1, 2, 3};

View File

@ -23,8 +23,8 @@
/*
Note : this is *only* a demo program, an example to show how LZ4 can be used.
It is not considered part of LZ4 compression library.
The license of the demo program is GPL.
The license of LZ4 is BSD.
The license of the demo program is GPL.
*/
//****************************
@ -340,6 +340,9 @@ int main(int argc, char** argv)
// Modify Block Size (benchmark only)
if ( argument[0] =='B' ) { int B = argument[1] - '0'; int S = 1 << (10 + 2*B); BMK_SetBlocksize(S); continue; }
// Modify Nb Iterations (benchmark only)
if ( argument[0] =='i' ) { int iters = argument[1] - '0'; BMK_SetNbIterations(iters); continue; }
// Test
if ( argument[0] =='t' ) { decode=1; output_filename=nulmark; continue; }
}
@ -359,7 +362,7 @@ int main(int argc, char** argv)
// No input filename ==> Error
if(!input_filename) { badusage(); return 1; }
if (bench) return BMK_benchFile(argv+filenamesStart, argc-filenamesStart);
if (bench) return BMK_benchFile(argv+filenamesStart, argc-filenamesStart, 0);
// No output filename
if (!output_filename) { badusage(); return 1; }