minor code refactoring, mostly around __builtin_expect
git-svn-id: https://lz4.googlecode.com/svn/trunk@59 650e7d94-2a16-8b24-b05c-7c0b3f6821cd
This commit is contained in:
parent
89767cc280
commit
ad59ba1cfa
16
Makefile
16
Makefile
@ -1,10 +1,20 @@
|
||||
OS := $(shell uname)
|
||||
|
||||
ifeq ($(OS),Linux)
|
||||
OUTPUT32 = lz4demo32
|
||||
OUTPUT64 = lz4demo64
|
||||
else
|
||||
OUTPUT32 = LZ4Demo32.exe
|
||||
OUTPUT64 = LZ4Demo64.exe
|
||||
endif
|
||||
|
||||
all: lz4demo64 lz4demo32
|
||||
|
||||
lz4demo64: lz4.c lz4.h bench.c lz4demo.c
|
||||
gcc -O3 -I. -std=c99 -Wall -W -Wundef -Wno-implicit-function-declaration lz4.c bench.c lz4demo.c -o lz4demo64.exe
|
||||
gcc -O3 -I. -std=c99 -Wall -W -Wundef -Wno-implicit-function-declaration lz4.c bench.c lz4demo.c -o $(OUTPUT64)
|
||||
|
||||
lz4demo32: lz4.c lz4.h bench.c lz4demo.c
|
||||
gcc -m32 -O3 -I. -std=c99 -Wall -W -Wundef -Wno-implicit-function-declaration lz4.c bench.c lz4demo.c -o lz4demo32.exe
|
||||
gcc -m32 -O3 -I. -std=c99 -Wall -W -Wundef -Wno-implicit-function-declaration lz4.c bench.c lz4demo.c -o $(OUTPUT32)
|
||||
|
||||
clean:
|
||||
rm -f core *.o lz4demo32.exe lz4demo64.exe
|
||||
rm -f core *.o $(OUTPUT32) $(OUTPUT64)
|
||||
|
47
bench.c
47
bench.c
@ -47,7 +47,10 @@
|
||||
#include <sys/timeb.h> // timeb
|
||||
#include <sys/types.h> // stat64
|
||||
#include <sys/stat.h> // stat64
|
||||
|
||||
#include "lz4.h"
|
||||
#define DEFAULTCOMPRESSOR LZ4_compress
|
||||
|
||||
|
||||
|
||||
//**************************************
|
||||
@ -107,16 +110,24 @@ struct compressionParameters
|
||||
|
||||
|
||||
//**************************************
|
||||
// Private Parameters
|
||||
// Benchmark Parameters
|
||||
//**************************************
|
||||
static int chunkSize = DEFAULT_CHUNKSIZE;
|
||||
static int nbIterations = NBLOOPS;
|
||||
|
||||
void BMK_SetBlocksize(int bsize)
|
||||
{
|
||||
chunkSize = bsize;
|
||||
DISPLAY("Using Block Size of %i KB... ", chunkSize>>10);
|
||||
DISPLAY("-Using Block Size of %i KB-", chunkSize>>10);
|
||||
}
|
||||
|
||||
void BMK_SetNbIterations(int nbLoops)
|
||||
{
|
||||
nbIterations = nbLoops;
|
||||
DISPLAY("- %i iterations-", nbIterations);
|
||||
}
|
||||
|
||||
|
||||
//*********************************************************
|
||||
// Private functions
|
||||
//*********************************************************
|
||||
@ -231,7 +242,7 @@ static U64 BMK_GetFileSize(char* infilename)
|
||||
// Public function
|
||||
//*********************************************************
|
||||
|
||||
int BMK_benchFile(char** fileNamesTable, int nbFiles)
|
||||
int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel)
|
||||
{
|
||||
int fileIdx=0;
|
||||
FILE* fileIn;
|
||||
@ -244,7 +255,7 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles)
|
||||
char* in_buff;
|
||||
char* out_buff; int out_buff_size;
|
||||
struct chunkParameters* chunkP;
|
||||
U32 crcc, crcd;
|
||||
U32 crcc, crcd=0;
|
||||
struct compressionParameters compP;
|
||||
|
||||
U64 totals = 0;
|
||||
@ -254,7 +265,13 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles)
|
||||
|
||||
|
||||
// Init
|
||||
compP.compressionFunction = LZ4_compress;
|
||||
switch (cLevel)
|
||||
{
|
||||
#ifdef COMPRESSOR0
|
||||
case 0 : compP.compressionFunction = COMPRESSOR0; break;
|
||||
#endif
|
||||
default : compP.compressionFunction = DEFAULTCOMPRESSOR;
|
||||
}
|
||||
compP.decompressionFunction = LZ4_uncompress;
|
||||
|
||||
// Loop for each file
|
||||
@ -332,11 +349,13 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles)
|
||||
// Bench
|
||||
{
|
||||
int loopNb, nb_loops, chunkNb;
|
||||
size_t cSize;
|
||||
size_t cSize=0;
|
||||
int milliTime;
|
||||
double fastestC = 100000000., fastestD = 100000000.;
|
||||
double ratio=0.;
|
||||
|
||||
for (loopNb = 1; loopNb <= NBLOOPS; loopNb++)
|
||||
DISPLAY("\r%79s\r", "");
|
||||
for (loopNb = 1; loopNb <= nbIterations; loopNb++)
|
||||
{
|
||||
// Compression
|
||||
DISPLAY("%1i-%-14.14s : %9i ->\r", loopNb, infilename, (int)benchedsize);
|
||||
@ -356,8 +375,9 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles)
|
||||
|
||||
if ((double)milliTime < fastestC*nb_loops) fastestC = (double)milliTime/nb_loops;
|
||||
cSize=0; for (chunkNb=0; chunkNb<nbChunks; chunkNb++) cSize += chunkP[chunkNb].outputSize;
|
||||
ratio = (double)cSize/(double)benchedsize*100.;
|
||||
|
||||
DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%), %6.1f MB/s\r", loopNb, infilename, (int)benchedsize, (int)cSize, (double)cSize/(double)benchedsize*100., (double)benchedsize / fastestC / 1000.);
|
||||
DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%), %6.1f MB/s\r", loopNb, infilename, (int)benchedsize, (int)cSize, ratio, (double)benchedsize / fastestC / 1000.);
|
||||
|
||||
// Decompression
|
||||
{ size_t i; for (i=0; i<benchedsize; i++) in_buff[i]=0; } // zeroing area, for CRC checking
|
||||
@ -370,20 +390,25 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles)
|
||||
{
|
||||
for (chunkNb=0; chunkNb<nbChunks; chunkNb++)
|
||||
chunkP[chunkNb].outputSize = compP.decompressionFunction(chunkP[chunkNb].outputBuffer, chunkP[chunkNb].inputBuffer, chunkP[chunkNb].inputSize);
|
||||
//LZ4_uncompress_unknownOutputSize(chunkP[chunkNb].outputBuffer, chunkP[chunkNb].inputBuffer, chunkP[chunkNb].outputSize, chunkP[chunkNb].inputSize); // For testing
|
||||
nb_loops++;
|
||||
}
|
||||
milliTime = BMK_GetMilliSpan(milliTime);
|
||||
|
||||
if ((double)milliTime < fastestD*nb_loops) fastestD = (double)milliTime/nb_loops;
|
||||
DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%), %6.1f MB/s , %6.1f MB/s\r", loopNb, infilename, (int)benchedsize, (int)cSize, (double)cSize/(double)benchedsize*100., (double)benchedsize / fastestC / 1000., (double)benchedsize / fastestD / 1000.);
|
||||
DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%), %6.1f MB/s , %6.1f MB/s\r", loopNb, infilename, (int)benchedsize, (int)cSize, ratio, (double)benchedsize / fastestC / 1000., (double)benchedsize / fastestD / 1000.);
|
||||
|
||||
// CRC Checking
|
||||
crcd = BMK_checksum_MMH3A(in_buff, benchedsize);
|
||||
if (crcc!=crcd) { DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", infilename, (unsigned)crcc, (unsigned)crcd); break; }
|
||||
}
|
||||
|
||||
DISPLAY("%-16.16s : %9i -> %9i (%5.2f%%), %6.1f MB/s , %6.1f MB/s\n", infilename, (int)benchedsize, (int)cSize, (double)cSize/(double)benchedsize*100., (double)benchedsize / fastestC / 1000., (double)benchedsize / fastestD / 1000.);
|
||||
if (crcc==crcd)
|
||||
{
|
||||
if (ratio<100.)
|
||||
DISPLAY("%-16.16s : %9i -> %9i (%5.2f%%), %6.1f MB/s , %6.1f MB/s\n", infilename, (int)benchedsize, (int)cSize, ratio, (double)benchedsize / fastestC / 1000., (double)benchedsize / fastestD / 1000.);
|
||||
else
|
||||
DISPLAY("%-16.16s : %9i -> %9i (%5.1f%%), %6.1f MB/s , %6.1f MB/s \n", infilename, (int)benchedsize, (int)cSize, ratio, (double)benchedsize / fastestC / 1000., (double)benchedsize / fastestD / 1000.);
|
||||
}
|
||||
totals += benchedsize;
|
||||
totalz += cSize;
|
||||
totalc += fastestC;
|
||||
|
3
bench.h
3
bench.h
@ -27,10 +27,11 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
int BMK_benchFile(char** fileNamesTable, int nbFiles) ;
|
||||
int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel);
|
||||
|
||||
// Parameters
|
||||
void BMK_SetBlocksize(int bsize);
|
||||
void BMK_SetNbIterations(int nbLoops);
|
||||
|
||||
|
||||
|
||||
|
43
lz4.c
43
lz4.c
@ -98,6 +98,8 @@
|
||||
#define restrict // Disable restrict
|
||||
#endif
|
||||
|
||||
#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
|
||||
|
||||
#ifdef _MSC_VER // Visual Studio
|
||||
#define inline __forceinline // Visual is not C99, but supports some kind of inline
|
||||
#include <intrin.h> // _BitScanForward
|
||||
@ -109,6 +111,15 @@
|
||||
#define bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))
|
||||
#endif
|
||||
|
||||
// Branch-prediction hints.
// expect(expr,value) maps to __builtin_expect on the compilers this test recognises
// (GCC_VERSION >= 302, __INTEL_COMPILER >= 800, or clang) and degrades to the bare
// expression everywhere else.
// Every version macro is checked with defined() before its value is compared, so a
// compiler that does not define it never evaluates an undefined identifier inside #if
// (the build uses -Wundef); this matches the defined(__GNUC__) guard used for the
// bit-count builtins in this file.
#if (defined(__GNUC__) && (GCC_VERSION >= 302)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
#  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
#else
#  define expect(expr,value)    (expr)
#endif

// likely(x) / unlikely(x) : normalise x to 0 or 1 and tag it as usually-true / usually-false.
// Both expansions are fully parenthesised, so "if likely(x)" and "while likely(x)"
// are valid without an extra pair of parentheses at the call site.
#define likely(expr)     expect((expr) != 0, 1)
#define unlikely(expr)   expect((expr) != 0, 0)
|
||||
|
||||
|
||||
//**************************************
|
||||
// Includes
|
||||
@ -210,14 +221,6 @@ typedef struct _U64_S { U64 v; } U64_S;
|
||||
#define LZ4_WRITE_LITTLEENDIAN_16(p,v) { A16(p) = v; p+=2; }
|
||||
#endif
|
||||
|
||||
#if __GNUC__ >= 3
|
||||
# define expect(expr,value) __builtin_expect ((expr),(value))
|
||||
#else
|
||||
# define expect(expr,value) (expr)
|
||||
#endif
|
||||
|
||||
#define expect_true(expr) expect ((expr) != 0, 1)
|
||||
#define expect_false(expr) expect ((expr) != 0, 0)
|
||||
|
||||
//**************************************
|
||||
// Local structures
|
||||
@ -249,7 +252,7 @@ inline static int LZ4_NbCommonBytes (register U64 val)
|
||||
unsigned long r = 0;
|
||||
_BitScanReverse64( &r, val );
|
||||
return (int)(r>>3);
|
||||
#elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
|
||||
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
|
||||
return (__builtin_clzll(val) >> 3);
|
||||
#else
|
||||
int r;
|
||||
@ -263,7 +266,7 @@ inline static int LZ4_NbCommonBytes (register U64 val)
|
||||
unsigned long r = 0;
|
||||
_BitScanForward64( &r, val );
|
||||
return (int)(r>>3);
|
||||
#elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
|
||||
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
|
||||
return (__builtin_ctzll(val) >> 3);
|
||||
#else
|
||||
static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
|
||||
@ -281,7 +284,7 @@ inline static int LZ4_NbCommonBytes (register U32 val)
|
||||
unsigned long r = 0;
|
||||
_BitScanReverse( &r, val );
|
||||
return (int)(r>>3);
|
||||
#elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
|
||||
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
|
||||
return (__builtin_clz(val) >> 3);
|
||||
#else
|
||||
int r;
|
||||
@ -294,7 +297,7 @@ inline static int LZ4_NbCommonBytes (register U32 val)
|
||||
unsigned long r = 0;
|
||||
_BitScanForward( &r, val );
|
||||
return (int)(r>>3);
|
||||
#elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
|
||||
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
|
||||
return (__builtin_ctz(val) >> 3);
|
||||
#else
|
||||
static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
|
||||
@ -381,7 +384,7 @@ int LZ4_compressCtx(void** ctx,
|
||||
ip = forwardIp;
|
||||
forwardIp = ip + step;
|
||||
|
||||
if (expect_false(forwardIp > mflimit)) { goto _last_literals; }
|
||||
if unlikely(forwardIp > mflimit) { goto _last_literals; }
|
||||
|
||||
forwardH = LZ4_HASH_VALUE(forwardIp);
|
||||
ref = base + HashTable[h];
|
||||
@ -390,7 +393,7 @@ int LZ4_compressCtx(void** ctx,
|
||||
} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
|
||||
|
||||
// Catch up
|
||||
while ((expect_false(ip>anchor) && expect_false(ref>(BYTE*)source) && (ip[-1]==ref[-1]))) { ip--; ref--; }
|
||||
while ((ip>anchor) && (ref>(BYTE*)source) && unlikely(ip[-1]==ref[-1])) { ip--; ref--; }
|
||||
|
||||
// Encode Literal length
|
||||
length = ip - anchor;
|
||||
@ -408,7 +411,7 @@ _next_match:
|
||||
// Start Counting
|
||||
ip+=MINMATCH; ref+=MINMATCH; // MinMatch verified
|
||||
anchor = ip;
|
||||
while (expect_true(ip<matchlimit-(STEPSIZE-1)))
|
||||
while likely(ip<matchlimit-(STEPSIZE-1))
|
||||
{
|
||||
UARCH diff = AARCH(ref) ^ AARCH(ip);
|
||||
if (!diff) { ip+=STEPSIZE; ref+=STEPSIZE; continue; }
|
||||
@ -532,7 +535,7 @@ int LZ4_compress64kCtx(void** ctx,
|
||||
} while (A32(ref) != A32(ip));
|
||||
|
||||
// Catch up
|
||||
while (((ip>anchor) && expect_false(ref>(BYTE*)source) && (ip[-1]==ref[-1]))) { ip--; ref--; }
|
||||
while ((ip>anchor) && (ref>(BYTE*)source) && (ip[-1]==ref[-1])) { ip--; ref--; }
|
||||
|
||||
// Encode Literal length
|
||||
length = ip - anchor;
|
||||
@ -576,7 +579,7 @@ _endCount:
|
||||
// Test next position
|
||||
ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
|
||||
HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
|
||||
if (expect_true(A32(ref) == A32(ip))) { token = op++; *token=0; goto _next_match; }
|
||||
if (A32(ref) == A32(ip)) { token = op++; *token=0; goto _next_match; }
|
||||
|
||||
// Prepare next loop
|
||||
anchor = ip++;
|
||||
@ -658,7 +661,7 @@ int LZ4_uncompress(const char* source,
|
||||
|
||||
// copy literals
|
||||
cpy = op+length;
|
||||
if (expect_false(cpy>oend-COPYLENGTH))
|
||||
if unlikely(cpy>oend-COPYLENGTH)
|
||||
{
|
||||
if (cpy > oend) goto _output_error;
|
||||
memcpy(op, ip, length);
|
||||
@ -675,7 +678,7 @@ int LZ4_uncompress(const char* source,
|
||||
if ((length=(token&ML_MASK)) == ML_MASK) { for (;*ip==255;length+=255) {ip++;} length += *ip++; }
|
||||
|
||||
// copy repeated sequence
|
||||
if (expect_false(op-ref<STEPSIZE))
|
||||
if unlikely(op-ref<STEPSIZE)
|
||||
{
|
||||
#if LZ4_ARCH64
|
||||
size_t dec2table[]={0, 0, 0, -1, 0, 1, 2, 3};
|
||||
@ -764,7 +767,7 @@ int LZ4_uncompress_unknownOutputSize(
|
||||
if ((length=(token&ML_MASK)) == ML_MASK) { while (ip<iend) { int s = *ip++; length +=s; if (s==255) continue; break; } }
|
||||
|
||||
// copy repeated sequence
|
||||
if (expect_false(op-ref<STEPSIZE))
|
||||
if unlikely(op-ref<STEPSIZE)
|
||||
{
|
||||
#if LZ4_ARCH64
|
||||
size_t dec2table[]={0, 0, 0, -1, 0, 1, 2, 3};
|
||||
|
@ -23,8 +23,8 @@
|
||||
/*
|
||||
Note : this is *only* a demo program, an example to show how LZ4 can be used.
|
||||
It is not considered part of LZ4 compression library.
|
||||
The license of the demo program is GPL.
|
||||
The license of LZ4 is BSD.
|
||||
The license of the demo program is GPL.
|
||||
*/
|
||||
|
||||
//****************************
|
||||
@ -340,6 +340,9 @@ int main(int argc, char** argv)
|
||||
// Modify Block Size (benchmark only)
|
||||
if ( argument[0] =='B' ) { int B = argument[1] - '0'; int S = 1 << (10 + 2*B); BMK_SetBlocksize(S); continue; }
|
||||
|
||||
// Modify Nb Iterations (benchmark only)
|
||||
if ( argument[0] =='i' ) { int iters = argument[1] - '0'; BMK_SetNbIterations(iters); continue; }
|
||||
|
||||
// Test
|
||||
if ( argument[0] =='t' ) { decode=1; output_filename=nulmark; continue; }
|
||||
}
|
||||
@ -359,7 +362,7 @@ int main(int argc, char** argv)
|
||||
// No input filename ==> Error
|
||||
if(!input_filename) { badusage(); return 1; }
|
||||
|
||||
if (bench) return BMK_benchFile(argv+filenamesStart, argc-filenamesStart);
|
||||
if (bench) return BMK_benchFile(argv+filenamesStart, argc-filenamesStart, 0);
|
||||
|
||||
// No output filename
|
||||
if (!output_filename) { badusage(); return 1; }
|
||||
|
Loading…
Reference in New Issue
Block a user