Better detection of Big-Endian and PowerPC CPU

LZ4Demo : correction : produces compatible streams on both big-endian and little-endian systems

git-svn-id: https://lz4.googlecode.com/svn/trunk@52 650e7d94-2a16-8b24-b05c-7c0b3f6821cd
This commit is contained in:
yann.collet.73@gmail.com 2012-02-01 04:19:38 +00:00
parent 572cab747f
commit 2327aa404e
3 changed files with 88 additions and 47 deletions

16
bench.c
View File

@ -29,7 +29,7 @@
//**************************************
// Includes
//**************************************
#include <stdio.h> // printf, fopen, fseeko64, ftello64
#include <stdio.h> // fprintf, fopen, ftello64
#include <stdlib.h> // malloc
#include <sys/timeb.h> // timeb
#include "lz4.h"
@ -167,11 +167,11 @@ static U64 BMK_GetFileSize(FILE* f)
{
U64 r;
#ifdef _MSC_VER
_fseeki64(f, 0L, SEEK_END);
r = _fseeki64(f, 0L, SEEK_END);
r = (U64) _ftelli64(f);
_fseeki64(f, 0L, SEEK_SET);
#else
fseeko64(f, 0LL, SEEK_END);
r = (U64) fseeko64(f, 0LL, SEEK_END);
r = (U64) ftello64(f);
fseeko64(f, 0LL, SEEK_SET);
#endif
@ -269,7 +269,7 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles)
if(readSize != benchedsize)
{
printf("\nError: problem reading file '%s' !! \n", infilename);
DISPLAY("\nError: problem reading file '%s' !! \n", infilename);
free(in_buff);
free(out_buff);
return 13;
@ -326,6 +326,10 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles)
if ((double)milliTime < fastestD*nb_loops) fastestD = (double)milliTime/nb_loops;
DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%), %6.1f MB/s , %6.1f MB/s\r", loopNb, infilename, (int)benchedsize, (int)cSize, (double)cSize/(double)benchedsize*100., (double)benchedsize / fastestC / 1000., (double)benchedsize / fastestD / 1000.);
// CRC Checking
crcd = BMK_checksum(in_buff, benchedsize);
if (crcc!=crcd) { DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", infilename, (unsigned)crcc, (unsigned)crcd); break; }
}
DISPLAY("%-16.16s : %9i -> %9i (%5.2f%%), %6.1f MB/s , %6.1f MB/s\n", infilename, (int)benchedsize, (int)cSize, (double)cSize/(double)benchedsize*100., (double)benchedsize / fastestC / 1000., (double)benchedsize / fastestD / 1000.);
@ -333,10 +337,6 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles)
totalz += cSize;
totalc += fastestC;
totald += fastestD;
// CRC Checking
crcd = BMK_checksum(in_buff, benchedsize);
if (crcc!=crcd) printf("!!! WARNING !!! Invalid Checksum : %x != %x\n", (unsigned)crcc, (unsigned)crcd);
}
free(in_buff);

70
lz4.c
View File

@ -28,16 +28,30 @@
*/
//**************************************
// Compilation Directives
// Tuning parameters
//**************************************
#if __STDC_VERSION__ >= 199901L
// Increasing this value improves compression ratio
// Lowering this value reduces memory usage
// Reduced memory usage typically improves speed, due to cache effect (ex : L1 32KB for Intel, L1 64KB for AMD)
// Memory usage formula : N->2^(N+2) Bytes (examples : 12 -> 16KB ; 17 -> 512KB)
#define COMPRESSIONLEVEL 12
// Uncomment this parameter if your target system does not support hardware bit count
//#define _FORCE_SW_BITCOUNT
//**************************************
// Compiler Options
//**************************************
#if __STDC_VERSION__ >= 199901L // C99
/* "restrict" is a known keyword */
#else
#define restrict // Disable restrict
#endif
#ifdef _MSC_VER
#define inline __forceinline
#define inline __forceinline // Visual is not C99, but supports inline
#endif
#ifdef __GNUC__
@ -46,10 +60,10 @@
#define _PACKED
#endif
#if (__x86_64__ || __ppc64__ || _WIN64 || __LP64__) // Detect 64 bits mode
#define ARCH64 1
#ifdef _MSC_VER // Visual Studio
#define bswap16(i) _byteswap_ushort(i)
#else
#define ARCH64 0
#define bswap16(i) (((i)>>8) | ((i)<<8))
#endif
@ -61,18 +75,6 @@
#include "lz4.h"
//**************************************
// Performance parameter
//**************************************
// Increasing this value improves compression ratio
// Lowering this value reduces memory usage
// Lowering may also improve speed, typically on reaching cache size limits (L1 32KB for Intel, 64KB for AMD)
// Memory usage formula for 32 bits systems : N->2^(N+2) Bytes (examples : 17 -> 512KB ; 12 -> 16KB)
#define HASH_LOG 12
//#define _FORCE_SW_BITCOUNT // Uncomment for better performance if target platform has no hardware support for LowBitCount
//**************************************
// Basic Types
//**************************************
@ -107,6 +109,7 @@
#define MAXD_LOG 16
#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
#define HASH_LOG COMPRESSIONLEVEL
#define HASHTABLESIZE (1 << HASH_LOG)
#define HASH_MASK (HASHTABLESIZE - 1)
@ -147,6 +150,19 @@ typedef struct _U16_S
//**************************************
// Architecture-specific macros
//**************************************
// 64-bit mode detection : ARCH64 is 1 when any of the listed target macros is non-zero, 0 otherwise
#if !(__x86_64__ || __x86_64 || __amd64__ || __amd64 || __ppc64__ || _WIN64 || __LP64__ || _LP64)
#  define ARCH64 0   // none of the known 64-bit indicators is set
#else
#  define ARCH64 1   // at least one 64-bit indicator is set
#endif
// Big-endian CPU auto-detection.
// CPU_BIG_ENDIAN is defined to 1 when a big-endian target is detected, and left undefined otherwise
// (an undefined macro evaluates to 0 inside #if).
// Manual override : define CPU_BIG_ENDIAN yourself (to 1 or 0) before this point, e.g. -DCPU_BIG_ENDIAN=1;
// a pre-existing definition is never touched.
#ifndef CPU_BIG_ENDIAN
#  if (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) || defined(__LITTLE_ENDIAN__)
     // The compiler explicitly reports a little-endian target. This must win over the heuristics below :
     // PowerPC can run little-endian (ppc64le), so the PowerPC macros alone do not imply big-endian.
     // NOTE(review): some system headers reportedly define _BIG_ENDIAN as a plain constant on every target - confirm.
#  elif (__BIG_ENDIAN__ || _BIG_ENDIAN || _ARCH_PPC || __PPC__ || __PPC || PPC || __powerpc__ || __powerpc || powerpc || ((defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))) )
#    define CPU_BIG_ENDIAN 1
#  else
     // Little Endian assumed. PDP Endian and other very rare endian formats are unsupported.
#  endif
#endif
#if ARCH64 // 64-bit
#define STEPSIZE 8
#define UARCH U64
@ -167,14 +183,14 @@ typedef struct _U16_S
#define INITBASE(base) const int base = 0
#endif
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#if CPU_BIG_ENDIAN
#define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = bswap16(v); d = (s) - v; }
#define LZ4_WRITE_LITTLEENDIAN_16(p,i) { U16 v = (U16)(i); v = bswap16(v); A16(p) = v; p+=2; }
#define LZ4_NbCommonBytes LZ4_NbCommonBytes_BigEndian
#else // Little Endian
#define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); }
#define LZ4_WRITE_LITTLEENDIAN_16(p,v) { A16(p) = v; p+=2; }
#define LZ4_NbCommonBytes LZ4_NbCommonBytes_LittleEndian
#else // Big Endian
#define LZ4_READ_LITTLEENDIAN_16(d,s,p) { int delta = p[0]; delta += p[1] << 8; d = (s) - delta; }
#define LZ4_WRITE_LITTLEENDIAN_16(p,v) { int delta = v; *p++ = delta; *p++ = delta>>8; }
#define LZ4_NbCommonBytes LZ4_NbCommonBytes_BigEndian
#endif
@ -616,7 +632,7 @@ int LZ4_uncompress(char* source,
if (op-ref<STEPSIZE)
{
#if ARCH64
size_t dec2table[]={0, 4, 4, 3, 4, 5, 6, 7};
size_t dec2table[]={0, 0, 0, -1, 0, 1, 2, 3};
size_t dec2 = dec2table[op-ref];
#else
const int dec2 = 0;
@ -626,7 +642,7 @@ int LZ4_uncompress(char* source,
*op++ = *ref++;
*op++ = *ref++;
ref -= dec[op-ref];
A32(op)=A32(ref); op += STEPSIZE-4; ref += STEPSIZE-4;
A32(op)=A32(ref); op += STEPSIZE-4;
ref -= dec2;
} else { LZ4_COPYSTEP(ref,op); }
cpy = op + length - (STEPSIZE-4);
@ -703,7 +719,7 @@ int LZ4_uncompress_unknownOutputSize(
if (op-ref<STEPSIZE)
{
#if ARCH64
size_t dec2table[]={0, 4, 4, 3, 4, 5, 6, 7};
size_t dec2table[]={0, 0, 0, -1, 0, 1, 2, 3};
size_t dec2 = dec2table[op-ref];
#else
const int dec2 = 0;
@ -713,7 +729,7 @@ int LZ4_uncompress_unknownOutputSize(
*op++ = *ref++;
*op++ = *ref++;
ref -= dec[op-ref];
A32(op)=A32(ref); op += STEPSIZE-4; ref += STEPSIZE-4;
A32(op)=A32(ref); op += STEPSIZE-4;
ref -= dec2;
} else { LZ4_COPYSTEP(ref,op); }
cpy = op + length - (STEPSIZE-4);

View File

@ -43,9 +43,13 @@
//**************************************
// Basic Types
// Compiler functions
//**************************************
// swap32 : reverses the byte order of a 32-bit unsigned value.
// Uses the compiler intrinsic when one is known to exist, otherwise a portable shift-and-mask version.
#if defined(_MSC_VER)    // Visual Studio
#  define swap32 _byteswap_ulong
#elif defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 3))))
#  define swap32 __builtin_bswap32   // builtin provided by GCC >= 4.3 and by clang
#else                    // Unknown or older compiler : portable fallback (assumes a 32-bit unsigned int)
static unsigned int swap32(unsigned int x)
{
    return ((x & 0x000000FFu) << 24)
         | ((x & 0x0000FF00u) <<  8)
         | ((x & 0x00FF0000u) >>  8)
         | ((x & 0xFF000000u) >> 24);
}
#endif
//****************************
@ -67,19 +71,27 @@
//**************************************
// MACRO
// Architecture Macros
//**************************************
// Run-time endianness probe : 'one' holds the int value 1.
// CPU_LITTLE_ENDIAN reads its first byte in memory : non-zero means the low-order byte is stored first.
static const int one = 1;
#define CPU_LITTLE_ENDIAN (*(const char*)(&one))
#define CPU_BIG_ENDIAN (!CPU_LITTLE_ENDIAN)
// LITTLE_ENDIAN32(i) : byte-swaps the 32-bit lvalue i in place on big-endian hosts; does nothing on little-endian hosts.
// Wrapped in do { } while (0) so the macro behaves as a single statement (safe inside if/else without braces).
#define LITTLE_ENDIAN32(i) do { if (CPU_BIG_ENDIAN) { (i) = swap32(i); } } while (0)
//**************************************
// Macros
//**************************************
// DISPLAY : printf-style message output, written to stderr (not stdout)
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
//****************************
// Functions
//****************************
int usage()
{
DISPLAY( "Usage :\n");
DISPLAY( " %s [arg] input output\n",BINARY_NAME);
DISPLAY( " %s [arg] input output\n", BINARY_NAME);
DISPLAY( "Arguments :\n");
DISPLAY( " -c : compression (default)\n");
DISPLAY( " -d : decompression \n");
@ -138,6 +150,7 @@ int compress_file(char* input_filename, char* output_filename)
{
unsigned long long filesize = 0;
unsigned long long compressedfilesize = ARCHIVE_MAGICNUMBER_SIZE;
unsigned int u32var;
char* in_buff;
char* out_buff;
FILE* finput;
@ -157,7 +170,9 @@ int compress_file(char* input_filename, char* output_filename)
if (!in_buff || !out_buff) { DISPLAY("Allocation error : not enough memory\n"); return 8; }
// Write Archive Header
*(unsigned long*)out_buff = ARCHIVE_MAGICNUMBER;
u32var = ARCHIVE_MAGICNUMBER;
LITTLE_ENDIAN32(u32var);
*(unsigned int*)out_buff = u32var;
fwrite(out_buff, 1, ARCHIVE_MAGICNUMBER_SIZE, foutput);
// Main Loop
@ -171,10 +186,12 @@ int compress_file(char* input_filename, char* output_filename)
// Compress Block
outSize = LZ4_compress(in_buff, out_buff+4, inSize);
* (unsigned int*) out_buff = outSize;
compressedfilesize += outSize+4;
// Write Block
LITTLE_ENDIAN32(outSize);
* (unsigned int*) out_buff = outSize;
LITTLE_ENDIAN32(outSize);
fwrite(out_buff, 1, outSize+4, foutput);
}
@ -223,9 +240,15 @@ int decode_file(char* input_filename, char* output_filename)
// Check Archive Header
uselessRet = fread(out_buff, 1, ARCHIVE_MAGICNUMBER_SIZE, finput);
if (*(unsigned long*)out_buff != ARCHIVE_MAGICNUMBER) { DISPLAY("Unrecognized header : file cannot be decoded\n"); return 6; }
nextSize = *(unsigned int*)out_buff;
LITTLE_ENDIAN32(nextSize);
if (nextSize != ARCHIVE_MAGICNUMBER) { DISPLAY("Unrecognized header : file cannot be decoded\n"); return 6; }
// First Block
*(unsigned int*)in_buff = 0;
uselessRet = fread(in_buff, 1, 4, finput);
nextSize = *(unsigned long*)in_buff;
nextSize = *(unsigned int*)in_buff;
LITTLE_ENDIAN32(nextSize);
// Main Loop
while (1)
@ -234,18 +257,20 @@ int decode_file(char* input_filename, char* output_filename)
uselessRet = fread(in_buff, 1, nextSize, finput);
// Check Next Block
uselessRet = (unsigned long) fread(&nextSize, 1, 4, finput);
if( uselessRet==0 ) break;
uselessRet = (size_t) fread(&nextSize, 1, 4, finput);
if( uselessRet==0 ) break; // Nothing read : file read is completed
LITTLE_ENDIAN32(nextSize);
// Decode Block
sinkint = LZ4_uncompress(in_buff, out_buff, CHUNKSIZE);
if (sinkint < 0) { DISPLAY("Decoding Failed ! Corrupted input !\n"); return 9; }
filesize += CHUNKSIZE;
// Write Block
fwrite(out_buff, 1, CHUNKSIZE, foutput);
}
// Last Block
// Last Block (whose size is <= CHUNKSIZE, but let LZ4 figure that out)
uselessRet = fread(in_buff, 1, nextSize, finput);
sinkint = LZ4_uncompress_unknownOutputSize(in_buff, out_buff, nextSize, CHUNKSIZE);
filesize += sinkint;