Slightly improved decoding speed under GCC compilation

git-svn-id: https://lz4.googlecode.com/svn/trunk@38 650e7d94-2a16-8b24-b05c-7c0b3f6821cd
This commit is contained in:
yann.collet.73@gmail.com 2011-11-15 22:06:33 +00:00
parent 667f8ea1c8
commit 439f270a4e
2 changed files with 26 additions and 25 deletions

39
lz4.c
View File

@ -48,17 +48,19 @@
//**************************************
// Basic Types
//**************************************
#if defined(_MSC_VER)
#if defined(_MSC_VER) // Visual Studio does not support 'stdint' natively
#define BYTE unsigned __int8
#define U16 unsigned __int16
#define U32 unsigned __int32
#define S32 __int32
#define restrict // Visual Studio does not support 'restrict' keyword either
#else
#include <stdint.h>
#define BYTE uint8_t
#define U16 uint16_t
#define U32 uint32_t
#define S32 int32_t
#define restrict restrict
#endif
@ -435,10 +437,10 @@ int LZ4_uncompress(char* source,
int osize)
{
// Local Variables
const BYTE* ip = (const BYTE*) source;
BYTE* ref;
const BYTE* restrict ip = (const BYTE*) source;
const BYTE* restrict ref;
BYTE* op = (BYTE*) dest;
BYTE* restrict op = (BYTE*) dest;
BYTE* const oend = op + osize;
BYTE* cpy;
@ -456,19 +458,19 @@ int LZ4_uncompress(char* source,
if ((length=(token>>ML_BITS)) == RUN_MASK) { for (;(len=*ip++)==255;length+=255){} length += len; }
// copy literals
ref = op+length;
if (ref>oend-COPYLENGTH)
cpy = op+length;
if (cpy>oend-COPYLENGTH)
{
if (ref > oend) goto _output_error;
if (cpy > oend) goto _output_error;
memcpy(op, ip, length);
ip += length;
break; // Necessarily EOF
}
LZ4_WILDCOPY(ip, op, ref); ip -= (op-ref); op = ref;
LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy;
// get offset
ref -= A16(ip); ip+=2;
ref = cpy - A16(ip); ip+=2;
// get matchlength
if ((length=(token&ML_MASK)) == ML_MASK) { for (;*ip==255;length+=255) {ip++;} length += *ip++; }
@ -513,17 +515,17 @@ int LZ4_uncompress_unknownOutputSize(
int maxOutputSize)
{
// Local Variables
const BYTE* ip = (const BYTE*) source;
const BYTE* restrict ip = (const BYTE*) source;
const BYTE* const iend = ip + isize;
BYTE* ref;
const BYTE* restrict ref;
BYTE* op = (BYTE*) dest;
BYTE* restrict op = (BYTE*) dest;
BYTE* const oend = op + maxOutputSize;
BYTE* cpy;
BYTE token;
U32 dec[COPYTOKEN]={0, 3, 2, 3};
U32 dec[4]={0, 3, 2, 3};
int len, length;
@ -535,20 +537,20 @@ int LZ4_uncompress_unknownOutputSize(
if ((length=(token>>ML_BITS)) == RUN_MASK) { for (;(len=*ip++)==255;length+=255){} length += len; }
// copy literals
ref = op+length;
if (ref>oend-COPYLENGTH)
cpy = op+length;
if (cpy>oend-COPYLENGTH)
{
if (ref > oend) goto _output_error;
if (cpy > oend) goto _output_error;
memcpy(op, ip, length);
op += length;
break; // Necessarily EOF
}
LZ4_WILDCOPY(ip, op, ref); ip -= (op-ref); op = ref;
LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy;
if (ip>=iend) break; // check EOF
// get offset
ref -= A16(ip); ip+=2;
ref = cpy - A16(ip); ip+=2;
// get matchlength
if ((length=(token&ML_MASK)) == ML_MASK) { for (;(len=*ip++)==255;length+=255){} length += len; }
@ -585,4 +587,3 @@ _output_error:
return (int) (-(((char*)ip)-source));
}

12
lz4.h
View File

@ -41,13 +41,13 @@ int LZ4_compress (char* source, char* dest, int isize);
int LZ4_uncompress (char* source, char* dest, int osize);
/*
LZ4_compress :
LZ4_compress() :
return : the number of bytes in compressed buffer dest
note : destination buffer must be already allocated.
To avoid any problem, size it to handle worst-case situations (input data not compressible)
Worst case size is : "inputsize + 0.4%", with "0.4%" being at least 8 bytes.
LZ4_uncompress :
LZ4_uncompress() :
osize : is the output size, therefore the original size
return : the number of bytes read in the source buffer
If the source stream is malformed, the function will stop decoding and return a negative result, indicating the byte position of the faulty instruction
@ -63,20 +63,20 @@ LZ4_uncompress :
int LZ4_uncompress_unknownOutputSize (char* source, char* dest, int isize, int maxOutputSize);
/*
LZ4_uncompress_unknownOutputSize :
LZ4_uncompress_unknownOutputSize() :
isize : is the input size, therefore the compressed size
maxOutputSize : is the size of the destination buffer (which must be already allocated)
return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize)
If the source stream is malformed, the function will stop decoding and return a negative result, indicating the byte position of the faulty instruction
This version never writes beyond dest + maxOutputSize, and is therefore protected against malicious data packets
note : This version is slower than LZ4_uncompress, and is therefore not recommended for general use
note : This version is a bit slower than LZ4_uncompress
*/
int LZ4_compressCtx(void** ctx, char* source, char* dest, int isize);
/*
LZ4_compressCtx :
LZ4_compressCtx() :
This function explicitly handles the CTX memory structure.
It avoids allocating/deallocating memory between each call, improving performance when malloc is time-consuming.
Note : when memory is allocated on the stack (default mode), there is no "malloc" penalty.
@ -86,7 +86,7 @@ LZ4_compressCtx :
On next calls : reuse the same ctx pointer.
Use different pointers for different threads when doing multi-threading.
note : performance difference is small, mostly noticeable when repetitively calling the compression algorithm on many small segments.
note : performance difference is small, mostly noticeable in HeapMode when repeatedly calling the compression function over many small segments.
*/