Slightly improved decoding speed under GCC compilation
git-svn-id: https://lz4.googlecode.com/svn/trunk@38 650e7d94-2a16-8b24-b05c-7c0b3f6821cd
This commit is contained in:
parent
667f8ea1c8
commit
439f270a4e
39
lz4.c
39
lz4.c
@ -48,17 +48,19 @@
|
||||
//**************************************
|
||||
// Basic Types
|
||||
//**************************************
|
||||
#if defined(_MSC_VER)
|
||||
#if defined(_MSC_VER) // Visual Studio does not support 'stdint' natively
|
||||
#define BYTE unsigned __int8
|
||||
#define U16 unsigned __int16
|
||||
#define U32 unsigned __int32
|
||||
#define S32 __int32
|
||||
#define restrict // Visual Studio does not support 'restrict' keyword either
|
||||
#else
|
||||
#include <stdint.h>
|
||||
#define BYTE uint8_t
|
||||
#define U16 uint16_t
|
||||
#define U32 uint32_t
|
||||
#define S32 int32_t
|
||||
#define restrict restrict
|
||||
#endif
|
||||
|
||||
|
||||
@ -435,10 +437,10 @@ int LZ4_uncompress(char* source,
|
||||
int osize)
|
||||
{
|
||||
// Local Variables
|
||||
const BYTE* ip = (const BYTE*) source;
|
||||
BYTE* ref;
|
||||
const BYTE* restrict ip = (const BYTE*) source;
|
||||
const BYTE* restrict ref;
|
||||
|
||||
BYTE* op = (BYTE*) dest;
|
||||
BYTE* restrict op = (BYTE*) dest;
|
||||
BYTE* const oend = op + osize;
|
||||
BYTE* cpy;
|
||||
|
||||
@ -456,19 +458,19 @@ int LZ4_uncompress(char* source,
|
||||
if ((length=(token>>ML_BITS)) == RUN_MASK) { for (;(len=*ip++)==255;length+=255){} length += len; }
|
||||
|
||||
// copy literals
|
||||
ref = op+length;
|
||||
if (ref>oend-COPYLENGTH)
|
||||
cpy = op+length;
|
||||
if (cpy>oend-COPYLENGTH)
|
||||
{
|
||||
if (ref > oend) goto _output_error;
|
||||
if (cpy > oend) goto _output_error;
|
||||
memcpy(op, ip, length);
|
||||
ip += length;
|
||||
break; // Necessarily EOF
|
||||
}
|
||||
LZ4_WILDCOPY(ip, op, ref); ip -= (op-ref); op = ref;
|
||||
LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy;
|
||||
|
||||
|
||||
// get offset
|
||||
ref -= A16(ip); ip+=2;
|
||||
ref = cpy - A16(ip); ip+=2;
|
||||
|
||||
// get matchlength
|
||||
if ((length=(token&ML_MASK)) == ML_MASK) { for (;*ip==255;length+=255) {ip++;} length += *ip++; }
|
||||
@ -513,17 +515,17 @@ int LZ4_uncompress_unknownOutputSize(
|
||||
int maxOutputSize)
|
||||
{
|
||||
// Local Variables
|
||||
const BYTE* ip = (const BYTE*) source;
|
||||
const BYTE* restrict ip = (const BYTE*) source;
|
||||
const BYTE* const iend = ip + isize;
|
||||
BYTE* ref;
|
||||
const BYTE* restrict ref;
|
||||
|
||||
BYTE* op = (BYTE*) dest;
|
||||
BYTE* restrict op = (BYTE*) dest;
|
||||
BYTE* const oend = op + maxOutputSize;
|
||||
BYTE* cpy;
|
||||
|
||||
BYTE token;
|
||||
|
||||
U32 dec[COPYTOKEN]={0, 3, 2, 3};
|
||||
U32 dec[4]={0, 3, 2, 3};
|
||||
int len, length;
|
||||
|
||||
|
||||
@ -535,20 +537,20 @@ int LZ4_uncompress_unknownOutputSize(
|
||||
if ((length=(token>>ML_BITS)) == RUN_MASK) { for (;(len=*ip++)==255;length+=255){} length += len; }
|
||||
|
||||
// copy literals
|
||||
ref = op+length;
|
||||
if (ref>oend-COPYLENGTH)
|
||||
cpy = op+length;
|
||||
if (cpy>oend-COPYLENGTH)
|
||||
{
|
||||
if (ref > oend) goto _output_error;
|
||||
if (cpy > oend) goto _output_error;
|
||||
memcpy(op, ip, length);
|
||||
op += length;
|
||||
break; // Necessarily EOF
|
||||
}
|
||||
LZ4_WILDCOPY(ip, op, ref); ip -= (op-ref); op = ref;
|
||||
LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy;
|
||||
if (ip>=iend) break; // check EOF
|
||||
|
||||
|
||||
// get offset
|
||||
ref -= A16(ip); ip+=2;
|
||||
ref = cpy - A16(ip); ip+=2;
|
||||
|
||||
// get matchlength
|
||||
if ((length=(token&ML_MASK)) == ML_MASK) { for (;(len=*ip++)==255;length+=255){} length += len; }
|
||||
@ -585,4 +587,3 @@ _output_error:
|
||||
return (int) (-(((char*)ip)-source));
|
||||
}
|
||||
|
||||
|
||||
|
12
lz4.h
12
lz4.h
@ -41,13 +41,13 @@ int LZ4_compress (char* source, char* dest, int isize);
|
||||
int LZ4_uncompress (char* source, char* dest, int osize);
|
||||
|
||||
/*
|
||||
LZ4_compress :
|
||||
LZ4_compress() :
|
||||
return : the number of bytes in compressed buffer dest
|
||||
note : destination buffer must be already allocated.
|
||||
To avoid any problem, size it to handle worst-case situations (input data not compressible)
|
||||
Worst case size is : "inputsize + 0.4%", with "0.4%" being at least 8 bytes.
|
||||
|
||||
LZ4_uncompress :
|
||||
LZ4_uncompress() :
|
||||
osize : is the output size, therefore the original size
|
||||
return : the number of bytes read in the source buffer
|
||||
If the source stream is malformed, the function will stop decoding and return a negative result, indicating the byte position of the faulty instruction
|
||||
@ -63,20 +63,20 @@ LZ4_uncompress :
|
||||
int LZ4_uncompress_unknownOutputSize (char* source, char* dest, int isize, int maxOutputSize);
|
||||
|
||||
/*
|
||||
LZ4_uncompress_unknownOutputSize :
|
||||
LZ4_uncompress_unknownOutputSize() :
|
||||
isize : is the input size, therefore the compressed size
|
||||
maxOutputSize : is the size of the destination buffer (which must be already allocated)
|
||||
return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize)
|
||||
If the source stream is malformed, the function will stop decoding and return a negative result, indicating the byte position of the faulty instruction
|
||||
This version never writes beyond dest + maxOutputSize, and is therefore protected against malicious data packets
|
||||
note : This version is slower than LZ4_uncompress, and is therefore not recommended for general use
|
||||
note : This version is a bit slower than LZ4_uncompress
|
||||
*/
|
||||
|
||||
|
||||
int LZ4_compressCtx(void** ctx, char* source, char* dest, int isize);
|
||||
|
||||
/*
|
||||
LZ4_compressCtx :
|
||||
LZ4_compressCtx() :
|
||||
This function explicitly handles the CTX memory structure.
|
||||
It avoids allocating/deallocating memory between each call, improving performance when malloc is time-consuming.
|
||||
Note : when memory is allocated into the stack (default mode), there is no "malloc" penalty.
|
||||
@ -86,7 +86,7 @@ LZ4_compressCtx :
|
||||
On next calls : reuse the same ctx pointer.
|
||||
Use different pointers for different threads when doing multi-threading.
|
||||
|
||||
note : performance difference is small, mostly noticeable when repetitively calling the compression algorithm on many small segments.
|
||||
note : performance difference is small, mostly noticeable in HeapMode when repeatedly calling the compression function over many small segments.
|
||||
*/
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user