Slightly improved decoding speed under GCC compilation

git-svn-id: https://lz4.googlecode.com/svn/trunk@38 650e7d94-2a16-8b24-b05c-7c0b3f6821cd
2011-11-15 22:06:33 +00:00 · 2011-11-15 22:06:33 +00:00 · 439f270a4e
commit 439f270a4e
parent 667f8ea1c8
2 changed files with 26 additions and 25 deletions
--- a/lz4.c
+++ b/lz4.c
@ -48,17 +48,19 @@
 //**************************************
 // Basic Types
 //**************************************
-#if defined(_MSC_VER) 
+#if defined(_MSC_VER)    // Visual Studio does not support 'stdint' natively, so use its built-in sized integer types
 #define BYTE	unsigned __int8
 #define U16		unsigned __int16
 #define U32		unsigned __int32
 #define S32		__int32
+#define restrict         // Visual Studio does not support the 'restrict' keyword either: expand it to nothing
 #else
 #include <stdint.h>
 #define BYTE	uint8_t
 #define U16		uint16_t
 #define U32		uint32_t
 #define S32		int32_t
+#define restrict restrict   // No-op self-definition: 'restrict' is passed through unchanged. NOTE(review): requires a compiler mode that accepts the C99 'restrict' keyword - confirm build flags
 #endif


@ -435,10 +437,10 @@ int LZ4_uncompress(char* source,
 				 int osize)
 {	
 	// Local Variables
-	const BYTE* ip = (const BYTE*) source;
-	BYTE* ref;
+	const BYTE* restrict ip = (const BYTE*) source;
+	const BYTE* restrict ref;

-	BYTE* op = (BYTE*) dest;
+	BYTE* restrict op = (BYTE*) dest;
 	BYTE* const oend = op + osize;
 	BYTE* cpy;

@ -456,19 +458,19 @@ int LZ4_uncompress(char* source,
 		if ((length=(token>>ML_BITS)) == RUN_MASK)  { for (;(len=*ip++)==255;length+=255){} length += len; } 

 		// copy literals
-		ref = op+length;
-		if (ref>oend-COPYLENGTH) 
+		cpy = op+length;
+		if (cpy>oend-COPYLENGTH) 
 		{ 
-			if (ref > oend) goto _output_error;
+			if (cpy > oend) goto _output_error;
 			memcpy(op, ip, length);
 			ip += length;
 			break;    // Necessarily EOF
 		}
-		LZ4_WILDCOPY(ip, op, ref); ip -= (op-ref); op = ref;
+		LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy;


 		// get offset
-		ref -= A16(ip); ip+=2;
+		ref = cpy - A16(ip); ip+=2;

 		// get matchlength
 		if ((length=(token&ML_MASK)) == ML_MASK) { for (;*ip==255;length+=255) {ip++;} length += *ip++; } 
@ -513,17 +515,17 @@ int LZ4_uncompress_unknownOutputSize(
 				int maxOutputSize)
 {	
 	// Local Variables
-	const BYTE* ip = (const BYTE*) source;
+	const BYTE* restrict ip = (const BYTE*) source;
 	const BYTE* const iend = ip + isize;
-	BYTE* ref;
+	const BYTE* restrict ref;

-	BYTE* op = (BYTE*) dest;
+	BYTE* restrict op = (BYTE*) dest;
 	BYTE* const oend = op + maxOutputSize;
 	BYTE* cpy;

 	BYTE token;
 	
-	U32	dec[COPYTOKEN]={0, 3, 2, 3};
+	U32	dec[4]={0, 3, 2, 3};
 	int	len, length;


@ -535,20 +537,20 @@ int LZ4_uncompress_unknownOutputSize(
 		if ((length=(token>>ML_BITS)) == RUN_MASK)  { for (;(len=*ip++)==255;length+=255){} length += len; } 

 		// copy literals
-		ref = op+length;
-		if (ref>oend-COPYLENGTH) 
+		cpy = op+length;
+		if (cpy>oend-COPYLENGTH) 
 		{ 
-			if (ref > oend) goto _output_error;
+			if (cpy > oend) goto _output_error;
 			memcpy(op, ip, length);
 			op += length;
 			break;    // Necessarily EOF
 		}
-		LZ4_WILDCOPY(ip, op, ref); ip -= (op-ref); op = ref;
+		LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy;
 		if (ip>=iend) break;    // check EOF


 		// get offset
-		ref -= A16(ip); ip+=2;
+		ref = cpy - A16(ip); ip+=2;

 		// get matchlength
 		if ((length=(token&ML_MASK)) == ML_MASK) { for (;(len=*ip++)==255;length+=255){} length += len; }
@ -585,4 +587,3 @@ _output_error:
 	return (int) (-(((char*)ip)-source));
 }

-
--- a/lz4.h
+++ b/lz4.h
@ -41,13 +41,13 @@ int LZ4_compress   (char* source, char* dest, int isize);
 int LZ4_uncompress (char* source, char* dest, int osize);

 /*
-LZ4_compress :
+LZ4_compress() :
 	return : the number of bytes in compressed buffer dest
 	note : destination buffer must be already allocated. 
 		To avoid any problem, size it to handle worst cases situations (input data not compressible)
 		Worst case size is : "inputsize + 0.4%", with "0.4%" being at least 8 bytes.

-LZ4_uncompress :
+LZ4_uncompress() :
 	osize  : is the output size, therefore the original size
 	return : the number of bytes read in the source buffer
 			 If the source stream is malformed, the function will stop decoding and return a negative result, indicating the byte position of the faulty instruction
@ -63,20 +63,20 @@ LZ4_uncompress :
 int LZ4_uncompress_unknownOutputSize (char* source, char* dest, int isize, int maxOutputSize);

 /*
-LZ4_uncompress_unknownOutputSize :
+LZ4_uncompress_unknownOutputSize() :
 	isize  : is the input size, therefore the compressed size
 	maxOutputSize : is the size of the destination buffer (which must be already allocated)
 	return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize)
 			 If the source stream is malformed, the function will stop decoding and return a negative result, indicating the byte position of the faulty instruction
 			 This version never writes beyond dest + maxOutputSize, and is therefore protected against malicious data packets
-	note   : This version is slower than LZ4_uncompress, and is therefore not recommended for general use
+	note   : This version is a bit slower than LZ4_uncompress
 */


 int LZ4_compressCtx(void** ctx, char* source,  char* dest, int isize);

 /*
-LZ4_compressCtx :
+LZ4_compressCtx() :
 	This function explicitly handles the CTX memory structure.
 	It avoids allocating/deallocating memory between each call, improving performance when malloc is time-consuming.
 	Note : when memory is allocated into the stack (default mode), there is no "malloc" penalty.
@ -86,7 +86,7 @@ LZ4_compressCtx :
 	On next calls : reuse the same ctx pointer.
 	Use different pointers for different threads when doing multi-threading.

-	note : performance difference is small, mostly noticeable when repetitively calling the compression algorithm on many small segments.
+	note : performance difference is small, mostly noticeable in HeapMode when repeatedly calling the compression function over many small segments.
 */