- New naming : LZ4_decompress_safe() and LZ4_decompress_fast()

- New function : LZ4_decompress_safe_partial(), to decompress just enough data within a compressed block, saving CPU cycles - New source files : lz4_decoder.h, lz4_encoder.h, lz4hc_encoder.h - Improved speed of LZ4_decompress_fast() - Improved speed for compression of small blocks < 64KB - Improved speed for HC compression git-svn-id: https://lz4.googlecode.com/svn/trunk@94 650e7d94-2a16-8b24-b05c-7c0b3f6821cd
2013-04-26 14:37:46 +00:00 · 2013-04-26 14:37:46 +00:00 · 40ae7043df
commit 40ae7043df
parent a2e93db805
10 changed files with 1266 additions and 938 deletions
--- a/Format.odt
+++ b/Format.odt
--- a/bench.c
+++ b/bench.c
@ -58,18 +58,20 @@
 #include <sys/stat.h>    // stat64

 // Use ftime() if gettimeofday() is not available on your target
-#if defined(BMK_LEGACY_TIMER)   
+#if defined(BMK_LEGACY_TIMER)
 #  include <sys/timeb.h>   // timeb, ftime
 #else
 #  include <sys/time.h>    // gettimeofday
 #endif

 #include "lz4.h"
+//int LZ4_compress_stack(const char* in, char* out, int size);
 #define COMPRESSOR0 LZ4_compress
 #include "lz4hc.h"
 #define COMPRESSOR1 LZ4_compressHC
-#define DEFAULTCOMPRESSOR LZ4_compress
+#define DEFAULTCOMPRESSOR COMPRESSOR0

+#include "xxhash.h"


 //**************************************
@ -99,7 +101,7 @@

 #define KNUTH		2654435761U
 #define MAX_MEM		(1984<<20)
-#define DEFAULT_CHUNKSIZE   (8<<20)
+#define DEFAULT_CHUNKSIZE   (4<<20)


 //**************************************
@ -108,10 +110,10 @@
 struct chunkParameters
 {
 	U32   id;
-	char* inputBuffer;
-	char* outputBuffer;
-	int   inputSize;
-	int   outputSize;
+	char* origBuffer;
+	char* compressedBuffer;
+	int   origSize;
+	int   compressedSize;
 };

 struct compressionParameters
@ -144,7 +146,7 @@ void BMK_SetBlocksize(int bsize)
 void BMK_SetNbIterations(int nbLoops)
 {
 	nbIterations = nbLoops;
-	DISPLAY("- %i iterations-\n", nbIterations);
+	DISPLAY("- %i iterations -\n", nbIterations);
 }

 void BMK_SetPause()
@ -195,55 +197,6 @@ static int BMK_GetMilliSpan( int nTimeStart )
 }


-static U32 BMK_checksum_MMH3A (char* buff, U32 length)
-{
-  const BYTE* data = (const BYTE*)buff;
-  const int nblocks = length >> 2;
-
-  U32 h1 = KNUTH;
-  U32 c1 = 0xcc9e2d51;
-  U32 c2 = 0x1b873593;
-
-  const U32* blocks = (const U32*)(data + nblocks*4);
-  int i;
-
-  for(i = -nblocks; i; i++)
-  {
-    U32 k1 = blocks[i];
-
-    k1 *= c1;
-    k1 = _rotl(k1,15);
-    k1 *= c2;
-
-    h1 ^= k1;
-    h1 = _rotl(h1,13);
-    h1 = h1*5+0xe6546b64;
-  }
-
-  {
-	  const BYTE* tail = (const BYTE*)(data + nblocks*4);
-	  U32 k1 = 0;
-
-	  switch(length & 3)
-	  {
-	  case 3: k1 ^= tail[2] << 16;
-	  case 2: k1 ^= tail[1] << 8;
-	  case 1: k1 ^= tail[0];
-			  k1 *= c1; k1 = _rotl(k1,15); k1 *= c2; h1 ^= k1;
-	  };
-  }
-
-  h1 ^= length;
-  h1 ^= h1 >> 16;
-  h1 *= 0x85ebca6b;
-  h1 ^= h1 >> 13;
-  h1 *= 0xc2b2ae35;
-  h1 ^= h1 >> 16;
-
-  return h1;
-}
-
-
 static size_t BMK_findMaxMem(U64 requiredMem)
 {
 	size_t step = (64U<<20);   // 64 MB
@ -289,12 +242,12 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel)
  FILE* fileIn;
  char* infilename;
  U64 largefilesize;
-  size_t benchedsize;
+  size_t benchedSize;
  int nbChunks;
  int maxCChunkSize;
  size_t readSize;
-  char* in_buff;
-  char* out_buff; int out_buff_size;
+  char* orig_buff;
+  char* compressed_buff; int compressed_buff_size;
  struct chunkParameters* chunkP;
  U32 crcc, crcd=0;
  struct compressionParameters compP;
@ -316,7 +269,7 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel)
 #endif
  default : compP.compressionFunction = DEFAULTCOMPRESSOR;
  }
-  compP.decompressionFunction = LZ4_uncompress;
+  compP.decompressionFunction = LZ4_decompress_fast;

  // Loop for each file
  while (fileIdx<nbFiles)
@ -332,27 +285,27 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel)

 	  // Memory allocation & restrictions
 	  largefilesize = BMK_GetFileSize(infilename);
-	  benchedsize = (size_t) BMK_findMaxMem(largefilesize) / 2;
-	  if ((U64)benchedsize > largefilesize) benchedsize = (size_t)largefilesize;
-	  if (benchedsize < largefilesize)
+	  benchedSize = (size_t) BMK_findMaxMem(largefilesize) / 2;
+	  if ((U64)benchedSize > largefilesize) benchedSize = (size_t)largefilesize;
+	  if (benchedSize < largefilesize)
 	  {
-		  DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", infilename, (int)(benchedsize>>20));
+		  DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", infilename, (int)(benchedSize>>20));
 	  }

 	  // Alloc
-	  chunkP = (struct chunkParameters*) malloc(((benchedsize / chunkSize)+1) * sizeof(struct chunkParameters));
-	  in_buff = malloc((size_t )benchedsize);
-	  nbChunks = (int) (benchedsize / chunkSize) + 1;
+	  chunkP = (struct chunkParameters*) malloc(((benchedSize / chunkSize)+1) * sizeof(struct chunkParameters));
+	  orig_buff = malloc((size_t )benchedSize);
+	  nbChunks = (int) (benchedSize / chunkSize) + 1;
 	  maxCChunkSize = LZ4_compressBound(chunkSize);
-	  out_buff_size = nbChunks * maxCChunkSize;
-	  out_buff = malloc((size_t )out_buff_size);
+	  compressed_buff_size = nbChunks * maxCChunkSize;
+	  compressed_buff = malloc((size_t )compressed_buff_size);


-	  if(!in_buff || !out_buff)
+	  if(!orig_buff || !compressed_buff)
 	  {
 		DISPLAY("\nError: not enough memory!\n");
-		free(in_buff);
-		free(out_buff);
+		free(orig_buff);
+		free(compressed_buff);
 		fclose(fileIn);
 		return 12;
 	  }
@ -360,34 +313,34 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel)
 	  // Init chunks data
 	  {
 		  int i;
-		  size_t remaining = benchedsize;
-		  char* in = in_buff;
-		  char* out = out_buff;
+		  size_t remaining = benchedSize;
+		  char* in = orig_buff;
+		  char* out = compressed_buff;
 		  for (i=0; i<nbChunks; i++)
 		  {
 			  chunkP[i].id = i;
-			  chunkP[i].inputBuffer = in; in += chunkSize;
-			  if ((int)remaining > chunkSize) { chunkP[i].inputSize = chunkSize; remaining -= chunkSize; } else { chunkP[i].inputSize = (int)remaining; remaining = 0; }
-			  chunkP[i].outputBuffer = out; out += maxCChunkSize;
-			  chunkP[i].outputSize = 0;
+			  chunkP[i].origBuffer = in; in += chunkSize;
+			  if ((int)remaining > chunkSize) { chunkP[i].origSize = chunkSize; remaining -= chunkSize; } else { chunkP[i].origSize = (int)remaining; remaining = 0; }
+			  chunkP[i].compressedBuffer = out; out += maxCChunkSize;
+			  chunkP[i].compressedSize = 0;
 		  }
 	  }

 	  // Fill input buffer
 	  DISPLAY("Loading %s...       \r", infilename);
-	  readSize = fread(in_buff, 1, benchedsize, fileIn);
+	  readSize = fread(orig_buff, 1, benchedSize, fileIn);
 	  fclose(fileIn);

-	  if(readSize != benchedsize)
+	  if(readSize != benchedSize)
 	  {
 		DISPLAY("\nError: problem reading file '%s' !!    \n", infilename);
-		free(in_buff);
-		free(out_buff);
+		free(orig_buff);
+		free(compressed_buff);
 		return 13;
 	  }

 	  // Calculating input Checksum
-	  crcc = BMK_checksum_MMH3A(in_buff, (unsigned int)benchedsize);
+	  crcc = XXH32(orig_buff, (unsigned int)benchedSize,0);


 	  // Bench
@ -402,8 +355,8 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel)
 		for (loopNb = 1; loopNb <= nbIterations; loopNb++)
 		{
 		  // Compression
-		  DISPLAY("%1i-%-14.14s : %9i ->\r", loopNb, infilename, (int)benchedsize);
-		  { size_t i; for (i=0; i<benchedsize; i++) out_buff[i]=(char)i; }     // warmimg up memory
+		  DISPLAY("%1i-%-14.14s : %9i ->\r", loopNb, infilename, (int)benchedSize);
+		  { size_t i; for (i=0; i<benchedSize; i++) compressed_buff[i]=(char)i; }     // warmimg up memory

 		  nb_loops = 0;
 		  milliTime = BMK_GetMilliStart();
@ -412,19 +365,19 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel)
 		  while(BMK_GetMilliSpan(milliTime) < TIMELOOP)
 		  {
 			for (chunkNb=0; chunkNb<nbChunks; chunkNb++)
-				chunkP[chunkNb].outputSize = compP.compressionFunction(chunkP[chunkNb].inputBuffer, chunkP[chunkNb].outputBuffer, chunkP[chunkNb].inputSize);
+				chunkP[chunkNb].compressedSize = compP.compressionFunction(chunkP[chunkNb].origBuffer, chunkP[chunkNb].compressedBuffer, chunkP[chunkNb].origSize);
 			nb_loops++;
 		  }
 		  milliTime = BMK_GetMilliSpan(milliTime);

 		  if ((double)milliTime < fastestC*nb_loops) fastestC = (double)milliTime/nb_loops;
-		  cSize=0; for (chunkNb=0; chunkNb<nbChunks; chunkNb++) cSize += chunkP[chunkNb].outputSize;
-		  ratio = (double)cSize/(double)benchedsize*100.;
+		  cSize=0; for (chunkNb=0; chunkNb<nbChunks; chunkNb++) cSize += chunkP[chunkNb].compressedSize;
+		  ratio = (double)cSize/(double)benchedSize*100.;

-		  DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%),%7.1f MB/s\r", loopNb, infilename, (int)benchedsize, (int)cSize, ratio, (double)benchedsize / fastestC / 1000.);
+		  DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%),%7.1f MB/s\r", loopNb, infilename, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000.);

 		  // Decompression
-		  { size_t i; for (i=0; i<benchedsize; i++) in_buff[i]=0; }     // zeroing area, for CRC checking
+		  { size_t i; for (i=0; i<benchedSize; i++) orig_buff[i]=0; }     // zeroing area, for CRC checking

 		  nb_loops = 0;
 		  milliTime = BMK_GetMilliStart();
@ -433,35 +386,39 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel)
 		  while(BMK_GetMilliSpan(milliTime) < TIMELOOP)
 		  {
 			for (chunkNb=0; chunkNb<nbChunks; chunkNb++)
-				chunkP[chunkNb].outputSize = LZ4_uncompress(chunkP[chunkNb].outputBuffer, chunkP[chunkNb].inputBuffer, chunkP[chunkNb].inputSize);
-				//chunkP[chunkNb].inputSize = LZ4_uncompress_unknownOutputSize(chunkP[chunkNb].outputBuffer, chunkP[chunkNb].inputBuffer, chunkP[chunkNb].outputSize, chunkSize);
+				chunkP[chunkNb].origSize = LZ4_decompress_safe(chunkP[chunkNb].compressedBuffer, chunkP[chunkNb].origBuffer, chunkP[chunkNb].compressedSize, chunkSize);
+				//chunkP[chunkNb].compressedSize = LZ4_decompress_fast(chunkP[chunkNb].compressedBuffer, chunkP[chunkNb].origBuffer, chunkP[chunkNb].origSize);
+				//chunkP[chunkNb].compressedSize = LZ4_decompress_fast_withPrefix64k(chunkP[chunkNb].compressedBuffer, chunkP[chunkNb].origBuffer, chunkP[chunkNb].origSize);
+				//chunkP[chunkNb].origSize = LZ4_decompress_safe_withPrefix64k(chunkP[chunkNb].compressedBuffer, chunkP[chunkNb].origBuffer, chunkP[chunkNb].compressedSize, chunkSize);
+				//chunkP[chunkNb].compressedSize = LZ4_uncompress(chunkP[chunkNb].compressedBuffer, chunkP[chunkNb].origBuffer, chunkP[chunkNb].origSize);
+				//chunkP[chunkNb].origSize = LZ4_uncompress_unknownOutputSize(chunkP[chunkNb].compressedBuffer, chunkP[chunkNb].origBuffer, chunkP[chunkNb].compressedSize, chunkSize);
 			nb_loops++;
 		  }
 		  milliTime = BMK_GetMilliSpan(milliTime);

 		  if ((double)milliTime < fastestD*nb_loops) fastestD = (double)milliTime/nb_loops;
-		  DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%),%7.1f MB/s ,%7.1f MB/s\r", loopNb, infilename, (int)benchedsize, (int)cSize, ratio, (double)benchedsize / fastestC / 1000., (double)benchedsize / fastestD / 1000.);
+		  DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%),%7.1f MB/s ,%7.1f MB/s\r", loopNb, infilename, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.);

 		  // CRC Checking
-		  crcd = BMK_checksum_MMH3A(in_buff, (unsigned int)benchedsize);
+		  crcd = XXH32(orig_buff, (unsigned int)benchedSize,0);
 		  if (crcc!=crcd) { DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", infilename, (unsigned)crcc, (unsigned)crcd); break; }
 		}

 	    if (crcc==crcd)
 		{
 			if (ratio<100.)
-				DISPLAY("%-16.16s : %9i -> %9i (%5.2f%%),%7.1f MB/s ,%7.1f MB/s\n", infilename, (int)benchedsize, (int)cSize, ratio, (double)benchedsize / fastestC / 1000., (double)benchedsize / fastestD / 1000.);
+				DISPLAY("%-16.16s : %9i -> %9i (%5.2f%%),%7.1f MB/s ,%7.1f MB/s\n", infilename, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.);
 			else
-				DISPLAY("%-16.16s : %9i -> %9i (%5.1f%%),%7.1f MB/s ,%7.1f MB/s \n", infilename, (int)benchedsize, (int)cSize, ratio, (double)benchedsize / fastestC / 1000., (double)benchedsize / fastestD / 1000.);
+				DISPLAY("%-16.16s : %9i -> %9i (%5.1f%%),%7.1f MB/s ,%7.1f MB/s \n", infilename, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.);
 		}
-		totals += benchedsize;
+		totals += benchedSize;
 		totalz += cSize;
 		totalc += fastestC;
 		totald += fastestD;
 	  }

-	  free(in_buff);
-	  free(out_buff);
+	  free(orig_buff);
+	  free(compressed_buff);
 	  free(chunkP);
  }

--- a/fuzzer.c
+++ b/fuzzer.c
@ -107,7 +107,7 @@ int FUZ_SecurityTest()
  input[2] = 0x00;
  for(i = 3; i < 16840000; i++)
    input[i] = 0xff;
-  r = LZ4_uncompress(input, output, 20<<20);
+  r = LZ4_decompress_fast(input, output, 20<<20);

  free(input);
  free(output);
@ -178,35 +178,35 @@ int main() {
            len = ret;

            // Test decoding with output size being exactly what's necessary => must work
-            ret = LZ4_uncompress((char*)&cbuf[off_full], (char*)testOut, LEN);
+            ret = LZ4_decompress_fast((char*)&cbuf[off_full], (char*)testOut, LEN);
            if (ret<0) { printf("decompression failed despite correct space: seed %u, len %d\n", seed, LEN); goto _output_error; }

            // Test decoding with one byte missing => must fail
-            ret = LZ4_uncompress((char*)&cbuf[off_full], (char*)testOut, LEN-1);
+            ret = LZ4_decompress_fast((char*)&cbuf[off_full], (char*)testOut, LEN-1);
            if (ret>=0) { printf("decompression should have failed, due to Output Size being too small : seed %u, len %d\n", seed, LEN); goto _output_error; }

            // Test decoding with one byte too much => must fail
-            ret = LZ4_uncompress((char*)&cbuf[off_full], (char*)testOut, LEN+1);
+            ret = LZ4_decompress_fast((char*)&cbuf[off_full], (char*)testOut, LEN+1);
            if (ret>=0) { printf("decompression should have failed, due to Output Size being too large : seed %u, len %d\n", seed, LEN); goto _output_error; }

            // Test decoding with enough output size => must work
-            ret = LZ4_uncompress_unknownOutputSize((char*)&cbuf[off_full], (char*)testOut, len, LEN+1);
+            ret = LZ4_decompress_safe((char*)&cbuf[off_full], (char*)testOut, len, LEN+1);
            if (ret<0) { printf("decompression failed despite sufficient space: seed %u, len %d\n", seed, LEN); goto _output_error; }

            // Test decoding with output size being exactly what's necessary => must work
-            ret = LZ4_uncompress_unknownOutputSize((char*)&cbuf[off_full], (char*)testOut, len, LEN);
+            ret = LZ4_decompress_safe((char*)&cbuf[off_full], (char*)testOut, len, LEN);
            if (ret<0) { printf("decompression failed despite sufficient space: seed %u, len %d\n", seed, LEN); goto _output_error; }

            // Test decoding with output size being one byte too short => must fail
-            ret = LZ4_uncompress_unknownOutputSize((char*)&cbuf[off_full], (char*)testOut, len, LEN-1);
+            ret = LZ4_decompress_safe((char*)&cbuf[off_full], (char*)testOut, len, LEN-1);
            if (ret>=0) { printf("decompression should have failed, due to Output Size being too small : seed %u, len %d\n", seed, LEN); goto _output_error; }

            // Test decoding with input size being one byte too short => must fail
-            ret = LZ4_uncompress_unknownOutputSize((char*)&cbuf[off_full], (char*)testOut, len-1, LEN);
+            ret = LZ4_decompress_safe((char*)&cbuf[off_full], (char*)testOut, len-1, LEN);
            if (ret>=0) { printf("decompression should have failed, due to input size being too small : seed %u, len %d\n", seed, LEN); goto _output_error; }

            // Test decoding with input size being one byte too large => must fail
-            ret = LZ4_uncompress_unknownOutputSize((char*)&cbuf[off_full], (char*)testOut, len+1, LEN);
+            ret = LZ4_decompress_safe((char*)&cbuf[off_full], (char*)testOut, len+1, LEN);
            if (ret>=0) { printf("decompression should have failed, due to input size being too large : seed %u, len %d\n", seed, LEN); goto _output_error; }

            // Test compression with output size being exactly what's necessary (should work)
--- a/lz4.c
+++ b/lz4.c
@ -31,6 +31,10 @@
   - LZ4 source repository : http://code.google.com/p/lz4/
 */

+/*
+Note : this source file requires "lz4_encoder.h" and "lz4_decoder.h"
+*/
+
 //**************************************
 // Tuning parameters
 //**************************************
@ -41,6 +45,12 @@
 // Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
 #define MEMORY_USAGE 14

+// HEAPMODE :
+// Select if compression algorithm will allocate space for its tables
+// in memory stack (0:default, fastest), or in memory heap (1:requires memory allocation (malloc)).
+// Default allocation strategy is to use stack (HEAPMODE 0)
+#define HEAPMODE 0
+
 // BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE :
 // This will provide a small boost to performance for big endian cpu, but the resulting compressed stream will be incompatible with little-endian CPU.
 // You can set this option to 1 in situations where data will remain within closed environment
@ -175,21 +185,18 @@ typedef struct _U64_S { U64 v; } U64_S;
 //**************************************
 // Constants
 //**************************************
+#define HASHTABLESIZE (1 << MEMORY_USAGE)
+
 #define MINMATCH 4

-#define HASH_LOG (MEMORY_USAGE-2)
-#define HASHTABLESIZE (1 << HASH_LOG)
-#define HASH_MASK (HASHTABLESIZE - 1)
-
-#define NOTCOMPRESSIBLE_DETECTIONLEVEL 6 // Increasing this value will make the algorithm slower on incompressible data
-#define SKIPSTRENGTH (NOTCOMPRESSIBLE_DETECTIONLEVEL>2?NOTCOMPRESSIBLE_DETECTIONLEVEL:2)
-#define STACKLIMIT 13
-#define HEAPMODE (HASH_LOG>STACKLIMIT)   // Defines if memory is allocated into the stack (local variable), or into the heap (malloc()).
 #define COPYLENGTH 8
 #define LASTLITERALS 5
 #define MFLIMIT (COPYLENGTH+MINMATCH)
 #define MINLENGTH (MFLIMIT+1)

+#define LZ4_64KLIMIT ((1<<16) + (MFLIMIT-1))
+#define SKIPSTRENGTH 6     // Increasing this value will make the compression run slower on incompressible data
+
 #define MAXD_LOG 16
 #define MAX_DISTANCE ((1 << MAXD_LOG) - 1)

@ -231,20 +238,9 @@ typedef struct _U64_S { U64 v; } U64_S;
 #endif


-//**************************************
-// Local structures
-//**************************************
-struct refTables
-{
-    HTYPE hashTable[HASHTABLESIZE];
-};
-
-
 //**************************************
 // Macros
 //**************************************
-#define LZ4_HASH_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8)-HASH_LOG))
-#define LZ4_HASH_VALUE(p)		LZ4_HASH_FUNCTION(A32(p))
 #define LZ4_WILDCOPY(s,d,e)		do { LZ4_COPYPACKET(s,d) } while (d<e);
 #define LZ4_BLINDCOPY(s,d,l)	{ BYTE* e=(d)+l; LZ4_WILDCOPY(s,d,e); d=e; }

@ -289,29 +285,29 @@ static inline int LZ4_NbCommonBytes (register U64 val)
 static inline int LZ4_NbCommonBytes (register U32 val)
 {
 #if defined(LZ4_BIG_ENDIAN)
-    #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+#  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r = 0;
    _BitScanReverse( &r, val );
    return (int)(r>>3);
-    #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+#  elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_clz(val) >> 3);
-    #else
+#  else
    int r;
    if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
    r += (!val);
    return r;
-    #endif
+#  endif
 #else
-    #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+#  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r;
    _BitScanForward( &r, val );
    return (int)(r>>3);
-    #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+#  elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_ctz(val) >> 3);
-    #else
+#  else
    static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
    return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
-    #endif
+#  endif
 #endif
 }

@ -323,581 +319,282 @@ static inline int LZ4_NbCommonBytes (register U32 val)
 // Compression functions
 //******************************

-// LZ4_compressCtx :
-// -----------------
-// Compress 'isize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
-// If it cannot achieve it, compression will stop, and result of the function will be zero.
-// return : the number of bytes written in buffer 'dest', or 0 if the compression fails
+/*
+int LZ4_compress_stack(
+                 const char* source,
+                 char* dest,
+                 int inputSize)

-static inline int LZ4_compressCtx(void** ctx,
+Compress 'inputSize' bytes from 'source' into an output buffer 'dest'.
+Destination buffer must be already allocated, and sized at a minimum of LZ4_compressBound(inputSize).
+return : the number of bytes written in buffer 'dest'
+*/
+#define FUNCTION_NAME LZ4_compress_stack
+#include "lz4_encoder.h"
+
+
+/*
+int LZ4_compress_stack_limitedOutput(
                 const char* source,
                 char* dest,
                 int inputSize,
                 int maxOutputSize)
-{
-#if HEAPMODE
-    struct refTables *srt = (struct refTables *) (*ctx);
-    HTYPE* HashTable;
-#else
-    HTYPE HashTable[HASHTABLESIZE] = {0};
-#endif

-    const BYTE* ip = (BYTE*) source;
-    INITBASE(base);
-    const BYTE* anchor = ip;
-    const BYTE* const iend = ip + inputSize;
-    const BYTE* const mflimit = iend - MFLIMIT;
-#define matchlimit (iend - LASTLITERALS)
-
-    BYTE* op = (BYTE*) dest;
-    BYTE* const oend = op + maxOutputSize;
-
-    int length;
-    const int skipStrength = SKIPSTRENGTH;
-    U32 forwardH;
+Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
+If it cannot achieve it, compression will stop, and result of the function will be zero.
+return : the number of bytes written in buffer 'dest', or 0 if the compression fails
+*/
+#define FUNCTION_NAME LZ4_compress_stack_limitedOutput
+#define LIMITED_OUTPUT
+#include "lz4_encoder.h"


-    // Init
-    if (inputSize<MINLENGTH) goto _last_literals;
-#if HEAPMODE
-    if (*ctx == NULL)
-    {
-        srt = (struct refTables *) malloc ( sizeof(struct refTables) );
-        *ctx = (void*) srt;
-    }
-    HashTable = (HTYPE*)(srt->hashTable);
-    memset((void*)HashTable, 0, sizeof(srt->hashTable));
-#else
-    (void) ctx;
-#endif
-
-
-    // First Byte
-    HashTable[LZ4_HASH_VALUE(ip)] = (HTYPE)(ip - base);
-    ip++; forwardH = LZ4_HASH_VALUE(ip);
-
-    // Main Loop
-    for ( ; ; )
-    {
-        int findMatchAttempts = (1U << skipStrength) + 3;
-        const BYTE* forwardIp = ip;
-        const BYTE* ref;
-        BYTE* token;
-
-        // Find a match
-        do {
-            U32 h = forwardH;
-            int step = findMatchAttempts++ >> skipStrength;
-            ip = forwardIp;
-            forwardIp = ip + step;
-
-            if unlikely(forwardIp > mflimit) { goto _last_literals; }
-
-            forwardH = LZ4_HASH_VALUE(forwardIp);
-            ref = base + HashTable[h];
-            HashTable[h] = (HTYPE)(ip - base);
-
-        } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
-
-        // Catch up
-        while ((ip>anchor) && (ref>(BYTE*)source) && unlikely(ip[-1]==ref[-1])) { ip--; ref--; }
-
-        // Encode Literal length
-        length = (int)(ip - anchor);
-        token = op++;
-        if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) > oend) return 0; 		// Check output limit
-#ifdef _MSC_VER
-        if (length>=(int)RUN_MASK) 
-        { 
-            int len = length-RUN_MASK; 
-            *token=(RUN_MASK<<ML_BITS); 
-            if (len>254)
-            {
-                do { *op++ = 255; len -= 255; } while (len>254);
-                *op++ = (BYTE)len; 
-                memcpy(op, anchor, length);
-                op += length;
-                goto _next_match;
-            }
-            else
-            *op++ = (BYTE)len; 
-        }
-        else *token = (BYTE)(length<<ML_BITS);
-#else
-        if (length>=(int)RUN_MASK) 
-        { 
-            int len;
-            *token=(RUN_MASK<<ML_BITS); 
-            len = length-RUN_MASK; 
-            for(; len > 254 ; len-=255) *op++ = 255; 
-            *op++ = (BYTE)len; 
-        }
-        else *token = (length<<ML_BITS);
-#endif
-
-        // Copy Literals
-        LZ4_BLINDCOPY(anchor, op, length);
-
-_next_match:
-        // Encode Offset
-        LZ4_WRITE_LITTLEENDIAN_16(op,(U16)(ip-ref));
-
-        // Start Counting
-        ip+=MINMATCH; ref+=MINMATCH;    // MinMatch already verified
-        anchor = ip;
-        while likely(ip<matchlimit-(STEPSIZE-1))
-        {
-            UARCH diff = AARCH(ref) ^ AARCH(ip);
-            if (!diff) { ip+=STEPSIZE; ref+=STEPSIZE; continue; }
-            ip += LZ4_NbCommonBytes(diff);
-            goto _endCount;
-        }
-        if (LZ4_ARCH64) if ((ip<(matchlimit-3)) && (A32(ref) == A32(ip))) { ip+=4; ref+=4; }
-        if ((ip<(matchlimit-1)) && (A16(ref) == A16(ip))) { ip+=2; ref+=2; }
-        if ((ip<matchlimit) && (*ref == *ip)) ip++;
-_endCount:
-
-        // Encode MatchLength
-        length = (int)(ip - anchor);
-        if unlikely(op + (1 + LASTLITERALS) + (length>>8) > oend) return 0; 		// Check output limit
-        if (length>=(int)ML_MASK) 
-        { 
-            *token += ML_MASK; 
-            length -= ML_MASK; 
-            for (; length > 509 ; length-=510) { *op++ = 255; *op++ = 255; } 
-            if (length > 254) { length-=255; *op++ = 255; } 
-            *op++ = (BYTE)length; 
-        }
-        else *token += (BYTE)length;
-
-        // Test end of chunk
-        if (ip > mflimit) { anchor = ip;  break; }
-
-        // Fill table
-        HashTable[LZ4_HASH_VALUE(ip-2)] = (HTYPE)(ip - 2 - base);
-
-        // Test next position
-        ref = base + HashTable[LZ4_HASH_VALUE(ip)];
-        HashTable[LZ4_HASH_VALUE(ip)] = (HTYPE)(ip - base);
-        if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) { token = op++; *token=0; goto _next_match; }
-
-        // Prepare next loop
-        anchor = ip++;
-        forwardH = LZ4_HASH_VALUE(ip);
-    }
-
-_last_literals:
-    // Encode Last Literals
-    {
-        int lastRun = (int)(iend - anchor);
-        if (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) return 0;  // Check output limit
-        if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
-        else *op++ = (BYTE)(lastRun<<ML_BITS);
-        memcpy(op, anchor, iend - anchor);
-        op += iend-anchor;
-    }
-
-    // End
-    return (int) (((char*)op)-dest);
-}
-
-
-
-// Note : this function is valid only if isize < LZ4_64KLIMIT
-#define LZ4_64KLIMIT ((1<<16) + (MFLIMIT-1))
-#define HASHLOG64K (HASH_LOG+1)
-#define HASH64KTABLESIZE (1U<<HASHLOG64K)
-#define LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8)-HASHLOG64K))
-#define LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
-static inline int LZ4_compress64kCtx(void** ctx,
+/*
+int LZ4_compress64k_stack(
                 const char* source,
                 char* dest,
-                 int isize,
+                 int inputSize)
+
+Compress 'inputSize' bytes from 'source' into an output buffer 'dest'.
+This function compresses better than LZ4_compress_stack(), on the condition that
+'inputSize' must be < to LZ4_64KLIMIT, or the function will fail.
+Destination buffer must be already allocated, and sized at a minimum of LZ4_compressBound(inputSize).
+return : the number of bytes written in buffer 'dest', or 0 if compression fails
+*/
+#define FUNCTION_NAME LZ4_compress64k_stack
+#define COMPRESS_64K
+#include "lz4_encoder.h"
+
+
+/*
+int LZ4_compress64k_stack_limitedOutput(
+                 const char* source,
+                 char* dest,
+                 int inputSize,
                 int maxOutputSize)
+
+Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
+This function compresses better than LZ4_compress_stack_limitedOutput(), on the condition that
+'inputSize' must be < to LZ4_64KLIMIT, or the function will fail.
+If it cannot achieve it, compression will stop, and result of the function will be zero.
+return : the number of bytes written in buffer 'dest', or 0 if the compression fails
+*/
+#define FUNCTION_NAME LZ4_compress64k_stack_limitedOutput
+#define COMPRESS_64K
+#define LIMITED_OUTPUT
+#include "lz4_encoder.h"
+
+
+/*
+void* LZ4_createHeapMemory();
+int LZ4_freeHeapMemory(void* ctx);
+
+Used to allocate and free hashTable memory 
+to be used by the LZ4_compress_heap* family of functions.
+LZ4_createHeapMemory() returns NULL is memory allocation fails.
+*/
+void* LZ4_create() { return malloc(HASHTABLESIZE); }
+int LZ4_free(void* ctx) { free(ctx); return 0; }
+
+
+/*
+int LZ4_compress_heap(
+                 void* ctx,
+                 const char* source,
+                 char* dest,
+                 int inputSize)
+
+Compress 'inputSize' bytes from 'source' into an output buffer 'dest'.
+The memory used for compression must be created by LZ4_createHeapMemory() and provided by pointer 'ctx'.
+Destination buffer must be already allocated, and sized at a minimum of LZ4_compressBound(inputSize).
+return : the number of bytes written in buffer 'dest'
+*/
+#define FUNCTION_NAME LZ4_compress_heap
+#define USE_HEAPMEMORY
+#include "lz4_encoder.h"
+
+
+/*
+int LZ4_compress_heap_limitedOutput(
+                 void* ctx,
+                 const char* source,
+                 char* dest,
+                 int inputSize,
+                 int maxOutputSize)
+
+Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
+If it cannot achieve it, compression will stop, and result of the function will be zero.
+The memory used for compression must be created by LZ4_createHeapMemory() and provided by pointer 'ctx'.
+return : the number of bytes written in buffer 'dest', or 0 if the compression fails
+*/
+#define FUNCTION_NAME LZ4_compress_heap_limitedOutput
+#define LIMITED_OUTPUT
+#define USE_HEAPMEMORY
+#include "lz4_encoder.h"
+
+
+/*
+int LZ4_compress64k_heap(
+                 void* ctx,
+                 const char* source,
+                 char* dest,
+                 int inputSize)
+
+Compress 'inputSize' bytes from 'source' into an output buffer 'dest'.
+The memory used for compression must be created by LZ4_createHeapMemory() and provided by pointer 'ctx'.
+'inputSize' must be < to LZ4_64KLIMIT, or the function will fail.
+Destination buffer must be already allocated, and sized at a minimum of LZ4_compressBound(inputSize).
+return : the number of bytes written in buffer 'dest'
+*/
+#define FUNCTION_NAME LZ4_compress64k_heap
+#define COMPRESS_64K
+#define USE_HEAPMEMORY
+#include "lz4_encoder.h"
+
+
+/*
+int LZ4_compress64k_heap_limitedOutput(
+                 void* ctx,
+                 const char* source,
+                 char* dest,
+                 int inputSize,
+                 int maxOutputSize)
+
+Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
+If it cannot achieve it, compression will stop, and result of the function will be zero.
+The memory used for compression must be created by LZ4_createHeapMemory() and provided by pointer 'ctx'.
+'inputSize' must be < to LZ4_64KLIMIT, or the function will fail.
+return : the number of bytes written in buffer 'dest', or 0 if the compression fails
+*/
+#define FUNCTION_NAME LZ4_compress64k_heap_limitedOutput
+#define COMPRESS_64K
+#define LIMITED_OUTPUT
+#define USE_HEAPMEMORY
+#include "lz4_encoder.h"
+
+
+int LZ4_compress(const char* source, char* dest, int inputSize)
 {
 #if HEAPMODE
-    struct refTables *srt = (struct refTables *) (*ctx);
-    U16* HashTable;
-#else
-    U16 HashTable[HASH64KTABLESIZE] = {0};
-#endif
-
-    const BYTE* ip = (BYTE*) source;
-    const BYTE* anchor = ip;
-    const BYTE* const base = ip;
-    const BYTE* const iend = ip + isize;
-    const BYTE* const mflimit = iend - MFLIMIT;
-#define matchlimit (iend - LASTLITERALS)
-
-    BYTE* op = (BYTE*) dest;
-    BYTE* const oend = op + maxOutputSize;
-
-    int len, length;
-    const int skipStrength = SKIPSTRENGTH;
-    U32 forwardH;
-
-
-    // Init
-    if (isize<MINLENGTH) goto _last_literals;
-#if HEAPMODE
-    if (*ctx == NULL)
-    {
-        srt = (struct refTables *) malloc ( sizeof(struct refTables) );
-        *ctx = (void*) srt;
-    }
-    HashTable = (U16*)(srt->hashTable);
-    memset((void*)HashTable, 0, sizeof(srt->hashTable));
-#else
-    (void) ctx;
-#endif
-
-
-    // First Byte
-    ip++; forwardH = LZ4_HASH64K_VALUE(ip);
-
-    // Main Loop
-    for ( ; ; )
-    {
-        int findMatchAttempts = (1U << skipStrength) + 3;
-        const BYTE* forwardIp = ip;
-        const BYTE* ref;
-        BYTE* token;
-
-        // Find a match
-        do {
-            U32 h = forwardH;
-            int step = findMatchAttempts++ >> skipStrength;
-            ip = forwardIp;
-            forwardIp = ip + step;
-
-            if (forwardIp > mflimit) { goto _last_literals; }
-
-            forwardH = LZ4_HASH64K_VALUE(forwardIp);
-            ref = base + HashTable[h];
-            HashTable[h] = (U16)(ip - base);
-
-        } while (A32(ref) != A32(ip));
-
-        // Catch up
-        while ((ip>anchor) && (ref>(BYTE*)source) && (ip[-1]==ref[-1])) { ip--; ref--; }
-
-        // Encode Literal length
-        length = (int)(ip - anchor);
-        token = op++;
-        if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) > oend) return 0; 		// Check output limit
-#ifdef _MSC_VER
-        if (length>=(int)RUN_MASK) 
-        { 
-            int len = length-RUN_MASK; 
-            *token=(RUN_MASK<<ML_BITS); 
-            if (len>254)
-            {
-                do { *op++ = 255; len -= 255; } while (len>254);
-                *op++ = (BYTE)len; 
-                memcpy(op, anchor, length);
-                op += length;
-                goto _next_match;
-            }
-            else
-            *op++ = (BYTE)len; 
-        }
-        else *token = (BYTE)(length<<ML_BITS);
-#else
-        if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; }
-        else *token = (length<<ML_BITS);
-#endif
-
-        // Copy Literals
-        LZ4_BLINDCOPY(anchor, op, length);
-
-_next_match:
-        // Encode Offset
-        LZ4_WRITE_LITTLEENDIAN_16(op,(U16)(ip-ref));
-
-        // Start Counting
-        ip+=MINMATCH; ref+=MINMATCH;   // MinMatch verified
-        anchor = ip;
-        while (ip<matchlimit-(STEPSIZE-1))
-        {
-            UARCH diff = AARCH(ref) ^ AARCH(ip);
-            if (!diff) { ip+=STEPSIZE; ref+=STEPSIZE; continue; }
-            ip += LZ4_NbCommonBytes(diff);
-            goto _endCount;
-        }
-        if (LZ4_ARCH64) if ((ip<(matchlimit-3)) && (A32(ref) == A32(ip))) { ip+=4; ref+=4; }
-        if ((ip<(matchlimit-1)) && (A16(ref) == A16(ip))) { ip+=2; ref+=2; }
-        if ((ip<matchlimit) && (*ref == *ip)) ip++;
-_endCount:
-
-        // Encode MatchLength
-        len = (int)(ip - anchor);
-        if unlikely(op + (1 + LASTLITERALS) + (len>>8) > oend) return 0; 		// Check output limit
-        if (len>=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *op++ = 255; *op++ = 255; } if (len > 254) { len-=255; *op++ = 255; } *op++ = (BYTE)len; }
-        else *token += (BYTE)len;
-
-        // Test end of chunk
-        if (ip > mflimit) { anchor = ip;  break; }
-
-        // Fill table
-        HashTable[LZ4_HASH64K_VALUE(ip-2)] = (U16)(ip - 2 - base);
-
-        // Test next position
-        ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
-        HashTable[LZ4_HASH64K_VALUE(ip)] = (U16)(ip - base);
-        if (A32(ref) == A32(ip)) { token = op++; *token=0; goto _next_match; }
-
-        // Prepare next loop
-        anchor = ip++;
-        forwardH = LZ4_HASH64K_VALUE(ip);
-    }
-
-_last_literals:
-    // Encode Last Literals
-    {
-        int lastRun = (int)(iend - anchor);
-        if (op + lastRun + 1 + (lastRun-RUN_MASK+255)/255 > oend) return 0;
-        if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
-        else *op++ = (BYTE)(lastRun<<ML_BITS);
-        memcpy(op, anchor, iend - anchor);
-        op += iend-anchor;
-    }
-
-    // End
-    return (int) (((char*)op)-dest);
-}
-
-
-int LZ4_compress_limitedOutput(const char* source, 
-                               char* dest, 
-                               int isize, 
-                               int maxOutputSize)
-{
-#if HEAPMODE
-    void* ctx = malloc(sizeof(struct refTables));
+    void* ctx = LZ4_create();
    int result;
    if (ctx == NULL) return 0;    // Failed allocation => compression not done
-    if (isize < LZ4_64KLIMIT)
-        result = LZ4_compress64kCtx(&ctx, source, dest, isize, maxOutputSize);
-    else result = LZ4_compressCtx(&ctx, source, dest, isize, maxOutputSize);
-    free(ctx);
+    if (inputSize < LZ4_64KLIMIT)
+        result = LZ4_compress64k_heap(ctx, source, dest, inputSize);
+    else result = LZ4_compress_heap(ctx, source, dest, inputSize);
+    LZ4_free(ctx);
    return result;
 #else
-    if (isize < (int)LZ4_64KLIMIT) return LZ4_compress64kCtx(NULL, source, dest, isize, maxOutputSize);
-    return LZ4_compressCtx(NULL, source, dest, isize, maxOutputSize);
+    if (inputSize < (int)LZ4_64KLIMIT) return LZ4_compress64k_stack(source, dest, inputSize);
+    return LZ4_compress_stack(source, dest, inputSize);
 #endif
 }


-int LZ4_compress(const char* source,
-                 char* dest,
-                 int isize)
+int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize)
 {
-    return LZ4_compress_limitedOutput(source, dest, isize, LZ4_compressBound(isize));
+#if HEAPMODE
+    void* ctx = LZ4_create();
+    int result;
+    if (ctx == NULL) return 0;    // Failed allocation => compression not done
+    if (inputSize < LZ4_64KLIMIT)
+        result = LZ4_compress64k_heap_limitedOutput(ctx, source, dest, inputSize, maxOutputSize);
+    else result = LZ4_compress_heap_limitedOutput(ctx, source, dest, inputSize, maxOutputSize);
+    LZ4_free(ctx);
+    return result;
+#else
+    if (inputSize < (int)LZ4_64KLIMIT) return LZ4_compress64k_stack_limitedOutput(source, dest, inputSize, maxOutputSize);
+    return LZ4_compress_stack_limitedOutput(source, dest, inputSize, maxOutputSize);
+#endif
 }


-
-
 //****************************
 // Decompression functions
 //****************************

-// Note : The decoding functions LZ4_uncompress() and LZ4_uncompress_unknownOutputSize()
-//		are safe against "buffer overflow" attack type.
-//		They will never write nor read outside of the provided output buffers.
-//      LZ4_uncompress_unknownOutputSize() also insures that it will never read outside of the input buffer.
-//      LZ4_uncompress() guarantees that it will never read before source, nor beyond source + LZ4_compressBound(osize)
-//		A corrupted input will produce an error result, a negative int, indicating the position of the error within input stream.
+/*
+int LZ4_decompress_safe(const char* source,
+                        char* dest,
+                        int inputSize,
+                        int maxOutputSize);

-int LZ4_uncompress(const char* source,
-                 char* dest,
-                 int osize)
-{
-    // Local Variables
-    const BYTE* restrict ip = (const BYTE*) source;
-    const BYTE* ref;
-
-    BYTE* op = (BYTE*) dest;
-    BYTE* const oend = op + osize;
-    BYTE* cpy;
-
-    unsigned token;
-
-    size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
-#if LZ4_ARCH64
-    size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
-#endif
+LZ4_decompress_safe() guarantees it will never write nor read outside of the provided output buffers.
+This function is safe against "buffer overflow" attacks.
+A corrupted input will produce an error result, a negative int.
+*/
+#define FUNCTION_NAME LZ4_decompress_safe
+#define EXITCONDITION_INPUTSIZE
+#include "lz4_decoder.h"


-    // Main Loop
-    while (1)
-    {
-        size_t length;
+/*
+int LZ4_decompress_safe_withPrefix64k(
+                        const char* source,
+                        char* dest,
+                        int inputSize,
+                        int maxOutputSize);

-        // get runlength
-        token = *ip++;
-        if ((length=(token>>ML_BITS)) == RUN_MASK)  { size_t len; for (;(len=*ip++)==255;length+=255){} length += len; }
-
-        // copy literals
-        cpy = op+length;
-        if (cpy>oend-COPYLENGTH)
-        {
-            if (cpy != oend) goto _output_error;         // Error : not enough place for another match (min 4) + 5 literals
-            memcpy(op, ip, length);
-            ip += length;
-            break;                                       // EOF
-        }
-        LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy;
-
-        // get offset
-        LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2;
-        if unlikely(ref < (BYTE* const)dest) goto _output_error;   // Error : offset outside destination buffer
-
-        // get matchlength
-        if ((length=(token&ML_MASK)) == ML_MASK) { for (;*ip==255;length+=255) {ip++;} length += *ip++; }
-
-        // copy repeated sequence
-        if unlikely((op-ref)<STEPSIZE)
-        {
-#if LZ4_ARCH64
-            size_t dec64 = dec64table[op-ref];
-#else
-            const int dec64 = 0;
-#endif
-            op[0] = ref[0];
-            op[1] = ref[1];
-            op[2] = ref[2];
-            op[3] = ref[3];
-            op += 4, ref += 4; ref -= dec32table[op-ref];
-            A32(op) = A32(ref); 
-            op += STEPSIZE-4; ref -= dec64;
-        } else { LZ4_COPYSTEP(ref,op); }
-        cpy = op + length - (STEPSIZE-4);
-
-        if unlikely(cpy>oend-(COPYLENGTH)-(STEPSIZE-4))
-        {
-            if (cpy > oend-LASTLITERALS) goto _output_error;    // Error : last 5 bytes must be literals
-            LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH));
-            while(op<cpy) *op++=*ref++;
-            op=cpy;
-            continue;
-        }
-        
-        LZ4_WILDCOPY(ref, op, cpy);
-        op=cpy;		// correction
-    }
-
-    // end of decoding
-    return (int) (((char*)ip)-source);
-
-    // write overflow error detected
-_output_error:
-    return (int) (-(((char*)ip)-source));
-}
+Same as LZ4_decompress_safe(), but will also use 64K of memory before the beginning of input buffer.
+Typically used to decode streams of inter-dependant blocks.
+Note : the 64K of memory before pointer 'source' must be allocated and read-allowed.
+*/
+#define FUNCTION_NAME LZ4_decompress_safe_withPrefix64k
+#define EXITCONDITION_INPUTSIZE
+#define PREFIX_64K
+#include "lz4_decoder.h"


-int LZ4_uncompress_unknownOutputSize(
-                const char* source,
-                char* dest,
-                int isize,
-                int maxOutputSize)
-{
-    // Local Variables
-    const BYTE* restrict ip = (const BYTE*) source;
-    const BYTE* const iend = ip + isize;
-    const BYTE* ref;
+/*
+int LZ4_decompress_safe_partial(
+                        const char* source,
+                        char* dest,
+                        int inputSize,
+                        int targetOutputSize,
+                        int maxOutputSize);

-    BYTE* op = (BYTE*) dest;
-    BYTE* const oend = op + maxOutputSize;
-    BYTE* cpy;
-
-    size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
-#if LZ4_ARCH64
-    size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
-#endif
+LZ4_decompress_safe_partial() objective is to decompress only a part of the compressed input block provided.
+The decoding process stops as soon as 'targetOutputSize' bytes have been decoded, reducing decoding time.
+The result of the function is the number of bytes decoded.
+LZ4_decompress_safe_partial() may decode less than 'targetOutputSize' if input doesn't contain enough bytes to decode.
+Always verify how many bytes were decoded to ensure there are as many as wanted into the output buffer 'dest'.
+A corrupted input will produce an error result, a negative int.
+*/
+#define FUNCTION_NAME LZ4_decompress_safe_partial
+#define EXITCONDITION_INPUTSIZE
+#define PARTIAL_DECODING
+#include "lz4_decoder.h"


-    // Special case
-    if unlikely(ip==iend) goto _output_error;    // A correctly formed null-compressed LZ4 must have at least one byte (token=0)
+/*
+int LZ4_decompress_fast(const char* source,
+                        char* dest,
+                        int outputSize);

-    // Main Loop
-    while (1) 
-    {
-        unsigned token;
-        size_t length;
+This function is faster than LZ4_decompress_safe().
+LZ4_decompress_fast() guarantees it will never write nor read outside of output buffer.
+Since LZ4_decompress_fast() doesn't know the size of input buffer.
+it can only guarantee that it will never write into the input buffer, and will never read before its beginning.
+To be used preferably in a controlled environment (when the compressed data to be decoded is from a trusted source).
+A detected corrupted input will produce an error result, a negative int.
+*/
+#define FUNCTION_NAME LZ4_decompress_fast
+#include "lz4_decoder.h"

-        // get runlength
-        token = *ip++;
-        if ((length=(token>>ML_BITS)) == RUN_MASK) 
-        { 
-            int s=255; 
-            while (likely(ip<iend) && (s==255)) { s=*ip++; length += s; } 
-        }

-        // copy literals
-        cpy = op+length;
-        if ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS)))
-        {
-            if (cpy > oend) goto _output_error;          // Error : writes beyond output buffer
-            if (ip+length != iend) goto _output_error;   // Error : LZ4 format requires to consume all input at this stage (no match within the last 11 bytes, and at least 8 remaining input bytes for another match+literals)
-            memcpy(op, ip, length);
-            op += length;
-            break;                                       // Necessarily EOF, due to parsing restrictions
-        }
-        LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy;
+/*
+int LZ4_decompress_fast_withPrefix64k(
+                        const char* source,
+                        char* dest,
+                        int inputSize
+                        int maxOutputSize);

-        // get offset
-        LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2;
-        if unlikely(ref < (BYTE* const)dest) goto _output_error;   // Error : offset outside of destination buffer
+Same as LZ4_decompress_fast(), but will use the 64K of memory before the beginning of input buffer.
+Typically used to decode streams of dependant inter-blocks.
+Note : the 64K of memory before pointer 'source' must be allocated and read-allowed.
+*/
+#define FUNCTION_NAME LZ4_decompress_fast_withPrefix64k
+#define PREFIX_64K
+#include "lz4_decoder.h"

-        // get matchlength
-        if ((length=(token&ML_MASK)) == ML_MASK)    
-        { 
-            while likely(ip<iend-(LASTLITERALS+1))    // Error : a minimum input bytes must remain for LASTLITERALS + token
-            { 
-                int s = *ip++; 
-                length +=s; 
-                if (s==255) continue; 
-                break; 
-            } 
-        }
-
-        // copy repeated sequence
-        if unlikely(op-ref<STEPSIZE)
-        {
-#if LZ4_ARCH64
-            size_t dec64 = dec64table[op-ref];
-#else
-            const int dec64 = 0;
-#endif
-            op[0] = ref[0];
-            op[1] = ref[1];
-            op[2] = ref[2];
-            op[3] = ref[3];
-            op += 4, ref += 4; ref -= dec32table[op-ref];
-            A32(op) = A32(ref); 
-            op += STEPSIZE-4; ref -= dec64;
-        } else { LZ4_COPYSTEP(ref,op); }
-        cpy = op + length - (STEPSIZE-4);
-
-        if unlikely(cpy>oend-(COPYLENGTH+(STEPSIZE-4)))
-        {
-            if (cpy > oend-LASTLITERALS) goto _output_error;    // Error : last 5 bytes must be literals
-            LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH));
-            while(op<cpy) *op++=*ref++;
-            op=cpy;
-            continue;
-        }
-
-        LZ4_WILDCOPY(ref, op, cpy);
-        op=cpy;		// correction
-    }
-
-    // end of decoding
-    return (int) (((char*)op)-dest);
-
-    // write overflow error detected
-_output_error:
-    return (int) (-(((char*)ip)-source));
-}

--- a/lz4.h
+++ b/lz4.h
@ -50,8 +50,8 @@ extern "C" {
 // Simple Functions
 //****************************

-int LZ4_compress   (const char* source, char* dest, int inputSize);
-int LZ4_uncompress (const char* source, char* dest, int outputSize);
+int LZ4_compress        (const char* source, char* dest, int inputSize);
+int LZ4_decompress_safe (const char* source, char* dest, int inputSize, int maxOutputSize);

 /*
 LZ4_compress() :
@ -63,13 +63,11 @@ LZ4_compress() :
    return : the number of bytes written in buffer dest
             or 0 if the compression fails

-LZ4_uncompress() :
-    outputSize : is the original (uncompressed) size
-    return : the number of bytes read in the source buffer (in other words, the compressed size)
-             If the source stream is malformed, the function will stop decoding and return a negative result, indicating the byte position of the faulty instruction.
-    note : This function never writes outside of provided buffers, and never modifies input buffer.
-           Destination buffer must be already allocated.
-           Its size must be a minimum of 'output_size' bytes.
+LZ4_decompress_safe() :
+    maxOutputSize : is the size of the destination buffer (which must be already allocated)
+    return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize)
+             If the source stream is malformed, the function will stop decoding and return a negative result.
+             This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets
 */


@ -85,7 +83,7 @@ LZ4_compressBound() :
    Provides the maximum size that LZ4 may output in a "worst case" scenario (input data not compressible)
    primarily useful for memory allocation of output buffer.
 	inline function is recommended for the general case,
-	but macro is also provided when results need to be evaluated at compile time (such as table size allocation).
+	macro is also provided when result needs to be evaluated at compile time (such as table size allocation).

    isize  : is the input size. Max supported value is ~1.9GB
    return : maximum output size in a "worst case" scenario
@ -93,34 +91,63 @@ LZ4_compressBound() :
 */


-int LZ4_compress_limitedOutput   (const char* source, char* dest, int isize, int maxOutputSize);
+int LZ4_compress_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize);

 /*
 LZ4_compress_limitedOutput() :
-    Compress 'isize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
+    Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
    If it cannot achieve it, compression will stop, and result of the function will be zero.
    This function never writes outside of provided output buffer.

-    isize  : is the input size. Max supported value is ~1.9GB
+    inputSize  : Max supported value is ~1.9GB
    maxOutputSize : is the size of the destination buffer (which must be already allocated)
    return : the number of bytes written in buffer 'dest'
             or 0 if the compression fails
 */


-int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize);
+int LZ4_decompress_fast (const char* source, char* dest, int outputSize);

 /*
-LZ4_uncompress_unknownOutputSize() :
-    isize  : is the input size, therefore the compressed size
-    maxOutputSize : is the size of the destination buffer (which must be already allocated)
-    return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize)
-             If the source stream is malformed, the function will stop decoding and return a negative result, indicating the byte position of the faulty instruction
-             This function never writes beyond dest + maxOutputSize, and is therefore protected against malicious data packets
-    note   : Destination buffer must be already allocated.
-             This version is slightly slower than LZ4_uncompress()
+LZ4_decompress_fast() :
+    outputSize : is the original (uncompressed) size
+    return : the number of bytes read from the source buffer (in other words, the compressed size)
+             If the source stream is malformed, the function will stop decoding and return a negative result.
+    note : This function is a bit faster than LZ4_decompress_safe()
+           This function never writes outside of output buffers, and never read before input buffer, but may read beyond input buffer (since it doesn't know its size) in case of malicious data packet.
+           Use this function preferably into a trusted environment (data to decode comes from a trusted source).
+           Destination buffer must be already allocated. Its size must be a minimum of 'outputSize' bytes.
 */

+int LZ4_decompress_safe_partial (const char* source, char* dest, int inputSize, int targetOutputSize, int maxOutputSize);
+
+/*
+LZ4_decompress_safe_partial() :
+    This function decompress a compressed block of size 'inputSize' at position 'source'
+    into output buffer 'dest' of size 'maxOutputSize'.
+    The function stops decompressing operation as soon as 'targetOutputSize' has been reached,
+    reducing decompression time.
+    return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize)
+       Note : this number might be < 'targetOutputSize' if the number of bytes to decode into the compressed block is not enough.
+             Always control how many bytes were  decoded.
+             If the source stream is malformed, the function will stop decoding and return a negative result.
+             This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets
+*/
+
+
+//****************************
+// Obsolete Functions
+//****************************
+
+inline int LZ4_uncompress (const char* source, char* dest, int outputSize)   { return LZ4_decompress_fast(source, dest, outputSize); }
+inline int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize)   { return LZ4_decompress_safe(source, dest, isize, maxOutputSize); }
+
+/* 
+These functions are deprecated and should no longer be used.
+They are provided here for compatibility with existing user programs.
+*/
+
+

 #if defined (__cplusplus)
 }
--- a/lz4_decoder.h
+++ b/lz4_decoder.h
@ -0,0 +1,233 @@
+/*
+   LZ4 Decoder - Part of LZ4 compression algorithm
+   Copyright (C) 2011-2013, Yann Collet.
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
+   - LZ4 source repository : http://code.google.com/p/lz4/
+*/
+
+/* lz4_decoder.h must be included into lz4.c
+   The objective of this file is to create a single LZ4 decoder function source
+   which will be instanciated multiple times with minor variations
+   depending on a set of #define.
+*/
+
+
+
+//****************************
+// Check required defines
+//****************************
+
+#ifndef FUNCTION_NAME
+#  error "FUNTION_NAME is not defined"
+#endif
+
+
+//****************************
+// Control tests
+//****************************
+
+#ifdef EXITCONDITION_INPUTSIZE
+#  define INPUTBUFFER_CONTROL(ip,iend) likely(ip<iend)
+#else
+#  define INPUTBUFFER_CONTROL(ip,iend) (1)
+#endif
+
+#ifdef PARTIAL_DECODING
+#  define OUTPUTTARGET(cpy,oexit) (cpy >= oexit)
+#else
+#  define OUTPUTTARGET(cpy,oexit) (0)
+#endif
+
+
+
+
+//****************************
+// Function code
+//****************************
+
+int FUNCTION_NAME(const char* source,
+                 char* dest,
+#ifdef EXITCONDITION_INPUTSIZE
+                 int inputSize,
+#endif
+#ifdef PARTIAL_DECODING
+                 int targetOutputSize,
+#endif
+                 int outputSize
+                 )
+{
+    // Local Variables
+    const BYTE* restrict ip = (const BYTE*) source;
+    const BYTE* ref;
+#ifdef EXITCONDITION_INPUTSIZE
+    const BYTE* const iend = ip + inputSize;
+#endif
+
+    BYTE* op = (BYTE*) dest;
+    BYTE* const oend = op + outputSize;
+    BYTE* cpy;
+#ifdef PARTIAL_DECODING
+    BYTE* const oexit = op + targetOutputSize;
+#endif
+
+    size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
+#if LZ4_ARCH64
+    size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
+#endif
+
+
+#ifdef EXITCONDITION_INPUTSIZE
+    // Special case
+    if unlikely(!inputSize) goto _output_error;     // A correctly formed null-compressed LZ4 must have at least one byte (token=0)
+#endif
+
+    // Main Loop
+    while (1)
+    {
+        unsigned token;
+        size_t length;
+
+        // get runlength
+        token = *ip++;
+        if ((length=(token>>ML_BITS)) == RUN_MASK)  
+        { 
+            unsigned s=255; 
+            while (INPUTBUFFER_CONTROL(ip,iend) && (s==255)) 
+            { 
+                s=*ip++; 
+                length += s; 
+            } 
+        }
+
+        // copy literals
+        cpy = op+length;
+#ifdef EXITCONDITION_INPUTSIZE
+        if ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS)) || OUTPUTTARGET(cpy,oexit))
+        {
+            if (cpy > oend) goto _output_error;          // Error : write attempt beyond end of output buffer
+            if ((!OUTPUTTARGET(cpy,oexit)) && (ip+length != iend)) goto _output_error;   // Error : Must consume all input at this stage, except if reaching TargetOutputSize
+#else
+        if (cpy>oend-COPYLENGTH)
+        {
+            if (cpy != oend) goto _output_error;         // Error : not enough place for another match (min 4) + 5 literals
+#endif
+            memcpy(op, ip, length);
+            ip += length;
+            op += length;
+            break;                                       // Necessarily EOF, due to parsing restrictions
+        }
+        LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy;
+
+        // get offset
+        LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2;
+#ifndef PREFIX_64K
+        if unlikely(ref < (BYTE* const)dest) goto _output_error;   // Error : offset outside destination buffer
+#endif
+
+        // get matchlength
+        if ((length=(token&ML_MASK)) == ML_MASK) 
+        { 
+            while INPUTBUFFER_CONTROL(ip,iend-(LASTLITERALS+1))    // A minimum nb of input bytes must remain for LASTLITERALS + token
+            { 
+                unsigned s = *ip++; 
+                length += s; 
+                if (s==255) continue; 
+                break; 
+            } 
+        }
+
+        // copy repeated sequence
+        if unlikely((op-ref)<STEPSIZE)
+        {
+#if LZ4_ARCH64
+            size_t dec64 = dec64table[op-ref];
+#else
+            const size_t dec64 = 0;
+#endif
+            op[0] = ref[0];
+            op[1] = ref[1];
+            op[2] = ref[2];
+            op[3] = ref[3];
+            op += 4, ref += 4; ref -= dec32table[op-ref];
+            A32(op) = A32(ref); 
+            op += STEPSIZE-4; ref -= dec64;
+        } else { LZ4_COPYSTEP(ref,op); }
+        cpy = op + length - (STEPSIZE-4);
+
+        if unlikely(cpy>oend-(COPYLENGTH)-(STEPSIZE-4))
+        {
+            if (cpy > oend-LASTLITERALS) goto _output_error;    // Error : last 5 bytes must be literals
+            LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH));
+            while(op<cpy) *op++=*ref++;
+            op=cpy;
+            continue;
+        }
+        
+        LZ4_WILDCOPY(ref, op, cpy);
+        op=cpy;		// correction
+    }
+
+    // end of decoding
+#ifdef EXITCONDITION_INPUTSIZE
+    return (int) (((char*)op)-dest);     // Nb of output bytes decoded
+#else
+    return (int) (((char*)ip)-source);   // Nb of input bytes read
+#endif
+
+    // Overflow error detected
+_output_error:
+    return (int) (-(((char*)ip)-source))-1;
+}
+
+
+
+//****************************
+// Clean defines
+//****************************
+
+// Required defines
+#undef FUNCTION_NAME
+
+// Locally Generated
+#undef INPUTBUFFER_CONTROL
+#undef OUTPUTTARGET
+
+// Optional defines
+#ifdef EXITCONDITION_INPUTSIZE
+#undef EXITCONDITION_INPUTSIZE
+#endif
+
+#ifdef PREFIX_64K
+#undef PREFIX_64K
+#endif
+
+#ifdef PARTIAL_DECODING
+#undef PARTIAL_DECODING
+#endif
+
--- a/lz4_encoder.h
+++ b/lz4_encoder.h
@ -0,0 +1,278 @@
+/*
+   LZ4 Encoder - Part of LZ4 compression algorithm
+   Copyright (C) 2011-2013, Yann Collet.
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
+   - LZ4 source repository : http://code.google.com/p/lz4/
+*/
+
+/* lz4_encoder.h must be included into lz4.c
+   The objective of this file is to create a single LZ4 compression function source
+   which will be instanciated multiple times with minor variations
+   depending on a set of #define.
+*/
+
+
+
+//****************************
+// Check required defines
+//****************************
+
+#ifndef FUNCTION_NAME
+#  error "FUNTION_NAME is not defined"
+#endif
+
+
+//****************************
+// Local definitions
+//****************************
+
+#ifdef COMPRESS_64K
+#  define HASHLOG (MEMORY_USAGE-1)
+#  define CURRENT_H_TYPE U16
+#  define CURRENTBASE(base) const BYTE* const base = ip
+#else
+#  define HASHLOG (MEMORY_USAGE-2)
+#  define CURRENT_H_TYPE HTYPE
+#  define CURRENTBASE(base) INITBASE(base)
+#endif
+
+#define HASHTABLE_NBCELLS  (1U<<HASHLOG)
+#define LZ4_HASH(i)        (((i) * 2654435761U) >> ((MINMATCH*8)-HASHLOG))
+#define LZ4_HASHVALUE(p)   LZ4_HASH(A32(p))
+
+
+
+//****************************
+// Function code
+//****************************
+
+int FUNCTION_NAME(
+#ifdef USE_HEAPMEMORY
+                 void* ctx,
+#endif
+                 const char* source,
+                 char* dest,
+                 int inputSize
+#ifdef LIMITED_OUTPUT
+                ,int maxOutputSize
+#endif
+                 )
+{
+#ifdef USE_HEAPMEMORY
+    CURRENT_H_TYPE* HashTable = (CURRENT_H_TYPE*)ctx;
+#else
+    CURRENT_H_TYPE HashTable[HASHTABLE_NBCELLS] = {0};
+#endif
+
+    const BYTE* ip = (BYTE*) source;
+    CURRENTBASE(base);
+    const BYTE* anchor = ip;
+    const BYTE* const iend = ip + inputSize;
+    const BYTE* const mflimit = iend - MFLIMIT;
+#define matchlimit (iend - LASTLITERALS)
+
+    BYTE* op = (BYTE*) dest;
+#ifdef LIMITED_OUTPUT
+    BYTE* const oend = op + maxOutputSize;
+#endif
+
+    int length;
+    const int skipStrength = SKIPSTRENGTH;
+    U32 forwardH;
+
+
+    // Init
+    if (inputSize<MINLENGTH) goto _last_literals;
+#ifdef COMPRESS_64K
+    if (inputSize>LZ4_64KLIMIT) return 0;   // Size too large (not within 64K limit)
+#endif
+#ifdef USE_HEAPMEMORY
+    memset((void*)HashTable, 0, HASHTABLESIZE);
+#endif
+
+    // First Byte
+    HashTable[LZ4_HASHVALUE(ip)] = (CURRENT_H_TYPE)(ip - base);
+    ip++; forwardH = LZ4_HASHVALUE(ip);
+
+    // Main Loop
+    for ( ; ; )
+    {
+        int findMatchAttempts = (1U << skipStrength) + 3;
+        const BYTE* forwardIp = ip;
+        const BYTE* ref;
+        BYTE* token;
+
+        // Find a match
+        do {
+            U32 h = forwardH;
+            int step = findMatchAttempts++ >> skipStrength;
+            ip = forwardIp;
+            forwardIp = ip + step;
+
+            if unlikely(forwardIp > mflimit) { goto _last_literals; }
+
+            forwardH = LZ4_HASHVALUE(forwardIp);
+            ref = base + HashTable[h];
+            HashTable[h] = (CURRENT_H_TYPE)(ip - base);
+
+        } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
+
+        // Catch up
+        while ((ip>anchor) && (ref>(BYTE*)source) && unlikely(ip[-1]==ref[-1])) { ip--; ref--; }
+
+        // Encode Literal length
+        length = (int)(ip - anchor);
+        token = op++;
+#ifdef LIMITED_OUTPUT
+        if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) > oend) return 0; 		// Check output limit
+#endif
+#ifdef _MSC_VER
+        if (length>=(int)RUN_MASK) 
+        { 
+            int len = length-RUN_MASK; 
+            *token=(RUN_MASK<<ML_BITS); 
+            if (len>254)
+            {
+                do { *op++ = 255; len -= 255; } while (len>254);
+                *op++ = (BYTE)len; 
+                memcpy(op, anchor, length);
+                op += length;
+                goto _next_match;
+            }
+            else
+            *op++ = (BYTE)len; 
+        }
+        else *token = (BYTE)(length<<ML_BITS);
+#else
+        if (length>=(int)RUN_MASK) 
+        { 
+            int len;
+            *token=(RUN_MASK<<ML_BITS); 
+            len = length-RUN_MASK; 
+            for(; len > 254 ; len-=255) *op++ = 255; 
+            *op++ = (BYTE)len; 
+        }
+        else *token = (length<<ML_BITS);
+#endif
+
+        // Copy Literals
+        LZ4_BLINDCOPY(anchor, op, length);
+
+_next_match:
+        // Encode Offset
+        LZ4_WRITE_LITTLEENDIAN_16(op,(U16)(ip-ref));
+
+        // Start Counting
+        ip+=MINMATCH; ref+=MINMATCH;    // MinMatch already verified
+        anchor = ip;
+        while likely(ip<matchlimit-(STEPSIZE-1))
+        {
+            UARCH diff = AARCH(ref) ^ AARCH(ip);
+            if (!diff) { ip+=STEPSIZE; ref+=STEPSIZE; continue; }
+            ip += LZ4_NbCommonBytes(diff);
+            goto _endCount;
+        }
+        if (LZ4_ARCH64) if ((ip<(matchlimit-3)) && (A32(ref) == A32(ip))) { ip+=4; ref+=4; }
+        if ((ip<(matchlimit-1)) && (A16(ref) == A16(ip))) { ip+=2; ref+=2; }
+        if ((ip<matchlimit) && (*ref == *ip)) ip++;
+_endCount:
+
+        // Encode MatchLength
+        length = (int)(ip - anchor);
+#ifdef LIMITED_OUTPUT
+        if unlikely(op + (1 + LASTLITERALS) + (length>>8) > oend) return 0; 		// Check output limit
+#endif
+        if (length>=(int)ML_MASK) 
+        { 
+            *token += ML_MASK; 
+            length -= ML_MASK; 
+            for (; length > 509 ; length-=510) { *op++ = 255; *op++ = 255; } 
+            if (length > 254) { length-=255; *op++ = 255; } 
+            *op++ = (BYTE)length; 
+        }
+        else *token += (BYTE)length;
+
+        // Test end of chunk
+        if (ip > mflimit) { anchor = ip;  break; }
+
+        // Fill table
+        HashTable[LZ4_HASHVALUE(ip-2)] = (CURRENT_H_TYPE)(ip - 2 - base);
+
+        // Test next position
+        ref = base + HashTable[LZ4_HASHVALUE(ip)];
+        HashTable[LZ4_HASHVALUE(ip)] = (CURRENT_H_TYPE)(ip - base);
+        if ((ref >= ip - MAX_DISTANCE) && (A32(ref) == A32(ip))) { token = op++; *token=0; goto _next_match; }
+
+        // Prepare next loop
+        anchor = ip++;
+        forwardH = LZ4_HASHVALUE(ip);
+    }
+
+_last_literals:
+    // Encode Last Literals
+    {
+        int lastRun = (int)(iend - anchor);
+#ifdef LIMITED_OUTPUT
+        if (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) return 0;  // Check output limit
+#endif
+        if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
+        else *op++ = (BYTE)(lastRun<<ML_BITS);
+        memcpy(op, anchor, iend - anchor);
+        op += iend-anchor;
+    }
+
+    // End
+    return (int) (((char*)op)-dest);
+}
+
+
+
+//****************************
+// Clean defines
+//****************************
+
+// Required defines
+#undef FUNCTION_NAME
+
+// Locally Generated
+#undef HASHLOG
+#undef HASHTABLE_NBCELLS
+#undef LZ4_HASH
+#undef LZ4_HASHVALUE
+#undef CURRENT_H_TYPE
+#undef CURRENTBASE
+
+// Optional defines
+#ifdef LIMITED_OUTPUT
+#undef LIMITED_OUTPUT
+#endif
+
+#ifdef USE_HEAPMEMORY
+#undef USE_HEAPMEMORY
+#endif
--- a/lz4c.c
+++ b/lz4c.c
@ -169,8 +169,6 @@ int usage(char* exename)
    DISPLAY( " -d     : decompression \n");
    DISPLAY( " -y     : overwrite without prompting \n");
    DISPLAY( " -H     : Help (this text + advanced options)\n");
-    DISPLAY( "input   : can be 'stdin' (pipe) or a filename\n");
-    DISPLAY( "output  : can be 'stdout'(pipe) or a filename or 'null'\n");
    return 0;
 }

@ -183,6 +181,9 @@ int usage_advanced()
    DISPLAY( " -nx    : disable stream checksum (default:enabled)\n");
    DISPLAY( " -b#    : benchmark files, using # [0-1] compression level\n");
    DISPLAY( " -i#    : iteration loops [1-9](default : 3), benchmark mode only\n");
+    DISPLAY( "input   : can be 'stdin' (pipe) or a filename\n");
+    DISPLAY( "output  : can be 'stdout'(pipe) or a filename or 'null'\n");
+    DISPLAY( "          example : lz4c -hc stdin compressedfile.lz4");
    return 0;
 }

@ -492,7 +493,7 @@ unsigned long long decodeLegacyStream(FILE* finput, FILE* foutput)
        uselessRet = fread(in_buff, 1, blockSize, finput);

        // Decode Block
-        sinkint = LZ4_uncompress_unknownOutputSize(in_buff, out_buff, blockSize, LEGACY_BLOCKSIZE);
+        sinkint = LZ4_decompress_safe(in_buff, out_buff, blockSize, LEGACY_BLOCKSIZE);
        if (sinkint < 0) EXM_THROW(52, "Decoding Failed ! Corrupted input detected !");
        filesize += sinkint;

@ -603,7 +604,7 @@ unsigned long long decodeLZ4S(FILE* finput, FILE* foutput)
        else
        {
            // Decode Block
-            decodedBytes = LZ4_uncompress_unknownOutputSize(in_buff, out_buff, blockSize, maxBlockSize);
+            decodedBytes = LZ4_decompress_safe(in_buff, out_buff, blockSize, maxBlockSize);
            if (decodedBytes < 0) EXM_THROW(77, "Decoding Failed ! Corrupted input detected !");
            filesize += decodedBytes;
            if (streamChecksumFlag) XXH32_update(streamChecksumState, out_buff, decodedBytes);
--- a/lz4hc.c
+++ b/lz4hc.c
@ -31,6 +31,10 @@
   - LZ4 source repository : http://code.google.com/p/lz4/
 */

+/*
+Note : this source file requires "lz4hc_encoder.h"
+*/
+

 //**************************************
 // CPU Feature Detection
@ -319,7 +323,7 @@ inline static int LZ4HC_Init (LZ4HC_Data_Structure* hc4, const BYTE* base)
 }


-inline static void* LZ4HC_Create (const BYTE* base)
+inline static void* LZ4HC_create (const BYTE* base)
 {
    void* hc4 = ALLOCATOR(sizeof(LZ4HC_Data_Structure));

@ -328,7 +332,7 @@ inline static void* LZ4HC_Create (const BYTE* base)
 }


-inline static int LZ4HC_Free (void** LZ4HC_Data)
+inline static int LZ4HC_free (void** LZ4HC_Data)
 {
    FREEMEM(*LZ4HC_Data);
    *LZ4HC_Data = NULL;
@ -500,247 +504,37 @@ _endCount:
 }


-forceinline static int LZ4_encodeSequence(const BYTE** ip, BYTE** op, const BYTE** anchor, int matchLength, const BYTE* ref, BYTE* oend)
-{
-    int length, len; 
-    BYTE* token;

-    // Encode Literal length
-    length = (int)(*ip - *anchor);
-    token = (*op)++;
-    if ((*op + length + (2 + 1 + LASTLITERALS) + (length>>8)) > oend) return 1; 		// Check output limit
-    if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255;  *(*op)++ = (BYTE)len; } 
-    else *token = (BYTE)(length<<ML_BITS);
+//**************************************
+// Compression functions
+//**************************************

-    // Copy Literals
-    LZ4_BLINDCOPY(*anchor, *op, length);
-
-    // Encode Offset
-    LZ4_WRITE_LITTLEENDIAN_16(*op,(U16)(*ip-ref));
-
-    // Encode MatchLength
-    len = (int)(matchLength-MINMATCH);
-    if (*op + (1 + LASTLITERALS) + (length>>8) > oend) return 1; 		// Check output limit
-    if (len>=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (len > 254) { len-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)len; } 
-    else *token += (BYTE)len;	
-
-    // Prepare next loop
-    *ip += matchLength;
-    *anchor = *ip; 
-
-    return 0;
-}
-
-
-//****************************
-// Compression CODE
-//****************************
-
-int LZ4_compressHCCtx(LZ4HC_Data_Structure* ctx,
-                 const char* source, 
-                 char* dest,
-                 int inputSize,
-                 int maxOutputSize)
-{	
-    const BYTE* ip = (const BYTE*) source;
-    const BYTE* anchor = ip;
-    const BYTE* const iend = ip + inputSize;
-    const BYTE* const mflimit = iend - MFLIMIT;
-    const BYTE* const matchlimit = (iend - LASTLITERALS);
-
-    BYTE* op = (BYTE*) dest;
-    BYTE* const oend = op + maxOutputSize;
-
-    int	ml, ml2, ml3, ml0;
-    const BYTE* ref=NULL;
-    const BYTE* start2=NULL;
-    const BYTE* ref2=NULL;
-    const BYTE* start3=NULL;
-    const BYTE* ref3=NULL;
-    const BYTE* start0;
-    const BYTE* ref0;
-
-    ip++;
-
-    // Main Loop
-    while (ip < mflimit)
-    {
-        ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref));
-        if (!ml) { ip++; continue; }
-
-        // saved, in case we would skip too much
-        start0 = ip;
-        ref0 = ref;
-        ml0 = ml;
-
-_Search2:
-        if (ip+ml < mflimit)
-            ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2);
-        else ml2 = ml;
-
-        if (ml2 == ml)  // No better match
-        {
-            if (LZ4_encodeSequence(&ip, &op, &anchor, ml, ref, oend)) return 0;
-            continue;
-        }
-
-        if (start0 < ip)
-        {
-            if (start2 < ip + ml0)   // empirical
-            {
-                ip = start0;
-                ref = ref0;
-                ml = ml0;
-            }
-        }
-
-        // Here, start0==ip
-        if ((start2 - ip) < 3)   // First Match too small : removed
-        {
-            ml = ml2;
-            ip = start2;
-            ref =ref2;
-            goto _Search2;
-        }
-
-_Search3:
-        // Currently we have :
-        // ml2 > ml1, and
-        // ip1+3 <= ip2 (usually < ip1+ml1)
-        if ((start2 - ip) < OPTIMAL_ML)
-        {
-            int correction;
-            int new_ml = ml;
-            if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
-            if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
-            correction = new_ml - (int)(start2 - ip);
-            if (correction > 0)
-            {
-                start2 += correction;
-                ref2 += correction;
-                ml2 -= correction;
-            }
-        }
-        // Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18)
-
-        if (start2 + ml2 < mflimit)
-            ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3);
-        else ml3 = ml2;
-
-        if (ml3 == ml2) // No better match : 2 sequences to encode
-        {
-            // ip & ref are known; Now for ml
-            if (start2 < ip+ml)  ml = (int)(start2 - ip);
-            // Now, encode 2 sequences
-            if (LZ4_encodeSequence(&ip, &op, &anchor, ml, ref, oend)) return 0;
-            ip = start2;
-            if (LZ4_encodeSequence(&ip, &op, &anchor, ml2, ref2, oend)) return 0;
-            continue;
-        }
-
-        if (start3 < ip+ml+3) // Not enough space for match 2 : remove it
-        {
-            if (start3 >= (ip+ml)) // can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1
-            {
-                if (start2 < ip+ml)
-                {
-                    int correction = (int)(ip+ml - start2);
-                    start2 += correction;
-                    ref2 += correction;
-                    ml2 -= correction;
-                    if (ml2 < MINMATCH)
-                    {
-                        start2 = start3;
-                        ref2 = ref3;
-                        ml2 = ml3;
-                    }
-                }
-
-                if (LZ4_encodeSequence(&ip, &op, &anchor, ml, ref, oend)) return 0;
-                ip  = start3;
-                ref = ref3;
-                ml  = ml3;
-
-                start0 = start2;
-                ref0 = ref2;
-                ml0 = ml2;
-                goto _Search2;
-            }
-
-            start2 = start3;
-            ref2 = ref3;
-            ml2 = ml3;
-            goto _Search3;
-        }
-
-        // OK, now we have 3 ascending matches; let's write at least the first one
-        // ip & ref are known; Now for ml
-        if (start2 < ip+ml)
-        {
-            if ((start2 - ip) < (int)ML_MASK)
-            {
-                int correction;
-                if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
-                if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
-                correction = ml - (int)(start2 - ip);
-                if (correction > 0)
-                {
-                    start2 += correction;
-                    ref2 += correction;
-                    ml2 -= correction;
-                }
-            }
-            else
-            {
-                ml = (int)(start2 - ip);
-            }
-        }
-        if (LZ4_encodeSequence(&ip, &op, &anchor, ml, ref, oend)) return 0;
-
-        ip = start2;
-        ref = ref2;
-        ml = ml2;
-
-        start2 = start3;
-        ref2 = ref3;
-        ml2 = ml3;
-
-        goto _Search3;
-
-    }
-
-    // Encode Last Literals
-    {
-        int lastRun = (int)(iend - anchor);
-        if (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) return 0;  // Check output limit
-        if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } 
-        else *op++ = (BYTE)(lastRun<<ML_BITS);
-        memcpy(op, anchor, iend - anchor);
-        op += iend-anchor;
-    } 
-
-    // End
-    return (int) (((char*)op)-dest);
-}
-
-
-int LZ4_compressHC_limitedOutput(const char* source, 
-                 char* dest,
-                 int inputSize,
-                 int maxOutputSize)
-{
-    void* ctx = LZ4HC_Create((const BYTE*)source);
-    int result = LZ4_compressHCCtx(ctx, source, dest, inputSize, maxOutputSize);
-    LZ4HC_Free (&ctx);
-
-    return result;
-}
-
-
-int LZ4_compressHC(const char* source, 
+/*
+int LZ4_compressHC(
+                 const char* source,
                 char* dest,
                 int inputSize)
-{
-    return LZ4_compressHC_limitedOutput(source, dest, inputSize, LZ4_compressBound(inputSize)+1);
-}
+
+Compress 'inputSize' bytes from 'source' into an output buffer 'dest'.
+Destination buffer must be already allocated, and sized at a minimum of LZ4_compressBound(inputSize).
+return : the number of bytes written in buffer 'dest'
+*/
+#define FUNCTION_NAME LZ4_compressHC
+#include "lz4hc_encoder.h"
+
+
+/*
+int LZ4_compressHC_limitedOutput(
+                 const char* source,
+                 char* dest,
+                 int inputSize,
+                 int maxOutputSize)
+
+Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
+If it cannot achieve it, compression will stop, and result of the function will be zero.
+return : the number of bytes written in buffer 'dest', or 0 if the compression fails
+*/
+#define FUNCTION_NAME LZ4_compressHC_limitedOutput
+#define LIMITED_OUTPUT
+#include "lz4hc_encoder.h"

--- a/lz4hc_encoder.h
+++ b/lz4hc_encoder.h
@ -0,0 +1,341 @@
+/*
+   LZ4 HC Encoder - Part of LZ4 HC algorithm
+   Copyright (C) 2011-2013, Yann Collet.
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
+   - LZ4 source repository : http://code.google.com/p/lz4/
+*/
+
+/* lz4hc_encoder.h must be included into lz4hc.c
+   The objective of this file is to create a single LZ4 compression function source
+   which will be instanciated multiple times with minor variations
+   depending on a set of #define.
+*/
+
+//****************************
+// Check required defines
+//****************************
+
+#ifndef FUNCTION_NAME
+#  error "FUNTION_NAME is not defined"
+#endif
+
+
+//****************************
+// Local definitions
+//****************************
+#define COMBINED_NAME_RAW(n1,n2) n1 ## n2
+#define COMBINED_NAME(n1,n2) COMBINED_NAME_RAW(n1,n2)
+#define ENCODE_SEQUENCE_NAME COMBINED_NAME(FUNCTION_NAME,_encodeSequence)
+#ifdef LIMITED_OUTPUT
+#  define ENCODE_SEQUENCE(i,o,a,m,r,d) if (ENCODE_SEQUENCE_NAME(i,o,a,m,r,d)) return 0;
+#else
+#  define ENCODE_SEQUENCE(i,o,a,m,r,d) ENCODE_SEQUENCE_NAME(i,o,a,m,r)
+#endif
+
+//****************************
+// Function code
+//****************************
+
+forceinline static int ENCODE_SEQUENCE_NAME (
+                       const BYTE** ip, 
+                       BYTE** op, 
+                       const BYTE** anchor, 
+                       int matchLength, 
+                       const BYTE* ref
+#ifdef LIMITED_OUTPUT
+                      ,BYTE* oend
+#endif
+                       )
+{
+    int length, len; 
+    BYTE* token;
+
+    // Encode Literal length
+    length = (int)(*ip - *anchor);
+    token = (*op)++;
+#ifdef LIMITED_OUTPUT
+    if ((*op + length + (2 + 1 + LASTLITERALS) + (length>>8)) > oend) return 1; 		// Check output limit
+#endif
+    if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255;  *(*op)++ = (BYTE)len; } 
+    else *token = (BYTE)(length<<ML_BITS);
+
+    // Copy Literals
+    LZ4_BLINDCOPY(*anchor, *op, length);
+
+    // Encode Offset
+    LZ4_WRITE_LITTLEENDIAN_16(*op,(U16)(*ip-ref));
+
+    // Encode MatchLength
+    length = (int)(matchLength-MINMATCH);
+#ifdef LIMITED_OUTPUT
+    if (*op + (1 + LASTLITERALS) + (length>>8) > oend) return 1; 		// Check output limit
+#endif
+    if (length>=(int)ML_MASK) { *token+=ML_MASK; length-=ML_MASK; for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (length > 254) { length-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)length; } 
+    else *token += (BYTE)length;	
+
+    // Prepare next loop
+    *ip += matchLength;
+    *anchor = *ip; 
+
+    return 0;
+}
+
+
+int COMBINED_NAME(FUNCTION_NAME,ctx) (
+                 LZ4HC_Data_Structure* ctx,
+                 const char* source, 
+                 char* dest,
+                 int inputSize
+#ifdef LIMITED_OUTPUT
+                ,int maxOutputSize
+#endif
+                )
+{	
+    const BYTE* ip = (const BYTE*) source;
+    const BYTE* anchor = ip;
+    const BYTE* const iend = ip + inputSize;
+    const BYTE* const mflimit = iend - MFLIMIT;
+    const BYTE* const matchlimit = (iend - LASTLITERALS);
+
+    BYTE* op = (BYTE*) dest;
+#ifdef LIMITED_OUTPUT
+    BYTE* const oend = op + maxOutputSize;
+#endif
+
+    int	ml, ml2, ml3, ml0;
+    const BYTE* ref=NULL;
+    const BYTE* start2=NULL;
+    const BYTE* ref2=NULL;
+    const BYTE* start3=NULL;
+    const BYTE* ref3=NULL;
+    const BYTE* start0;
+    const BYTE* ref0;
+
+    ip++;
+
+    // Main Loop
+    while (ip < mflimit)
+    {
+        ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref));
+        if (!ml) { ip++; continue; }
+
+        // saved, in case we would skip too much
+        start0 = ip;
+        ref0 = ref;
+        ml0 = ml;
+
+_Search2:
+        if (ip+ml < mflimit)
+            ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2);
+        else ml2 = ml;
+
+        if (ml2 == ml)  // No better match
+        {
+            ENCODE_SEQUENCE(&ip, &op, &anchor, ml, ref, oend);
+            continue;
+        }
+
+        if (start0 < ip)
+        {
+            if (start2 < ip + ml0)   // empirical
+            {
+                ip = start0;
+                ref = ref0;
+                ml = ml0;
+            }
+        }
+
+        // Here, start0==ip
+        if ((start2 - ip) < 3)   // First Match too small : removed
+        {
+            ml = ml2;
+            ip = start2;
+            ref =ref2;
+            goto _Search2;
+        }
+
+_Search3:
+        // Currently we have :
+        // ml2 > ml1, and
+        // ip1+3 <= ip2 (usually < ip1+ml1)
+        if ((start2 - ip) < OPTIMAL_ML)
+        {
+            int correction;
+            int new_ml = ml;
+            if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
+            if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
+            correction = new_ml - (int)(start2 - ip);
+            if (correction > 0)
+            {
+                start2 += correction;
+                ref2 += correction;
+                ml2 -= correction;
+            }
+        }
+        // Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18)
+
+        if (start2 + ml2 < mflimit)
+            ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3);
+        else ml3 = ml2;
+
+        if (ml3 == ml2) // No better match : 2 sequences to encode
+        {
+            // ip & ref are known; Now for ml
+            if (start2 < ip+ml)  ml = (int)(start2 - ip);
+            // Now, encode 2 sequences
+            ENCODE_SEQUENCE(&ip, &op, &anchor, ml, ref, oend);
+            ip = start2;
+            ENCODE_SEQUENCE(&ip, &op, &anchor, ml2, ref2, oend);
+            continue;
+        }
+
+        if (start3 < ip+ml+3) // Not enough space for match 2 : remove it
+        {
+            if (start3 >= (ip+ml)) // can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1
+            {
+                if (start2 < ip+ml)
+                {
+                    int correction = (int)(ip+ml - start2);
+                    start2 += correction;
+                    ref2 += correction;
+                    ml2 -= correction;
+                    if (ml2 < MINMATCH)
+                    {
+                        start2 = start3;
+                        ref2 = ref3;
+                        ml2 = ml3;
+                    }
+                }
+
+                ENCODE_SEQUENCE(&ip, &op, &anchor, ml, ref, oend);
+                ip  = start3;
+                ref = ref3;
+                ml  = ml3;
+
+                start0 = start2;
+                ref0 = ref2;
+                ml0 = ml2;
+                goto _Search2;
+            }
+
+            start2 = start3;
+            ref2 = ref3;
+            ml2 = ml3;
+            goto _Search3;
+        }
+
+        // OK, now we have 3 ascending matches; let's write at least the first one
+        // ip & ref are known; Now for ml
+        if (start2 < ip+ml)
+        {
+            if ((start2 - ip) < (int)ML_MASK)
+            {
+                int correction;
+                if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
+                if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
+                correction = ml - (int)(start2 - ip);
+                if (correction > 0)
+                {
+                    start2 += correction;
+                    ref2 += correction;
+                    ml2 -= correction;
+                }
+            }
+            else
+            {
+                ml = (int)(start2 - ip);
+            }
+        }
+        ENCODE_SEQUENCE(&ip, &op, &anchor, ml, ref, oend);
+
+        ip = start2;
+        ref = ref2;
+        ml = ml2;
+
+        start2 = start3;
+        ref2 = ref3;
+        ml2 = ml3;
+
+        goto _Search3;
+
+    }
+
+    // Encode Last Literals
+    {
+        int lastRun = (int)(iend - anchor);
+#ifdef LIMITED_OUTPUT
+        if (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) return 0;  // Check output limit
+#endif
+        if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } 
+        else *op++ = (BYTE)(lastRun<<ML_BITS);
+        memcpy(op, anchor, iend - anchor);
+        op += iend-anchor;
+    } 
+
+    // End
+    return (int) (((char*)op)-dest);
+}
+
+
+int FUNCTION_NAME (const char* source, 
+                 char* dest,
+                 int inputSize
+#ifdef LIMITED_OUTPUT
+                ,int maxOutputSize
+#endif
+                )
+{
+    void* ctx = LZ4HC_create((const BYTE*)source);
+#ifdef LIMITED_OUTPUT
+    int result = COMBINED_NAME(FUNCTION_NAME,ctx) (ctx, source, dest, inputSize, maxOutputSize);
+#else
+    int result = COMBINED_NAME(FUNCTION_NAME,ctx) (ctx, source, dest, inputSize);
+#endif
+    LZ4HC_free (&ctx);
+
+    return result;
+}
+
+
+//****************************
+// Clean defines
+//****************************
+
+// Required defines
+#undef FUNCTION_NAME
+
+// Locally Generated
+#undef ENCODE_SEQUENCE
+
+// Optional defines
+#ifdef LIMITED_OUTPUT
+#undef LIMITED_OUTPUT
+#endif
+
+