New function : LZ4_uncompress : secure version which is safe against buffer overflow attacks

New function : LZ4_uncompress_unknownOutputSize : secure but slower version which also guesses the size of data to be decoded
The demo file compression program (main.c) is updated to use these new functions
LZ4_decode is still supported but in deprecated status, due to its vulnerability to malicious buffer overflow scenarios


git-svn-id: https://lz4.googlecode.com/svn/trunk@9 650e7d94-2a16-8b24-b05c-7c0b3f6821cd
This commit is contained in:
yann.collet.73@gmail.com 2011-06-04 17:15:43 +00:00
parent 0121f47e00
commit 6b798d5e40
3 changed files with 239 additions and 61 deletions

215
lz4.c
View File

@ -35,6 +35,16 @@
#include "lz4.h" #include "lz4.h"
//**************************************
// Performance parameter <---------------------------------------------------------
//**************************************
// Lowering this value reduces memory usage
// It may also improve speed, especially if you reach L1 cache size (32KB for Intel, 64KB for AMD)
// Expanding memory usage typically improves compression ratio
// Memory usage formula : N->2^(N+2) Bytes (examples : 17 -> 512KB ; 12 -> 16KB)
#define HASH_LOG 17
//************************************** //**************************************
// Basic Types // Basic Types
//************************************** //**************************************
@ -61,7 +71,6 @@
#define MAXD_LOG 16 #define MAXD_LOG 16
#define MAX_DISTANCE ((1 << MAXD_LOG) - 1) #define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
#define HASH_LOG 17 // <--- Lower this value to lower memory usage. N->2^(N+2) Bytes (ex : 17 -> 512KB)
#define HASHTABLESIZE (1 << HASH_LOG) #define HASHTABLESIZE (1 << HASH_LOG)
#define HASH_MASK (HASHTABLESIZE - 1) #define HASH_MASK (HASHTABLESIZE - 1)
@ -145,27 +154,21 @@ int LZ4_compressCtx(void** ctx,
ref = HashTable[h]; ref = HashTable[h];
HashTable[h] = ip; HashTable[h] = ip;
// Min Match // Check Min Match
if (( ((ip-ref) >> MAXD_LOG) != 0) || (*(U32*)ref != sequence)) if (( ((ip-ref) >> MAXD_LOG) != 0) || (*(U32*)ref != sequence))
{ {
if (ip-anchor>limit) { limit<<=1; step += 1 + (step>>2); } if (ip-anchor>limit) { limit <<= 1; step += 1 + (step>>2); }
ip+=step; ip += step;
continue; continue;
} }
// catch up // catch up
if (step>1) if (step>1) { HashTable[h] = ref; ip -= (step-1); step=1; continue; }
{ limit = INCOMPRESSIBLE;
HashTable[h] = ref;
ip -= (step-1);
step=1;
continue;
}
limit=INCOMPRESSIBLE;
// Encode Literal length // Encode Literal length
len = length = ip - anchor; len = length = ip - anchor;
orun=op++; orun = op++;
if (len>(RUN_MASK-1)) { *orun=(RUN_MASK<<ML_BITS); len-=RUN_MASK; for(; len > 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; } if (len>(RUN_MASK-1)) { *orun=(RUN_MASK<<ML_BITS); len-=RUN_MASK; for(; len > 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; }
else *orun = (len<<ML_BITS); else *orun = (len<<ML_BITS);
@ -191,14 +194,16 @@ int LZ4_compressCtx(void** ctx,
anchor = ip; anchor = ip;
} }
// Encode Last Literals // Encode Last Literals
len = length = iend - anchor; len = length = iend - anchor;
orun=op++; if (length)
if (len>(RUN_MASK-1)) { *orun=(RUN_MASK<<ML_BITS); len-=RUN_MASK; for(; len > 254 ; len-=255) *op++ = 255; *op++ = (BYTE) len; } {
else *orun = (len<<ML_BITS); orun=op++;
for(;length>0;length-=4) { *(U32*)op = *(U32*)anchor; op+=4; anchor+=4; } if (len>(RUN_MASK-1)) { *orun=(RUN_MASK<<ML_BITS); len-=RUN_MASK; for(; len > 254 ; len-=255) *op++ = 255; *op++ = (BYTE) len; }
op += length; // correction else *orun = (len<<ML_BITS);
for(;length>0;length-=4) { *(U32*)op = *(U32*)anchor; op+=4; anchor+=4; }
op += length; // correction
}
// End // End
return (int) (((char*)op)-dest); return (int) (((char*)op)-dest);
@ -209,15 +214,168 @@ int LZ4_compressCtx(void** ctx,
//**************************** //****************************
// Decompression CODE // Decompression CODE
//**************************** //****************************
int LZ4_uncompress(char* source,
char* dest,
int osize)
{
// Local Variables
BYTE *ip = (BYTE*) source;
BYTE *op = (BYTE*) dest,
*oend=(BYTE*) dest + osize,
*ref, *cpy,
runcode;
U32 dec[4]={0, 3, 2, 3};
int len, length;
// Main Loop
while (1)
{
// get runlength
runcode = *ip++;
if ((length=(runcode>>ML_BITS)) == RUN_MASK) { for (;(len=*ip++)==255;length+=255){} length += len; }
// copy literals
ref = op+length;
if (ref>oend-4)
{
if (ref > oend) goto _output_error;
while(op<oend-3) { *(U32*)op=*(U32*)ip; op+=4; ip+=4; }
while(op<ref) *op++=*ip++;
break; // Necessarily EOF
}
while (op<ref) { *(U32*)op = *(U32*)ip; op+=4; ip+=4; }
ip-=(op-ref); op=ref; // correction
// get offset
ref -= *(U16*)ip; ip+=2;
// get matchlength
if ((length=(runcode&ML_MASK)) == ML_MASK) { for (;(len=*ip++)==255;length+=255){} length += len; }
length += MINMATCH;
// copy repeated sequence
cpy = op + length;
if (op-ref<4)
{
*op++ = *ref++;
*op++ = *ref++;
*op++ = *ref++;
*op++ = *ref++;
ref -= dec[op-ref];
}
if (cpy>oend-4)
{
if (cpy > oend) goto _output_error;
while(op<cpy-3) { *(U32*)op=*(U32*)ref; op+=4; ref+=4; }
while(op<cpy) *op++=*ref++;
if (op>=oend) break; // Check EOF
continue;
}
while(op<cpy) { *(U32*)op=*(U32*)ref; op+=4; ref+=4; }
op=cpy; // correction
}
// end of decoding
return (int) (((char*)ip)-source);
// write overflow error detected
_output_error:
return (int) (-(((char*)ip)-source));
}
int LZ4_uncompress_unknownOutputSize(
char* source,
char* dest,
int isize,
int maxOutputSize)
{
// Local Variables
BYTE *ip = (BYTE*) source,
*iend = ip + isize;
BYTE *op = (BYTE*) dest,
*oend = op + maxOutputSize,
*ref, *cpy,
runcode;
U32 dec[4]={0, 3, 2, 3};
int len, length;
// Main Loop
while (ip<iend)
{
// get runlength
runcode = *ip++;
if ((length=(runcode>>ML_BITS)) == RUN_MASK) { for (;(len=*ip++)==255;length+=255){} length += len; }
// copy literals
ref = op+length;
if (ref>oend-4)
{
if (ref > oend) goto _output_error;
while(op<oend-3) { *(U32*)op=*(U32*)ip; op+=4; ip+=4; }
while(op<ref) *op++=*ip++;
break; // Necessarily EOF
}
while (op<ref) { *(U32*)op = *(U32*)ip; op+=4; ip+=4; }
ip-=(op-ref); op=ref; // correction
if (ip>=iend) break; // check EOF
// get offset
ref -= *(U16*)ip; ip+=2;
// get matchlength
if ((length=(runcode&ML_MASK)) == ML_MASK) { for (;(len=*ip++)==255;length+=255){} length += len; }
length += MINMATCH;
// copy repeated sequence
cpy = op + length;
if (op-ref<4)
{
*op++ = *ref++;
*op++ = *ref++;
*op++ = *ref++;
*op++ = *ref++;
ref -= dec[op-ref];
}
if (cpy>oend-4)
{
if (cpy > oend) goto _output_error;
while(op<cpy-3) { *(U32*)op=*(U32*)ref; op+=4; ref+=4; }
while(op<cpy) *op++=*ref++;
if (op>=oend) break; // Check EOF
continue;
}
while(op<cpy) { *(U32*)op=*(U32*)ref; op+=4; ref+=4; }
op=cpy; // correction
}
// end of decoding
return (int) (((char*)op)-dest);
// write overflow error detected
_output_error:
return (int) (-(((char*)ip)-source));
}
//****************************
// Deprecated functions
//****************************
int LZ4_decode ( char* source, int LZ4_decode ( char* source,
char* dest, char* dest,
int isize) int isize)
{ {
// Local Variables // Local Variables
BYTE *ip = (BYTE*) source, BYTE *ip = (BYTE*)source,
*iend = (BYTE*) source + isize; *iend = ip + isize;
BYTE *op = (BYTE*) dest, BYTE *op = (BYTE*)dest,
*ref, *cpy, *ref, *cpy,
runcode; runcode;
@ -233,11 +391,7 @@ int LZ4_decode ( char* source,
if ((length=(runcode>>ML_BITS)) == RUN_MASK) { for (;(len=*ip++)==255;length+=255){} length += len; } if ((length=(runcode>>ML_BITS)) == RUN_MASK) { for (;(len=*ip++)==255;length+=255){} length += len; }
// copy literals // copy literals
ref = op+length; ref=op+length;
#ifdef SAFEWRITEBUFFER
if (ref>iend-4) { while(op<iend-3) { *(U32*)op=*(U32*)ip; op+=4; ip+=4; } while(op<ref) *op++=*ip++; }
else
#endif
while (op<ref) { *(U32*)op = *(U32*)ip; op+=4; ip+=4; } while (op<ref) { *(U32*)op = *(U32*)ip; op+=4; ip+=4; }
ip-=(op-ref); op=ref; // correction ip-=(op-ref); op=ref; // correction
if (ip>=iend) break; // Check EOF if (ip>=iend) break; // Check EOF
@ -259,10 +413,6 @@ int LZ4_decode ( char* source,
*op++ = *ref++; *op++ = *ref++;
ref -= dec[op-ref]; ref -= dec[op-ref];
} }
#ifdef SAFEWRITEBUFFER
if (cpy>iend-4) { while(op<iend-3) { *(U32*)op=*(U32*)ref; op+=4; ref+=4; } while(op<cpy) *op++=*ref++; }
else
#endif
while(op<cpy) { *(U32*)op=*(U32*)ref; op+=4; ref+=4; } while(op<cpy) { *(U32*)op=*(U32*)ref; op+=4; ref+=4; }
op=cpy; // correction op=cpy; // correction
} }
@ -270,6 +420,3 @@ int LZ4_decode ( char* source,
// end of decoding // end of decoding
return (int) (((char*)op)-dest); return (int) (((char*)op)-dest);
} }

57
lz4.h
View File

@ -33,22 +33,12 @@ extern "C" {
#endif #endif
//****************************
// Instructions
//****************************
// Uncomment next line to ensure that LZ4_Decode will never write in destination buffer more than "decompressedSize" bytes
// If commented, the decoder may write up to 3 bytes more than decompressedSize, so provide extra room in dest buffer for that
// Recommendation : keep commented, for improved performance; ensure that destination buffer is at least decompressedSize + 3 Bytes
// #define SAFEWRITEBUFFER
//**************************** //****************************
// Simple Functions // Simple Functions
//**************************** //****************************
int LZ4_compress (char* source, char* dest, int isize); int LZ4_compress (char* source, char* dest, int isize);
int LZ4_decode (char* source, char* dest, int isize); int LZ4_uncompress (char* source, char* dest, int osize);
/* /*
LZ4_compress : LZ4_compress :
@ -57,12 +47,12 @@ LZ4_compress :
To avoid any problem, size it to handle worst cases situations (input data not compressible) To avoid any problem, size it to handle worst cases situations (input data not compressible)
Worst case size is : "inputsize + 0.4%", with "0.4%" being at least 8 bytes. Worst case size is : "inputsize + 0.4%", with "0.4%" being at least 8 bytes.
LZ4_decode : LZ4_uncompress :
return : the number of bytes in decoded buffer dest return : the number of bytes read in the source buffer
note 1 : isize is the input size, therefore the compressed size If the source stream is malformed, the function will stop decoding and return a negative result, indicating the byte position of the faulty instruction
note 2 : destination buffer must be already allocated. This version never writes beyond dest + osize, and is therefore protected against malicious data packets
The program calling the decoder must know in advance the size of decoded stream to properly allocate the destination buffer note 1 : osize is the output size, therefore the original size
Note that, in fast mode, the destination buffer size must be at least "decompressedSize + 3 Bytes" note 2 : destination buffer must be already allocated
*/ */
@ -70,6 +60,19 @@ LZ4_decode :
// Advanced Functions // Advanced Functions
//**************************** //****************************
int LZ4_uncompress_unknownOutputSize (char* source, char* dest, int isize, int maxOutputSize);
/*
LZ4_uncompress_unknownOutputSize :
return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize)
If the source stream is malformed, the function will stop decoding and return a negative result, indicating the byte position of the faulty instruction
This version never writes beyond dest + maxOutputSize, and is therefore protected against malicious data packets
note 1 : isize is the input size, therefore the compressed size
note 2 : destination buffer must be already allocated
note 3 : this version is slower by up to 10%, and is therefore not recommended for general use
*/
int LZ4_compressCtx(void** ctx, char* source, char* dest, int isize); int LZ4_compressCtx(void** ctx, char* source, char* dest, int isize);
/* /*
@ -85,6 +88,24 @@ LZ4_compressCtx :
*/ */
//****************************
// Deprecated Functions
//****************************
int LZ4_decode (char* source, char* dest, int isize);
/*
LZ4_decode :
return : the number of bytes in decoded buffer dest
note 1 : isize is the input size, therefore the compressed size
note 2 : destination buffer must be already allocated.
The program calling the decoder must know in advance the size of decoded stream to properly allocate the destination buffer
The destination buffer size must be at least "decompressedSize + 3 Bytes"
This version is unprotected against malicious data packets designed to create buffer overflow errors.
It is therefore deprecated, but still present in this version for compatibility.
*/
#if defined (__cplusplus) #if defined (__cplusplus)
} }
#endif #endif

28
main.c
View File

@ -63,7 +63,7 @@
#define CHUNKSIZE (8<<20) // 8 MB #define CHUNKSIZE (8<<20) // 8 MB
#define CACHELINE 64 #define CACHELINE 64
#define OUT_CHUNKSIZE (CHUNKSIZE + CHUNKSIZE/256 + CACHELINE) #define OUT_CHUNKSIZE (CHUNKSIZE + (CHUNKSIZE>>8) + CACHELINE)
#define ARCHIVE_MAGICNUMBER 0x184C2102 #define ARCHIVE_MAGICNUMBER 0x184C2102
#define ARCHIVE_MAGICNUMBER_SIZE 4 #define ARCHIVE_MAGICNUMBER_SIZE 4
@ -149,6 +149,8 @@ int decode_file(char* input_filename, char* output_filename)
FILE* finput = fopen( input_filename, "rb" ); FILE* finput = fopen( input_filename, "rb" );
FILE* foutput = fopen( output_filename, "wb" ); FILE* foutput = fopen( output_filename, "wb" );
size_t uselessRet; size_t uselessRet;
int sinkint;
U32 nextSize;
if (finput==0 ) { printf("Pb opening %s\n", input_filename); return 4; } if (finput==0 ) { printf("Pb opening %s\n", input_filename); return 4; }
if (foutput==0) { printf("Pb opening %s\n", output_filename); return 5; } if (foutput==0) { printf("Pb opening %s\n", output_filename); return 5; }
@ -160,25 +162,33 @@ int decode_file(char* input_filename, char* output_filename)
// Check Archive Header // Check Archive Header
uselessRet = fread(out_buff, 1, ARCHIVE_MAGICNUMBER_SIZE, finput); uselessRet = fread(out_buff, 1, ARCHIVE_MAGICNUMBER_SIZE, finput);
if (*(U32*)out_buff != ARCHIVE_MAGICNUMBER) { printf("Wrong file : cannot be decoded\n"); return 6; } if (*(U32*)out_buff != ARCHIVE_MAGICNUMBER) { printf("Wrong file : cannot be decoded\n"); return 6; }
uselessRet = fread(in_buff, 1, 4, finput);
nextSize = *(U32*)in_buff;
// Main Loop // Main Loop
while (1) while (1)
{ {
int outSize;
// Read Block // Read Block
U32 inSize = (U32) fread(in_buff, 1, 4, finput); uselessRet = fread(in_buff, 1, nextSize, finput);
if( inSize<=0 ) break;
inSize = *(U32*)in_buff; // Check Next Block
uselessRet = fread( in_buff, 1, inSize, finput); uselessRet = (U32) fread(&nextSize, 1, 4, finput);
if( uselessRet==0 ) break;
// Decode Block // Decode Block
outSize = LZ4_decode(in_buff, out_buff, inSize); sinkint = LZ4_uncompress(in_buff, out_buff, CHUNKSIZE);
filesize += outSize; filesize += CHUNKSIZE;
// Write Block // Write Block
fwrite(out_buff, 1, outSize, foutput); fwrite(out_buff, 1, CHUNKSIZE, foutput);
} }
// Last Block
uselessRet = fread(in_buff, 1, nextSize, finput);
sinkint = LZ4_uncompress_unknownOutputSize(in_buff, out_buff, nextSize, CHUNKSIZE);
filesize += sinkint;
fwrite(out_buff, 1, sinkint, foutput);
// Status // Status
printf("Successfully decoded %llu bytes \n", (unsigned long long)filesize); printf("Successfully decoded %llu bytes \n", (unsigned long long)filesize);