New function : LZ4_uncompress : secure version which is safe against buffer overflow attacks

New function : LZ4_uncompress_unknownOutputSize : secure but slower version which also guesses the size of data to be decoded
The demo file compression program (main.c) is updated to use these new functions
LZ4_decode is still supported but in deprecated status, due to its vulnerability to malicious buffer overflow scenarios


git-svn-id: https://lz4.googlecode.com/svn/trunk@9 650e7d94-2a16-8b24-b05c-7c0b3f6821cd
This commit is contained in:
yann.collet.73@gmail.com 2011-06-04 17:15:43 +00:00
parent 0121f47e00
commit 6b798d5e40
3 changed files with 239 additions and 61 deletions

215
lz4.c
View File

@ -35,6 +35,16 @@
#include "lz4.h"
//**************************************
// Performance parameter <---------------------------------------------------------
//**************************************
// Lowering this value reduces memory usage
// It may also improve speed, especially if you reach L1 cache size (32KB for Intel, 64KB for AMD)
// Expanding memory usage typically improves compression ratio
// Memory usage formula : N->2^(N+2) Bytes (examples : 17 -> 512KB ; 12 -> 16KB)
#define HASH_LOG 17
//**************************************
// Basic Types
//**************************************
@ -61,7 +71,6 @@
#define MAXD_LOG 16
#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
#define HASH_LOG 17 // <--- Lower this value to lower memory usage. N->2^(N+2) Bytes (ex : 17 -> 512KB)
#define HASHTABLESIZE (1 << HASH_LOG)
#define HASH_MASK (HASHTABLESIZE - 1)
@ -145,27 +154,21 @@ int LZ4_compressCtx(void** ctx,
ref = HashTable[h];
HashTable[h] = ip;
// Min Match
// Check Min Match
if (( ((ip-ref) >> MAXD_LOG) != 0) || (*(U32*)ref != sequence))
{
if (ip-anchor>limit) { limit<<=1; step += 1 + (step>>2); }
ip+=step;
if (ip-anchor>limit) { limit <<= 1; step += 1 + (step>>2); }
ip += step;
continue;
}
// catch up
if (step>1)
{
HashTable[h] = ref;
ip -= (step-1);
step=1;
continue;
}
limit=INCOMPRESSIBLE;
if (step>1) { HashTable[h] = ref; ip -= (step-1); step=1; continue; }
limit = INCOMPRESSIBLE;
// Encode Literal length
len = length = ip - anchor;
orun=op++;
orun = op++;
if (len>(RUN_MASK-1)) { *orun=(RUN_MASK<<ML_BITS); len-=RUN_MASK; for(; len > 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; }
else *orun = (len<<ML_BITS);
@ -191,14 +194,16 @@ int LZ4_compressCtx(void** ctx,
anchor = ip;
}
// Encode Last Literals
len = length = iend - anchor;
orun=op++;
if (len>(RUN_MASK-1)) { *orun=(RUN_MASK<<ML_BITS); len-=RUN_MASK; for(; len > 254 ; len-=255) *op++ = 255; *op++ = (BYTE) len; }
else *orun = (len<<ML_BITS);
for(;length>0;length-=4) { *(U32*)op = *(U32*)anchor; op+=4; anchor+=4; }
op += length; // correction
if (length)
{
orun=op++;
if (len>(RUN_MASK-1)) { *orun=(RUN_MASK<<ML_BITS); len-=RUN_MASK; for(; len > 254 ; len-=255) *op++ = 255; *op++ = (BYTE) len; }
else *orun = (len<<ML_BITS);
for(;length>0;length-=4) { *(U32*)op = *(U32*)anchor; op+=4; anchor+=4; }
op += length; // correction
}
// End
return (int) (((char*)op)-dest);
@ -209,15 +214,168 @@ int LZ4_compressCtx(void** ctx,
//****************************
// Decompression CODE
//****************************
// LZ4_uncompress :
//   Decodes the LZ4 stream found at `source` into `dest`, stopping once `osize` bytes have been produced.
//   source : compressed data. Its length is not passed in, so reads from it are NOT bounds-checked.
//   dest   : output buffer, already allocated by the caller, holding at least `osize` bytes
//   osize  : number of bytes to produce (the original, uncompressed size)
//   return : number of bytes read from `source` on success;
//            a negative value on malformed input, whose magnitude is the read position reached in `source`
//   Guarantees : never writes beyond dest + osize, and never lets a match reference data before `dest`.
int LZ4_uncompress(char* source,
				 char* dest,
				 int osize)
{
	// Local Variables
	BYTE	*ip = (BYTE*) source;

	BYTE	*op = (BYTE*) dest,
			*oend = (BYTE*) dest + osize,
			*ref, *cpy,
			runcode;

	U32		dec[4]={0, 3, 2, 3};	// backward adjustment of ref after the 4-bytes overlap fix-up, indexed by (op-ref)
	U32		offset;
	int		len, length;


	// Main Loop
	while (1)
	{
		// get runlength
		runcode = *ip++;
		length = runcode >> ML_BITS;
		if (length == RUN_MASK)
		{
			do
			{
				len = *ip++;
				length += len;
				// A run larger than the remaining room can never be valid : stop now, before `length` can overflow
				if (length > oend - op) goto _output_error;
			} while (len == 255);
		}

		// copy literals
		// Sizes are compared (not pointers) so that op+length is only formed once known to be inside dest
		if (length > oend - op) goto _output_error;
		ref = op + length;
		if (oend - ref < 4)
		{
			// Too close to the end of dest for the 4-bytes-overrun copy : copy exactly `length` bytes
			while (ref - op > 3) { *(U32*)op = *(U32*)ip; op+=4; ip+=4; }
			while (op < ref) *op++ = *ip++;
			break;    // Necessarily EOF
		}
		// Fast copy, may overrun ref by up to 3 bytes (room is guaranteed by the test above)
		// NOTE(review): it also reads up to 3 bytes beyond the literals in source ; source has no known bound here
		while (op < ref) { *(U32*)op = *(U32*)ip; op+=4; ip+=4; }
		ip -= (op-ref); op = ref;	// correction


		// get offset
		offset = *(U16*)ip;
		// Error : the match would start before the beginning of dest
		if (offset > (U32)(op - (BYTE*)dest)) goto _output_error;
		ip += 2;
		ref -= offset;

		// get matchlength
		length = runcode & ML_MASK;
		if (length == ML_MASK)
		{
			do
			{
				len = *ip++;
				length += len;
				if (length > oend - op) goto _output_error;
			} while (len == 255);
		}
		length += MINMATCH;

		// copy repeated sequence
		if (length > oend - op) goto _output_error;		// match would write beyond dest + osize
		cpy = op + length;
		if (op-ref < 4)
		{
			// Source and destination overlap by less than 4 bytes :
			// copy the first 4 bytes one by one, then move ref back so the 4-bytes copies below stay coherent
			*op++ = *ref++;
			*op++ = *ref++;
			*op++ = *ref++;
			*op++ = *ref++;
			ref -= dec[op-ref];
		}
		if (oend - cpy < 4)
		{
			// Too close to the end of dest : copy exactly up to cpy
			while (cpy - op > 3) { *(U32*)op = *(U32*)ref; op+=4; ref+=4; }
			while (op < cpy) *op++ = *ref++;
			if (op >= oend) break;    // Check EOF
			continue;
		}
		while (op < cpy) { *(U32*)op = *(U32*)ref; op+=4; ref+=4; }
		op = cpy;		// correction
	}

	// end of decoding
	return (int) (((char*)ip)-source);

	// malformed input detected
_output_error:
	return (int) (-(((char*)ip)-source));
}
// LZ4_uncompress_unknownOutputSize :
//   Decodes the `isize` bytes of LZ4 stream found at `source` into `dest`, whose capacity is `maxOutputSize`.
//   source        : compressed data
//   dest          : output buffer, already allocated by the caller, holding at least `maxOutputSize` bytes
//   isize         : size of the compressed data, in bytes
//   maxOutputSize : capacity of `dest` ; the decoded size may be anything up to this value
//   return : number of bytes written into `dest` on success (necessarily <= maxOutputSize);
//            a negative value on malformed input, whose magnitude is the read position reached in `source`
//   Guarantees : never writes beyond dest + maxOutputSize, never reads beyond source + isize,
//                and never lets a match reference data before `dest`.
int LZ4_uncompress_unknownOutputSize(
				char* source,
				char* dest,
				int isize,
				int maxOutputSize)
{
	// Local Variables
	BYTE	*ip = (BYTE*) source,
			*iend = ip + isize;

	BYTE	*op = (BYTE*) dest,
			*oend = op + maxOutputSize,
			*ref, *cpy,
			runcode;

	U32		dec[4]={0, 3, 2, 3};	// backward adjustment of ref after the 4-bytes overlap fix-up, indexed by (op-ref)
	U32		offset;
	int		len, length;


	// Main Loop
	while (ip<iend)
	{
		// get runlength
		runcode = *ip++;
		length = runcode >> ML_BITS;
		if (length == RUN_MASK)
		{
			do
			{
				if (ip >= iend) goto _output_error;		// length bytes run past the end of source
				len = *ip++;
				length += len;
				// A run larger than the remaining room can never be valid : stop now, before `length` can overflow
				if (length > oend - op) goto _output_error;
			} while (len == 255);
		}

		// copy literals
		// Sizes are compared (not pointers) so that op+length and ip+length are only formed once known to be in bounds
		if (length > iend - ip) goto _output_error;		// literals run past the end of source
		if (length > oend - op) goto _output_error;		// literals run past the end of dest
		ref = op + length;
		if (oend - ref < 4)
		{
			// Too close to the end of dest for the 4-bytes-overrun copy : copy exactly `length` bytes,
			// so that op (hence the returned size) stops at ref even when ref < oend
			while (ref - op > 3) { *(U32*)op = *(U32*)ip; op+=4; ip+=4; }
			while (op < ref) *op++ = *ip++;
			break;    // Necessarily EOF
		}
		if ((iend - ip) - length < 3)
		{
			// Too close to the end of source : the fast copy below would read beyond iend
			while (ref - op > 3) { *(U32*)op = *(U32*)ip; op+=4; ip+=4; }
			while (op < ref) *op++ = *ip++;
		}
		else
		{
			// Fast copy, may overrun ref (and the literals in source) by up to 3 bytes
			while (op < ref) { *(U32*)op = *(U32*)ip; op+=4; ip+=4; }
			ip -= (op-ref); op = ref;	// correction
		}
		if (ip>=iend) break;    // check EOF

		// get offset
		if (iend - ip < 2) goto _output_error;			// truncated offset
		offset = *(U16*)ip;
		// Error : the match would start before the beginning of dest
		if (offset > (U32)(op - (BYTE*)dest)) goto _output_error;
		ip += 2;
		ref -= offset;

		// get matchlength
		length = runcode & ML_MASK;
		if (length == ML_MASK)
		{
			do
			{
				if (ip >= iend) goto _output_error;		// length bytes run past the end of source
				len = *ip++;
				length += len;
				if (length > oend - op) goto _output_error;
			} while (len == 255);
		}
		length += MINMATCH;

		// copy repeated sequence
		if (length > oend - op) goto _output_error;		// match would write beyond dest + maxOutputSize
		cpy = op + length;
		if (op-ref < 4)
		{
			// Source and destination overlap by less than 4 bytes :
			// copy the first 4 bytes one by one, then move ref back so the 4-bytes copies below stay coherent
			*op++ = *ref++;
			*op++ = *ref++;
			*op++ = *ref++;
			*op++ = *ref++;
			ref -= dec[op-ref];
		}
		if (oend - cpy < 4)
		{
			// Too close to the end of dest : copy exactly up to cpy
			while (cpy - op > 3) { *(U32*)op = *(U32*)ref; op+=4; ref+=4; }
			while (op < cpy) *op++ = *ref++;
			if (op >= oend) break;    // Check EOF
			continue;
		}
		while (op < cpy) { *(U32*)op = *(U32*)ref; op+=4; ref+=4; }
		op = cpy;		// correction
	}

	// end of decoding
	return (int) (((char*)op)-dest);

	// malformed input detected
_output_error:
	return (int) (-(((char*)ip)-source));
}
//****************************
// Deprecated functions
//****************************
int LZ4_decode ( char* source,
char* dest,
int isize)
{
// Local Variables
BYTE *ip = (BYTE*) source,
*iend = (BYTE*) source + isize;
BYTE *ip = (BYTE*)source,
*iend = ip + isize;
BYTE *op = (BYTE*) dest,
BYTE *op = (BYTE*)dest,
*ref, *cpy,
runcode;
@ -233,11 +391,7 @@ int LZ4_decode ( char* source,
if ((length=(runcode>>ML_BITS)) == RUN_MASK) { for (;(len=*ip++)==255;length+=255){} length += len; }
// copy literals
ref = op+length;
#ifdef SAFEWRITEBUFFER
if (ref>iend-4) { while(op<iend-3) { *(U32*)op=*(U32*)ip; op+=4; ip+=4; } while(op<ref) *op++=*ip++; }
else
#endif
ref=op+length;
while (op<ref) { *(U32*)op = *(U32*)ip; op+=4; ip+=4; }
ip-=(op-ref); op=ref; // correction
if (ip>=iend) break; // Check EOF
@ -259,10 +413,6 @@ int LZ4_decode ( char* source,
*op++ = *ref++;
ref -= dec[op-ref];
}
#ifdef SAFEWRITEBUFFER
if (cpy>iend-4) { while(op<iend-3) { *(U32*)op=*(U32*)ref; op+=4; ref+=4; } while(op<cpy) *op++=*ref++; }
else
#endif
while(op<cpy) { *(U32*)op=*(U32*)ref; op+=4; ref+=4; }
op=cpy; // correction
}
@ -270,6 +420,3 @@ int LZ4_decode ( char* source,
// end of decoding
return (int) (((char*)op)-dest);
}

57
lz4.h
View File

@ -33,22 +33,12 @@ extern "C" {
#endif
//****************************
// Instructions
//****************************
// Uncomment next line to ensure that LZ4_Decode will never write in destination buffer more than "decompressedSize" bytes
// If commented, the decoder may write up to 3 bytes more than decompressedSize, so provide extra room in dest buffer for that
// Recommendation : keep commented, for improved performance; ensure that destination buffer is at least decompressedSize + 3 Bytes
// #define SAFEWRITEBUFFER
//****************************
// Simple Functions
//****************************
int LZ4_compress (char* source, char* dest, int isize);
int LZ4_decode (char* source, char* dest, int isize);
int LZ4_compress (char* source, char* dest, int isize);
int LZ4_uncompress (char* source, char* dest, int osize);
/*
LZ4_compress :
@ -57,12 +47,12 @@ LZ4_compress :
To avoid any problem, size it to handle worst cases situations (input data not compressible)
Worst case size is : "inputsize + 0.4%", with "0.4%" being at least 8 bytes.
LZ4_decode :
return : the number of bytes in decoded buffer dest
note 1 : isize is the input size, therefore the compressed size
note 2 : destination buffer must be already allocated.
The program calling the decoder must know in advance the size of decoded stream to properly allocate the destination buffer
Note that, in fast mode, the destination buffer size must be at least "decompressedSize + 3 Bytes"
LZ4_uncompress :
return : the number of bytes read in the source buffer
If the source stream is malformed, the function will stop decoding and return a negative result, indicating the byte position of the faulty instruction
This version never writes beyond dest + osize, and is therefore protected against malicious data packets
note 1 : osize is the output size, therefore the original size
note 2 : destination buffer must be already allocated
*/
@ -70,6 +60,19 @@ LZ4_decode :
// Advanced Functions
//****************************
int LZ4_uncompress_unknownOutputSize (char* source, char* dest, int isize, int maxOutputSize);
/*
LZ4_uncompress_unknownOutputSize :
return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize)
If the source stream is malformed, the function will stop decoding and return a negative result, indicating the byte position of the faulty instruction
This version never writes beyond dest + maxOutputSize, and is therefore protected against malicious data packets
note 1 : isize is the input size, therefore the compressed size
note 2 : destination buffer must be already allocated
note 3 : this version is slower by up to 10%, and is therefore not recommended for general use
*/
int LZ4_compressCtx(void** ctx, char* source, char* dest, int isize);
/*
@ -85,6 +88,24 @@ LZ4_compressCtx :
*/
//****************************
// Deprecated Functions
//****************************
int LZ4_decode (char* source, char* dest, int isize);
/*
LZ4_decode :
return : the number of bytes in decoded buffer dest
note 1 : isize is the input size, therefore the compressed size
note 2 : destination buffer must be already allocated.
The program calling the decoder must know in advance the size of decoded stream to properly allocate the destination buffer
The destination buffer size must be at least "decompressedSize + 3 Bytes"
This version is unprotected against malicious data packets designed to create buffer overflow errors.
It is therefore deprecated, but still present in this version for compatibility.
*/
#if defined (__cplusplus)
}
#endif

28
main.c
View File

@ -63,7 +63,7 @@
#define CHUNKSIZE (8<<20) // 8 MB
#define CACHELINE 64
#define OUT_CHUNKSIZE (CHUNKSIZE + CHUNKSIZE/256 + CACHELINE)
#define OUT_CHUNKSIZE (CHUNKSIZE + (CHUNKSIZE>>8) + CACHELINE)
#define ARCHIVE_MAGICNUMBER 0x184C2102
#define ARCHIVE_MAGICNUMBER_SIZE 4
@ -149,6 +149,8 @@ int decode_file(char* input_filename, char* output_filename)
FILE* finput = fopen( input_filename, "rb" );
FILE* foutput = fopen( output_filename, "wb" );
size_t uselessRet;
int sinkint;
U32 nextSize;
if (finput==0 ) { printf("Pb opening %s\n", input_filename); return 4; }
if (foutput==0) { printf("Pb opening %s\n", output_filename); return 5; }
@ -160,25 +162,33 @@ int decode_file(char* input_filename, char* output_filename)
// Check Archive Header
uselessRet = fread(out_buff, 1, ARCHIVE_MAGICNUMBER_SIZE, finput);
if (*(U32*)out_buff != ARCHIVE_MAGICNUMBER) { printf("Wrong file : cannot be decoded\n"); return 6; }
uselessRet = fread(in_buff, 1, 4, finput);
nextSize = *(U32*)in_buff;
// Main Loop
while (1)
{
int outSize;
// Read Block
U32 inSize = (U32) fread(in_buff, 1, 4, finput);
if( inSize<=0 ) break;
inSize = *(U32*)in_buff;
uselessRet = fread( in_buff, 1, inSize, finput);
uselessRet = fread(in_buff, 1, nextSize, finput);
// Check Next Block
uselessRet = (U32) fread(&nextSize, 1, 4, finput);
if( uselessRet==0 ) break;
// Decode Block
outSize = LZ4_decode(in_buff, out_buff, inSize);
filesize += outSize;
sinkint = LZ4_uncompress(in_buff, out_buff, CHUNKSIZE);
filesize += CHUNKSIZE;
// Write Block
fwrite(out_buff, 1, outSize, foutput);
fwrite(out_buff, 1, CHUNKSIZE, foutput);
}
// Last Block
uselessRet = fread(in_buff, 1, nextSize, finput);
sinkint = LZ4_uncompress_unknownOutputSize(in_buff, out_buff, nextSize, CHUNKSIZE);
filesize += sinkint;
fwrite(out_buff, 1, sinkint, foutput);
// Status
printf("Successfully decoded %llu bytes \n", (unsigned long long)filesize);