New function : LZ4_uncompress : secure version which is safe against buffer overflow attacks
New function : LZ4_uncompress_unknownOutputSize : secure but slower version which also guesses the size of the data to be decoded. The demo file compression program (main.c) is updated to use these new functions. LZ4_decode is still supported but in deprecated status, due to its vulnerability to malicious buffer overflow scenarios. git-svn-id: https://lz4.googlecode.com/svn/trunk@9 650e7d94-2a16-8b24-b05c-7c0b3f6821cd
This commit is contained in:
parent
0121f47e00
commit
6b798d5e40
215
lz4.c
215
lz4.c
@ -35,6 +35,16 @@
|
||||
#include "lz4.h"
|
||||
|
||||
|
||||
//**************************************
|
||||
// Performance parameter <---------------------------------------------------------
|
||||
//**************************************
|
||||
// Lowering this value reduces memory usage
|
||||
// It may also improve speed, especially if you reach L1 cache size (32KB for Intel, 64KB for AMD)
|
||||
// Expanding memory usage typically improves compression ratio
|
||||
// Memory usage formula : N->2^(N+2) Bytes (examples : 17 -> 512KB ; 12 -> 16KB)
|
||||
#define HASH_LOG 17
|
||||
|
||||
|
||||
//**************************************
|
||||
// Basic Types
|
||||
//**************************************
|
||||
@ -61,7 +71,6 @@
|
||||
#define MAXD_LOG 16
|
||||
#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
|
||||
|
||||
#define HASH_LOG 17 // <--- Lower this value to lower memory usage. N->2^(N+2) Bytes (ex : 17 -> 512KB)
|
||||
#define HASHTABLESIZE (1 << HASH_LOG)
|
||||
#define HASH_MASK (HASHTABLESIZE - 1)
|
||||
|
||||
@ -145,27 +154,21 @@ int LZ4_compressCtx(void** ctx,
|
||||
ref = HashTable[h];
|
||||
HashTable[h] = ip;
|
||||
|
||||
// Min Match
|
||||
// Check Min Match
|
||||
if (( ((ip-ref) >> MAXD_LOG) != 0) || (*(U32*)ref != sequence))
|
||||
{
|
||||
if (ip-anchor>limit) { limit<<=1; step += 1 + (step>>2); }
|
||||
ip+=step;
|
||||
if (ip-anchor>limit) { limit <<= 1; step += 1 + (step>>2); }
|
||||
ip += step;
|
||||
continue;
|
||||
}
|
||||
|
||||
// catch up
|
||||
if (step>1)
|
||||
{
|
||||
HashTable[h] = ref;
|
||||
ip -= (step-1);
|
||||
step=1;
|
||||
continue;
|
||||
}
|
||||
limit=INCOMPRESSIBLE;
|
||||
if (step>1) { HashTable[h] = ref; ip -= (step-1); step=1; continue; }
|
||||
limit = INCOMPRESSIBLE;
|
||||
|
||||
// Encode Literal length
|
||||
len = length = ip - anchor;
|
||||
orun=op++;
|
||||
orun = op++;
|
||||
if (len>(RUN_MASK-1)) { *orun=(RUN_MASK<<ML_BITS); len-=RUN_MASK; for(; len > 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; }
|
||||
else *orun = (len<<ML_BITS);
|
||||
|
||||
@ -191,14 +194,16 @@ int LZ4_compressCtx(void** ctx,
|
||||
anchor = ip;
|
||||
}
|
||||
|
||||
|
||||
// Encode Last Literals
|
||||
len = length = iend - anchor;
|
||||
orun=op++;
|
||||
if (len>(RUN_MASK-1)) { *orun=(RUN_MASK<<ML_BITS); len-=RUN_MASK; for(; len > 254 ; len-=255) *op++ = 255; *op++ = (BYTE) len; }
|
||||
else *orun = (len<<ML_BITS);
|
||||
for(;length>0;length-=4) { *(U32*)op = *(U32*)anchor; op+=4; anchor+=4; }
|
||||
op += length; // correction
|
||||
if (length)
|
||||
{
|
||||
orun=op++;
|
||||
if (len>(RUN_MASK-1)) { *orun=(RUN_MASK<<ML_BITS); len-=RUN_MASK; for(; len > 254 ; len-=255) *op++ = 255; *op++ = (BYTE) len; }
|
||||
else *orun = (len<<ML_BITS);
|
||||
for(;length>0;length-=4) { *(U32*)op = *(U32*)anchor; op+=4; anchor+=4; }
|
||||
op += length; // correction
|
||||
}
|
||||
|
||||
// End
|
||||
return (int) (((char*)op)-dest);
|
||||
@ -209,15 +214,168 @@ int LZ4_compressCtx(void** ctx,
|
||||
//****************************
|
||||
// Decompression CODE
|
||||
//****************************
|
||||
int LZ4_uncompress(char* source,
|
||||
char* dest,
|
||||
int osize)
|
||||
{
|
||||
// Local Variables
|
||||
BYTE *ip = (BYTE*) source;
|
||||
|
||||
BYTE *op = (BYTE*) dest,
|
||||
*oend=(BYTE*) dest + osize,
|
||||
*ref, *cpy,
|
||||
runcode;
|
||||
|
||||
U32 dec[4]={0, 3, 2, 3};
|
||||
int len, length;
|
||||
|
||||
|
||||
// Main Loop
|
||||
while (1)
|
||||
{
|
||||
// get runlength
|
||||
runcode = *ip++;
|
||||
if ((length=(runcode>>ML_BITS)) == RUN_MASK) { for (;(len=*ip++)==255;length+=255){} length += len; }
|
||||
|
||||
// copy literals
|
||||
ref = op+length;
|
||||
if (ref>oend-4)
|
||||
{
|
||||
if (ref > oend) goto _output_error;
|
||||
while(op<oend-3) { *(U32*)op=*(U32*)ip; op+=4; ip+=4; }
|
||||
while(op<ref) *op++=*ip++;
|
||||
break; // Necessarily EOF
|
||||
}
|
||||
while (op<ref) { *(U32*)op = *(U32*)ip; op+=4; ip+=4; }
|
||||
ip-=(op-ref); op=ref; // correction
|
||||
|
||||
// get offset
|
||||
ref -= *(U16*)ip; ip+=2;
|
||||
|
||||
// get matchlength
|
||||
if ((length=(runcode&ML_MASK)) == ML_MASK) { for (;(len=*ip++)==255;length+=255){} length += len; }
|
||||
length += MINMATCH;
|
||||
|
||||
// copy repeated sequence
|
||||
cpy = op + length;
|
||||
if (op-ref<4)
|
||||
{
|
||||
*op++ = *ref++;
|
||||
*op++ = *ref++;
|
||||
*op++ = *ref++;
|
||||
*op++ = *ref++;
|
||||
ref -= dec[op-ref];
|
||||
}
|
||||
if (cpy>oend-4)
|
||||
{
|
||||
if (cpy > oend) goto _output_error;
|
||||
while(op<cpy-3) { *(U32*)op=*(U32*)ref; op+=4; ref+=4; }
|
||||
while(op<cpy) *op++=*ref++;
|
||||
if (op>=oend) break; // Check EOF
|
||||
continue;
|
||||
}
|
||||
while(op<cpy) { *(U32*)op=*(U32*)ref; op+=4; ref+=4; }
|
||||
op=cpy; // correction
|
||||
}
|
||||
|
||||
// end of decoding
|
||||
return (int) (((char*)ip)-source);
|
||||
|
||||
// write overflow error detected
|
||||
_output_error:
|
||||
return (int) (-(((char*)ip)-source));
|
||||
}
|
||||
|
||||
|
||||
int LZ4_uncompress_unknownOutputSize(
|
||||
char* source,
|
||||
char* dest,
|
||||
int isize,
|
||||
int maxOutputSize)
|
||||
{
|
||||
// Local Variables
|
||||
BYTE *ip = (BYTE*) source,
|
||||
*iend = ip + isize;
|
||||
|
||||
BYTE *op = (BYTE*) dest,
|
||||
*oend = op + maxOutputSize,
|
||||
*ref, *cpy,
|
||||
runcode;
|
||||
|
||||
U32 dec[4]={0, 3, 2, 3};
|
||||
int len, length;
|
||||
|
||||
|
||||
// Main Loop
|
||||
while (ip<iend)
|
||||
{
|
||||
// get runlength
|
||||
runcode = *ip++;
|
||||
if ((length=(runcode>>ML_BITS)) == RUN_MASK) { for (;(len=*ip++)==255;length+=255){} length += len; }
|
||||
|
||||
// copy literals
|
||||
ref = op+length;
|
||||
if (ref>oend-4)
|
||||
{
|
||||
if (ref > oend) goto _output_error;
|
||||
while(op<oend-3) { *(U32*)op=*(U32*)ip; op+=4; ip+=4; }
|
||||
while(op<ref) *op++=*ip++;
|
||||
break; // Necessarily EOF
|
||||
}
|
||||
while (op<ref) { *(U32*)op = *(U32*)ip; op+=4; ip+=4; }
|
||||
ip-=(op-ref); op=ref; // correction
|
||||
if (ip>=iend) break; // check EOF
|
||||
|
||||
// get offset
|
||||
ref -= *(U16*)ip; ip+=2;
|
||||
|
||||
// get matchlength
|
||||
if ((length=(runcode&ML_MASK)) == ML_MASK) { for (;(len=*ip++)==255;length+=255){} length += len; }
|
||||
length += MINMATCH;
|
||||
|
||||
// copy repeated sequence
|
||||
cpy = op + length;
|
||||
if (op-ref<4)
|
||||
{
|
||||
*op++ = *ref++;
|
||||
*op++ = *ref++;
|
||||
*op++ = *ref++;
|
||||
*op++ = *ref++;
|
||||
ref -= dec[op-ref];
|
||||
}
|
||||
if (cpy>oend-4)
|
||||
{
|
||||
if (cpy > oend) goto _output_error;
|
||||
while(op<cpy-3) { *(U32*)op=*(U32*)ref; op+=4; ref+=4; }
|
||||
while(op<cpy) *op++=*ref++;
|
||||
if (op>=oend) break; // Check EOF
|
||||
continue;
|
||||
}
|
||||
while(op<cpy) { *(U32*)op=*(U32*)ref; op+=4; ref+=4; }
|
||||
op=cpy; // correction
|
||||
}
|
||||
|
||||
// end of decoding
|
||||
return (int) (((char*)op)-dest);
|
||||
|
||||
// write overflow error detected
|
||||
_output_error:
|
||||
return (int) (-(((char*)ip)-source));
|
||||
}
|
||||
|
||||
|
||||
//****************************
|
||||
// Deprecated functions
|
||||
//****************************
|
||||
int LZ4_decode ( char* source,
|
||||
char* dest,
|
||||
int isize)
|
||||
{
|
||||
// Local Variables
|
||||
BYTE *ip = (BYTE*) source,
|
||||
*iend = (BYTE*) source + isize;
|
||||
BYTE *ip = (BYTE*)source,
|
||||
*iend = ip + isize;
|
||||
|
||||
BYTE *op = (BYTE*) dest,
|
||||
BYTE *op = (BYTE*)dest,
|
||||
*ref, *cpy,
|
||||
runcode;
|
||||
|
||||
@ -233,11 +391,7 @@ int LZ4_decode ( char* source,
|
||||
if ((length=(runcode>>ML_BITS)) == RUN_MASK) { for (;(len=*ip++)==255;length+=255){} length += len; }
|
||||
|
||||
// copy literals
|
||||
ref = op+length;
|
||||
#ifdef SAFEWRITEBUFFER
|
||||
if (ref>iend-4) { while(op<iend-3) { *(U32*)op=*(U32*)ip; op+=4; ip+=4; } while(op<ref) *op++=*ip++; }
|
||||
else
|
||||
#endif
|
||||
ref=op+length;
|
||||
while (op<ref) { *(U32*)op = *(U32*)ip; op+=4; ip+=4; }
|
||||
ip-=(op-ref); op=ref; // correction
|
||||
if (ip>=iend) break; // Check EOF
|
||||
@ -259,10 +413,6 @@ int LZ4_decode ( char* source,
|
||||
*op++ = *ref++;
|
||||
ref -= dec[op-ref];
|
||||
}
|
||||
#ifdef SAFEWRITEBUFFER
|
||||
if (cpy>iend-4) { while(op<iend-3) { *(U32*)op=*(U32*)ref; op+=4; ref+=4; } while(op<cpy) *op++=*ref++; }
|
||||
else
|
||||
#endif
|
||||
while(op<cpy) { *(U32*)op=*(U32*)ref; op+=4; ref+=4; }
|
||||
op=cpy; // correction
|
||||
}
|
||||
@ -270,6 +420,3 @@ int LZ4_decode ( char* source,
|
||||
// end of decoding
|
||||
return (int) (((char*)op)-dest);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
57
lz4.h
57
lz4.h
@ -33,22 +33,12 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
//****************************
|
||||
// Instructions
|
||||
//****************************
|
||||
|
||||
// Uncomment next line to ensure that LZ4_Decode will never write in destination buffer more than "decompressedSize" bytes
|
||||
// If commented, the decoder may write up to 3 bytes more than decompressedSize, so provide extra room in dest buffer for that
|
||||
// Recommendation : keep commented, for improved performance; ensure that destination buffer is at least decompressedSize + 3 Bytes
|
||||
// #define SAFEWRITEBUFFER
|
||||
|
||||
|
||||
//****************************
|
||||
// Simple Functions
|
||||
//****************************
|
||||
|
||||
int LZ4_compress (char* source, char* dest, int isize);
|
||||
int LZ4_decode (char* source, char* dest, int isize);
|
||||
int LZ4_compress (char* source, char* dest, int isize);
|
||||
int LZ4_uncompress (char* source, char* dest, int osize);
|
||||
|
||||
/*
|
||||
LZ4_compress :
|
||||
@ -57,12 +47,12 @@ LZ4_compress :
|
||||
To avoid any problem, size it to handle worst cases situations (input data not compressible)
|
||||
Worst case size is : "inputsize + 0.4%", with "0.4%" being at least 8 bytes.
|
||||
|
||||
LZ4_decode :
|
||||
return : the number of bytes in decoded buffer dest
|
||||
note 1 : isize is the input size, therefore the compressed size
|
||||
note 2 : destination buffer must be already allocated.
|
||||
The program calling the decoder must know in advance the size of decoded stream to properly allocate the destination buffer
|
||||
Note that, in fast mode, the destination buffer size must be at least "decompressedSize + 3 Bytes"
|
||||
LZ4_uncompress :
|
||||
return : the number of bytes read in the source buffer
|
||||
If the source stream is malformed, the function will stop decoding and return a negative result, indicating the byte position of the faulty instruction
|
||||
This version never writes beyond dest + osize, and is therefore protected against malicious data packets
|
||||
note 1 : osize is the output size, therefore the original size
|
||||
note 2 : destination buffer must be already allocated
|
||||
*/
|
||||
|
||||
|
||||
@ -70,6 +60,19 @@ LZ4_decode :
|
||||
// Advanced Functions
|
||||
//****************************
|
||||
|
||||
int LZ4_uncompress_unknownOutputSize (char* source, char* dest, int isize, int maxOutputSize);
|
||||
|
||||
/*
|
||||
LZ4_uncompress_unknownOutputSize :
|
||||
return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize)
|
||||
If the source stream is malformed, the function will stop decoding and return a negative result, indicating the byte position of the faulty instruction
|
||||
This version never writes beyond dest + maxOutputSize, and is therefore protected against malicious data packets
|
||||
note 1 : isize is the input size, therefore the compressed size
|
||||
note 2 : destination buffer must be already allocated
|
||||
note 3 : this version is slower by up to 10%, and is therefore not recommended for general use
|
||||
*/
|
||||
|
||||
|
||||
int LZ4_compressCtx(void** ctx, char* source, char* dest, int isize);
|
||||
|
||||
/*
|
||||
@ -85,6 +88,24 @@ LZ4_compressCtx :
|
||||
*/
|
||||
|
||||
|
||||
//****************************
|
||||
// Deprecated Functions
|
||||
//****************************
|
||||
|
||||
int LZ4_decode (char* source, char* dest, int isize);
|
||||
|
||||
/*
|
||||
LZ4_decode :
|
||||
return : the number of bytes in decoded buffer dest
|
||||
note 1 : isize is the input size, therefore the compressed size
|
||||
note 2 : destination buffer must be already allocated.
|
||||
The program calling the decoder must know in advance the size of decoded stream to properly allocate the destination buffer
|
||||
The destination buffer size must be at least "decompressedSize + 3 Bytes"
|
||||
This version is unprotected against malicious data packets designed to create buffer overflow errors.
|
||||
It is therefore deprecated, but still present in this version for compatibility.
|
||||
*/
|
||||
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
28
main.c
28
main.c
@ -63,7 +63,7 @@
|
||||
|
||||
#define CHUNKSIZE (8<<20) // 8 MB
|
||||
#define CACHELINE 64
|
||||
#define OUT_CHUNKSIZE (CHUNKSIZE + CHUNKSIZE/256 + CACHELINE)
|
||||
#define OUT_CHUNKSIZE (CHUNKSIZE + (CHUNKSIZE>>8) + CACHELINE)
|
||||
#define ARCHIVE_MAGICNUMBER 0x184C2102
|
||||
#define ARCHIVE_MAGICNUMBER_SIZE 4
|
||||
|
||||
@ -149,6 +149,8 @@ int decode_file(char* input_filename, char* output_filename)
|
||||
FILE* finput = fopen( input_filename, "rb" );
|
||||
FILE* foutput = fopen( output_filename, "wb" );
|
||||
size_t uselessRet;
|
||||
int sinkint;
|
||||
U32 nextSize;
|
||||
|
||||
if (finput==0 ) { printf("Pb opening %s\n", input_filename); return 4; }
|
||||
if (foutput==0) { printf("Pb opening %s\n", output_filename); return 5; }
|
||||
@ -160,25 +162,33 @@ int decode_file(char* input_filename, char* output_filename)
|
||||
// Check Archive Header
|
||||
uselessRet = fread(out_buff, 1, ARCHIVE_MAGICNUMBER_SIZE, finput);
|
||||
if (*(U32*)out_buff != ARCHIVE_MAGICNUMBER) { printf("Wrong file : cannot be decoded\n"); return 6; }
|
||||
uselessRet = fread(in_buff, 1, 4, finput);
|
||||
nextSize = *(U32*)in_buff;
|
||||
|
||||
// Main Loop
|
||||
while (1)
|
||||
{
|
||||
int outSize;
|
||||
// Read Block
|
||||
U32 inSize = (U32) fread(in_buff, 1, 4, finput);
|
||||
if( inSize<=0 ) break;
|
||||
inSize = *(U32*)in_buff;
|
||||
uselessRet = fread( in_buff, 1, inSize, finput);
|
||||
uselessRet = fread(in_buff, 1, nextSize, finput);
|
||||
|
||||
// Check Next Block
|
||||
uselessRet = (U32) fread(&nextSize, 1, 4, finput);
|
||||
if( uselessRet==0 ) break;
|
||||
|
||||
// Decode Block
|
||||
outSize = LZ4_decode(in_buff, out_buff, inSize);
|
||||
filesize += outSize;
|
||||
sinkint = LZ4_uncompress(in_buff, out_buff, CHUNKSIZE);
|
||||
filesize += CHUNKSIZE;
|
||||
|
||||
// Write Block
|
||||
fwrite(out_buff, 1, outSize, foutput);
|
||||
fwrite(out_buff, 1, CHUNKSIZE, foutput);
|
||||
}
|
||||
|
||||
// Last Block
|
||||
uselessRet = fread(in_buff, 1, nextSize, finput);
|
||||
sinkint = LZ4_uncompress_unknownOutputSize(in_buff, out_buff, nextSize, CHUNKSIZE);
|
||||
filesize += sinkint;
|
||||
fwrite(out_buff, 1, sinkint, foutput);
|
||||
|
||||
// Status
|
||||
printf("Successfully decoded %llu bytes \n", (unsigned long long)filesize);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user