small decompression speed improvement

This commit is contained in:
Yann Collet 2015-10-19 02:14:43 +01:00
parent 2ec0cf21f8
commit 107c5755f4

View File

@ -84,7 +84,7 @@
#endif #endif
/******************************************************** /* *******************************************************
* Constants * Constants
*********************************************************/ *********************************************************/
#define HASH_LOG (ZSTD_MEMORY_USAGE - 2) #define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
@ -129,9 +129,9 @@ static const size_t ZSTD_blockHeaderSize = 3;
static const size_t ZSTD_frameHeaderSize = 4; static const size_t ZSTD_frameHeaderSize = 4;
/******************************************************** /* *******************************************************
* Memory operations * Memory operations
*********************************************************/ **********************************************************/
static size_t ZSTD_read_ARCH(const void* p) { size_t r; memcpy(&r, p, sizeof(r)); return r; } static size_t ZSTD_read_ARCH(const void* p) { size_t r; memcpy(&r, p, sizeof(r)); return r; }
static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); } static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
@ -140,18 +140,19 @@ static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; } #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
/*! ZSTD_wildcopy : custom version of memcpy(), can copy up to 7-8 bytes too many */
static void ZSTD_wildcopy(void* dst, const void* src, size_t length) static void ZSTD_wildcopy(void* dst, const void* src, size_t length)
{ {
const BYTE* ip = (const BYTE*)src; const BYTE* ip = (const BYTE*)src;
BYTE* op = (BYTE*)dst; BYTE* op = (BYTE*)dst;
BYTE* const oend = op + length; BYTE* const oend = op + length;
while (op < oend) COPY8(op, ip); do COPY8(op, ip) while (op < oend);
} }
/************************************** /* **************************************
* Local structures * Local structures
***************************************/ ****************************************/
typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t; typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
typedef struct typedef struct
@ -228,9 +229,9 @@ size_t ZSTD_freeCCtx(ZSTD_Cctx* ctx)
} }
/************************************** /* *************************************
* Error Management * Error Management
**************************************/ ***************************************/
/*! ZSTD_isError /*! ZSTD_isError
* tells if a return value is an error code */ * tells if a return value is an error code */
unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
@ -240,9 +241,9 @@ unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); }
/************************************** /* *************************************
* Tool functions * Tool functions
**************************************/ ***************************************/
unsigned ZSTD_versionNumber (void) { return ZSTD_VERSION_NUMBER; } unsigned ZSTD_versionNumber (void) { return ZSTD_VERSION_NUMBER; }
static unsigned ZSTD_highbit(U32 val) static unsigned ZSTD_highbit(U32 val)
@ -354,7 +355,7 @@ static unsigned ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInL
} }
/******************************************************** /* *******************************************************
* Compression * Compression
*********************************************************/ *********************************************************/
size_t ZSTD_compressBound(size_t srcSize) /* maximum compressed size */ size_t ZSTD_compressBound(size_t srcSize) /* maximum compressed size */
@ -998,9 +999,9 @@ size_t ZSTD_compress(void* dst, size_t maxDstSize, const void* src, size_t srcSi
} }
/************************************************************** /* *************************************************************
* Decompression code * Decompression section
**************************************************************/ ***************************************************************/
struct ZSTD_Dctx_s struct ZSTD_Dctx_s
{ {
U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)]; U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
@ -1015,7 +1016,7 @@ struct ZSTD_Dctx_s
size_t litBufSize; size_t litBufSize;
size_t litSize; size_t litSize;
BYTE litBuffer[BLOCKSIZE]; BYTE litBuffer[BLOCKSIZE];
}; /* typedef'd to ZSTD_Dctx */ }; /* typedef'd to ZSTD_Dctx within "zstd_static.h" */
size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr) size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
{ {
@ -1256,16 +1257,14 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
seqState->prevOffset = seq->offset; seqState->prevOffset = seq->offset;
if (litLength == MaxLL) if (litLength == MaxLL)
{ {
U32 add = dumps<de ? *dumps++ : 0; U32 add = *dumps++;
if (add < 255) litLength += add; if (add < 255) litLength += add;
else else
{ {
if (dumps<=(de-3)) litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
{ dumps += 3;
litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
dumps += 3;
}
} }
if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
} }
/* Offset */ /* Offset */
@ -1277,23 +1276,21 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
if (offsetCode==0) nbBits = 0; /* cmove */ if (offsetCode==0) nbBits = 0; /* cmove */
offset = ((size_t)1 << (nbBits & ((sizeof(offset)*8)-1))) + BIT_readBits(&(seqState->DStream), nbBits); offset = ((size_t)1 << (nbBits & ((sizeof(offset)*8)-1))) + BIT_readBits(&(seqState->DStream), nbBits);
if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
if (offsetCode==0) offset = prevOffset; if (offsetCode==0) offset = prevOffset; /* cmove */
} }
/* MatchLength */ /* MatchLength */
matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
if (matchLength == MaxML) if (matchLength == MaxML)
{ {
U32 add = dumps<de ? *dumps++ : 0; U32 add = *dumps++;
if (add < 255) matchLength += add; if (add < 255) matchLength += add;
else else
{ {
if (dumps<=(de-3)) matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
{ dumps += 3;
matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
dumps += 3;
}
} }
if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
} }
matchLength += MINMATCH; matchLength += MINMATCH;
@ -1313,33 +1310,32 @@ static size_t ZSTD_execSequence(BYTE* op,
static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */ static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */
static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* subtracted */ static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* subtracted */
const BYTE* const ostart = op; const BYTE* const ostart = op;
const size_t litLength = sequence.litLength; const size_t litLength = sequence.litLength; /* for some reason, doing the same with matchLength decreases speed ... */
BYTE* const endMatch = op + litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */ BYTE* const oMatchEnd = op + litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */
const BYTE* const litEnd = *litPtr + litLength; const BYTE* const litEnd = *litPtr + litLength;
/* check */ /* check */
if (endMatch > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */ if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */
if (litEnd > litLimit) return ERROR(corruption_detected); if (litEnd > litLimit) return ERROR(corruption_detected);
if (sequence.matchLength > (size_t)(*litPtr-op)) return ERROR(dstSize_tooSmall); /* overwrite literal segment */
/* copy Literals */ /* copy Literals */
if (((size_t)(*litPtr - op) < 8) || ((size_t)(oend-litEnd) < 8) || (op+litLength > oend-8)) if (op+litLength > oend-8) /* ZSTD_wildcopy() risks overwrite */
memmove(op, *litPtr, litLength); /* overwrite risk */ memmove(op, *litPtr, litLength);
else else
ZSTD_wildcopy(op, *litPtr, litLength); ZSTD_wildcopy(op, *litPtr, litLength);
op += litLength; op += litLength;
*litPtr = litEnd; /* update for next sequence */ *litPtr = litEnd; /* update for next sequence */
/* check : last match must be at a minimum distance of 8 from end of dest buffer */ /* check : last match must be at a minimum distance of 8 from end of dst buffer */
if (oend-op < 8) return ERROR(dstSize_tooSmall); if (oend-op < 8) return ERROR(dstSize_tooSmall);
/* copy Match */ /* copy Match */
{ {
const BYTE* match = op - sequence.offset; /* possible underflow at op - offset ? */ const BYTE* match = op - sequence.offset;
/* check */ /* check */
if (match < base) return ERROR(corruption_detected); if (match < base) return ERROR(corruption_detected);
if (sequence.offset > (size_t)base) return ERROR(corruption_detected); if (match > op) return ERROR(corruption_detected); /* address space overflow test */
/* close range match, overlap */ /* close range match, overlap */
if (sequence.offset < 8) if (sequence.offset < 8)
@ -1355,7 +1351,7 @@ static size_t ZSTD_execSequence(BYTE* op,
} else { ZSTD_copy8(op, match); } } else { ZSTD_copy8(op, match); }
op += 8; match += 8; op += 8; match += 8;
if (endMatch > oend-12) if (oMatchEnd > oend-12)
{ {
if (op < oend-8) if (op < oend-8)
{ {
@ -1363,15 +1359,18 @@ static size_t ZSTD_execSequence(BYTE* op,
match += (oend-8) - op; match += (oend-8) - op;
op = oend-8; op = oend-8;
} }
while (op<endMatch) *op++ = *match++; while (op < oMatchEnd) *op++ = *match++;
} }
else else
{
ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */ ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */
}
} }
return endMatch-ostart; return oMatchEnd - ostart;
} }
static size_t ZSTD_decompressSequences( static size_t ZSTD_decompressSequences(
void* ctx, void* ctx,
void* dst, size_t maxDstSize, void* dst, size_t maxDstSize,
@ -1480,7 +1479,7 @@ static size_t ZSTD_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const
magicNumber = MEM_readLE32(src); magicNumber = MEM_readLE32(src);
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1) #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
if (magicNumber == ZSTDv01_magicNumberLE) return ZSTDv01_decompressDCtx(ctx, dst, maxDstSize, src, srcSize); if (magicNumber == ZSTDv01_magicNumberLE) return ZSTDv01_decompressDCtx(ctx, dst, maxDstSize, src, srcSize);
#endif // defined #endif /* ZSTD_LEGACY_SUPPORT */
if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown); if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize; ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;