decompress_generic: Add a loop fastpath
Copy the main loop, and change checks such that op is always less than oend-SAFE_DISTANCE. Currently these are added for the literal copy length check, and for the match copy length check. Otherwise the first loop is exactly the same as the second. Follow on diffs will optimize the first copy loop based on this new requirement. I also tried instead making a separate inlineable function for the copy loop (similar to existing partialDecode flags, etc), but I think the changes might be significant enough to warrent doubling the code, instead pulling out common functionality to separate functions. This is the basic transformation that will allow several following optimisations.
This commit is contained in:
parent
4da336062e
commit
28356e02ad
160
lib/lz4.c
160
lib/lz4.c
@ -307,6 +307,7 @@ void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
|
||||
#define LASTLITERALS 5 /* see ../doc/lz4_Block_format.md#parsing-restrictions */
|
||||
#define MFLIMIT 12 /* see ../doc/lz4_Block_format.md#parsing-restrictions */
|
||||
#define MATCH_SAFEGUARD_DISTANCE ((2*WILDCOPYLENGTH) - MINMATCH) /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
|
||||
#define FASTLOOP_SAFE_DISTANCE 32
|
||||
static const int LZ4_minLength = (MFLIMIT+1);
|
||||
|
||||
#define KB *(1 <<10)
|
||||
@ -1500,10 +1501,17 @@ LZ4_decompress_generic(
|
||||
const int safeDecode = (endOnInput==endOnInputSize);
|
||||
const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
|
||||
|
||||
|
||||
/* Set up the "end" pointers for the shortcut. */
|
||||
const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/;
|
||||
const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/;
|
||||
|
||||
const BYTE* match;
|
||||
size_t offset;
|
||||
unsigned token;
|
||||
size_t length;
|
||||
|
||||
|
||||
DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize);
|
||||
|
||||
/* Special cases */
|
||||
@ -1512,13 +1520,149 @@ LZ4_decompress_generic(
|
||||
if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0 ? 1 : -1);
|
||||
if ((endOnInput) && unlikely(srcSize==0)) return -1;
|
||||
|
||||
/* Main Loop : decode sequences */
|
||||
while (1) {
|
||||
const BYTE* match;
|
||||
size_t offset;
|
||||
if ((oend - op) < FASTLOOP_SAFE_DISTANCE)
|
||||
goto safe_decode;
|
||||
|
||||
unsigned const token = *ip++;
|
||||
size_t length = token >> ML_BITS; /* literal length */
|
||||
/* Fast loop : decode sequences as long as output < iend-FASTLOOP_SAFE_DISTANCE */
|
||||
while (1) {
|
||||
/* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */
|
||||
assert(oend - op >= FASTLOOP_SAFE_DISTANCE);
|
||||
|
||||
token = *ip++;
|
||||
length = token >> ML_BITS; /* literal length */
|
||||
|
||||
assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
|
||||
|
||||
/* decode literal length */
|
||||
if (length == RUN_MASK) {
|
||||
variable_length_error error = ok;
|
||||
length += read_variable_length(&ip, iend-RUN_MASK, endOnInput, endOnInput, &error);
|
||||
if (error == initial_error) goto _output_error;
|
||||
if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) goto _output_error; /* overflow detection */
|
||||
if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) goto _output_error; /* overflow detection */
|
||||
}
|
||||
|
||||
/* copy literals */
|
||||
cpy = op+length;
|
||||
LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
|
||||
if ( ((endOnInput) && ((cpy>oend-FASTLOOP_SAFE_DISTANCE) || (ip+length>iend-(2+1+LASTLITERALS))) )
|
||||
|| ((!endOnInput) && (cpy>oend-FASTLOOP_SAFE_DISTANCE)) )
|
||||
{
|
||||
goto safe_literal_copy;
|
||||
} else {
|
||||
LZ4_wildCopy(op, ip, cpy); /* may overwrite up to WILDCOPYLENGTH beyond cpy */
|
||||
ip += length; op = cpy;
|
||||
}
|
||||
|
||||
/* get offset */
|
||||
offset = LZ4_readLE16(ip); ip+=2;
|
||||
match = op - offset;
|
||||
|
||||
/* get matchlength */
|
||||
length = token & ML_MASK;
|
||||
|
||||
if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */
|
||||
if (!partialDecoding) {
|
||||
assert(oend > op);
|
||||
assert(oend - op >= 4);
|
||||
LZ4_write32(op, 0); /* silence an msan warning when offset==0; costs <1%; */
|
||||
} /* note : when partialDecoding, there is no guarantee that at least 4 bytes remain available in output buffer */
|
||||
|
||||
if (length == ML_MASK) {
|
||||
variable_length_error error = ok;
|
||||
length += read_variable_length(&ip, iend - LASTLITERALS + 1, endOnInput, 0, &error);
|
||||
if (error != ok) goto _output_error;
|
||||
if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error; /* overflow detection */
|
||||
}
|
||||
length += MINMATCH;
|
||||
|
||||
if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
|
||||
goto safe_match_copy;
|
||||
}
|
||||
|
||||
/* match starting within external dictionary */
|
||||
if ((dict==usingExtDict) && (match < lowPrefix)) {
|
||||
if (unlikely(op+length > oend-LASTLITERALS)) {
|
||||
if (partialDecoding) length = MIN(length, (size_t)(oend-op));
|
||||
else goto _output_error; /* doesn't respect parsing restriction */
|
||||
}
|
||||
|
||||
if (length <= (size_t)(lowPrefix-match)) {
|
||||
/* match fits entirely within external dictionary : just copy */
|
||||
memmove(op, dictEnd - (lowPrefix-match), length);
|
||||
op += length;
|
||||
} else {
|
||||
/* match stretches into both external dictionary and current block */
|
||||
size_t const copySize = (size_t)(lowPrefix - match);
|
||||
size_t const restSize = length - copySize;
|
||||
memcpy(op, dictEnd - copySize, copySize);
|
||||
op += copySize;
|
||||
if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */
|
||||
BYTE* const endOfMatch = op + restSize;
|
||||
const BYTE* copyFrom = lowPrefix;
|
||||
while (op < endOfMatch) *op++ = *copyFrom++;
|
||||
} else {
|
||||
memcpy(op, lowPrefix, restSize);
|
||||
op += restSize;
|
||||
} }
|
||||
continue;
|
||||
}
|
||||
|
||||
/* copy match within block */
|
||||
cpy = op + length;
|
||||
|
||||
/* partialDecoding : may not respect endBlock parsing restrictions */
|
||||
assert(op<=oend);
|
||||
if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
|
||||
size_t const mlen = MIN(length, (size_t)(oend-op));
|
||||
const BYTE* const matchEnd = match + mlen;
|
||||
BYTE* const copyEnd = op + mlen;
|
||||
if (matchEnd > op) { /* overlap copy */
|
||||
while (op < copyEnd) *op++ = *match++;
|
||||
} else {
|
||||
memcpy(op, match, mlen);
|
||||
}
|
||||
op = copyEnd;
|
||||
if (op==oend) goto decode_done;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (unlikely(offset<8)) {
|
||||
op[0] = match[0];
|
||||
op[1] = match[1];
|
||||
op[2] = match[2];
|
||||
op[3] = match[3];
|
||||
match += inc32table[offset];
|
||||
memcpy(op+4, match, 4);
|
||||
match -= dec64table[offset];
|
||||
} else {
|
||||
memcpy(op, match, 8);
|
||||
match += 8;
|
||||
}
|
||||
op += 8;
|
||||
|
||||
if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
|
||||
BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
|
||||
if (cpy > oend-LASTLITERALS) goto _output_error; /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
|
||||
if (op < oCopyLimit) {
|
||||
LZ4_wildCopy(op, match, oCopyLimit);
|
||||
match += oCopyLimit - op;
|
||||
op = oCopyLimit;
|
||||
}
|
||||
while (op < cpy) *op++ = *match++;
|
||||
} else {
|
||||
memcpy(op, match, 8);
|
||||
if (length > 16) LZ4_wildCopy(op+8, match+8, cpy);
|
||||
}
|
||||
op = cpy; /* wildcopy correction */
|
||||
}
|
||||
|
||||
safe_decode:
|
||||
|
||||
/* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */
|
||||
while (1) {
|
||||
token = *ip++;
|
||||
length = token >> ML_BITS; /* literal length */
|
||||
|
||||
assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
|
||||
|
||||
@ -1574,6 +1718,7 @@ LZ4_decompress_generic(
|
||||
|
||||
/* copy literals */
|
||||
cpy = op+length;
|
||||
safe_literal_copy:
|
||||
LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
|
||||
if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) )
|
||||
|| ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
|
||||
@ -1621,6 +1766,8 @@ LZ4_decompress_generic(
|
||||
}
|
||||
length += MINMATCH;
|
||||
|
||||
safe_match_copy:
|
||||
|
||||
/* match starting within external dictionary */
|
||||
if ((dict==usingExtDict) && (match < lowPrefix)) {
|
||||
if (unlikely(op+length > oend-LASTLITERALS)) {
|
||||
@ -1697,6 +1844,7 @@ LZ4_decompress_generic(
|
||||
}
|
||||
op = cpy; /* wildcopy correction */
|
||||
}
|
||||
decode_done:
|
||||
|
||||
/* end of decoding */
|
||||
if (endOnInput)
|
||||
|
Loading…
Reference in New Issue
Block a user