decompress_generic: optimize match copy
Add LZ4_wildCopy16, a wildcopy variant that copies in 16-byte chunks and may overwrite up to 16 bytes beyond the end of the destination, and use it for match copies. On x64, this avoids many blocked loads caused by store forwarding, similar to issue #411.
This commit is contained in:
parent
28356e02ad
commit
5dfa7d422b
29
lib/lz4.c
29
lib/lz4.c
@ -297,6 +297,16 @@ void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
|
|||||||
do { memcpy(d,s,8); d+=8; s+=8; } while (d<e);
|
do { memcpy(d,s,8); d+=8; s+=8; } while (d<e);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* customized variant of memcpy, which can overwrite up to 16 bytes beyond dstEnd */
|
||||||
|
LZ4_FORCE_O2_INLINE_GCC_PPC64LE
|
||||||
|
void LZ4_wildCopy16(void* dstPtr, const void* srcPtr, void* dstEnd)
|
||||||
|
{
|
||||||
|
BYTE* d = (BYTE*)dstPtr;
|
||||||
|
const BYTE* s = (const BYTE*)srcPtr;
|
||||||
|
BYTE* const e = (BYTE*)dstEnd;
|
||||||
|
|
||||||
|
do { memcpy(d,s,16); d+=16; s+=16; } while (d<e);
|
||||||
|
}
|
||||||
|
|
||||||
/*-************************************
|
/*-************************************
|
||||||
* Common Constants
|
* Common Constants
|
||||||
@ -1627,7 +1637,8 @@ LZ4_decompress_generic(
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (unlikely(offset<8)) {
|
if (unlikely(offset<16)) {
|
||||||
|
if (offset < 8) {
|
||||||
op[0] = match[0];
|
op[0] = match[0];
|
||||||
op[1] = match[1];
|
op[1] = match[1];
|
||||||
op[2] = match[2];
|
op[2] = match[2];
|
||||||
@ -1635,25 +1646,19 @@ LZ4_decompress_generic(
|
|||||||
match += inc32table[offset];
|
match += inc32table[offset];
|
||||||
memcpy(op+4, match, 4);
|
memcpy(op+4, match, 4);
|
||||||
match -= dec64table[offset];
|
match -= dec64table[offset];
|
||||||
|
op += 8;
|
||||||
} else {
|
} else {
|
||||||
memcpy(op, match, 8);
|
memcpy(op, match, 8);
|
||||||
|
op += 8;
|
||||||
match += 8;
|
match += 8;
|
||||||
}
|
}
|
||||||
op += 8;
|
|
||||||
|
|
||||||
if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
|
|
||||||
BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
|
|
||||||
if (cpy > oend-LASTLITERALS) goto _output_error; /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
|
|
||||||
if (op < oCopyLimit) {
|
|
||||||
LZ4_wildCopy(op, match, oCopyLimit);
|
|
||||||
match += oCopyLimit - op;
|
|
||||||
op = oCopyLimit;
|
|
||||||
}
|
|
||||||
while (op < cpy) *op++ = *match++;
|
|
||||||
} else {
|
|
||||||
memcpy(op, match, 8);
|
memcpy(op, match, 8);
|
||||||
if (length > 16) LZ4_wildCopy(op+8, match+8, cpy);
|
if (length > 16) LZ4_wildCopy(op+8, match+8, cpy);
|
||||||
|
} else {
|
||||||
|
LZ4_wildCopy16(op, match, cpy);
|
||||||
}
|
}
|
||||||
|
|
||||||
op = cpy; /* wildcopy correction */
|
op = cpy; /* wildcopy correction */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user