Merge pull request #1756 from mgrice/dev

Improvements in zstd decode performance
This commit is contained in:
Yann Collet 2019-09-18 11:35:50 -07:00 committed by GitHub
commit bfff5b30a4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 38 additions and 22 deletions

View File

@ -209,7 +209,7 @@ typedef enum {
/*! ZSTD_wildcopy() :
* custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */
MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
void ZSTD_wildcopy(void* dst, const void* src, BYTE* oend_g, ptrdiff_t length, ZSTD_overlap_e ovtype)
{
ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
const BYTE* ip = (const BYTE*)src;
@ -217,25 +217,33 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
BYTE* const oend = op + length;
assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
if (length < VECLEN || (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN)) {
do
COPY8(op, ip)
while (op < oend);
}
else {
if ((length & 8) == 0)
COPY8(op, ip);
do {
COPY16(op, ip);
if (oend < oend_g-16) {
/* common case */
do {
COPY16(op, ip);
}
while (op < oend);
}
else {
do {
COPY8(op, ip);
}
while (op < oend);
}
while (op < oend);
}
}
/*! ZSTD_wildcopy_16min() :
* same semantics as ZSTD_wilcopy() except guaranteed to be able to copy 16 bytes at the start */
* same semantics as ZSTD_wildcopy() except guaranteed to be able to copy 16 bytes at the start */
MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
void ZSTD_wildcopy_16min(void* dst, const void* src, BYTE* oend_g, ptrdiff_t length, ZSTD_overlap_e ovtype)
{
ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
const BYTE* ip = (const BYTE*)src;
@ -246,17 +254,25 @@ void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_over
assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) {
do
COPY8(op, ip)
do {
COPY8(op, ip);
}
while (op < oend);
}
else {
if ((length & 8) == 0)
COPY8(op, ip);
do {
COPY16(op, ip);
if (oend < oend_g-16) {
/* common case */
do {
COPY16(op, ip);
}
while (op < oend);
}
else {
do {
COPY8(op, ip);
}
while (op < oend);
}
while (op < oend);
}
}

View File

@ -359,7 +359,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v
/* copy Literals */
assert(seqStorePtr->maxNbLit <= 128 KB);
assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
ZSTD_wildcopy(seqStorePtr->lit, literals, (ptrdiff_t)litLength, ZSTD_no_overlap);
ZSTD_wildcopy(seqStorePtr->lit, literals, seqStorePtr->lit + litLength + 8, (ptrdiff_t)litLength, ZSTD_no_overlap);
seqStorePtr->lit += litLength;
/* literal Length */

View File

@ -641,7 +641,7 @@ size_t ZSTD_execSequence(BYTE* op,
/* copy Literals */
if (sequence.litLength > 8)
ZSTD_wildcopy_16min(op, (*litPtr), sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
ZSTD_wildcopy_16min(op, (*litPtr), oend, sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
else
ZSTD_copy8(op, *litPtr);
op = oLitEnd;
@ -690,13 +690,13 @@ size_t ZSTD_execSequence(BYTE* op,
if (oMatchEnd > oend-(16-MINMATCH)) {
if (op < oend_w) {
ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
ZSTD_wildcopy(op, match, oend, oend_w - op, ZSTD_overlap_src_before_dst);
match += oend_w - op;
op = oend_w;
}
while (op < oMatchEnd) *op++ = *match++;
} else {
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
ZSTD_wildcopy(op, match, oend, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
}
return sequenceLength;
}
@ -722,7 +722,7 @@ size_t ZSTD_execSequenceLong(BYTE* op,
/* copy Literals */
if (sequence.litLength > 8)
ZSTD_wildcopy_16min(op, *litPtr, sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
ZSTD_wildcopy_16min(op, *litPtr, oend, sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
else
ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
@ -772,13 +772,13 @@ size_t ZSTD_execSequenceLong(BYTE* op,
if (oMatchEnd > oend-(16-MINMATCH)) {
if (op < oend_w) {
ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
ZSTD_wildcopy(op, match, oend, oend_w - op, ZSTD_overlap_src_before_dst);
match += oend_w - op;
op = oend_w;
}
while (op < oMatchEnd) *op++ = *match++;
} else {
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
ZSTD_wildcopy(op, match, oend, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
}
return sequenceLength;
}