Merge pull request #1756 from mgrice/dev
Improvements in zstd decode performance
This commit is contained in:
commit
bfff5b30a4
@ -209,7 +209,7 @@ typedef enum {
|
||||
/*! ZSTD_wildcopy() :
|
||||
* custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */
|
||||
MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
|
||||
void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
|
||||
void ZSTD_wildcopy(void* dst, const void* src, BYTE* oend_g, ptrdiff_t length, ZSTD_overlap_e ovtype)
|
||||
{
|
||||
ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
|
||||
const BYTE* ip = (const BYTE*)src;
|
||||
@ -217,25 +217,33 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
|
||||
BYTE* const oend = op + length;
|
||||
|
||||
assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
|
||||
|
||||
if (length < VECLEN || (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN)) {
|
||||
do
|
||||
COPY8(op, ip)
|
||||
while (op < oend);
|
||||
}
|
||||
else {
|
||||
if ((length & 8) == 0)
|
||||
COPY8(op, ip);
|
||||
do {
|
||||
COPY16(op, ip);
|
||||
if (oend < oend_g-16) {
|
||||
/* common case */
|
||||
do {
|
||||
COPY16(op, ip);
|
||||
}
|
||||
while (op < oend);
|
||||
}
|
||||
else {
|
||||
do {
|
||||
COPY8(op, ip);
|
||||
}
|
||||
while (op < oend);
|
||||
}
|
||||
while (op < oend);
|
||||
}
|
||||
}
|
||||
|
||||
/*! ZSTD_wildcopy_16min() :
|
||||
* same semantics as ZSTD_wilcopy() except guaranteed to be able to copy 16 bytes at the start */
|
||||
* same semantics as ZSTD_wildcopy() except guaranteed to be able to copy 16 bytes at the start */
|
||||
MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
|
||||
void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
|
||||
void ZSTD_wildcopy_16min(void* dst, const void* src, BYTE* oend_g, ptrdiff_t length, ZSTD_overlap_e ovtype)
|
||||
{
|
||||
ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
|
||||
const BYTE* ip = (const BYTE*)src;
|
||||
@ -246,17 +254,25 @@ void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_over
|
||||
assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
|
||||
|
||||
if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) {
|
||||
do
|
||||
COPY8(op, ip)
|
||||
do {
|
||||
COPY8(op, ip);
|
||||
}
|
||||
while (op < oend);
|
||||
}
|
||||
else {
|
||||
if ((length & 8) == 0)
|
||||
COPY8(op, ip);
|
||||
do {
|
||||
COPY16(op, ip);
|
||||
if (oend < oend_g-16) {
|
||||
/* common case */
|
||||
do {
|
||||
COPY16(op, ip);
|
||||
}
|
||||
while (op < oend);
|
||||
}
|
||||
else {
|
||||
do {
|
||||
COPY8(op, ip);
|
||||
}
|
||||
while (op < oend);
|
||||
}
|
||||
while (op < oend);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -359,7 +359,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v
|
||||
/* copy Literals */
|
||||
assert(seqStorePtr->maxNbLit <= 128 KB);
|
||||
assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
|
||||
ZSTD_wildcopy(seqStorePtr->lit, literals, (ptrdiff_t)litLength, ZSTD_no_overlap);
|
||||
ZSTD_wildcopy(seqStorePtr->lit, literals, seqStorePtr->lit + litLength + 8, (ptrdiff_t)litLength, ZSTD_no_overlap);
|
||||
seqStorePtr->lit += litLength;
|
||||
|
||||
/* literal Length */
|
||||
|
@ -641,7 +641,7 @@ size_t ZSTD_execSequence(BYTE* op,
|
||||
|
||||
/* copy Literals */
|
||||
if (sequence.litLength > 8)
|
||||
ZSTD_wildcopy_16min(op, (*litPtr), sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
|
||||
ZSTD_wildcopy_16min(op, (*litPtr), oend, sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
|
||||
else
|
||||
ZSTD_copy8(op, *litPtr);
|
||||
op = oLitEnd;
|
||||
@ -690,13 +690,13 @@ size_t ZSTD_execSequence(BYTE* op,
|
||||
|
||||
if (oMatchEnd > oend-(16-MINMATCH)) {
|
||||
if (op < oend_w) {
|
||||
ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
|
||||
ZSTD_wildcopy(op, match, oend, oend_w - op, ZSTD_overlap_src_before_dst);
|
||||
match += oend_w - op;
|
||||
op = oend_w;
|
||||
}
|
||||
while (op < oMatchEnd) *op++ = *match++;
|
||||
} else {
|
||||
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
|
||||
ZSTD_wildcopy(op, match, oend, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
|
||||
}
|
||||
return sequenceLength;
|
||||
}
|
||||
@ -722,7 +722,7 @@ size_t ZSTD_execSequenceLong(BYTE* op,
|
||||
|
||||
/* copy Literals */
|
||||
if (sequence.litLength > 8)
|
||||
ZSTD_wildcopy_16min(op, *litPtr, sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
|
||||
ZSTD_wildcopy_16min(op, *litPtr, oend, sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
|
||||
else
|
||||
ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
|
||||
|
||||
@ -772,13 +772,13 @@ size_t ZSTD_execSequenceLong(BYTE* op,
|
||||
|
||||
if (oMatchEnd > oend-(16-MINMATCH)) {
|
||||
if (op < oend_w) {
|
||||
ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
|
||||
ZSTD_wildcopy(op, match, oend, oend_w - op, ZSTD_overlap_src_before_dst);
|
||||
match += oend_w - op;
|
||||
op = oend_w;
|
||||
}
|
||||
while (op < oMatchEnd) *op++ = *match++;
|
||||
} else {
|
||||
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
|
||||
ZSTD_wildcopy(op, match, oend, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
|
||||
}
|
||||
return sequenceLength;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user