Merge pull request #1973 from terrelln/mobile-perf

Fix performance regression on aarch64 with clang
This commit is contained in:
Nick Terrell 2020-01-23 22:02:32 -08:00 committed by GitHub
commit e6d3a61db8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 7 additions and 6 deletions

View File

@ -817,7 +817,7 @@ HUF_decompress4X2_usingDTable_internal_body(
/* 16-32 symbols per loop (4-8 symbols per stream) */ /* 16-32 symbols per loop (4-8 symbols per stream) */
for ( ; (endSignal) & (op4 < olimit); ) { for ( ; (endSignal) & (op4 < olimit); ) {
#ifdef __clang__ #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
HUF_DECODE_SYMBOLX2_2(op1, &bitD1); HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
HUF_DECODE_SYMBOLX2_1(op1, &bitD1); HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
HUF_DECODE_SYMBOLX2_2(op1, &bitD1); HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
@ -855,10 +855,11 @@ HUF_decompress4X2_usingDTable_internal_body(
HUF_DECODE_SYMBOLX2_0(op2, &bitD2); HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
HUF_DECODE_SYMBOLX2_0(op3, &bitD3); HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
HUF_DECODE_SYMBOLX2_0(op4, &bitD4); HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; endSignal = LIKELY(
endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
& (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
#endif #endif
} }

View File

@ -580,7 +580,7 @@ typedef struct {
* Precondition: *ip <= *op * Precondition: *ip <= *op
* Postcondition: *op - *ip >= 8 * Postcondition: *op - *ip >= 8
*/ */
static void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) { HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
assert(*ip <= *op); assert(*ip <= *op);
if (offset < 8) { if (offset < 8) {
/* close range match, overlap */ /* close range match, overlap */