Merge pull request #1973 from terrelln/mobile-perf
Fix performance regression on aarch64 with clang
This commit is contained in:
commit
e6d3a61db8
@ -817,7 +817,7 @@ HUF_decompress4X2_usingDTable_internal_body(
|
|||||||
|
|
||||||
/* 16-32 symbols per loop (4-8 symbols per stream) */
|
/* 16-32 symbols per loop (4-8 symbols per stream) */
|
||||||
for ( ; (endSignal) & (op4 < olimit); ) {
|
for ( ; (endSignal) & (op4 < olimit); ) {
|
||||||
#ifdef __clang__
|
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
|
||||||
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
||||||
HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
|
HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
|
||||||
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
||||||
@ -855,10 +855,11 @@ HUF_decompress4X2_usingDTable_internal_body(
|
|||||||
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
|
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
|
||||||
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
|
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
|
||||||
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
|
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
|
||||||
endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
|
endSignal = LIKELY(
|
||||||
endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
|
(BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
|
||||||
endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
|
& (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
|
||||||
endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
|
& (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
|
||||||
|
& (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -580,7 +580,7 @@ typedef struct {
|
|||||||
* Precondition: *ip <= *op
|
* Precondition: *ip <= *op
|
||||||
* Postcondition: *op - *ip >= 8
|
* Postcondition: *op - *ip >= 8
|
||||||
*/
|
*/
|
||||||
static void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
|
HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
|
||||||
assert(*ip <= *op);
|
assert(*ip <= *op);
|
||||||
if (offset < 8) {
|
if (offset < 8) {
|
||||||
/* close range match, overlap */
|
/* close range match, overlap */
|
||||||
|
Loading…
Reference in New Issue
Block a user