[x64][ia32] Move more AVX_OP into SharedTurboAssembler
Bug: v8:11589
Change-Id: I30dbdbc6266d703ce697352780da1d543afbb457
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2826711
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#73965}

This commit is contained in:
parent d338a86b67
commit add293e80e
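The AVX_OP macro families used throughout this change generate macro-assembler wrappers that emit the VEX-encoded AVX form when AVX is available and fall back to the legacy SSE encoding otherwise. A minimal hand-written sketch of the pattern these macros expand to (illustration only; the concrete macros appear in the hunks below):

// Rough equivalent of what AVX_OP(Addps, addps) provides. Illustration only.
void Addps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vaddps(dst, src1, src2);  // Three-operand VEX form; dst may differ from src1.
  } else {
    DCHECK_EQ(dst, src1);  // Legacy SSE form is destructive: dst doubles as src1.
    addps(dst, src2);
  }
}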

@@ -1852,34 +1852,6 @@ void TurboAssembler::Pshufb(XMMRegister dst, XMMRegister src, Operand mask) {
  pshufb(dst, mask);
}

void TurboAssembler::Pblendw(XMMRegister dst, Operand src, uint8_t imm8) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpblendw(dst, dst, src, imm8);
    return;
  }
  if (CpuFeatures::IsSupported(SSE4_1)) {
    CpuFeatureScope sse_scope(this, SSE4_1);
    pblendw(dst, src, imm8);
    return;
  }
  FATAL("no AVX or SSE4.1 support");
}

void TurboAssembler::Palignr(XMMRegister dst, Operand src, uint8_t imm8) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpalignr(dst, dst, src, imm8);
    return;
  }
  if (CpuFeatures::IsSupported(SSSE3)) {
    CpuFeatureScope sse_scope(this, SSSE3);
    palignr(dst, src, imm8);
    return;
  }
  FATAL("no AVX or SSE3 support");
}

void TurboAssembler::Pextrd(Register dst, XMMRegister src, uint8_t imm8) {
  if (imm8 == 0) {
    Movd(dst, src);

@@ -305,96 +305,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
  AVX_OP(Pcmpeqb, pcmpeqb)
  AVX_OP(Pcmpeqw, pcmpeqw)
  AVX_OP(Pcmpeqd, pcmpeqd)

  // Same as AVX_OP3_WITH_TYPE but supports a CpuFeatureScope
#define AVX_OP2_WITH_TYPE_SCOPE(macro_name, name, dst_type, src_type, \
                                sse_scope)                             \
  void macro_name(dst_type dst, src_type src) {                       \
    if (CpuFeatures::IsSupported(AVX)) {                              \
      CpuFeatureScope scope(this, AVX);                               \
      v##name(dst, dst, src);                                         \
    } else if (CpuFeatures::IsSupported(sse_scope)) {                 \
      CpuFeatureScope scope(this, sse_scope);                         \
      name(dst, src);                                                 \
    }                                                                 \
  }
#define AVX_OP2_XO(macro_name, name, sse_scope)                        \
  AVX_OP2_WITH_TYPE_SCOPE(macro_name, name, XMMRegister, XMMRegister,  \
                          sse_scope)                                   \
  AVX_OP2_WITH_TYPE_SCOPE(macro_name, name, XMMRegister, Operand, sse_scope)
  AVX_OP2_XO(Psignb, psignb, SSSE3)
  AVX_OP2_XO(Psignw, psignw, SSSE3)
  AVX_OP2_XO(Psignd, psignd, SSSE3)
  AVX_OP2_XO(Pcmpeqq, pcmpeqq, SSE4_1)
#undef AVX_OP2_XO
#undef AVX_OP2_WITH_TYPE_SCOPE
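For reference, AVX_OP2_XO(Psignb, psignb, SSSE3) above expands (XMMRegister overload shown) to roughly the following; note that the AVX form reuses dst as its first source:

// Expansion of AVX_OP2_XO(Psignb, psignb, SSSE3), XMMRegister overload.
void Psignb(XMMRegister dst, XMMRegister src) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpsignb(dst, dst, src);  // AVX takes dst twice: destination and first source.
  } else if (CpuFeatures::IsSupported(SSSE3)) {
    CpuFeatureScope scope(this, SSSE3);
    psignb(dst, src);
  }
}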

  // Only use this macro when dst and src1 is the same in SSE case.
#define AVX_PACKED_OP3_WITH_TYPE(macro_name, name, dst_type, src_type) \
  void macro_name(dst_type dst, dst_type src1, src_type src2) {        \
    if (CpuFeatures::IsSupported(AVX)) {                               \
      CpuFeatureScope scope(this, AVX);                                \
      v##name(dst, src1, src2);                                        \
    } else {                                                           \
      DCHECK_EQ(dst, src1);                                            \
      name(dst, src2);                                                 \
    }                                                                  \
  }
#define AVX_PACKED_OP3(macro_name, name)                               \
  AVX_PACKED_OP3_WITH_TYPE(macro_name, name, XMMRegister, XMMRegister) \
  AVX_PACKED_OP3_WITH_TYPE(macro_name, name, XMMRegister, Operand)

  AVX_PACKED_OP3(Unpcklps, unpcklps)
  AVX_PACKED_OP3(Andnps, andnps)
  AVX_PACKED_OP3(Addps, addps)
  AVX_PACKED_OP3(Addpd, addpd)
  AVX_PACKED_OP3(Subps, subps)
  AVX_PACKED_OP3(Subpd, subpd)
  AVX_PACKED_OP3(Mulps, mulps)
  AVX_PACKED_OP3(Mulpd, mulpd)
  AVX_PACKED_OP3(Divps, divps)
  AVX_PACKED_OP3(Divpd, divpd)
  AVX_PACKED_OP3(Cmpeqpd, cmpeqpd)
  AVX_PACKED_OP3(Cmpneqpd, cmpneqpd)
  AVX_PACKED_OP3(Cmpltpd, cmpltpd)
  AVX_PACKED_OP3(Cmpleps, cmpleps)
  AVX_PACKED_OP3(Cmplepd, cmplepd)
  AVX_PACKED_OP3(Minps, minps)
  AVX_PACKED_OP3(Minpd, minpd)
  AVX_PACKED_OP3(Maxps, maxps)
  AVX_PACKED_OP3(Maxpd, maxpd)
  AVX_PACKED_OP3(Cmpunordps, cmpunordps)
  AVX_PACKED_OP3(Cmpunordpd, cmpunordpd)
  AVX_PACKED_OP3(Psllw, psllw)
  AVX_PACKED_OP3(Pslld, pslld)
  AVX_PACKED_OP3(Psllq, psllq)
  AVX_PACKED_OP3(Psrlw, psrlw)
  AVX_PACKED_OP3(Psrld, psrld)
  AVX_PACKED_OP3(Psrad, psrad)
  AVX_PACKED_OP3(Paddd, paddd)
  AVX_PACKED_OP3(Paddq, paddq)
  AVX_PACKED_OP3(Pmuludq, pmuludq)
  AVX_PACKED_OP3(Pavgb, pavgb)
  AVX_PACKED_OP3(Pavgw, pavgw)
  AVX_PACKED_OP3(Pminub, pminub)
  AVX_PACKED_OP3(Pmaxub, pmaxub)
  AVX_PACKED_OP3(Paddusb, paddusb)
  AVX_PACKED_OP3(Psubusb, psubusb)
  AVX_PACKED_OP3(Pcmpgtb, pcmpgtb)
  AVX_PACKED_OP3(Paddb, paddb)
  AVX_PACKED_OP3(Paddsb, paddsb)
  AVX_PACKED_OP3(Psubsb, psubsb)

#undef AVX_PACKED_OP3

  AVX_PACKED_OP3_WITH_TYPE(Psllw, psllw, XMMRegister, uint8_t)
  AVX_PACKED_OP3_WITH_TYPE(Pslld, pslld, XMMRegister, uint8_t)
  AVX_PACKED_OP3_WITH_TYPE(Psllq, psllq, XMMRegister, uint8_t)
  AVX_PACKED_OP3_WITH_TYPE(Psrlw, psrlw, XMMRegister, uint8_t)
  AVX_PACKED_OP3_WITH_TYPE(Psrld, psrld, XMMRegister, uint8_t)
  AVX_PACKED_OP3_WITH_TYPE(Psrad, psrad, XMMRegister, uint8_t)

#undef AVX_PACKED_OP3_WITH_TYPE
  AVX_OP_SSE4_1(Pcmpeqq, pcmpeqq)

  // Macro for instructions that have 2 operands for AVX version and 1 operand for
  // SSE version. Will move src1 to dst if dst != src1.

@@ -416,35 +327,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
  AVX_OP3_WITH_MOVE(Pmaddwd, pmaddwd, XMMRegister, Operand)
#undef AVX_OP3_WITH_MOVE

#define AVX_OP3_WITH_TYPE_SCOPE(macro_name, name, dst_type, src_type, \
                                sse_scope)                             \
  void macro_name(dst_type dst, dst_type src1, src_type src2) {       \
    if (CpuFeatures::IsSupported(AVX)) {                              \
      CpuFeatureScope scope(this, AVX);                               \
      v##name(dst, src1, src2);                                       \
      return;                                                         \
    }                                                                 \
    if (CpuFeatures::IsSupported(sse_scope)) {                        \
      CpuFeatureScope scope(this, sse_scope);                         \
      DCHECK_EQ(dst, src1);                                           \
      name(dst, src2);                                                \
      return;                                                         \
    }                                                                 \
    UNREACHABLE();                                                    \
  }
#define AVX_OP3_XO_SSE4(macro_name, name)                                     \
  AVX_OP3_WITH_TYPE_SCOPE(macro_name, name, XMMRegister, XMMRegister, SSE4_1) \
  AVX_OP3_WITH_TYPE_SCOPE(macro_name, name, XMMRegister, Operand, SSE4_1)

  AVX_OP3_WITH_TYPE_SCOPE(Haddps, haddps, XMMRegister, Operand, SSE3)
  AVX_OP3_XO_SSE4(Pmaxsd, pmaxsd)
  AVX_OP3_XO_SSE4(Pminsb, pminsb)
  AVX_OP3_XO_SSE4(Pmaxsb, pmaxsb)
  AVX_OP3_XO_SSE4(Pcmpeqq, pcmpeqq)

#undef AVX_OP3_XO_SSE4
#undef AVX_OP3_WITH_TYPE_SCOPE

  // TODO(zhin): Remove after moving more definitions into SharedTurboAssembler.
  void Movlps(Operand dst, XMMRegister src) {
    SharedTurboAssembler::Movlps(dst, src);

@@ -461,16 +343,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
  }
  void Pshufb(XMMRegister dst, XMMRegister src, Operand mask);

  void Pblendw(XMMRegister dst, XMMRegister src, uint8_t imm8) {
    Pblendw(dst, Operand(src), imm8);
  }
  void Pblendw(XMMRegister dst, Operand src, uint8_t imm8);

  void Palignr(XMMRegister dst, XMMRegister src, uint8_t imm8) {
    Palignr(dst, Operand(src), imm8);
  }
  void Palignr(XMMRegister dst, Operand src, uint8_t imm8);

  void Pextrd(Register dst, XMMRegister src, uint8_t imm8);
  void Pinsrb(XMMRegister dst, Register src, int8_t imm8) {
    Pinsrb(dst, Operand(src), imm8);

@@ -39,7 +39,7 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
    }
  }

  // Helper struct to implement functions that checks for AVX support and
  // Helper struct to implement functions that check for AVX support and
  // dispatch to the appropriate AVX/SSE instruction.
  template <typename Dst, typename Arg, typename... Args>
  struct AvxHelper {
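The body of AvxHelper lies outside this hunk. As a simplified sketch only (the real helper in SharedTurboAssembler carries more machinery, for example an optional CpuFeature requirement for the SSE fallback), the dispatch the comment describes has roughly this shape:

// Simplified illustration, not the actual V8 definition.
template <typename Dst, typename Arg, typename... Args>
struct AvxHelper {
  Assembler* assm;
  // Emit the AVX form when available, otherwise fall back to the SSE form.
  template <void (Assembler::*avx)(Dst, Arg, Args...),
            void (Assembler::*no_avx)(Dst, Arg, Args...)>
  void emit(Dst dst, Arg arg, Args... args) {
    if (CpuFeatures::IsSupported(AVX)) {
      CpuFeatureScope scope(assm, AVX);
      (assm->*avx)(dst, arg, args...);
    } else {
      (assm->*no_avx)(dst, arg, args...);
    }
  }
};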

@@ -145,14 +145,30 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
  }

  // Keep this list sorted by required extension, then instruction name.
  AVX_OP(Addpd, addpd)
  AVX_OP(Addps, addps)
  AVX_OP(Andnpd, andnpd)
  AVX_OP(Andnps, andnps)
  AVX_OP(Andpd, andpd)
  AVX_OP(Andps, andps)
  AVX_OP(Cmpeqpd, cmpeqpd)
  AVX_OP(Cmplepd, cmplepd)
  AVX_OP(Cmpleps, cmpleps)
  AVX_OP(Cmpltpd, cmpltpd)
  AVX_OP(Cmpneqpd, cmpneqpd)
  AVX_OP(Cmpunordpd, cmpunordpd)
  AVX_OP(Cmpunordps, cmpunordps)
  AVX_OP(Cvtdq2pd, cvtdq2pd)
  AVX_OP(Cvtdq2ps, cvtdq2ps)
  AVX_OP(Cvtpd2ps, cvtpd2ps)
  AVX_OP(Cvtps2pd, cvtps2pd)
  AVX_OP(Cvttps2dq, cvttps2dq)
  AVX_OP(Divpd, divpd)
  AVX_OP(Divps, divps)
  AVX_OP(Maxpd, maxpd)
  AVX_OP(Maxps, maxps)
  AVX_OP(Minpd, minpd)
  AVX_OP(Minps, minps)
  AVX_OP(Movaps, movaps)
  AVX_OP(Movd, movd)
  AVX_OP(Movhlps, movhlps)

@@ -164,25 +180,46 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
  AVX_OP(Movss, movss)
  AVX_OP(Movupd, movupd)
  AVX_OP(Movups, movups)
  AVX_OP(Mulpd, mulpd)
  AVX_OP(Mulps, mulps)
  AVX_OP(Orpd, orpd)
  AVX_OP(Orps, orps)
  AVX_OP(Packssdw, packssdw)
  AVX_OP(Packsswb, packsswb)
  AVX_OP(Packuswb, packuswb)
  AVX_OP(Paddb, paddb)
  AVX_OP(Paddd, paddd)
  AVX_OP(Paddq, paddq)
  AVX_OP(Paddsb, paddsb)
  AVX_OP(Paddusb, paddusb)
  AVX_OP(Paddusw, paddusw)
  AVX_OP(Paddw, paddw)
  AVX_OP(Pand, pand)
  AVX_OP(Pavgb, pavgb)
  AVX_OP(Pavgw, pavgw)
  AVX_OP(Pcmpgtb, pcmpgtb)
  AVX_OP(Pmaxub, pmaxub)
  AVX_OP(Pminub, pminub)
  AVX_OP(Pmovmskb, pmovmskb)
  AVX_OP(Pmullw, pmullw)
  AVX_OP(Pmuludq, pmuludq)
  AVX_OP(Por, por)
  AVX_OP(Pshufd, pshufd)
  AVX_OP(Pshufhw, pshufhw)
  AVX_OP(Pshuflw, pshuflw)
  AVX_OP(Pslld, pslld)
  AVX_OP(Psllq, psllq)
  AVX_OP(Psllw, psllw)
  AVX_OP(Psrad, psrad)
  AVX_OP(Psraw, psraw)
  AVX_OP(Psrld, psrld)
  AVX_OP(Psrlq, psrlq)
  AVX_OP(Psrlw, psrlw)
  AVX_OP(Psubb, psubb)
  AVX_OP(Psubd, psubd)
  AVX_OP(Psubq, psubq)
  AVX_OP(Psubsb, psubsb)
  AVX_OP(Psubusb, psubusb)
  AVX_OP(Psubw, psubw)
  AVX_OP(Punpckhbw, punpckhbw)
  AVX_OP(Punpckhdq, punpckhdq)

@@ -199,16 +236,31 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
  AVX_OP(Sqrtps, sqrtps)
  AVX_OP(Sqrtsd, sqrtsd)
  AVX_OP(Sqrtss, sqrtss)
  AVX_OP(Subpd, subpd)
  AVX_OP(Subps, subps)
  AVX_OP(Unpcklps, unpcklps)
  AVX_OP(Xorpd, xorpd)
  AVX_OP(Xorps, xorps)

  AVX_OP_SSE3(Haddps, haddps)
  AVX_OP_SSE3(Movddup, movddup)
  AVX_OP_SSE3(Movshdup, movshdup)

  AVX_OP_SSSE3(Pabsb, pabsb)
  AVX_OP_SSSE3(Pabsd, pabsd)
  AVX_OP_SSSE3(Pabsw, pabsw)
  AVX_OP_SSSE3(Palignr, palignr)
  AVX_OP_SSSE3(Psignb, psignb)
  AVX_OP_SSSE3(Psignd, psignd)
  AVX_OP_SSSE3(Psignw, psignw)

  AVX_OP_SSE4_1(Extractps, extractps)
  AVX_OP_SSE4_1(Pblendw, pblendw)
  AVX_OP_SSE4_1(Pextrb, pextrb)
  AVX_OP_SSE4_1(Pextrw, pextrw)
  AVX_OP_SSE4_1(Pmaxsb, pmaxsb)
  AVX_OP_SSE4_1(Pmaxsd, pmaxsd)
  AVX_OP_SSE4_1(Pminsb, pminsb)
  AVX_OP_SSE4_1(Pmovsxbw, pmovsxbw)
  AVX_OP_SSE4_1(Pmovsxdq, pmovsxdq)
  AVX_OP_SSE4_1(Pmovsxwd, pmovsxwd)

@@ -1274,6 +1274,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
  SSE_CMP_P(cmpeq, 0x0)
  SSE_CMP_P(cmplt, 0x1)
  SSE_CMP_P(cmple, 0x2)
  SSE_CMP_P(cmpunord, 0x3)
  SSE_CMP_P(cmpneq, 0x4)
  SSE_CMP_P(cmpnlt, 0x5)
  SSE_CMP_P(cmpnle, 0x6)

@@ -1571,6 +1572,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
  AVX_CMP_P(vcmpeq, 0x0)
  AVX_CMP_P(vcmplt, 0x1)
  AVX_CMP_P(vcmple, 0x2)
  AVX_CMP_P(vcmpunord, 0x3)
  AVX_CMP_P(vcmpneq, 0x4)
  AVX_CMP_P(vcmpnlt, 0x5)
  AVX_CMP_P(vcmpnle, 0x6)

@@ -1885,16 +1885,6 @@ void TurboAssembler::Pmaddubsw(XMMRegister dst, XMMRegister src1,
  }
}

void TurboAssembler::Unpcklps(XMMRegister dst, XMMRegister src1, Operand src2) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope avx_scope(this, AVX);
    vunpcklps(dst, src1, src2);
  } else {
    DCHECK_EQ(dst, src1);
    unpcklps(dst, src2);
  }
}

void TurboAssembler::Shufps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                            byte imm8) {
  if (CpuFeatures::IsSupported(AVX)) {

@@ -2039,26 +2029,6 @@ void TurboAssembler::Pinsrq(XMMRegister dst, XMMRegister src1, Operand src2,
              imm8, base::Optional<CpuFeature>(SSE4_1));
}

void TurboAssembler::Psllq(XMMRegister dst, byte imm8) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpsllq(dst, dst, imm8);
  } else {
    DCHECK(!IsEnabled(AVX));
    psllq(dst, imm8);
  }
}

void TurboAssembler::Pslld(XMMRegister dst, byte imm8) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpslld(dst, dst, imm8);
  } else {
    DCHECK(!IsEnabled(AVX));
    pslld(dst, imm8);
  }
}

void TurboAssembler::Pblendvb(XMMRegister dst, XMMRegister src1,
                              XMMRegister src2, XMMRegister mask) {
  if (CpuFeatures::IsSupported(AVX)) {

@@ -2396,21 +2366,6 @@ void TurboAssembler::Negpd(XMMRegister dst) {
           ExternalReference::address_of_double_neg_constant()));
}

void TurboAssembler::Psrld(XMMRegister dst, byte imm8) {
  Psrld(dst, dst, imm8);
}

void TurboAssembler::Psrld(XMMRegister dst, XMMRegister src, byte imm8) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpsrld(dst, src, imm8);
  } else {
    DCHECK(!IsEnabled(AVX));
    DCHECK_EQ(dst, src);
    psrld(dst, imm8);
  }
}

void TurboAssembler::Lzcntl(Register dst, Register src) {
  if (CpuFeatures::IsSupported(LZCNT)) {
    CpuFeatureScope scope(this, LZCNT);

@@ -66,91 +66,43 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
  AVX_OP(Subsd, subsd)
  AVX_OP(Divss, divss)
  AVX_OP(Divsd, divsd)
  AVX_OP(Pcmpgtb, pcmpgtb)
  AVX_OP(Pcmpgtw, pcmpgtw)
  AVX_OP(Pmaxsw, pmaxsw)
  AVX_OP(Pmaxub, pmaxub)
  AVX_OP(Pminsw, pminsw)
  AVX_OP(Pminub, pminub)
  AVX_OP(Addss, addss)
  AVX_OP(Addsd, addsd)
  AVX_OP(Mulsd, mulsd)
  AVX_OP(Andnps, andnps)
  AVX_OP(Cmpeqps, cmpeqps)
  AVX_OP(Cmpltps, cmpltps)
  AVX_OP(Cmpleps, cmpleps)
  AVX_OP(Cmpneqps, cmpneqps)
  AVX_OP(Cmpnltps, cmpnltps)
  AVX_OP(Cmpnleps, cmpnleps)
  AVX_OP(Cmpeqpd, cmpeqpd)
  AVX_OP(Cmpltpd, cmpltpd)
  AVX_OP(Cmplepd, cmplepd)
  AVX_OP(Cmpneqpd, cmpneqpd)
  AVX_OP(Cmpnltpd, cmpnltpd)
  AVX_OP(Cmpnlepd, cmpnlepd)
  AVX_OP(Cvttpd2dq, cvttpd2dq)
  AVX_OP(Ucomiss, ucomiss)
  AVX_OP(Ucomisd, ucomisd)
  AVX_OP(Psubsb, psubsb)
  AVX_OP(Psubsw, psubsw)
  AVX_OP(Psubusb, psubusb)
  AVX_OP(Psubusw, psubusw)
  AVX_OP(Pslld, pslld)
  AVX_OP(Pavgb, pavgb)
  AVX_OP(Pavgw, pavgw)
  AVX_OP(Psrad, psrad)
  AVX_OP(Psllw, psllw)
  AVX_OP(Psllq, psllq)
  AVX_OP(Psrlw, psrlw)
  AVX_OP(Psrld, psrld)
  AVX_OP(Paddb, paddb)
  AVX_OP(Paddw, paddw)
  AVX_OP(Paddd, paddd)
  AVX_OP(Paddq, paddq)
  AVX_OP(Paddsb, paddsb)
  AVX_OP(Paddsw, paddsw)
  AVX_OP(Pcmpgtd, pcmpgtd)
  AVX_OP(Pmuludq, pmuludq)
  AVX_OP(Addpd, addpd)
  AVX_OP(Subpd, subpd)
  AVX_OP(Mulpd, mulpd)
  AVX_OP(Minps, minps)
  AVX_OP(Minpd, minpd)
  AVX_OP(Divpd, divpd)
  AVX_OP(Maxps, maxps)
  AVX_OP(Maxpd, maxpd)
  AVX_OP(Addps, addps)
  AVX_OP(Subps, subps)
  AVX_OP(Mulps, mulps)
  AVX_OP(Divps, divps)
  AVX_OP(Pcmpeqb, pcmpeqb)
  AVX_OP(Pcmpeqw, pcmpeqw)
  AVX_OP(Pcmpeqd, pcmpeqd)
  AVX_OP(Cmpps, cmpps)
  AVX_OP(Cmppd, cmppd)
  AVX_OP(Movlhps, movlhps)
  AVX_OP_SSE3(Haddps, haddps)
  AVX_OP_SSSE3(Phaddd, phaddd)
  AVX_OP_SSSE3(Phaddw, phaddw)
  AVX_OP_SSSE3(Pshufb, pshufb)
  AVX_OP_SSSE3(Psignb, psignb)
  AVX_OP_SSSE3(Psignw, psignw)
  AVX_OP_SSSE3(Psignd, psignd)
  AVX_OP_SSSE3(Palignr, palignr)
  AVX_OP_SSE4_1(Pcmpeqq, pcmpeqq)
  AVX_OP_SSE4_1(Packusdw, packusdw)
  AVX_OP_SSE4_1(Pminsb, pminsb)
  AVX_OP_SSE4_1(Pminsd, pminsd)
  AVX_OP_SSE4_1(Pminuw, pminuw)
  AVX_OP_SSE4_1(Pminud, pminud)
  AVX_OP_SSE4_1(Pmaxsb, pmaxsb)
  AVX_OP_SSE4_1(Pmaxsd, pmaxsd)
  AVX_OP_SSE4_1(Pmaxuw, pmaxuw)
  AVX_OP_SSE4_1(Pmaxud, pmaxud)
  AVX_OP_SSE4_1(Pmulld, pmulld)
  AVX_OP_SSE4_1(Insertps, insertps)
  AVX_OP_SSE4_1(Pinsrq, pinsrq)
  AVX_OP_SSE4_1(Pblendw, pblendw)
  AVX_OP_SSE4_1(Pextrq, pextrq)
  AVX_OP_SSE4_1(Roundss, roundss)
  AVX_OP_SSE4_1(Roundsd, roundsd)

@@ -427,7 +379,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
  void Pmaddubsw(XMMRegister dst, XMMRegister src1, Operand src2);
  void Pmaddubsw(XMMRegister dst, XMMRegister src1, XMMRegister src2);

  void Unpcklps(XMMRegister dst, XMMRegister src1, Operand src2);
  // Shufps that will mov src1 into dst if AVX is not supported.
  void Shufps(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8);

@@ -445,14 +396,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
  void Pinsrq(XMMRegister dst, XMMRegister src1, Register src2, uint8_t imm8);
  void Pinsrq(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8);

  void Psllq(XMMRegister dst, int imm8) { Psllq(dst, static_cast<byte>(imm8)); }
  void Psllq(XMMRegister dst, byte imm8);
  void Pslld(XMMRegister dst, byte imm8);
  void Psrld(XMMRegister dst, byte imm8);

  // Supports both AVX (dst != src1) and SSE (checks that dst == src1).
  void Psrld(XMMRegister dst, XMMRegister src, byte imm8);

  void Pblendvb(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                XMMRegister mask);
  void Blendvps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
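Context for the call-site changes in the remaining hunks: with the int and byte shift-immediate convenience overloads removed in this and the preceding hunks, callers now resolve against the overloads provided through SharedTurboAssembler's AVX_OP(Psllq, psllq), AVX_OP(Pslld, pslld), AVX_OP(Psrld, psrld) and friends, which appears to be why the shift counts below are spelled as explicit byte{...} literals:

// Before: Psllq(tmp, 31);        // plain int literal, matched a convenience overload
// After:  Psllq(tmp, byte{31});  // explicit byte{} selects the SharedTurboAssembler overload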

@@ -2181,7 +2181,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(

      // Set up a mask [0x80000000,0,0x80000000,0].
      __ Pcmpeqb(tmp2, tmp2);
      __ Psllq(tmp2, tmp2, 63);
      __ Psllq(tmp2, tmp2, byte{63});

      __ Psrlq(tmp2, tmp2, tmp);
      __ Psrlq(dst, src, tmp);
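A note on the mask setup in this hunk (my reading of the two instructions, not text from the CL):

// Pcmpeqb(tmp2, tmp2)          -> every lane compares equal to itself, so tmp2
//                                 ends up with all bits set.
// Psllq(tmp2, tmp2, byte{63})  -> only bit 63 of each 64-bit lane survives: the
//                                 sign-bit mask the comment writes, in 32-bit
//                                 words, as [0x80000000,0,0x80000000,0].
// SSE/AVX has no 64-bit arithmetic right shift, so these isolated sign bits are
// used to patch the logical Psrlq results up into an arithmetic i64x2 shift.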

@@ -2218,7 +2218,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      __ Pmuludq(tmp2, tmp2, left);

      __ Paddq(tmp2, tmp2, tmp1);
      __ Psllq(tmp2, tmp2, 32);
      __ Psllq(tmp2, tmp2, byte{32});

      __ Pmuludq(dst, left, right);
      __ Paddq(dst, dst, tmp2);

@@ -2357,10 +2357,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      __ Pxor(kScratchDoubleReg, kScratchDoubleReg);  // zeros
      __ Pblendw(kScratchDoubleReg, src, 0x55);  // get lo 16 bits
      __ Pblendw(kScratchDoubleReg, src, uint8_t{0x55});  // get lo 16 bits
      __ Psubd(dst, src, kScratchDoubleReg);  // get hi 16 bits
      __ Cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // convert lo exactly
      __ Psrld(dst, dst, 1);  // divide by 2 to get in unsigned range
      __ Psrld(dst, dst, byte{1});  // divide by 2 to get in unsigned range
      __ Cvtdq2ps(dst, dst);  // convert hi exactly
      __ Addps(dst, dst, dst);  // double hi, exactly
      __ Addps(dst, dst, kScratchDoubleReg);  // add hi and lo, may round.

@@ -2371,11 +2371,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      XMMRegister src = i.InputSimd128Register(0);
      if (dst == src) {
        __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
        __ Psrld(kScratchDoubleReg, kScratchDoubleReg, 1);
        __ Psrld(kScratchDoubleReg, kScratchDoubleReg, byte{1});
        __ Andps(dst, kScratchDoubleReg);
      } else {
        __ Pcmpeqd(dst, dst);
        __ Psrld(dst, dst, 1);
        __ Psrld(dst, dst, byte{1});
        __ Andps(dst, src);
      }
      break;

@@ -2385,11 +2385,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      XMMRegister src = i.InputSimd128Register(0);
      if (dst == src) {
        __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
        __ Pslld(kScratchDoubleReg, kScratchDoubleReg, 31);
        __ Pslld(kScratchDoubleReg, kScratchDoubleReg, byte{31});
        __ Xorps(dst, kScratchDoubleReg);
      } else {
        __ Pcmpeqd(dst, dst);
        __ Pslld(dst, dst, 31);
        __ Pslld(dst, dst, byte{31});
        __ Xorps(dst, src);
      }
      break;

@@ -2580,7 +2580,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      __ Cvttps2dq(dst, dst);
      // Set top bit if >=0 is now < 0
      __ Pand(kScratchDoubleReg, dst);
      __ Psrad(kScratchDoubleReg, kScratchDoubleReg, 31);
      __ Psrad(kScratchDoubleReg, kScratchDoubleReg, byte{31});
      // Set positive overflow lanes to 0x7FFFFFFF
      __ Pxor(dst, kScratchDoubleReg);
      break;

@@ -3739,7 +3739,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      DCHECK_NE(0xe4, shuffle);  // A simple blend should be handled below.
      __ Pshufd(kScratchDoubleReg, i.InputOperand(1), shuffle);
      __ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), shuffle);
      __ Pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputInt8(3));
      __ Pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputUint8(3));
      break;
    }
    case kIA32S16x8Blend:

@@ -3757,7 +3757,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      __ Pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputUint8(3));
      __ Pshuflw(dst, i.InputOperand(0), i.InputUint8(2));
      __ Pshufhw(dst, dst, i.InputUint8(3));
      __ Pblendw(dst, kScratchDoubleReg, i.InputInt8(4));
      __ Pblendw(dst, kScratchDoubleReg, i.InputUint8(4));
      break;
    }
    case kIA32S8x16Alignr:

@@ -1486,7 +1486,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      // TODO(bmeurer): Use RIP relative 128-bit constants.
      XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
      __ Pcmpeqd(tmp, tmp);
      __ Psllq(tmp, 31);
      __ Psllq(tmp, byte{31});
      __ Xorps(i.OutputDoubleRegister(), tmp);
      break;
    }

@@ -2439,7 +2439,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      // propagate -0's and NaNs, which may be non-canonical.
      __ Orpd(kScratchDoubleReg, dst);
      // Canonicalize NaNs by quieting and clearing the payload.
      __ Cmppd(dst, kScratchDoubleReg, int8_t{3});
      __ Cmpunordpd(dst, kScratchDoubleReg);
      __ Orpd(kScratchDoubleReg, dst);
      __ Psrlq(dst, byte{13});
      __ Andnpd(dst, kScratchDoubleReg);
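For readers checking the Cmppd to Cmpunordpd (and Cmpps to Cmpunordps) substitutions in this and the following hunks, the immediate predicates, matching the SSE_CMP_P and AVX_CMP_P tables earlier in this diff, are:

// cmppd/cmpps imm8 predicates: 0x0 eq, 0x1 lt, 0x2 le, 0x3 unord,
//                              0x4 neq, 0x5 nlt, 0x6 nle
// So Cmppd(dst, scratch, int8_t{3}) and Cmpunordpd(dst, scratch) assemble the
// same unordered comparison; the new spelling just names the predicate.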

@@ -2461,7 +2461,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      // Propagate sign discrepancy and (subtle) quiet NaNs.
      __ Subpd(kScratchDoubleReg, dst);
      // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
      __ Cmppd(dst, kScratchDoubleReg, int8_t{3});
      __ Cmpunordpd(dst, kScratchDoubleReg);
      __ Psrlq(dst, byte{13});
      __ Andnpd(dst, kScratchDoubleReg);
      break;

@@ -2671,7 +2671,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      // propagate -0's and NaNs, which may be non-canonical.
      __ Orps(kScratchDoubleReg, dst);
      // Canonicalize NaNs by quieting and clearing the payload.
      __ Cmpps(dst, kScratchDoubleReg, int8_t{3});
      __ Cmpunordps(dst, kScratchDoubleReg);
      __ Orps(kScratchDoubleReg, dst);
      __ Psrld(dst, byte{10});
      __ Andnps(dst, kScratchDoubleReg);

@@ -2693,7 +2693,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      // Propagate sign discrepancy and (subtle) quiet NaNs.
      __ Subps(kScratchDoubleReg, dst);
      // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
      __ Cmpps(dst, kScratchDoubleReg, int8_t{3});
      __ Cmpunordps(dst, kScratchDoubleReg);
      __ Psrld(dst, byte{10});
      __ Andnps(dst, kScratchDoubleReg);
      break;

@@ -2851,7 +2851,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      __ Pmuludq(tmp2, left);

      __ Paddq(tmp2, tmp1);
      __ Psllq(tmp2, 32);
      __ Psllq(tmp2, byte{32});

      __ Pmuludq(left, right);
      __ Paddq(left, tmp2);  // left == dst

@@ -3896,7 +3896,7 @@ void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst,

  // Set up a mask [0x80000000,0,0x80000000,0].
  Pcmpeqb(tmp, tmp);
  Psllq(tmp, tmp, 63);
  Psllq(tmp, tmp, byte{63});

  Psrlq(tmp, tmp, shift);
  if (CpuFeatures::IsSupported(AVX)) {

@@ -3919,7 +3919,7 @@ void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst,

  // Set up a mask [0x80000000,0,0x80000000,0].
  Pcmpeqb(tmp, tmp);
  Psllq(tmp, tmp, 63);
  Psllq(tmp, tmp, byte{63});

  Psrlq(tmp, tmp, byte{shift});
  liftoff::EmitSimdShiftOpImm<&Assembler::vpsrlq, &Assembler::psrlq, 6>(

@@ -3969,7 +3969,7 @@ void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
  Psrlq(tmp2.fp(), byte{32});
  Pmuludq(tmp2.fp(), tmp2.fp(), lhs.fp());
  Paddq(tmp2.fp(), tmp2.fp(), tmp1.fp());
  Psllq(tmp2.fp(), tmp2.fp(), 32);
  Psllq(tmp2.fp(), tmp2.fp(), byte{32});
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmuludq, &Assembler::pmuludq>(
      this, dst, lhs, rhs);
  Paddq(dst.fp(), dst.fp(), tmp2.fp());
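The i64x2.mul lowerings above and below build a 64x64-to-64-bit multiply per lane out of the 32x32-to-64-bit multiplies that pmuludq provides. A scalar sketch of the identity being used (my own illustration, not code from this CL):

#include <cstdint>

// Per-lane reference: split both operands into 32-bit halves; the hi*hi term
// lands entirely above bit 63 and can be dropped mod 2^64.
uint64_t Mul64ViaPmuludq(uint64_t a, uint64_t b) {
  uint64_t a_lo = a & 0xFFFFFFFFu, a_hi = a >> 32;
  uint64_t b_lo = b & 0xFFFFFFFFu, b_hi = b >> 32;
  uint64_t cross = (a_lo * b_hi + a_hi * b_lo) << 32;  // cross terms, shifted up 32
  return a_lo * b_lo + cross;  // low product plus shifted cross terms
}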

@@ -4032,11 +4032,11 @@ void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  if (dst.fp() == src.fp()) {
    Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
    Psrld(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, 1);
    Psrld(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, byte{1});
    Andps(dst.fp(), liftoff::kScratchDoubleReg);
  } else {
    Pcmpeqd(dst.fp(), dst.fp());
    Psrld(dst.fp(), dst.fp(), 1);
    Psrld(dst.fp(), dst.fp(), byte{1});
    Andps(dst.fp(), src.fp());
  }
}

@@ -4045,11 +4045,11 @@ void LiftoffAssembler::emit_f32x4_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  if (dst.fp() == src.fp()) {
    Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
    Pslld(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, 31);
    Pslld(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, byte{31});
    Xorps(dst.fp(), liftoff::kScratchDoubleReg);
  } else {
    Pcmpeqd(dst.fp(), dst.fp());
    Pslld(dst.fp(), dst.fp(), 31);
    Pslld(dst.fp(), dst.fp(), byte{31});
    Xorps(dst.fp(), src.fp());
  }
}

@@ -4201,11 +4201,11 @@ void LiftoffAssembler::emit_f64x2_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  if (dst.fp() == src.fp()) {
    Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
    Psllq(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, 63);
    Psllq(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, byte{63});
    Xorpd(dst.fp(), liftoff::kScratchDoubleReg);
  } else {
    Pcmpeqd(dst.fp(), dst.fp());
    Psllq(dst.fp(), dst.fp(), 63);
    Psllq(dst.fp(), dst.fp(), byte{63});
    Xorpd(dst.fp(), src.fp());
  }
}

@@ -3510,7 +3510,7 @@ void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
  Psrlq(tmp2.fp(), byte{32});
  Pmuludq(tmp2.fp(), lhs.fp());
  Paddq(tmp2.fp(), tmp1.fp());
  Psllq(tmp2.fp(), 32);
  Psllq(tmp2.fp(), byte{32});
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmuludq, &Assembler::pmuludq>(
      this, dst, lhs, rhs);
  Paddq(dst.fp(), tmp2.fp());

@@ -3586,11 +3586,11 @@ void LiftoffAssembler::emit_f32x4_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  if (dst.fp() == src.fp()) {
    Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    Pslld(kScratchDoubleReg, static_cast<byte>(31));
    Pslld(kScratchDoubleReg, byte{31});
    Xorps(dst.fp(), kScratchDoubleReg);
  } else {
    Pcmpeqd(dst.fp(), dst.fp());
    Pslld(dst.fp(), static_cast<byte>(31));
    Pslld(dst.fp(), byte{31});
    Xorps(dst.fp(), src.fp());
  }
}

@@ -3674,7 +3674,7 @@ void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs,
  // propagate -0's and NaNs, which may be non-canonical.
  Orps(kScratchDoubleReg, dst.fp());
  // Canonicalize NaNs by quieting and clearing the payload.
  Cmpps(dst.fp(), kScratchDoubleReg, int8_t{3});
  Cmpunordps(dst.fp(), kScratchDoubleReg);
  Orps(kScratchDoubleReg, dst.fp());
  Psrld(dst.fp(), byte{10});
  Andnps(dst.fp(), kScratchDoubleReg);

@@ -3706,7 +3706,7 @@ void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
  // Propagate sign discrepancy and (subtle) quiet NaNs.
  Subps(kScratchDoubleReg, dst.fp());
  // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
  Cmpps(dst.fp(), kScratchDoubleReg, int8_t{3});
  Cmpunordps(dst.fp(), kScratchDoubleReg);
  Psrld(dst.fp(), byte{10});
  Andnps(dst.fp(), kScratchDoubleReg);
}

@@ -3830,7 +3830,7 @@ void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
  // propagate -0's and NaNs, which may be non-canonical.
  Orpd(kScratchDoubleReg, dst.fp());
  // Canonicalize NaNs by quieting and clearing the payload.
  Cmppd(dst.fp(), kScratchDoubleReg, int8_t{3});
  Cmpunordpd(dst.fp(), kScratchDoubleReg);
  Orpd(kScratchDoubleReg, dst.fp());
  Psrlq(dst.fp(), byte{13});
  Andnpd(dst.fp(), kScratchDoubleReg);

@@ -3862,7 +3862,7 @@ void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
  // Propagate sign discrepancy and (subtle) quiet NaNs.
  Subpd(kScratchDoubleReg, dst.fp());
  // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
  Cmppd(dst.fp(), kScratchDoubleReg, int8_t{3});
  Cmpunordpd(dst.fp(), kScratchDoubleReg);
  Psrlq(dst.fp(), byte{13});
  Andnpd(dst.fp(), kScratchDoubleReg);
}

@@ -559,6 +559,8 @@ TEST(DisasmX64) {
  __ cmpltps(xmm5, Operand(rbx, rcx, times_4, 10000));
  __ cmpleps(xmm5, xmm1);
  __ cmpleps(xmm5, Operand(rbx, rcx, times_4, 10000));
  __ cmpunordps(xmm5, xmm1);
  __ cmpunordps(xmm5, Operand(rbx, rcx, times_4, 10000));
  __ cmpneqps(xmm5, xmm1);
  __ cmpneqps(xmm5, Operand(rbx, rcx, times_4, 10000));
  __ cmpnltps(xmm5, xmm1);

@@ -573,6 +575,8 @@ TEST(DisasmX64) {
  __ cmpltpd(xmm5, Operand(rbx, rcx, times_4, 10000));
  __ cmplepd(xmm5, xmm1);
  __ cmplepd(xmm5, Operand(rbx, rcx, times_4, 10000));
  __ cmpunordpd(xmm5, xmm1);
  __ cmpunordpd(xmm5, Operand(rbx, rcx, times_4, 10000));
  __ cmpneqpd(xmm5, xmm1);
  __ cmpneqpd(xmm5, Operand(rbx, rcx, times_4, 10000));
  __ cmpnltpd(xmm5, xmm1);

@@ -727,6 +731,8 @@ TEST(DisasmX64) {
  __ vcmpltps(xmm5, xmm4, Operand(rbx, rcx, times_4, 10000));
  __ vcmpleps(xmm5, xmm4, xmm1);
  __ vcmpleps(xmm5, xmm4, Operand(rbx, rcx, times_4, 10000));
  __ vcmpunordps(xmm5, xmm4, xmm1);
  __ vcmpunordps(xmm5, xmm4, Operand(rbx, rcx, times_4, 10000));
  __ vcmpneqps(xmm5, xmm4, xmm1);
  __ vcmpneqps(xmm5, xmm4, Operand(rbx, rcx, times_4, 10000));
  __ vcmpnltps(xmm5, xmm4, xmm1);

@@ -741,6 +747,8 @@ TEST(DisasmX64) {
  __ vcmpltpd(xmm5, xmm4, Operand(rbx, rcx, times_4, 10000));
  __ vcmplepd(xmm5, xmm4, xmm1);
  __ vcmplepd(xmm5, xmm4, Operand(rbx, rcx, times_4, 10000));
  __ vcmpunordpd(xmm5, xmm4, xmm1);
  __ vcmpunordpd(xmm5, xmm4, Operand(rbx, rcx, times_4, 10000));
  __ vcmpneqpd(xmm5, xmm4, xmm1);
  __ vcmpneqpd(xmm5, xmm4, Operand(rbx, rcx, times_4, 10000));
  __ vcmpnltpd(xmm5, xmm4, xmm1);