[x64][ia32] Move more AVX_OP into SharedTurboAssembler
We add one more member function template to AvxHelper to allow one new way of calling: Andps(x, y, z) -> vandps(x, y, z) when AVX is supported, or andps(x, z) with a check that x == y otherwise. Clean up a bunch of places where we need to pass an int literal as a byte. Unfortunately, we cannot define Movq using AVX_OP: because movq is defined in the assembler using function templates, there are versions of movq with 1 argument defined. That is not a valid instruction (but is valid for `dec`). We end up selecting vmovq(XMMRegister, Register) and movq(XMMRegister), which is not valid. Bug: v8:11589 Change-Id: I45e3bc213d93ece7f65da8eb1e3fa185aec4c573 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2815560 Commit-Queue: Zhi An Ng <zhin@chromium.org> Reviewed-by: Clemens Backes <clemensb@chromium.org> Reviewed-by: Bill Budge <bbudge@chromium.org> Cr-Commit-Position: refs/heads/master@{#73944}
This commit is contained in:
parent
9d3f354527
commit
a9cd53c74d
@ -301,53 +301,10 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
|
||||
// may be bigger than 2^16 - 1. Requires a scratch register.
|
||||
void Ret(int bytes_dropped, Register scratch);
|
||||
|
||||
// Only use these macros when non-destructive source of AVX version is not
|
||||
// needed.
|
||||
// Generates a member function `macro_name(dst, src)`.
// If CpuFeatures::IsSupported(AVX), it opens a CpuFeatureScope for AVX and
// emits the three-operand form v##name(dst, dst, src), i.e. dst is used as
// both destination and first source. Otherwise it emits the two-operand
// form name(dst, src).
#define AVX_OP3_WITH_TYPE(macro_name, name, dst_type, src_type) \
|
||||
void macro_name(dst_type dst, src_type src) { \
|
||||
if (CpuFeatures::IsSupported(AVX)) { \
|
||||
CpuFeatureScope scope(this, AVX); \
|
||||
v##name(dst, dst, src); \
|
||||
} else { \
|
||||
name(dst, src); \
|
||||
} \
|
||||
}
|
||||
#define AVX_OP3_XO(macro_name, name) \
|
||||
AVX_OP3_WITH_TYPE(macro_name, name, XMMRegister, XMMRegister) \
|
||||
AVX_OP3_WITH_TYPE(macro_name, name, XMMRegister, Operand)
|
||||
|
||||
AVX_OP3_XO(Packsswb, packsswb)
|
||||
AVX_OP3_XO(Packuswb, packuswb)
|
||||
AVX_OP3_XO(Paddusb, paddusb)
|
||||
AVX_OP3_XO(Pand, pand)
|
||||
AVX_OP3_XO(Pcmpeqb, pcmpeqb)
|
||||
AVX_OP3_XO(Pcmpeqw, pcmpeqw)
|
||||
AVX_OP3_XO(Pcmpeqd, pcmpeqd)
|
||||
AVX_OP3_XO(Por, por)
|
||||
AVX_OP3_XO(Psubb, psubb)
|
||||
AVX_OP3_XO(Psubw, psubw)
|
||||
AVX_OP3_XO(Psubd, psubd)
|
||||
AVX_OP3_XO(Psubq, psubq)
|
||||
AVX_OP3_XO(Punpcklbw, punpcklbw)
|
||||
AVX_OP3_XO(Punpckhbw, punpckhbw)
|
||||
AVX_OP3_XO(Punpckldq, punpckldq)
|
||||
AVX_OP3_XO(Punpcklqdq, punpcklqdq)
|
||||
AVX_OP3_XO(Pxor, pxor)
|
||||
AVX_OP3_XO(Andps, andps)
|
||||
AVX_OP3_XO(Andpd, andpd)
|
||||
AVX_OP3_XO(Xorps, xorps)
|
||||
AVX_OP3_XO(Xorpd, xorpd)
|
||||
AVX_OP3_XO(Sqrtss, sqrtss)
|
||||
AVX_OP3_XO(Sqrtsd, sqrtsd)
|
||||
AVX_OP3_XO(Orps, orps)
|
||||
AVX_OP3_XO(Orpd, orpd)
|
||||
AVX_OP3_XO(Andnpd, andnpd)
|
||||
AVX_OP3_WITH_TYPE(Movhlps, movhlps, XMMRegister, XMMRegister)
|
||||
AVX_OP3_WITH_TYPE(Psraw, psraw, XMMRegister, uint8_t)
|
||||
AVX_OP3_WITH_TYPE(Psrlq, psrlq, XMMRegister, uint8_t)
|
||||
|
||||
#undef AVX_OP3_XO
|
||||
#undef AVX_OP3_WITH_TYPE
|
||||
// Defined here because some callers take a pointer to member functions.
|
||||
AVX_OP(Pcmpeqb, pcmpeqb)
|
||||
AVX_OP(Pcmpeqw, pcmpeqw)
|
||||
AVX_OP(Pcmpeqd, pcmpeqd)
|
||||
|
||||
// Same as AVX_OP3_WITH_TYPE but supports a CpuFeatureScope
|
||||
#define AVX_OP2_WITH_TYPE_SCOPE(macro_name, name, dst_type, src_type, \
|
||||
@ -413,26 +370,19 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
|
||||
AVX_PACKED_OP3(Psllq, psllq)
|
||||
AVX_PACKED_OP3(Psrlw, psrlw)
|
||||
AVX_PACKED_OP3(Psrld, psrld)
|
||||
AVX_PACKED_OP3(Psrlq, psrlq)
|
||||
AVX_PACKED_OP3(Psraw, psraw)
|
||||
AVX_PACKED_OP3(Psrad, psrad)
|
||||
AVX_PACKED_OP3(Paddd, paddd)
|
||||
AVX_PACKED_OP3(Paddq, paddq)
|
||||
AVX_PACKED_OP3(Psubd, psubd)
|
||||
AVX_PACKED_OP3(Psubq, psubq)
|
||||
AVX_PACKED_OP3(Pmuludq, pmuludq)
|
||||
AVX_PACKED_OP3(Pavgb, pavgb)
|
||||
AVX_PACKED_OP3(Pavgw, pavgw)
|
||||
AVX_PACKED_OP3(Pand, pand)
|
||||
AVX_PACKED_OP3(Pminub, pminub)
|
||||
AVX_PACKED_OP3(Pmaxub, pmaxub)
|
||||
AVX_PACKED_OP3(Paddusb, paddusb)
|
||||
AVX_PACKED_OP3(Psubusb, psubusb)
|
||||
AVX_PACKED_OP3(Pcmpgtb, pcmpgtb)
|
||||
AVX_PACKED_OP3(Pcmpeqb, pcmpeqb)
|
||||
AVX_PACKED_OP3(Paddb, paddb)
|
||||
AVX_PACKED_OP3(Paddsb, paddsb)
|
||||
AVX_PACKED_OP3(Psubb, psubb)
|
||||
AVX_PACKED_OP3(Psubsb, psubsb)
|
||||
|
||||
#undef AVX_PACKED_OP3
|
||||
@ -442,8 +392,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
|
||||
AVX_PACKED_OP3_WITH_TYPE(Psllq, psllq, XMMRegister, uint8_t)
|
||||
AVX_PACKED_OP3_WITH_TYPE(Psrlw, psrlw, XMMRegister, uint8_t)
|
||||
AVX_PACKED_OP3_WITH_TYPE(Psrld, psrld, XMMRegister, uint8_t)
|
||||
AVX_PACKED_OP3_WITH_TYPE(Psrlq, psrlq, XMMRegister, uint8_t)
|
||||
AVX_PACKED_OP3_WITH_TYPE(Psraw, psraw, XMMRegister, uint8_t)
|
||||
AVX_PACKED_OP3_WITH_TYPE(Psrad, psrad, XMMRegister, uint8_t)
|
||||
|
||||
#undef AVX_PACKED_OP3_WITH_TYPE
|
||||
|
@ -39,120 +39,185 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Dst, typename... Args>
|
||||
// Helper struct to implement functions that check for AVX support and
|
||||
// dispatch to the appropriate AVX/SSE instruction.
|
||||
template <typename Dst, typename Arg, typename... Args>
|
||||
struct AvxHelper {
|
||||
Assembler* assm;
|
||||
base::Optional<CpuFeature> feature = base::nullopt;
|
||||
// Call a method where the AVX version expects the dst argument to be
|
||||
// duplicated.
|
||||
template <void (Assembler::*avx)(Dst, Dst, Args...),
|
||||
void (Assembler::*no_avx)(Dst, Args...)>
|
||||
void emit(Dst dst, Args... args) {
|
||||
// E.g. Andps(x, y) -> vandps(x, x, y)
|
||||
// -> andps(x, y)
|
||||
template <void (Assembler::*avx)(Dst, Dst, Arg, Args...),
|
||||
void (Assembler::*no_avx)(Dst, Arg, Args...)>
|
||||
void emit(Dst dst, Arg arg, Args... args) {
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
CpuFeatureScope scope(assm, AVX);
|
||||
(assm->*avx)(dst, dst, args...);
|
||||
(assm->*avx)(dst, dst, arg, args...);
|
||||
} else if (feature.has_value()) {
|
||||
DCHECK(CpuFeatures::IsSupported(*feature));
|
||||
CpuFeatureScope scope(assm, *feature);
|
||||
(assm->*no_avx)(dst, arg, args...);
|
||||
} else {
|
||||
(assm->*no_avx)(dst, arg, args...);
|
||||
}
|
||||
}
|
||||
|
||||
// Call a method in the AVX form (one more operand), but if unsupported will
|
||||
// check that dst == first src.
|
||||
// E.g. Andps(x, y, z) -> vandps(x, y, z)
|
||||
// -> andps(x, z) and check that x == y
|
||||
template <void (Assembler::*avx)(Dst, Arg, Args...),
|
||||
void (Assembler::*no_avx)(Dst, Args...)>
|
||||
void emit(Dst dst, Arg arg, Args... args) {
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
CpuFeatureScope scope(assm, AVX);
|
||||
(assm->*avx)(dst, arg, args...);
|
||||
} else if (feature.has_value()) {
|
||||
DCHECK_EQ(dst, arg);
|
||||
DCHECK(CpuFeatures::IsSupported(*feature));
|
||||
CpuFeatureScope scope(assm, *feature);
|
||||
(assm->*no_avx)(dst, args...);
|
||||
} else {
|
||||
DCHECK_EQ(dst, arg);
|
||||
(assm->*no_avx)(dst, args...);
|
||||
}
|
||||
}
|
||||
|
||||
// Call a method where the AVX version expects no duplicated dst argument.
|
||||
template <void (Assembler::*avx)(Dst, Args...),
|
||||
void (Assembler::*no_avx)(Dst, Args...)>
|
||||
void emit(Dst dst, Args... args) {
|
||||
// E.g. Movddup(x, y) -> vmovddup(x, y)
|
||||
// -> movddup(x, y)
|
||||
template <void (Assembler::*avx)(Dst, Arg, Args...),
|
||||
void (Assembler::*no_avx)(Dst, Arg, Args...)>
|
||||
void emit(Dst dst, Arg arg, Args... args) {
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
CpuFeatureScope scope(assm, AVX);
|
||||
(assm->*avx)(dst, args...);
|
||||
(assm->*avx)(dst, arg, args...);
|
||||
} else if (feature.has_value()) {
|
||||
DCHECK(CpuFeatures::IsSupported(*feature));
|
||||
CpuFeatureScope scope(assm, *feature);
|
||||
(assm->*no_avx)(dst, args...);
|
||||
(assm->*no_avx)(dst, arg, args...);
|
||||
} else {
|
||||
(assm->*no_avx)(dst, args...);
|
||||
(assm->*no_avx)(dst, arg, args...);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
#define AVX_OP(macro_name, name) \
|
||||
template <typename Dst, typename... Args> \
|
||||
void macro_name(Dst dst, Args... args) { \
|
||||
AvxHelper<Dst, Args...>{this} \
|
||||
.template emit<&Assembler::v##name, &Assembler::name>(dst, args...); \
|
||||
#define AVX_OP(macro_name, name) \
|
||||
template <typename Dst, typename Arg, typename... Args> \
|
||||
void macro_name(Dst dst, Arg arg, Args... args) { \
|
||||
AvxHelper<Dst, Arg, Args...>{this} \
|
||||
.template emit<&Assembler::v##name, &Assembler::name>(dst, arg, \
|
||||
args...); \
|
||||
}
|
||||
|
||||
#define AVX_OP_SSE3(macro_name, name) \
|
||||
template <typename Dst, typename... Args> \
|
||||
void macro_name(Dst dst, Args... args) { \
|
||||
AvxHelper<Dst, Args...>{this, base::Optional<CpuFeature>(SSE3)} \
|
||||
.template emit<&Assembler::v##name, &Assembler::name>(dst, args...); \
|
||||
#define AVX_OP_SSE3(macro_name, name) \
|
||||
template <typename Dst, typename Arg, typename... Args> \
|
||||
void macro_name(Dst dst, Arg arg, Args... args) { \
|
||||
AvxHelper<Dst, Arg, Args...>{this, base::Optional<CpuFeature>(SSE3)} \
|
||||
.template emit<&Assembler::v##name, &Assembler::name>(dst, arg, \
|
||||
args...); \
|
||||
}
|
||||
|
||||
#define AVX_OP_SSSE3(macro_name, name) \
|
||||
template <typename Dst, typename... Args> \
|
||||
void macro_name(Dst dst, Args... args) { \
|
||||
AvxHelper<Dst, Args...>{this, base::Optional<CpuFeature>(SSSE3)} \
|
||||
.template emit<&Assembler::v##name, &Assembler::name>(dst, args...); \
|
||||
#define AVX_OP_SSSE3(macro_name, name) \
|
||||
template <typename Dst, typename Arg, typename... Args> \
|
||||
void macro_name(Dst dst, Arg arg, Args... args) { \
|
||||
AvxHelper<Dst, Arg, Args...>{this, base::Optional<CpuFeature>(SSSE3)} \
|
||||
.template emit<&Assembler::v##name, &Assembler::name>(dst, arg, \
|
||||
args...); \
|
||||
}
|
||||
|
||||
#define AVX_OP_SSE4_1(macro_name, name) \
|
||||
template <typename Dst, typename... Args> \
|
||||
void macro_name(Dst dst, Args... args) { \
|
||||
AvxHelper<Dst, Args...>{this, base::Optional<CpuFeature>(SSE4_1)} \
|
||||
.template emit<&Assembler::v##name, &Assembler::name>(dst, args...); \
|
||||
#define AVX_OP_SSE4_1(macro_name, name) \
|
||||
template <typename Dst, typename Arg, typename... Args> \
|
||||
void macro_name(Dst dst, Arg arg, Args... args) { \
|
||||
AvxHelper<Dst, Arg, Args...>{this, base::Optional<CpuFeature>(SSE4_1)} \
|
||||
.template emit<&Assembler::v##name, &Assembler::name>(dst, arg, \
|
||||
args...); \
|
||||
}
|
||||
|
||||
#define AVX_OP_SSE4_2(macro_name, name) \
|
||||
template <typename Dst, typename... Args> \
|
||||
void macro_name(Dst dst, Args... args) { \
|
||||
AvxHelper<Dst, Args...>{this, base::Optional<CpuFeature>(SSE4_2)} \
|
||||
.template emit<&Assembler::v##name, &Assembler::name>(dst, args...); \
|
||||
#define AVX_OP_SSE4_2(macro_name, name) \
|
||||
template <typename Dst, typename Arg, typename... Args> \
|
||||
void macro_name(Dst dst, Arg arg, Args... args) { \
|
||||
AvxHelper<Dst, Arg, Args...>{this, base::Optional<CpuFeature>(SSE4_2)} \
|
||||
.template emit<&Assembler::v##name, &Assembler::name>(dst, arg, \
|
||||
args...); \
|
||||
}
|
||||
|
||||
// Keep this list sorted by required extension, then instruction name.
|
||||
AVX_OP(Andnpd, andnpd)
|
||||
AVX_OP(Andpd, andpd)
|
||||
AVX_OP(Andps, andps)
|
||||
AVX_OP(Cvtdq2pd, cvtdq2pd)
|
||||
AVX_OP(Cvtdq2ps, cvtdq2ps)
|
||||
AVX_OP(Cvtps2pd, cvtps2pd)
|
||||
AVX_OP(Cvtpd2ps, cvtpd2ps)
|
||||
AVX_OP(Cvtps2pd, cvtps2pd)
|
||||
AVX_OP(Cvttps2dq, cvttps2dq)
|
||||
AVX_OP(Movaps, movaps)
|
||||
AVX_OP(Movd, movd)
|
||||
AVX_OP(Movhlps, movhlps)
|
||||
AVX_OP(Movhps, movhps)
|
||||
AVX_OP(Movlps, movlps)
|
||||
AVX_OP(Movmskpd, movmskpd)
|
||||
AVX_OP(Movmskps, movmskps)
|
||||
AVX_OP(Movss, movss)
|
||||
AVX_OP(Movsd, movsd)
|
||||
AVX_OP(Movss, movss)
|
||||
AVX_OP(Movupd, movupd)
|
||||
AVX_OP(Movups, movups)
|
||||
AVX_OP(Orpd, orpd)
|
||||
AVX_OP(Orps, orps)
|
||||
AVX_OP(Packssdw, packssdw)
|
||||
AVX_OP(Packsswb, packsswb)
|
||||
AVX_OP(Packuswb, packuswb)
|
||||
AVX_OP(Paddusb, paddusb)
|
||||
AVX_OP(Paddusw, paddusw)
|
||||
AVX_OP(Pand, pand)
|
||||
AVX_OP(Pmovmskb, pmovmskb)
|
||||
AVX_OP(Pmullw, pmullw)
|
||||
AVX_OP(Pshuflw, pshuflw)
|
||||
AVX_OP(Pshufhw, pshufhw)
|
||||
AVX_OP(Por, por)
|
||||
AVX_OP(Pshufd, pshufd)
|
||||
AVX_OP(Pshufhw, pshufhw)
|
||||
AVX_OP(Pshuflw, pshuflw)
|
||||
AVX_OP(Psraw, psraw)
|
||||
AVX_OP(Psrlq, psrlq)
|
||||
AVX_OP(Psubb, psubb)
|
||||
AVX_OP(Psubd, psubd)
|
||||
AVX_OP(Psubq, psubq)
|
||||
AVX_OP(Psubw, psubw)
|
||||
AVX_OP(Punpckhbw, punpckhbw)
|
||||
AVX_OP(Punpckhdq, punpckhdq)
|
||||
AVX_OP(Punpckhqdq, punpckhqdq)
|
||||
AVX_OP(Punpckhwd, punpckhwd)
|
||||
AVX_OP(Punpcklbw, punpcklbw)
|
||||
AVX_OP(Punpckldq, punpckldq)
|
||||
AVX_OP(Punpcklqdq, punpcklqdq)
|
||||
AVX_OP(Punpcklwd, punpcklwd)
|
||||
AVX_OP(Pxor, pxor)
|
||||
AVX_OP(Rcpps, rcpps)
|
||||
AVX_OP(Rsqrtps, rsqrtps)
|
||||
AVX_OP(Sqrtps, sqrtps)
|
||||
AVX_OP(Sqrtpd, sqrtpd)
|
||||
AVX_OP(Sqrtps, sqrtps)
|
||||
AVX_OP(Sqrtsd, sqrtsd)
|
||||
AVX_OP(Sqrtss, sqrtss)
|
||||
AVX_OP(Xorpd, xorpd)
|
||||
AVX_OP(Xorps, xorps)
|
||||
AVX_OP_SSE3(Movddup, movddup)
|
||||
AVX_OP_SSE3(Movshdup, movshdup)
|
||||
AVX_OP_SSSE3(Pabsb, pabsb)
|
||||
AVX_OP_SSSE3(Pabsw, pabsw)
|
||||
AVX_OP_SSSE3(Pabsd, pabsd)
|
||||
AVX_OP_SSSE3(Pabsw, pabsw)
|
||||
AVX_OP_SSE4_1(Extractps, extractps)
|
||||
AVX_OP_SSE4_1(Pextrb, pextrb)
|
||||
AVX_OP_SSE4_1(Pextrw, pextrw)
|
||||
AVX_OP_SSE4_1(Pmovsxbw, pmovsxbw)
|
||||
AVX_OP_SSE4_1(Pmovsxwd, pmovsxwd)
|
||||
AVX_OP_SSE4_1(Pmovsxdq, pmovsxdq)
|
||||
AVX_OP_SSE4_1(Pmovsxwd, pmovsxwd)
|
||||
AVX_OP_SSE4_1(Pmovzxbw, pmovzxbw)
|
||||
AVX_OP_SSE4_1(Pmovzxwd, pmovzxwd)
|
||||
AVX_OP_SSE4_1(Pmovzxdq, pmovzxdq)
|
||||
AVX_OP_SSE4_1(Pmovzxwd, pmovzxwd)
|
||||
AVX_OP_SSE4_1(Ptest, ptest)
|
||||
AVX_OP_SSE4_1(Roundps, roundps)
|
||||
AVX_OP_SSE4_1(Roundpd, roundpd)
|
||||
AVX_OP_SSE4_1(Roundps, roundps)
|
||||
|
||||
void S128Store32Lane(Operand dst, XMMRegister src, uint8_t laneidx);
|
||||
void I16x8ExtMulLow(XMMRegister dst, XMMRegister src1, XMMRegister src2,
|
||||
|
@ -728,6 +728,24 @@ int TurboAssembler::PopCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1,
|
||||
return bytes;
|
||||
}
|
||||
|
||||
// Emits a movq from the general-purpose register `src` into the XMM register
// `dst`. When CpuFeatures::IsSupported(AVX), the AVX form vmovq is emitted
// inside a CpuFeatureScope; otherwise the plain movq is emitted.
void TurboAssembler::Movq(XMMRegister dst, Register src) {
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
CpuFeatureScope avx_scope(this, AVX);
|
||||
vmovq(dst, src);
|
||||
} else {
|
||||
movq(dst, src);
|
||||
}
|
||||
}
|
||||
|
||||
// Emits a movq from the XMM register `src` into the general-purpose register
// `dst`. When CpuFeatures::IsSupported(AVX), the AVX form vmovq is emitted
// inside a CpuFeatureScope; otherwise the plain movq is emitted.
void TurboAssembler::Movq(Register dst, XMMRegister src) {
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
CpuFeatureScope avx_scope(this, AVX);
|
||||
vmovq(dst, src);
|
||||
} else {
|
||||
movq(dst, src);
|
||||
}
|
||||
}
|
||||
|
||||
void TurboAssembler::Movdqa(XMMRegister dst, Operand src) {
|
||||
// See comments in Movdqa(XMMRegister, XMMRegister).
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
@ -2031,16 +2049,6 @@ void TurboAssembler::Psllq(XMMRegister dst, byte imm8) {
|
||||
}
|
||||
}
|
||||
|
||||
// Emits psrlq on `dst` with the immediate `imm8`, in place.
// With AVX support, emits vpsrlq(dst, dst, imm8) inside a CpuFeatureScope;
// otherwise asserts that AVX is not enabled on this assembler and emits
// psrlq(dst, imm8).
// NOTE(review): the enclosing hunk shrinks from 16 to 6 lines, so this
// definition appears to be the one removed by the commit - confirm.
void TurboAssembler::Psrlq(XMMRegister dst, byte imm8) {
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
CpuFeatureScope scope(this, AVX);
|
||||
vpsrlq(dst, dst, imm8);
|
||||
} else {
|
||||
DCHECK(!IsEnabled(AVX));
|
||||
psrlq(dst, imm8);
|
||||
}
|
||||
}
|
||||
|
||||
void TurboAssembler::Pslld(XMMRegister dst, byte imm8) {
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
CpuFeatureScope scope(this, AVX);
|
||||
|
@ -66,14 +66,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
|
||||
AVX_OP(Subsd, subsd)
|
||||
AVX_OP(Divss, divss)
|
||||
AVX_OP(Divsd, divsd)
|
||||
AVX_OP(Orps, orps)
|
||||
AVX_OP(Xorps, xorps)
|
||||
AVX_OP(Xorpd, xorpd)
|
||||
AVX_OP(Movq, movq)
|
||||
AVX_OP(Movhlps, movhlps)
|
||||
AVX_OP(Pcmpeqb, pcmpeqb)
|
||||
AVX_OP(Pcmpeqw, pcmpeqw)
|
||||
AVX_OP(Pcmpeqd, pcmpeqd)
|
||||
AVX_OP(Pcmpgtb, pcmpgtb)
|
||||
AVX_OP(Pcmpgtw, pcmpgtw)
|
||||
AVX_OP(Pmaxsw, pmaxsw)
|
||||
@ -83,11 +75,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
|
||||
AVX_OP(Addss, addss)
|
||||
AVX_OP(Addsd, addsd)
|
||||
AVX_OP(Mulsd, mulsd)
|
||||
AVX_OP(Andps, andps)
|
||||
AVX_OP(Andnps, andnps)
|
||||
AVX_OP(Andpd, andpd)
|
||||
AVX_OP(Andnpd, andnpd)
|
||||
AVX_OP(Orpd, orpd)
|
||||
AVX_OP(Cmpeqps, cmpeqps)
|
||||
AVX_OP(Cmpltps, cmpltps)
|
||||
AVX_OP(Cmpleps, cmpleps)
|
||||
@ -100,18 +88,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
|
||||
AVX_OP(Cmpneqpd, cmpneqpd)
|
||||
AVX_OP(Cmpnltpd, cmpnltpd)
|
||||
AVX_OP(Cmpnlepd, cmpnlepd)
|
||||
AVX_OP(Sqrtss, sqrtss)
|
||||
AVX_OP(Sqrtsd, sqrtsd)
|
||||
AVX_OP(Cvttpd2dq, cvttpd2dq)
|
||||
AVX_OP(Ucomiss, ucomiss)
|
||||
AVX_OP(Ucomisd, ucomisd)
|
||||
AVX_OP(Pand, pand)
|
||||
AVX_OP(Por, por)
|
||||
AVX_OP(Pxor, pxor)
|
||||
AVX_OP(Psubb, psubb)
|
||||
AVX_OP(Psubw, psubw)
|
||||
AVX_OP(Psubd, psubd)
|
||||
AVX_OP(Psubq, psubq)
|
||||
AVX_OP(Psubsb, psubsb)
|
||||
AVX_OP(Psubsw, psubsw)
|
||||
AVX_OP(Psubusb, psubusb)
|
||||
@ -119,21 +98,17 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
|
||||
AVX_OP(Pslld, pslld)
|
||||
AVX_OP(Pavgb, pavgb)
|
||||
AVX_OP(Pavgw, pavgw)
|
||||
AVX_OP(Psraw, psraw)
|
||||
AVX_OP(Psrad, psrad)
|
||||
AVX_OP(Psllw, psllw)
|
||||
AVX_OP(Psllq, psllq)
|
||||
AVX_OP(Psrlw, psrlw)
|
||||
AVX_OP(Psrld, psrld)
|
||||
AVX_OP(Psrlq, psrlq)
|
||||
AVX_OP(Paddb, paddb)
|
||||
AVX_OP(Paddw, paddw)
|
||||
AVX_OP(Paddd, paddd)
|
||||
AVX_OP(Paddq, paddq)
|
||||
AVX_OP(Paddsb, paddsb)
|
||||
AVX_OP(Paddsw, paddsw)
|
||||
AVX_OP(Paddusb, paddusb)
|
||||
AVX_OP(Paddusw, paddusw)
|
||||
AVX_OP(Pcmpgtd, pcmpgtd)
|
||||
AVX_OP(Pmuludq, pmuludq)
|
||||
AVX_OP(Addpd, addpd)
|
||||
@ -148,17 +123,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
|
||||
AVX_OP(Subps, subps)
|
||||
AVX_OP(Mulps, mulps)
|
||||
AVX_OP(Divps, divps)
|
||||
AVX_OP(Packsswb, packsswb)
|
||||
AVX_OP(Packuswb, packuswb)
|
||||
AVX_OP(Packssdw, packssdw)
|
||||
AVX_OP(Punpcklbw, punpcklbw)
|
||||
AVX_OP(Punpcklwd, punpcklwd)
|
||||
AVX_OP(Punpckldq, punpckldq)
|
||||
AVX_OP(Punpckhbw, punpckhbw)
|
||||
AVX_OP(Punpckhwd, punpckhwd)
|
||||
AVX_OP(Punpckhdq, punpckhdq)
|
||||
AVX_OP(Punpcklqdq, punpcklqdq)
|
||||
AVX_OP(Punpckhqdq, punpckhqdq)
|
||||
AVX_OP(Pcmpeqb, pcmpeqb)
|
||||
AVX_OP(Pcmpeqw, pcmpeqw)
|
||||
AVX_OP(Pcmpeqd, pcmpeqd)
|
||||
AVX_OP(Cmpps, cmpps)
|
||||
AVX_OP(Cmppd, cmppd)
|
||||
AVX_OP(Movlhps, movlhps)
|
||||
@ -191,6 +158,12 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
|
||||
|
||||
#undef AVX_OP
|
||||
|
||||
// Define Movq here instead of using AVX_OP. movq is defined using templates,
|
||||
// and there is a function template `void movq(P1)` which, while not a valid
|
||||
// instruction, will be selected when deducing the arguments for AvxHelper.
|
||||
void Movq(XMMRegister dst, Register src);
|
||||
void Movq(Register dst, XMMRegister src);
|
||||
|
||||
void PushReturnAddressFrom(Register src) { pushq(src); }
|
||||
void PopReturnAddressTo(Register dst) { popq(dst); }
|
||||
|
||||
@ -474,8 +447,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public SharedTurboAssembler {
|
||||
|
||||
void Psllq(XMMRegister dst, int imm8) { Psllq(dst, static_cast<byte>(imm8)); }
|
||||
void Psllq(XMMRegister dst, byte imm8);
|
||||
void Psrlq(XMMRegister dst, int imm8) { Psrlq(dst, static_cast<byte>(imm8)); }
|
||||
void Psrlq(XMMRegister dst, byte imm8);
|
||||
void Pslld(XMMRegister dst, byte imm8);
|
||||
void Psrld(XMMRegister dst, byte imm8);
|
||||
|
||||
|
@ -1978,7 +1978,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
// Canonicalize NaNs by quieting and clearing the payload.
|
||||
__ Cmpunordpd(dst, dst, tmp);
|
||||
__ Orpd(tmp, dst);
|
||||
__ Psrlq(dst, 13);
|
||||
__ Psrlq(dst, byte{13});
|
||||
__ Andnpd(dst, tmp);
|
||||
break;
|
||||
}
|
||||
@ -2000,7 +2000,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ Subpd(tmp, tmp, dst);
|
||||
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
|
||||
__ Cmpunordpd(dst, dst, tmp);
|
||||
__ Psrlq(dst, 13);
|
||||
__ Psrlq(dst, byte{13});
|
||||
__ Andnpd(dst, tmp);
|
||||
break;
|
||||
}
|
||||
@ -2210,11 +2210,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ Movaps(tmp2, right);
|
||||
|
||||
// Multiply high dword of each qword of left with right.
|
||||
__ Psrlq(tmp1, 32);
|
||||
__ Psrlq(tmp1, byte{32});
|
||||
__ Pmuludq(tmp1, tmp1, right);
|
||||
|
||||
// Multiply high dword of each qword of right with left.
|
||||
__ Psrlq(tmp2, 32);
|
||||
__ Psrlq(tmp2, byte{32});
|
||||
__ Pmuludq(tmp2, tmp2, left);
|
||||
|
||||
__ Paddq(tmp2, tmp2, tmp1);
|
||||
|
@ -1478,7 +1478,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
// TODO(bmeurer): Use RIP relative 128-bit constants.
|
||||
XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
|
||||
__ Pcmpeqd(tmp, tmp);
|
||||
__ Psrlq(tmp, 33);
|
||||
__ Psrlq(tmp, byte{33});
|
||||
__ Andps(i.OutputDoubleRegister(), tmp);
|
||||
break;
|
||||
}
|
||||
@ -2441,7 +2441,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
// Canonicalize NaNs by quieting and clearing the payload.
|
||||
__ Cmppd(dst, kScratchDoubleReg, int8_t{3});
|
||||
__ Orpd(kScratchDoubleReg, dst);
|
||||
__ Psrlq(dst, 13);
|
||||
__ Psrlq(dst, byte{13});
|
||||
__ Andnpd(dst, kScratchDoubleReg);
|
||||
break;
|
||||
}
|
||||
@ -2462,7 +2462,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ Subpd(kScratchDoubleReg, dst);
|
||||
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
|
||||
__ Cmppd(dst, kScratchDoubleReg, int8_t{3});
|
||||
__ Psrlq(dst, 13);
|
||||
__ Psrlq(dst, byte{13});
|
||||
__ Andnpd(dst, kScratchDoubleReg);
|
||||
break;
|
||||
}
|
||||
@ -2843,11 +2843,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ Movdqa(tmp2, right);
|
||||
|
||||
// Multiply high dword of each qword of left with right.
|
||||
__ Psrlq(tmp1, 32);
|
||||
__ Psrlq(tmp1, byte{32});
|
||||
__ Pmuludq(tmp1, right);
|
||||
|
||||
// Multiply high dword of each qword of right with left.
|
||||
__ Psrlq(tmp2, 32);
|
||||
__ Psrlq(tmp2, byte{32});
|
||||
__ Pmuludq(tmp2, left);
|
||||
|
||||
__ Paddq(tmp2, tmp1);
|
||||
|
@ -3921,7 +3921,7 @@ void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst,
|
||||
Pcmpeqb(tmp, tmp);
|
||||
Psllq(tmp, tmp, 63);
|
||||
|
||||
Psrlq(tmp, tmp, shift);
|
||||
Psrlq(tmp, tmp, byte{shift});
|
||||
liftoff::EmitSimdShiftOpImm<&Assembler::vpsrlq, &Assembler::psrlq, 6>(
|
||||
this, dst, lhs, rhs);
|
||||
Pxor(dst.fp(), tmp);
|
||||
@ -3963,10 +3963,10 @@ void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
Movaps(tmp1.fp(), lhs.fp());
|
||||
Movaps(tmp2.fp(), rhs.fp());
|
||||
// Multiply high dword of each qword of left with right.
|
||||
Psrlq(tmp1.fp(), 32);
|
||||
Psrlq(tmp1.fp(), byte{32});
|
||||
Pmuludq(tmp1.fp(), tmp1.fp(), rhs.fp());
|
||||
// Multiply high dword of each qword of right with left.
|
||||
Psrlq(tmp2.fp(), 32);
|
||||
Psrlq(tmp2.fp(), byte{32});
|
||||
Pmuludq(tmp2.fp(), tmp2.fp(), lhs.fp());
|
||||
Paddq(tmp2.fp(), tmp2.fp(), tmp1.fp());
|
||||
Psllq(tmp2.fp(), tmp2.fp(), 32);
|
||||
@ -4188,11 +4188,11 @@ void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
if (dst.fp() == src.fp()) {
|
||||
Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
|
||||
Psrlq(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, 1);
|
||||
Psrlq(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, byte{1});
|
||||
Andpd(dst.fp(), liftoff::kScratchDoubleReg);
|
||||
} else {
|
||||
Pcmpeqd(dst.fp(), dst.fp());
|
||||
Psrlq(dst.fp(), dst.fp(), 1);
|
||||
Psrlq(dst.fp(), dst.fp(), byte{1});
|
||||
Andpd(dst.fp(), src.fp());
|
||||
}
|
||||
}
|
||||
@ -4291,7 +4291,7 @@ void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
// Canonicalize NaNs by quieting and clearing the payload.
|
||||
Cmpunordpd(dst.fp(), dst.fp(), liftoff::kScratchDoubleReg);
|
||||
Orpd(liftoff::kScratchDoubleReg, dst.fp());
|
||||
Psrlq(dst.fp(), 13);
|
||||
Psrlq(dst.fp(), byte{13});
|
||||
Andnpd(dst.fp(), liftoff::kScratchDoubleReg);
|
||||
}
|
||||
|
||||
@ -4322,7 +4322,7 @@ void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
Subpd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, dst.fp());
|
||||
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
|
||||
Cmpunordpd(dst.fp(), dst.fp(), liftoff::kScratchDoubleReg);
|
||||
Psrlq(dst.fp(), 13);
|
||||
Psrlq(dst.fp(), byte{13});
|
||||
Andnpd(dst.fp(), liftoff::kScratchDoubleReg);
|
||||
}
|
||||
|
||||
|
@ -3504,10 +3504,10 @@ void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
Movaps(tmp1.fp(), lhs.fp());
|
||||
Movaps(tmp2.fp(), rhs.fp());
|
||||
// Multiply high dword of each qword of left with right.
|
||||
Psrlq(tmp1.fp(), 32);
|
||||
Psrlq(tmp1.fp(), byte{32});
|
||||
Pmuludq(tmp1.fp(), rhs.fp());
|
||||
// Multiply high dword of each qword of right with left.
|
||||
Psrlq(tmp2.fp(), 32);
|
||||
Psrlq(tmp2.fp(), byte{32});
|
||||
Pmuludq(tmp2.fp(), lhs.fp());
|
||||
Paddq(tmp2.fp(), tmp1.fp());
|
||||
Psllq(tmp2.fp(), 32);
|
||||
@ -3729,11 +3729,11 @@ void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
if (dst.fp() == src.fp()) {
|
||||
Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
|
||||
Psrlq(kScratchDoubleReg, static_cast<byte>(1));
|
||||
Psrlq(kScratchDoubleReg, byte{1});
|
||||
Andpd(dst.fp(), kScratchDoubleReg);
|
||||
} else {
|
||||
Pcmpeqd(dst.fp(), dst.fp());
|
||||
Psrlq(dst.fp(), static_cast<byte>(1));
|
||||
Psrlq(dst.fp(), byte{1});
|
||||
Andpd(dst.fp(), src.fp());
|
||||
}
|
||||
}
|
||||
@ -3832,7 +3832,7 @@ void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
// Canonicalize NaNs by quieting and clearing the payload.
|
||||
Cmppd(dst.fp(), kScratchDoubleReg, int8_t{3});
|
||||
Orpd(kScratchDoubleReg, dst.fp());
|
||||
Psrlq(dst.fp(), 13);
|
||||
Psrlq(dst.fp(), byte{13});
|
||||
Andnpd(dst.fp(), kScratchDoubleReg);
|
||||
}
|
||||
|
||||
@ -3863,7 +3863,7 @@ void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
|
||||
Subpd(kScratchDoubleReg, dst.fp());
|
||||
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
|
||||
Cmppd(dst.fp(), kScratchDoubleReg, int8_t{3});
|
||||
Psrlq(dst.fp(), 13);
|
||||
Psrlq(dst.fp(), byte{13});
|
||||
Andnpd(dst.fp(), kScratchDoubleReg);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user