[x64] Introduce FMA3 instructions on scalar data elements.
R=bmeurer@chromium.org Review URL: https://codereview.chromium.org/757503002 Patch from Weiliang Lin <weiliang.lin@intel.com>. Cr-Commit-Position: refs/heads/master@{#25509}
This commit is contained in:
parent
819955b278
commit
83a635e0d7
@ -291,32 +291,35 @@ static bool HasListItem(const char* list, const char* item) {
|
||||
|
||||
#endif // V8_HOST_ARCH_IA32 || V8_HOST_ARCH_X64
|
||||
|
||||
CPU::CPU() : stepping_(0),
|
||||
model_(0),
|
||||
ext_model_(0),
|
||||
family_(0),
|
||||
ext_family_(0),
|
||||
type_(0),
|
||||
implementer_(0),
|
||||
architecture_(0),
|
||||
part_(0),
|
||||
has_fpu_(false),
|
||||
has_cmov_(false),
|
||||
has_sahf_(false),
|
||||
has_mmx_(false),
|
||||
has_sse_(false),
|
||||
has_sse2_(false),
|
||||
has_sse3_(false),
|
||||
has_ssse3_(false),
|
||||
has_sse41_(false),
|
||||
has_sse42_(false),
|
||||
has_idiva_(false),
|
||||
has_neon_(false),
|
||||
has_thumb2_(false),
|
||||
has_vfp_(false),
|
||||
has_vfp3_(false),
|
||||
has_vfp3_d32_(false),
|
||||
is_fp64_mode_(false) {
|
||||
CPU::CPU()
|
||||
: stepping_(0),
|
||||
model_(0),
|
||||
ext_model_(0),
|
||||
family_(0),
|
||||
ext_family_(0),
|
||||
type_(0),
|
||||
implementer_(0),
|
||||
architecture_(0),
|
||||
part_(0),
|
||||
has_fpu_(false),
|
||||
has_cmov_(false),
|
||||
has_sahf_(false),
|
||||
has_mmx_(false),
|
||||
has_sse_(false),
|
||||
has_sse2_(false),
|
||||
has_sse3_(false),
|
||||
has_ssse3_(false),
|
||||
has_sse41_(false),
|
||||
has_sse42_(false),
|
||||
has_avx_(false),
|
||||
has_fma3_(false),
|
||||
has_idiva_(false),
|
||||
has_neon_(false),
|
||||
has_thumb2_(false),
|
||||
has_vfp_(false),
|
||||
has_vfp3_(false),
|
||||
has_vfp3_d32_(false),
|
||||
is_fp64_mode_(false) {
|
||||
memcpy(vendor_, "Unknown", 8);
|
||||
#if V8_OS_NACL
|
||||
// Portable host shouldn't do feature detection.
|
||||
@ -356,6 +359,8 @@ CPU::CPU() : stepping_(0),
|
||||
has_ssse3_ = (cpu_info[2] & 0x00000200) != 0;
|
||||
has_sse41_ = (cpu_info[2] & 0x00080000) != 0;
|
||||
has_sse42_ = (cpu_info[2] & 0x00100000) != 0;
|
||||
has_avx_ = (cpu_info[2] & 0x18000000) != 0;
|
||||
if (has_avx_) has_fma3_ = (cpu_info[2] & 0x00001000) != 0;
|
||||
}
|
||||
|
||||
#if V8_HOST_ARCH_IA32
|
||||
|
@ -68,6 +68,8 @@ class CPU FINAL {
|
||||
bool has_ssse3() const { return has_ssse3_; }
|
||||
bool has_sse41() const { return has_sse41_; }
|
||||
bool has_sse42() const { return has_sse42_; }
|
||||
bool has_avx() const { return has_avx_; }
|
||||
bool has_fma3() const { return has_fma3_; }
|
||||
|
||||
// arm features
|
||||
bool has_idiva() const { return has_idiva_; }
|
||||
@ -101,6 +103,8 @@ class CPU FINAL {
|
||||
bool has_ssse3_;
|
||||
bool has_sse41_;
|
||||
bool has_sse42_;
|
||||
bool has_avx_;
|
||||
bool has_fma3_;
|
||||
bool has_idiva_;
|
||||
bool has_neon_;
|
||||
bool has_thumb2_;
|
||||
|
@ -421,6 +421,8 @@ DEFINE_BOOL(enable_sse4_1, true,
|
||||
"enable use of SSE4.1 instructions if available")
|
||||
DEFINE_BOOL(enable_sahf, true,
|
||||
"enable use of SAHF instruction if available (X64 only)")
|
||||
DEFINE_BOOL(enable_avx, true, "enable use of AVX instructions if available")
|
||||
DEFINE_BOOL(enable_fma3, true, "enable use of FMA3 instructions if available")
|
||||
DEFINE_BOOL(enable_vfp3, ENABLE_VFP3_DEFAULT,
|
||||
"enable use of VFP3 instructions if available")
|
||||
DEFINE_BOOL(enable_armv7, ENABLE_ARMV7_DEFAULT,
|
||||
|
@ -617,6 +617,8 @@ enum CpuFeature {
|
||||
SSE4_1,
|
||||
SSE3,
|
||||
SAHF,
|
||||
AVX,
|
||||
FMA3,
|
||||
// ARM
|
||||
VFP3,
|
||||
ARMv7,
|
||||
|
@ -27,12 +27,19 @@ void CpuFeatures::ProbeImpl(bool cross_compile) {
|
||||
if (cpu.has_sse41() && FLAG_enable_sse4_1) supported_ |= 1u << SSE4_1;
|
||||
if (cpu.has_sse3() && FLAG_enable_sse3) supported_ |= 1u << SSE3;
|
||||
// SAHF is not generally available in long mode.
|
||||
if (cpu.has_sahf() && FLAG_enable_sahf) supported_|= 1u << SAHF;
|
||||
if (cpu.has_sahf() && FLAG_enable_sahf) supported_ |= 1u << SAHF;
|
||||
if (cpu.has_avx() && FLAG_enable_avx) supported_ |= 1u << AVX;
|
||||
if (cpu.has_fma3() && FLAG_enable_fma3) supported_ |= 1u << FMA3;
|
||||
}
|
||||
|
||||
|
||||
void CpuFeatures::PrintTarget() { }
|
||||
void CpuFeatures::PrintFeatures() { }
|
||||
void CpuFeatures::PrintFeatures() {
|
||||
printf("SSE3=%d SSE4_1=%d SAHF=%d AVX=%d FMA3=%d\n",
|
||||
CpuFeatures::IsSupported(SSE3), CpuFeatures::IsSupported(SSE4_1),
|
||||
CpuFeatures::IsSupported(SAHF), CpuFeatures::IsSupported(AVX),
|
||||
CpuFeatures::IsSupported(FMA3));
|
||||
}
|
||||
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
@ -2638,6 +2645,104 @@ void Assembler::movapd(XMMRegister dst, XMMRegister src) {
|
||||
}
|
||||
|
||||
|
||||
// Emits ADDSS with two XMM register operands: the byte sequence
// F3, optional REX, 0F, 58, followed by the operand encoding.
void Assembler::addss(XMMRegister dst, XMMRegister src) {
  EnsureSpace space_guard(this);
  emit(0xF3);  // Prefix for the scalar single-precision form.
  emit_optional_rex_32(dst, src);
  emit(0x0F);  // Two-byte opcode escape.
  emit(0x58);  // ADDSS opcode.
  emit_sse_operand(dst, src);
}
|
||||
|
||||
|
||||
// Emits ADDSS with an XMM destination and a memory source operand: the byte
// sequence F3, optional REX, 0F, 58, followed by the operand encoding.
void Assembler::addss(XMMRegister dst, const Operand& src) {
  EnsureSpace space_guard(this);
  emit(0xF3);  // Prefix for the scalar single-precision form.
  emit_optional_rex_32(dst, src);
  emit(0x0F);  // Two-byte opcode escape.
  emit(0x58);  // ADDSS opcode.
  emit_sse_operand(dst, src);
}
|
||||
|
||||
|
||||
// Emits SUBSS with two XMM register operands: the byte sequence
// F3, optional REX, 0F, 5C, followed by the operand encoding.
void Assembler::subss(XMMRegister dst, XMMRegister src) {
  EnsureSpace space_guard(this);
  emit(0xF3);  // Prefix for the scalar single-precision form.
  emit_optional_rex_32(dst, src);
  emit(0x0F);  // Two-byte opcode escape.
  emit(0x5C);  // SUBSS opcode.
  emit_sse_operand(dst, src);
}
|
||||
|
||||
|
||||
// Emits SUBSS with an XMM destination and a memory source operand: the byte
// sequence F3, optional REX, 0F, 5C, followed by the operand encoding.
void Assembler::subss(XMMRegister dst, const Operand& src) {
  EnsureSpace space_guard(this);
  emit(0xF3);  // Prefix for the scalar single-precision form.
  emit_optional_rex_32(dst, src);
  emit(0x0F);  // Two-byte opcode escape.
  emit(0x5C);  // SUBSS opcode.
  emit_sse_operand(dst, src);
}
|
||||
|
||||
|
||||
// Emits MULSS with two XMM register operands: the byte sequence
// F3, optional REX, 0F, 59, followed by the operand encoding.
void Assembler::mulss(XMMRegister dst, XMMRegister src) {
  EnsureSpace space_guard(this);
  emit(0xF3);  // Prefix for the scalar single-precision form.
  emit_optional_rex_32(dst, src);
  emit(0x0F);  // Two-byte opcode escape.
  emit(0x59);  // MULSS opcode.
  emit_sse_operand(dst, src);
}
|
||||
|
||||
|
||||
// Emits MULSS with an XMM destination and a memory source operand: the byte
// sequence F3, optional REX, 0F, 59, followed by the operand encoding.
void Assembler::mulss(XMMRegister dst, const Operand& src) {
  EnsureSpace space_guard(this);
  emit(0xF3);  // Prefix for the scalar single-precision form.
  emit_optional_rex_32(dst, src);
  emit(0x0F);  // Two-byte opcode escape.
  emit(0x59);  // MULSS opcode.
  emit_sse_operand(dst, src);
}
|
||||
|
||||
|
||||
// Emits DIVSS with two XMM register operands: the byte sequence
// F3, optional REX, 0F, 5E, followed by the operand encoding.
void Assembler::divss(XMMRegister dst, XMMRegister src) {
  EnsureSpace space_guard(this);
  emit(0xF3);  // Prefix for the scalar single-precision form.
  emit_optional_rex_32(dst, src);
  emit(0x0F);  // Two-byte opcode escape.
  emit(0x5E);  // DIVSS opcode.
  emit_sse_operand(dst, src);
}
|
||||
|
||||
|
||||
// Emits DIVSS with an XMM destination and a memory source operand: the byte
// sequence F3, optional REX, 0F, 5E, followed by the operand encoding.
void Assembler::divss(XMMRegister dst, const Operand& src) {
  EnsureSpace space_guard(this);
  emit(0xF3);  // Prefix for the scalar single-precision form.
  emit_optional_rex_32(dst, src);
  emit(0x0F);  // Two-byte opcode escape.
  emit(0x5E);  // DIVSS opcode.
  emit_sse_operand(dst, src);
}
|
||||
|
||||
|
||||
// Emits UCOMISS with two XMM register operands: optional REX, 0F, 2E,
// followed by the operand encoding. Unlike the arithmetic *ss emitters in
// this file, no F3 prefix byte is written.
void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
  EnsureSpace space_guard(this);
  emit_optional_rex_32(dst, src);
  emit(0x0f);  // Two-byte opcode escape.
  emit(0x2e);  // UCOMISS opcode.
  emit_sse_operand(dst, src);
}
|
||||
|
||||
|
||||
// Emits UCOMISS with an XMM register and a memory operand: optional REX,
// 0F, 2E, followed by the operand encoding. Unlike the arithmetic *ss
// emitters in this file, no F3 prefix byte is written.
void Assembler::ucomiss(XMMRegister dst, const Operand& src) {
  EnsureSpace space_guard(this);
  emit_optional_rex_32(dst, src);
  emit(0x0f);  // Two-byte opcode escape.
  emit(0x2e);  // UCOMISS opcode.
  emit_sse_operand(dst, src);
}
|
||||
|
||||
|
||||
void Assembler::movss(XMMRegister dst, const Operand& src) {
|
||||
EnsureSpace ensure_space(this);
|
||||
emit(0xF3); // single
|
||||
@ -3077,6 +3182,86 @@ void Assembler::pcmpeqd(XMMRegister dst, XMMRegister src) {
|
||||
}
|
||||
|
||||
|
||||
// byte 1 of 3-byte VEX
|
||||
void Assembler::emit_vex3_byte1(XMMRegister reg, XMMRegister rm, byte m) {
|
||||
DCHECK(1 <= m && m <= 3);
|
||||
byte rxb = ~((reg.high_bit() << 2) | rm.high_bit()) << 5;
|
||||
emit(rxb | m);
|
||||
}
|
||||
|
||||
|
||||
// byte 1 of 3-byte VEX
|
||||
void Assembler::emit_vex3_byte1(XMMRegister reg, const Operand& rm, byte m) {
|
||||
DCHECK(1 <= m && m <= 3);
|
||||
byte rxb = ~((reg.high_bit() << 2) | rm.rex_) << 5;
|
||||
emit(rxb | m);
|
||||
}
|
||||
|
||||
|
||||
// byte 1 of 2-byte VEX
|
||||
void Assembler::emit_vex2_byte1(XMMRegister reg, XMMRegister v, byte lpp) {
|
||||
DCHECK(lpp <= 3);
|
||||
byte rv = ~((reg.high_bit() << 4) | v.code()) << 3;
|
||||
emit(rv | lpp);
|
||||
}
|
||||
|
||||
|
||||
// byte 2 of 3-byte VEX
|
||||
void Assembler::emit_vex3_byte2(byte w, XMMRegister v, byte lpp) {
|
||||
DCHECK(w <= 1);
|
||||
DCHECK(lpp <= 3);
|
||||
emit((w << 7) | ((~v.code() & 0xf) << 3) | lpp);
|
||||
}
|
||||
|
||||
|
||||
// Emits a scalar double-precision FMA3 instruction with opcode byte |op| and
// three XMM register operands, using a 3-byte VEX prefix. Requires FMA3 to
// be enabled on this assembler.
void Assembler::vfmasd(byte op, XMMRegister dst, XMMRegister src1,
                       XMMRegister src2) {
  DCHECK(IsEnabled(FMA3));
  EnsureSpace space_guard(this);
  const byte map_select = 0x02;  // Decoded as the 0F38 map by the disassembler.
  const byte vex_w = 0x1;        // W = 1 selects the "sd" form.
  const byte vex_lpp = 0x01;     // Decoded as the 66 prefix by the disassembler.
  emit_vex3_byte0();
  emit_vex3_byte1(dst, src2, map_select);
  emit_vex3_byte2(vex_w, src1, vex_lpp);
  emit(op);
  emit_sse_operand(dst, src2);
}
|
||||
|
||||
|
||||
// Emits a scalar double-precision FMA3 instruction with opcode byte |op|,
// two XMM register operands and a memory operand, using a 3-byte VEX prefix.
// Requires FMA3 to be enabled on this assembler.
void Assembler::vfmasd(byte op, XMMRegister dst, XMMRegister src1,
                       const Operand& src2) {
  DCHECK(IsEnabled(FMA3));
  EnsureSpace space_guard(this);
  const byte map_select = 0x02;  // Decoded as the 0F38 map by the disassembler.
  const byte vex_w = 0x1;        // W = 1 selects the "sd" form.
  const byte vex_lpp = 0x01;     // Decoded as the 66 prefix by the disassembler.
  emit_vex3_byte0();
  emit_vex3_byte1(dst, src2, map_select);
  emit_vex3_byte2(vex_w, src1, vex_lpp);
  emit(op);
  emit_sse_operand(dst, src2);
}
|
||||
|
||||
|
||||
// Emits a scalar single-precision FMA3 instruction with opcode byte |op| and
// three XMM register operands, using a 3-byte VEX prefix. Requires FMA3 to
// be enabled on this assembler.
void Assembler::vfmass(byte op, XMMRegister dst, XMMRegister src1,
                       XMMRegister src2) {
  DCHECK(IsEnabled(FMA3));
  EnsureSpace space_guard(this);
  const byte map_select = 0x02;  // Decoded as the 0F38 map by the disassembler.
  const byte vex_w = 0x0;        // W = 0 selects the "ss" form.
  const byte vex_lpp = 0x01;     // Decoded as the 66 prefix by the disassembler.
  emit_vex3_byte0();
  emit_vex3_byte1(dst, src2, map_select);
  emit_vex3_byte2(vex_w, src1, vex_lpp);
  emit(op);
  emit_sse_operand(dst, src2);
}
|
||||
|
||||
|
||||
// Emits a scalar single-precision FMA3 instruction with opcode byte |op|,
// two XMM register operands and a memory operand, using a 3-byte VEX prefix.
// Requires FMA3 to be enabled on this assembler.
void Assembler::vfmass(byte op, XMMRegister dst, XMMRegister src1,
                       const Operand& src2) {
  DCHECK(IsEnabled(FMA3));
  EnsureSpace space_guard(this);
  const byte map_select = 0x02;  // Decoded as the 0F38 map by the disassembler.
  const byte vex_w = 0x0;        // W = 0 selects the "ss" form.
  const byte vex_lpp = 0x01;     // Decoded as the 66 prefix by the disassembler.
  emit_vex3_byte0();
  emit_vex3_byte1(dst, src2, map_select);
  emit_vex3_byte2(vex_w, src1, vex_lpp);
  emit(op);
  emit_sse_operand(dst, src2);
}
|
||||
|
||||
|
||||
void Assembler::emit_sse_operand(XMMRegister reg, const Operand& adr) {
|
||||
Register ireg = { reg.code() };
|
||||
emit_operand(ireg, adr);
|
||||
|
@ -1014,6 +1014,17 @@ class Assembler : public AssemblerBase {
|
||||
void sahf();
|
||||
|
||||
// SSE instructions
|
||||
void addss(XMMRegister dst, XMMRegister src);
|
||||
void addss(XMMRegister dst, const Operand& src);
|
||||
void subss(XMMRegister dst, XMMRegister src);
|
||||
void subss(XMMRegister dst, const Operand& src);
|
||||
void mulss(XMMRegister dst, XMMRegister src);
|
||||
void mulss(XMMRegister dst, const Operand& src);
|
||||
void divss(XMMRegister dst, XMMRegister src);
|
||||
void divss(XMMRegister dst, const Operand& src);
|
||||
|
||||
void ucomiss(XMMRegister dst, XMMRegister src);
|
||||
void ucomiss(XMMRegister dst, const Operand& src);
|
||||
void movaps(XMMRegister dst, XMMRegister src);
|
||||
void movss(XMMRegister dst, const Operand& src);
|
||||
void movss(const Operand& dst, XMMRegister src);
|
||||
@ -1123,6 +1134,157 @@ class Assembler : public AssemblerBase {
|
||||
|
||||
void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);
|
||||
|
||||
// FMA3 instructions (VEX-encoded scalar fused multiply-add)
|
||||
void vfmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
|
||||
vfmasd(0x99, dst, src1, src2);
|
||||
}
|
||||
void vfmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
|
||||
vfmasd(0xa9, dst, src1, src2);
|
||||
}
|
||||
void vfmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
|
||||
vfmasd(0xb9, dst, src1, src2);
|
||||
}
|
||||
void vfmadd132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
|
||||
vfmasd(0x99, dst, src1, src2);
|
||||
}
|
||||
void vfmadd213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
|
||||
vfmasd(0xa9, dst, src1, src2);
|
||||
}
|
||||
void vfmadd231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
|
||||
vfmasd(0xb9, dst, src1, src2);
|
||||
}
|
||||
void vfmsub132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
|
||||
vfmasd(0x9b, dst, src1, src2);
|
||||
}
|
||||
void vfmsub213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
|
||||
vfmasd(0xab, dst, src1, src2);
|
||||
}
|
||||
void vfmsub231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
|
||||
vfmasd(0xbb, dst, src1, src2);
|
||||
}
|
||||
void vfmsub132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
|
||||
vfmasd(0x9b, dst, src1, src2);
|
||||
}
|
||||
void vfmsub213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
|
||||
vfmasd(0xab, dst, src1, src2);
|
||||
}
|
||||
void vfmsub231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
|
||||
vfmasd(0xbb, dst, src1, src2);
|
||||
}
|
||||
void vfnmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
|
||||
vfmasd(0x9d, dst, src1, src2);
|
||||
}
|
||||
void vfnmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
|
||||
vfmasd(0xad, dst, src1, src2);
|
||||
}
|
||||
void vfnmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
|
||||
vfmasd(0xbd, dst, src1, src2);
|
||||
}
|
||||
void vfnmadd132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
|
||||
vfmasd(0x9d, dst, src1, src2);
|
||||
}
|
||||
void vfnmadd213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
|
||||
vfmasd(0xad, dst, src1, src2);
|
||||
}
|
||||
void vfnmadd231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
|
||||
vfmasd(0xbd, dst, src1, src2);
|
||||
}
|
||||
void vfnmsub132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
|
||||
vfmasd(0x9f, dst, src1, src2);
|
||||
}
|
||||
void vfnmsub213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
|
||||
vfmasd(0xaf, dst, src1, src2);
|
||||
}
|
||||
void vfnmsub231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
|
||||
vfmasd(0xbf, dst, src1, src2);
|
||||
}
|
||||
void vfnmsub132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
|
||||
vfmasd(0x9f, dst, src1, src2);
|
||||
}
|
||||
void vfnmsub213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
|
||||
vfmasd(0xaf, dst, src1, src2);
|
||||
}
|
||||
void vfnmsub231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
|
||||
vfmasd(0xbf, dst, src1, src2);
|
||||
}
|
||||
void vfmasd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
|
||||
void vfmasd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
|
||||
|
||||
void vfmadd132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
|
||||
vfmass(0x99, dst, src1, src2);
|
||||
}
|
||||
void vfmadd213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
|
||||
vfmass(0xa9, dst, src1, src2);
|
||||
}
|
||||
void vfmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
|
||||
vfmass(0xb9, dst, src1, src2);
|
||||
}
|
||||
void vfmadd132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
|
||||
vfmass(0x99, dst, src1, src2);
|
||||
}
|
||||
void vfmadd213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
|
||||
vfmass(0xa9, dst, src1, src2);
|
||||
}
|
||||
void vfmadd231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
|
||||
vfmass(0xb9, dst, src1, src2);
|
||||
}
|
||||
void vfmsub132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
|
||||
vfmass(0x9b, dst, src1, src2);
|
||||
}
|
||||
void vfmsub213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
|
||||
vfmass(0xab, dst, src1, src2);
|
||||
}
|
||||
void vfmsub231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
|
||||
vfmass(0xbb, dst, src1, src2);
|
||||
}
|
||||
void vfmsub132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
|
||||
vfmass(0x9b, dst, src1, src2);
|
||||
}
|
||||
void vfmsub213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
|
||||
vfmass(0xab, dst, src1, src2);
|
||||
}
|
||||
void vfmsub231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
|
||||
vfmass(0xbb, dst, src1, src2);
|
||||
}
|
||||
void vfnmadd132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
|
||||
vfmass(0x9d, dst, src1, src2);
|
||||
}
|
||||
void vfnmadd213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
|
||||
vfmass(0xad, dst, src1, src2);
|
||||
}
|
||||
void vfnmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
|
||||
vfmass(0xbd, dst, src1, src2);
|
||||
}
|
||||
void vfnmadd132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
|
||||
vfmass(0x9d, dst, src1, src2);
|
||||
}
|
||||
void vfnmadd213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
|
||||
vfmass(0xad, dst, src1, src2);
|
||||
}
|
||||
void vfnmadd231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
|
||||
vfmass(0xbd, dst, src1, src2);
|
||||
}
|
||||
void vfnmsub132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
|
||||
vfmass(0x9f, dst, src1, src2);
|
||||
}
|
||||
void vfnmsub213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
|
||||
vfmass(0xaf, dst, src1, src2);
|
||||
}
|
||||
void vfnmsub231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
|
||||
vfmass(0xbf, dst, src1, src2);
|
||||
}
|
||||
void vfnmsub132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
|
||||
vfmass(0x9f, dst, src1, src2);
|
||||
}
|
||||
void vfnmsub213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
|
||||
vfmass(0xaf, dst, src1, src2);
|
||||
}
|
||||
void vfnmsub231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
|
||||
vfmass(0xbf, dst, src1, src2);
|
||||
}
|
||||
void vfmass(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
|
||||
void vfmass(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
|
||||
|
||||
// Debugging
|
||||
void Print();
|
||||
|
||||
@ -1316,6 +1478,14 @@ class Assembler : public AssemblerBase {
|
||||
}
|
||||
}
|
||||
|
||||
// Emit vex prefix
|
||||
void emit_vex2_byte0() { emit(0xc5); }
|
||||
void emit_vex2_byte1(XMMRegister reg, XMMRegister v, byte lpp);
|
||||
void emit_vex3_byte0() { emit(0xc4); }
|
||||
void emit_vex3_byte1(XMMRegister reg, XMMRegister rm, byte m);
|
||||
void emit_vex3_byte1(XMMRegister reg, const Operand& rm, byte m);
|
||||
void emit_vex3_byte2(byte w, XMMRegister v, byte lpp);
|
||||
|
||||
// Emit the ModR/M byte, and optionally the SIB byte and
|
||||
// 1- or 4-byte offset for a memory operand. Also encodes
|
||||
// the second operand of the operation, a register or operation
|
||||
|
@ -148,6 +148,8 @@ enum Prefixes {
|
||||
ESCAPE_PREFIX = 0x0F,
|
||||
OPERAND_SIZE_OVERRIDE_PREFIX = 0x66,
|
||||
ADDRESS_SIZE_OVERRIDE_PREFIX = 0x67,
|
||||
VEX3_PREFIX = 0xC4,
|
||||
VEX2_PREFIX = 0xC5,
|
||||
REPNE_PREFIX = 0xF2,
|
||||
REP_PREFIX = 0xF3,
|
||||
REPEQ_PREFIX = REP_PREFIX
|
||||
@ -290,11 +292,14 @@ class DisassemblerX64 {
|
||||
ABORT_ON_UNIMPLEMENTED_OPCODE)
|
||||
: converter_(converter),
|
||||
tmp_buffer_pos_(0),
|
||||
abort_on_unimplemented_(
|
||||
unimplemented_action == ABORT_ON_UNIMPLEMENTED_OPCODE),
|
||||
abort_on_unimplemented_(unimplemented_action ==
|
||||
ABORT_ON_UNIMPLEMENTED_OPCODE),
|
||||
rex_(0),
|
||||
operand_size_(0),
|
||||
group_1_prefix_(0),
|
||||
vex_byte0_(0),
|
||||
vex_byte1_(0),
|
||||
vex_byte2_(0),
|
||||
byte_size_operand_(false),
|
||||
instruction_table_(instruction_table.Pointer()) {
|
||||
tmp_buffer_[0] = '\0';
|
||||
@ -323,6 +328,9 @@ class DisassemblerX64 {
|
||||
byte rex_;
|
||||
byte operand_size_; // 0x66 or (if no group 3 prefix is present) 0x0.
|
||||
byte group_1_prefix_; // 0xF2, 0xF3, or (if no group 1 prefix is present) 0.
|
||||
byte vex_byte0_; // 0xc4 or 0xc5
|
||||
byte vex_byte1_;
|
||||
byte vex_byte2_; // only for 3 bytes vex prefix
|
||||
// Byte size operand override.
|
||||
bool byte_size_operand_;
|
||||
const InstructionTable* const instruction_table_;
|
||||
@ -345,6 +353,51 @@ class DisassemblerX64 {
|
||||
|
||||
bool rex_w() { return (rex_ & 0x08) != 0; }
|
||||
|
||||
bool vex_128() {
|
||||
DCHECK(vex_byte0_ == VEX3_PREFIX || vex_byte0_ == VEX2_PREFIX);
|
||||
byte checked = vex_byte0_ == VEX3_PREFIX ? vex_byte2_ : vex_byte1_;
|
||||
return (checked & 4) != 1;
|
||||
}
|
||||
|
||||
bool vex_66() {
|
||||
DCHECK(vex_byte0_ == VEX3_PREFIX || vex_byte0_ == VEX2_PREFIX);
|
||||
byte checked = vex_byte0_ == VEX3_PREFIX ? vex_byte2_ : vex_byte1_;
|
||||
return (checked & 3) == 1;
|
||||
}
|
||||
|
||||
bool vex_f3() {
|
||||
DCHECK(vex_byte0_ == VEX3_PREFIX || vex_byte0_ == VEX2_PREFIX);
|
||||
byte checked = vex_byte0_ == VEX3_PREFIX ? vex_byte2_ : vex_byte1_;
|
||||
return (checked & 3) == 2;
|
||||
}
|
||||
|
||||
bool vex_f2() {
|
||||
DCHECK(vex_byte0_ == VEX3_PREFIX || vex_byte0_ == VEX2_PREFIX);
|
||||
byte checked = vex_byte0_ == VEX3_PREFIX ? vex_byte2_ : vex_byte1_;
|
||||
return (checked & 3) == 3;
|
||||
}
|
||||
|
||||
bool vex_0f() {
|
||||
if (vex_byte0_ == VEX2_PREFIX) return true;
|
||||
return (vex_byte1_ & 3) == 1;
|
||||
}
|
||||
|
||||
bool vex_0f38() {
|
||||
DCHECK(vex_byte0_ == VEX3_PREFIX);
|
||||
return (vex_byte1_ & 3) == 2;
|
||||
}
|
||||
|
||||
bool vex_0f3a() {
|
||||
DCHECK(vex_byte0_ == VEX3_PREFIX);
|
||||
return (vex_byte1_ & 3) == 3;
|
||||
}
|
||||
|
||||
int vex_vreg() {
|
||||
DCHECK(vex_byte0_ == VEX3_PREFIX || vex_byte0_ == VEX2_PREFIX);
|
||||
byte checked = vex_byte0_ == VEX3_PREFIX ? vex_byte2_ : vex_byte1_;
|
||||
return ~(checked >> 3) & 0xf;
|
||||
}
|
||||
|
||||
OperandSize operand_size() {
|
||||
if (byte_size_operand_) return OPERAND_BYTE_SIZE;
|
||||
if (rex_w()) return OPERAND_QUADWORD_SIZE;
|
||||
@ -356,6 +409,8 @@ class DisassemblerX64 {
|
||||
return "bwlq"[operand_size()];
|
||||
}
|
||||
|
||||
// Mnemonic suffix for scalar float instructions: 'd' when rex_w() is set,
// otherwise 's'.
char float_size_code() { return rex_w() ? 'd' : 's'; }
|
||||
|
||||
const char* NameOfCPURegister(int reg) const {
|
||||
return converter_.NameOfCPURegister(reg);
|
||||
}
|
||||
@ -414,6 +469,7 @@ class DisassemblerX64 {
|
||||
int FPUInstruction(byte* data);
|
||||
int MemoryFPUInstruction(int escape_opcode, int regop, byte* modrm_start);
|
||||
int RegisterFPUInstruction(int escape_opcode, byte modrm_byte);
|
||||
int AVXInstruction(byte* data);
|
||||
void AppendToBuffer(const char* format, ...);
|
||||
|
||||
void UnimplementedInstruction() {
|
||||
@ -811,6 +867,92 @@ int DisassemblerX64::SetCC(byte* data) {
|
||||
}
|
||||
|
||||
|
||||
// Disassembles one VEX-prefixed instruction whose opcode byte is at |data|
// (the VEX prefix bytes have already been consumed into vex_byte0_..2_).
// Only the scalar FMA3 family (3-byte VEX, 128-bit, 66 prefix, 0F38 map) is
// decoded; every other encoding is reported through
// UnimplementedInstruction(). Returns the number of bytes consumed starting
// at |data|.
int DisassemblerX64::AVXInstruction(byte* data) {
  byte opcode = *data;
  byte* current = data + 1;
  if (vex_byte0_ == VEX3_PREFIX) {
    if (vex_128()) {
      if (vex_66() && vex_0f38()) {
        int mod, regop, rm, vvvv = vex_vreg();
        get_modrm(*current, &mod, &regop, &rm);
        // Map the opcode to its mnemonic stem; the 's'/'d' size suffix is
        // appended from float_size_code() below.
        const char* mnem = NULL;
        switch (opcode) {
          case 0x99:
            mnem = "vfmadd132s";
            break;
          case 0xa9:
            mnem = "vfmadd213s";
            break;
          case 0xb9:
            mnem = "vfmadd231s";
            break;
          case 0x9b:
            mnem = "vfmsub132s";
            break;
          case 0xab:
            mnem = "vfmsub213s";
            break;
          case 0xbb:
            mnem = "vfmsub231s";
            break;
          case 0x9d:
            mnem = "vfnmadd132s";
            break;
          case 0xad:
            mnem = "vfnmadd213s";
            break;
          case 0xbd:
            mnem = "vfnmadd231s";
            break;
          case 0x9f:
            mnem = "vfnmsub132s";
            break;
          case 0xaf:
            mnem = "vfnmsub213s";
            break;
          case 0xbf:
            mnem = "vfnmsub231s";
            break;
          default:
            break;
        }
        if (mnem != NULL) {
          AppendToBuffer("%s%c %s,%s,", mnem, float_size_code(),
                         NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
          current += PrintRightXMMOperand(current);
        } else {
          UnimplementedInstruction();
        }
      } else {
        // Previously this case fell through without emitting anything, so an
        // unsupported 128-bit VEX instruction was silently swallowed.
        UnimplementedInstruction();
      }
    } else {
      UnimplementedInstruction();
    }
  } else if (vex_byte0_ == VEX2_PREFIX) {
    UnimplementedInstruction();
  } else {
    UNREACHABLE();
  }

  return static_cast<int>(current - data);
}
|
||||
|
||||
|
||||
// Returns number of bytes used, including *data.
|
||||
int DisassemblerX64::FPUInstruction(byte* data) {
|
||||
byte escape_opcode = *data;
|
||||
@ -1189,6 +1331,16 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
|
||||
AppendToBuffer("cvttss2si%c %s,",
|
||||
operand_size_code(), NameOfCPURegister(regop));
|
||||
current += PrintRightXMMOperand(current);
|
||||
} else if (opcode == 0x58) {
|
||||
int mod, regop, rm;
|
||||
get_modrm(*current, &mod, ®op, &rm);
|
||||
AppendToBuffer("addss %s,", NameOfXMMRegister(regop));
|
||||
current += PrintRightXMMOperand(current);
|
||||
} else if (opcode == 0x59) {
|
||||
int mod, regop, rm;
|
||||
get_modrm(*current, &mod, ®op, &rm);
|
||||
AppendToBuffer("mulss %s,", NameOfXMMRegister(regop));
|
||||
current += PrintRightXMMOperand(current);
|
||||
} else if (opcode == 0x5A) {
|
||||
// CVTSS2SD:
|
||||
// Convert scalar single-precision FP to scalar double-precision FP.
|
||||
@ -1196,6 +1348,16 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
|
||||
get_modrm(*current, &mod, ®op, &rm);
|
||||
AppendToBuffer("cvtss2sd %s,", NameOfXMMRegister(regop));
|
||||
current += PrintRightXMMOperand(current);
|
||||
} else if (opcode == 0x5c) {
|
||||
int mod, regop, rm;
|
||||
get_modrm(*current, &mod, ®op, &rm);
|
||||
AppendToBuffer("subss %s,", NameOfXMMRegister(regop));
|
||||
current += PrintRightXMMOperand(current);
|
||||
} else if (opcode == 0x5e) {
|
||||
int mod, regop, rm;
|
||||
get_modrm(*current, &mod, ®op, &rm);
|
||||
AppendToBuffer("divss %s,", NameOfXMMRegister(regop));
|
||||
current += PrintRightXMMOperand(current);
|
||||
} else if (opcode == 0x7E) {
|
||||
int mod, regop, rm;
|
||||
get_modrm(*current, &mod, ®op, &rm);
|
||||
@ -1234,6 +1396,11 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
|
||||
current += PrintRightXMMOperand(current);
|
||||
AppendToBuffer(",%s", NameOfXMMRegister(regop));
|
||||
|
||||
} else if (opcode == 0x2e) {
|
||||
int mod, regop, rm;
|
||||
get_modrm(*current, &mod, ®op, &rm);
|
||||
AppendToBuffer("ucomiss %s,", NameOfXMMRegister(regop));
|
||||
current += PrintRightXMMOperand(current);
|
||||
} else if (opcode == 0xA2) {
|
||||
// CPUID
|
||||
AppendToBuffer("%s", mnemonic);
|
||||
@ -1387,99 +1554,114 @@ int DisassemblerX64::InstructionDecode(v8::internal::Vector<char> out_buffer,
|
||||
if (rex_w()) AppendToBuffer("REX.W ");
|
||||
} else if ((current & 0xFE) == 0xF2) { // Group 1 prefix (0xF2 or 0xF3).
|
||||
group_1_prefix_ = current;
|
||||
} else if (current == VEX3_PREFIX) {
|
||||
vex_byte0_ = current;
|
||||
vex_byte1_ = *(data + 1);
|
||||
vex_byte2_ = *(data + 2);
|
||||
setRex(0x40 | (~(vex_byte1_ >> 5) & 7) | ((vex_byte2_ >> 4) & 8));
|
||||
data += 2;
|
||||
} else if (current == VEX2_PREFIX) {
|
||||
vex_byte0_ = current;
|
||||
vex_byte1_ = *(data + 1);
|
||||
setRex(0x40 | (~(vex_byte1_ >> 5) & 4));
|
||||
data++;
|
||||
} else { // Not a prefix - an opcode.
|
||||
break;
|
||||
}
|
||||
data++;
|
||||
}
|
||||
|
||||
const InstructionDesc& idesc = instruction_table_->Get(current);
|
||||
byte_size_operand_ = idesc.byte_size_operation;
|
||||
switch (idesc.type) {
|
||||
case ZERO_OPERANDS_INSTR:
|
||||
if (current >= 0xA4 && current <= 0xA7) {
|
||||
// String move or compare operations.
|
||||
if (group_1_prefix_ == REP_PREFIX) {
|
||||
// REP.
|
||||
AppendToBuffer("rep ");
|
||||
// Decode AVX instructions.
|
||||
if (vex_byte0_ != 0) {
|
||||
processed = true;
|
||||
data += AVXInstruction(data);
|
||||
} else {
|
||||
const InstructionDesc& idesc = instruction_table_->Get(current);
|
||||
byte_size_operand_ = idesc.byte_size_operation;
|
||||
switch (idesc.type) {
|
||||
case ZERO_OPERANDS_INSTR:
|
||||
if (current >= 0xA4 && current <= 0xA7) {
|
||||
// String move or compare operations.
|
||||
if (group_1_prefix_ == REP_PREFIX) {
|
||||
// REP.
|
||||
AppendToBuffer("rep ");
|
||||
}
|
||||
if (rex_w()) AppendToBuffer("REX.W ");
|
||||
AppendToBuffer("%s%c", idesc.mnem, operand_size_code());
|
||||
} else {
|
||||
AppendToBuffer("%s", idesc.mnem, operand_size_code());
|
||||
}
|
||||
if (rex_w()) AppendToBuffer("REX.W ");
|
||||
AppendToBuffer("%s%c", idesc.mnem, operand_size_code());
|
||||
} else {
|
||||
AppendToBuffer("%s", idesc.mnem, operand_size_code());
|
||||
data++;
|
||||
break;
|
||||
|
||||
case TWO_OPERANDS_INSTR:
|
||||
data++;
|
||||
data += PrintOperands(idesc.mnem, idesc.op_order_, data);
|
||||
break;
|
||||
|
||||
case JUMP_CONDITIONAL_SHORT_INSTR:
|
||||
data += JumpConditionalShort(data);
|
||||
break;
|
||||
|
||||
case REGISTER_INSTR:
|
||||
AppendToBuffer("%s%c %s", idesc.mnem, operand_size_code(),
|
||||
NameOfCPURegister(base_reg(current & 0x07)));
|
||||
data++;
|
||||
break;
|
||||
case PUSHPOP_INSTR:
|
||||
AppendToBuffer("%s %s", idesc.mnem,
|
||||
NameOfCPURegister(base_reg(current & 0x07)));
|
||||
data++;
|
||||
break;
|
||||
case MOVE_REG_INSTR: {
|
||||
byte* addr = NULL;
|
||||
switch (operand_size()) {
|
||||
case OPERAND_WORD_SIZE:
|
||||
addr =
|
||||
reinterpret_cast<byte*>(*reinterpret_cast<int16_t*>(data + 1));
|
||||
data += 3;
|
||||
break;
|
||||
case OPERAND_DOUBLEWORD_SIZE:
|
||||
addr =
|
||||
reinterpret_cast<byte*>(*reinterpret_cast<uint32_t*>(data + 1));
|
||||
data += 5;
|
||||
break;
|
||||
case OPERAND_QUADWORD_SIZE:
|
||||
addr =
|
||||
reinterpret_cast<byte*>(*reinterpret_cast<int64_t*>(data + 1));
|
||||
data += 9;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
AppendToBuffer("mov%c %s,%s", operand_size_code(),
|
||||
NameOfCPURegister(base_reg(current & 0x07)),
|
||||
NameOfAddress(addr));
|
||||
break;
|
||||
}
|
||||
data++;
|
||||
break;
|
||||
|
||||
case TWO_OPERANDS_INSTR:
|
||||
data++;
|
||||
data += PrintOperands(idesc.mnem, idesc.op_order_, data);
|
||||
break;
|
||||
|
||||
case JUMP_CONDITIONAL_SHORT_INSTR:
|
||||
data += JumpConditionalShort(data);
|
||||
break;
|
||||
|
||||
case REGISTER_INSTR:
|
||||
AppendToBuffer("%s%c %s",
|
||||
idesc.mnem,
|
||||
operand_size_code(),
|
||||
NameOfCPURegister(base_reg(current & 0x07)));
|
||||
data++;
|
||||
break;
|
||||
case PUSHPOP_INSTR:
|
||||
AppendToBuffer("%s %s",
|
||||
idesc.mnem,
|
||||
NameOfCPURegister(base_reg(current & 0x07)));
|
||||
data++;
|
||||
break;
|
||||
case MOVE_REG_INSTR: {
|
||||
byte* addr = NULL;
|
||||
switch (operand_size()) {
|
||||
case OPERAND_WORD_SIZE:
|
||||
addr = reinterpret_cast<byte*>(*reinterpret_cast<int16_t*>(data + 1));
|
||||
data += 3;
|
||||
break;
|
||||
case OPERAND_DOUBLEWORD_SIZE:
|
||||
addr =
|
||||
reinterpret_cast<byte*>(*reinterpret_cast<uint32_t*>(data + 1));
|
||||
data += 5;
|
||||
break;
|
||||
case OPERAND_QUADWORD_SIZE:
|
||||
addr = reinterpret_cast<byte*>(*reinterpret_cast<int64_t*>(data + 1));
|
||||
data += 9;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
case CALL_JUMP_INSTR: {
|
||||
byte* addr = data + *reinterpret_cast<int32_t*>(data + 1) + 5;
|
||||
AppendToBuffer("%s %s", idesc.mnem, NameOfAddress(addr));
|
||||
data += 5;
|
||||
break;
|
||||
}
|
||||
AppendToBuffer("mov%c %s,%s",
|
||||
operand_size_code(),
|
||||
NameOfCPURegister(base_reg(current & 0x07)),
|
||||
NameOfAddress(addr));
|
||||
break;
|
||||
|
||||
case SHORT_IMMEDIATE_INSTR: {
|
||||
byte* addr =
|
||||
reinterpret_cast<byte*>(*reinterpret_cast<int32_t*>(data + 1));
|
||||
AppendToBuffer("%s rax,%s", idesc.mnem, NameOfAddress(addr));
|
||||
data += 5;
|
||||
break;
|
||||
}
|
||||
|
||||
case NO_INSTR:
|
||||
processed = false;
|
||||
break;
|
||||
|
||||
default:
|
||||
UNIMPLEMENTED(); // This type is not implemented.
|
||||
}
|
||||
|
||||
case CALL_JUMP_INSTR: {
|
||||
byte* addr = data + *reinterpret_cast<int32_t*>(data + 1) + 5;
|
||||
AppendToBuffer("%s %s", idesc.mnem, NameOfAddress(addr));
|
||||
data += 5;
|
||||
break;
|
||||
}
|
||||
|
||||
case SHORT_IMMEDIATE_INSTR: {
|
||||
byte* addr =
|
||||
reinterpret_cast<byte*>(*reinterpret_cast<int32_t*>(data + 1));
|
||||
AppendToBuffer("%s rax,%s", idesc.mnem, NameOfAddress(addr));
|
||||
data += 5;
|
||||
break;
|
||||
}
|
||||
|
||||
case NO_INSTR:
|
||||
processed = false;
|
||||
break;
|
||||
|
||||
default:
|
||||
UNIMPLEMENTED(); // This type is not implemented.
|
||||
}
|
||||
|
||||
// The first byte didn't match any of the simple opcodes, so we
|
||||
|
@ -736,4 +736,454 @@ TEST(AssemblerX64SSE) {
|
||||
F6 f = FUNCTION_CAST<F6>(code->entry());
|
||||
CHECK_EQ(2, f(1.0, 2.0));
|
||||
}
|
||||
|
||||
|
||||
typedef int (*F7)(double x, double y, double z);
|
||||
TEST(AssemblerX64FMA_sd) {
|
||||
CcTest::InitializeVM();
|
||||
if (!CpuFeatures::IsSupported(FMA3)) return;
|
||||
|
||||
Isolate* isolate = reinterpret_cast<Isolate*>(CcTest::isolate());
|
||||
HandleScope scope(isolate);
|
||||
v8::internal::byte buffer[1024];
|
||||
MacroAssembler assm(isolate, buffer, sizeof buffer);
|
||||
{
|
||||
CpuFeatureScope fscope(&assm, FMA3);
|
||||
Label exit;
|
||||
// arguments in xmm0, xmm1 and xmm2
|
||||
// xmm0 * xmm1 + xmm2
|
||||
__ movaps(xmm3, xmm0);
|
||||
__ mulsd(xmm3, xmm1);
|
||||
__ addsd(xmm3, xmm2); // Expected result in xmm3
|
||||
|
||||
__ subq(rsp, Immediate(kDoubleSize)); // For memory operand
|
||||
// vfmadd132sd
|
||||
__ movl(rax, Immediate(1)); // Test number
|
||||
__ movaps(xmm8, xmm0);
|
||||
__ vfmadd132sd(xmm8, xmm2, xmm1);
|
||||
__ ucomisd(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
// vfmadd213sd
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm1);
|
||||
__ vfmadd213sd(xmm8, xmm0, xmm2);
|
||||
__ ucomisd(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
// vfmadd231sd
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm2);
|
||||
__ vfmadd231sd(xmm8, xmm0, xmm1);
|
||||
__ ucomisd(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
|
||||
// vfmadd132sd
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm0);
|
||||
__ movsd(Operand(rsp, 0), xmm1);
|
||||
__ vfmadd132sd(xmm8, xmm2, Operand(rsp, 0));
|
||||
__ ucomisd(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
// vfmadd213sd
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm1);
|
||||
__ movsd(Operand(rsp, 0), xmm2);
|
||||
__ vfmadd213sd(xmm8, xmm0, Operand(rsp, 0));
|
||||
__ ucomisd(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
// vfmadd231sd
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm2);
|
||||
__ movsd(Operand(rsp, 0), xmm1);
|
||||
__ vfmadd231sd(xmm8, xmm0, Operand(rsp, 0));
|
||||
__ ucomisd(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
|
||||
// xmm0 * xmm1 - xmm2
|
||||
__ movaps(xmm3, xmm0);
|
||||
__ mulsd(xmm3, xmm1);
|
||||
__ subsd(xmm3, xmm2); // Expected result in xmm3
|
||||
|
||||
// vfmsub132sd
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm0);
|
||||
__ vfmsub132sd(xmm8, xmm2, xmm1);
|
||||
__ ucomisd(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
// vfmsub213sd
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm1);
|
||||
__ vfmsub213sd(xmm8, xmm0, xmm2);
|
||||
__ ucomisd(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
// vfmsub231sd
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm2);
|
||||
__ vfmsub231sd(xmm8, xmm0, xmm1);
|
||||
__ ucomisd(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
|
||||
// vfmsub132sd
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm0);
|
||||
__ movsd(Operand(rsp, 0), xmm1);
|
||||
__ vfmsub132sd(xmm8, xmm2, Operand(rsp, 0));
|
||||
__ ucomisd(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
// vfmsub213sd
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm1);
|
||||
__ movsd(Operand(rsp, 0), xmm2);
|
||||
__ vfmsub213sd(xmm8, xmm0, Operand(rsp, 0));
|
||||
__ ucomisd(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
// vfmsub231sd
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm2);
|
||||
__ movsd(Operand(rsp, 0), xmm1);
|
||||
__ vfmsub231sd(xmm8, xmm0, Operand(rsp, 0));
|
||||
__ ucomisd(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
|
||||
|
||||
// - xmm0 * xmm1 + xmm2
|
||||
__ movaps(xmm3, xmm0);
|
||||
__ mulsd(xmm3, xmm1);
|
||||
__ Move(xmm4, (uint64_t)1 << 63);
|
||||
__ xorpd(xmm3, xmm4);
|
||||
__ addsd(xmm3, xmm2); // Expected result in xmm3
|
||||
|
||||
// vfnmadd132sd
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm0);
|
||||
__ vfnmadd132sd(xmm8, xmm2, xmm1);
|
||||
__ ucomisd(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
// vfnmadd213sd
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm1);
|
||||
__ vfnmadd213sd(xmm8, xmm0, xmm2);
|
||||
__ ucomisd(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
// vfnmadd231sd
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm2);
|
||||
__ vfnmadd231sd(xmm8, xmm0, xmm1);
|
||||
__ ucomisd(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
|
||||
// vfnmadd132sd
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm0);
|
||||
__ movsd(Operand(rsp, 0), xmm1);
|
||||
__ vfnmadd132sd(xmm8, xmm2, Operand(rsp, 0));
|
||||
__ ucomisd(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
// vfnmadd213sd
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm1);
|
||||
__ movsd(Operand(rsp, 0), xmm2);
|
||||
__ vfnmadd213sd(xmm8, xmm0, Operand(rsp, 0));
|
||||
__ ucomisd(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
// vfnmadd231sd
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm2);
|
||||
__ movsd(Operand(rsp, 0), xmm1);
|
||||
__ vfnmadd231sd(xmm8, xmm0, Operand(rsp, 0));
|
||||
__ ucomisd(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
|
||||
|
||||
// - xmm0 * xmm1 - xmm2
|
||||
__ movaps(xmm3, xmm0);
|
||||
__ mulsd(xmm3, xmm1);
|
||||
__ Move(xmm4, (uint64_t)1 << 63);
|
||||
__ xorpd(xmm3, xmm4);
|
||||
__ subsd(xmm3, xmm2); // Expected result in xmm3
|
||||
|
||||
// vfnmsub132sd
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm0);
|
||||
__ vfnmsub132sd(xmm8, xmm2, xmm1);
|
||||
__ ucomisd(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
// vfnmsub213sd
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm1);
|
||||
__ vfnmsub213sd(xmm8, xmm0, xmm2);
|
||||
__ ucomisd(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
// vfnmsub231sd
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm2);
|
||||
__ vfnmsub231sd(xmm8, xmm0, xmm1);
|
||||
__ ucomisd(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
|
||||
// vfnmsub132sd
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm0);
|
||||
__ movsd(Operand(rsp, 0), xmm1);
|
||||
__ vfnmsub132sd(xmm8, xmm2, Operand(rsp, 0));
|
||||
__ ucomisd(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
// vfnmsub213sd
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm1);
|
||||
__ movsd(Operand(rsp, 0), xmm2);
|
||||
__ vfnmsub213sd(xmm8, xmm0, Operand(rsp, 0));
|
||||
__ ucomisd(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
// vfnmsub231sd
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm2);
|
||||
__ movsd(Operand(rsp, 0), xmm1);
|
||||
__ vfnmsub231sd(xmm8, xmm0, Operand(rsp, 0));
|
||||
__ ucomisd(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
|
||||
|
||||
__ xorl(rax, rax);
|
||||
__ bind(&exit);
|
||||
__ addq(rsp, Immediate(kDoubleSize));
|
||||
__ ret(0);
|
||||
}
|
||||
|
||||
CodeDesc desc;
|
||||
assm.GetCode(&desc);
|
||||
Handle<Code> code = isolate->factory()->NewCode(
|
||||
desc, Code::ComputeFlags(Code::STUB), Handle<Code>());
|
||||
#ifdef OBJECT_PRINT
|
||||
OFStream os(stdout);
|
||||
code->Print(os);
|
||||
#endif
|
||||
|
||||
F7 f = FUNCTION_CAST<F7>(code->entry());
|
||||
CHECK_EQ(0, f(0.000092662107262076, -2.460774966188315, -1.0958787393627414));
|
||||
}
|
||||
|
||||
|
||||
typedef int (*F8)(float x, float y, float z);
|
||||
TEST(AssemblerX64FMA_ss) {
|
||||
CcTest::InitializeVM();
|
||||
if (!CpuFeatures::IsSupported(FMA3)) return;
|
||||
|
||||
Isolate* isolate = reinterpret_cast<Isolate*>(CcTest::isolate());
|
||||
HandleScope scope(isolate);
|
||||
v8::internal::byte buffer[1024];
|
||||
MacroAssembler assm(isolate, buffer, sizeof buffer);
|
||||
{
|
||||
CpuFeatureScope fscope(&assm, FMA3);
|
||||
Label exit;
|
||||
// arguments in xmm0, xmm1 and xmm2
|
||||
// xmm0 * xmm1 + xmm2
|
||||
__ movaps(xmm3, xmm0);
|
||||
__ mulss(xmm3, xmm1);
|
||||
__ addss(xmm3, xmm2); // Expected result in xmm3
|
||||
|
||||
__ subq(rsp, Immediate(kDoubleSize)); // For memory operand
|
||||
// vfmadd132ss
|
||||
__ movl(rax, Immediate(1)); // Test number
|
||||
__ movaps(xmm8, xmm0);
|
||||
__ vfmadd132ss(xmm8, xmm2, xmm1);
|
||||
__ ucomiss(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
// vfmadd213ss
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm1);
|
||||
__ vfmadd213ss(xmm8, xmm0, xmm2);
|
||||
__ ucomiss(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
// vfmadd231ss
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm2);
|
||||
__ vfmadd231ss(xmm8, xmm0, xmm1);
|
||||
__ ucomiss(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
|
||||
// vfmadd132ss
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm0);
|
||||
__ movss(Operand(rsp, 0), xmm1);
|
||||
__ vfmadd132ss(xmm8, xmm2, Operand(rsp, 0));
|
||||
__ ucomiss(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
// vfmadd213ss
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm1);
|
||||
__ movss(Operand(rsp, 0), xmm2);
|
||||
__ vfmadd213ss(xmm8, xmm0, Operand(rsp, 0));
|
||||
__ ucomiss(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
// vfmadd231ss
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm2);
|
||||
__ movss(Operand(rsp, 0), xmm1);
|
||||
__ vfmadd231ss(xmm8, xmm0, Operand(rsp, 0));
|
||||
__ ucomiss(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
|
||||
// xmm0 * xmm1 - xmm2
|
||||
__ movaps(xmm3, xmm0);
|
||||
__ mulss(xmm3, xmm1);
|
||||
__ subss(xmm3, xmm2); // Expected result in xmm3
|
||||
|
||||
// vfmsub132ss
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm0);
|
||||
__ vfmsub132ss(xmm8, xmm2, xmm1);
|
||||
__ ucomiss(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
// vfmsub213ss
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm1);
|
||||
__ vfmsub213ss(xmm8, xmm0, xmm2);
|
||||
__ ucomiss(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
// vfmsub231ss
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm2);
|
||||
__ vfmsub231ss(xmm8, xmm0, xmm1);
|
||||
__ ucomiss(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
|
||||
// vfmsub132ss
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm0);
|
||||
__ movss(Operand(rsp, 0), xmm1);
|
||||
__ vfmsub132ss(xmm8, xmm2, Operand(rsp, 0));
|
||||
__ ucomiss(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
// vfmsub213ss
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm1);
|
||||
__ movss(Operand(rsp, 0), xmm2);
|
||||
__ vfmsub213ss(xmm8, xmm0, Operand(rsp, 0));
|
||||
__ ucomiss(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
// vfmsub231ss
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm2);
|
||||
__ movss(Operand(rsp, 0), xmm1);
|
||||
__ vfmsub231ss(xmm8, xmm0, Operand(rsp, 0));
|
||||
__ ucomiss(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
|
||||
|
||||
// - xmm0 * xmm1 + xmm2
|
||||
__ movaps(xmm3, xmm0);
|
||||
__ mulss(xmm3, xmm1);
|
||||
__ Move(xmm4, (uint32_t)1 << 31);
|
||||
__ xorps(xmm3, xmm4);
|
||||
__ addss(xmm3, xmm2); // Expected result in xmm3
|
||||
|
||||
// vfnmadd132ss
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm0);
|
||||
__ vfnmadd132ss(xmm8, xmm2, xmm1);
|
||||
__ ucomiss(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
// vfnmadd213ss
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm1);
|
||||
__ vfnmadd213ss(xmm8, xmm0, xmm2);
|
||||
__ ucomiss(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
// vfnmadd231ss
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm2);
|
||||
__ vfnmadd231ss(xmm8, xmm0, xmm1);
|
||||
__ ucomiss(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
|
||||
// vfnmadd132ss
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm0);
|
||||
__ movss(Operand(rsp, 0), xmm1);
|
||||
__ vfnmadd132ss(xmm8, xmm2, Operand(rsp, 0));
|
||||
__ ucomiss(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
// vfnmadd213ss
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm1);
|
||||
__ movss(Operand(rsp, 0), xmm2);
|
||||
__ vfnmadd213ss(xmm8, xmm0, Operand(rsp, 0));
|
||||
__ ucomiss(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
// vfnmadd231ss
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm2);
|
||||
__ movss(Operand(rsp, 0), xmm1);
|
||||
__ vfnmadd231ss(xmm8, xmm0, Operand(rsp, 0));
|
||||
__ ucomiss(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
|
||||
|
||||
// - xmm0 * xmm1 - xmm2
|
||||
__ movaps(xmm3, xmm0);
|
||||
__ mulss(xmm3, xmm1);
|
||||
__ Move(xmm4, (uint32_t)1 << 31);
|
||||
__ xorps(xmm3, xmm4);
|
||||
__ subss(xmm3, xmm2); // Expected result in xmm3
|
||||
|
||||
// vfnmsub132ss
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm0);
|
||||
__ vfnmsub132ss(xmm8, xmm2, xmm1);
|
||||
__ ucomiss(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
// vfnmsub213ss
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm1);
|
||||
__ vfnmsub213ss(xmm8, xmm0, xmm2);
|
||||
__ ucomiss(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
// vfnmsub231ss
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm2);
|
||||
__ vfnmsub231ss(xmm8, xmm0, xmm1);
|
||||
__ ucomiss(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
|
||||
// vfnmsub132ss
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm0);
|
||||
__ movss(Operand(rsp, 0), xmm1);
|
||||
__ vfnmsub132ss(xmm8, xmm2, Operand(rsp, 0));
|
||||
__ ucomiss(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
// vfnmsub213ss
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm1);
|
||||
__ movss(Operand(rsp, 0), xmm2);
|
||||
__ vfnmsub213ss(xmm8, xmm0, Operand(rsp, 0));
|
||||
__ ucomiss(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
// vfnmsub231ss
|
||||
__ incq(rax);
|
||||
__ movaps(xmm8, xmm2);
|
||||
__ movss(Operand(rsp, 0), xmm1);
|
||||
__ vfnmsub231ss(xmm8, xmm0, Operand(rsp, 0));
|
||||
__ ucomiss(xmm8, xmm3);
|
||||
__ j(not_equal, &exit);
|
||||
|
||||
|
||||
__ xorl(rax, rax);
|
||||
__ bind(&exit);
|
||||
__ addq(rsp, Immediate(kDoubleSize));
|
||||
__ ret(0);
|
||||
}
|
||||
|
||||
CodeDesc desc;
|
||||
assm.GetCode(&desc);
|
||||
Handle<Code> code = isolate->factory()->NewCode(
|
||||
desc, Code::ComputeFlags(Code::STUB), Handle<Code>());
|
||||
#ifdef OBJECT_PRINT
|
||||
OFStream os(stdout);
|
||||
code->Print(os);
|
||||
#endif
|
||||
|
||||
F8 f = FUNCTION_CAST<F8>(code->entry());
|
||||
CHECK_EQ(0, f(9.26621069e-05f, -2.4607749f, -1.09587872f));
|
||||
}
|
||||
#undef __
|
||||
|
@ -51,7 +51,7 @@ TEST(DisasmX64) {
|
||||
CcTest::InitializeVM();
|
||||
Isolate* isolate = CcTest::i_isolate();
|
||||
HandleScope scope(isolate);
|
||||
v8::internal::byte buffer[2048];
|
||||
v8::internal::byte buffer[4096];
|
||||
Assembler assm(isolate, buffer, sizeof buffer);
|
||||
DummyStaticFunction(NULL); // just bloody use it (DELETE; debugging)
|
||||
|
||||
@ -394,6 +394,14 @@ TEST(DisasmX64) {
|
||||
__ xorps(xmm0, Operand(rbx, rcx, times_4, 10000));
|
||||
|
||||
// Arithmetic operation
|
||||
__ addss(xmm1, xmm0);
|
||||
__ addss(xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
__ mulss(xmm1, xmm0);
|
||||
__ mulss(xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
__ subss(xmm1, xmm0);
|
||||
__ subss(xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
__ divss(xmm1, xmm0);
|
||||
__ divss(xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
__ addps(xmm1, xmm0);
|
||||
__ addps(xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
__ subps(xmm1, xmm0);
|
||||
@ -402,6 +410,9 @@ TEST(DisasmX64) {
|
||||
__ mulps(xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
__ divps(xmm1, xmm0);
|
||||
__ divps(xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
|
||||
__ ucomiss(xmm0, xmm1);
|
||||
__ ucomiss(xmm0, Operand(rbx, rcx, times_4, 10000));
|
||||
}
|
||||
// SSE 2 instructions
|
||||
{
|
||||
@ -464,6 +475,74 @@ TEST(DisasmX64) {
|
||||
}
|
||||
}
|
||||
|
||||
// FMA3 instructions
|
||||
{
|
||||
if (CpuFeatures::IsSupported(FMA3)) {
|
||||
CpuFeatureScope scope(&assm, FMA3);
|
||||
__ vfmadd132sd(xmm0, xmm1, xmm2);
|
||||
__ vfmadd132sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
__ vfmadd213sd(xmm0, xmm1, xmm2);
|
||||
__ vfmadd213sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
__ vfmadd231sd(xmm0, xmm1, xmm2);
|
||||
__ vfmadd231sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
|
||||
__ vfmadd132sd(xmm9, xmm10, xmm11);
|
||||
__ vfmadd132sd(xmm9, xmm10, Operand(r9, r11, times_4, 10000));
|
||||
__ vfmadd213sd(xmm9, xmm10, xmm11);
|
||||
__ vfmadd213sd(xmm9, xmm10, Operand(r9, r11, times_4, 10000));
|
||||
__ vfmadd231sd(xmm9, xmm10, xmm11);
|
||||
__ vfmadd231sd(xmm9, xmm10, Operand(r9, r11, times_4, 10000));
|
||||
|
||||
__ vfmsub132sd(xmm0, xmm1, xmm2);
|
||||
__ vfmsub132sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
__ vfmsub213sd(xmm0, xmm1, xmm2);
|
||||
__ vfmsub213sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
__ vfmsub231sd(xmm0, xmm1, xmm2);
|
||||
__ vfmsub231sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
|
||||
__ vfnmadd132sd(xmm0, xmm1, xmm2);
|
||||
__ vfnmadd132sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
__ vfnmadd213sd(xmm0, xmm1, xmm2);
|
||||
__ vfnmadd213sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
__ vfnmadd231sd(xmm0, xmm1, xmm2);
|
||||
__ vfnmadd231sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
|
||||
__ vfnmsub132sd(xmm0, xmm1, xmm2);
|
||||
__ vfnmsub132sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
__ vfnmsub213sd(xmm0, xmm1, xmm2);
|
||||
__ vfnmsub213sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
__ vfnmsub231sd(xmm0, xmm1, xmm2);
|
||||
__ vfnmsub231sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
|
||||
__ vfmadd132ss(xmm0, xmm1, xmm2);
|
||||
__ vfmadd132ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
__ vfmadd213ss(xmm0, xmm1, xmm2);
|
||||
__ vfmadd213ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
__ vfmadd231ss(xmm0, xmm1, xmm2);
|
||||
__ vfmadd231ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
|
||||
__ vfmsub132ss(xmm0, xmm1, xmm2);
|
||||
__ vfmsub132ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
__ vfmsub213ss(xmm0, xmm1, xmm2);
|
||||
__ vfmsub213ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
__ vfmsub231ss(xmm0, xmm1, xmm2);
|
||||
__ vfmsub231ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
|
||||
__ vfnmadd132ss(xmm0, xmm1, xmm2);
|
||||
__ vfnmadd132ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
__ vfnmadd213ss(xmm0, xmm1, xmm2);
|
||||
__ vfnmadd213ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
__ vfnmadd231ss(xmm0, xmm1, xmm2);
|
||||
__ vfnmadd231ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
|
||||
__ vfnmsub132ss(xmm0, xmm1, xmm2);
|
||||
__ vfnmsub132ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
__ vfnmsub213ss(xmm0, xmm1, xmm2);
|
||||
__ vfnmsub213ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
__ vfnmsub231ss(xmm0, xmm1, xmm2);
|
||||
__ vfnmsub231ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
|
||||
}
|
||||
}
|
||||
// xchg.
|
||||
{
|
||||
__ xchgq(rax, rax);
|
||||
|
Loading…
Reference in New Issue
Block a user