[x64] Introduce FMA3 instructions on scalar data elements.

R=bmeurer@chromium.org

Review URL: https://codereview.chromium.org/757503002

Patch from Weiliang Lin <weiliang.lin@intel.com>.

Cr-Commit-Position: refs/heads/master@{#25509}
This commit is contained in:
Weiliang Lin 2014-11-26 06:31:41 +01:00 committed by Benedikt Meurer
parent 819955b278
commit 83a635e0d7
9 changed files with 1192 additions and 113 deletions

View File

@ -291,32 +291,35 @@ static bool HasListItem(const char* list, const char* item) {
#endif // V8_HOST_ARCH_IA32 || V8_HOST_ARCH_X64
CPU::CPU() : stepping_(0),
model_(0),
ext_model_(0),
family_(0),
ext_family_(0),
type_(0),
implementer_(0),
architecture_(0),
part_(0),
has_fpu_(false),
has_cmov_(false),
has_sahf_(false),
has_mmx_(false),
has_sse_(false),
has_sse2_(false),
has_sse3_(false),
has_ssse3_(false),
has_sse41_(false),
has_sse42_(false),
has_idiva_(false),
has_neon_(false),
has_thumb2_(false),
has_vfp_(false),
has_vfp3_(false),
has_vfp3_d32_(false),
is_fp64_mode_(false) {
CPU::CPU()
: stepping_(0),
model_(0),
ext_model_(0),
family_(0),
ext_family_(0),
type_(0),
implementer_(0),
architecture_(0),
part_(0),
has_fpu_(false),
has_cmov_(false),
has_sahf_(false),
has_mmx_(false),
has_sse_(false),
has_sse2_(false),
has_sse3_(false),
has_ssse3_(false),
has_sse41_(false),
has_sse42_(false),
has_avx_(false),
has_fma3_(false),
has_idiva_(false),
has_neon_(false),
has_thumb2_(false),
has_vfp_(false),
has_vfp3_(false),
has_vfp3_d32_(false),
is_fp64_mode_(false) {
memcpy(vendor_, "Unknown", 8);
#if V8_OS_NACL
// Portable host shouldn't do feature detection.
@ -356,6 +359,8 @@ CPU::CPU() : stepping_(0),
has_ssse3_ = (cpu_info[2] & 0x00000200) != 0;
has_sse41_ = (cpu_info[2] & 0x00080000) != 0;
has_sse42_ = (cpu_info[2] & 0x00100000) != 0;
has_avx_ = (cpu_info[2] & 0x18000000) != 0;
if (has_avx_) has_fma3_ = (cpu_info[2] & 0x00001000) != 0;
}
#if V8_HOST_ARCH_IA32

View File

@ -68,6 +68,8 @@ class CPU FINAL {
bool has_ssse3() const { return has_ssse3_; }
bool has_sse41() const { return has_sse41_; }
bool has_sse42() const { return has_sse42_; }
bool has_avx() const { return has_avx_; }
bool has_fma3() const { return has_fma3_; }
// arm features
bool has_idiva() const { return has_idiva_; }
@ -101,6 +103,8 @@ class CPU FINAL {
bool has_ssse3_;
bool has_sse41_;
bool has_sse42_;
bool has_avx_;
bool has_fma3_;
bool has_idiva_;
bool has_neon_;
bool has_thumb2_;

View File

@ -421,6 +421,8 @@ DEFINE_BOOL(enable_sse4_1, true,
"enable use of SSE4.1 instructions if available")
DEFINE_BOOL(enable_sahf, true,
"enable use of SAHF instruction if available (X64 only)")
DEFINE_BOOL(enable_avx, true, "enable use of AVX instructions if available")
DEFINE_BOOL(enable_fma3, true, "enable use of FMA3 instructions if available")
DEFINE_BOOL(enable_vfp3, ENABLE_VFP3_DEFAULT,
"enable use of VFP3 instructions if available")
DEFINE_BOOL(enable_armv7, ENABLE_ARMV7_DEFAULT,

View File

@ -617,6 +617,8 @@ enum CpuFeature {
SSE4_1,
SSE3,
SAHF,
AVX,
FMA3,
// ARM
VFP3,
ARMv7,

View File

@ -27,12 +27,19 @@ void CpuFeatures::ProbeImpl(bool cross_compile) {
if (cpu.has_sse41() && FLAG_enable_sse4_1) supported_ |= 1u << SSE4_1;
if (cpu.has_sse3() && FLAG_enable_sse3) supported_ |= 1u << SSE3;
// SAHF is not generally available in long mode.
if (cpu.has_sahf() && FLAG_enable_sahf) supported_|= 1u << SAHF;
if (cpu.has_sahf() && FLAG_enable_sahf) supported_ |= 1u << SAHF;
if (cpu.has_avx() && FLAG_enable_avx) supported_ |= 1u << AVX;
if (cpu.has_fma3() && FLAG_enable_fma3) supported_ |= 1u << FMA3;
}
// No target-specific configuration to report on x64.
void CpuFeatures::PrintTarget() { }
void CpuFeatures::PrintFeatures() { }
// Print which of the x64-relevant CPU features ended up supported
// (i.e. detected on the host and not disabled by a flag).
void CpuFeatures::PrintFeatures() {
  printf("SSE3=%d SSE4_1=%d SAHF=%d AVX=%d FMA3=%d\n",
         CpuFeatures::IsSupported(SSE3), CpuFeatures::IsSupported(SSE4_1),
         CpuFeatures::IsSupported(SAHF), CpuFeatures::IsSupported(AVX),
         CpuFeatures::IsSupported(FMA3));
}
// -----------------------------------------------------------------------------
@ -2638,6 +2645,104 @@ void Assembler::movapd(XMMRegister dst, XMMRegister src) {
}
// ADDSS xmm, xmm: scalar single-precision FP add.
// Encoding: F3 [REX] 0F 58 /r.
void Assembler::addss(XMMRegister dst, XMMRegister src) {
  EnsureSpace ensure_space(this);
  emit(0xF3);  // scalar-single prefix
  emit_optional_rex_32(dst, src);  // REX only if extended registers are used
  emit(0x0F);
  emit(0x58);  // ADDSS opcode
  emit_sse_operand(dst, src);
}
// ADDSS xmm, m32: memory-operand form of the scalar single-precision add.
// Encoding: F3 [REX] 0F 58 /r.
void Assembler::addss(XMMRegister dst, const Operand& src) {
  EnsureSpace ensure_space(this);
  emit(0xF3);  // scalar-single prefix
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0x58);  // ADDSS opcode
  emit_sse_operand(dst, src);  // ModR/M + SIB/displacement for the operand
}
// SUBSS xmm, xmm: scalar single-precision FP subtract (dst -= src).
// Encoding: F3 [REX] 0F 5C /r.
void Assembler::subss(XMMRegister dst, XMMRegister src) {
  EnsureSpace ensure_space(this);
  emit(0xF3);  // scalar-single prefix
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0x5C);  // SUBSS opcode
  emit_sse_operand(dst, src);
}
// SUBSS xmm, m32: memory-operand form of the scalar single-precision subtract.
// Encoding: F3 [REX] 0F 5C /r.
void Assembler::subss(XMMRegister dst, const Operand& src) {
  EnsureSpace ensure_space(this);
  emit(0xF3);  // scalar-single prefix
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0x5C);  // SUBSS opcode
  emit_sse_operand(dst, src);
}
// MULSS xmm, xmm: scalar single-precision FP multiply.
// Encoding: F3 [REX] 0F 59 /r.
void Assembler::mulss(XMMRegister dst, XMMRegister src) {
  EnsureSpace ensure_space(this);
  emit(0xF3);  // scalar-single prefix
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0x59);  // MULSS opcode
  emit_sse_operand(dst, src);
}
// MULSS xmm, m32: memory-operand form of the scalar single-precision multiply.
// Encoding: F3 [REX] 0F 59 /r.
void Assembler::mulss(XMMRegister dst, const Operand& src) {
  EnsureSpace ensure_space(this);
  emit(0xF3);  // scalar-single prefix
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0x59);  // MULSS opcode
  emit_sse_operand(dst, src);
}
// DIVSS xmm, xmm: scalar single-precision FP divide (dst /= src).
// Encoding: F3 [REX] 0F 5E /r.
void Assembler::divss(XMMRegister dst, XMMRegister src) {
  EnsureSpace ensure_space(this);
  emit(0xF3);  // scalar-single prefix
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0x5E);  // DIVSS opcode
  emit_sse_operand(dst, src);
}
// DIVSS xmm, m32: memory-operand form of the scalar single-precision divide.
// Encoding: F3 [REX] 0F 5E /r.
void Assembler::divss(XMMRegister dst, const Operand& src) {
  EnsureSpace ensure_space(this);
  emit(0xF3);  // scalar-single prefix
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0x5E);  // DIVSS opcode
  emit_sse_operand(dst, src);
}
// UCOMISS xmm, xmm: unordered compare of scalar singles; sets EFLAGS.
// Encoding: [REX] 0F 2E /r — note: no F3 prefix, unlike the arithmetic ops.
void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
  EnsureSpace ensure_space(this);
  emit_optional_rex_32(dst, src);
  emit(0x0f);
  emit(0x2e);  // UCOMISS opcode
  emit_sse_operand(dst, src);
}
// UCOMISS xmm, m32: memory-operand form of the unordered scalar-single
// compare. Encoding: [REX] 0F 2E /r.
void Assembler::ucomiss(XMMRegister dst, const Operand& src) {
  EnsureSpace ensure_space(this);
  emit_optional_rex_32(dst, src);
  emit(0x0f);
  emit(0x2e);  // UCOMISS opcode
  emit_sse_operand(dst, src);
}
void Assembler::movss(XMMRegister dst, const Operand& src) {
EnsureSpace ensure_space(this);
emit(0xF3); // single
@ -3077,6 +3182,86 @@ void Assembler::pcmpeqd(XMMRegister dst, XMMRegister src) {
}
// byte 1 of 3-byte VEX: inverted R/X/B register-extension bits in bits 7..5
// plus the m-mmmm opcode-map selector in the low bits
// (1 = 0F, 2 = 0F38, 3 = 0F3A).
void Assembler::emit_vex3_byte1(XMMRegister reg, XMMRegister rm, byte m) {
  DCHECK(1 <= m && m <= 3);
  // reg supplies R, rm supplies B; X stays 0 for a register-register form.
  // The bits are stored inverted per the VEX encoding.
  byte rxb = ~((reg.high_bit() << 2) | rm.high_bit()) << 5;
  emit(rxb | m);
}
// byte 1 of 3-byte VEX, memory-operand form: reg supplies R; rm.rex_ is
// assumed to already hold the X/B extension bits computed for the memory
// operand (TODO(review): confirm Operand::rex_ layout matches this use).
void Assembler::emit_vex3_byte1(XMMRegister reg, const Operand& rm, byte m) {
  DCHECK(1 <= m && m <= 3);
  byte rxb = ~((reg.high_bit() << 2) | rm.rex_) << 5;
  emit(rxb | m);
}
// byte 1 of 2-byte VEX: inverted R in bit 7, inverted vvvv (extra source
// register) in bits 6..3, and the combined L/pp field in the low 3 bits.
void Assembler::emit_vex2_byte1(XMMRegister reg, XMMRegister v, byte lpp) {
  DCHECK(lpp <= 3);
  byte rv = ~((reg.high_bit() << 4) | v.code()) << 3;
  emit(rv | lpp);
}
// byte 2 of 3-byte VEX: W in bit 7, inverted vvvv (extra source register)
// in bits 6..3, and the combined L/pp field in the low 3 bits.
void Assembler::emit_vex3_byte2(byte w, XMMRegister v, byte lpp) {
  DCHECK(w <= 1);
  DCHECK(lpp <= 3);
  emit((w << 7) | ((~v.code() & 0xf) << 3) | lpp);
}
// Emit an FMA3 instruction on scalar doubles:
// VEX.66.0F38.W1 <op> /r, i.e. dst = fma-op(dst, src1, src2).
void Assembler::vfmasd(byte op, XMMRegister dst, XMMRegister src1,
                       XMMRegister src2) {
  DCHECK(IsEnabled(FMA3));
  EnsureSpace ensure_space(this);
  emit_vex3_byte0();                 // 0xC4
  emit_vex3_byte1(dst, src2, 0x02);  // m-mmmm = 0F38 opcode map
  emit_vex3_byte2(0x1, src1, 0x01);  // W=1 (double), vvvv=src1, pp=66
  emit(op);
  emit_sse_operand(dst, src2);
}
// Memory-operand form of the scalar-double FMA3 emitter:
// VEX.66.0F38.W1 <op> /r with src2 in memory.
void Assembler::vfmasd(byte op, XMMRegister dst, XMMRegister src1,
                       const Operand& src2) {
  DCHECK(IsEnabled(FMA3));
  EnsureSpace ensure_space(this);
  emit_vex3_byte0();                 // 0xC4
  emit_vex3_byte1(dst, src2, 0x02);  // m-mmmm = 0F38 opcode map
  emit_vex3_byte2(0x1, src1, 0x01);  // W=1 (double), vvvv=src1, pp=66
  emit(op);
  emit_sse_operand(dst, src2);
}
// Emit an FMA3 instruction on scalar singles:
// VEX.66.0F38.W0 <op> /r, i.e. dst = fma-op(dst, src1, src2).
void Assembler::vfmass(byte op, XMMRegister dst, XMMRegister src1,
                       XMMRegister src2) {
  DCHECK(IsEnabled(FMA3));
  EnsureSpace ensure_space(this);
  emit_vex3_byte0();                 // 0xC4
  emit_vex3_byte1(dst, src2, 0x02);  // m-mmmm = 0F38 opcode map
  emit_vex3_byte2(0x0, src1, 0x01);  // W=0 (single), vvvv=src1, pp=66
  emit(op);
  emit_sse_operand(dst, src2);
}
// Memory-operand form of the scalar-single FMA3 emitter:
// VEX.66.0F38.W0 <op> /r with src2 in memory.
void Assembler::vfmass(byte op, XMMRegister dst, XMMRegister src1,
                       const Operand& src2) {
  DCHECK(IsEnabled(FMA3));
  EnsureSpace ensure_space(this);
  emit_vex3_byte0();                 // 0xC4
  emit_vex3_byte1(dst, src2, 0x02);  // m-mmmm = 0F38 opcode map
  emit_vex3_byte2(0x0, src1, 0x01);  // W=0 (single), vvvv=src1, pp=66
  emit(op);
  emit_sse_operand(dst, src2);
}
void Assembler::emit_sse_operand(XMMRegister reg, const Operand& adr) {
Register ireg = { reg.code() };
emit_operand(ireg, adr);

View File

@ -1014,6 +1014,17 @@ class Assembler : public AssemblerBase {
void sahf();
// SSE instructions
void addss(XMMRegister dst, XMMRegister src);
void addss(XMMRegister dst, const Operand& src);
void subss(XMMRegister dst, XMMRegister src);
void subss(XMMRegister dst, const Operand& src);
void mulss(XMMRegister dst, XMMRegister src);
void mulss(XMMRegister dst, const Operand& src);
void divss(XMMRegister dst, XMMRegister src);
void divss(XMMRegister dst, const Operand& src);
void ucomiss(XMMRegister dst, XMMRegister src);
void ucomiss(XMMRegister dst, const Operand& src);
void movaps(XMMRegister dst, XMMRegister src);
void movss(XMMRegister dst, const Operand& src);
void movss(const Operand& dst, XMMRegister src);
@ -1123,6 +1134,157 @@ class Assembler : public AssemblerBase {
void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);
// AVX instruction
void vfmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmasd(0x99, dst, src1, src2);
}
void vfmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmasd(0xa9, dst, src1, src2);
}
void vfmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmasd(0xb9, dst, src1, src2);
}
void vfmadd132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
vfmasd(0x99, dst, src1, src2);
}
void vfmadd213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
vfmasd(0xa9, dst, src1, src2);
}
void vfmadd231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
vfmasd(0xb9, dst, src1, src2);
}
void vfmsub132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmasd(0x9b, dst, src1, src2);
}
void vfmsub213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmasd(0xab, dst, src1, src2);
}
void vfmsub231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmasd(0xbb, dst, src1, src2);
}
void vfmsub132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
vfmasd(0x9b, dst, src1, src2);
}
void vfmsub213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
vfmasd(0xab, dst, src1, src2);
}
void vfmsub231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
vfmasd(0xbb, dst, src1, src2);
}
void vfnmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmasd(0x9d, dst, src1, src2);
}
void vfnmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmasd(0xad, dst, src1, src2);
}
void vfnmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmasd(0xbd, dst, src1, src2);
}
void vfnmadd132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
vfmasd(0x9d, dst, src1, src2);
}
void vfnmadd213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
vfmasd(0xad, dst, src1, src2);
}
void vfnmadd231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
vfmasd(0xbd, dst, src1, src2);
}
void vfnmsub132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmasd(0x9f, dst, src1, src2);
}
void vfnmsub213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmasd(0xaf, dst, src1, src2);
}
void vfnmsub231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmasd(0xbf, dst, src1, src2);
}
void vfnmsub132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
vfmasd(0x9f, dst, src1, src2);
}
void vfnmsub213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
vfmasd(0xaf, dst, src1, src2);
}
void vfnmsub231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
vfmasd(0xbf, dst, src1, src2);
}
void vfmasd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
void vfmasd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
void vfmadd132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmass(0x99, dst, src1, src2);
}
void vfmadd213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmass(0xa9, dst, src1, src2);
}
void vfmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmass(0xb9, dst, src1, src2);
}
void vfmadd132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
vfmass(0x99, dst, src1, src2);
}
void vfmadd213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
vfmass(0xa9, dst, src1, src2);
}
void vfmadd231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
vfmass(0xb9, dst, src1, src2);
}
void vfmsub132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmass(0x9b, dst, src1, src2);
}
void vfmsub213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmass(0xab, dst, src1, src2);
}
void vfmsub231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmass(0xbb, dst, src1, src2);
}
void vfmsub132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
vfmass(0x9b, dst, src1, src2);
}
void vfmsub213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
vfmass(0xab, dst, src1, src2);
}
void vfmsub231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
vfmass(0xbb, dst, src1, src2);
}
void vfnmadd132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmass(0x9d, dst, src1, src2);
}
void vfnmadd213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmass(0xad, dst, src1, src2);
}
void vfnmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmass(0xbd, dst, src1, src2);
}
void vfnmadd132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
vfmass(0x9d, dst, src1, src2);
}
void vfnmadd213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
vfmass(0xad, dst, src1, src2);
}
void vfnmadd231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
vfmass(0xbd, dst, src1, src2);
}
void vfnmsub132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmass(0x9f, dst, src1, src2);
}
void vfnmsub213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmass(0xaf, dst, src1, src2);
}
void vfnmsub231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmass(0xbf, dst, src1, src2);
}
void vfnmsub132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
vfmass(0x9f, dst, src1, src2);
}
void vfnmsub213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
vfmass(0xaf, dst, src1, src2);
}
void vfnmsub231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
vfmass(0xbf, dst, src1, src2);
}
void vfmass(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
void vfmass(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
// Debugging
void Print();
@ -1316,6 +1478,14 @@ class Assembler : public AssemblerBase {
}
}
// Emit vex prefix
void emit_vex2_byte0() { emit(0xc5); }
void emit_vex2_byte1(XMMRegister reg, XMMRegister v, byte lpp);
void emit_vex3_byte0() { emit(0xc4); }
void emit_vex3_byte1(XMMRegister reg, XMMRegister rm, byte m);
void emit_vex3_byte1(XMMRegister reg, const Operand& rm, byte m);
void emit_vex3_byte2(byte w, XMMRegister v, byte lpp);
// Emit the ModR/M byte, and optionally the SIB byte and
// 1- or 4-byte offset for a memory operand. Also encodes
// the second operand of the operation, a register or operation

View File

@ -148,6 +148,8 @@ enum Prefixes {
ESCAPE_PREFIX = 0x0F,
OPERAND_SIZE_OVERRIDE_PREFIX = 0x66,
ADDRESS_SIZE_OVERRIDE_PREFIX = 0x67,
VEX3_PREFIX = 0xC4,
VEX2_PREFIX = 0xC5,
REPNE_PREFIX = 0xF2,
REP_PREFIX = 0xF3,
REPEQ_PREFIX = REP_PREFIX
@ -290,11 +292,14 @@ class DisassemblerX64 {
ABORT_ON_UNIMPLEMENTED_OPCODE)
: converter_(converter),
tmp_buffer_pos_(0),
abort_on_unimplemented_(
unimplemented_action == ABORT_ON_UNIMPLEMENTED_OPCODE),
abort_on_unimplemented_(unimplemented_action ==
ABORT_ON_UNIMPLEMENTED_OPCODE),
rex_(0),
operand_size_(0),
group_1_prefix_(0),
vex_byte0_(0),
vex_byte1_(0),
vex_byte2_(0),
byte_size_operand_(false),
instruction_table_(instruction_table.Pointer()) {
tmp_buffer_[0] = '\0';
@ -323,6 +328,9 @@ class DisassemblerX64 {
byte rex_;
byte operand_size_; // 0x66 or (if no group 3 prefix is present) 0x0.
byte group_1_prefix_; // 0xF2, 0xF3, or (if no group 1 prefix is present) 0.
byte vex_byte0_; // 0xc4 or 0xc5
byte vex_byte1_;
byte vex_byte2_; // only for 3 bytes vex prefix
// Byte size operand override.
bool byte_size_operand_;
const InstructionTable* const instruction_table_;
@ -345,6 +353,51 @@ class DisassemblerX64 {
bool rex_w() { return (rex_ & 0x08) != 0; }
// True if this is a 128-bit operation (VEX.L == 0). The L bit is bit 2 of
// the byte that also carries the pp field: byte 2 of a 3-byte VEX prefix,
// byte 1 of a 2-byte VEX prefix.
bool vex_128() {
  DCHECK(vex_byte0_ == VEX3_PREFIX || vex_byte0_ == VEX2_PREFIX);
  byte checked = vex_byte0_ == VEX3_PREFIX ? vex_byte2_ : vex_byte1_;
  // Bug fix: the previous test `(checked & 4) != 1` was always true, because
  // `checked & 4` can only evaluate to 0 or 4. L == 0 selects 128-bit.
  return (checked & 4) == 0;
}
// True if the VEX pp field encodes an implied 0x66 prefix (pp == 1).
bool vex_66() {
  DCHECK(vex_byte0_ == VEX3_PREFIX || vex_byte0_ == VEX2_PREFIX);
  // pp lives in byte 2 of a 3-byte VEX prefix, byte 1 of a 2-byte one.
  byte checked = vex_byte0_ == VEX3_PREFIX ? vex_byte2_ : vex_byte1_;
  return (checked & 3) == 1;
}
// True if the VEX pp field encodes an implied 0xF3 prefix (pp == 2).
bool vex_f3() {
  DCHECK(vex_byte0_ == VEX3_PREFIX || vex_byte0_ == VEX2_PREFIX);
  byte checked = vex_byte0_ == VEX3_PREFIX ? vex_byte2_ : vex_byte1_;
  return (checked & 3) == 2;
}
// True if the VEX pp field encodes an implied 0xF2 prefix (pp == 3).
bool vex_f2() {
  DCHECK(vex_byte0_ == VEX3_PREFIX || vex_byte0_ == VEX2_PREFIX);
  byte checked = vex_byte0_ == VEX3_PREFIX ? vex_byte2_ : vex_byte1_;
  return (checked & 3) == 3;
}
// True if the opcode map is 0F. A 2-byte VEX prefix always implies the 0F
// map; for 3-byte VEX the map selector is m-mmmm == 1.
bool vex_0f() {
  if (vex_byte0_ == VEX2_PREFIX) return true;
  // m-mmmm is 5 bits wide, but only values 1..3 are defined, so testing the
  // low two bits is sufficient — assumes a well-formed prefix.
  return (vex_byte1_ & 3) == 1;
}
// True if the opcode map is 0F38 (m-mmmm == 2; 3-byte VEX only).
bool vex_0f38() {
  DCHECK(vex_byte0_ == VEX3_PREFIX);
  return (vex_byte1_ & 3) == 2;
}
// True if the opcode map is 0F3A (m-mmmm == 3; 3-byte VEX only).
bool vex_0f3a() {
  DCHECK(vex_byte0_ == VEX3_PREFIX);
  return (vex_byte1_ & 3) == 3;
}
// Extract the extra source register index from the inverted vvvv field
// (bits 6..3 of the byte that also carries L/pp).
int vex_vreg() {
  DCHECK(vex_byte0_ == VEX3_PREFIX || vex_byte0_ == VEX2_PREFIX);
  byte checked = vex_byte0_ == VEX3_PREFIX ? vex_byte2_ : vex_byte1_;
  return ~(checked >> 3) & 0xf;
}
OperandSize operand_size() {
if (byte_size_operand_) return OPERAND_BYTE_SIZE;
if (rex_w()) return OPERAND_QUADWORD_SIZE;
@ -356,6 +409,8 @@ class DisassemblerX64 {
return "bwlq"[operand_size()];
}
char float_size_code() { return "sd"[rex_w()]; }
const char* NameOfCPURegister(int reg) const {
return converter_.NameOfCPURegister(reg);
}
@ -414,6 +469,7 @@ class DisassemblerX64 {
int FPUInstruction(byte* data);
int MemoryFPUInstruction(int escape_opcode, int regop, byte* modrm_start);
int RegisterFPUInstruction(int escape_opcode, byte modrm_byte);
int AVXInstruction(byte* data);
void AppendToBuffer(const char* format, ...);
void UnimplementedInstruction() {
@ -811,6 +867,92 @@ int DisassemblerX64::SetCC(byte* data) {
}
// Disassemble one VEX-prefixed instruction (vex_byte0_/1_/2_ were filled in
// by InstructionDecode before dispatching here). Only the 128-bit 66.0F38
// map is handled, which covers the scalar FMA3 family added by this patch;
// anything else is reported as unimplemented. Returns the number of bytes
// consumed, starting at *data (the opcode byte).
int DisassemblerX64::AVXInstruction(byte* data) {
  byte opcode = *data;
  byte* current = data + 1;
  if (vex_byte0_ == VEX3_PREFIX) {
    if (vex_128()) {
      if (vex_66() && vex_0f38()) {
        // FMA3: three-operand form "op dst,vvvv,rm". float_size_code()
        // picks 's' vs 'd' from the W bit, which InstructionDecode folded
        // into rex_.
        int mod, regop, rm, vvvv = vex_vreg();
        get_modrm(*current, &mod, &regop, &rm);
        switch (opcode) {
          case 0x99:
            AppendToBuffer("vfmadd132s%c %s,%s,", float_size_code(),
                           NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
            current += PrintRightXMMOperand(current);
            break;
          case 0xa9:
            AppendToBuffer("vfmadd213s%c %s,%s,", float_size_code(),
                           NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
            current += PrintRightXMMOperand(current);
            break;
          case 0xb9:
            AppendToBuffer("vfmadd231s%c %s,%s,", float_size_code(),
                           NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
            current += PrintRightXMMOperand(current);
            break;
          case 0x9b:
            AppendToBuffer("vfmsub132s%c %s,%s,", float_size_code(),
                           NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
            current += PrintRightXMMOperand(current);
            break;
          case 0xab:
            AppendToBuffer("vfmsub213s%c %s,%s,", float_size_code(),
                           NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
            current += PrintRightXMMOperand(current);
            break;
          case 0xbb:
            AppendToBuffer("vfmsub231s%c %s,%s,", float_size_code(),
                           NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
            current += PrintRightXMMOperand(current);
            break;
          case 0x9d:
            AppendToBuffer("vfnmadd132s%c %s,%s,", float_size_code(),
                           NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
            current += PrintRightXMMOperand(current);
            break;
          case 0xad:
            AppendToBuffer("vfnmadd213s%c %s,%s,", float_size_code(),
                           NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
            current += PrintRightXMMOperand(current);
            break;
          case 0xbd:
            AppendToBuffer("vfnmadd231s%c %s,%s,", float_size_code(),
                           NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
            current += PrintRightXMMOperand(current);
            break;
          case 0x9f:
            AppendToBuffer("vfnmsub132s%c %s,%s,", float_size_code(),
                           NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
            current += PrintRightXMMOperand(current);
            break;
          case 0xaf:
            AppendToBuffer("vfnmsub213s%c %s,%s,", float_size_code(),
                           NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
            current += PrintRightXMMOperand(current);
            break;
          case 0xbf:
            AppendToBuffer("vfnmsub231s%c %s,%s,", float_size_code(),
                           NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
            current += PrintRightXMMOperand(current);
            break;
          default:
            UnimplementedInstruction();
        }
      }
    } else {
      UnimplementedInstruction();
    }
  } else if (vex_byte0_ == VEX2_PREFIX) {
    // No 2-byte-VEX instructions are decoded yet.
    UnimplementedInstruction();
  } else {
    UNREACHABLE();
  }
  return static_cast<int>(current - data);
}
// Returns number of bytes used, including *data.
int DisassemblerX64::FPUInstruction(byte* data) {
byte escape_opcode = *data;
@ -1189,6 +1331,16 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
AppendToBuffer("cvttss2si%c %s,",
operand_size_code(), NameOfCPURegister(regop));
current += PrintRightXMMOperand(current);
} else if (opcode == 0x58) {
int mod, regop, rm;
get_modrm(*current, &mod, &regop, &rm);
AppendToBuffer("addss %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
} else if (opcode == 0x59) {
int mod, regop, rm;
get_modrm(*current, &mod, &regop, &rm);
AppendToBuffer("mulss %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
} else if (opcode == 0x5A) {
// CVTSS2SD:
// Convert scalar single-precision FP to scalar double-precision FP.
@ -1196,6 +1348,16 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
get_modrm(*current, &mod, &regop, &rm);
AppendToBuffer("cvtss2sd %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
} else if (opcode == 0x5c) {
int mod, regop, rm;
get_modrm(*current, &mod, &regop, &rm);
AppendToBuffer("subss %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
} else if (opcode == 0x5e) {
int mod, regop, rm;
get_modrm(*current, &mod, &regop, &rm);
AppendToBuffer("divss %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
} else if (opcode == 0x7E) {
int mod, regop, rm;
get_modrm(*current, &mod, &regop, &rm);
@ -1234,6 +1396,11 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
current += PrintRightXMMOperand(current);
AppendToBuffer(",%s", NameOfXMMRegister(regop));
} else if (opcode == 0x2e) {
int mod, regop, rm;
get_modrm(*current, &mod, &regop, &rm);
AppendToBuffer("ucomiss %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
} else if (opcode == 0xA2) {
// CPUID
AppendToBuffer("%s", mnemonic);
@ -1387,99 +1554,114 @@ int DisassemblerX64::InstructionDecode(v8::internal::Vector<char> out_buffer,
if (rex_w()) AppendToBuffer("REX.W ");
} else if ((current & 0xFE) == 0xF2) { // Group 1 prefix (0xF2 or 0xF3).
group_1_prefix_ = current;
} else if (current == VEX3_PREFIX) {
vex_byte0_ = current;
vex_byte1_ = *(data + 1);
vex_byte2_ = *(data + 2);
setRex(0x40 | (~(vex_byte1_ >> 5) & 7) | ((vex_byte2_ >> 4) & 8));
data += 2;
} else if (current == VEX2_PREFIX) {
vex_byte0_ = current;
vex_byte1_ = *(data + 1);
setRex(0x40 | (~(vex_byte1_ >> 5) & 4));
data++;
} else { // Not a prefix - an opcode.
break;
}
data++;
}
const InstructionDesc& idesc = instruction_table_->Get(current);
byte_size_operand_ = idesc.byte_size_operation;
switch (idesc.type) {
case ZERO_OPERANDS_INSTR:
if (current >= 0xA4 && current <= 0xA7) {
// String move or compare operations.
if (group_1_prefix_ == REP_PREFIX) {
// REP.
AppendToBuffer("rep ");
// Decode AVX instructions.
if (vex_byte0_ != 0) {
processed = true;
data += AVXInstruction(data);
} else {
const InstructionDesc& idesc = instruction_table_->Get(current);
byte_size_operand_ = idesc.byte_size_operation;
switch (idesc.type) {
case ZERO_OPERANDS_INSTR:
if (current >= 0xA4 && current <= 0xA7) {
// String move or compare operations.
if (group_1_prefix_ == REP_PREFIX) {
// REP.
AppendToBuffer("rep ");
}
if (rex_w()) AppendToBuffer("REX.W ");
AppendToBuffer("%s%c", idesc.mnem, operand_size_code());
} else {
AppendToBuffer("%s", idesc.mnem, operand_size_code());
}
if (rex_w()) AppendToBuffer("REX.W ");
AppendToBuffer("%s%c", idesc.mnem, operand_size_code());
} else {
AppendToBuffer("%s", idesc.mnem, operand_size_code());
data++;
break;
case TWO_OPERANDS_INSTR:
data++;
data += PrintOperands(idesc.mnem, idesc.op_order_, data);
break;
case JUMP_CONDITIONAL_SHORT_INSTR:
data += JumpConditionalShort(data);
break;
case REGISTER_INSTR:
AppendToBuffer("%s%c %s", idesc.mnem, operand_size_code(),
NameOfCPURegister(base_reg(current & 0x07)));
data++;
break;
case PUSHPOP_INSTR:
AppendToBuffer("%s %s", idesc.mnem,
NameOfCPURegister(base_reg(current & 0x07)));
data++;
break;
case MOVE_REG_INSTR: {
byte* addr = NULL;
switch (operand_size()) {
case OPERAND_WORD_SIZE:
addr =
reinterpret_cast<byte*>(*reinterpret_cast<int16_t*>(data + 1));
data += 3;
break;
case OPERAND_DOUBLEWORD_SIZE:
addr =
reinterpret_cast<byte*>(*reinterpret_cast<uint32_t*>(data + 1));
data += 5;
break;
case OPERAND_QUADWORD_SIZE:
addr =
reinterpret_cast<byte*>(*reinterpret_cast<int64_t*>(data + 1));
data += 9;
break;
default:
UNREACHABLE();
}
AppendToBuffer("mov%c %s,%s", operand_size_code(),
NameOfCPURegister(base_reg(current & 0x07)),
NameOfAddress(addr));
break;
}
data++;
break;
case TWO_OPERANDS_INSTR:
data++;
data += PrintOperands(idesc.mnem, idesc.op_order_, data);
break;
case JUMP_CONDITIONAL_SHORT_INSTR:
data += JumpConditionalShort(data);
break;
case REGISTER_INSTR:
AppendToBuffer("%s%c %s",
idesc.mnem,
operand_size_code(),
NameOfCPURegister(base_reg(current & 0x07)));
data++;
break;
case PUSHPOP_INSTR:
AppendToBuffer("%s %s",
idesc.mnem,
NameOfCPURegister(base_reg(current & 0x07)));
data++;
break;
case MOVE_REG_INSTR: {
byte* addr = NULL;
switch (operand_size()) {
case OPERAND_WORD_SIZE:
addr = reinterpret_cast<byte*>(*reinterpret_cast<int16_t*>(data + 1));
data += 3;
break;
case OPERAND_DOUBLEWORD_SIZE:
addr =
reinterpret_cast<byte*>(*reinterpret_cast<uint32_t*>(data + 1));
data += 5;
break;
case OPERAND_QUADWORD_SIZE:
addr = reinterpret_cast<byte*>(*reinterpret_cast<int64_t*>(data + 1));
data += 9;
break;
default:
UNREACHABLE();
case CALL_JUMP_INSTR: {
byte* addr = data + *reinterpret_cast<int32_t*>(data + 1) + 5;
AppendToBuffer("%s %s", idesc.mnem, NameOfAddress(addr));
data += 5;
break;
}
AppendToBuffer("mov%c %s,%s",
operand_size_code(),
NameOfCPURegister(base_reg(current & 0x07)),
NameOfAddress(addr));
break;
case SHORT_IMMEDIATE_INSTR: {
byte* addr =
reinterpret_cast<byte*>(*reinterpret_cast<int32_t*>(data + 1));
AppendToBuffer("%s rax,%s", idesc.mnem, NameOfAddress(addr));
data += 5;
break;
}
case NO_INSTR:
processed = false;
break;
default:
UNIMPLEMENTED(); // This type is not implemented.
}
case CALL_JUMP_INSTR: {
byte* addr = data + *reinterpret_cast<int32_t*>(data + 1) + 5;
AppendToBuffer("%s %s", idesc.mnem, NameOfAddress(addr));
data += 5;
break;
}
case SHORT_IMMEDIATE_INSTR: {
byte* addr =
reinterpret_cast<byte*>(*reinterpret_cast<int32_t*>(data + 1));
AppendToBuffer("%s rax,%s", idesc.mnem, NameOfAddress(addr));
data += 5;
break;
}
case NO_INSTR:
processed = false;
break;
default:
UNIMPLEMENTED(); // This type is not implemented.
}
// The first byte didn't match any of the simple opcodes, so we

View File

@ -736,4 +736,454 @@ TEST(AssemblerX64SSE) {
F6 f = FUNCTION_CAST<F6>(code->entry());
CHECK_EQ(2, f(1.0, 2.0));
}
// Signature of the generated stub: returns the number of the first failing
// sub-test in rax, or 0 if all scalar-double FMA variants matched.
typedef int (*F7)(double x, double y, double z);
// Assemble every vfm*sd variant and check each against the result computed
// with plain mulsd/addsd/subsd.
TEST(AssemblerX64FMA_sd) {
  CcTest::InitializeVM();
  if (!CpuFeatures::IsSupported(FMA3)) return;
  Isolate* isolate = reinterpret_cast<Isolate*>(CcTest::isolate());
  HandleScope scope(isolate);
  v8::internal::byte buffer[1024];
  MacroAssembler assm(isolate, buffer, sizeof buffer);
  {
    CpuFeatureScope fscope(&assm, FMA3);
    Label exit;
    // argument in xmm0, xmm1 and xmm2
    // xmm0 * xmm1 + xmm2
    __ movaps(xmm3, xmm0);
    __ mulsd(xmm3, xmm1);
    __ addsd(xmm3, xmm2);  // Expected result in xmm3
    __ subq(rsp, Immediate(kDoubleSize));  // For memory operand
    // vfmadd132sd
    __ movl(rax, Immediate(1));  // Test number
    __ movaps(xmm8, xmm0);
    __ vfmadd132sd(xmm8, xmm2, xmm1);
    __ ucomisd(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfmadd213sd
    __ incq(rax);
    __ movaps(xmm8, xmm1);
    __ vfmadd213sd(xmm8, xmm0, xmm2);
    __ ucomisd(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfmadd231sd
    __ incq(rax);
    __ movaps(xmm8, xmm2);
    __ vfmadd231sd(xmm8, xmm0, xmm1);
    __ ucomisd(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfmadd132sd (memory operand)
    __ incq(rax);
    __ movaps(xmm8, xmm0);
    __ movsd(Operand(rsp, 0), xmm1);
    __ vfmadd132sd(xmm8, xmm2, Operand(rsp, 0));
    __ ucomisd(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfmadd213sd (memory operand)
    __ incq(rax);
    __ movaps(xmm8, xmm1);
    __ movsd(Operand(rsp, 0), xmm2);
    __ vfmadd213sd(xmm8, xmm0, Operand(rsp, 0));
    __ ucomisd(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfmadd231sd (memory operand)
    __ incq(rax);
    __ movaps(xmm8, xmm2);
    __ movsd(Operand(rsp, 0), xmm1);
    __ vfmadd231sd(xmm8, xmm0, Operand(rsp, 0));
    __ ucomisd(xmm8, xmm3);
    __ j(not_equal, &exit);
    // xmm0 * xmm1 - xmm2
    __ movaps(xmm3, xmm0);
    __ mulsd(xmm3, xmm1);
    __ subsd(xmm3, xmm2);  // Expected result in xmm3
    // vfmsub132sd
    __ incq(rax);
    __ movaps(xmm8, xmm0);
    __ vfmsub132sd(xmm8, xmm2, xmm1);
    __ ucomisd(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfmsub213sd
    __ incq(rax);
    __ movaps(xmm8, xmm1);
    __ vfmsub213sd(xmm8, xmm0, xmm2);
    __ ucomisd(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfmsub231sd
    __ incq(rax);
    __ movaps(xmm8, xmm2);
    __ vfmsub231sd(xmm8, xmm0, xmm1);
    __ ucomisd(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfmsub132sd (memory operand)
    __ incq(rax);
    __ movaps(xmm8, xmm0);
    __ movsd(Operand(rsp, 0), xmm1);
    __ vfmsub132sd(xmm8, xmm2, Operand(rsp, 0));
    __ ucomisd(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfmsub213sd (memory operand)
    __ incq(rax);
    __ movaps(xmm8, xmm1);
    __ movsd(Operand(rsp, 0), xmm2);
    __ vfmsub213sd(xmm8, xmm0, Operand(rsp, 0));
    __ ucomisd(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfmsub231sd (memory operand)
    __ incq(rax);
    __ movaps(xmm8, xmm2);
    __ movsd(Operand(rsp, 0), xmm1);
    __ vfmsub231sd(xmm8, xmm0, Operand(rsp, 0));
    __ ucomisd(xmm8, xmm3);
    __ j(not_equal, &exit);
    // - xmm0 * xmm1 + xmm2
    __ movaps(xmm3, xmm0);
    __ mulsd(xmm3, xmm1);
    __ Move(xmm4, (uint64_t)1 << 63);  // sign-bit mask to negate the product
    __ xorpd(xmm3, xmm4);
    __ addsd(xmm3, xmm2);  // Expected result in xmm3
    // vfnmadd132sd
    __ incq(rax);
    __ movaps(xmm8, xmm0);
    __ vfnmadd132sd(xmm8, xmm2, xmm1);
    __ ucomisd(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfnmadd213sd
    __ incq(rax);
    __ movaps(xmm8, xmm1);
    __ vfnmadd213sd(xmm8, xmm0, xmm2);
    __ ucomisd(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfnmadd231sd
    __ incq(rax);
    __ movaps(xmm8, xmm2);
    __ vfnmadd231sd(xmm8, xmm0, xmm1);
    __ ucomisd(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfnmadd132sd (memory operand)
    __ incq(rax);
    __ movaps(xmm8, xmm0);
    __ movsd(Operand(rsp, 0), xmm1);
    __ vfnmadd132sd(xmm8, xmm2, Operand(rsp, 0));
    __ ucomisd(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfnmadd213sd (memory operand)
    __ incq(rax);
    __ movaps(xmm8, xmm1);
    __ movsd(Operand(rsp, 0), xmm2);
    __ vfnmadd213sd(xmm8, xmm0, Operand(rsp, 0));
    __ ucomisd(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfnmadd231sd (memory operand)
    __ incq(rax);
    __ movaps(xmm8, xmm2);
    __ movsd(Operand(rsp, 0), xmm1);
    __ vfnmadd231sd(xmm8, xmm0, Operand(rsp, 0));
    __ ucomisd(xmm8, xmm3);
    __ j(not_equal, &exit);
    // - xmm0 * xmm1 - xmm2
    __ movaps(xmm3, xmm0);
    __ mulsd(xmm3, xmm1);
    __ Move(xmm4, (uint64_t)1 << 63);  // sign-bit mask to negate the product
    __ xorpd(xmm3, xmm4);
    __ subsd(xmm3, xmm2);  // Expected result in xmm3
    // vfnmsub132sd
    __ incq(rax);
    __ movaps(xmm8, xmm0);
    __ vfnmsub132sd(xmm8, xmm2, xmm1);
    __ ucomisd(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfnmsub213sd
    __ incq(rax);
    __ movaps(xmm8, xmm1);
    __ vfnmsub213sd(xmm8, xmm0, xmm2);
    __ ucomisd(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfnmsub231sd
    __ incq(rax);
    __ movaps(xmm8, xmm2);
    __ vfnmsub231sd(xmm8, xmm0, xmm1);
    __ ucomisd(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfnmsub132sd (memory operand)
    __ incq(rax);
    __ movaps(xmm8, xmm0);
    __ movsd(Operand(rsp, 0), xmm1);
    __ vfnmsub132sd(xmm8, xmm2, Operand(rsp, 0));
    __ ucomisd(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfnmsub213sd (memory operand)
    __ incq(rax);
    __ movaps(xmm8, xmm1);
    __ movsd(Operand(rsp, 0), xmm2);
    __ vfnmsub213sd(xmm8, xmm0, Operand(rsp, 0));
    __ ucomisd(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfnmsub231sd (memory operand)
    __ incq(rax);
    __ movaps(xmm8, xmm2);
    __ movsd(Operand(rsp, 0), xmm1);
    __ vfnmsub231sd(xmm8, xmm0, Operand(rsp, 0));
    __ ucomisd(xmm8, xmm3);
    __ j(not_equal, &exit);
    // All sub-tests passed: clear the test number before falling through.
    __ xorl(rax, rax);
    __ bind(&exit);
    __ addq(rsp, Immediate(kDoubleSize));
    __ ret(0);
  }
  CodeDesc desc;
  assm.GetCode(&desc);
  Handle<Code> code = isolate->factory()->NewCode(
      desc, Code::ComputeFlags(Code::STUB), Handle<Code>());
#ifdef OBJECT_PRINT
  OFStream os(stdout);
  code->Print(os);
#endif
  F7 f = FUNCTION_CAST<F7>(code->entry());
  // 0 means every FMA variant agreed with the mulsd/addsd/subsd reference.
  CHECK_EQ(0, f(0.000092662107262076, -2.460774966188315, -1.0958787393627414));
}
// Signature of the generated test code: the three float arguments arrive in
// xmm0, xmm1 and xmm2; the returned int is 0 on success, or the 1-based
// index (accumulated in rax) of the first sub-test whose FMA3 result
// disagreed with the plain-SSE reference value.
typedef int (*F8)(float x, float y, float z);
// Exercises every scalar single-precision FMA3 instruction
// (vf{,n}m{add,sub}{132,213,231}ss), in both register-register and
// register-memory form, checking each result against a reference value
// computed with ordinary SSE mulss/addss/subss (and xorps for negation).
TEST(AssemblerX64FMA_ss) {
  CcTest::InitializeVM();
  // Nothing to test on hardware without FMA3 support.
  if (!CpuFeatures::IsSupported(FMA3)) return;
  Isolate* isolate = reinterpret_cast<Isolate*>(CcTest::isolate());
  HandleScope scope(isolate);
  v8::internal::byte buffer[1024];
  MacroAssembler assm(isolate, buffer, sizeof buffer);
  {
    CpuFeatureScope fscope(&assm, FMA3);
    Label exit;
    // arguments in xmm0, xmm1 and xmm2
    // xmm0 * xmm1 + xmm2
    __ movaps(xmm3, xmm0);
    __ mulss(xmm3, xmm1);
    __ addss(xmm3, xmm2);  // Expected result in xmm3
    __ subq(rsp, Immediate(kDoubleSize));  // For memory operand
    // vfmadd132ss
    __ movl(rax, Immediate(1));  // Test number
    __ movaps(xmm8, xmm0);
    __ vfmadd132ss(xmm8, xmm2, xmm1);
    __ ucomiss(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfmadd213ss
    __ incq(rax);
    __ movaps(xmm8, xmm1);
    __ vfmadd213ss(xmm8, xmm0, xmm2);
    __ ucomiss(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfmadd231ss
    __ incq(rax);
    __ movaps(xmm8, xmm2);
    __ vfmadd231ss(xmm8, xmm0, xmm1);
    __ ucomiss(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfmadd132ss (memory operand)
    __ incq(rax);
    __ movaps(xmm8, xmm0);
    __ movss(Operand(rsp, 0), xmm1);
    __ vfmadd132ss(xmm8, xmm2, Operand(rsp, 0));
    __ ucomiss(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfmadd213ss (memory operand)
    __ incq(rax);
    __ movaps(xmm8, xmm1);
    __ movss(Operand(rsp, 0), xmm2);
    __ vfmadd213ss(xmm8, xmm0, Operand(rsp, 0));
    __ ucomiss(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfmadd231ss (memory operand)
    __ incq(rax);
    __ movaps(xmm8, xmm2);
    __ movss(Operand(rsp, 0), xmm1);
    __ vfmadd231ss(xmm8, xmm0, Operand(rsp, 0));
    __ ucomiss(xmm8, xmm3);
    __ j(not_equal, &exit);
    // xmm0 * xmm1 - xmm2
    __ movaps(xmm3, xmm0);
    __ mulss(xmm3, xmm1);
    __ subss(xmm3, xmm2);  // Expected result in xmm3
    // vfmsub132ss
    __ incq(rax);
    __ movaps(xmm8, xmm0);
    __ vfmsub132ss(xmm8, xmm2, xmm1);
    __ ucomiss(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfmsub213ss
    __ incq(rax);
    __ movaps(xmm8, xmm1);
    __ vfmsub213ss(xmm8, xmm0, xmm2);
    __ ucomiss(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfmsub231ss
    __ incq(rax);
    __ movaps(xmm8, xmm2);
    __ vfmsub231ss(xmm8, xmm0, xmm1);
    __ ucomiss(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfmsub132ss (memory operand)
    __ incq(rax);
    __ movaps(xmm8, xmm0);
    __ movss(Operand(rsp, 0), xmm1);
    __ vfmsub132ss(xmm8, xmm2, Operand(rsp, 0));
    __ ucomiss(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfmsub213ss (memory operand)
    __ incq(rax);
    __ movaps(xmm8, xmm1);
    __ movss(Operand(rsp, 0), xmm2);
    __ vfmsub213ss(xmm8, xmm0, Operand(rsp, 0));
    __ ucomiss(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfmsub231ss (memory operand)
    __ incq(rax);
    __ movaps(xmm8, xmm2);
    __ movss(Operand(rsp, 0), xmm1);
    __ vfmsub231ss(xmm8, xmm0, Operand(rsp, 0));
    __ ucomiss(xmm8, xmm3);
    __ j(not_equal, &exit);
    // - xmm0 * xmm1 + xmm2
    __ movaps(xmm3, xmm0);
    __ mulss(xmm3, xmm1);
    __ Move(xmm4, (uint32_t)1 << 31);  // Sign bit of a single-precision float.
    __ xorps(xmm3, xmm4);  // Negate the product in xmm3.
    __ addss(xmm3, xmm2);  // Expected result in xmm3
    // vfnmadd132ss
    __ incq(rax);
    __ movaps(xmm8, xmm0);
    __ vfnmadd132ss(xmm8, xmm2, xmm1);
    __ ucomiss(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfnmadd213ss
    __ incq(rax);
    __ movaps(xmm8, xmm1);
    __ vfnmadd213ss(xmm8, xmm0, xmm2);
    __ ucomiss(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfnmadd231ss
    __ incq(rax);
    __ movaps(xmm8, xmm2);
    __ vfnmadd231ss(xmm8, xmm0, xmm1);
    __ ucomiss(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfnmadd132ss (memory operand)
    __ incq(rax);
    __ movaps(xmm8, xmm0);
    __ movss(Operand(rsp, 0), xmm1);
    __ vfnmadd132ss(xmm8, xmm2, Operand(rsp, 0));
    __ ucomiss(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfnmadd213ss (memory operand)
    __ incq(rax);
    __ movaps(xmm8, xmm1);
    __ movss(Operand(rsp, 0), xmm2);
    __ vfnmadd213ss(xmm8, xmm0, Operand(rsp, 0));
    __ ucomiss(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfnmadd231ss (memory operand)
    __ incq(rax);
    __ movaps(xmm8, xmm2);
    __ movss(Operand(rsp, 0), xmm1);
    __ vfnmadd231ss(xmm8, xmm0, Operand(rsp, 0));
    __ ucomiss(xmm8, xmm3);
    __ j(not_equal, &exit);
    // - xmm0 * xmm1 - xmm2
    __ movaps(xmm3, xmm0);
    __ mulss(xmm3, xmm1);
    __ Move(xmm4, (uint32_t)1 << 31);  // Sign bit of a single-precision float.
    __ xorps(xmm3, xmm4);  // Negate the product in xmm3.
    __ subss(xmm3, xmm2);  // Expected result in xmm3
    // vfnmsub132ss
    __ incq(rax);
    __ movaps(xmm8, xmm0);
    __ vfnmsub132ss(xmm8, xmm2, xmm1);
    __ ucomiss(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfnmsub213ss
    __ incq(rax);
    __ movaps(xmm8, xmm1);
    __ vfnmsub213ss(xmm8, xmm0, xmm2);
    __ ucomiss(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfnmsub231ss
    __ incq(rax);
    __ movaps(xmm8, xmm2);
    __ vfnmsub231ss(xmm8, xmm0, xmm1);
    __ ucomiss(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfnmsub132ss (memory operand)
    __ incq(rax);
    __ movaps(xmm8, xmm0);
    __ movss(Operand(rsp, 0), xmm1);
    __ vfnmsub132ss(xmm8, xmm2, Operand(rsp, 0));
    __ ucomiss(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfnmsub213ss (memory operand)
    __ incq(rax);
    __ movaps(xmm8, xmm1);
    __ movss(Operand(rsp, 0), xmm2);
    __ vfnmsub213ss(xmm8, xmm0, Operand(rsp, 0));
    __ ucomiss(xmm8, xmm3);
    __ j(not_equal, &exit);
    // vfnmsub231ss (memory operand)
    __ incq(rax);
    __ movaps(xmm8, xmm2);
    __ movss(Operand(rsp, 0), xmm1);
    __ vfnmsub231ss(xmm8, xmm0, Operand(rsp, 0));
    __ ucomiss(xmm8, xmm3);
    __ j(not_equal, &exit);
    // All sub-tests passed: return 0.
    __ xorl(rax, rax);
    __ bind(&exit);
    __ addq(rsp, Immediate(kDoubleSize));
    __ ret(0);
  }
  CodeDesc desc;
  assm.GetCode(&desc);
  Handle<Code> code = isolate->factory()->NewCode(
      desc, Code::ComputeFlags(Code::STUB), Handle<Code>());
#ifdef OBJECT_PRINT
  OFStream os(stdout);
  code->Print(os);
#endif
  F8 f = FUNCTION_CAST<F8>(code->entry());
  // Run the generated code; 0 means every FMA3 form matched its reference.
  CHECK_EQ(0, f(9.26621069e-05f, -2.4607749f, -1.09587872f));
}
#undef __

View File

@ -51,7 +51,7 @@ TEST(DisasmX64) {
CcTest::InitializeVM();
Isolate* isolate = CcTest::i_isolate();
HandleScope scope(isolate);
v8::internal::byte buffer[2048];
v8::internal::byte buffer[4096];
Assembler assm(isolate, buffer, sizeof buffer);
DummyStaticFunction(NULL); // just bloody use it (DELETE; debugging)
@ -394,6 +394,14 @@ TEST(DisasmX64) {
__ xorps(xmm0, Operand(rbx, rcx, times_4, 10000));
// Arithmetic operation
__ addss(xmm1, xmm0);
__ addss(xmm1, Operand(rbx, rcx, times_4, 10000));
__ mulss(xmm1, xmm0);
__ mulss(xmm1, Operand(rbx, rcx, times_4, 10000));
__ subss(xmm1, xmm0);
__ subss(xmm1, Operand(rbx, rcx, times_4, 10000));
__ divss(xmm1, xmm0);
__ divss(xmm1, Operand(rbx, rcx, times_4, 10000));
__ addps(xmm1, xmm0);
__ addps(xmm1, Operand(rbx, rcx, times_4, 10000));
__ subps(xmm1, xmm0);
@ -402,6 +410,9 @@ TEST(DisasmX64) {
__ mulps(xmm1, Operand(rbx, rcx, times_4, 10000));
__ divps(xmm1, xmm0);
__ divps(xmm1, Operand(rbx, rcx, times_4, 10000));
__ ucomiss(xmm0, xmm1);
__ ucomiss(xmm0, Operand(rbx, rcx, times_4, 10000));
}
// SSE 2 instructions
{
@ -464,6 +475,74 @@ TEST(DisasmX64) {
}
}
// FMA3 instruction
{
if (CpuFeatures::IsSupported(FMA3)) {
CpuFeatureScope scope(&assm, FMA3);
__ vfmadd132sd(xmm0, xmm1, xmm2);
__ vfmadd132sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfmadd213sd(xmm0, xmm1, xmm2);
__ vfmadd213sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfmadd231sd(xmm0, xmm1, xmm2);
__ vfmadd231sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfmadd132sd(xmm9, xmm10, xmm11);
__ vfmadd132sd(xmm9, xmm10, Operand(r9, r11, times_4, 10000));
__ vfmadd213sd(xmm9, xmm10, xmm11);
__ vfmadd213sd(xmm9, xmm10, Operand(r9, r11, times_4, 10000));
__ vfmadd231sd(xmm9, xmm10, xmm11);
__ vfmadd231sd(xmm9, xmm10, Operand(r9, r11, times_4, 10000));
__ vfmsub132sd(xmm0, xmm1, xmm2);
__ vfmsub132sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfmsub213sd(xmm0, xmm1, xmm2);
__ vfmsub213sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfmsub231sd(xmm0, xmm1, xmm2);
__ vfmsub231sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfnmadd132sd(xmm0, xmm1, xmm2);
__ vfnmadd132sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfnmadd213sd(xmm0, xmm1, xmm2);
__ vfnmadd213sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfnmadd231sd(xmm0, xmm1, xmm2);
__ vfnmadd231sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfnmsub132sd(xmm0, xmm1, xmm2);
__ vfnmsub132sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfnmsub213sd(xmm0, xmm1, xmm2);
__ vfnmsub213sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfnmsub231sd(xmm0, xmm1, xmm2);
__ vfnmsub231sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfmadd132ss(xmm0, xmm1, xmm2);
__ vfmadd132ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfmadd213ss(xmm0, xmm1, xmm2);
__ vfmadd213ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfmadd231ss(xmm0, xmm1, xmm2);
__ vfmadd231ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfmsub132ss(xmm0, xmm1, xmm2);
__ vfmsub132ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfmsub213ss(xmm0, xmm1, xmm2);
__ vfmsub213ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfmsub231ss(xmm0, xmm1, xmm2);
__ vfmsub231ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfnmadd132ss(xmm0, xmm1, xmm2);
__ vfnmadd132ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfnmadd213ss(xmm0, xmm1, xmm2);
__ vfnmadd213ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfnmadd231ss(xmm0, xmm1, xmm2);
__ vfnmadd231ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfnmsub132ss(xmm0, xmm1, xmm2);
__ vfnmsub132ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfnmsub213ss(xmm0, xmm1, xmm2);
__ vfnmsub213ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfnmsub231ss(xmm0, xmm1, xmm2);
__ vfnmsub231ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
}
}
// xchg.
{
__ xchgq(rax, rax);