From 0dee9a7942e37a86a60e82d1f517c992dfae8055 Mon Sep 17 00:00:00 2001 From: "lrn@chromium.org" Date: Wed, 23 Jun 2010 14:05:18 +0000 Subject: [PATCH] X64: Change some fpu operations to use XMM registers. Review URL: http://codereview.chromium.org/2827022 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@4930 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/x64/assembler-x64.cc | 67 ++++++++++++++++++++++++++++++++++++++++ src/x64/assembler-x64.h | 8 +++++ src/x64/codegen-x64.cc | 15 ++++----- src/x64/disasm-x64.cc | 26 +++++++++++++--- src/x64/ic-x64.cc | 61 +++++++++++++----------------------- 5 files changed, 125 insertions(+), 52 deletions(-) diff --git a/src/x64/assembler-x64.cc b/src/x64/assembler-x64.cc index edad1e2a01..58d473920e 100644 --- a/src/x64/assembler-x64.cc +++ b/src/x64/assembler-x64.cc @@ -2606,6 +2606,28 @@ void Assembler::movsd(XMMRegister dst, const Operand& src) { } +void Assembler::movss(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + last_pc_ = pc_; + emit(0xF3); // single + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x10); // load + emit_sse_operand(dst, src); +} + + +void Assembler::movss(const Operand& src, XMMRegister dst) { + EnsureSpace ensure_space(this); + last_pc_ = pc_; + emit(0xF3); // single + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x11); // store + emit_sse_operand(dst, src); +} + + void Assembler::cvttss2si(Register dst, const Operand& src) { EnsureSpace ensure_space(this); last_pc_ = pc_; @@ -2661,6 +2683,17 @@ void Assembler::cvtlsi2sd(XMMRegister dst, Register src) { } +void Assembler::cvtlsi2ss(XMMRegister dst, Register src) { + EnsureSpace ensure_space(this); + last_pc_ = pc_; + emit(0xF3); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x2A); + emit_sse_operand(dst, src); +} + + void Assembler::cvtqsi2sd(XMMRegister dst, Register src) { EnsureSpace ensure_space(this); last_pc_ = pc_; @@ -2683,6 +2716,28 @@ void Assembler::cvtss2sd(XMMRegister dst, 
XMMRegister src) { } +void Assembler::cvtss2sd(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + last_pc_ = pc_; + emit(0xF3); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x5A); + emit_sse_operand(dst, src); +} + + +void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) { + EnsureSpace ensure_space(this); + last_pc_ = pc_; + emit(0xF2); + emit_optional_rex_32(dst, src); + emit(0x0F); + emit(0x5A); + emit_sse_operand(dst, src); +} + + void Assembler::addsd(XMMRegister dst, XMMRegister src) { EnsureSpace ensure_space(this); last_pc_ = pc_; @@ -2760,6 +2815,18 @@ void Assembler::ucomisd(XMMRegister dst, XMMRegister src) { } +void Assembler::ucomisd(XMMRegister dst, const Operand& src) { + EnsureSpace ensure_space(this); + last_pc_ = pc_; + emit(0x66); + emit_optional_rex_32(dst, src); + emit(0x0f); + emit(0x2e); + emit_sse_operand(dst, src); +} + + + void Assembler::emit_sse_operand(XMMRegister reg, const Operand& adr) { Register ireg = { reg.code() }; emit_operand(ireg, adr); diff --git a/src/x64/assembler-x64.h b/src/x64/assembler-x64.h index 1bddea4bf6..407863f697 100644 --- a/src/x64/assembler-x64.h +++ b/src/x64/assembler-x64.h @@ -1110,6 +1110,9 @@ class Assembler : public Malloced { void movsd(XMMRegister dst, XMMRegister src); void movsd(XMMRegister dst, const Operand& src); + void movss(XMMRegister dst, const Operand& src); + void movss(const Operand& dst, XMMRegister src); + void cvttss2si(Register dst, const Operand& src); void cvttsd2si(Register dst, const Operand& src); void cvttsd2siq(Register dst, XMMRegister src); @@ -1119,7 +1122,11 @@ class Assembler : public Malloced { void cvtqsi2sd(XMMRegister dst, const Operand& src); void cvtqsi2sd(XMMRegister dst, Register src); + void cvtlsi2ss(XMMRegister dst, Register src); + void cvtss2sd(XMMRegister dst, XMMRegister src); + void cvtss2sd(XMMRegister dst, const Operand& src); + void cvtsd2ss(XMMRegister dst, XMMRegister src); void addsd(XMMRegister dst, XMMRegister 
src); void subsd(XMMRegister dst, XMMRegister src); @@ -1130,6 +1137,7 @@ class Assembler : public Malloced { void sqrtsd(XMMRegister dst, XMMRegister src); void ucomisd(XMMRegister dst, XMMRegister src); + void ucomisd(XMMRegister dst, const Operand& src); // The first argument is the reg field, the second argument is the r/m field. void emit_sse_operand(XMMRegister dst, XMMRegister src); diff --git a/src/x64/codegen-x64.cc b/src/x64/codegen-x64.cc index 5851fb1a93..572fa623ee 100644 --- a/src/x64/codegen-x64.cc +++ b/src/x64/codegen-x64.cc @@ -5336,9 +5336,8 @@ void CodeGenerator::ToBoolean(ControlDestination* dest) { dest->false_target()->Branch(equal); Condition is_smi = masm_->CheckSmi(value.reg()); dest->true_target()->Branch(is_smi); - __ fldz(); - __ fld_d(FieldOperand(value.reg(), HeapNumber::kValueOffset)); - __ FCmp(); + __ xorpd(xmm0, xmm0); + __ ucomisd(xmm0, FieldOperand(value.reg(), HeapNumber::kValueOffset)); value.Unuse(); dest->Split(not_zero); } else { @@ -8000,14 +7999,12 @@ void ToBooleanStub::Generate(MacroAssembler* masm) { __ jmp(&true_result); __ bind(&not_string); - // HeapNumber => false iff +0, -0, or NaN. - // These three cases set C3 when compared to zero in the FPU. __ CompareRoot(rdx, Heap::kHeapNumberMapRootIndex); __ j(not_equal, &true_result); - __ fldz(); // Load zero onto fp stack - // Load heap-number double value onto fp stack - __ fld_d(FieldOperand(rax, HeapNumber::kValueOffset)); - __ FCmp(); + // HeapNumber => false iff +0, -0, or NaN. + // These three cases set the zero flag when compared to zero using ucomisd. + __ xorpd(xmm0, xmm0); + __ ucomisd(xmm0, FieldOperand(rax, HeapNumber::kValueOffset)); __ j(zero, &false_result); // Fall through to |true_result|. 
diff --git a/src/x64/disasm-x64.cc b/src/x64/disasm-x64.cc index 57847571dc..002a5ebc7b 100644 --- a/src/x64/disasm-x64.cc +++ b/src/x64/disasm-x64.cc @@ -1057,7 +1057,7 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) { // CVTSI2SD: integer to XMM double conversion. int mod, regop, rm; get_modrm(*current, &mod, &regop, &rm); - AppendToBuffer("%s %s,", mnemonic, NameOfXMMRegister(regop)); + AppendToBuffer("%sd %s,", mnemonic, NameOfXMMRegister(regop)); current += PrintRightOperand(current); } else if ((opcode & 0xF8) == 0x58 || opcode == 0x51) { // XMM arithmetic. Mnemonic was retrieved at the start of this function. @@ -1070,7 +1070,25 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) { } } else if (group_1_prefix_ == 0xF3) { // Instructions with prefix 0xF3. - if (opcode == 0x2C) { + if (opcode == 0x11 || opcode == 0x10) { + // MOVSS: Move scalar single-precision fp to/from/between XMM registers. + AppendToBuffer("movss "); + int mod, regop, rm; + get_modrm(*current, &mod, &regop, &rm); + if (opcode == 0x11) { + current += PrintRightOperand(current); + AppendToBuffer(",%s", NameOfXMMRegister(regop)); + } else { + AppendToBuffer("%s,", NameOfXMMRegister(regop)); + current += PrintRightOperand(current); + } + } else if (opcode == 0x2A) { + // CVTSI2SS: integer to XMM single conversion. + int mod, regop, rm; + get_modrm(*current, &mod, &regop, &rm); + AppendToBuffer("%ss %s,", mnemonic, NameOfXMMRegister(regop)); + current += PrintRightOperand(current); + } else if (opcode == 0x2C) { // CVTTSS2SI: Convert scalar single-precision FP to dword integer. // Assert that mod is not 3, so source is memory, not an XMM register. ASSERT_NE(0xC0, *current & 0xC0); @@ -1146,8 +1164,8 @@ const char* DisassemblerX64::TwoByteMnemonic(byte opcode) { switch (opcode) { case 0x1F: return "nop"; - case 0x2A: // F2 prefix. - return "cvtsi2sd"; + case 0x2A: // F2/F3 prefix. + return "cvtsi2s"; case 0x31: return "rdtsc"; case 0x51: // F2 prefix. 
diff --git a/src/x64/ic-x64.cc b/src/x64/ic-x64.cc index 20ed734b8a..e17ff1fa4b 100644 --- a/src/x64/ic-x64.cc +++ b/src/x64/ic-x64.cc @@ -702,7 +702,7 @@ void KeyedLoadIC::GenerateExternalArray(MacroAssembler* masm, // -- rdx : receiver // -- rsp[0] : return address // ----------------------------------- - Label slow, failed_allocation; + Label slow; // Check that the object isn't a smi. __ JumpIfSmi(rdx, &slow); @@ -761,7 +761,7 @@ void KeyedLoadIC::GenerateExternalArray(MacroAssembler* masm, __ movl(rcx, Operand(rbx, rcx, times_4, 0)); break; case kExternalFloatArray: - __ fld_s(Operand(rbx, rcx, times_4, 0)); + __ cvtss2sd(xmm0, Operand(rbx, rcx, times_4, 0)); break; default: UNREACHABLE(); @@ -773,20 +773,16 @@ void KeyedLoadIC::GenerateExternalArray(MacroAssembler* masm, // For integer array types: // rcx: value // For floating-point array type: - // FP(0): value + // xmm0: value as double. - if (array_type == kExternalIntArray || - array_type == kExternalUnsignedIntArray) { - // For the Int and UnsignedInt array types, we need to see whether + ASSERT(kSmiValueSize == 32); + if (array_type == kExternalUnsignedIntArray) { + // For the UnsignedInt array type, we need to see whether // the value can be represented in a Smi. If not, we need to convert // it to a HeapNumber. Label box_int; - if (array_type == kExternalIntArray) { - __ JumpIfNotValidSmiValue(rcx, &box_int); - } else { - ASSERT_EQ(array_type, kExternalUnsignedIntArray); - __ JumpIfUIntNotValidSmiValue(rcx, &box_int); - } + + __ JumpIfUIntNotValidSmiValue(rcx, &box_int); __ Integer32ToSmi(rax, rcx); __ ret(0); @@ -795,42 +791,29 @@ void KeyedLoadIC::GenerateExternalArray(MacroAssembler* masm, // Allocate a HeapNumber for the int and perform int-to-double // conversion. 
- __ push(rcx); - if (array_type == kExternalIntArray) { - __ fild_s(Operand(rsp, 0)); - } else { - ASSERT(array_type == kExternalUnsignedIntArray); - // The value is zero-extended on the stack, because all pushes are - // 64-bit and we loaded the value from memory with movl. - __ fild_d(Operand(rsp, 0)); - } - __ pop(rcx); - // FP(0): value - __ AllocateHeapNumber(rcx, rbx, &failed_allocation); + ASSERT(array_type == kExternalUnsignedIntArray); + // The value is zero-extended since we loaded the value from memory + // with movl. + __ cvtqsi2sd(xmm0, rcx); + + __ AllocateHeapNumber(rcx, rbx, &slow); // Set the value. + __ movsd(FieldOperand(rcx, HeapNumber::kValueOffset), xmm0); __ movq(rax, rcx); - __ fstp_d(FieldOperand(rax, HeapNumber::kValueOffset)); __ ret(0); } else if (array_type == kExternalFloatArray) { // For the floating-point array type, we need to always allocate a // HeapNumber. - __ AllocateHeapNumber(rcx, rbx, &failed_allocation); + __ AllocateHeapNumber(rcx, rbx, &slow); // Set the value. + __ movsd(FieldOperand(rcx, HeapNumber::kValueOffset), xmm0); __ movq(rax, rcx); - __ fstp_d(FieldOperand(rax, HeapNumber::kValueOffset)); __ ret(0); } else { __ Integer32ToSmi(rax, rcx); __ ret(0); } - // If we fail allocation of the HeapNumber, we still have a value on - // top of the FPU stack. Remove it. - __ bind(&failed_allocation); - __ ffree(); - __ fincstp(); - // Fall through to slow case. - // Slow case: Jump to runtime. __ bind(&slow); __ IncrementCounter(&Counters::keyed_load_external_array_slow, 1); @@ -1116,10 +1099,8 @@ void KeyedStoreIC::GenerateExternalArray(MacroAssembler* masm, break; case kExternalFloatArray: // Need to perform int-to-float conversion. 
- __ push(rdx); - __ fild_s(Operand(rsp, 0)); - __ pop(rdx); - __ fstp_s(Operand(rbx, rdi, times_4, 0)); + __ cvtlsi2ss(xmm0, rdx); + __ movss(Operand(rbx, rdi, times_4, 0), xmm0); break; default: UNREACHABLE(); @@ -1156,6 +1137,8 @@ void KeyedStoreIC::GenerateExternalArray(MacroAssembler* masm, __ j(parity_even, &is_nan); __ push(rdx); // Make room on the stack. Receiver is no longer needed. + // TODO(lrn): If the rounding of this conversion is not deliberate, maybe + // switch to xmm registers. __ fistp_d(Operand(rsp, 0)); __ pop(rdx); // rdx: value (converted to an untagged integer) @@ -1201,7 +1184,7 @@ void KeyedStoreIC::GenerateExternalArray(MacroAssembler* masm, // rbx: base pointer of external storage __ ffree(); __ fincstp(); - __ movq(rdx, Immediate(0)); + __ Set(rdx, 0); switch (array_type) { case kExternalByteArray: case kExternalUnsignedByteArray: