From 5cda2bcfabc24f988f57ecfc63abc83006245597 Mon Sep 17 00:00:00 2001 From: alph Date: Wed, 14 Oct 2015 21:14:31 -0700 Subject: [PATCH] [x64] Use vmovapd and vmovsd when AVX is enabled. R=bmeurer@chromium.org BUG=v8:4406 LOG=N Review URL: https://codereview.chromium.org/1391963005 Cr-Commit-Position: refs/heads/master@{#31275} --- src/compiler/x64/code-generator-x64.cc | 34 ++++++++-------- src/x64/assembler-x64.cc | 12 ++++++ src/x64/assembler-x64.h | 8 ++++ src/x64/code-stubs-x64.cc | 38 ++++++++--------- src/x64/codegen-x64.cc | 27 ++++++------- src/x64/deoptimizer-x64.cc | 4 +- src/x64/disasm-x64.cc | 18 +++++++++ src/x64/lithium-codegen-x64.cc | 44 ++++++++++---------- src/x64/lithium-gap-resolver-x64.cc | 16 ++++---- src/x64/macro-assembler-x64.cc | 56 ++++++++++++++++++++++---- src/x64/macro-assembler-x64.h | 5 +++ test/cctest/test-assembler-x64.cc | 9 ++++- test/cctest/test-code-stubs-x64.cc | 2 +- test/cctest/test-disasm-x64.cc | 5 +++ 14 files changed, 186 insertions(+), 92 deletions(-) diff --git a/src/compiler/x64/code-generator-x64.cc b/src/compiler/x64/code-generator-x64.cc index f14412b757..4bb4d94659 100644 --- a/src/compiler/x64/code-generator-x64.cc +++ b/src/compiler/x64/code-generator-x64.cc @@ -181,7 +181,7 @@ class OutOfLineTruncateDoubleToI final : public OutOfLineCode { void Generate() final { __ subp(rsp, Immediate(kDoubleSize)); - __ movsd(MemOperand(rsp, 0), input_); + __ Movsd(MemOperand(rsp, 0), input_); __ SlowTruncateToI(result_, rsp, 0); __ addp(rsp, Immediate(kDoubleSize)); } @@ -835,9 +835,9 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) { case kSSEFloat64Mod: { __ subq(rsp, Immediate(kDoubleSize)); // Move values to st(0) and st(1). - __ movsd(Operand(rsp, 0), i.InputDoubleRegister(1)); + __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1)); __ fld_d(Operand(rsp, 0)); - __ movsd(Operand(rsp, 0), i.InputDoubleRegister(0)); + __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0)); __ fld_d(Operand(rsp, 0)); // Loop while fprem isn't done. Label mod_loop; @@ -860,7 +860,7 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) { // Move output to stack and clean up. __ fstp(1); __ fstp_d(Operand(rsp, 0)); - __ movsd(i.OutputDoubleRegister(), Operand(rsp, 0)); + __ Movsd(i.OutputDoubleRegister(), Operand(rsp, 0)); __ addq(rsp, Immediate(kDoubleSize)); break; } @@ -1164,11 +1164,11 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) { break; case kX64Movsd: if (instr->HasOutput()) { - __ movsd(i.OutputDoubleRegister(), i.MemoryOperand()); + __ Movsd(i.OutputDoubleRegister(), i.MemoryOperand()); } else { size_t index = 0; Operand operand = i.MemoryOperand(&index); - __ movsd(operand, i.InputDoubleRegister(index)); + __ Movsd(operand, i.InputDoubleRegister(index)); } break; case kX64BitcastFI: @@ -1196,7 +1196,7 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) { if (instr->InputAt(0)->IsRegister()) { __ movq(i.OutputDoubleRegister(), i.InputRegister(0)); } else { - __ movsd(i.OutputDoubleRegister(), i.InputOperand(0)); + __ Movsd(i.OutputDoubleRegister(), i.InputOperand(0)); } break; case kX64Lea32: { @@ -1251,7 +1251,7 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) { } else if (instr->InputAt(0)->IsDoubleRegister()) { // TODO(titzer): use another machine instruction? 
__ subq(rsp, Immediate(kDoubleSize)); - __ movsd(Operand(rsp, 0), i.InputDoubleRegister(0)); + __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0)); } else { __ pushq(i.InputOperand(0)); } @@ -1306,7 +1306,7 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) { ASSEMBLE_CHECKED_LOAD_FLOAT(movss); break; case kCheckedLoadFloat64: - ASSEMBLE_CHECKED_LOAD_FLOAT(movsd); + ASSEMBLE_CHECKED_LOAD_FLOAT(Movsd); break; case kCheckedStoreWord8: ASSEMBLE_CHECKED_STORE_INTEGER(movb); @@ -1324,7 +1324,7 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) { ASSEMBLE_CHECKED_STORE_FLOAT(movss); break; case kCheckedStoreFloat64: - ASSEMBLE_CHECKED_STORE_FLOAT(movsd); + ASSEMBLE_CHECKED_STORE_FLOAT(Movsd); break; case kX64StackCheck: __ CompareRoot(rsp, Heap::kStackLimitRootIndex); @@ -1724,19 +1724,19 @@ void CodeGenerator::AssembleMove(InstructionOperand* source, } else { DCHECK(destination->IsDoubleStackSlot()); Operand dst = g.ToOperand(destination); - __ movsd(dst, src); + __ Movsd(dst, src); } } else if (source->IsDoubleStackSlot()) { DCHECK(destination->IsDoubleRegister() || destination->IsDoubleStackSlot()); Operand src = g.ToOperand(source); if (destination->IsDoubleRegister()) { XMMRegister dst = g.ToDoubleRegister(destination); - __ movsd(dst, src); + __ Movsd(dst, src); } else { // We rely on having xmm0 available as a fixed scratch register. Operand dst = g.ToOperand(destination); - __ movsd(xmm0, src); - __ movsd(dst, xmm0); + __ Movsd(xmm0, src); + __ Movsd(dst, xmm0); } } else { UNREACHABLE(); @@ -1779,9 +1779,9 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source, // available as a fixed scratch register. XMMRegister src = g.ToDoubleRegister(source); Operand dst = g.ToOperand(destination); - __ movsd(xmm0, src); - __ movsd(src, dst); - __ movsd(dst, xmm0); + __ Movsd(xmm0, src); + __ Movsd(src, dst); + __ Movsd(dst, xmm0); } else { // No other combinations are possible. 
UNREACHABLE(); diff --git a/src/x64/assembler-x64.cc b/src/x64/assembler-x64.cc index 059b576bea..5789782d09 100644 --- a/src/x64/assembler-x64.cc +++ b/src/x64/assembler-x64.cc @@ -2685,6 +2685,7 @@ void Assembler::pinsrd(XMMRegister dst, const Operand& src, int8_t imm8) { void Assembler::movsd(const Operand& dst, XMMRegister src) { + DCHECK(!IsEnabled(AVX)); EnsureSpace ensure_space(this); emit(0xF2); // double emit_optional_rex_32(src, dst); @@ -2695,6 +2696,7 @@ void Assembler::movsd(const Operand& dst, XMMRegister src) { void Assembler::movsd(XMMRegister dst, XMMRegister src) { + DCHECK(!IsEnabled(AVX)); EnsureSpace ensure_space(this); emit(0xF2); // double emit_optional_rex_32(dst, src); @@ -2705,6 +2707,7 @@ void Assembler::movsd(XMMRegister dst, XMMRegister src) { void Assembler::movsd(XMMRegister dst, const Operand& src) { + DCHECK(!IsEnabled(AVX)); EnsureSpace ensure_space(this); emit(0xF2); // double emit_optional_rex_32(dst, src); @@ -3458,6 +3461,15 @@ void Assembler::vfmass(byte op, XMMRegister dst, XMMRegister src1, } +void Assembler::vmovapd(XMMRegister dst, XMMRegister src) { + DCHECK(IsEnabled(AVX)); + EnsureSpace ensure_space(this); + emit_vex_prefix(dst, xmm0, src, kLIG, k66, k0F, kWIG); + emit(0x28); + emit_sse_operand(dst, src); +} + + void Assembler::vucomisd(XMMRegister dst, XMMRegister src) { DCHECK(IsEnabled(AVX)); EnsureSpace ensure_space(this); diff --git a/src/x64/assembler-x64.h b/src/x64/assembler-x64.h index b12fc4c53f..551a7c16f2 100644 --- a/src/x64/assembler-x64.h +++ b/src/x64/assembler-x64.h @@ -1267,6 +1267,14 @@ class Assembler : public AssemblerBase { void vfmass(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2); void vfmass(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2); + void vmovapd(XMMRegister dst, XMMRegister src); + void vmovsd(XMMRegister dst, const Operand& src) { + vsd(0x10, dst, xmm0, src); + } + void vmovsd(XMMRegister dst, XMMRegister src) { vsd(0x10, dst, xmm0, src); } + void vmovsd(const Operand& dst, XMMRegister src) { + vsd(0x11, src, xmm0, dst); + } void vaddsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { vsd(0x58, dst, src1, src2); } diff --git a/src/x64/code-stubs-x64.cc b/src/x64/code-stubs-x64.cc index 3a9ad53c0f..058e99bd4c 100644 --- a/src/x64/code-stubs-x64.cc +++ b/src/x64/code-stubs-x64.cc @@ -180,7 +180,7 @@ void DoubleToIStub::Generate(MacroAssembler* masm) { bool stash_exponent_copy = !input_reg.is(rsp); __ movl(scratch1, mantissa_operand); - __ movsd(xmm0, mantissa_operand); + __ Movsd(xmm0, mantissa_operand); __ movl(rcx, exponent_operand); if (stash_exponent_copy) __ pushq(rcx); @@ -237,14 +237,14 @@ void FloatingPointHelper::LoadSSE2UnknownOperands(MacroAssembler* masm, __ JumpIfSmi(rdx, &load_smi_rdx); __ cmpp(FieldOperand(rdx, HeapObject::kMapOffset), rcx); __ j(not_equal, not_numbers); // Argument in rdx is not a number. - __ movsd(xmm0, FieldOperand(rdx, HeapNumber::kValueOffset)); + __ Movsd(xmm0, FieldOperand(rdx, HeapNumber::kValueOffset)); // Load operand in rax into xmm1, or branch to not_numbers. 
__ JumpIfSmi(rax, &load_smi_rax); __ bind(&load_nonsmi_rax); __ cmpp(FieldOperand(rax, HeapObject::kMapOffset), rcx); __ j(not_equal, not_numbers); - __ movsd(xmm1, FieldOperand(rax, HeapNumber::kValueOffset)); + __ Movsd(xmm1, FieldOperand(rax, HeapNumber::kValueOffset)); __ jmp(&done); __ bind(&load_smi_rdx); @@ -288,7 +288,7 @@ void MathPowStub::Generate(MacroAssembler* masm) { Heap::kHeapNumberMapRootIndex); __ j(not_equal, &call_runtime); - __ movsd(double_base, FieldOperand(base, HeapNumber::kValueOffset)); + __ Movsd(double_base, FieldOperand(base, HeapNumber::kValueOffset)); __ jmp(&unpack_exponent, Label::kNear); __ bind(&base_is_smi); @@ -304,14 +304,14 @@ void MathPowStub::Generate(MacroAssembler* masm) { __ CompareRoot(FieldOperand(exponent, HeapObject::kMapOffset), Heap::kHeapNumberMapRootIndex); __ j(not_equal, &call_runtime); - __ movsd(double_exponent, FieldOperand(exponent, HeapNumber::kValueOffset)); + __ Movsd(double_exponent, FieldOperand(exponent, HeapNumber::kValueOffset)); } else if (exponent_type() == TAGGED) { __ JumpIfNotSmi(exponent, &exponent_not_smi, Label::kNear); __ SmiToInteger32(exponent, exponent); __ jmp(&int_exponent); __ bind(&exponent_not_smi); - __ movsd(double_exponent, FieldOperand(exponent, HeapNumber::kValueOffset)); + __ Movsd(double_exponent, FieldOperand(exponent, HeapNumber::kValueOffset)); } if (exponent_type() != INTEGER) { @@ -405,9 +405,9 @@ void MathPowStub::Generate(MacroAssembler* masm) { __ fnclex(); // Clear flags to catch exceptions later. // Transfer (B)ase and (E)xponent onto the FPU register stack. __ subp(rsp, Immediate(kDoubleSize)); - __ movsd(Operand(rsp, 0), double_exponent); + __ Movsd(Operand(rsp, 0), double_exponent); __ fld_d(Operand(rsp, 0)); // E - __ movsd(Operand(rsp, 0), double_base); + __ Movsd(Operand(rsp, 0), double_base); __ fld_d(Operand(rsp, 0)); // B, E // Exponent is in st(1) and base is in st(0) @@ -430,7 +430,7 @@ void MathPowStub::Generate(MacroAssembler* masm) { __ testb(rax, Immediate(0x5F)); // Check for all but precision exception. __ j(not_zero, &fast_power_failed, Label::kNear); __ fstp_d(Operand(rsp, 0)); - __ movsd(double_result, Operand(rsp, 0)); + __ Movsd(double_result, Operand(rsp, 0)); __ addp(rsp, Immediate(kDoubleSize)); __ jmp(&done); @@ -445,8 +445,8 @@ void MathPowStub::Generate(MacroAssembler* masm) { const XMMRegister double_scratch2 = double_exponent; // Back up exponent as we need to check if exponent is negative later. __ movp(scratch, exponent); // Back up exponent. - __ movsd(double_scratch, double_base); // Back up base. - __ movsd(double_scratch2, double_result); // Load double_exponent with 1. + __ Movsd(double_scratch, double_base); // Back up base. + __ Movsd(double_scratch2, double_result); // Load double_exponent with 1. // Get absolute value of exponent. Label no_neg, while_true, while_false; @@ -460,7 +460,7 @@ void MathPowStub::Generate(MacroAssembler* masm) { // Above condition means CF==0 && ZF==0. This means that the // bit that has been shifted out is 0 and the result is not 0. __ j(above, &while_true, Label::kNear); - __ movsd(double_result, double_scratch); + __ Movsd(double_result, double_scratch); __ j(zero, &while_false, Label::kNear); __ bind(&while_true); @@ -475,7 +475,7 @@ void MathPowStub::Generate(MacroAssembler* masm) { __ testl(exponent, exponent); __ j(greater, &done); __ divsd(double_scratch2, double_result); - __ movsd(double_result, double_scratch2); + __ Movsd(double_result, double_scratch2); // Test whether result is zero. 
Bail out to check for subnormal result. // Due to subnormals, x^-y == (1/x)^y does not hold in all cases. __ xorps(double_scratch2, double_scratch2); @@ -497,13 +497,13 @@ void MathPowStub::Generate(MacroAssembler* masm) { // as heap number in rax. __ bind(&done); __ AllocateHeapNumber(rax, rcx, &call_runtime); - __ movsd(FieldOperand(rax, HeapNumber::kValueOffset), double_result); + __ Movsd(FieldOperand(rax, HeapNumber::kValueOffset), double_result); __ IncrementCounter(counters->math_pow(), 1); __ ret(2 * kPointerSize); } else { __ bind(&call_runtime); // Move base to the correct argument register. Exponent is already in xmm1. - __ movsd(xmm0, double_base); + __ Movsd(xmm0, double_base); DCHECK(double_exponent.is(xmm1)); { AllowExternalCallThatCantCauseGC scope(masm); @@ -512,7 +512,7 @@ void MathPowStub::Generate(MacroAssembler* masm) { ExternalReference::power_double_double_function(isolate()), 2); } // Return value is in xmm0. - __ movsd(double_result, xmm0); + __ Movsd(double_result, xmm0); __ bind(&done); __ IncrementCounter(counters->math_pow(), 1); @@ -1565,7 +1565,7 @@ void CompareICStub::GenerateGeneric(MacroAssembler* masm) { // greater-equal. Return -1 for them, so the comparison yields // false for all conditions except not-equal. __ Set(rax, EQUAL); - __ movsd(xmm0, FieldOperand(rdx, HeapNumber::kValueOffset)); + __ Movsd(xmm0, FieldOperand(rdx, HeapNumber::kValueOffset)); __ ucomisd(xmm0, xmm0); __ setcc(parity_even, rax); // rax is 0 for equal non-NaN heapnumbers, 1 for NaNs. @@ -3434,7 +3434,7 @@ void CompareICStub::GenerateNumbers(MacroAssembler* masm) { __ JumpIfSmi(rax, &right_smi, Label::kNear); __ CompareMap(rax, isolate()->factory()->heap_number_map()); __ j(not_equal, &maybe_undefined1, Label::kNear); - __ movsd(xmm1, FieldOperand(rax, HeapNumber::kValueOffset)); + __ Movsd(xmm1, FieldOperand(rax, HeapNumber::kValueOffset)); __ jmp(&left, Label::kNear); __ bind(&right_smi); __ SmiToInteger32(rcx, rax); // Can't clobber rax yet. @@ -3444,7 +3444,7 @@ void CompareICStub::GenerateNumbers(MacroAssembler* masm) { __ JumpIfSmi(rdx, &left_smi, Label::kNear); __ CompareMap(rdx, isolate()->factory()->heap_number_map()); __ j(not_equal, &maybe_undefined2, Label::kNear); - __ movsd(xmm0, FieldOperand(rdx, HeapNumber::kValueOffset)); + __ Movsd(xmm0, FieldOperand(rdx, HeapNumber::kValueOffset)); __ jmp(&done); __ bind(&left_smi); __ SmiToInteger32(rcx, rdx); // Can't clobber rdx yet. diff --git a/src/x64/codegen-x64.cc b/src/x64/codegen-x64.cc index 4f08c7e7a6..7ca231e4b5 100644 --- a/src/x64/codegen-x64.cc +++ b/src/x64/codegen-x64.cc @@ -51,7 +51,7 @@ UnaryMathFunction CreateExpFunction() { __ popq(rbx); __ popq(rax); - __ movsd(xmm0, result); + __ Movsd(xmm0, result); __ Ret(); CodeDesc desc; @@ -95,7 +95,7 @@ ModuloFunction CreateModuloFunction() { byte* buffer = static_cast( base::OS::Allocate(Assembler::kMinimalBufferSize, &actual_size, true)); CHECK(buffer); - Assembler masm(NULL, buffer, static_cast(actual_size)); + MacroAssembler masm(NULL, buffer, static_cast(actual_size)); // Generated code is put into a fixed, unmovable, buffer, and not into // the V8 heap. We can't, and don't, refer to any relocatable addresses // (e.g. the JavaScript nan-object). @@ -107,8 +107,8 @@ ModuloFunction CreateModuloFunction() { // Compute x mod y. // Load y and x (use argument backing store as temporary storage). 
- __ movsd(Operand(rsp, kRegisterSize * 2), xmm1); - __ movsd(Operand(rsp, kRegisterSize), xmm0); + __ Movsd(Operand(rsp, kRegisterSize * 2), xmm1); + __ Movsd(Operand(rsp, kRegisterSize), xmm0); __ fld_d(Operand(rsp, kRegisterSize * 2)); __ fld_d(Operand(rsp, kRegisterSize)); @@ -147,13 +147,13 @@ ModuloFunction CreateModuloFunction() { int64_t kNaNValue = V8_INT64_C(0x7ff8000000000000); __ movq(rcx, kNaNValue); __ movq(Operand(rsp, kRegisterSize), rcx); - __ movsd(xmm0, Operand(rsp, kRegisterSize)); + __ Movsd(xmm0, Operand(rsp, kRegisterSize)); __ jmp(&return_result); // If result is valid, return that. __ bind(&valid_result); __ fstp_d(Operand(rsp, kRegisterSize)); - __ movsd(xmm0, Operand(rsp, kRegisterSize)); + __ Movsd(xmm0, Operand(rsp, kRegisterSize)); // Clean up FPU stack and exceptions and return xmm0 __ bind(&return_result); @@ -333,8 +333,7 @@ void ElementsTransitionGenerator::GenerateSmiToDouble( __ JumpIfNotSmi(rbx, &convert_hole); __ SmiToInteger32(rbx, rbx); __ Cvtlsi2sd(xmm0, rbx); - __ movsd(FieldOperand(r14, r9, times_8, FixedDoubleArray::kHeaderSize), - xmm0); + __ Movsd(FieldOperand(r14, r9, times_8, FixedDoubleArray::kHeaderSize), xmm0); __ jmp(&entry); __ bind(&convert_hole); @@ -604,20 +603,20 @@ void MathExpGenerator::EmitMathExp(MacroAssembler* masm, Label done; __ Move(kScratchRegister, ExternalReference::math_exp_constants(0)); - __ movsd(double_scratch, Operand(kScratchRegister, 0 * kDoubleSize)); + __ Movsd(double_scratch, Operand(kScratchRegister, 0 * kDoubleSize)); __ xorpd(result, result); __ ucomisd(double_scratch, input); __ j(above_equal, &done); __ ucomisd(input, Operand(kScratchRegister, 1 * kDoubleSize)); - __ movsd(result, Operand(kScratchRegister, 2 * kDoubleSize)); + __ Movsd(result, Operand(kScratchRegister, 2 * kDoubleSize)); __ j(above_equal, &done); - __ movsd(double_scratch, Operand(kScratchRegister, 3 * kDoubleSize)); - __ movsd(result, Operand(kScratchRegister, 4 * kDoubleSize)); + __ Movsd(double_scratch, Operand(kScratchRegister, 3 * kDoubleSize)); + __ Movsd(result, Operand(kScratchRegister, 4 * kDoubleSize)); __ mulsd(double_scratch, input); __ addsd(double_scratch, result); __ movq(temp2, double_scratch); __ subsd(double_scratch, result); - __ movsd(result, Operand(kScratchRegister, 6 * kDoubleSize)); + __ Movsd(result, Operand(kScratchRegister, 6 * kDoubleSize)); __ leaq(temp1, Operand(temp2, 0x1ff800)); __ andq(temp2, Immediate(0x7ff)); __ shrq(temp1, Immediate(11)); @@ -627,7 +626,7 @@ void MathExpGenerator::EmitMathExp(MacroAssembler* masm, __ orq(temp1, Operand(kScratchRegister, temp2, times_8, 0)); __ Move(kScratchRegister, ExternalReference::math_exp_constants(0)); __ subsd(double_scratch, input); - __ movsd(input, double_scratch); + __ Movsd(input, double_scratch); __ subsd(result, double_scratch); __ mulsd(input, double_scratch); __ mulsd(result, input); diff --git a/src/x64/deoptimizer-x64.cc b/src/x64/deoptimizer-x64.cc index 8ec123a5da..1e6e3dd77a 100644 --- a/src/x64/deoptimizer-x64.cc +++ b/src/x64/deoptimizer-x64.cc @@ -147,7 +147,7 @@ void Deoptimizer::TableEntryGenerator::Generate() { int code = config->GetAllocatableDoubleCode(i); XMMRegister xmm_reg = XMMRegister::from_code(code); int offset = code * kDoubleSize; - __ movsd(Operand(rsp, offset), xmm_reg); + __ Movsd(Operand(rsp, offset), xmm_reg); } // We push all registers onto the stack, even though we do not need @@ -280,7 +280,7 @@ void Deoptimizer::TableEntryGenerator::Generate() { int code = config->GetAllocatableDoubleCode(i); XMMRegister xmm_reg = 
XMMRegister::from_code(code); int src_offset = code * kDoubleSize + double_regs_offset; - __ movsd(xmm_reg, Operand(rbx, src_offset)); + __ Movsd(xmm_reg, Operand(rbx, src_offset)); } // Push state, pc, and continuation from the last output frame. diff --git a/src/x64/disasm-x64.cc b/src/x64/disasm-x64.cc index 5534887f5a..75c9bee846 100644 --- a/src/x64/disasm-x64.cc +++ b/src/x64/disasm-x64.cc @@ -988,6 +988,15 @@ int DisassemblerX64::AVXInstruction(byte* data) { int mod, regop, rm, vvvv = vex_vreg(); get_modrm(*current, &mod, &regop, &rm); switch (opcode) { + case 0x10: + AppendToBuffer("vmovsd %s,", NameOfXMMRegister(regop)); + current += PrintRightXMMOperand(current); + break; + case 0x11: + AppendToBuffer("vmovsd "); + current += PrintRightXMMOperand(current); + AppendToBuffer(",%s", NameOfXMMRegister(regop)); + break; case 0x58: AppendToBuffer("vaddsd %s,%s,", NameOfXMMRegister(regop), NameOfXMMRegister(vvvv)); @@ -1154,6 +1163,15 @@ int DisassemblerX64::AVXInstruction(byte* data) { int mod, regop, rm, vvvv = vex_vreg(); get_modrm(*current, &mod, &regop, &rm); switch (opcode) { + case 0x28: + AppendToBuffer("vmovapd %s,", NameOfXMMRegister(regop)); + current += PrintRightXMMOperand(current); + break; + case 0x29: + AppendToBuffer("vmovapd "); + current += PrintRightXMMOperand(current); + AppendToBuffer(",%s", NameOfXMMRegister(regop)); + break; case 0x2e: AppendToBuffer("vucomisd %s,", NameOfXMMRegister(regop)); current += PrintRightXMMOperand(current); diff --git a/src/x64/lithium-codegen-x64.cc b/src/x64/lithium-codegen-x64.cc index dfb18c7c4f..e39c6eaa81 100644 --- a/src/x64/lithium-codegen-x64.cc +++ b/src/x64/lithium-codegen-x64.cc @@ -88,7 +88,7 @@ void LCodeGen::SaveCallerDoubles() { BitVector* doubles = chunk()->allocated_double_registers(); BitVector::Iterator save_iterator(doubles); while (!save_iterator.Done()) { - __ movsd(MemOperand(rsp, count * kDoubleSize), + __ Movsd(MemOperand(rsp, count * kDoubleSize), XMMRegister::from_code(save_iterator.Current())); save_iterator.Advance(); count++; @@ -104,7 +104,7 @@ void LCodeGen::RestoreCallerDoubles() { BitVector::Iterator save_iterator(doubles); int count = 0; while (!save_iterator.Done()) { - __ movsd(XMMRegister::from_code(save_iterator.Current()), + __ Movsd(XMMRegister::from_code(save_iterator.Current()), MemOperand(rsp, count * kDoubleSize)); save_iterator.Advance(); count++; @@ -1991,7 +1991,7 @@ void LCodeGen::DoMathMinMax(LMathMinMax* instr) { __ ucomisd(left_reg, left_reg); // NaN check. __ j(parity_even, &return_left, Label::kNear); __ bind(&return_right); - __ movaps(left_reg, right_reg); + __ Movapd(left_reg, right_reg); __ bind(&return_left); } @@ -2040,16 +2040,16 @@ void LCodeGen::DoArithmeticD(LArithmeticD* instr) { } // Don't delete this mov.
It may improve performance on some CPUs, // when there is a (v)mulsd depending on the result - __ movaps(result, result); + __ Movapd(result, result); break; case Token::MOD: { XMMRegister xmm_scratch = double_scratch0(); __ PrepareCallCFunction(2); - __ movaps(xmm_scratch, left); + __ Movapd(xmm_scratch, left); DCHECK(right.is(xmm1)); __ CallCFunction( ExternalReference::mod_two_doubles_operation(isolate()), 2); - __ movaps(result, xmm_scratch); + __ Movapd(result, xmm_scratch); break; } default: @@ -2391,7 +2391,7 @@ void LCodeGen::DoCmpHoleAndBranch(LCmpHoleAndBranch* instr) { EmitFalseBranch(instr, parity_odd); __ subp(rsp, Immediate(kDoubleSize)); - __ movsd(MemOperand(rsp, 0), input_reg); + __ Movsd(MemOperand(rsp, 0), input_reg); __ addp(rsp, Immediate(kDoubleSize)); int offset = sizeof(kHoleNanUpper32); @@ -2869,7 +2869,7 @@ void LCodeGen::DoLoadNamedField(LLoadNamedField* instr) { if (instr->hydrogen()->representation().IsDouble()) { DCHECK(access.IsInobject()); XMMRegister result = ToDoubleRegister(instr->result()); - __ movsd(result, FieldOperand(object, offset)); + __ Movsd(result, FieldOperand(object, offset)); return; } @@ -3002,7 +3002,7 @@ void LCodeGen::DoLoadKeyedExternalArray(LLoadKeyed* instr) { __ movss(result, operand); __ cvtss2sd(result, result); } else if (elements_kind == FLOAT64_ELEMENTS) { - __ movsd(ToDoubleRegister(instr->result()), operand); + __ Movsd(ToDoubleRegister(instr->result()), operand); } else { Register result(ToRegister(instr->result())); switch (elements_kind) { @@ -3073,7 +3073,7 @@ void LCodeGen::DoLoadKeyedFixedDoubleArray(LLoadKeyed* instr) { instr->hydrogen()->key()->representation(), FAST_DOUBLE_ELEMENTS, instr->base_offset()); - __ movsd(result, double_load_operand); + __ Movsd(result, double_load_operand); } @@ -3825,16 +3825,16 @@ void LCodeGen::DoMathLog(LMathLog* instr) { ExternalReference ninf = ExternalReference::address_of_negative_infinity(); Operand ninf_operand = masm()->ExternalOperand(ninf); - __ movsd(input_reg, ninf_operand); + __ Movsd(input_reg, ninf_operand); __ jmp(&done, Label::kNear); __ bind(&positive); __ fldln2(); __ subp(rsp, Immediate(kDoubleSize)); - __ movsd(Operand(rsp, 0), input_reg); + __ Movsd(Operand(rsp, 0), input_reg); __ fld_d(Operand(rsp, 0)); __ fyl2x(); __ fstp_d(Operand(rsp, 0)); - __ movsd(input_reg, Operand(rsp, 0)); + __ Movsd(input_reg, Operand(rsp, 0)); __ addp(rsp, Immediate(kDoubleSize)); __ bind(&done); } @@ -4025,7 +4025,7 @@ void LCodeGen::DoStoreNamedField(LStoreNamedField* instr) { DCHECK(!hinstr->has_transition()); DCHECK(!hinstr->NeedsWriteBarrier()); XMMRegister value = ToDoubleRegister(instr->value()); - __ movsd(FieldOperand(object, offset), value); + __ Movsd(FieldOperand(object, offset), value); return; } @@ -4073,7 +4073,7 @@ void LCodeGen::DoStoreNamedField(LStoreNamedField* instr) { if (FLAG_unbox_double_fields && representation.IsDouble()) { DCHECK(access.IsInobject()); XMMRegister value = ToDoubleRegister(instr->value()); - __ movsd(operand, value); + __ Movsd(operand, value); } else if (instr->value()->IsRegister()) { Register value = ToRegister(instr->value()); @@ -4246,7 +4246,7 @@ void LCodeGen::DoStoreKeyedExternalArray(LStoreKeyed* instr) { __ cvtsd2ss(value, value); __ movss(operand, value); } else if (elements_kind == FLOAT64_ELEMENTS) { - __ movsd(operand, ToDoubleRegister(instr->value())); + __ Movsd(operand, ToDoubleRegister(instr->value())); } else { Register value(ToRegister(instr->value())); switch (elements_kind) { @@ -4304,7 +4304,7 @@ void 
LCodeGen::DoStoreKeyedFixedDoubleArray(LStoreKeyed* instr) { FAST_DOUBLE_ELEMENTS, instr->base_offset()); - __ movsd(double_store_operand, value); + __ Movsd(double_store_operand, value); } @@ -4811,7 +4811,7 @@ void LCodeGen::DoDeferredNumberTagIU(LInstruction* instr, // Done. Put the value in temp_xmm into the value of the allocated heap // number. __ bind(&done); - __ movsd(FieldOperand(reg, HeapNumber::kValueOffset), temp_xmm); + __ Movsd(FieldOperand(reg, HeapNumber::kValueOffset), temp_xmm); } @@ -4838,7 +4838,7 @@ void LCodeGen::DoNumberTagD(LNumberTagD* instr) { __ jmp(deferred->entry()); } __ bind(deferred->exit()); - __ movsd(FieldOperand(reg, HeapNumber::kValueOffset), input_reg); + __ Movsd(FieldOperand(reg, HeapNumber::kValueOffset), input_reg); } @@ -4914,7 +4914,7 @@ void LCodeGen::EmitNumberUntagD(LNumberUntagD* instr, Register input_reg, // On x64 it is safe to load at heap number offset before evaluating the map // check, since all heap objects are at least two words long. - __ movsd(result_reg, FieldOperand(input_reg, HeapNumber::kValueOffset)); + __ Movsd(result_reg, FieldOperand(input_reg, HeapNumber::kValueOffset)); if (can_convert_undefined_to_nan) { __ j(not_equal, &convert, Label::kNear); @@ -4993,7 +4993,7 @@ void LCodeGen::DoDeferredTaggedToI(LTaggedToI* instr, Label* done) { __ CompareRoot(FieldOperand(input_reg, HeapObject::kMapOffset), Heap::kHeapNumberMapRootIndex); DeoptimizeIf(not_equal, instr, Deoptimizer::kNotAHeapNumber); - __ movsd(xmm0, FieldOperand(input_reg, HeapNumber::kValueOffset)); + __ Movsd(xmm0, FieldOperand(input_reg, HeapNumber::kValueOffset)); __ cvttsd2si(input_reg, xmm0); __ Cvtlsi2sd(scratch, input_reg); __ ucomisd(xmm0, scratch); @@ -5305,7 +5305,7 @@ void LCodeGen::DoClampTToUint8(LClampTToUint8* instr) { // Heap number __ bind(&heap_number); - __ movsd(xmm_scratch, FieldOperand(input_reg, HeapNumber::kValueOffset)); + __ Movsd(xmm_scratch, FieldOperand(input_reg, HeapNumber::kValueOffset)); __ ClampDoubleToUint8(xmm_scratch, temp_xmm_reg, input_reg); __ jmp(&done, Label::kNear); diff --git a/src/x64/lithium-gap-resolver-x64.cc b/src/x64/lithium-gap-resolver-x64.cc index 800fb3f61c..914e278685 100644 --- a/src/x64/lithium-gap-resolver-x64.cc +++ b/src/x64/lithium-gap-resolver-x64.cc @@ -214,16 +214,16 @@ void LGapResolver::EmitMove(int index) { __ movaps(cgen_->ToDoubleRegister(destination), src); } else { DCHECK(destination->IsDoubleStackSlot()); - __ movsd(cgen_->ToOperand(destination), src); + __ Movsd(cgen_->ToOperand(destination), src); } } else if (source->IsDoubleStackSlot()) { Operand src = cgen_->ToOperand(source); if (destination->IsDoubleRegister()) { - __ movsd(cgen_->ToDoubleRegister(destination), src); + __ Movsd(cgen_->ToDoubleRegister(destination), src); } else { DCHECK(destination->IsDoubleStackSlot()); - __ movsd(xmm0, src); - __ movsd(cgen_->ToOperand(destination), xmm0); + __ Movsd(xmm0, src); + __ Movsd(cgen_->ToOperand(destination), xmm0); } } else { UNREACHABLE(); @@ -261,9 +261,9 @@ void LGapResolver::EmitSwap(int index) { // Swap two stack slots or two double stack slots. Operand src = cgen_->ToOperand(source); Operand dst = cgen_->ToOperand(destination); - __ movsd(xmm0, src); + __ Movsd(xmm0, src); __ movp(kScratchRegister, dst); - __ movsd(dst, xmm0); + __ Movsd(dst, xmm0); __ movp(src, kScratchRegister); } else if (source->IsDoubleRegister() && destination->IsDoubleRegister()) { @@ -284,8 +284,8 @@ void LGapResolver::EmitSwap(int index) { LOperand* other = source->IsDoubleRegister() ? 
destination : source; DCHECK(other->IsDoubleStackSlot()); Operand other_operand = cgen_->ToOperand(other); - __ movsd(xmm0, other_operand); - __ movsd(other_operand, reg); + __ Movsd(xmm0, other_operand); + __ Movsd(other_operand, reg); __ movaps(reg, xmm0); } else { diff --git a/src/x64/macro-assembler-x64.cc b/src/x64/macro-assembler-x64.cc index bde32be47e..ad3827aba5 100644 --- a/src/x64/macro-assembler-x64.cc +++ b/src/x64/macro-assembler-x64.cc @@ -761,7 +761,7 @@ void MacroAssembler::PushCallerSaved(SaveFPRegsMode fp_mode, subp(rsp, Immediate(kDoubleSize * XMMRegister::kMaxNumRegisters)); for (int i = 0; i < XMMRegister::kMaxNumRegisters; i++) { XMMRegister reg = XMMRegister::from_code(i); - movsd(Operand(rsp, i * kDoubleSize), reg); + Movsd(Operand(rsp, i * kDoubleSize), reg); } } } @@ -774,7 +774,7 @@ void MacroAssembler::PopCallerSaved(SaveFPRegsMode fp_mode, if (fp_mode == kSaveFPRegs) { for (int i = 0; i < XMMRegister::kMaxNumRegisters; i++) { XMMRegister reg = XMMRegister::from_code(i); - movsd(reg, Operand(rsp, i * kDoubleSize)); + Movsd(reg, Operand(rsp, i * kDoubleSize)); } addp(rsp, Immediate(kDoubleSize * XMMRegister::kMaxNumRegisters)); } @@ -2437,6 +2437,46 @@ void MacroAssembler::Move(XMMRegister dst, uint64_t src) { } +void MacroAssembler::Movapd(XMMRegister dst, XMMRegister src) { + if (CpuFeatures::IsSupported(AVX)) { + CpuFeatureScope scope(this, AVX); + vmovapd(dst, src); + } else { + movapd(dst, src); + } +} + + +void MacroAssembler::Movsd(XMMRegister dst, XMMRegister src) { + if (CpuFeatures::IsSupported(AVX)) { + CpuFeatureScope scope(this, AVX); + vmovsd(dst, src); + } else { + movsd(dst, src); + } +} + + +void MacroAssembler::Movsd(XMMRegister dst, const Operand& src) { + if (CpuFeatures::IsSupported(AVX)) { + CpuFeatureScope scope(this, AVX); + vmovsd(dst, src); + } else { + movsd(dst, src); + } +} + + +void MacroAssembler::Movsd(const Operand& dst, XMMRegister src) { + if (CpuFeatures::IsSupported(AVX)) { + CpuFeatureScope scope(this, AVX); + vmovsd(dst, src); + } else { + movsd(dst, src); + } +} + + void MacroAssembler::Cmp(Register dst, Handle source) { AllowDeferredHandleDereference smi_check; if (source->IsSmi()) { @@ -3045,7 +3085,7 @@ void MacroAssembler::StoreNumberToDoubleElements( SmiToInteger32(kScratchRegister, maybe_number); Cvtlsi2sd(xmm_scratch, kScratchRegister); bind(&done); - movsd(FieldOperand(elements, index, times_8, + Movsd(FieldOperand(elements, index, times_8, FixedDoubleArray::kHeaderSize - elements_offset), xmm_scratch); } @@ -3125,7 +3165,7 @@ void MacroAssembler::SlowTruncateToI(Register result_reg, void MacroAssembler::TruncateHeapNumberToI(Register result_reg, Register input_reg) { Label done; - movsd(xmm0, FieldOperand(input_reg, HeapNumber::kValueOffset)); + Movsd(xmm0, FieldOperand(input_reg, HeapNumber::kValueOffset)); cvttsd2siq(result_reg, xmm0); cmpq(result_reg, Immediate(1)); j(no_overflow, &done, Label::kNear); @@ -3133,7 +3173,7 @@ void MacroAssembler::TruncateHeapNumberToI(Register result_reg, // Slow case. 
if (input_reg.is(result_reg)) { subp(rsp, Immediate(kDoubleSize)); - movsd(MemOperand(rsp, 0), xmm0); + Movsd(MemOperand(rsp, 0), xmm0); SlowTruncateToI(result_reg, rsp, 0); addp(rsp, Immediate(kDoubleSize)); } else { @@ -3154,7 +3194,7 @@ void MacroAssembler::TruncateDoubleToI(Register result_reg, j(no_overflow, &done, Label::kNear); subp(rsp, Immediate(kDoubleSize)); - movsd(MemOperand(rsp, 0), input_reg); + Movsd(MemOperand(rsp, 0), input_reg); SlowTruncateToI(result_reg, rsp, 0); addp(rsp, Immediate(kDoubleSize)); @@ -3720,7 +3760,7 @@ void MacroAssembler::EnterExitFrameEpilogue(int arg_stack_space, for (int i = 0; i < config->num_allocatable_double_registers(); ++i) { DoubleRegister reg = DoubleRegister::from_code(config->GetAllocatableDoubleCode(i)); - movsd(Operand(rbp, offset - ((i + 1) * kDoubleSize)), reg); + Movsd(Operand(rbp, offset - ((i + 1) * kDoubleSize)), reg); } } else if (arg_stack_space > 0) { subp(rsp, Immediate(arg_stack_space * kRegisterSize)); @@ -3766,7 +3806,7 @@ void MacroAssembler::LeaveExitFrame(bool save_doubles, bool pop_arguments) { for (int i = 0; i < config->num_allocatable_double_registers(); ++i) { DoubleRegister reg = DoubleRegister::from_code(config->GetAllocatableDoubleCode(i)); - movsd(reg, Operand(rbp, offset - ((i + 1) * kDoubleSize))); + Movsd(reg, Operand(rbp, offset - ((i + 1) * kDoubleSize))); } } diff --git a/src/x64/macro-assembler-x64.h b/src/x64/macro-assembler-x64.h index a7fd91cdd2..f2ecd237f2 100644 --- a/src/x64/macro-assembler-x64.h +++ b/src/x64/macro-assembler-x64.h @@ -894,6 +894,11 @@ class MacroAssembler: public Assembler { void Move(XMMRegister dst, float src) { Move(dst, bit_cast(src)); } void Move(XMMRegister dst, double src) { Move(dst, bit_cast(src)); } + void Movapd(XMMRegister dst, XMMRegister src); + void Movsd(XMMRegister dst, XMMRegister src); + void Movsd(XMMRegister dst, const Operand& src); + void Movsd(const Operand& dst, XMMRegister src); + // Control Flow void Jump(Address destination, RelocInfo::Mode rmode); void Jump(ExternalReference ext); diff --git a/test/cctest/test-assembler-x64.cc b/test/cctest/test-assembler-x64.cc index c067ac46f2..86acfc9c2f 100644 --- a/test/cctest/test-assembler-x64.cc +++ b/test/cctest/test-assembler-x64.cc @@ -1353,7 +1353,14 @@ TEST(AssemblerX64AVX_sd) { // arguments in xmm0, xmm1 and xmm2 __ movl(rax, Immediate(0)); - __ vmaxsd(xmm3, xmm0, xmm1); + __ vmaxsd(xmm4, xmm0, xmm1); + __ subq(rsp, Immediate(kDoubleSize * 2)); // For memory operand + __ vmovsd(Operand(rsp, kDoubleSize), xmm4); + __ vmovsd(xmm5, Operand(rsp, kDoubleSize)); + __ vmovsd(xmm6, xmm5); + __ vmovapd(xmm3, xmm6); + __ addq(rsp, Immediate(kDoubleSize * 2)); + __ vucomisd(xmm3, xmm1); __ j(parity_even, &exit); __ j(not_equal, &exit); diff --git a/test/cctest/test-code-stubs-x64.cc b/test/cctest/test-code-stubs-x64.cc index 8f22dd9fcf..9245d9d085 100644 --- a/test/cctest/test-code-stubs-x64.cc +++ b/test/cctest/test-code-stubs-x64.cc @@ -88,7 +88,7 @@ ConvertDToIFunc MakeConvertDToIFuncTrampoline(Isolate* isolate, // Put the double argument into the designated double argument slot. 
__ subq(rsp, Immediate(kDoubleSize)); - __ movsd(MemOperand(rsp, 0), xmm0); + __ Movsd(MemOperand(rsp, 0), xmm0); // Call through to the actual stub __ Call(start, RelocInfo::EXTERNAL_REFERENCE); diff --git a/test/cctest/test-disasm-x64.cc b/test/cctest/test-disasm-x64.cc index 980f5d5b0f..c82cc49c10 100644 --- a/test/cctest/test-disasm-x64.cc +++ b/test/cctest/test-disasm-x64.cc @@ -511,6 +511,11 @@ TEST(DisasmX64) { __ vucomiss(xmm9, xmm1); __ vucomiss(xmm8, Operand(rbx, rdx, times_2, 10981)); + __ vmovapd(xmm7, xmm0); + __ vmovsd(xmm6, xmm2); + __ vmovsd(xmm9, Operand(rbx, rcx, times_4, 10000)); + __ vmovsd(Operand(rbx, rcx, times_4, 10000), xmm0); + __ vaddsd(xmm0, xmm1, xmm2); __ vaddsd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000)); __ vmulsd(xmm0, xmm1, xmm2);
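
The core of this change is the set of capitalized MacroAssembler wrappers (Movsd, Movapd) that the call sites above now go through: when AVX is supported they emit the VEX-encoded vmovsd/vmovapd forms, otherwise they fall back to the legacy SSE2 encodings, and the raw movsd overloads in assembler-x64.cc now DCHECK that AVX is not enabled so any remaining direct use fails fast in AVX builds. The standalone sketch below illustrates only that dispatch pattern; CpuFeatures, XMMRegister, and the two emitters here are simplified stand-ins for the illustration, not the real V8 classes.

    #include <cstdio>

    struct CpuFeatures {
      static bool avx_enabled;  // a real implementation would read CPUID
      static bool IsSupported(int /*feature*/) { return avx_enabled; }
    };
    bool CpuFeatures::avx_enabled = true;

    enum CpuFeature { AVX = 1 };

    struct XMMRegister { int code; };

    // Stand-ins for the raw instruction emitters; the real ones append
    // encoded bytes to a code buffer instead of printing a mnemonic.
    void movsd(XMMRegister dst, XMMRegister src) {
      std::printf("movsd  xmm%d, xmm%d   (legacy SSE2 encoding: F2 0F 10 /r)\n",
                  dst.code, src.code);
    }
    void vmovsd(XMMRegister dst, XMMRegister src) {
      std::printf("vmovsd xmm%d, xmm%d   (AVX encoding: VEX.F2.0F 10 /r)\n",
                  dst.code, src.code);
    }

    // The capitalized wrapper used by call sites: pick the VEX-encoded form
    // when AVX is available, otherwise fall back to the SSE2 form.
    void Movsd(XMMRegister dst, XMMRegister src) {
      if (CpuFeatures::IsSupported(AVX)) {
        vmovsd(dst, src);
      } else {
        movsd(dst, src);
      }
    }

    int main() {
      Movsd(XMMRegister{3}, XMMRegister{6});  // prints the vmovsd form
      CpuFeatures::avx_enabled = false;
      Movsd(XMMRegister{3}, XMMRegister{6});  // prints the movsd fallback
      return 0;
    }

In the real wrappers the AVX branch also opens a CpuFeatureScope (as shown in the macro-assembler-x64.cc hunk above), so the VEX-encoded instruction is only emitted while the feature is explicitly enabled for the assembler.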