[x64] Replace addsd, subsd, mulsd, divsd with AVX versions under AVX.

BUG=v8:4406
LOG=N

Review URL: https://codereview.chromium.org/1416053010

Cr-Commit-Position: refs/heads/master@{#31498}
This commit is contained in:
alph 2015-10-23 01:25:24 -07:00 committed by Commit bot
parent f5c227b27d
commit a96b87fad9
5 changed files with 60 additions and 94 deletions

View File

@@ -3565,8 +3565,8 @@ void LCodeGen::DoMathAbs(LMathAbs* instr) {
XMMRegister scratch = double_scratch0();
XMMRegister input_reg = ToDoubleRegister(instr->value());
__ Xorpd(scratch, scratch);
__ subsd(scratch, input_reg);
__ andps(input_reg, scratch);
__ Subsd(scratch, input_reg);
__ Andpd(input_reg, scratch);
} else if (r.IsInteger32()) {
EmitIntegerMathAbs(instr);
} else if (r.IsSmi()) {
@@ -3658,7 +3658,7 @@ void LCodeGen::DoMathRound(LMathRound* instr) {
__ j(above, &below_one_half, Label::kNear);
// CVTTSD2SI rounds towards zero, since 0.5 <= x, we use floor(0.5 + x).
__ addsd(xmm_scratch, input_reg);
__ Addsd(xmm_scratch, input_reg);
__ Cvttsd2si(output_reg, xmm_scratch);
// Overflow is signalled with minint.
__ cmpl(output_reg, Immediate(0x1));
@@ -3674,7 +3674,7 @@ void LCodeGen::DoMathRound(LMathRound* instr) {
// CVTTSD2SI rounds towards zero, we use ceil(x - (-0.5)) and then
// compare and compensate.
__ Movapd(input_temp, input_reg); // Do not alter input_reg.
__ subsd(input_temp, xmm_scratch);
__ Subsd(input_temp, xmm_scratch);
__ Cvttsd2si(output_reg, input_temp);
// Catch minint due to overflow, and to prevent overflow when compensating.
__ cmpl(output_reg, Immediate(0x1));
@@ -3740,13 +3740,13 @@ void LCodeGen::DoMathPowHalf(LMathPowHalf* instr) {
__ j(carry, &sqrt, Label::kNear);
// If input is -Infinity, return Infinity.
__ Xorpd(input_reg, input_reg);
__ subsd(input_reg, xmm_scratch);
__ Subsd(input_reg, xmm_scratch);
__ jmp(&done, Label::kNear);
// Square root.
__ bind(&sqrt);
__ Xorpd(xmm_scratch, xmm_scratch);
__ addsd(input_reg, xmm_scratch); // Convert -0 to +0.
__ Addsd(input_reg, xmm_scratch); // Convert -0 to +0.
__ Sqrtsd(input_reg, input_reg);
__ bind(&done);
}
@@ -4259,7 +4259,7 @@ void LCodeGen::DoStoreKeyedFixedDoubleArray(LStoreKeyed* instr) {
XMMRegister xmm_scratch = double_scratch0();
// Turn potential sNaN value into qNaN.
__ Xorpd(xmm_scratch, xmm_scratch);
__ subsd(value, xmm_scratch);
__ Subsd(value, xmm_scratch);
}
Operand double_store_operand = BuildFastArrayOperand(

View File

@@ -356,20 +356,20 @@ void MathPowStub::Generate(MacroAssembler* masm) {
// Set result to Infinity in the special case.
__ Xorpd(double_result, double_result);
__ subsd(double_result, double_scratch);
__ Subsd(double_result, double_scratch);
__ jmp(&done);
__ bind(&continue_sqrt);
// sqrtsd returns -0 when input is -0. ECMA spec requires +0.
__ Xorpd(double_scratch, double_scratch);
__ addsd(double_scratch, double_base); // Convert -0 to 0.
__ Addsd(double_scratch, double_base); // Convert -0 to 0.
__ Sqrtsd(double_result, double_scratch);
__ jmp(&done);
// Test for -0.5.
__ bind(&not_plus_half);
// Load double_scratch with -0.5 by substracting 1.
__ subsd(double_scratch, double_result);
__ Subsd(double_scratch, double_result);
// Already ruled out NaNs for exponent.
__ Ucomisd(double_scratch, double_exponent);
__ j(not_equal, &fast_power, Label::kNear);
@@ -393,9 +393,9 @@ void MathPowStub::Generate(MacroAssembler* masm) {
__ bind(&continue_rsqrt);
// sqrtsd returns -0 when input is -0. ECMA spec requires +0.
__ Xorpd(double_exponent, double_exponent);
__ addsd(double_exponent, double_base); // Convert -0 to +0.
__ Addsd(double_exponent, double_base); // Convert -0 to +0.
__ Sqrtsd(double_exponent, double_exponent);
__ divsd(double_result, double_exponent);
__ Divsd(double_result, double_exponent);
__ jmp(&done);
}
@@ -465,16 +465,16 @@ void MathPowStub::Generate(MacroAssembler* masm) {
__ bind(&while_true);
__ shrl(scratch, Immediate(1));
__ mulsd(double_scratch, double_scratch);
__ Mulsd(double_scratch, double_scratch);
__ j(above, &while_true, Label::kNear);
__ mulsd(double_result, double_scratch);
__ Mulsd(double_result, double_scratch);
__ j(not_zero, &while_true);
__ bind(&while_false);
// If the exponent is negative, return 1/result.
__ testl(exponent, exponent);
__ j(greater, &done);
__ divsd(double_scratch2, double_result);
__ Divsd(double_scratch2, double_result);
__ Movsd(double_result, double_scratch2);
// Test whether result is zero. Bail out to check for subnormal result.
// Due to subnormals, x^-y == (1/x)^y does not hold in all cases.

View File

@@ -612,29 +612,29 @@ void MathExpGenerator::EmitMathExp(MacroAssembler* masm,
__ j(above_equal, &done);
__ Movsd(double_scratch, Operand(kScratchRegister, 3 * kDoubleSize));
__ Movsd(result, Operand(kScratchRegister, 4 * kDoubleSize));
__ mulsd(double_scratch, input);
__ addsd(double_scratch, result);
__ Mulsd(double_scratch, input);
__ Addsd(double_scratch, result);
__ Movq(temp2, double_scratch);
__ subsd(double_scratch, result);
__ Subsd(double_scratch, result);
__ Movsd(result, Operand(kScratchRegister, 6 * kDoubleSize));
__ leaq(temp1, Operand(temp2, 0x1ff800));
__ andq(temp2, Immediate(0x7ff));
__ shrq(temp1, Immediate(11));
__ mulsd(double_scratch, Operand(kScratchRegister, 5 * kDoubleSize));
__ Mulsd(double_scratch, Operand(kScratchRegister, 5 * kDoubleSize));
__ Move(kScratchRegister, ExternalReference::math_exp_log_table());
__ shlq(temp1, Immediate(52));
__ orq(temp1, Operand(kScratchRegister, temp2, times_8, 0));
__ Move(kScratchRegister, ExternalReference::math_exp_constants(0));
__ subsd(double_scratch, input);
__ Subsd(double_scratch, input);
__ Movsd(input, double_scratch);
__ subsd(result, double_scratch);
__ mulsd(input, double_scratch);
__ mulsd(result, input);
__ Subsd(result, double_scratch);
__ Mulsd(input, double_scratch);
__ Mulsd(result, input);
__ Movq(input, temp1);
__ mulsd(result, Operand(kScratchRegister, 7 * kDoubleSize));
__ subsd(result, double_scratch);
__ addsd(result, Operand(kScratchRegister, 8 * kDoubleSize));
__ mulsd(result, input);
__ Mulsd(result, Operand(kScratchRegister, 7 * kDoubleSize));
__ Subsd(result, double_scratch);
__ Addsd(result, Operand(kScratchRegister, 8 * kDoubleSize));
__ Mulsd(result, input);
__ bind(&done);
}

View File

@@ -2740,66 +2740,6 @@ void MacroAssembler::Ucomisd(XMMRegister src1, const Operand& src2) {
}
// Emit andpd, using the three-operand AVX form (vandpd) when the CPU
// supports AVX; otherwise fall back to the legacy SSE2 encoding.
void MacroAssembler::Andpd(XMMRegister dst, XMMRegister src) {
  if (!CpuFeatures::IsSupported(AVX)) {
    andpd(dst, src);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vandpd(dst, dst, src);
}
// Emit orpd, using the three-operand AVX form (vorpd) when the CPU
// supports AVX; otherwise fall back to the legacy SSE2 encoding.
void MacroAssembler::Orpd(XMMRegister dst, XMMRegister src) {
  if (!CpuFeatures::IsSupported(AVX)) {
    orpd(dst, src);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vorpd(dst, dst, src);
}
// Emit xorpd, using the three-operand AVX form (vxorpd) when the CPU
// supports AVX; otherwise fall back to the legacy SSE2 encoding.
void MacroAssembler::Xorpd(XMMRegister dst, XMMRegister src) {
  if (!CpuFeatures::IsSupported(AVX)) {
    xorpd(dst, src);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vxorpd(dst, dst, src);
}
// Emit pcmpeqd, using the three-operand AVX form (vpcmpeqd) when the CPU
// supports AVX; otherwise fall back to the legacy SSE2 encoding.
void MacroAssembler::Pcmpeqd(XMMRegister dst, XMMRegister src) {
  if (!CpuFeatures::IsSupported(AVX)) {
    pcmpeqd(dst, src);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vpcmpeqd(dst, dst, src);
}
// Emit psllq with an immediate shift count, using the AVX form (vpsllq)
// when the CPU supports AVX; otherwise fall back to the SSE2 encoding.
void MacroAssembler::Psllq(XMMRegister dst, byte imm8) {
  if (!CpuFeatures::IsSupported(AVX)) {
    psllq(dst, imm8);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vpsllq(dst, dst, imm8);
}
// Emit psrlq with an immediate shift count, using the AVX form (vpsrlq)
// when the CPU supports AVX; otherwise fall back to the SSE2 encoding.
void MacroAssembler::Psrlq(XMMRegister dst, byte imm8) {
  if (!CpuFeatures::IsSupported(AVX)) {
    psrlq(dst, imm8);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vpsrlq(dst, dst, imm8);
}
void MacroAssembler::Cmp(Register dst, Handle<Object> source) {
AllowDeferredHandleDereference smi_check;
if (source->IsSmi()) {

View File

@@ -904,6 +904,39 @@ class MacroAssembler: public Assembler {
// Load a floating-point immediate into an XMM register by forwarding its
// raw bit pattern (via bit_cast) to the integer Move overload.
void Move(XMMRegister dst, float src) { Move(dst, bit_cast<uint32_t>(src)); }
void Move(XMMRegister dst, double src) { Move(dst, bit_cast<uint64_t>(src)); }
// Defines a binary-op wrapper that emits the three-operand AVX encoding
// (v-prefixed, dst = dst op src) when the CPU supports AVX, and falls back
// to the legacy two-operand SSE instruction otherwise.
#define AVX_OP2_WITH_TYPE(macro_name, name, src_type) \
void macro_name(XMMRegister dst, src_type src) { \
if (CpuFeatures::IsSupported(AVX)) { \
CpuFeatureScope scope(this, AVX); \
v##name(dst, dst, src); \
} else { \
name(dst, src); \
} \
}
// Wrapper overload taking an XMM-register source.
#define AVX_OP2_X(macro_name, name) \
AVX_OP2_WITH_TYPE(macro_name, name, XMMRegister)
// Wrapper overload taking a memory-operand source.
#define AVX_OP2_O(macro_name, name) \
AVX_OP2_WITH_TYPE(macro_name, name, const Operand&)
// Emits both the register-source and memory-operand-source overloads.
#define AVX_OP2_XO(macro_name, name) \
AVX_OP2_X(macro_name, name) \
AVX_OP2_O(macro_name, name)
// Scalar double arithmetic: register and memory-operand forms.
AVX_OP2_XO(Addsd, addsd)
AVX_OP2_XO(Subsd, subsd)
AVX_OP2_XO(Mulsd, mulsd)
AVX_OP2_XO(Divsd, divsd)
// Packed bitwise/compare ops: register form only.
AVX_OP2_X(Andpd, andpd)
AVX_OP2_X(Orpd, orpd)
AVX_OP2_X(Xorpd, xorpd)
AVX_OP2_X(Pcmpeqd, pcmpeqd)
// Shifts take an immediate count rather than an XMM/memory source.
AVX_OP2_WITH_TYPE(Psllq, psllq, byte)
AVX_OP2_WITH_TYPE(Psrlq, psrlq, byte)
// The helper macros are local to this section of the class.
#undef AVX_OP2_O
#undef AVX_OP2_X
#undef AVX_OP2_XO
#undef AVX_OP2_WITH_TYPE
void Movsd(XMMRegister dst, XMMRegister src);
void Movsd(XMMRegister dst, const Operand& src);
void Movsd(const Operand& dst, XMMRegister src);
@@ -930,13 +963,6 @@ class MacroAssembler: public Assembler {
void Ucomisd(XMMRegister src1, XMMRegister src2);
void Ucomisd(XMMRegister src1, const Operand& src2);
void Andpd(XMMRegister dst, XMMRegister src);
void Orpd(XMMRegister dst, XMMRegister src);
void Xorpd(XMMRegister dst, XMMRegister src);
void Pcmpeqd(XMMRegister dst, XMMRegister src);
void Psllq(XMMRegister dst, byte imm8);
void Psrlq(XMMRegister dst, byte imm8);
// Control Flow
void Jump(Address destination, RelocInfo::Mode rmode);
void Jump(ExternalReference ext);