Porting Math.pow changes to x64.

TEST=math-pow.js, regress-397.js Review URL: http://codereview.chromium.org/8821019 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@10185 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
2011-12-07 08:34:27 +00:00 · 2011-12-07 08:34:27 +00:00 · c9c9ea676b
commit c9c9ea676b
parent b5b91b5add
9 changed files with 401 additions and 272 deletions
--- a/src/ia32/code-stubs-ia32.cc
+++ b/src/ia32/code-stubs-ia32.cc
@ -2940,64 +2940,72 @@ void FloatingPointHelper::CheckFloatOperandsAreInt32(MacroAssembler* masm,
 void MathPowStub::Generate(MacroAssembler* masm) {
  CpuFeatures::Scope use_sse2(SSE2);
  Factory* factory = masm->isolate()->factory();
+  const Register exponent = eax;
+  const Register base = edx;
+  const Register scratch = ecx;
+  const XMMRegister double_result = xmm3;
+  const XMMRegister double_base = xmm2;
+  const XMMRegister double_exponent = xmm1;
+  const XMMRegister double_scratch = xmm4;
+
  Label double_int_runtime, generic_runtime, done;
-  Label base_is_smi, unpack_exponent, exponent_not_smi, int_exponent;
-  // Save 1 in xmm3 - we need this several times later on.
-  __ mov(ecx, Immediate(1));
-  __ cvtsi2sd(xmm3, ecx);
+  Label exponent_not_smi, int_exponent;
+
+  // Save 1 in double_result - we need this several times later on.
+  __ mov(scratch, Immediate(1));
+  __ cvtsi2sd(double_result, scratch);

  if (exponent_type_ == ON_STACK) {
-    // The exponent (and base) are supplied as arguments on the stack.
+    Label base_is_smi, unpack_exponent;
+    // The exponent and base are supplied as arguments on the stack.
    // This can only happen if the stub is called from non-optimized code.
-    // Load input parameters from stack
-    __ mov(edx, Operand(esp, 2 * kPointerSize));
-    __ mov(eax, Operand(esp, 1 * kPointerSize));
-    // edx: base (smi or heap number)
-    // eax: exponent (smi or heap number)
-    __ JumpIfSmi(edx, &base_is_smi, Label::kNear);
-    __ cmp(FieldOperand(edx, HeapObject::kMapOffset),
+    // Load input parameters from stack.
+    __ mov(base, Operand(esp, 2 * kPointerSize));
+    __ mov(exponent, Operand(esp, 1 * kPointerSize));
+
+    __ JumpIfSmi(base, &base_is_smi, Label::kNear);
+    __ cmp(FieldOperand(base, HeapObject::kMapOffset),
           factory->heap_number_map());
    __ j(not_equal, &generic_runtime);

-    __ movdbl(xmm1, FieldOperand(edx, HeapNumber::kValueOffset));
+    __ movdbl(double_base, FieldOperand(base, HeapNumber::kValueOffset));
    __ jmp(&unpack_exponent, Label::kNear);

    __ bind(&base_is_smi);
-    __ SmiUntag(edx);
-    __ cvtsi2sd(xmm1, edx);
-    __ bind(&unpack_exponent);
+    __ SmiUntag(base);
+    __ cvtsi2sd(double_base, base);

-    __ JumpIfNotSmi(eax, &exponent_not_smi, Label::kNear);
-    __ SmiUntag(eax);
+    __ bind(&unpack_exponent);
+    __ JumpIfNotSmi(exponent, &exponent_not_smi, Label::kNear);
+    __ SmiUntag(exponent);
    __ jmp(&int_exponent);

    __ bind(&exponent_not_smi);
-    __ cmp(FieldOperand(eax, HeapObject::kMapOffset),
+    __ cmp(FieldOperand(exponent, HeapObject::kMapOffset),
           factory->heap_number_map());
    __ j(not_equal, &generic_runtime);
-    __ movdbl(xmm2, FieldOperand(eax, HeapNumber::kValueOffset));
+    __ movdbl(double_exponent,
+              FieldOperand(exponent, HeapNumber::kValueOffset));
  } else if (exponent_type_ == TAGGED) {
-    // xmm1: base as double
-    // eax: exponent (smi or heap number)
-    __ JumpIfNotSmi(eax, &exponent_not_smi, Label::kNear);
-    __ SmiUntag(eax);
+    __ JumpIfNotSmi(exponent, &exponent_not_smi, Label::kNear);
+    __ SmiUntag(exponent);
    __ jmp(&int_exponent);

    __ bind(&exponent_not_smi);
-    __ movdbl(xmm2, FieldOperand(eax, HeapNumber::kValueOffset));
+    __ movdbl(double_exponent,
+              FieldOperand(exponent, HeapNumber::kValueOffset));
  }

  if (exponent_type_ != INTEGER) {
    Label fast_power;
-    // xmm1: base as double that is not +/- Infinity or NaN
-    // xmm2: exponent as double
    // Detect integer exponents stored as double.
-    __ cvttsd2si(eax, Operand(xmm2));
+    __ cvttsd2si(exponent, Operand(double_exponent));
    // Skip to runtime if possibly NaN (indicated by the indefinite integer).
-    __ cmp(eax, Immediate(0x80000000u));
+    __ cmp(exponent, Immediate(0x80000000u));
    __ j(equal, &generic_runtime);
-    __ cvtsi2sd(xmm4, eax);
-    __ ucomisd(xmm2, xmm4);  // Already ruled out NaNs for exponent.
+    __ cvtsi2sd(double_scratch, exponent);
+    // Already ruled out NaNs for exponent.
+    __ ucomisd(double_exponent, double_scratch);
    __ j(equal, &int_exponent);

    if (exponent_type_ == ON_STACK) {
@ -3006,71 +3014,70 @@ void MathPowStub::Generate(MacroAssembler* masm) {
      // for non-constant cases of +/-0.5 as these hardly occur.
      Label continue_sqrt, continue_rsqrt, not_plus_half;
      // Test for 0.5.
-      // Load xmm4 with 0.5.
-      __ mov(ecx, Immediate(0x3F000000u));
-      __ movd(xmm4, ecx);
-      __ cvtss2sd(xmm4, xmm4);
-      // xmm4 now has 0.5.
-      __ ucomisd(xmm4, xmm2);  // Already ruled out NaNs for exponent.
+      // Load double_scratch with 0.5.
+      __ mov(scratch, Immediate(0x3F000000u));
+      __ movd(double_scratch, scratch);
+      __ cvtss2sd(double_scratch, double_scratch);
+      // Already ruled out NaNs for exponent.
+      __ ucomisd(double_scratch, double_exponent);
      __ j(not_equal, &not_plus_half, Label::kNear);

      // Calculates square root of base.  Check for the special case of
      // Math.pow(-Infinity, 0.5) == Infinity (ECMA spec, 15.8.2.13).
      // According to IEEE-754, single-precision -Infinity has the highest
      // 9 bits set and the lowest 23 bits cleared.
-      __ mov(ecx, 0xFF800000u);
-      __ movd(xmm4, ecx);
-      __ cvtss2sd(xmm4, xmm4);
-      __ ucomisd(xmm1, xmm4);
+      __ mov(scratch, 0xFF800000u);
+      __ movd(double_scratch, scratch);
+      __ cvtss2sd(double_scratch, double_scratch);
+      __ ucomisd(double_base, double_scratch);
      // Comparing -Infinity with NaN results in "unordered", which sets the
      // zero flag as if both were equal.  However, it also sets the carry flag.
      __ j(not_equal, &continue_sqrt, Label::kNear);
      __ j(carry, &continue_sqrt, Label::kNear);

      // Set result to Infinity in the special case.
-      __ xorps(xmm3, xmm3);
-      __ subsd(xmm3, xmm4);
+      __ xorps(double_result, double_result);
+      __ subsd(double_result, double_scratch);
      __ jmp(&done);

      __ bind(&continue_sqrt);
      // sqrtsd returns -0 when input is -0.  ECMA spec requires +0.
-      __ xorps(xmm4, xmm4);
-      __ addsd(xmm4, xmm1);  // Convert -0 to +0.
-      __ sqrtsd(xmm3, xmm4);
+      __ xorps(double_scratch, double_scratch);
+      __ addsd(double_scratch, double_base);  // Convert -0 to +0.
+      __ sqrtsd(double_result, double_scratch);
      __ jmp(&done);

      // Test for -0.5.
      __ bind(&not_plus_half);
-      // Load xmm2 with -0.5.
-      // Since xmm3 is 1 and xmm4 is 0.5 this is simply xmm4 - xmm3.
-      __ subsd(xmm4, xmm3);
-      // xmm4 now has -0.5.
-      __ ucomisd(xmm4, xmm2);  // Already ruled out NaNs for exponent.
+      // Load double_exponent with -0.5 by substracting 1.
+      __ subsd(double_scratch, double_result);
+      // Already ruled out NaNs for exponent.
+      __ ucomisd(double_scratch, double_exponent);
      __ j(not_equal, &fast_power, Label::kNear);

      // Calculates reciprocal of square root of base.  Check for the special
      // case of Math.pow(-Infinity, -0.5) == 0 (ECMA spec, 15.8.2.13).
      // According to IEEE-754, single-precision -Infinity has the highest
      // 9 bits set and the lowest 23 bits cleared.
-      __ mov(ecx, 0xFF800000u);
-      __ movd(xmm4, ecx);
-      __ cvtss2sd(xmm4, xmm4);
-      __ ucomisd(xmm1, xmm4);
+      __ mov(scratch, 0xFF800000u);
+      __ movd(double_scratch, scratch);
+      __ cvtss2sd(double_scratch, double_scratch);
+      __ ucomisd(double_base, double_scratch);
      // Comparing -Infinity with NaN results in "unordered", which sets the
      // zero flag as if both were equal.  However, it also sets the carry flag.
      __ j(not_equal, &continue_rsqrt, Label::kNear);
      __ j(carry, &continue_rsqrt, Label::kNear);

      // Set result to 0 in the special case.
-      __ xorps(xmm3, xmm3);
+      __ xorps(double_result, double_result);
      __ jmp(&done);

      __ bind(&continue_rsqrt);
      // sqrtsd returns -0 when input is -0.  ECMA spec requires +0.
-      __ xorps(xmm2, xmm2);
-      __ addsd(xmm2, xmm1);  // Convert -0 to +0.
-      __ sqrtsd(xmm2, xmm2);
-      __ divsd(xmm3, xmm2);
+      __ xorps(double_exponent, double_exponent);
+      __ addsd(double_exponent, double_base);  // Convert -0 to +0.
+      __ sqrtsd(double_exponent, double_exponent);
+      __ divsd(double_result, double_exponent);
      __ jmp(&done);
    }

@ -3080,9 +3087,9 @@ void MathPowStub::Generate(MacroAssembler* masm) {
    __ fnclex();  // Clear flags to catch exceptions later.
    // Transfer (B)ase and (E)xponent onto the FPU register stack.
    __ sub(esp, Immediate(kDoubleSize));
-    __ movdbl(Operand(esp, 0), xmm2);
+    __ movdbl(Operand(esp, 0), double_exponent);
    __ fld_d(Operand(esp, 0));  // E
-    __ movdbl(Operand(esp, 0), xmm1);
+    __ movdbl(Operand(esp, 0), double_base);
    __ fld_d(Operand(esp, 0));  // B, E

    // Exponent is in st(1) and base is in st(0)
@ -3105,7 +3112,7 @@ void MathPowStub::Generate(MacroAssembler* masm) {
    __ test_b(eax, 0x5F);  // We check for all but precision exception.
    __ j(not_zero, &fast_power_failed, Label::kNear);
    __ fstp_d(Operand(esp, 0));
-    __ movdbl(xmm3, Operand(esp, 0));
+    __ movdbl(double_result, Operand(esp, 0));
    __ add(esp, Immediate(kDoubleSize));
    __ jmp(&done);

@ -3117,49 +3124,46 @@ void MathPowStub::Generate(MacroAssembler* masm) {

  // Calculate power with integer exponent.
  __ bind(&int_exponent);
-  // xmm1: base as double that is not +/- Infinity or NaN
-  // eax: exponent as untagged integer
-  __ mov(ecx, eax);      // Back up exponent.
-  __ movsd(xmm4, xmm1);  // Back up base.
-  __ movsd(xmm2, xmm3);  // Load xmm2 with 1.
-
+  const XMMRegister double_scratch2 = double_exponent;
+  __ mov(scratch, exponent);      // Back up exponent.
+  __ movsd(double_scratch, double_base);  // Back up base.
+  __ movsd(double_scratch2, double_result);  // Load double_exponent with 1.

  // Get absolute value of exponent.
  Label no_neg, while_true, no_multiply;
-  __ cmp(eax, 0);
+  __ cmp(exponent, 0);
  __ j(greater_equal, &no_neg, Label::kNear);
-  __ neg(eax);
+  __ neg(exponent);
  __ bind(&no_neg);

  __ bind(&while_true);
-  __ shr(eax, 1);
+  __ shr(exponent, 1);
  __ j(not_carry, &no_multiply, Label::kNear);
-  __ mulsd(xmm3, xmm1);
+  __ mulsd(double_result, double_base);
  __ bind(&no_multiply);

-  __ mulsd(xmm1, xmm1);
+  __ mulsd(double_base, double_base);
  __ j(not_zero, &while_true);

-  // base has the original value of the exponent - if the exponent  is
-  // negative return 1/result.
-  __ test(ecx, ecx);
+  // scratch has the original value of the exponent - if the exponent is
+  // negative, return 1/result.
+  __ test(scratch, scratch);
  __ j(positive, &done);
-  __ divsd(xmm2, xmm3);
-  __ movsd(xmm3, xmm2);
+  __ divsd(double_scratch2, double_result);
+  __ movsd(double_result, double_scratch2);
  // Test whether result is zero.  Bail out to check for subnormal result.
  // Due to subnormals, x^-y == (1/x)^y does not hold in all cases.
-  __ xorps(xmm2, xmm2);
-  __ ucomisd(xmm2, xmm3);  // Result cannot be NaN.
+  __ xorps(double_scratch2, double_scratch2);
+  __ ucomisd(double_scratch2, double_result);  // Result cannot be NaN.
  __ j(equal, &double_int_runtime);

  // Returning or bailing out.
  if (exponent_type_ == ON_STACK) {
    // The stub is called from non-optimized code, which expects the result
-    // as heap number in eax.
+    // as heap number in exponent.
    __ bind(&done);
-    // xmm3: result
-    __ AllocateHeapNumber(eax, ecx, edx, &generic_runtime);
-    __ movdbl(FieldOperand(eax, HeapNumber::kValueOffset), xmm3);
+    __ AllocateHeapNumber(exponent, scratch, base, &generic_runtime);
+    __ movdbl(FieldOperand(exponent, HeapNumber::kValueOffset), double_result);
    __ ret(2 * kPointerSize);

    // The arguments are still on the stack.
@ -3170,28 +3174,23 @@ void MathPowStub::Generate(MacroAssembler* masm) {
    __ jmp(&done);

    Label return_from_runtime;
-    StubRuntimeCallHelper callhelper;
    __ bind(&generic_runtime);
-    // xmm1: base
-    // xmm2: exponent
    {
      AllowExternalCallThatCantCauseGC scope(masm);
-      __ PrepareCallCFunction(4, eax);
-      __ movdbl(Operand(esp, 0 * kDoubleSize), xmm1);
-      __ movdbl(Operand(esp, 1 * kDoubleSize), xmm2);
+      __ PrepareCallCFunction(4, exponent);
+      __ movdbl(Operand(esp, 0 * kDoubleSize), double_base);
+      __ movdbl(Operand(esp, 1 * kDoubleSize), double_exponent);
      __ CallCFunction(
          ExternalReference::power_double_double_function(masm->isolate()), 4);
    }
    __ jmp(&return_from_runtime, Label::kNear);

    __ bind(&double_int_runtime);
-    // xmm4: base
-    // ecx: exponent
    {
      AllowExternalCallThatCantCauseGC scope(masm);
-      __ PrepareCallCFunction(4, eax);
-      __ movdbl(Operand(esp, 0 * kDoubleSize), xmm4);
-      __ mov(Operand(esp, 1 * kDoubleSize), ecx);
+      __ PrepareCallCFunction(4, exponent);
+      __ movdbl(Operand(esp, 0 * kDoubleSize), double_scratch);
+      __ mov(Operand(esp, 1 * kDoubleSize), scratch);
      __ CallCFunction(
          ExternalReference::power_double_int_function(masm->isolate()), 4);
    }
@ -3201,10 +3200,9 @@ void MathPowStub::Generate(MacroAssembler* masm) {
    // Store it into the (fixed) result register.
    __ sub(esp, Immediate(kDoubleSize));
    __ fstp_d(Operand(esp, 0));
-    __ movdbl(xmm3, Operand(esp, 0));
+    __ movdbl(double_result, Operand(esp, 0));
    __ add(esp, Immediate(kDoubleSize));

-    // xmm3: result
    __ bind(&done);
    __ ret(0);
  }
--- a/src/ia32/lithium-codegen-ia32.cc
+++ b/src/ia32/lithium-codegen-ia32.cc
@ -2991,12 +2991,12 @@ void LCodeGen::DoMathPowHalf(LMathPowHalf* instr) {
 void LCodeGen::DoPower(LPower* instr) {
  Representation exponent_type = instr->hydrogen()->right()->representation();
  // Having marked this as a call, we can use any registers.
-  // Just make sure that the input registers are the expected ones.
+  // Just make sure that the input/output registers are the expected ones.
  ASSERT(!instr->InputAt(1)->IsDoubleRegister() ||
-         ToDoubleRegister(instr->InputAt(1)).is(xmm2));
+         ToDoubleRegister(instr->InputAt(1)).is(xmm1));
  ASSERT(!instr->InputAt(1)->IsRegister() ||
         ToRegister(instr->InputAt(1)).is(eax));
-  ASSERT(ToDoubleRegister(instr->InputAt(0)).is(xmm1));
+  ASSERT(ToDoubleRegister(instr->InputAt(0)).is(xmm2));
  ASSERT(ToDoubleRegister(instr->result()).is(xmm3));

  if (exponent_type.IsTagged()) {
--- a/src/ia32/lithium-ia32.cc
+++ b/src/ia32/lithium-ia32.cc
@ -1446,9 +1446,9 @@ LInstruction* LChunkBuilder::DoPower(HPower* instr) {
  // We need to use fixed result register for the call.
  Representation exponent_type = instr->right()->representation();
  ASSERT(instr->left()->representation().IsDouble());
-  LOperand* left = UseFixedDouble(instr->left(), xmm1);
+  LOperand* left = UseFixedDouble(instr->left(), xmm2);
  LOperand* right = exponent_type.IsDouble() ?
-      UseFixedDouble(instr->right(), xmm2) :
+      UseFixedDouble(instr->right(), xmm1) :
      UseFixed(instr->right(), eax);
  LPower* result = new(zone()) LPower(left, right);
  return MarkAsCall(DefineFixedDouble(result, xmm3), instr,
--- a/src/x64/assembler-x64.cc
+++ b/src/x64/assembler-x64.cc
@ -2307,6 +2307,27 @@ void Assembler::fyl2x() {
 }


+void Assembler::f2xm1() {
+  EnsureSpace ensure_space(this);
+  emit(0xD9);
+  emit(0xF0);
+}
+
+
+void Assembler::fscale() {
+  EnsureSpace ensure_space(this);
+  emit(0xD9);
+  emit(0xFD);
+}
+
+
+void Assembler::fninit() {
+  EnsureSpace ensure_space(this);
+  emit(0xDB);
+  emit(0xE3);
+}
+
+
 void Assembler::fadd(int i) {
  EnsureSpace ensure_space(this);
  emit_farith(0xDC, 0xC0, i);
--- a/src/x64/assembler-x64.h
+++ b/src/x64/assembler-x64.h
@ -1277,6 +1277,9 @@ class Assembler : public AssemblerBase {
  void fcos();
  void fptan();
  void fyl2x();
+  void f2xm1();
+  void fscale();
+  void fninit();

  void frndint();

--- a/src/x64/code-stubs-x64.cc
+++ b/src/x64/code-stubs-x64.cc
@ -1991,152 +1991,274 @@ void FloatingPointHelper::NumbersToSmis(MacroAssembler* masm,


 void MathPowStub::Generate(MacroAssembler* masm) {
-  // Registers are used as follows:
-  // rdx = base
-  // rax = exponent
-  // rcx = temporary, result
+  // Choose register conforming to calling convention (when bailing out).
+#ifdef _WIN64
+  const Register exponent = rdx;
+#else
+  const Register exponent = rdi;
+#endif
+  const Register base = rax;
+  const Register scratch = rcx;
+  const XMMRegister double_result = xmm3;
+  const XMMRegister double_base = xmm2;
+  const XMMRegister double_exponent = xmm1;
+  const XMMRegister double_scratch = xmm4;

-  Label allocate_return, call_runtime;
+  Label double_int_runtime, generic_runtime, done;
+  Label exponent_not_smi, int_exponent;

-  // Load input parameters.
-  __ movq(rdx, Operand(rsp, 2 * kPointerSize));
-  __ movq(rax, Operand(rsp, 1 * kPointerSize));
+  // Save 1 in double_result - we need this several times later on.
+  __ movq(scratch, Immediate(1));
+  __ cvtlsi2sd(double_result, scratch);

-  // Save 1 in xmm3 - we need this several times later on.
-  __ Set(rcx, 1);
-  __ cvtlsi2sd(xmm3, rcx);
+  if (exponent_type_ == ON_STACK) {
+    Label base_is_smi, unpack_exponent;
+    // The exponent and base are supplied as arguments on the stack.
+    // This can only happen if the stub is called from non-optimized code.
+    // Load input parameters from stack.
+    __ movq(base, Operand(rsp, 2 * kPointerSize));
+    __ movq(exponent, Operand(rsp, 1 * kPointerSize));
+    __ JumpIfSmi(base, &base_is_smi, Label::kNear);
+    __ CompareRoot(FieldOperand(base, HeapObject::kMapOffset),
+                   Heap::kHeapNumberMapRootIndex);
+    __ j(not_equal, &generic_runtime);

-  Label exponent_nonsmi;
-  Label base_nonsmi;
-  // If the exponent is a heap number go to that specific case.
-  __ JumpIfNotSmi(rax, &exponent_nonsmi);
-  __ JumpIfNotSmi(rdx, &base_nonsmi);
+    __ movsd(double_base, FieldOperand(base, HeapNumber::kValueOffset));
+    __ jmp(&unpack_exponent, Label::kNear);

-  // Optimized version when both exponent and base are smis.
-  Label powi;
-  __ SmiToInteger32(rdx, rdx);
-  __ cvtlsi2sd(xmm0, rdx);
-  __ jmp(&powi);
-  // Exponent is a smi and base is a heapnumber.
-  __ bind(&base_nonsmi);
-  __ CompareRoot(FieldOperand(rdx, HeapObject::kMapOffset),
-                 Heap::kHeapNumberMapRootIndex);
-  __ j(not_equal, &call_runtime);
+    __ bind(&base_is_smi);
+    __ SmiToInteger32(base, base);
+    __ cvtlsi2sd(double_base, base);
+    __ bind(&unpack_exponent);

-  __ movsd(xmm0, FieldOperand(rdx, HeapNumber::kValueOffset));
+    __ JumpIfNotSmi(exponent, &exponent_not_smi, Label::kNear);
+    __ SmiToInteger32(exponent, exponent);
+    __ jmp(&int_exponent);

-  // Optimized version of pow if exponent is a smi.
-  // xmm0 contains the base.
-  __ bind(&powi);
-  __ SmiToInteger32(rax, rax);
+    __ bind(&exponent_not_smi);
+    __ CompareRoot(FieldOperand(exponent, HeapObject::kMapOffset),
+                   Heap::kHeapNumberMapRootIndex);
+    __ j(not_equal, &generic_runtime);
+    __ movsd(double_exponent, FieldOperand(exponent, HeapNumber::kValueOffset));
+  } else if (exponent_type_ == TAGGED) {
+    __ JumpIfNotSmi(exponent, &exponent_not_smi, Label::kNear);
+    __ SmiToInteger32(exponent, exponent);
+    __ jmp(&int_exponent);

-  // Save exponent in base as we need to check if exponent is negative later.
-  // We know that base and exponent are in different registers.
-  __ movq(rdx, rax);
+    __ bind(&exponent_not_smi);
+    __ movsd(double_exponent, FieldOperand(exponent, HeapNumber::kValueOffset));
+  }
+
+  if (exponent_type_ != INTEGER) {
+    Label fast_power;
+    // Detect integer exponents stored as double.
+    __ cvttsd2si(exponent, double_exponent);
+    // Skip to runtime if possibly NaN (indicated by the indefinite integer).
+    __ cmpl(exponent, Immediate(0x80000000u));
+    __ j(equal, &generic_runtime);
+    __ cvtlsi2sd(double_scratch, exponent);
+    // Already ruled out NaNs for exponent.
+    __ ucomisd(double_exponent, double_scratch);
+    __ j(equal, &int_exponent);
+
+    if (exponent_type_ == ON_STACK) {
+      // Detect square root case.  Crankshaft detects constant +/-0.5 at
+      // compile time and uses DoMathPowHalf instead.  We then skip this check
+      // for non-constant cases of +/-0.5 as these hardly occur.
+      Label continue_sqrt, continue_rsqrt, not_plus_half;
+      // Test for 0.5.
+      // Load double_scratch with 0.5.
+      __ movq(scratch, V8_UINT64_C(0x3FE0000000000000), RelocInfo::NONE);
+      __ movq(double_scratch, scratch);
+      // Already ruled out NaNs for exponent.
+      __ ucomisd(double_scratch, double_exponent);
+      __ j(not_equal, &not_plus_half, Label::kNear);
+
+      // Calculates square root of base.  Check for the special case of
+      // Math.pow(-Infinity, 0.5) == Infinity (ECMA spec, 15.8.2.13).
+      // According to IEEE-754, double-precision -Infinity has the highest
+      // 12 bits set and the lowest 52 bits cleared.
+      __ movq(scratch, V8_UINT64_C(0xFFF0000000000000), RelocInfo::NONE);
+      __ movq(double_scratch, scratch);
+      __ ucomisd(double_scratch, double_base);
+      // Comparing -Infinity with NaN results in "unordered", which sets the
+      // zero flag as if both were equal.  However, it also sets the carry flag.
+      __ j(not_equal, &continue_sqrt, Label::kNear);
+      __ j(carry, &continue_sqrt, Label::kNear);
+
+      // Set result to Infinity in the special case.
+      __ xorps(double_result, double_result);
+      __ subsd(double_result, double_scratch);
+      __ jmp(&done);
+
+      __ bind(&continue_sqrt);
+      // sqrtsd returns -0 when input is -0.  ECMA spec requires +0.
+      __ xorps(double_scratch, double_scratch);
+      __ addsd(double_scratch, double_base);  // Convert -0 to 0.
+      __ sqrtsd(double_result, double_scratch);
+      __ jmp(&done);
+
+      // Test for -0.5.
+      __ bind(&not_plus_half);
+      // Load double_scratch with -0.5 by substracting 1.
+      __ subsd(double_scratch, double_result);
+      // Already ruled out NaNs for exponent.
+      __ ucomisd(double_scratch, double_exponent);
+      __ j(not_equal, &fast_power, Label::kNear);
+
+      // Calculates reciprocal of square root of base.  Check for the special
+      // case of Math.pow(-Infinity, -0.5) == 0 (ECMA spec, 15.8.2.13).
+      // According to IEEE-754, double-precision -Infinity has the highest
+      // 12 bits set and the lowest 52 bits cleared.
+      __ movq(scratch, V8_UINT64_C(0xFFF0000000000000), RelocInfo::NONE);
+      __ movq(double_scratch, scratch);
+      __ ucomisd(double_scratch, double_base);
+      // Comparing -Infinity with NaN results in "unordered", which sets the
+      // zero flag as if both were equal.  However, it also sets the carry flag.
+      __ j(not_equal, &continue_rsqrt, Label::kNear);
+      __ j(carry, &continue_rsqrt, Label::kNear);
+
+      // Set result to 0 in the special case.
+      __ xorps(double_result, double_result);
+      __ jmp(&done);
+
+      __ bind(&continue_rsqrt);
+      // sqrtsd returns -0 when input is -0.  ECMA spec requires +0.
+      __ xorps(double_exponent, double_exponent);
+      __ addsd(double_exponent, double_base);  // Convert -0 to +0.
+      __ sqrtsd(double_exponent, double_exponent);
+      __ divsd(double_result, double_exponent);
+      __ jmp(&done);
+    }
+
+    // Using FPU instructions to calculate power.
+    Label fast_power_failed;
+    __ bind(&fast_power);
+    __ fnclex();  // Clear flags to catch exceptions later.
+    // Transfer (B)ase and (E)xponent onto the FPU register stack.
+    __ subq(rsp, Immediate(kDoubleSize));
+    __ movsd(Operand(rsp, 0), double_exponent);
+    __ fld_d(Operand(rsp, 0));  // E
+    __ movsd(Operand(rsp, 0), double_base);
+    __ fld_d(Operand(rsp, 0));  // B, E
+
+    // Exponent is in st(1) and base is in st(0)
+    // B ^ E = (2^(E * log2(B)) - 1) + 1 = (2^X - 1) + 1 for X = E * log2(B)
+    // FYL2X calculates st(1) * log2(st(0))
+    __ fyl2x();    // X
+    __ fld(0);     // X, X
+    __ frndint();  // rnd(X), X
+    __ fsub(1);    // rnd(X), X-rnd(X)
+    __ fxch(1);    // X - rnd(X), rnd(X)
+    // F2XM1 calculates 2^st(0) - 1 for -1 < st(0) < 1
+    __ f2xm1();    // 2^(X-rnd(X)) - 1, rnd(X)
+    __ fld1();     // 1, 2^(X-rnd(X)) - 1, rnd(X)
+    __ faddp(1);   // 1, 2^(X-rnd(X)), rnd(X)
+    // FSCALE calculates st(0) * 2^st(1)
+    __ fscale();   // 2^X, rnd(X)
+    __ fstp(1);
+    // Bail out to runtime in case of exceptions in the status word.
+    __ fnstsw_ax();
+    __ testb(rax, Immediate(0x5F));  // Check for all but precision exception.
+    __ j(not_zero, &fast_power_failed, Label::kNear);
+    __ fstp_d(Operand(rsp, 0));
+    __ movsd(double_result, Operand(rsp, 0));
+    __ addq(rsp, Immediate(kDoubleSize));
+    __ jmp(&done);
+
+    __ bind(&fast_power_failed);
+    __ fninit();
+    __ addq(rsp, Immediate(kDoubleSize));
+    __ jmp(&generic_runtime);
+  }
+
+  // Calculate power with integer exponent.
+  __ bind(&int_exponent);
+  const XMMRegister double_scratch2 = double_exponent;
+  // Back up exponent as we need to check if exponent is negative later.
+  __ movq(scratch, exponent);  // Back up exponent.
+  __ movsd(double_scratch, double_base);  // Back up base.
+  __ movsd(double_scratch2, double_result);  // Load double_exponent with 1.

  // Get absolute value of exponent.
-  Label no_neg;
-  __ cmpl(rax, Immediate(0));
-  __ j(greater_equal, &no_neg, Label::kNear);
-  __ negl(rax);
+  Label no_neg, while_true, no_multiply;
+  __ cmpl(scratch, Immediate(0));
+  __ j(positive, &no_neg, Label::kNear);
+  __ negl(scratch);
  __ bind(&no_neg);

-  // Load xmm1 with 1.
-  __ movaps(xmm1, xmm3);
-  Label while_true;
-  Label no_multiply;
-
  __ bind(&while_true);
-  __ shrl(rax, Immediate(1));
+  __ shrl(scratch, Immediate(1));
  __ j(not_carry, &no_multiply, Label::kNear);
-  __ mulsd(xmm1, xmm0);
+  __ mulsd(double_result, double_scratch);
  __ bind(&no_multiply);
-  __ mulsd(xmm0, xmm0);
+
+  __ mulsd(double_scratch, double_scratch);
  __ j(not_zero, &while_true);

-  // Base has the original value of the exponent - if the exponent  is
-  // negative return 1/result.
-  __ testl(rdx, rdx);
-  __ j(positive, &allocate_return);
-  // Special case if xmm1 has reached infinity.
-  __ divsd(xmm3, xmm1);
-  __ movaps(xmm1, xmm3);
-  __ xorps(xmm0, xmm0);
-  __ ucomisd(xmm0, xmm1);
-  __ j(equal, &call_runtime);
+  // scratch has the original value of the exponent - if the exponent is
+  // negative, return 1/result.
+  __ testl(exponent, exponent);
+  __ j(greater, &done);
+  __ divsd(double_scratch2, double_result);
+  __ movsd(double_result, double_scratch2);
+  // Test whether result is zero.  Bail out to check for subnormal result.
+  // Due to subnormals, x^-y == (1/x)^y does not hold in all cases.
+  __ xorps(double_scratch2, double_scratch2);
+  __ ucomisd(double_scratch2, double_result);
+  __ j(equal, &double_int_runtime);

-  __ jmp(&allocate_return);
+  // Returning or bailing out.
+  if (exponent_type_ == ON_STACK) {
+    // The stub is called from non-optimized code, which expects the result
+    // as heap number in eax.
+    __ bind(&done);
+    __ AllocateHeapNumber(rax, rcx, &generic_runtime);
+    __ movsd(FieldOperand(rax, HeapNumber::kValueOffset), double_result);
+    __ ret(2 * kPointerSize);

-  // Exponent (or both) is a heapnumber - no matter what we should now work
-  // on doubles.
-  __ bind(&exponent_nonsmi);
-  __ CompareRoot(FieldOperand(rax, HeapObject::kMapOffset),
-                 Heap::kHeapNumberMapRootIndex);
-  __ j(not_equal, &call_runtime);
-  __ movsd(xmm1, FieldOperand(rax, HeapNumber::kValueOffset));
-  // Test if exponent is nan.
-  __ ucomisd(xmm1, xmm1);
-  __ j(parity_even, &call_runtime);
+    // The arguments are still on the stack.
+    __ bind(&generic_runtime);
+    __ bind(&double_int_runtime);
+    __ TailCallRuntime(Runtime::kMath_pow_cfunction, 2, 1);
+  } else {
+    __ jmp(&done);

-  Label base_not_smi, handle_special_cases;
-  __ JumpIfNotSmi(rdx, &base_not_smi, Label::kNear);
-  __ SmiToInteger32(rdx, rdx);
-  __ cvtlsi2sd(xmm0, rdx);
-  __ jmp(&handle_special_cases, Label::kNear);
+    Label return_from_runtime;
+    StubRuntimeCallHelper callhelper;
+    __ bind(&generic_runtime);
+    // Move base to the correct argument register.  Exponent is already in xmm1.
+    __ movsd(xmm0, double_base);
+    ASSERT(double_exponent.is(xmm1));
+    {
+      AllowExternalCallThatCantCauseGC scope(masm);
+      __ PrepareCallCFunction(2);
+      __ CallCFunction(
+          ExternalReference::power_double_double_function(masm->isolate()), 2);
+    }
+    __ jmp(&return_from_runtime, Label::kNear);

-  __ bind(&base_not_smi);
-  __ CompareRoot(FieldOperand(rdx, HeapObject::kMapOffset),
-                 Heap::kHeapNumberMapRootIndex);
-  __ j(not_equal, &call_runtime);
-  __ movl(rcx, FieldOperand(rdx, HeapNumber::kExponentOffset));
-  __ andl(rcx, Immediate(HeapNumber::kExponentMask));
-  __ cmpl(rcx, Immediate(HeapNumber::kExponentMask));
-  // base is NaN or +/-Infinity
-  __ j(greater_equal, &call_runtime);
-  __ movsd(xmm0, FieldOperand(rdx, HeapNumber::kValueOffset));
+    __ bind(&double_int_runtime);
+    // Move base to the correct argument register.
+    __ movsd(xmm0, double_base);
+    // Exponent is already in the correct argument register:
+    // edi (not rdi) on Linux and edx on Windows.
+    {
+      AllowExternalCallThatCantCauseGC scope(masm);
+      __ PrepareCallCFunction(2);
+      __ CallCFunction(
+          ExternalReference::power_double_int_function(masm->isolate()), 2);
+    }

-  // base is in xmm0 and exponent is in xmm1.
-  __ bind(&handle_special_cases);
-  Label not_minus_half;
-  // Test for -0.5.
-  // Load xmm2 with -0.5.
-  __ movq(rcx, V8_UINT64_C(0xBFE0000000000000), RelocInfo::NONE);
-  __ movq(xmm2, rcx);
-  // xmm2 now has -0.5.
-  __ ucomisd(xmm2, xmm1);
-  __ j(not_equal, &not_minus_half, Label::kNear);
+    __ bind(&return_from_runtime);
+    // Return value is in xmm0.
+    __ movsd(double_result, xmm0);
+    // Restore context register.
+    __ movq(rsi, Operand(rbp, StandardFrameConstants::kContextOffset));

-  // Calculates reciprocal of square root.
-  // sqrtsd returns -0 when input is -0.  ECMA spec requires +0.
-  __ xorps(xmm1, xmm1);
-  __ addsd(xmm1, xmm0);
-  __ sqrtsd(xmm1, xmm1);
-  __ divsd(xmm3, xmm1);
-  __ movaps(xmm1, xmm3);
-  __ jmp(&allocate_return);
-
-  // Test for 0.5.
-  __ bind(&not_minus_half);
-  // Load xmm2 with 0.5.
-  // Since xmm3 is 1 and xmm2 is -0.5 this is simply xmm2 + xmm3.
-  __ addsd(xmm2, xmm3);
-  // xmm2 now has 0.5.
-  __ ucomisd(xmm2, xmm1);
-  __ j(not_equal, &call_runtime);
-  // Calculates square root.
-  // sqrtsd returns -0 when input is -0.  ECMA spec requires +0.
-  __ xorps(xmm1, xmm1);
-  __ addsd(xmm1, xmm0);  // Convert -0 to 0.
-  __ sqrtsd(xmm1, xmm1);
-
-  __ bind(&allocate_return);
-  __ AllocateHeapNumber(rcx, rax, &call_runtime);
-  __ movsd(FieldOperand(rcx, HeapNumber::kValueOffset), xmm1);
-  __ movq(rax, rcx);
-  __ ret(2 * kPointerSize);
-
-  __ bind(&call_runtime);
-  __ TailCallRuntime(Runtime::kMath_pow_cfunction, 2, 1);
+    __ bind(&done);
+    __ ret(0);
+  }
 }


--- a/src/x64/disasm-x64.cc
+++ b/src/x64/disasm-x64.cc
@ -911,15 +911,19 @@ int DisassemblerX64::RegisterFPUInstruction(int escape_opcode,
          switch (modrm_byte) {
            case 0xE0: mnem = "fchs"; break;
            case 0xE1: mnem = "fabs"; break;
+            case 0xE3: mnem = "fninit"; break;
            case 0xE4: mnem = "ftst"; break;
            case 0xE8: mnem = "fld1"; break;
            case 0xEB: mnem = "fldpi"; break;
            case 0xED: mnem = "fldln2"; break;
            case 0xEE: mnem = "fldz"; break;
+            case 0xF0: mnem = "f2xm1"; break;
            case 0xF1: mnem = "fyl2x"; break;
+            case 0xF2: mnem = "fptan"; break;
            case 0xF5: mnem = "fprem1"; break;
            case 0xF7: mnem = "fincstp"; break;
            case 0xF8: mnem = "fprem"; break;
+            case 0xFD: mnem = "fscale"; break;
            case 0xFE: mnem = "fsin"; break;
            case 0xFF: mnem = "fcos"; break;
            default: UnimplementedInstruction();
--- a/src/x64/lithium-codegen-x64.cc
+++ b/src/x64/lithium-codegen-x64.cc
@ -2880,58 +2880,39 @@ void LCodeGen::DoMathPowHalf(LUnaryMathOperation* instr) {


 void LCodeGen::DoPower(LPower* instr) {
-  LOperand* left = instr->InputAt(0);
-  XMMRegister left_reg = ToDoubleRegister(left);
-  ASSERT(!left_reg.is(xmm1));
-  LOperand* right = instr->InputAt(1);
-  XMMRegister result_reg = ToDoubleRegister(instr->result());
  Representation exponent_type = instr->hydrogen()->right()->representation();
-  if (exponent_type.IsDouble()) {
-    __ PrepareCallCFunction(2);
-    // Move arguments to correct registers
-    __ movaps(xmm0, left_reg);
-    ASSERT(ToDoubleRegister(right).is(xmm1));
-    __ CallCFunction(
-        ExternalReference::power_double_double_function(isolate()), 2);
-  } else if (exponent_type.IsInteger32()) {
-    __ PrepareCallCFunction(2);
-    // Move arguments to correct registers: xmm0 and edi (not rdi).
-    // On Windows, the registers are xmm0 and edx.
-    __ movaps(xmm0, left_reg);
+  // Having marked this as a call, we can use any registers.
+  // Just make sure that the input/output registers are the expected ones.
+
+  // Choose register conforming to calling convention (when bailing out).
 #ifdef _WIN64
-    ASSERT(ToRegister(right).is(rdx));
+  Register exponent = rdx;
 #else
-    ASSERT(ToRegister(right).is(rdi));
+  Register exponent = rdi;
 #endif
-    __ CallCFunction(
-        ExternalReference::power_double_int_function(isolate()), 2);
-  } else {
-    ASSERT(exponent_type.IsTagged());
-    Register right_reg = ToRegister(right);
+  ASSERT(!instr->InputAt(1)->IsRegister() ||
+         ToRegister(instr->InputAt(1)).is(exponent));
+  ASSERT(!instr->InputAt(1)->IsDoubleRegister() ||
+         ToDoubleRegister(instr->InputAt(1)).is(xmm1));
+  ASSERT(ToDoubleRegister(instr->InputAt(0)).is(xmm2));
+  ASSERT(ToDoubleRegister(instr->result()).is(xmm3));

-    Label non_smi, call;
-    __ JumpIfNotSmi(right_reg, &non_smi);
-    __ SmiToInteger32(right_reg, right_reg);
-    __ cvtlsi2sd(xmm1, right_reg);
-    __ jmp(&call);
-
-    __ bind(&non_smi);
-    __ CmpObjectType(right_reg, HEAP_NUMBER_TYPE , kScratchRegister);
+  if (exponent_type.IsTagged()) {
+    Label no_deopt;
+    __ JumpIfSmi(exponent, &no_deopt);
+    __ CmpObjectType(exponent, HEAP_NUMBER_TYPE, rcx);
    DeoptimizeIf(not_equal, instr->environment());
-    __ movsd(xmm1, FieldOperand(right_reg, HeapNumber::kValueOffset));
-
-    __ bind(&call);
-    __ PrepareCallCFunction(2);
-    // Move arguments to correct registers xmm0 and xmm1.
-    __ movaps(xmm0, left_reg);
-    // Right argument is already in xmm1.
-    __ CallCFunction(
-        ExternalReference::power_double_double_function(isolate()), 2);
+    __ bind(&no_deopt);
+    MathPowStub stub(MathPowStub::TAGGED);
+    __ CallStub(&stub);
+  } else if (exponent_type.IsInteger32()) {
+    MathPowStub stub(MathPowStub::INTEGER);
+    __ CallStub(&stub);
+  } else {
+    ASSERT(exponent_type.IsDouble());
+    MathPowStub stub(MathPowStub::DOUBLE);
+    __ CallStub(&stub);
  }
-  // Return value is in xmm0.
-  __ movaps(result_reg, xmm0);
-  // Restore context register.
-  __ movq(rsi, Operand(rbp, StandardFrameConstants::kContextOffset));
 }


--- a/src/x64/lithium-x64.cc
+++ b/src/x64/lithium-x64.cc
@ -1397,7 +1397,7 @@ LInstruction* LChunkBuilder::DoPower(HPower* instr) {
      UseFixed(instr->right(), rdi);
 #endif
  LPower* result = new LPower(left, right);
-  return MarkAsCall(DefineFixedDouble(result, xmm1), instr,
+  return MarkAsCall(DefineFixedDouble(result, xmm3), instr,
                    CAN_DEOPTIMIZE_EAGERLY);
 }