MIPS: Faster implementation of Math.exp()

Port r13054 (636985d7) BUG= TEST= Review URL: https://codereview.chromium.org/11415192 Patch from Akos Palfi <palfia@homejinni.com>. git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13089 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
2012-11-29 09:22:08 +00:00 · 2012-11-29 09:22:08 +00:00 · b43ec5112f
commit b43ec5112f
parent bea4580fa8
7 changed files with 294 additions and 29 deletions
--- a/src/mips/codegen-mips.cc
+++ b/src/mips/codegen-mips.cc
@ -31,11 +31,11 @@

 #include "codegen.h"
 #include "macro-assembler.h"
+#include "simulator-mips.h"

 namespace v8 {
 namespace internal {

-#define __ ACCESS_MASM(masm)

 UnaryMathFunction CreateTranscendentalFunction(TranscendentalCache::Type type) {
  switch (type) {
@ -49,6 +49,74 @@ UnaryMathFunction CreateTranscendentalFunction(TranscendentalCache::Type type) {
 }


+#define __ masm.
+
+
+#if defined(USE_SIMULATOR)
+// Entry point of the generated MIPS exp() code. It stays NULL until
+// CreateExpFunction() has produced the code.
+byte* fast_exp_mips_machine_code = NULL;
+
+// Host-callable wrapper that runs the generated exp() code inside the
+// simulator of the current isolate. The second double argument of CallFP is
+// passed as 0.
+double fast_exp_simulator(double x) {
+  Simulator* simulator = Simulator::current(Isolate::Current());
+  return simulator->CallFP(fast_exp_mips_machine_code, x, 0);
+}
+#endif
+
+
+// Builds a stand-alone code stub that computes exp(x) with
+// MathExpGenerator::EmitMathExp and returns it as a C-callable function.
+//
+// Falls back to the C library exp() when the FPU is not supported, when
+// FLAG_fast_math is off, or when the code buffer cannot be allocated.
+// Under USE_SIMULATOR the returned function is a wrapper that executes the
+// generated code in the simulator.
+UnaryMathFunction CreateExpFunction() {
+  if (!CpuFeatures::IsSupported(FPU)) return &exp;
+  if (!FLAG_fast_math) return &exp;
+  size_t actual_size;
+  byte* buffer = static_cast<byte*>(OS::Allocate(1 * KB, &actual_size, true));
+  if (buffer == NULL) return &exp;
+  ExternalReference::InitializeMathExpData();
+
+  MacroAssembler masm(NULL, buffer, static_cast<int>(actual_size));
+
+  {
+    CpuFeatures::Scope use_fpu(FPU);
+    DoubleRegister input = f12;
+    DoubleRegister result = f0;
+    DoubleRegister double_scratch1 = f4;
+    DoubleRegister double_scratch2 = f6;
+    Register temp1 = t0;
+    Register temp2 = t1;
+    Register temp3 = t2;
+
+    if (IsMipsSoftFloatABI) {
+      // Soft-float ABI: the double argument arrives in the a0/a1 pair.
+      __ Move(input, a0, a1);
+    }
+    // Hard-float ABI: the argument is already in f12, nothing to do.
+
+    // EmitMathExp overwrites the three integer temps, so preserve them
+    // around the emitted sequence.
+    __ Push(temp3, temp2, temp1);
+    MathExpGenerator::EmitMathExp(
+        &masm, input, result, double_scratch1, double_scratch2,
+        temp1, temp2, temp3);
+    __ Pop(temp3, temp2, temp1);
+
+    if (IsMipsSoftFloatABI) {
+      // Soft-float ABI: a double is returned in the v0/v1 pair, which is
+      // also where Simulator::CallFP reads the result. Returning it in the
+      // argument registers a0/a1 would hand the caller garbage.
+      __ Move(v0, v1, result);
+    }
+    // Hard-float ABI: the result is already in f0, nothing to do.
+    __ Ret();
+  }
+
+  CodeDesc desc;
+  masm.GetCode(&desc);
+
+  // Make the freshly written instructions visible to the instruction
+  // stream, then protect the buffer as code.
+  CPU::FlushICache(buffer, actual_size);
+  OS::ProtectCode(buffer, actual_size);
+
+#if !defined(USE_SIMULATOR)
+  return FUNCTION_CAST<UnaryMathFunction>(buffer);
+#else
+  // The host cannot execute MIPS code directly: remember the entry point and
+  // return the wrapper that runs it in the simulator.
+  fast_exp_mips_machine_code = buffer;
+  return &fast_exp_simulator;
+#endif
+}
+
+
+#undef __
+
+
 UnaryMathFunction CreateSqrtFunction() {
  return &sqrt;
 }
@ -72,6 +140,8 @@ void StubRuntimeCallHelper::AfterCall(MacroAssembler* masm) const {
 // -------------------------------------------------------------------------
 // Code generators

+#define __ ACCESS_MASM(masm)
+
 void ElementsTransitionGenerator::GenerateMapChangeElementsTransition(
    MacroAssembler* masm) {
  // ----------- S t a t e -------------
@ -446,6 +516,81 @@ void StringCharLoadGenerator::Generate(MacroAssembler* masm,
  __ bind(&done);
 }

+
+// Returns the memory operand addressing the index-th double-sized slot
+// relative to |base|.
+static MemOperand ExpConstant(int index, Register base) {
+  const int byte_offset = index * kDoubleSize;
+  return MemOperand(base, byte_offset);
+}
+
+
+// Emits the instruction sequence that computes exp(input) into |result|.
+//
+// Besides |result|, the emitted code overwrites |input|, both double scratch
+// registers, all three integer temps and the assembler temporary |at|.
+// The four double registers must be pairwise distinct, and so must the three
+// integer temps; the exp constant table must already be initialized (all of
+// this is asserted below).
+void MathExpGenerator::EmitMathExp(MacroAssembler* masm,
+                                   DoubleRegister input,
+                                   DoubleRegister result,
+                                   DoubleRegister double_scratch1,
+                                   DoubleRegister double_scratch2,
+                                   Register temp1,
+                                   Register temp2,
+                                   Register temp3) {
+  ASSERT(!input.is(result));
+  ASSERT(!input.is(double_scratch1));
+  ASSERT(!input.is(double_scratch2));
+  ASSERT(!result.is(double_scratch1));
+  ASSERT(!result.is(double_scratch2));
+  ASSERT(!double_scratch1.is(double_scratch2));
+  ASSERT(!temp1.is(temp2));
+  ASSERT(!temp1.is(temp3));
+  ASSERT(!temp2.is(temp3));
+  ASSERT(ExternalReference::math_exp_constants(0).address() != NULL);
+
+  Label done;
+
+  // temp3 holds the base address of the exp constant table.
+  __ li(temp3, Operand(ExternalReference::math_exp_constants(0)));
+
+  // Preload result from kDoubleRegZero (presumably 0.0) and finish early
+  // when constant[0] >= input.
+  __ ldc1(double_scratch1, ExpConstant(0, temp3));
+  __ Move(result, kDoubleRegZero);
+  __ BranchF(&done, NULL, ge, double_scratch1, input);
+  // Preload result with constant[2] and finish early when
+  // input >= constant[1].
+  __ ldc1(double_scratch2, ExpConstant(1, temp3));
+  __ ldc1(result, ExpConstant(2, temp3));
+  __ BranchF(&done, NULL, ge, input, double_scratch2);
+  // double_scratch1 = input * constant[3] + constant[4]. Its raw bits are
+  // captured in temp2/temp1 before constant[4] is subtracted again.
+  __ ldc1(double_scratch1, ExpConstant(3, temp3));
+  __ ldc1(result, ExpConstant(4, temp3));
+  __ mul_d(double_scratch1, double_scratch1, input);
+  __ add_d(double_scratch1, double_scratch1, result);
+  __ Move(temp2, temp1, double_scratch1);
+  __ sub_d(double_scratch1, double_scratch1, result);
+  // Floating-point evaluation using constants 5..8. |input| is no longer
+  // needed as the argument and is reused to hold double_scratch1 squared.
+  __ ldc1(result, ExpConstant(6, temp3));
+  __ ldc1(double_scratch2, ExpConstant(5, temp3));
+  __ mul_d(double_scratch1, double_scratch1, double_scratch2);
+  __ sub_d(double_scratch1, double_scratch1, input);
+  __ sub_d(result, result, double_scratch1);
+  __ mul_d(input, double_scratch1, double_scratch1);
+  __ mul_d(result, result, input);
+  // temp1 = temp2 >> 11 (interleaved with the floating-point work).
+  __ srl(temp1, temp2, 11);
+  __ ldc1(double_scratch2, ExpConstant(7, temp3));
+  __ mul_d(result, result, double_scratch2);
+  __ sub_d(result, result, double_scratch1);
+  __ ldc1(double_scratch2, ExpConstant(8, temp3));
+  __ add_d(result, result, double_scratch2);
+  // temp2 keeps only its low 11 bits; it is used below as the table index.
+  __ li(at, 0x7ff);
+  __ And(temp2, temp2, at);
+  // temp1 = (temp1 + 0x3ff) << 20.
+  // NOTE(review): looks like a biased IEEE-754 exponent placed in the high
+  // word of a double - confirm.
+  __ Addu(temp1, temp1, Operand(0x3ff));
+  __ sll(temp1, temp1, 20);
+
+  // Must not call ExpConstant() after overwriting temp3!
+  __ li(temp3, Operand(ExternalReference::math_exp_log_table()));
+  // Load the two words of the 8-byte table entry selected by temp2: the word
+  // at offset 0 into |at|, the word at offset kPointerSize into temp2.
+  __ sll(at, temp2, 3);
+  __ addu(at, at, temp3);
+  __ lw(at, MemOperand(at));
+  __ Addu(temp3, temp3, Operand(kPointerSize));
+  __ sll(temp2, temp2, 3);
+  __ addu(temp2, temp2, temp3);
+  __ lw(temp2, MemOperand(temp2));
+  // Merge the shifted value into the second word, move the word pair into
+  // |input| as a double and scale the result by it.
+  __ Or(temp1, temp1, temp2);
+  __ Move(input, at, temp1);
+  __ mul_d(result, result, input);
+  __ bind(&done);
+}
+
+
 // nop(CODE_AGE_MARKER_NOP)
 static const uint32_t kCodeAgePatchFirstInstruction = 0x00010180;

--- a/src/mips/codegen-mips.h
+++ b/src/mips/codegen-mips.h
@ -90,6 +90,22 @@ class StringCharLoadGenerator : public AllStatic {
  DISALLOW_COPY_AND_ASSIGN(StringCharLoadGenerator);
 };

+
+// Static-only helper that emits the inline code sequence for exp().
+class MathExpGenerator : public AllStatic {
+ public:
+  // Emits code computing exp(input) into |result|.
+  // |double_scratch1|, |double_scratch2| and |temp1|..|temp3| are scratch
+  // registers supplied by the caller for the emitted code to use.
+  static void EmitMathExp(MacroAssembler* masm,
+                          DoubleRegister input,
+                          DoubleRegister result,
+                          DoubleRegister double_scratch1,
+                          DoubleRegister double_scratch2,
+                          Register temp1,
+                          Register temp2,
+                          Register temp3);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(MathExpGenerator);
+};
+
 } }  // namespace v8::internal

 #endif  // V8_MIPS_CODEGEN_MIPS_H_
--- a/src/mips/lithium-codegen-mips.cc
+++ b/src/mips/lithium-codegen-mips.cc
@ -3500,6 +3500,20 @@ void LCodeGen::DoDeferredRandom(LRandom* instr) {
 }


+// Generates code for an LMathExp instruction: resolves the operands of the
+// instruction to machine registers and delegates to the shared exp emitter.
+void LCodeGen::DoMathExp(LMathExp* instr) {
+  DoubleRegister value_reg = ToDoubleRegister(instr->value());
+  DoubleRegister result_reg = ToDoubleRegister(instr->result());
+  DoubleRegister fp_temp = ToDoubleRegister(instr->double_temp());
+  Register int_temp1 = ToRegister(instr->temp1());
+  Register int_temp2 = ToRegister(instr->temp2());
+
+  // The code generator's own scratch registers serve as the second double
+  // scratch and the third integer temp.
+  MathExpGenerator::EmitMathExp(
+      masm(), value_reg, result_reg, fp_temp, double_scratch0(),
+      int_temp1, int_temp2, scratch0());
+}
+
+
 void LCodeGen::DoMathLog(LUnaryMathOperation* instr) {
  ASSERT(ToDoubleRegister(instr->result()).is(f4));
  TranscendentalCacheStub stub(TranscendentalCache::LOG,
--- a/src/mips/lithium-mips.cc
+++ b/src/mips/lithium-mips.cc
@ -297,6 +297,11 @@ void LUnaryMathOperation::PrintDataTo(StringStream* stream) {
 }


+// Prints the instruction's input operand to |stream|.
+void LMathExp::PrintDataTo(StringStream* stream) {
+  value()->PrintTo(stream);
+}
+
+
 void LLoadContextSlot::PrintDataTo(StringStream* stream) {
  context()->PrintTo(stream);
  stream->Add("[%d]", slot_index());
@ -1040,6 +1045,15 @@ LInstruction* LChunkBuilder::DoUnaryMathOperation(HUnaryMathOperation* instr) {
    LOperand* input = UseFixedDouble(instr->value(), f4);
    LUnaryMathOperation* result = new(zone()) LUnaryMathOperation(input, NULL);
    return MarkAsCall(DefineFixedDouble(result, f4), instr);
+  } else if (op == kMathExp) {
+    ASSERT(instr->representation().IsDouble());
+    ASSERT(instr->value()->representation().IsDouble());
+    LOperand* input = UseTempRegister(instr->value());
+    LOperand* temp1 = TempRegister();
+    LOperand* temp2 = TempRegister();
+    LOperand* double_temp = FixedTemp(f6);  // Chosen by fair dice roll.
+    LMathExp* result = new(zone()) LMathExp(input, double_temp, temp1, temp2);
+    return DefineAsRegister(result);
  } else if (op == kMathPowHalf) {
    // Input cannot be the same as the result.
    // See lithium-codegen-mips.cc::DoMathPowHalf.
--- a/src/mips/lithium-mips.h
+++ b/src/mips/lithium-mips.h
@ -131,6 +131,7 @@ class LCodeGen;
  V(LoadNamedFieldPolymorphic)                  \
  V(LoadNamedGeneric)                           \
  V(MapEnumLength)                              \
+  V(MathExp)                                    \
  V(MathMinMax)                                 \
  V(ModI)                                       \
  V(MulI)                                       \
@ -641,6 +642,30 @@ class LUnaryMathOperation: public LTemplateInstruction<1, 1, 1> {
 };


+// Lithium instruction for exp() on a double value. It has one input (the
+// value) and three temps: two named temp1/temp2 and one double temp.
+class LMathExp: public LTemplateInstruction<1, 1, 3> {
+ public:
+  // Note the storage order: temp1 and temp2 occupy temps_[0] and temps_[1],
+  // the double temp comes last, regardless of the parameter order here.
+  LMathExp(LOperand* value,
+           LOperand* double_temp,
+           LOperand* temp1,
+           LOperand* temp2) {
+    inputs_[0] = value;
+    temps_[0] = temp1;
+    temps_[1] = temp2;
+    temps_[2] = double_temp;
+    // Set up the exp data used by the generated code when the instruction is
+    // created.
+    ExternalReference::InitializeMathExpData();
+  }
+
+  // Accessors for the operands stored by the constructor.
+  LOperand* value() { return inputs_[0]; }
+  LOperand* temp1() { return temps_[0]; }
+  LOperand* temp2() { return temps_[1]; }
+  LOperand* double_temp() { return temps_[2]; }
+
+  DECLARE_CONCRETE_INSTRUCTION(MathExp, "math-exp")
+
+  virtual void PrintDataTo(StringStream* stream);
+};
+
+
 class LCmpObjectEqAndBranch: public LControlInstruction<2, 0> {
 public:
  LCmpObjectEqAndBranch(LOperand* left, LOperand* right) {
--- a/src/mips/simulator-mips.cc
+++ b/src/mips/simulator-mips.cc
@ -1016,6 +1016,13 @@ void Simulator::set_register(int reg, int32_t value) {
 }


+// Writes a double-word value, supplied as two words, into the register pair
+// (reg, reg + 1): dbl[0] goes to reg and dbl[1] to reg + 1.
+void Simulator::set_dw_register(int reg, const int* dbl) {
+  // Both registers of the pair are written, so the upper one must be inside
+  // the register file as well.
+  ASSERT((reg >= 0) && (reg + 1 < kNumSimuRegisters));
+  registers_[reg] = dbl[0];
+  registers_[reg + 1] = dbl[1];
+}
+
+
 void Simulator::set_fpu_register(int fpureg, int32_t value) {
  ASSERT((fpureg >= 0) && (fpureg < kNumFPURegisters));
  FPUregisters_[fpureg] = value;
@ -1045,6 +1052,19 @@ int32_t Simulator::get_register(int reg) const {
 }


+// Returns the double whose raw bits are held in the register pair
+// (reg, reg + 1). |reg| must be an even register number.
+double Simulator::get_double_from_register_pair(int reg) {
+  ASSERT((reg >= 0) && (reg < kNumSimuRegisters) && ((reg % 2) == 0));
+
+  // Copy the bits of the two adjacent registers_[] entries straight into the
+  // double; memcpy moves the bytes without any type-punning cast.
+  const size_t pair_size = 2 * sizeof(registers_[0]);
+  double pair_value = 0.0;
+  memcpy(&pair_value, &registers_[reg], pair_size);
+  return pair_value;
+}
+
+
 int32_t Simulator::get_fpu_register(int fpureg) const {
  ASSERT((fpureg >= 0) && (fpureg < kNumFPURegisters));
  return FPUregisters_[fpureg];
@ -2718,34 +2738,7 @@ void Simulator::Execute() {
 }


-int32_t Simulator::Call(byte* entry, int argument_count, ...) {
-  va_list parameters;
-  va_start(parameters, argument_count);
-  // Set up arguments.
-
-  // First four arguments passed in registers.
-  ASSERT(argument_count >= 4);
-  set_register(a0, va_arg(parameters, int32_t));
-  set_register(a1, va_arg(parameters, int32_t));
-  set_register(a2, va_arg(parameters, int32_t));
-  set_register(a3, va_arg(parameters, int32_t));
-
-  // Remaining arguments passed on stack.
-  int original_stack = get_register(sp);
-  // Compute position of stack on entry to generated code.
-  int entry_stack = (original_stack - (argument_count - 4) * sizeof(int32_t)
-                                    - kCArgsSlotsSize);
-  if (OS::ActivationFrameAlignment() != 0) {
-    entry_stack &= -OS::ActivationFrameAlignment();
-  }
-  // Store remaining arguments on stack, from low to high memory.
-  intptr_t* stack_argument = reinterpret_cast<intptr_t*>(entry_stack);
-  for (int i = 4; i < argument_count; i++) {
-    stack_argument[i - 4 + kCArgSlotCount] = va_arg(parameters, int32_t);
-  }
-  va_end(parameters);
-  set_register(sp, entry_stack);
-
+void Simulator::CallInternal(byte* entry) {
  // Prepare to execute the code at entry.
  set_register(pc, reinterpret_cast<int32_t>(entry));
  // Put down marker for end of simulation. The simulator will stop simulation
@ -2809,6 +2802,38 @@ int32_t Simulator::Call(byte* entry, int argument_count, ...) {
  set_register(gp, gp_val);
  set_register(sp, sp_val);
  set_register(fp, fp_val);
+}
+
+
+int32_t Simulator::Call(byte* entry, int argument_count, ...) {
+  va_list parameters;
+  va_start(parameters, argument_count);
+  // Set up arguments.
+
+  // First four arguments passed in registers.
+  ASSERT(argument_count >= 4);
+  set_register(a0, va_arg(parameters, int32_t));
+  set_register(a1, va_arg(parameters, int32_t));
+  set_register(a2, va_arg(parameters, int32_t));
+  set_register(a3, va_arg(parameters, int32_t));
+
+  // Remaining arguments passed on stack.
+  int original_stack = get_register(sp);
+  // Compute position of stack on entry to generated code.
+  int entry_stack = (original_stack - (argument_count - 4) * sizeof(int32_t)
+                                    - kCArgsSlotsSize);
+  if (OS::ActivationFrameAlignment() != 0) {
+    entry_stack &= -OS::ActivationFrameAlignment();
+  }
+  // Store remaining arguments on stack, from low to high memory.
+  intptr_t* stack_argument = reinterpret_cast<intptr_t*>(entry_stack);
+  for (int i = 4; i < argument_count; i++) {
+    stack_argument[i - 4 + kCArgSlotCount] = va_arg(parameters, int32_t);
+  }
+  va_end(parameters);
+  set_register(sp, entry_stack);
+
+  CallInternal(entry);

  // Pop stack passed arguments.
  CHECK_EQ(entry_stack, get_register(sp));
@ -2819,6 +2844,27 @@ int32_t Simulator::Call(byte* entry, int argument_count, ...) {
 }


+// Runs the generated code at |entry| with two double arguments and returns
+// its double result, passing the values according to the active float ABI.
+double Simulator::CallFP(byte* entry, double d0, double d1) {
+  if (IsMipsSoftFloatABI) {
+    // Soft-float: each double is passed as raw bits in an integer register
+    // pair, d0 in the pair starting at a0 and d1 in the pair starting at a2.
+    int words[2];
+    ASSERT(sizeof(words[0]) * 2 == sizeof(d0));
+    memcpy(words, &d0, sizeof(d0));
+    set_dw_register(a0, words);
+    memcpy(words, &d1, sizeof(d1));
+    set_dw_register(a2, words);
+  } else {
+    // Hard-float: the arguments are passed in f12 and f14.
+    set_fpu_register_double(f12, d0);
+    set_fpu_register_double(f14, d1);
+  }
+
+  CallInternal(entry);
+
+  // The result comes back in the v0 register pair (soft-float) or in f0
+  // (hard-float).
+  return IsMipsSoftFloatABI ? get_double_from_register_pair(v0)
+                            : get_fpu_register_double(f0);
+}
+
+
 uintptr_t Simulator::PushAddress(uintptr_t address) {
  int new_sp = get_register(sp) - sizeof(uintptr_t);
  uintptr_t* stack_slot = reinterpret_cast<uintptr_t*>(new_sp);
--- a/src/mips/simulator-mips.h
+++ b/src/mips/simulator-mips.h
@ -184,7 +184,9 @@ class Simulator {
  // architecture specification and is off by a 8 from the currently executing
  // instruction.
  void set_register(int reg, int32_t value);
+  void set_dw_register(int dreg, const int* dbl);
  int32_t get_register(int reg) const;
+  double get_double_from_register_pair(int reg);
  // Same for FPURegisters.
  void set_fpu_register(int fpureg, int32_t value);
  void set_fpu_register_float(int fpureg, float value);
@ -214,6 +216,8 @@ class Simulator {
  // generated RegExp code with 7 parameters. This is a convenience function,
  // which sets up the simulator state and grabs the result on return.
  int32_t Call(byte* entry, int argument_count, ...);
+  // Alternative: call a 2-argument double function.
+  double CallFP(byte* entry, double d0, double d1);

  // Push an address onto the JS stack.
  uintptr_t PushAddress(uintptr_t address);
@ -353,6 +357,7 @@ class Simulator {
  void GetFpArgs(double* x, int32_t* y);
  void SetFpResult(const double& result);

+  void CallInternal(byte* entry);

  // Architecture state.
  // Registers.