MIPS: Faster implementation of Math.exp()

Port r13054 (636985d7)

BUG=
TEST=

Review URL: https://codereview.chromium.org/11415192
Patch from Akos Palfi <palfia@homejinni.com>.

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13089 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
jkummerow@chromium.org 2012-11-29 09:22:08 +00:00
parent bea4580fa8
commit b43ec5112f
7 changed files with 294 additions and 29 deletions

View File

@ -31,11 +31,11 @@
#include "codegen.h"
#include "macro-assembler.h"
#include "simulator-mips.h"
namespace v8 {
namespace internal {
#define __ ACCESS_MASM(masm)
UnaryMathFunction CreateTranscendentalFunction(TranscendentalCache::Type type) {
switch (type) {
@ -49,6 +49,74 @@ UnaryMathFunction CreateTranscendentalFunction(TranscendentalCache::Type type) {
}
#define __ masm.
#if defined(USE_SIMULATOR)
byte* fast_exp_mips_machine_code = NULL;
double fast_exp_simulator(double x) {
return Simulator::current(Isolate::Current())->CallFP(
fast_exp_mips_machine_code, x, 0);
}
#endif
UnaryMathFunction CreateExpFunction() {
if (!CpuFeatures::IsSupported(FPU)) return &exp;
if (!FLAG_fast_math) return &exp;
size_t actual_size;
byte* buffer = static_cast<byte*>(OS::Allocate(1 * KB, &actual_size, true));
if (buffer == NULL) return &exp;
ExternalReference::InitializeMathExpData();
MacroAssembler masm(NULL, buffer, static_cast<int>(actual_size));
{
CpuFeatures::Scope use_fpu(FPU);
DoubleRegister input = f12;
DoubleRegister result = f0;
DoubleRegister double_scratch1 = f4;
DoubleRegister double_scratch2 = f6;
Register temp1 = t0;
Register temp2 = t1;
Register temp3 = t2;
if (!IsMipsSoftFloatABI) {
// Input value is in f12 anyway, nothing to do.
} else {
__ Move(input, a0, a1);
}
__ Push(temp3, temp2, temp1);
MathExpGenerator::EmitMathExp(
&masm, input, result, double_scratch1, double_scratch2,
temp1, temp2, temp3);
__ Pop(temp3, temp2, temp1);
if (!IsMipsSoftFloatABI) {
// Result is already in f0, nothing to do.
} else {
__ Move(a0, a1, result);
}
__ Ret();
}
CodeDesc desc;
masm.GetCode(&desc);
CPU::FlushICache(buffer, actual_size);
OS::ProtectCode(buffer, actual_size);
#if !defined(USE_SIMULATOR)
return FUNCTION_CAST<UnaryMathFunction>(buffer);
#else
fast_exp_mips_machine_code = buffer;
return &fast_exp_simulator;
#endif
}
#undef __
UnaryMathFunction CreateSqrtFunction() {
return &sqrt;
}
@ -72,6 +140,8 @@ void StubRuntimeCallHelper::AfterCall(MacroAssembler* masm) const {
// -------------------------------------------------------------------------
// Code generators
#define __ ACCESS_MASM(masm)
void ElementsTransitionGenerator::GenerateMapChangeElementsTransition(
MacroAssembler* masm) {
// ----------- S t a t e -------------
@ -446,6 +516,81 @@ void StringCharLoadGenerator::Generate(MacroAssembler* masm,
__ bind(&done);
}
static MemOperand ExpConstant(int index, Register base) {
return MemOperand(base, index * kDoubleSize);
}
void MathExpGenerator::EmitMathExp(MacroAssembler* masm,
DoubleRegister input,
DoubleRegister result,
DoubleRegister double_scratch1,
DoubleRegister double_scratch2,
Register temp1,
Register temp2,
Register temp3) {
ASSERT(!input.is(result));
ASSERT(!input.is(double_scratch1));
ASSERT(!input.is(double_scratch2));
ASSERT(!result.is(double_scratch1));
ASSERT(!result.is(double_scratch2));
ASSERT(!double_scratch1.is(double_scratch2));
ASSERT(!temp1.is(temp2));
ASSERT(!temp1.is(temp3));
ASSERT(!temp2.is(temp3));
ASSERT(ExternalReference::math_exp_constants(0).address() != NULL);
Label done;
__ li(temp3, Operand(ExternalReference::math_exp_constants(0)));
__ ldc1(double_scratch1, ExpConstant(0, temp3));
__ Move(result, kDoubleRegZero);
__ BranchF(&done, NULL, ge, double_scratch1, input);
__ ldc1(double_scratch2, ExpConstant(1, temp3));
__ ldc1(result, ExpConstant(2, temp3));
__ BranchF(&done, NULL, ge, input, double_scratch2);
__ ldc1(double_scratch1, ExpConstant(3, temp3));
__ ldc1(result, ExpConstant(4, temp3));
__ mul_d(double_scratch1, double_scratch1, input);
__ add_d(double_scratch1, double_scratch1, result);
__ Move(temp2, temp1, double_scratch1);
__ sub_d(double_scratch1, double_scratch1, result);
__ ldc1(result, ExpConstant(6, temp3));
__ ldc1(double_scratch2, ExpConstant(5, temp3));
__ mul_d(double_scratch1, double_scratch1, double_scratch2);
__ sub_d(double_scratch1, double_scratch1, input);
__ sub_d(result, result, double_scratch1);
__ mul_d(input, double_scratch1, double_scratch1);
__ mul_d(result, result, input);
__ srl(temp1, temp2, 11);
__ ldc1(double_scratch2, ExpConstant(7, temp3));
__ mul_d(result, result, double_scratch2);
__ sub_d(result, result, double_scratch1);
__ ldc1(double_scratch2, ExpConstant(8, temp3));
__ add_d(result, result, double_scratch2);
__ li(at, 0x7ff);
__ And(temp2, temp2, at);
__ Addu(temp1, temp1, Operand(0x3ff));
__ sll(temp1, temp1, 20);
// Must not call ExpConstant() after overwriting temp3!
__ li(temp3, Operand(ExternalReference::math_exp_log_table()));
__ sll(at, temp2, 3);
__ addu(at, at, temp3);
__ lw(at, MemOperand(at));
__ Addu(temp3, temp3, Operand(kPointerSize));
__ sll(temp2, temp2, 3);
__ addu(temp2, temp2, temp3);
__ lw(temp2, MemOperand(temp2));
__ Or(temp1, temp1, temp2);
__ Move(input, at, temp1);
__ mul_d(result, result, input);
__ bind(&done);
}
// nop(CODE_AGE_MARKER_NOP)
static const uint32_t kCodeAgePatchFirstInstruction = 0x00010180;

View File

@ -90,6 +90,22 @@ class StringCharLoadGenerator : public AllStatic {
DISALLOW_COPY_AND_ASSIGN(StringCharLoadGenerator);
};
class MathExpGenerator : public AllStatic {
public:
static void EmitMathExp(MacroAssembler* masm,
DoubleRegister input,
DoubleRegister result,
DoubleRegister double_scratch1,
DoubleRegister double_scratch2,
Register temp1,
Register temp2,
Register temp3);
private:
DISALLOW_COPY_AND_ASSIGN(MathExpGenerator);
};
} } // namespace v8::internal
#endif // V8_MIPS_CODEGEN_MIPS_H_

View File

@ -3500,6 +3500,20 @@ void LCodeGen::DoDeferredRandom(LRandom* instr) {
}
void LCodeGen::DoMathExp(LMathExp* instr) {
DoubleRegister input = ToDoubleRegister(instr->value());
DoubleRegister result = ToDoubleRegister(instr->result());
DoubleRegister double_scratch1 = ToDoubleRegister(instr->double_temp());
DoubleRegister double_scratch2 = double_scratch0();
Register temp1 = ToRegister(instr->temp1());
Register temp2 = ToRegister(instr->temp2());
MathExpGenerator::EmitMathExp(
masm(), input, result, double_scratch1, double_scratch2,
temp1, temp2, scratch0());
}
void LCodeGen::DoMathLog(LUnaryMathOperation* instr) {
ASSERT(ToDoubleRegister(instr->result()).is(f4));
TranscendentalCacheStub stub(TranscendentalCache::LOG,

View File

@ -297,6 +297,11 @@ void LUnaryMathOperation::PrintDataTo(StringStream* stream) {
}
void LMathExp::PrintDataTo(StringStream* stream) {
value()->PrintTo(stream);
}
void LLoadContextSlot::PrintDataTo(StringStream* stream) {
context()->PrintTo(stream);
stream->Add("[%d]", slot_index());
@ -1040,6 +1045,15 @@ LInstruction* LChunkBuilder::DoUnaryMathOperation(HUnaryMathOperation* instr) {
LOperand* input = UseFixedDouble(instr->value(), f4);
LUnaryMathOperation* result = new(zone()) LUnaryMathOperation(input, NULL);
return MarkAsCall(DefineFixedDouble(result, f4), instr);
} else if (op == kMathExp) {
ASSERT(instr->representation().IsDouble());
ASSERT(instr->value()->representation().IsDouble());
LOperand* input = UseTempRegister(instr->value());
LOperand* temp1 = TempRegister();
LOperand* temp2 = TempRegister();
LOperand* double_temp = FixedTemp(f6); // Chosen by fair dice roll.
LMathExp* result = new(zone()) LMathExp(input, double_temp, temp1, temp2);
return DefineAsRegister(result);
} else if (op == kMathPowHalf) {
// Input cannot be the same as the result.
// See lithium-codegen-mips.cc::DoMathPowHalf.

View File

@ -131,6 +131,7 @@ class LCodeGen;
V(LoadNamedFieldPolymorphic) \
V(LoadNamedGeneric) \
V(MapEnumLength) \
V(MathExp) \
V(MathMinMax) \
V(ModI) \
V(MulI) \
@ -641,6 +642,30 @@ class LUnaryMathOperation: public LTemplateInstruction<1, 1, 1> {
};
class LMathExp: public LTemplateInstruction<1, 1, 3> {
public:
LMathExp(LOperand* value,
LOperand* double_temp,
LOperand* temp1,
LOperand* temp2) {
inputs_[0] = value;
temps_[0] = temp1;
temps_[1] = temp2;
temps_[2] = double_temp;
ExternalReference::InitializeMathExpData();
}
LOperand* value() { return inputs_[0]; }
LOperand* temp1() { return temps_[0]; }
LOperand* temp2() { return temps_[1]; }
LOperand* double_temp() { return temps_[2]; }
DECLARE_CONCRETE_INSTRUCTION(MathExp, "math-exp")
virtual void PrintDataTo(StringStream* stream);
};
class LCmpObjectEqAndBranch: public LControlInstruction<2, 0> {
public:
LCmpObjectEqAndBranch(LOperand* left, LOperand* right) {

View File

@ -1016,6 +1016,13 @@ void Simulator::set_register(int reg, int32_t value) {
}
void Simulator::set_dw_register(int reg, const int* dbl) {
ASSERT((reg >= 0) && (reg < kNumSimuRegisters));
registers_[reg] = dbl[0];
registers_[reg + 1] = dbl[1];
}
void Simulator::set_fpu_register(int fpureg, int32_t value) {
ASSERT((fpureg >= 0) && (fpureg < kNumFPURegisters));
FPUregisters_[fpureg] = value;
@ -1045,6 +1052,19 @@ int32_t Simulator::get_register(int reg) const {
}
double Simulator::get_double_from_register_pair(int reg) {
ASSERT((reg >= 0) && (reg < kNumSimuRegisters) && ((reg % 2) == 0));
double dm_val = 0.0;
// Read the bits from the unsigned integer register_[] array
// into the double precision floating point value and return it.
char buffer[2 * sizeof(registers_[0])];
memcpy(buffer, &registers_[reg], 2 * sizeof(registers_[0]));
memcpy(&dm_val, buffer, 2 * sizeof(registers_[0]));
return(dm_val);
}
int32_t Simulator::get_fpu_register(int fpureg) const {
ASSERT((fpureg >= 0) && (fpureg < kNumFPURegisters));
return FPUregisters_[fpureg];
@ -2718,34 +2738,7 @@ void Simulator::Execute() {
}
int32_t Simulator::Call(byte* entry, int argument_count, ...) {
va_list parameters;
va_start(parameters, argument_count);
// Set up arguments.
// First four arguments passed in registers.
ASSERT(argument_count >= 4);
set_register(a0, va_arg(parameters, int32_t));
set_register(a1, va_arg(parameters, int32_t));
set_register(a2, va_arg(parameters, int32_t));
set_register(a3, va_arg(parameters, int32_t));
// Remaining arguments passed on stack.
int original_stack = get_register(sp);
// Compute position of stack on entry to generated code.
int entry_stack = (original_stack - (argument_count - 4) * sizeof(int32_t)
- kCArgsSlotsSize);
if (OS::ActivationFrameAlignment() != 0) {
entry_stack &= -OS::ActivationFrameAlignment();
}
// Store remaining arguments on stack, from low to high memory.
intptr_t* stack_argument = reinterpret_cast<intptr_t*>(entry_stack);
for (int i = 4; i < argument_count; i++) {
stack_argument[i - 4 + kCArgSlotCount] = va_arg(parameters, int32_t);
}
va_end(parameters);
set_register(sp, entry_stack);
void Simulator::CallInternal(byte* entry) {
// Prepare to execute the code at entry.
set_register(pc, reinterpret_cast<int32_t>(entry));
// Put down marker for end of simulation. The simulator will stop simulation
@ -2809,6 +2802,38 @@ int32_t Simulator::Call(byte* entry, int argument_count, ...) {
set_register(gp, gp_val);
set_register(sp, sp_val);
set_register(fp, fp_val);
}
int32_t Simulator::Call(byte* entry, int argument_count, ...) {
va_list parameters;
va_start(parameters, argument_count);
// Set up arguments.
// First four arguments passed in registers.
ASSERT(argument_count >= 4);
set_register(a0, va_arg(parameters, int32_t));
set_register(a1, va_arg(parameters, int32_t));
set_register(a2, va_arg(parameters, int32_t));
set_register(a3, va_arg(parameters, int32_t));
// Remaining arguments passed on stack.
int original_stack = get_register(sp);
// Compute position of stack on entry to generated code.
int entry_stack = (original_stack - (argument_count - 4) * sizeof(int32_t)
- kCArgsSlotsSize);
if (OS::ActivationFrameAlignment() != 0) {
entry_stack &= -OS::ActivationFrameAlignment();
}
// Store remaining arguments on stack, from low to high memory.
intptr_t* stack_argument = reinterpret_cast<intptr_t*>(entry_stack);
for (int i = 4; i < argument_count; i++) {
stack_argument[i - 4 + kCArgSlotCount] = va_arg(parameters, int32_t);
}
va_end(parameters);
set_register(sp, entry_stack);
CallInternal(entry);
// Pop stack passed arguments.
CHECK_EQ(entry_stack, get_register(sp));
@ -2819,6 +2844,27 @@ int32_t Simulator::Call(byte* entry, int argument_count, ...) {
}
double Simulator::CallFP(byte* entry, double d0, double d1) {
if (!IsMipsSoftFloatABI) {
set_fpu_register_double(f12, d0);
set_fpu_register_double(f14, d1);
} else {
int buffer[2];
ASSERT(sizeof(buffer[0]) * 2 == sizeof(d0));
memcpy(buffer, &d0, sizeof(d0));
set_dw_register(a0, buffer);
memcpy(buffer, &d1, sizeof(d1));
set_dw_register(a2, buffer);
}
CallInternal(entry);
if (!IsMipsSoftFloatABI) {
return get_fpu_register_double(f0);
} else {
return get_double_from_register_pair(v0);
}
}
uintptr_t Simulator::PushAddress(uintptr_t address) {
int new_sp = get_register(sp) - sizeof(uintptr_t);
uintptr_t* stack_slot = reinterpret_cast<uintptr_t*>(new_sp);

View File

@ -184,7 +184,9 @@ class Simulator {
// architecture specification and is off by a 8 from the currently executing
// instruction.
void set_register(int reg, int32_t value);
void set_dw_register(int dreg, const int* dbl);
int32_t get_register(int reg) const;
double get_double_from_register_pair(int reg);
// Same for FPURegisters.
void set_fpu_register(int fpureg, int32_t value);
void set_fpu_register_float(int fpureg, float value);
@ -214,6 +216,8 @@ class Simulator {
// generated RegExp code with 7 parameters. This is a convenience function,
// which sets up the simulator state and grabs the result on return.
int32_t Call(byte* entry, int argument_count, ...);
// Alternative: call a 2-argument double function.
double CallFP(byte* entry, double d0, double d1);
// Push an address onto the JS stack.
uintptr_t PushAddress(uintptr_t address);
@ -353,6 +357,7 @@ class Simulator {
void GetFpArgs(double* x, int32_t* y);
void SetFpResult(const double& result);
void CallInternal(byte* entry);
// Architecture state.
// Registers.