[x64] Replace addsd, subsd, mulsd, divsd with AVX versions under AVX.

BUG=v8:4406
LOG=N

Review URL: https://codereview.chromium.org/1416053010

Cr-Commit-Position: refs/heads/master@{#31498}
This commit is contained in:
alph 2015-10-23 01:25:24 -07:00 committed by Commit bot
parent f5c227b27d
commit a96b87fad9
5 changed files with 60 additions and 94 deletions

View File

@@ -3565,8 +3565,8 @@ void LCodeGen::DoMathAbs(LMathAbs* instr) {
XMMRegister scratch = double_scratch0();
XMMRegister input_reg = ToDoubleRegister(instr->value());
__ Xorpd(scratch, scratch);
__ subsd(scratch, input_reg);
__ andps(input_reg, scratch);
__ Subsd(scratch, input_reg);
__ Andpd(input_reg, scratch);
} else if (r.IsInteger32()) {
EmitIntegerMathAbs(instr);
} else if (r.IsSmi()) {
@@ -3658,7 +3658,7 @@ void LCodeGen::DoMathRound(LMathRound* instr) {
__ j(above, &below_one_half, Label::kNear);
// CVTTSD2SI rounds towards zero, since 0.5 <= x, we use floor(0.5 + x).
__ addsd(xmm_scratch, input_reg);
__ Addsd(xmm_scratch, input_reg);
__ Cvttsd2si(output_reg, xmm_scratch);
// Overflow is signalled with minint.
__ cmpl(output_reg, Immediate(0x1));
@@ -3674,7 +3674,7 @@ void LCodeGen::DoMathRound(LMathRound* instr) {
// CVTTSD2SI rounds towards zero, we use ceil(x - (-0.5)) and then
// compare and compensate.
__ Movapd(input_temp, input_reg); // Do not alter input_reg.
__ subsd(input_temp, xmm_scratch);
__ Subsd(input_temp, xmm_scratch);
__ Cvttsd2si(output_reg, input_temp);
// Catch minint due to overflow, and to prevent overflow when compensating.
__ cmpl(output_reg, Immediate(0x1));
@@ -3740,13 +3740,13 @@ void LCodeGen::DoMathPowHalf(LMathPowHalf* instr) {
__ j(carry, &sqrt, Label::kNear);
// If input is -Infinity, return Infinity.
__ Xorpd(input_reg, input_reg);
__ subsd(input_reg, xmm_scratch);
__ Subsd(input_reg, xmm_scratch);
__ jmp(&done, Label::kNear);
// Square root.
__ bind(&sqrt);
__ Xorpd(xmm_scratch, xmm_scratch);
__ addsd(input_reg, xmm_scratch); // Convert -0 to +0.
__ Addsd(input_reg, xmm_scratch); // Convert -0 to +0.
__ Sqrtsd(input_reg, input_reg);
__ bind(&done);
}
@@ -4259,7 +4259,7 @@ void LCodeGen::DoStoreKeyedFixedDoubleArray(LStoreKeyed* instr) {
XMMRegister xmm_scratch = double_scratch0();
// Turn potential sNaN value into qNaN.
__ Xorpd(xmm_scratch, xmm_scratch);
__ subsd(value, xmm_scratch);
__ Subsd(value, xmm_scratch);
}
Operand double_store_operand = BuildFastArrayOperand(

View File

@@ -356,20 +356,20 @@ void MathPowStub::Generate(MacroAssembler* masm) {
// Set result to Infinity in the special case.
__ Xorpd(double_result, double_result);
__ subsd(double_result, double_scratch);
__ Subsd(double_result, double_scratch);
__ jmp(&done);
__ bind(&continue_sqrt);
// sqrtsd returns -0 when input is -0. ECMA spec requires +0.
__ Xorpd(double_scratch, double_scratch);
__ addsd(double_scratch, double_base); // Convert -0 to 0.
__ Addsd(double_scratch, double_base); // Convert -0 to 0.
__ Sqrtsd(double_result, double_scratch);
__ jmp(&done);
// Test for -0.5.
__ bind(&not_plus_half);
// Load double_scratch with -0.5 by substracting 1.
__ subsd(double_scratch, double_result);
__ Subsd(double_scratch, double_result);
// Already ruled out NaNs for exponent.
__ Ucomisd(double_scratch, double_exponent);
__ j(not_equal, &fast_power, Label::kNear);
@@ -393,9 +393,9 @@ void MathPowStub::Generate(MacroAssembler* masm) {
__ bind(&continue_rsqrt);
// sqrtsd returns -0 when input is -0. ECMA spec requires +0.
__ Xorpd(double_exponent, double_exponent);
__ addsd(double_exponent, double_base); // Convert -0 to +0.
__ Addsd(double_exponent, double_base); // Convert -0 to +0.
__ Sqrtsd(double_exponent, double_exponent);
__ divsd(double_result, double_exponent);
__ Divsd(double_result, double_exponent);
__ jmp(&done);
}
@@ -465,16 +465,16 @@ void MathPowStub::Generate(MacroAssembler* masm) {
__ bind(&while_true);
__ shrl(scratch, Immediate(1));
__ mulsd(double_scratch, double_scratch);
__ Mulsd(double_scratch, double_scratch);
__ j(above, &while_true, Label::kNear);
__ mulsd(double_result, double_scratch);
__ Mulsd(double_result, double_scratch);
__ j(not_zero, &while_true);
__ bind(&while_false);
// If the exponent is negative, return 1/result.
__ testl(exponent, exponent);
__ j(greater, &done);
__ divsd(double_scratch2, double_result);
__ Divsd(double_scratch2, double_result);
__ Movsd(double_result, double_scratch2);
// Test whether result is zero. Bail out to check for subnormal result.
// Due to subnormals, x^-y == (1/x)^y does not hold in all cases.

View File

@@ -612,29 +612,29 @@ void MathExpGenerator::EmitMathExp(MacroAssembler* masm,
__ j(above_equal, &done);
__ Movsd(double_scratch, Operand(kScratchRegister, 3 * kDoubleSize));
__ Movsd(result, Operand(kScratchRegister, 4 * kDoubleSize));
__ mulsd(double_scratch, input);
__ addsd(double_scratch, result);
__ Mulsd(double_scratch, input);
__ Addsd(double_scratch, result);
__ Movq(temp2, double_scratch);
__ subsd(double_scratch, result);
__ Subsd(double_scratch, result);
__ Movsd(result, Operand(kScratchRegister, 6 * kDoubleSize));
__ leaq(temp1, Operand(temp2, 0x1ff800));
__ andq(temp2, Immediate(0x7ff));
__ shrq(temp1, Immediate(11));
__ mulsd(double_scratch, Operand(kScratchRegister, 5 * kDoubleSize));
__ Mulsd(double_scratch, Operand(kScratchRegister, 5 * kDoubleSize));
__ Move(kScratchRegister, ExternalReference::math_exp_log_table());
__ shlq(temp1, Immediate(52));
__ orq(temp1, Operand(kScratchRegister, temp2, times_8, 0));
__ Move(kScratchRegister, ExternalReference::math_exp_constants(0));
__ subsd(double_scratch, input);
__ Subsd(double_scratch, input);
__ Movsd(input, double_scratch);
__ subsd(result, double_scratch);
__ mulsd(input, double_scratch);
__ mulsd(result, input);
__ Subsd(result, double_scratch);
__ Mulsd(input, double_scratch);
__ Mulsd(result, input);
__ Movq(input, temp1);
__ mulsd(result, Operand(kScratchRegister, 7 * kDoubleSize));
__ subsd(result, double_scratch);
__ addsd(result, Operand(kScratchRegister, 8 * kDoubleSize));
__ mulsd(result, input);
__ Mulsd(result, Operand(kScratchRegister, 7 * kDoubleSize));
__ Subsd(result, double_scratch);
__ Addsd(result, Operand(kScratchRegister, 8 * kDoubleSize));
__ Mulsd(result, input);
__ bind(&done);
}

View File

@@ -2740,66 +2740,6 @@ void MacroAssembler::Ucomisd(XMMRegister src1, const Operand& src2) {
}
// Emit andpd, using the three-operand AVX form (vandpd) when the CPU
// supports AVX; otherwise fall back to the legacy SSE2 encoding.
void MacroAssembler::Andpd(XMMRegister dst, XMMRegister src) {
  if (!CpuFeatures::IsSupported(AVX)) {
    andpd(dst, src);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vandpd(dst, dst, src);
}
// Emit orpd, using the three-operand AVX form (vorpd) when the CPU
// supports AVX; otherwise fall back to the legacy SSE2 encoding.
void MacroAssembler::Orpd(XMMRegister dst, XMMRegister src) {
  if (!CpuFeatures::IsSupported(AVX)) {
    orpd(dst, src);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vorpd(dst, dst, src);
}
// Emit xorpd, using the three-operand AVX form (vxorpd) when the CPU
// supports AVX; otherwise fall back to the legacy SSE2 encoding.
void MacroAssembler::Xorpd(XMMRegister dst, XMMRegister src) {
  if (!CpuFeatures::IsSupported(AVX)) {
    xorpd(dst, src);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vxorpd(dst, dst, src);
}
// Emit pcmpeqd, using the three-operand AVX form (vpcmpeqd) when the CPU
// supports AVX; otherwise fall back to the legacy SSE2 encoding.
void MacroAssembler::Pcmpeqd(XMMRegister dst, XMMRegister src) {
  if (!CpuFeatures::IsSupported(AVX)) {
    pcmpeqd(dst, src);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vpcmpeqd(dst, dst, src);
}
// Emit psllq with an immediate shift count, using the AVX form (vpsllq)
// when the CPU supports AVX; otherwise fall back to the SSE2 encoding.
void MacroAssembler::Psllq(XMMRegister dst, byte imm8) {
  if (!CpuFeatures::IsSupported(AVX)) {
    psllq(dst, imm8);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vpsllq(dst, dst, imm8);
}
// Emit psrlq with an immediate shift count, using the AVX form (vpsrlq)
// when the CPU supports AVX; otherwise fall back to the SSE2 encoding.
void MacroAssembler::Psrlq(XMMRegister dst, byte imm8) {
  if (!CpuFeatures::IsSupported(AVX)) {
    psrlq(dst, imm8);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vpsrlq(dst, dst, imm8);
}
void MacroAssembler::Cmp(Register dst, Handle<Object> source) {
AllowDeferredHandleDereference smi_check;
if (source->IsSmi()) {

View File

@@ -904,6 +904,39 @@ class MacroAssembler: public Assembler {
// Load a floating-point immediate into an XMM register by forwarding its
// raw bit pattern (via bit_cast) to the integer Move overload.
void Move(XMMRegister dst, float src) { Move(dst, bit_cast<uint32_t>(src)); }
void Move(XMMRegister dst, double src) { Move(dst, bit_cast<uint64_t>(src)); }
// Defines a binary-op wrapper that emits the three-operand AVX encoding
// (v-prefixed, dst = dst op src) when the CPU supports AVX, and falls back
// to the legacy two-operand SSE instruction otherwise.
#define AVX_OP2_WITH_TYPE(macro_name, name, src_type) \
void macro_name(XMMRegister dst, src_type src) { \
if (CpuFeatures::IsSupported(AVX)) { \
CpuFeatureScope scope(this, AVX); \
v##name(dst, dst, src); \
} else { \
name(dst, src); \
} \
}
// Wrapper overload taking an XMM-register source.
#define AVX_OP2_X(macro_name, name) \
AVX_OP2_WITH_TYPE(macro_name, name, XMMRegister)
// Wrapper overload taking a memory-operand source.
#define AVX_OP2_O(macro_name, name) \
AVX_OP2_WITH_TYPE(macro_name, name, const Operand&)
// Emits both the register-source and memory-operand-source overloads.
#define AVX_OP2_XO(macro_name, name) \
AVX_OP2_X(macro_name, name) \
AVX_OP2_O(macro_name, name)
// Scalar double arithmetic: register and memory-operand forms.
AVX_OP2_XO(Addsd, addsd)
AVX_OP2_XO(Subsd, subsd)
AVX_OP2_XO(Mulsd, mulsd)
AVX_OP2_XO(Divsd, divsd)
// Packed bitwise/compare ops: register form only.
AVX_OP2_X(Andpd, andpd)
AVX_OP2_X(Orpd, orpd)
AVX_OP2_X(Xorpd, xorpd)
AVX_OP2_X(Pcmpeqd, pcmpeqd)
// Shifts take an immediate count rather than an XMM/memory source.
AVX_OP2_WITH_TYPE(Psllq, psllq, byte)
AVX_OP2_WITH_TYPE(Psrlq, psrlq, byte)
// The helper macros are local to this section of the class.
#undef AVX_OP2_O
#undef AVX_OP2_X
#undef AVX_OP2_XO
#undef AVX_OP2_WITH_TYPE
void Movsd(XMMRegister dst, XMMRegister src);
void Movsd(XMMRegister dst, const Operand& src);
void Movsd(const Operand& dst, XMMRegister src);
@@ -930,13 +963,6 @@ class MacroAssembler: public Assembler {
void Ucomisd(XMMRegister src1, XMMRegister src2);
void Ucomisd(XMMRegister src1, const Operand& src2);
void Andpd(XMMRegister dst, XMMRegister src);
void Orpd(XMMRegister dst, XMMRegister src);
void Xorpd(XMMRegister dst, XMMRegister src);
void Pcmpeqd(XMMRegister dst, XMMRegister src);
void Psllq(XMMRegister dst, byte imm8);
void Psrlq(XMMRegister dst, byte imm8);
// Control Flow
void Jump(Address destination, RelocInfo::Mode rmode);
void Jump(ExternalReference ext);