[x64] Replace addsd, subsd, mulsd, divsd with AVX versions under AVX.
BUG=v8:4406 LOG=N Review URL: https://codereview.chromium.org/1416053010 Cr-Commit-Position: refs/heads/master@{#31498}
This commit is contained in:
parent
f5c227b27d
commit
a96b87fad9
@ -3565,8 +3565,8 @@ void LCodeGen::DoMathAbs(LMathAbs* instr) {
|
||||
XMMRegister scratch = double_scratch0();
|
||||
XMMRegister input_reg = ToDoubleRegister(instr->value());
|
||||
__ Xorpd(scratch, scratch);
|
||||
__ subsd(scratch, input_reg);
|
||||
__ andps(input_reg, scratch);
|
||||
__ Subsd(scratch, input_reg);
|
||||
__ Andpd(input_reg, scratch);
|
||||
} else if (r.IsInteger32()) {
|
||||
EmitIntegerMathAbs(instr);
|
||||
} else if (r.IsSmi()) {
|
||||
@ -3658,7 +3658,7 @@ void LCodeGen::DoMathRound(LMathRound* instr) {
|
||||
__ j(above, &below_one_half, Label::kNear);
|
||||
|
||||
// CVTTSD2SI rounds towards zero, since 0.5 <= x, we use floor(0.5 + x).
|
||||
__ addsd(xmm_scratch, input_reg);
|
||||
__ Addsd(xmm_scratch, input_reg);
|
||||
__ Cvttsd2si(output_reg, xmm_scratch);
|
||||
// Overflow is signalled with minint.
|
||||
__ cmpl(output_reg, Immediate(0x1));
|
||||
@ -3674,7 +3674,7 @@ void LCodeGen::DoMathRound(LMathRound* instr) {
|
||||
// CVTTSD2SI rounds towards zero, we use ceil(x - (-0.5)) and then
|
||||
// compare and compensate.
|
||||
__ Movapd(input_temp, input_reg); // Do not alter input_reg.
|
||||
__ subsd(input_temp, xmm_scratch);
|
||||
__ Subsd(input_temp, xmm_scratch);
|
||||
__ Cvttsd2si(output_reg, input_temp);
|
||||
// Catch minint due to overflow, and to prevent overflow when compensating.
|
||||
__ cmpl(output_reg, Immediate(0x1));
|
||||
@ -3740,13 +3740,13 @@ void LCodeGen::DoMathPowHalf(LMathPowHalf* instr) {
|
||||
__ j(carry, &sqrt, Label::kNear);
|
||||
// If input is -Infinity, return Infinity.
|
||||
__ Xorpd(input_reg, input_reg);
|
||||
__ subsd(input_reg, xmm_scratch);
|
||||
__ Subsd(input_reg, xmm_scratch);
|
||||
__ jmp(&done, Label::kNear);
|
||||
|
||||
// Square root.
|
||||
__ bind(&sqrt);
|
||||
__ Xorpd(xmm_scratch, xmm_scratch);
|
||||
__ addsd(input_reg, xmm_scratch); // Convert -0 to +0.
|
||||
__ Addsd(input_reg, xmm_scratch); // Convert -0 to +0.
|
||||
__ Sqrtsd(input_reg, input_reg);
|
||||
__ bind(&done);
|
||||
}
|
||||
@ -4259,7 +4259,7 @@ void LCodeGen::DoStoreKeyedFixedDoubleArray(LStoreKeyed* instr) {
|
||||
XMMRegister xmm_scratch = double_scratch0();
|
||||
// Turn potential sNaN value into qNaN.
|
||||
__ Xorpd(xmm_scratch, xmm_scratch);
|
||||
__ subsd(value, xmm_scratch);
|
||||
__ Subsd(value, xmm_scratch);
|
||||
}
|
||||
|
||||
Operand double_store_operand = BuildFastArrayOperand(
|
||||
|
@ -356,20 +356,20 @@ void MathPowStub::Generate(MacroAssembler* masm) {
|
||||
|
||||
// Set result to Infinity in the special case.
|
||||
__ Xorpd(double_result, double_result);
|
||||
__ subsd(double_result, double_scratch);
|
||||
__ Subsd(double_result, double_scratch);
|
||||
__ jmp(&done);
|
||||
|
||||
__ bind(&continue_sqrt);
|
||||
// sqrtsd returns -0 when input is -0. ECMA spec requires +0.
|
||||
__ Xorpd(double_scratch, double_scratch);
|
||||
__ addsd(double_scratch, double_base); // Convert -0 to 0.
|
||||
__ Addsd(double_scratch, double_base); // Convert -0 to 0.
|
||||
__ Sqrtsd(double_result, double_scratch);
|
||||
__ jmp(&done);
|
||||
|
||||
// Test for -0.5.
|
||||
__ bind(&not_plus_half);
|
||||
// Load double_scratch with -0.5 by subtracting 1.
|
||||
__ subsd(double_scratch, double_result);
|
||||
__ Subsd(double_scratch, double_result);
|
||||
// Already ruled out NaNs for exponent.
|
||||
__ Ucomisd(double_scratch, double_exponent);
|
||||
__ j(not_equal, &fast_power, Label::kNear);
|
||||
@ -393,9 +393,9 @@ void MathPowStub::Generate(MacroAssembler* masm) {
|
||||
__ bind(&continue_rsqrt);
|
||||
// sqrtsd returns -0 when input is -0. ECMA spec requires +0.
|
||||
__ Xorpd(double_exponent, double_exponent);
|
||||
__ addsd(double_exponent, double_base); // Convert -0 to +0.
|
||||
__ Addsd(double_exponent, double_base); // Convert -0 to +0.
|
||||
__ Sqrtsd(double_exponent, double_exponent);
|
||||
__ divsd(double_result, double_exponent);
|
||||
__ Divsd(double_result, double_exponent);
|
||||
__ jmp(&done);
|
||||
}
|
||||
|
||||
@ -465,16 +465,16 @@ void MathPowStub::Generate(MacroAssembler* masm) {
|
||||
|
||||
__ bind(&while_true);
|
||||
__ shrl(scratch, Immediate(1));
|
||||
__ mulsd(double_scratch, double_scratch);
|
||||
__ Mulsd(double_scratch, double_scratch);
|
||||
__ j(above, &while_true, Label::kNear);
|
||||
__ mulsd(double_result, double_scratch);
|
||||
__ Mulsd(double_result, double_scratch);
|
||||
__ j(not_zero, &while_true);
|
||||
|
||||
__ bind(&while_false);
|
||||
// If the exponent is negative, return 1/result.
|
||||
__ testl(exponent, exponent);
|
||||
__ j(greater, &done);
|
||||
__ divsd(double_scratch2, double_result);
|
||||
__ Divsd(double_scratch2, double_result);
|
||||
__ Movsd(double_result, double_scratch2);
|
||||
// Test whether result is zero. Bail out to check for subnormal result.
|
||||
// Due to subnormals, x^-y == (1/x)^y does not hold in all cases.
|
||||
|
@ -612,29 +612,29 @@ void MathExpGenerator::EmitMathExp(MacroAssembler* masm,
|
||||
__ j(above_equal, &done);
|
||||
__ Movsd(double_scratch, Operand(kScratchRegister, 3 * kDoubleSize));
|
||||
__ Movsd(result, Operand(kScratchRegister, 4 * kDoubleSize));
|
||||
__ mulsd(double_scratch, input);
|
||||
__ addsd(double_scratch, result);
|
||||
__ Mulsd(double_scratch, input);
|
||||
__ Addsd(double_scratch, result);
|
||||
__ Movq(temp2, double_scratch);
|
||||
__ subsd(double_scratch, result);
|
||||
__ Subsd(double_scratch, result);
|
||||
__ Movsd(result, Operand(kScratchRegister, 6 * kDoubleSize));
|
||||
__ leaq(temp1, Operand(temp2, 0x1ff800));
|
||||
__ andq(temp2, Immediate(0x7ff));
|
||||
__ shrq(temp1, Immediate(11));
|
||||
__ mulsd(double_scratch, Operand(kScratchRegister, 5 * kDoubleSize));
|
||||
__ Mulsd(double_scratch, Operand(kScratchRegister, 5 * kDoubleSize));
|
||||
__ Move(kScratchRegister, ExternalReference::math_exp_log_table());
|
||||
__ shlq(temp1, Immediate(52));
|
||||
__ orq(temp1, Operand(kScratchRegister, temp2, times_8, 0));
|
||||
__ Move(kScratchRegister, ExternalReference::math_exp_constants(0));
|
||||
__ subsd(double_scratch, input);
|
||||
__ Subsd(double_scratch, input);
|
||||
__ Movsd(input, double_scratch);
|
||||
__ subsd(result, double_scratch);
|
||||
__ mulsd(input, double_scratch);
|
||||
__ mulsd(result, input);
|
||||
__ Subsd(result, double_scratch);
|
||||
__ Mulsd(input, double_scratch);
|
||||
__ Mulsd(result, input);
|
||||
__ Movq(input, temp1);
|
||||
__ mulsd(result, Operand(kScratchRegister, 7 * kDoubleSize));
|
||||
__ subsd(result, double_scratch);
|
||||
__ addsd(result, Operand(kScratchRegister, 8 * kDoubleSize));
|
||||
__ mulsd(result, input);
|
||||
__ Mulsd(result, Operand(kScratchRegister, 7 * kDoubleSize));
|
||||
__ Subsd(result, double_scratch);
|
||||
__ Addsd(result, Operand(kScratchRegister, 8 * kDoubleSize));
|
||||
__ Mulsd(result, input);
|
||||
|
||||
__ bind(&done);
|
||||
}
|
||||
|
@ -2740,66 +2740,6 @@ void MacroAssembler::Ucomisd(XMMRegister src1, const Operand& src2) {
|
||||
}
|
||||
|
||||
|
||||
void MacroAssembler::Andpd(XMMRegister dst, XMMRegister src) {
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
CpuFeatureScope scope(this, AVX);
|
||||
vandpd(dst, dst, src);
|
||||
} else {
|
||||
andpd(dst, src);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void MacroAssembler::Orpd(XMMRegister dst, XMMRegister src) {
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
CpuFeatureScope scope(this, AVX);
|
||||
vorpd(dst, dst, src);
|
||||
} else {
|
||||
orpd(dst, src);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void MacroAssembler::Xorpd(XMMRegister dst, XMMRegister src) {
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
CpuFeatureScope scope(this, AVX);
|
||||
vxorpd(dst, dst, src);
|
||||
} else {
|
||||
xorpd(dst, src);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void MacroAssembler::Pcmpeqd(XMMRegister dst, XMMRegister src) {
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
CpuFeatureScope scope(this, AVX);
|
||||
vpcmpeqd(dst, dst, src);
|
||||
} else {
|
||||
pcmpeqd(dst, src);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void MacroAssembler::Psllq(XMMRegister dst, byte imm8) {
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
CpuFeatureScope scope(this, AVX);
|
||||
vpsllq(dst, dst, imm8);
|
||||
} else {
|
||||
psllq(dst, imm8);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void MacroAssembler::Psrlq(XMMRegister dst, byte imm8) {
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
CpuFeatureScope scope(this, AVX);
|
||||
vpsrlq(dst, dst, imm8);
|
||||
} else {
|
||||
psrlq(dst, imm8);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void MacroAssembler::Cmp(Register dst, Handle<Object> source) {
|
||||
AllowDeferredHandleDereference smi_check;
|
||||
if (source->IsSmi()) {
|
||||
|
@ -904,6 +904,39 @@ class MacroAssembler: public Assembler {
|
||||
void Move(XMMRegister dst, float src) { Move(dst, bit_cast<uint32_t>(src)); }
|
||||
void Move(XMMRegister dst, double src) { Move(dst, bit_cast<uint64_t>(src)); }
|
||||
|
||||
#define AVX_OP2_WITH_TYPE(macro_name, name, src_type) \
|
||||
void macro_name(XMMRegister dst, src_type src) { \
|
||||
if (CpuFeatures::IsSupported(AVX)) { \
|
||||
CpuFeatureScope scope(this, AVX); \
|
||||
v##name(dst, dst, src); \
|
||||
} else { \
|
||||
name(dst, src); \
|
||||
} \
|
||||
}
|
||||
#define AVX_OP2_X(macro_name, name) \
|
||||
AVX_OP2_WITH_TYPE(macro_name, name, XMMRegister)
|
||||
#define AVX_OP2_O(macro_name, name) \
|
||||
AVX_OP2_WITH_TYPE(macro_name, name, const Operand&)
|
||||
#define AVX_OP2_XO(macro_name, name) \
|
||||
AVX_OP2_X(macro_name, name) \
|
||||
AVX_OP2_O(macro_name, name)
|
||||
|
||||
AVX_OP2_XO(Addsd, addsd)
|
||||
AVX_OP2_XO(Subsd, subsd)
|
||||
AVX_OP2_XO(Mulsd, mulsd)
|
||||
AVX_OP2_XO(Divsd, divsd)
|
||||
AVX_OP2_X(Andpd, andpd)
|
||||
AVX_OP2_X(Orpd, orpd)
|
||||
AVX_OP2_X(Xorpd, xorpd)
|
||||
AVX_OP2_X(Pcmpeqd, pcmpeqd)
|
||||
AVX_OP2_WITH_TYPE(Psllq, psllq, byte)
|
||||
AVX_OP2_WITH_TYPE(Psrlq, psrlq, byte)
|
||||
|
||||
#undef AVX_OP2_O
|
||||
#undef AVX_OP2_X
|
||||
#undef AVX_OP2_XO
|
||||
#undef AVX_OP2_WITH_TYPE
|
||||
|
||||
void Movsd(XMMRegister dst, XMMRegister src);
|
||||
void Movsd(XMMRegister dst, const Operand& src);
|
||||
void Movsd(const Operand& dst, XMMRegister src);
|
||||
@ -930,13 +963,6 @@ class MacroAssembler: public Assembler {
|
||||
void Ucomisd(XMMRegister src1, XMMRegister src2);
|
||||
void Ucomisd(XMMRegister src1, const Operand& src2);
|
||||
|
||||
void Andpd(XMMRegister dst, XMMRegister src);
|
||||
void Orpd(XMMRegister dst, XMMRegister src);
|
||||
void Xorpd(XMMRegister dst, XMMRegister src);
|
||||
void Pcmpeqd(XMMRegister dst, XMMRegister src);
|
||||
void Psllq(XMMRegister dst, byte imm8);
|
||||
void Psrlq(XMMRegister dst, byte imm8);
|
||||
|
||||
// Control Flow
|
||||
void Jump(Address destination, RelocInfo::Mode rmode);
|
||||
void Jump(ExternalReference ext);
|
||||
|
Loading…
Reference in New Issue
Block a user