Use xorps to break the unnecessary dependence of cvtsi2sd on its destination register, which it only partially writes

BUG=
R=yangguo@chromium.org

Review URL: https://codereview.chromium.org/23654026

Patch from Weiliang Lin <weiliang.lin2@gmail.com>.

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@16702 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
yangguo@chromium.org 2013-09-13 07:59:48 +00:00
parent 8650fae525
commit e4c179e2d3
12 changed files with 80 additions and 50 deletions
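The whole change reduces to one pattern: call sites stop emitting cvtsi2sd directly and instead go through a new MacroAssembler helper that clears the destination register first. A minimal sketch of the ia32 helper added below (the x64 Cvtlsi2sd variants are identical apart from the instruction name); the comments here are explanatory and not part of the patch:

    void MacroAssembler::Cvtsi2sd(XMMRegister dst, const Operand& src) {
      // cvtsi2sd writes only the low 64 bits of dst, so on its own it carries
      // a false dependence on whatever instruction last wrote dst. xorps is a
      // recognized zeroing idiom: the register renamer breaks that dependence,
      // so cvtsi2sd can issue as soon as src is ready.
      xorps(dst, dst);
      cvtsi2sd(dst, src);
    }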

View File

@@ -984,7 +984,7 @@ static void BinaryOpStub_GenerateSmiCode(
 ASSERT_EQ(Token::SHL, op);
 if (CpuFeatures::IsSupported(SSE2)) {
 CpuFeatureScope use_sse2(masm, SSE2);
-__ cvtsi2sd(xmm0, left);
+__ Cvtsi2sd(xmm0, left);
 __ movdbl(FieldOperand(eax, HeapNumber::kValueOffset), xmm0);
 } else {
 __ mov(Operand(esp, 1 * kPointerSize), left);
@@ -1370,7 +1370,7 @@ void BinaryOpStub::GenerateInt32Stub(MacroAssembler* masm) {
 // Store the result in the HeapNumber and return.
 if (CpuFeatures::IsSupported(SSE2)) {
 CpuFeatureScope use_sse2(masm, SSE2);
-__ cvtsi2sd(xmm0, ebx);
+__ Cvtsi2sd(xmm0, ebx);
 __ movdbl(FieldOperand(eax, HeapNumber::kValueOffset), xmm0);
 } else {
 __ mov(Operand(esp, 1 * kPointerSize), ebx);
@@ -1594,7 +1594,7 @@ void BinaryOpStub::GenerateNumberStub(MacroAssembler* masm) {
 // Store the result in the HeapNumber and return.
 if (CpuFeatures::IsSupported(SSE2)) {
 CpuFeatureScope use_sse2(masm, SSE2);
-__ cvtsi2sd(xmm0, ebx);
+__ Cvtsi2sd(xmm0, ebx);
 __ movdbl(FieldOperand(eax, HeapNumber::kValueOffset), xmm0);
 } else {
 __ mov(Operand(esp, 1 * kPointerSize), ebx);
@@ -1782,7 +1782,7 @@ void BinaryOpStub::GenerateGeneric(MacroAssembler* masm) {
 // Store the result in the HeapNumber and return.
 if (CpuFeatures::IsSupported(SSE2)) {
 CpuFeatureScope use_sse2(masm, SSE2);
-__ cvtsi2sd(xmm0, ebx);
+__ Cvtsi2sd(xmm0, ebx);
 __ movdbl(FieldOperand(eax, HeapNumber::kValueOffset), xmm0);
 } else {
 __ mov(Operand(esp, 1 * kPointerSize), ebx);
@@ -2329,12 +2329,12 @@ void FloatingPointHelper::LoadSSE2Operands(MacroAssembler* masm,
 __ jmp(not_numbers); // Argument in eax is not a number.
 __ bind(&load_smi_edx);
 __ SmiUntag(edx); // Untag smi before converting to float.
-__ cvtsi2sd(xmm0, edx);
+__ Cvtsi2sd(xmm0, edx);
 __ SmiTag(edx); // Retag smi for heap number overwriting test.
 __ jmp(&load_eax);
 __ bind(&load_smi_eax);
 __ SmiUntag(eax); // Untag smi before converting to float.
-__ cvtsi2sd(xmm1, eax);
+__ Cvtsi2sd(xmm1, eax);
 __ SmiTag(eax); // Retag smi for heap number overwriting test.
 __ jmp(&done, Label::kNear);
 __ bind(&load_float_eax);
@@ -2350,11 +2350,11 @@ void FloatingPointHelper::LoadSSE2Smis(MacroAssembler* masm,
 __ mov(scratch, left);
 ASSERT(!scratch.is(right)); // We're about to clobber scratch.
 __ SmiUntag(scratch);
-__ cvtsi2sd(xmm0, scratch);
+__ Cvtsi2sd(xmm0, scratch);
 __ mov(scratch, right);
 __ SmiUntag(scratch);
-__ cvtsi2sd(xmm1, scratch);
+__ Cvtsi2sd(xmm1, scratch);
 }
@@ -2365,7 +2365,7 @@ void FloatingPointHelper::CheckSSE2OperandIsInt32(MacroAssembler* masm,
 Register scratch,
 XMMRegister xmm_scratch) {
 __ cvttsd2si(int32_result, Operand(operand));
-__ cvtsi2sd(xmm_scratch, int32_result);
+__ Cvtsi2sd(xmm_scratch, int32_result);
 __ pcmpeqd(xmm_scratch, operand);
 __ movmskps(scratch, xmm_scratch);
 // Two least significant bits should be both set.
@@ -2470,7 +2470,7 @@ void MathPowStub::Generate(MacroAssembler* masm) {
 // Save 1 in double_result - we need this several times later on.
 __ mov(scratch, Immediate(1));
-__ cvtsi2sd(double_result, scratch);
+__ Cvtsi2sd(double_result, scratch);
 if (exponent_type_ == ON_STACK) {
 Label base_is_smi, unpack_exponent;
@@ -2490,7 +2490,7 @@ void MathPowStub::Generate(MacroAssembler* masm) {
 __ bind(&base_is_smi);
 __ SmiUntag(base);
-__ cvtsi2sd(double_base, base);
+__ Cvtsi2sd(double_base, base);
 __ bind(&unpack_exponent);
 __ JumpIfNotSmi(exponent, &exponent_not_smi, Label::kNear);
@@ -2683,7 +2683,7 @@ void MathPowStub::Generate(MacroAssembler* masm) {
 // and may not have contained the exponent value in the first place when the
 // exponent is a smi. We reset it with exponent value before bailing out.
 __ j(not_equal, &done);
-__ cvtsi2sd(double_exponent, exponent);
+__ Cvtsi2sd(double_exponent, exponent);
 // Returning or bailing out.
 Counters* counters = masm->isolate()->counters();
@@ -6258,7 +6258,7 @@ void ICCompareStub::GenerateNumbers(MacroAssembler* masm) {
 __ bind(&right_smi);
 __ mov(ecx, eax); // Can't clobber eax because we can still jump away.
 __ SmiUntag(ecx);
-__ cvtsi2sd(xmm1, ecx);
+__ Cvtsi2sd(xmm1, ecx);
 __ bind(&left);
 __ JumpIfSmi(edx, &left_smi, Label::kNear);
@@ -6270,7 +6270,7 @@ void ICCompareStub::GenerateNumbers(MacroAssembler* masm) {
 __ bind(&left_smi);
 __ mov(ecx, edx); // Can't clobber edx because we can still jump away.
 __ SmiUntag(ecx);
-__ cvtsi2sd(xmm0, ecx);
+__ Cvtsi2sd(xmm0, ecx);
 __ bind(&done);
 // Compare operands.
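Many of the call sites in this diff use the conversion inside the same round-trip idiom: truncate the double to an int32, convert back, and compare, bailing out when the bits differ. Reproduced from CheckSSE2OperandIsInt32 above, with explanatory comments added here:

    __ cvttsd2si(int32_result, Operand(operand));  // truncate double to int32
    __ Cvtsi2sd(xmm_scratch, int32_result);        // convert back, now without
                                                   // the false dependence
    __ pcmpeqd(xmm_scratch, operand);              // per-lane 32-bit equality
    __ movmskps(scratch, xmm_scratch);             // collect lane sign bits; the
                                                   // two low bits are set only
                                                   // if the round trip was exact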

View File

@@ -768,7 +768,7 @@ void ElementsTransitionGenerator::GenerateSmiToDouble(
 __ SmiUntag(ebx);
 if (CpuFeatures::IsSupported(SSE2)) {
 CpuFeatureScope fscope(masm, SSE2);
-__ cvtsi2sd(xmm0, ebx);
+__ Cvtsi2sd(xmm0, ebx);
 __ movdbl(FieldOperand(eax, edi, times_4, FixedDoubleArray::kHeaderSize),
 xmm0);
 } else {

View File

@@ -3977,7 +3977,7 @@ void LCodeGen::DoMathFloor(LMathFloor* instr) {
 __ bind(&negative_sign);
 // Truncate, then compare and compensate.
 __ cvttsd2si(output_reg, Operand(input_reg));
-__ cvtsi2sd(xmm_scratch, output_reg);
+__ Cvtsi2sd(xmm_scratch, output_reg);
 __ ucomisd(input_reg, xmm_scratch);
 __ j(equal, &done, Label::kNear);
 __ sub(output_reg, Immediate(1));
@@ -4027,7 +4027,7 @@ void LCodeGen::DoMathRound(LMathRound* instr) {
 __ RecordComment("D2I conversion overflow");
 DeoptimizeIf(equal, instr->environment());
-__ cvtsi2sd(xmm_scratch, output_reg);
+__ Cvtsi2sd(xmm_scratch, output_reg);
 __ ucomisd(xmm_scratch, input_temp);
 __ j(equal, &done);
 __ sub(output_reg, Immediate(1));
@@ -4978,7 +4978,7 @@ void LCodeGen::DoInteger32ToDouble(LInteger32ToDouble* instr) {
 ASSERT(output->IsDoubleRegister());
 if (CpuFeatures::IsSupported(SSE2)) {
 CpuFeatureScope scope(masm(), SSE2);
-__ cvtsi2sd(ToDoubleRegister(output), ToOperand(input));
+__ Cvtsi2sd(ToDoubleRegister(output), ToOperand(input));
 } else if (input->IsRegister()) {
 Register input_reg = ToRegister(input);
 __ push(input_reg);
@@ -5087,7 +5087,7 @@ void LCodeGen::DoDeferredNumberTagI(LInstruction* instr,
 __ xor_(reg, 0x80000000);
 if (CpuFeatures::IsSupported(SSE2)) {
 CpuFeatureScope feature_scope(masm(), SSE2);
-__ cvtsi2sd(xmm0, Operand(reg));
+__ Cvtsi2sd(xmm0, Operand(reg));
 } else {
 __ push(reg);
 __ fild_s(Operand(esp, 0));
@@ -5356,7 +5356,7 @@ void LCodeGen::EmitNumberUntagD(Register input_reg,
 // input register since we avoid dependencies.
 __ mov(temp_reg, input_reg);
 __ SmiUntag(temp_reg); // Untag smi before converting to float.
-__ cvtsi2sd(result_reg, Operand(temp_reg));
+__ Cvtsi2sd(result_reg, Operand(temp_reg));
 __ bind(&done);
 }

View File

@@ -283,7 +283,7 @@ void MacroAssembler::DoubleToI(Register result_reg,
 Label::Distance dst) {
 ASSERT(!input_reg.is(scratch));
 cvttsd2si(result_reg, Operand(input_reg));
-cvtsi2sd(scratch, Operand(result_reg));
+Cvtsi2sd(scratch, Operand(result_reg));
 ucomisd(scratch, input_reg);
 j(not_equal, conversion_failed, dst);
 j(parity_even, conversion_failed, dst); // NaN.
@@ -392,7 +392,7 @@ void MacroAssembler::TaggedToI(Register result_reg,
 movdbl(xmm0, FieldOperand(input_reg, HeapNumber::kValueOffset));
 cvttsd2si(result_reg, Operand(xmm0));
-cvtsi2sd(temp, Operand(result_reg));
+Cvtsi2sd(temp, Operand(result_reg));
 ucomisd(xmm0, temp);
 RecordComment("Deferred TaggedToI: lost precision");
 j(not_equal, lost_precision, Label::kNear);
@@ -457,7 +457,7 @@ void MacroAssembler::LoadUint32(XMMRegister dst,
 cmp(src, Immediate(0));
 movdbl(scratch,
 Operand(reinterpret_cast<int32_t>(&kUint32Bias), RelocInfo::NONE32));
-cvtsi2sd(dst, src);
+Cvtsi2sd(dst, src);
 j(not_sign, &done, Label::kNear);
 addsd(dst, scratch);
 bind(&done);
@@ -676,6 +676,12 @@ void MacroAssembler::DebugBreak() {
 #endif
+void MacroAssembler::Cvtsi2sd(XMMRegister dst, const Operand& src) {
+xorps(dst, dst);
+cvtsi2sd(dst, src);
+}
 void MacroAssembler::Set(Register dst, const Immediate& x) {
 if (x.is_zero()) {
 xor_(dst, dst); // Shorter than mov.
@@ -834,7 +840,7 @@ void MacroAssembler::StoreNumberToDoubleElements(
 SmiUntag(scratch1);
 if (CpuFeatures::IsSupported(SSE2) && specialize_for_processor) {
 CpuFeatureScope fscope(this, SSE2);
-cvtsi2sd(scratch2, scratch1);
+Cvtsi2sd(scratch2, scratch1);
 movdbl(FieldOperand(elements, key, times_4,
 FixedDoubleArray::kHeaderSize - elements_offset),
 scratch2);

View File

@@ -366,6 +366,12 @@ class MacroAssembler: public Assembler {
 void Set(Register dst, const Immediate& x);
 void Set(const Operand& dst, const Immediate& x);
+// cvtsi2sd instruction only writes to the low 64-bit of dst register, which
+// hinders register renaming and makes dependence chains longer. So we use
+// xorps to clear the dst register before cvtsi2sd to solve this issue.
+void Cvtsi2sd(XMMRegister dst, Register src) { Cvtsi2sd(dst, Operand(src)); }
+void Cvtsi2sd(XMMRegister dst, const Operand& src);
 // Support for constant splitting.
 bool IsUnsafeImmediate(const Immediate& x);
 void SafeSet(Register dst, const Immediate& x);
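With these declarations, each ia32 call site changes only in the capitalization of the helper, and the Register overload forwards inline to the Operand form, so register and memory sources both get the xorps treatment. A representative before/after pair from the hunks above:

    __ cvtsi2sd(xmm0, ebx);  // before: merges into xmm0, stalls on its last writer
    __ Cvtsi2sd(xmm0, ebx);  // after: xmm0 is cleared first, so ebx is the
                             // only real input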

View File

@@ -862,7 +862,7 @@ void BaseStoreStubCompiler::GenerateStoreTransition(MacroAssembler* masm,
 __ SmiUntag(value_reg);
 if (CpuFeatures::IsSupported(SSE2)) {
 CpuFeatureScope use_sse2(masm, SSE2);
-__ cvtsi2sd(xmm0, value_reg);
+__ Cvtsi2sd(xmm0, value_reg);
 } else {
 __ push(value_reg);
 __ fild_s(Operand(esp, 0));
@@ -1041,7 +1041,7 @@ void BaseStoreStubCompiler::GenerateStoreField(MacroAssembler* masm,
 __ SmiUntag(value_reg);
 if (CpuFeatures::IsSupported(SSE2)) {
 CpuFeatureScope use_sse2(masm, SSE2);
-__ cvtsi2sd(xmm0, value_reg);
+__ Cvtsi2sd(xmm0, value_reg);
 } else {
 __ push(value_reg);
 __ fild_s(Operand(esp, 0));

View File

@@ -1009,7 +1009,7 @@ static void BinaryOpStub_CheckSmiInput(MacroAssembler* masm,
 __ movsd(xmm0, FieldOperand(input, HeapNumber::kValueOffset));
 // Convert, convert back, and compare the two doubles' bits.
 __ cvttsd2siq(scratch2, xmm0);
-__ cvtlsi2sd(xmm1, scratch2);
+__ Cvtlsi2sd(xmm1, scratch2);
 __ movq(scratch1, xmm0);
 __ movq(scratch2, xmm1);
 __ cmpq(scratch1, scratch2);
@@ -1145,7 +1145,7 @@ void TranscendentalCacheStub::Generate(MacroAssembler* masm) {
 // Then load the bits of the double into rbx.
 __ SmiToInteger32(rax, rax);
 __ subq(rsp, Immediate(kDoubleSize));
-__ cvtlsi2sd(xmm1, rax);
+__ Cvtlsi2sd(xmm1, rax);
 __ movsd(Operand(rsp, 0), xmm1);
 __ movq(rbx, xmm1);
 __ movq(rdx, xmm1);
@@ -1477,9 +1477,9 @@ void FloatingPointHelper::LoadAsIntegers(MacroAssembler* masm,
 void FloatingPointHelper::LoadSSE2SmiOperands(MacroAssembler* masm) {
 __ SmiToInteger32(kScratchRegister, rdx);
-__ cvtlsi2sd(xmm0, kScratchRegister);
+__ Cvtlsi2sd(xmm0, kScratchRegister);
 __ SmiToInteger32(kScratchRegister, rax);
-__ cvtlsi2sd(xmm1, kScratchRegister);
+__ Cvtlsi2sd(xmm1, kScratchRegister);
 }
@@ -1503,12 +1503,12 @@ void FloatingPointHelper::LoadSSE2UnknownOperands(MacroAssembler* masm,
 __ bind(&load_smi_rdx);
 __ SmiToInteger32(kScratchRegister, rdx);
-__ cvtlsi2sd(xmm0, kScratchRegister);
+__ Cvtlsi2sd(xmm0, kScratchRegister);
 __ JumpIfNotSmi(rax, &load_nonsmi_rax);
 __ bind(&load_smi_rax);
 __ SmiToInteger32(kScratchRegister, rax);
-__ cvtlsi2sd(xmm1, kScratchRegister);
+__ Cvtlsi2sd(xmm1, kScratchRegister);
 __ bind(&done);
 }
@@ -1541,7 +1541,7 @@ void FloatingPointHelper::NumbersToSmis(MacroAssembler* masm,
 __ cvttsd2siq(smi_result, xmm0);
 // Check if conversion was successful by converting back and
 // comparing to the original double's bits.
-__ cvtlsi2sd(xmm1, smi_result);
+__ Cvtlsi2sd(xmm1, smi_result);
 __ movq(kScratchRegister, xmm1);
 __ cmpq(scratch2, kScratchRegister);
 __ j(not_equal, on_not_smis);
@@ -1560,7 +1560,7 @@ void FloatingPointHelper::NumbersToSmis(MacroAssembler* masm,
 __ movsd(xmm0, FieldOperand(second, HeapNumber::kValueOffset));
 __ movq(scratch2, xmm0);
 __ cvttsd2siq(smi_result, xmm0);
-__ cvtlsi2sd(xmm1, smi_result);
+__ Cvtlsi2sd(xmm1, smi_result);
 __ movq(kScratchRegister, xmm1);
 __ cmpq(scratch2, kScratchRegister);
 __ j(not_equal, on_not_smis);
@@ -1603,7 +1603,7 @@ void MathPowStub::Generate(MacroAssembler* masm) {
 // Save 1 in double_result - we need this several times later on.
 __ movq(scratch, Immediate(1));
-__ cvtlsi2sd(double_result, scratch);
+__ Cvtlsi2sd(double_result, scratch);
 if (exponent_type_ == ON_STACK) {
 Label base_is_smi, unpack_exponent;
@@ -1623,7 +1623,7 @@ void MathPowStub::Generate(MacroAssembler* masm) {
 __ bind(&base_is_smi);
 __ SmiToInteger32(base, base);
-__ cvtlsi2sd(double_base, base);
+__ Cvtlsi2sd(double_base, base);
 __ bind(&unpack_exponent);
 __ JumpIfNotSmi(exponent, &exponent_not_smi, Label::kNear);
@@ -1812,7 +1812,7 @@ void MathPowStub::Generate(MacroAssembler* masm) {
 // and may not have contained the exponent value in the first place when the
 // input was a smi. We reset it with exponent value before bailing out.
 __ j(not_equal, &done);
-__ cvtlsi2sd(double_exponent, exponent);
+__ Cvtlsi2sd(double_exponent, exponent);
 // Returning or bailing out.
 Counters* counters = masm->isolate()->counters();
@@ -5376,7 +5376,7 @@ void ICCompareStub::GenerateNumbers(MacroAssembler* masm) {
 __ jmp(&left, Label::kNear);
 __ bind(&right_smi);
 __ SmiToInteger32(rcx, rax); // Can't clobber rax yet.
-__ cvtlsi2sd(xmm1, rcx);
+__ Cvtlsi2sd(xmm1, rcx);
 __ bind(&left);
 __ JumpIfSmi(rdx, &left_smi, Label::kNear);
@@ -5386,7 +5386,7 @@ void ICCompareStub::GenerateNumbers(MacroAssembler* masm) {
 __ jmp(&done);
 __ bind(&left_smi);
 __ SmiToInteger32(rcx, rdx); // Can't clobber rdx yet.
-__ cvtlsi2sd(xmm0, rcx);
+__ Cvtlsi2sd(xmm0, rcx);
 __ bind(&done);
 // Compare operands

View File

@@ -386,7 +386,7 @@ void ElementsTransitionGenerator::GenerateSmiToDouble(
 // rbx: current element (smi-tagged)
 __ JumpIfNotSmi(rbx, &convert_hole);
 __ SmiToInteger32(rbx, rbx);
-__ cvtlsi2sd(xmm0, rbx);
+__ Cvtlsi2sd(xmm0, rbx);
 __ movsd(FieldOperand(r14, r9, times_8, FixedDoubleArray::kHeaderSize),
 xmm0);
 __ jmp(&entry);

View File

@@ -3520,7 +3520,7 @@ void LCodeGen::DoMathFloor(LMathFloor* instr) {
 __ bind(&negative_sign);
 // Truncate, then compare and compensate.
 __ cvttsd2si(output_reg, input_reg);
-__ cvtlsi2sd(xmm_scratch, output_reg);
+__ Cvtlsi2sd(xmm_scratch, output_reg);
 __ ucomisd(input_reg, xmm_scratch);
 __ j(equal, &done, Label::kNear);
 __ subl(output_reg, Immediate(1));
@@ -3569,7 +3569,7 @@ void LCodeGen::DoMathRound(LMathRound* instr) {
 __ RecordComment("D2I conversion overflow");
 DeoptimizeIf(equal, instr->environment());
-__ cvtlsi2sd(xmm_scratch, output_reg);
+__ Cvtlsi2sd(xmm_scratch, output_reg);
 __ ucomisd(input_reg, xmm_scratch);
 __ j(equal, &restore, Label::kNear);
 __ subl(output_reg, Immediate(1));
@@ -4449,9 +4449,9 @@ void LCodeGen::DoInteger32ToDouble(LInteger32ToDouble* instr) {
 LOperand* output = instr->result();
 ASSERT(output->IsDoubleRegister());
 if (input->IsRegister()) {
-__ cvtlsi2sd(ToDoubleRegister(output), ToRegister(input));
+__ Cvtlsi2sd(ToDoubleRegister(output), ToRegister(input));
 } else {
-__ cvtlsi2sd(ToDoubleRegister(output), ToOperand(input));
+__ Cvtlsi2sd(ToDoubleRegister(output), ToOperand(input));
 }
 }
@@ -4668,7 +4668,7 @@ void LCodeGen::EmitNumberUntagD(Register input_reg,
 // Smi to XMM conversion
 __ bind(&load_smi);
 __ SmiToInteger32(kScratchRegister, input_reg);
-__ cvtlsi2sd(result_reg, kScratchRegister);
+__ Cvtlsi2sd(result_reg, kScratchRegister);
 __ bind(&done);
 }

View File

@@ -936,6 +936,18 @@ void MacroAssembler::PopCallerSaved(SaveFPRegsMode fp_mode,
 }
+void MacroAssembler::Cvtlsi2sd(XMMRegister dst, Register src) {
+xorps(dst, dst);
+cvtlsi2sd(dst, src);
+}
+void MacroAssembler::Cvtlsi2sd(XMMRegister dst, const Operand& src) {
+xorps(dst, dst);
+cvtlsi2sd(dst, src);
+}
 void MacroAssembler::Set(Register dst, int64_t x) {
 if (x == 0) {
 xorl(dst, dst);
@@ -2917,7 +2929,7 @@ void MacroAssembler::StoreNumberToDoubleElements(
 // Value is a smi. convert to a double and store.
 // Preserve original value.
 SmiToInteger32(kScratchRegister, maybe_number);
-cvtlsi2sd(xmm_scratch, kScratchRegister);
+Cvtlsi2sd(xmm_scratch, kScratchRegister);
 movsd(FieldOperand(elements, index, times_8,
 FixedDoubleArray::kHeaderSize - elements_offset),
 xmm_scratch);
@@ -3050,7 +3062,7 @@ void MacroAssembler::DoubleToI(Register result_reg,
 Label* conversion_failed,
 Label::Distance dst) {
 cvttsd2si(result_reg, input_reg);
-cvtlsi2sd(xmm0, result_reg);
+Cvtlsi2sd(xmm0, result_reg);
 ucomisd(xmm0, input_reg);
 j(not_equal, conversion_failed, dst);
 j(parity_even, conversion_failed, dst); // NaN.
@@ -3087,7 +3099,7 @@ void MacroAssembler::TaggedToI(Register result_reg,
 movsd(xmm0, FieldOperand(input_reg, HeapNumber::kValueOffset));
 cvttsd2si(result_reg, xmm0);
-cvtlsi2sd(temp, result_reg);
+Cvtlsi2sd(temp, result_reg);
 ucomisd(xmm0, temp);
 RecordComment("Deferred TaggedToI: lost precision");
 j(not_equal, lost_precision, dst);

View File

@@ -784,6 +784,12 @@ class MacroAssembler: public Assembler {
 void Set(Register dst, int64_t x);
 void Set(const Operand& dst, int64_t x);
+// cvtsi2sd instruction only writes to the low 64-bit of dst register, which
+// hinders register renaming and makes dependence chains longer. So we use
+// xorps to clear the dst register before cvtsi2sd to solve this issue.
+void Cvtlsi2sd(XMMRegister dst, Register src);
+void Cvtlsi2sd(XMMRegister dst, const Operand& src);
 // Move if the registers are not identical.
 void Move(Register target, Register source);
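Unlike ia32, the x64 port defines both Cvtlsi2sd overloads out of line (see the macro-assembler hunks above), and lithium code picks whichever overload matches its operand kind, as in DoInteger32ToDouble:

    __ Cvtlsi2sd(ToDoubleRegister(output), ToRegister(input));  // register source
    __ Cvtlsi2sd(ToDoubleRegister(output), ToOperand(input));   // memory source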

View File

@@ -842,7 +842,7 @@ void BaseStoreStubCompiler::GenerateStoreTransition(MacroAssembler* masm,
 __ JumpIfNotSmi(value_reg, &heap_number);
 __ SmiToInteger32(scratch1, value_reg);
-__ cvtlsi2sd(xmm0, scratch1);
+__ Cvtlsi2sd(xmm0, scratch1);
 __ jmp(&do_store);
 __ bind(&heap_number);
@@ -996,7 +996,7 @@ void BaseStoreStubCompiler::GenerateStoreField(MacroAssembler* masm,
 Label do_store, heap_number;
 __ JumpIfNotSmi(value_reg, &heap_number);
 __ SmiToInteger32(scratch2, value_reg);
-__ cvtlsi2sd(xmm0, scratch2);
+__ Cvtlsi2sd(xmm0, scratch2);
 __ jmp(&do_store);
 __ bind(&heap_number);