X64 tweaks.
Use movapd instead of movsd to move between xmm registers. Avoids partial register writes and potential stalls. Use xorps instead of xorpd to zero a register. Same effect and latency, but one byte smaller. Review URL: http://codereview.chromium.org/6873006 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@7631 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
5fd4fa2a9e
commit
3b445f14f8
@ -2540,6 +2540,24 @@ void Assembler::movq(Register dst, XMMRegister src) {
|
||||
}
|
||||
|
||||
|
||||
// Emits a MOVQ that copies between two XMM registers.
//
// Two encodings are used, selected by the low bits of |dst|'s register code:
//   - low bits != 4: prefix 0x66, opcode 0F D6, operands passed as (src, dst).
//   - low bits == 4: prefix 0xF3, opcode 0F 7E, operands passed as (dst, src).
// The original author's note on the second form is "Avoid unnecessary SIB
// byte".
void Assembler::movq(XMMRegister dst, XMMRegister src) {
  EnsureSpace ensure_space(this);

  if (dst.low_bits() != 4) {
    // 66 [REX] 0F D6 /r with the source register given first.
    emit(0x66);
    emit_optional_rex_32(src, dst);
    emit(0x0F);
    emit(0xD6);
    emit_sse_operand(src, dst);
    return;
  }

  // F3 [REX] 0F 7E /r with the destination register given first.
  emit(0xf3);
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0x7e);
  emit_sse_operand(dst, src);
}
|
||||
|
||||
void Assembler::movdqa(const Operand& dst, XMMRegister src) {
|
||||
EnsureSpace ensure_space(this);
|
||||
emit(0x66);
|
||||
@ -2603,6 +2621,42 @@ void Assembler::movsd(XMMRegister dst, const Operand& src) {
|
||||
}
|
||||
|
||||
|
||||
// Emits a MOVAPS that copies one XMM register into another.
//
// Normally opcode 0F 28 is emitted with the operands passed as (dst, src).
// When the low bits of |src|'s register code equal 4, opcode 0F 29 is
// emitted instead with the operands passed as (src, dst); the original
// author's note describes this as trying to avoid an unnecessary SIB byte.
void Assembler::movaps(XMMRegister dst, XMMRegister src) {
  EnsureSpace ensure_space(this);

  if (src.low_bits() != 4) {
    // [REX] 0F 28 /r with the destination register given first.
    emit_optional_rex_32(dst, src);
    emit(0x0F);
    emit(0x28);
    emit_sse_operand(dst, src);
    return;
  }

  // [REX] 0F 29 /r with the source register given first.
  emit_optional_rex_32(src, dst);
  emit(0x0F);
  emit(0x29);
  emit_sse_operand(src, dst);
}
|
||||
|
||||
|
||||
// Emits a MOVAPD that copies one XMM register into another.
//
// Both encodings start with the 0x66 prefix. Normally opcode 0F 28 is
// emitted with the operands passed as (dst, src). When the low bits of
// |src|'s register code equal 4, opcode 0F 29 is emitted instead with the
// operands passed as (src, dst); the original author's note describes this
// as trying to avoid an unnecessary SIB byte.
void Assembler::movapd(XMMRegister dst, XMMRegister src) {
  EnsureSpace ensure_space(this);

  if (src.low_bits() != 4) {
    // 66 [REX] 0F 28 /r with the destination register given first.
    emit(0x66);
    emit_optional_rex_32(dst, src);
    emit(0x0F);
    emit(0x28);
    emit_sse_operand(dst, src);
    return;
  }

  // 66 [REX] 0F 29 /r with the source register given first.
  emit(0x66);
  emit_optional_rex_32(src, dst);
  emit(0x0F);
  emit(0x29);
  emit_sse_operand(src, dst);
}
|
||||
|
||||
|
||||
void Assembler::movss(XMMRegister dst, const Operand& src) {
|
||||
EnsureSpace ensure_space(this);
|
||||
emit(0xF3); // single
|
||||
@ -2833,6 +2887,15 @@ void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
|
||||
}
|
||||
|
||||
|
||||
// Emits XORPS dst, src as: optional REX prefix, 0F 57, then the register
// operand byte. Unlike the other SSE emitters in this change, no mandatory
// prefix byte is written before the REX prefix.
void Assembler::xorps(XMMRegister dst, XMMRegister src) {
  EnsureSpace ensure_space(this);

  // Optional REX prefix for the register pair.
  emit_optional_rex_32(dst, src);

  // Two-byte opcode 0F 57.
  emit(0x0F);
  emit(0x57);

  // Register operands, destination first.
  emit_sse_operand(dst, src);
}
|
||||
|
||||
|
||||
void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
|
||||
EnsureSpace ensure_space(this);
|
||||
emit(0xF2);
|
||||
|
@ -1291,15 +1291,24 @@ class Assembler : public AssemblerBase {
|
||||
void movd(Register dst, XMMRegister src);
|
||||
void movq(XMMRegister dst, Register src);
|
||||
void movq(Register dst, XMMRegister src);
|
||||
void movq(XMMRegister dst, XMMRegister src);
|
||||
void extractps(Register dst, XMMRegister src, byte imm8);
|
||||
|
||||
void movsd(const Operand& dst, XMMRegister src);
|
||||
// Don't use this unless it's important to keep the
|
||||
// top half of the destination register unchanged.
|
||||
// Use movaps when moving double values and movq for integer
|
||||
// values in xmm registers.
|
||||
void movsd(XMMRegister dst, XMMRegister src);
|
||||
|
||||
void movsd(const Operand& dst, XMMRegister src);
|
||||
void movsd(XMMRegister dst, const Operand& src);
|
||||
|
||||
void movdqa(const Operand& dst, XMMRegister src);
|
||||
void movdqa(XMMRegister dst, const Operand& src);
|
||||
|
||||
void movapd(XMMRegister dst, XMMRegister src);
|
||||
void movaps(XMMRegister dst, XMMRegister src);
|
||||
|
||||
void movss(XMMRegister dst, const Operand& src);
|
||||
void movss(const Operand& dst, XMMRegister src);
|
||||
|
||||
@ -1331,6 +1340,7 @@ class Assembler : public AssemblerBase {
|
||||
void andpd(XMMRegister dst, XMMRegister src);
|
||||
void orpd(XMMRegister dst, XMMRegister src);
|
||||
void xorpd(XMMRegister dst, XMMRegister src);
|
||||
void xorps(XMMRegister dst, XMMRegister src);
|
||||
void sqrtsd(XMMRegister dst, XMMRegister src);
|
||||
|
||||
void ucomisd(XMMRegister dst, XMMRegister src);
|
||||
|
@ -266,7 +266,7 @@ void ToBooleanStub::Generate(MacroAssembler* masm) {
|
||||
__ j(not_equal, &true_result);
|
||||
// HeapNumber => false iff +0, -0, or NaN.
|
||||
// These three cases set the zero flag when compared to zero using ucomisd.
|
||||
__ xorpd(xmm0, xmm0);
|
||||
__ xorps(xmm0, xmm0);
|
||||
__ ucomisd(xmm0, FieldOperand(rax, HeapNumber::kValueOffset));
|
||||
__ j(zero, &false_result);
|
||||
// Fall through to |true_result|.
|
||||
@ -1602,7 +1602,7 @@ void MathPowStub::Generate(MacroAssembler* masm) {
|
||||
__ bind(&no_neg);
|
||||
|
||||
// Load xmm1 with 1.
|
||||
__ movsd(xmm1, xmm3);
|
||||
__ movaps(xmm1, xmm3);
|
||||
NearLabel while_true;
|
||||
NearLabel no_multiply;
|
||||
|
||||
@ -1620,8 +1620,8 @@ void MathPowStub::Generate(MacroAssembler* masm) {
|
||||
__ j(positive, &allocate_return);
|
||||
// Special case if xmm1 has reached infinity.
|
||||
__ divsd(xmm3, xmm1);
|
||||
__ movsd(xmm1, xmm3);
|
||||
__ xorpd(xmm0, xmm0);
|
||||
__ movaps(xmm1, xmm3);
|
||||
__ xorps(xmm0, xmm0);
|
||||
__ ucomisd(xmm0, xmm1);
|
||||
__ j(equal, &call_runtime);
|
||||
|
||||
@ -1669,11 +1669,11 @@ void MathPowStub::Generate(MacroAssembler* masm) {
|
||||
|
||||
// Calculates reciprocal of square root.
|
||||
// sqrtsd returns -0 when input is -0. ECMA spec requires +0.
|
||||
__ xorpd(xmm1, xmm1);
|
||||
__ xorps(xmm1, xmm1);
|
||||
__ addsd(xmm1, xmm0);
|
||||
__ sqrtsd(xmm1, xmm1);
|
||||
__ divsd(xmm3, xmm1);
|
||||
__ movsd(xmm1, xmm3);
|
||||
__ movaps(xmm1, xmm3);
|
||||
__ jmp(&allocate_return);
|
||||
|
||||
// Test for 0.5.
|
||||
@ -1686,8 +1686,8 @@ void MathPowStub::Generate(MacroAssembler* masm) {
|
||||
__ j(not_equal, &call_runtime);
|
||||
// Calculates square root.
|
||||
// sqrtsd returns -0 when input is -0. ECMA spec requires +0.
|
||||
__ xorpd(xmm1, xmm1);
|
||||
__ addsd(xmm1, xmm0);
|
||||
__ xorps(xmm1, xmm1);
|
||||
__ addsd(xmm1, xmm0); // Convert -0 to 0.
|
||||
__ sqrtsd(xmm1, xmm1);
|
||||
|
||||
__ bind(&allocate_return);
|
||||
|
@ -1033,7 +1033,14 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
|
||||
}
|
||||
} else {
|
||||
get_modrm(*current, &mod, &regop, &rm);
|
||||
if (opcode == 0x6E) {
|
||||
if (opcode == 0x28) {
|
||||
AppendToBuffer("movapd %s, ", NameOfXMMRegister(regop));
|
||||
current += PrintRightXMMOperand(current);
|
||||
} else if (opcode == 0x29) {
|
||||
AppendToBuffer("movapd ");
|
||||
current += PrintRightXMMOperand(current);
|
||||
AppendToBuffer(", %s", NameOfXMMRegister(regop));
|
||||
} else if (opcode == 0x6E) {
|
||||
AppendToBuffer("mov%c %s,",
|
||||
rex_w() ? 'q' : 'd',
|
||||
NameOfXMMRegister(regop));
|
||||
@ -1051,6 +1058,10 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
|
||||
AppendToBuffer("movdqa ");
|
||||
current += PrintRightXMMOperand(current);
|
||||
AppendToBuffer(", %s", NameOfXMMRegister(regop));
|
||||
} else if (opcode == 0xD6) {
|
||||
AppendToBuffer("movq ");
|
||||
current += PrintRightXMMOperand(current);
|
||||
AppendToBuffer(", %s", NameOfXMMRegister(regop));
|
||||
} else {
|
||||
const char* mnemonic = "?";
|
||||
if (opcode == 0x50) {
|
||||
@ -1152,6 +1163,11 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
|
||||
get_modrm(*current, &mod, &regop, &rm);
|
||||
AppendToBuffer("cvtss2sd %s,", NameOfXMMRegister(regop));
|
||||
current += PrintRightXMMOperand(current);
|
||||
} else if (opcode == 0x7E) {
|
||||
int mod, regop, rm;
|
||||
get_modrm(*current, &mod, &regop, &rm);
|
||||
AppendToBuffer("movq %s, ", NameOfXMMRegister(regop));
|
||||
current += PrintRightXMMOperand(current);
|
||||
} else {
|
||||
UnimplementedInstruction();
|
||||
}
|
||||
@ -1169,6 +1185,22 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
|
||||
current += 4;
|
||||
} // else no immediate displacement.
|
||||
AppendToBuffer("nop");
|
||||
|
||||
} else if (opcode == 28) {
|
||||
// movaps xmm, xmm/m128
|
||||
int mod, regop, rm;
|
||||
get_modrm(*current, &mod, &regop, &rm);
|
||||
AppendToBuffer("movaps %s, ", NameOfXMMRegister(regop));
|
||||
current += PrintRightXMMOperand(current);
|
||||
|
||||
} else if (opcode == 29) {
|
||||
// movaps xmm/m128, xmm
|
||||
int mod, regop, rm;
|
||||
get_modrm(*current, &mod, &regop, &rm);
|
||||
AppendToBuffer("movaps");
|
||||
current += PrintRightXMMOperand(current);
|
||||
AppendToBuffer(", %s", NameOfXMMRegister(regop));
|
||||
|
||||
} else if (opcode == 0xA2 || opcode == 0x31) {
|
||||
// RDTSC or CPUID
|
||||
AppendToBuffer("%s", mnemonic);
|
||||
@ -1180,6 +1212,13 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
|
||||
byte_size_operand_ = idesc.byte_size_operation;
|
||||
current += PrintOperands(idesc.mnem, idesc.op_order_, current);
|
||||
|
||||
} else if (opcode == 57) {
|
||||
// xorps xmm, xmm/m128
|
||||
int mod, regop, rm;
|
||||
get_modrm(*current, &mod, &regop, &rm);
|
||||
AppendToBuffer("xorps %s, ", NameOfXMMRegister(regop));
|
||||
current += PrintRightXMMOperand(current);
|
||||
|
||||
} else if ((opcode & 0xF0) == 0x80) {
|
||||
// Jcc: Conditional jump (branch).
|
||||
current = data + JumpConditional(data);
|
||||
|
@ -2758,7 +2758,7 @@ void FullCodeGenerator::EmitRandomHeapNumber(ZoneList<Expression*>* args) {
|
||||
__ movd(xmm1, rcx);
|
||||
__ movd(xmm0, rax);
|
||||
__ cvtss2sd(xmm1, xmm1);
|
||||
__ xorpd(xmm0, xmm1);
|
||||
__ xorps(xmm0, xmm1);
|
||||
__ subsd(xmm0, xmm1);
|
||||
__ movsd(FieldOperand(rbx, HeapNumber::kValueOffset), xmm0);
|
||||
|
||||
|
@ -1111,7 +1111,7 @@ void LCodeGen::DoConstantD(LConstantD* instr) {
|
||||
// Use xor to produce +0.0 in a fast and compact way, but avoid to
|
||||
// do so if the constant is -0.0.
|
||||
if (int_val == 0) {
|
||||
__ xorpd(res, res);
|
||||
__ xorps(res, res);
|
||||
} else {
|
||||
Register tmp = ToRegister(instr->TempAt(0));
|
||||
__ Set(tmp, int_val);
|
||||
@ -1223,12 +1223,12 @@ void LCodeGen::DoArithmeticD(LArithmeticD* instr) {
|
||||
break;
|
||||
case Token::MOD:
|
||||
__ PrepareCallCFunction(2);
|
||||
__ movsd(xmm0, left);
|
||||
__ movaps(xmm0, left);
|
||||
ASSERT(right.is(xmm1));
|
||||
__ CallCFunction(
|
||||
ExternalReference::double_fp_operation(Token::MOD, isolate()), 2);
|
||||
__ movq(rsi, Operand(rbp, StandardFrameConstants::kContextOffset));
|
||||
__ movsd(result, xmm0);
|
||||
__ movaps(result, xmm0);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
@ -1287,7 +1287,7 @@ void LCodeGen::DoBranch(LBranch* instr) {
|
||||
EmitBranch(true_block, false_block, not_zero);
|
||||
} else if (r.IsDouble()) {
|
||||
XMMRegister reg = ToDoubleRegister(instr->InputAt(0));
|
||||
__ xorpd(xmm0, xmm0);
|
||||
__ xorps(xmm0, xmm0);
|
||||
__ ucomisd(reg, xmm0);
|
||||
EmitBranch(true_block, false_block, not_equal);
|
||||
} else {
|
||||
@ -1322,7 +1322,7 @@ void LCodeGen::DoBranch(LBranch* instr) {
|
||||
|
||||
// HeapNumber => false iff +0, -0, or NaN. These three cases set the
|
||||
// zero flag when compared to zero using ucomisd.
|
||||
__ xorpd(xmm0, xmm0);
|
||||
__ xorps(xmm0, xmm0);
|
||||
__ ucomisd(xmm0, FieldOperand(reg, HeapNumber::kValueOffset));
|
||||
__ j(zero, false_label);
|
||||
__ jmp(true_label);
|
||||
@ -2671,7 +2671,7 @@ void LCodeGen::DoMathAbs(LUnaryMathOperation* instr) {
|
||||
if (r.IsDouble()) {
|
||||
XMMRegister scratch = xmm0;
|
||||
XMMRegister input_reg = ToDoubleRegister(instr->InputAt(0));
|
||||
__ xorpd(scratch, scratch);
|
||||
__ xorps(scratch, scratch);
|
||||
__ subsd(scratch, input_reg);
|
||||
__ andpd(input_reg, scratch);
|
||||
} else if (r.IsInteger32()) {
|
||||
@ -2708,7 +2708,7 @@ void LCodeGen::DoMathFloor(LUnaryMathOperation* instr) {
|
||||
__ cmpl(output_reg, Immediate(0x80000000));
|
||||
DeoptimizeIf(equal, instr->environment());
|
||||
} else {
|
||||
__ xorpd(xmm_scratch, xmm_scratch); // Zero the register.
|
||||
__ xorps(xmm_scratch, xmm_scratch); // Zero the register.
|
||||
__ ucomisd(input_reg, xmm_scratch);
|
||||
|
||||
if (instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) {
|
||||
@ -2784,7 +2784,7 @@ void LCodeGen::DoMathPowHalf(LUnaryMathOperation* instr) {
|
||||
XMMRegister xmm_scratch = xmm0;
|
||||
XMMRegister input_reg = ToDoubleRegister(instr->InputAt(0));
|
||||
ASSERT(ToDoubleRegister(instr->result()).is(input_reg));
|
||||
__ xorpd(xmm_scratch, xmm_scratch);
|
||||
__ xorps(xmm_scratch, xmm_scratch);
|
||||
__ addsd(input_reg, xmm_scratch); // Convert -0 to +0.
|
||||
__ sqrtsd(input_reg, input_reg);
|
||||
}
|
||||
@ -2800,7 +2800,7 @@ void LCodeGen::DoPower(LPower* instr) {
|
||||
if (exponent_type.IsDouble()) {
|
||||
__ PrepareCallCFunction(2);
|
||||
// Move arguments to correct registers
|
||||
__ movsd(xmm0, left_reg);
|
||||
__ movaps(xmm0, left_reg);
|
||||
ASSERT(ToDoubleRegister(right).is(xmm1));
|
||||
__ CallCFunction(
|
||||
ExternalReference::power_double_double_function(isolate()), 2);
|
||||
@ -2808,7 +2808,7 @@ void LCodeGen::DoPower(LPower* instr) {
|
||||
__ PrepareCallCFunction(2);
|
||||
// Move arguments to correct registers: xmm0 and edi (not rdi).
|
||||
// On Windows, the registers are xmm0 and edx.
|
||||
__ movsd(xmm0, left_reg);
|
||||
__ movaps(xmm0, left_reg);
|
||||
#ifdef _WIN64
|
||||
ASSERT(ToRegister(right).is(rdx));
|
||||
#else
|
||||
@ -2834,13 +2834,13 @@ void LCodeGen::DoPower(LPower* instr) {
|
||||
__ bind(&call);
|
||||
__ PrepareCallCFunction(2);
|
||||
// Move arguments to correct registers xmm0 and xmm1.
|
||||
__ movsd(xmm0, left_reg);
|
||||
__ movaps(xmm0, left_reg);
|
||||
// Right argument is already in xmm1.
|
||||
__ CallCFunction(
|
||||
ExternalReference::power_double_double_function(isolate()), 2);
|
||||
}
|
||||
// Return value is in xmm0.
|
||||
__ movsd(result_reg, xmm0);
|
||||
__ movaps(result_reg, xmm0);
|
||||
// Restore context register.
|
||||
__ movq(rsi, Operand(rbp, StandardFrameConstants::kContextOffset));
|
||||
}
|
||||
@ -3422,7 +3422,7 @@ void LCodeGen::EmitNumberUntagD(Register input_reg,
|
||||
DeoptimizeIf(not_equal, env);
|
||||
|
||||
// Convert undefined to NaN. Compute NaN as 0/0.
|
||||
__ xorpd(result_reg, result_reg);
|
||||
__ xorps(result_reg, result_reg);
|
||||
__ divsd(result_reg, result_reg);
|
||||
__ jmp(&done);
|
||||
|
||||
|
@ -214,7 +214,7 @@ void LGapResolver::EmitMove(int index) {
|
||||
} else if (source->IsDoubleRegister()) {
|
||||
XMMRegister src = cgen_->ToDoubleRegister(source);
|
||||
if (destination->IsDoubleRegister()) {
|
||||
__ movsd(cgen_->ToDoubleRegister(destination), src);
|
||||
__ movaps(cgen_->ToDoubleRegister(destination), src);
|
||||
} else {
|
||||
ASSERT(destination->IsDoubleStackSlot());
|
||||
__ movsd(cgen_->ToOperand(destination), src);
|
||||
@ -273,9 +273,9 @@ void LGapResolver::EmitSwap(int index) {
|
||||
// Swap two double registers.
|
||||
XMMRegister source_reg = cgen_->ToDoubleRegister(source);
|
||||
XMMRegister destination_reg = cgen_->ToDoubleRegister(destination);
|
||||
__ movsd(xmm0, source_reg);
|
||||
__ movsd(source_reg, destination_reg);
|
||||
__ movsd(destination_reg, xmm0);
|
||||
__ movaps(xmm0, source_reg);
|
||||
__ movaps(source_reg, destination_reg);
|
||||
__ movaps(destination_reg, xmm0);
|
||||
|
||||
} else if (source->IsDoubleRegister() || destination->IsDoubleRegister()) {
|
||||
// Swap a double register and a double stack slot.
|
||||
|
@ -1248,12 +1248,17 @@ void MacroAssembler::SmiAdd(Register dst,
|
||||
Register src2) {
|
||||
// No overflow checking. Use only when it's known that
|
||||
// overflowing is impossible.
|
||||
ASSERT(!dst.is(src2));
|
||||
if (!dst.is(src1)) {
|
||||
movq(dst, src1);
|
||||
if (emit_debug_code()) {
|
||||
movq(kScratchRegister, src1);
|
||||
addq(kScratchRegister, src2);
|
||||
Check(no_overflow, "Smi addition overflow");
|
||||
}
|
||||
lea(dst, Operand(src1, src2, times_1, 0));
|
||||
} else {
|
||||
addq(dst, src2);
|
||||
Assert(no_overflow, "Smi addition overflow");
|
||||
}
|
||||
addq(dst, src2);
|
||||
Assert(no_overflow, "Smi addition overflow");
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user