X64 tweaks.

Use movapd instead of movsd to move between xmm registers. Avoids partial
register writes and potential stalls.

Use xorps instead of xorpd to zero a register. Same effect and latency, but
one byte smaller.

Review URL: http://codereview.chromium.org/6873006

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@7631 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
lrn@chromium.org 2011-04-15 13:06:41 +00:00
parent 5fd4fa2a9e
commit 3b445f14f8
8 changed files with 149 additions and 32 deletions

View File

@ -2540,6 +2540,24 @@ void Assembler::movq(Register dst, XMMRegister src) {
}
void Assembler::movq(XMMRegister dst, XMMRegister src) {
EnsureSpace ensure_space(this);
if (dst.low_bits() == 4) {
// Avoid unnecessary SIB byte.
emit(0xf3);
emit_optional_rex_32(dst, src);
emit(0x0F);
emit(0x7e);
emit_sse_operand(dst, src);
} else {
emit(0x66);
emit_optional_rex_32(src, dst);
emit(0x0F);
emit(0xD6);
emit_sse_operand(src, dst);
}
}
void Assembler::movdqa(const Operand& dst, XMMRegister src) {
EnsureSpace ensure_space(this);
emit(0x66);
@ -2603,6 +2621,42 @@ void Assembler::movsd(XMMRegister dst, const Operand& src) {
}
void Assembler::movaps(XMMRegister dst, XMMRegister src) {
EnsureSpace ensure_space(this);
if (src.low_bits() == 4) {
// Try to avoid an unnecessary SIB byte.
emit_optional_rex_32(src, dst);
emit(0x0F);
emit(0x29);
emit_sse_operand(src, dst);
} else {
emit_optional_rex_32(dst, src);
emit(0x0F);
emit(0x28);
emit_sse_operand(dst, src);
}
}
void Assembler::movapd(XMMRegister dst, XMMRegister src) {
EnsureSpace ensure_space(this);
if (src.low_bits() == 4) {
// Try to avoid an unnecessary SIB byte.
emit(0x66);
emit_optional_rex_32(src, dst);
emit(0x0F);
emit(0x29);
emit_sse_operand(src, dst);
} else {
emit(0x66);
emit_optional_rex_32(dst, src);
emit(0x0F);
emit(0x28);
emit_sse_operand(dst, src);
}
}
void Assembler::movss(XMMRegister dst, const Operand& src) {
EnsureSpace ensure_space(this);
emit(0xF3); // single
@ -2833,6 +2887,15 @@ void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
}
void Assembler::xorps(XMMRegister dst, XMMRegister src) {
EnsureSpace ensure_space(this);
emit_optional_rex_32(dst, src);
emit(0x0F);
emit(0x57);
emit_sse_operand(dst, src);
}
void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
EnsureSpace ensure_space(this);
emit(0xF2);

View File

@ -1291,15 +1291,24 @@ class Assembler : public AssemblerBase {
void movd(Register dst, XMMRegister src);
void movq(XMMRegister dst, Register src);
void movq(Register dst, XMMRegister src);
void movq(XMMRegister dst, XMMRegister src);
void extractps(Register dst, XMMRegister src, byte imm8);
void movsd(const Operand& dst, XMMRegister src);
// Don't use this unless it's important to keep the
// top half of the destination register unchanged.
// Used movaps when moving double values and movq for integer
// values in xmm registers.
void movsd(XMMRegister dst, XMMRegister src);
void movsd(const Operand& dst, XMMRegister src);
void movsd(XMMRegister dst, const Operand& src);
void movdqa(const Operand& dst, XMMRegister src);
void movdqa(XMMRegister dst, const Operand& src);
void movapd(XMMRegister dst, XMMRegister src);
void movaps(XMMRegister dst, XMMRegister src);
void movss(XMMRegister dst, const Operand& src);
void movss(const Operand& dst, XMMRegister src);
@ -1331,6 +1340,7 @@ class Assembler : public AssemblerBase {
void andpd(XMMRegister dst, XMMRegister src);
void orpd(XMMRegister dst, XMMRegister src);
void xorpd(XMMRegister dst, XMMRegister src);
void xorps(XMMRegister dst, XMMRegister src);
void sqrtsd(XMMRegister dst, XMMRegister src);
void ucomisd(XMMRegister dst, XMMRegister src);

View File

@ -266,7 +266,7 @@ void ToBooleanStub::Generate(MacroAssembler* masm) {
__ j(not_equal, &true_result);
// HeapNumber => false iff +0, -0, or NaN.
// These three cases set the zero flag when compared to zero using ucomisd.
__ xorpd(xmm0, xmm0);
__ xorps(xmm0, xmm0);
__ ucomisd(xmm0, FieldOperand(rax, HeapNumber::kValueOffset));
__ j(zero, &false_result);
// Fall through to |true_result|.
@ -1602,7 +1602,7 @@ void MathPowStub::Generate(MacroAssembler* masm) {
__ bind(&no_neg);
// Load xmm1 with 1.
__ movsd(xmm1, xmm3);
__ movaps(xmm1, xmm3);
NearLabel while_true;
NearLabel no_multiply;
@ -1620,8 +1620,8 @@ void MathPowStub::Generate(MacroAssembler* masm) {
__ j(positive, &allocate_return);
// Special case if xmm1 has reached infinity.
__ divsd(xmm3, xmm1);
__ movsd(xmm1, xmm3);
__ xorpd(xmm0, xmm0);
__ movaps(xmm1, xmm3);
__ xorps(xmm0, xmm0);
__ ucomisd(xmm0, xmm1);
__ j(equal, &call_runtime);
@ -1669,11 +1669,11 @@ void MathPowStub::Generate(MacroAssembler* masm) {
// Calculates reciprocal of square root.
// sqrtsd returns -0 when input is -0. ECMA spec requires +0.
__ xorpd(xmm1, xmm1);
__ xorps(xmm1, xmm1);
__ addsd(xmm1, xmm0);
__ sqrtsd(xmm1, xmm1);
__ divsd(xmm3, xmm1);
__ movsd(xmm1, xmm3);
__ movaps(xmm1, xmm3);
__ jmp(&allocate_return);
// Test for 0.5.
@ -1686,8 +1686,8 @@ void MathPowStub::Generate(MacroAssembler* masm) {
__ j(not_equal, &call_runtime);
// Calculates square root.
// sqrtsd returns -0 when input is -0. ECMA spec requires +0.
__ xorpd(xmm1, xmm1);
__ addsd(xmm1, xmm0);
__ xorps(xmm1, xmm1);
__ addsd(xmm1, xmm0); // Convert -0 to 0.
__ sqrtsd(xmm1, xmm1);
__ bind(&allocate_return);

View File

@ -1033,7 +1033,14 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
}
} else {
get_modrm(*current, &mod, &regop, &rm);
if (opcode == 0x6E) {
if (opcode == 0x28) {
AppendToBuffer("movapd %s, ", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
} else if (opcode == 0x29) {
AppendToBuffer("movapd ");
current += PrintRightXMMOperand(current);
AppendToBuffer(", %s", NameOfXMMRegister(regop));
} else if (opcode == 0x6E) {
AppendToBuffer("mov%c %s,",
rex_w() ? 'q' : 'd',
NameOfXMMRegister(regop));
@ -1051,6 +1058,10 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
AppendToBuffer("movdqa ");
current += PrintRightXMMOperand(current);
AppendToBuffer(", %s", NameOfXMMRegister(regop));
} else if (opcode == 0xD6) {
AppendToBuffer("movq ");
current += PrintRightXMMOperand(current);
AppendToBuffer(", %s", NameOfXMMRegister(regop));
} else {
const char* mnemonic = "?";
if (opcode == 0x50) {
@ -1152,6 +1163,11 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
get_modrm(*current, &mod, &regop, &rm);
AppendToBuffer("cvtss2sd %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
} else if (opcode == 0x7E) {
int mod, regop, rm;
get_modrm(*current, &mod, &regop, &rm);
AppendToBuffer("movq %s, ", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
} else {
UnimplementedInstruction();
}
@ -1169,6 +1185,22 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
current += 4;
} // else no immediate displacement.
AppendToBuffer("nop");
} else if (opcode == 28) {
// movaps xmm, xmm/m128
int mod, regop, rm;
get_modrm(*current, &mod, &regop, &rm);
AppendToBuffer("movaps %s, ", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
} else if (opcode == 29) {
// movaps xmm/m128, xmm
int mod, regop, rm;
get_modrm(*current, &mod, &regop, &rm);
AppendToBuffer("movaps");
current += PrintRightXMMOperand(current);
AppendToBuffer(", %s", NameOfXMMRegister(regop));
} else if (opcode == 0xA2 || opcode == 0x31) {
// RDTSC or CPUID
AppendToBuffer("%s", mnemonic);
@ -1180,6 +1212,13 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
byte_size_operand_ = idesc.byte_size_operation;
current += PrintOperands(idesc.mnem, idesc.op_order_, current);
} else if (opcode == 57) {
// xoprps xmm, xmm/m128
int mod, regop, rm;
get_modrm(*current, &mod, &regop, &rm);
AppendToBuffer("xorps %s, ", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
} else if ((opcode & 0xF0) == 0x80) {
// Jcc: Conditional jump (branch).
current = data + JumpConditional(data);

View File

@ -2758,7 +2758,7 @@ void FullCodeGenerator::EmitRandomHeapNumber(ZoneList<Expression*>* args) {
__ movd(xmm1, rcx);
__ movd(xmm0, rax);
__ cvtss2sd(xmm1, xmm1);
__ xorpd(xmm0, xmm1);
__ xorps(xmm0, xmm1);
__ subsd(xmm0, xmm1);
__ movsd(FieldOperand(rbx, HeapNumber::kValueOffset), xmm0);

View File

@ -1111,7 +1111,7 @@ void LCodeGen::DoConstantD(LConstantD* instr) {
// Use xor to produce +0.0 in a fast and compact way, but avoid to
// do so if the constant is -0.0.
if (int_val == 0) {
__ xorpd(res, res);
__ xorps(res, res);
} else {
Register tmp = ToRegister(instr->TempAt(0));
__ Set(tmp, int_val);
@ -1223,12 +1223,12 @@ void LCodeGen::DoArithmeticD(LArithmeticD* instr) {
break;
case Token::MOD:
__ PrepareCallCFunction(2);
__ movsd(xmm0, left);
__ movaps(xmm0, left);
ASSERT(right.is(xmm1));
__ CallCFunction(
ExternalReference::double_fp_operation(Token::MOD, isolate()), 2);
__ movq(rsi, Operand(rbp, StandardFrameConstants::kContextOffset));
__ movsd(result, xmm0);
__ movaps(result, xmm0);
break;
default:
UNREACHABLE();
@ -1287,7 +1287,7 @@ void LCodeGen::DoBranch(LBranch* instr) {
EmitBranch(true_block, false_block, not_zero);
} else if (r.IsDouble()) {
XMMRegister reg = ToDoubleRegister(instr->InputAt(0));
__ xorpd(xmm0, xmm0);
__ xorps(xmm0, xmm0);
__ ucomisd(reg, xmm0);
EmitBranch(true_block, false_block, not_equal);
} else {
@ -1322,7 +1322,7 @@ void LCodeGen::DoBranch(LBranch* instr) {
// HeapNumber => false iff +0, -0, or NaN. These three cases set the
// zero flag when compared to zero using ucomisd.
__ xorpd(xmm0, xmm0);
__ xorps(xmm0, xmm0);
__ ucomisd(xmm0, FieldOperand(reg, HeapNumber::kValueOffset));
__ j(zero, false_label);
__ jmp(true_label);
@ -2671,7 +2671,7 @@ void LCodeGen::DoMathAbs(LUnaryMathOperation* instr) {
if (r.IsDouble()) {
XMMRegister scratch = xmm0;
XMMRegister input_reg = ToDoubleRegister(instr->InputAt(0));
__ xorpd(scratch, scratch);
__ xorps(scratch, scratch);
__ subsd(scratch, input_reg);
__ andpd(input_reg, scratch);
} else if (r.IsInteger32()) {
@ -2708,7 +2708,7 @@ void LCodeGen::DoMathFloor(LUnaryMathOperation* instr) {
__ cmpl(output_reg, Immediate(0x80000000));
DeoptimizeIf(equal, instr->environment());
} else {
__ xorpd(xmm_scratch, xmm_scratch); // Zero the register.
__ xorps(xmm_scratch, xmm_scratch); // Zero the register.
__ ucomisd(input_reg, xmm_scratch);
if (instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) {
@ -2784,7 +2784,7 @@ void LCodeGen::DoMathPowHalf(LUnaryMathOperation* instr) {
XMMRegister xmm_scratch = xmm0;
XMMRegister input_reg = ToDoubleRegister(instr->InputAt(0));
ASSERT(ToDoubleRegister(instr->result()).is(input_reg));
__ xorpd(xmm_scratch, xmm_scratch);
__ xorps(xmm_scratch, xmm_scratch);
__ addsd(input_reg, xmm_scratch); // Convert -0 to +0.
__ sqrtsd(input_reg, input_reg);
}
@ -2800,7 +2800,7 @@ void LCodeGen::DoPower(LPower* instr) {
if (exponent_type.IsDouble()) {
__ PrepareCallCFunction(2);
// Move arguments to correct registers
__ movsd(xmm0, left_reg);
__ movaps(xmm0, left_reg);
ASSERT(ToDoubleRegister(right).is(xmm1));
__ CallCFunction(
ExternalReference::power_double_double_function(isolate()), 2);
@ -2808,7 +2808,7 @@ void LCodeGen::DoPower(LPower* instr) {
__ PrepareCallCFunction(2);
// Move arguments to correct registers: xmm0 and edi (not rdi).
// On Windows, the registers are xmm0 and edx.
__ movsd(xmm0, left_reg);
__ movaps(xmm0, left_reg);
#ifdef _WIN64
ASSERT(ToRegister(right).is(rdx));
#else
@ -2834,13 +2834,13 @@ void LCodeGen::DoPower(LPower* instr) {
__ bind(&call);
__ PrepareCallCFunction(2);
// Move arguments to correct registers xmm0 and xmm1.
__ movsd(xmm0, left_reg);
__ movaps(xmm0, left_reg);
// Right argument is already in xmm1.
__ CallCFunction(
ExternalReference::power_double_double_function(isolate()), 2);
}
// Return value is in xmm0.
__ movsd(result_reg, xmm0);
__ movaps(result_reg, xmm0);
// Restore context register.
__ movq(rsi, Operand(rbp, StandardFrameConstants::kContextOffset));
}
@ -3422,7 +3422,7 @@ void LCodeGen::EmitNumberUntagD(Register input_reg,
DeoptimizeIf(not_equal, env);
// Convert undefined to NaN. Compute NaN as 0/0.
__ xorpd(result_reg, result_reg);
__ xorps(result_reg, result_reg);
__ divsd(result_reg, result_reg);
__ jmp(&done);

View File

@ -214,7 +214,7 @@ void LGapResolver::EmitMove(int index) {
} else if (source->IsDoubleRegister()) {
XMMRegister src = cgen_->ToDoubleRegister(source);
if (destination->IsDoubleRegister()) {
__ movsd(cgen_->ToDoubleRegister(destination), src);
__ movaps(cgen_->ToDoubleRegister(destination), src);
} else {
ASSERT(destination->IsDoubleStackSlot());
__ movsd(cgen_->ToOperand(destination), src);
@ -273,9 +273,9 @@ void LGapResolver::EmitSwap(int index) {
// Swap two double registers.
XMMRegister source_reg = cgen_->ToDoubleRegister(source);
XMMRegister destination_reg = cgen_->ToDoubleRegister(destination);
__ movsd(xmm0, source_reg);
__ movsd(source_reg, destination_reg);
__ movsd(destination_reg, xmm0);
__ movaps(xmm0, source_reg);
__ movaps(source_reg, destination_reg);
__ movaps(destination_reg, xmm0);
} else if (source->IsDoubleRegister() || destination->IsDoubleRegister()) {
// Swap a double register and a double stack slot.

View File

@ -1248,12 +1248,17 @@ void MacroAssembler::SmiAdd(Register dst,
Register src2) {
// No overflow checking. Use only when it's known that
// overflowing is impossible.
ASSERT(!dst.is(src2));
if (!dst.is(src1)) {
movq(dst, src1);
if (emit_debug_code()) {
movq(kScratchRegister, src1);
addq(kScratchRegister, src2);
Check(no_overflow, "Smi addition overflow");
}
lea(dst, Operand(src1, src2, times_1, 0));
} else {
addq(dst, src2);
Assert(no_overflow, "Smi addition overflow");
}
addq(dst, src2);
Assert(no_overflow, "Smi addition overflow");
}