[codegen][x64] Improve code for float to int64
This improves the code generated for float to int64 conversions on x64. Instead of explicitly checking the input for specific values and executing conditional jumps, just convert the integer back to a float and check if this results in the rounded input. The "success value" is then materialized via vmov + and instead of via branches. old: 7 c4e1fb2cd9 vcvttsd2siq rbx,xmm1 c ba01000000 movl rdx,0x1 11 49ba000000000000e0c3 REX.W movq r10,0xc3e0000000000000 1b c441f96efa vmovq xmm15,r10 20 c5792ef9 vucomisd xmm15,xmm1 24 7a08 jpe 0x3599421714ee <+0x2e> 26 7408 jz 0x3599421714f0 <+0x30> 28 4883fb01 REX.W cmpq rbx,0x1 2c 7102 jno 0x3599421714f0 <+0x30> 2e 33d2 xorl rdx,rdx new: 7 c463010bf90b vroundsd xmm15,xmm15,xmm1,0xb d c4e1fb2cd9 vcvttsd2siq rbx,xmm1 12 c4e1832ac3 vcvtqsi2sd xmm0,xmm15,rbx 17 c4c17bc2c700 vcmpss xmm0,xmm0,xmm15, (eq) 1d c4e1f97ec2 vmovq rdx,xmm0 22 83e201 andl rdx,0x1 A follow-up step would be to replace the explicitly materialized success value by a direct jump to the code handling the error case, but that requires more rewrite in TurboFan. R=tebbi@chromium.org Bug: v8:10005 Change-Id: Iaedc3f395fb3a8c11c936faa8c6e55c2dfe86cd9 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3560434 Reviewed-by: Tobias Tebbi <tebbi@chromium.org> Commit-Queue: Clemens Backes <clemensb@chromium.org> Cr-Commit-Position: refs/heads/main@{#79854}
This commit is contained in:
parent
51b99213e7
commit
08e514a894
@ -3372,6 +3372,28 @@ void Assembler::haddps(XMMRegister dst, Operand src) {
|
||||
emit_sse_operand(dst, src);
|
||||
}
|
||||
|
||||
// Emits the legacy (non-VEX) scalar single-precision compare with the "equal"
// predicate. The emitted byte sequence is: F3 [REX] 0F C2 ModRM imm8, where
// the trailing imm8 selects the predicate. Must not be used while AVX is
// enabled.
void Assembler::cmpeqss(XMMRegister dst, XMMRegister src) {
  // Predicate immediate: EQ == 0.
  constexpr byte kEqPredicate = 0x00;

  DCHECK(!IsEnabled(AVX));
  EnsureSpace ensure_space(this);

  // Mandatory prefix, optional REX, then the two-byte opcode.
  emit(0xF3);
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0xC2);

  // Register operands followed by the predicate byte.
  emit_sse_operand(dst, src);
  emit(kEqPredicate);
}
|
||||
|
||||
// Emits the legacy (non-VEX) scalar double-precision compare with the "equal"
// predicate. The emitted byte sequence is: F2 [REX] 0F C2 ModRM imm8, where
// the trailing imm8 selects the predicate. Must not be used while AVX is
// enabled.
void Assembler::cmpeqsd(XMMRegister dst, XMMRegister src) {
  // Predicate immediate: EQ == 0.
  constexpr byte kEqPredicate = 0x00;

  DCHECK(!IsEnabled(AVX));
  EnsureSpace ensure_space(this);

  // Mandatory prefix, optional REX, then the two-byte opcode.
  emit(0xF2);
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0xC2);

  // Register operands followed by the predicate byte.
  emit_sse_operand(dst, src);
  emit(kEqPredicate);
}
|
||||
|
||||
void Assembler::cmpltsd(XMMRegister dst, XMMRegister src) {
|
||||
EnsureSpace ensure_space(this);
|
||||
emit(0xF2);
|
||||
@ -3389,6 +3411,13 @@ void Assembler::roundss(XMMRegister dst, XMMRegister src, RoundingMode mode) {
|
||||
emit(static_cast<byte>(mode) | 0x8);
|
||||
}
|
||||
|
||||
// Emits the non-VEX roundss instruction with a memory source operand.
// |mode| is encoded in the trailing immediate byte. Must not be used while
// AVX is enabled.
void Assembler::roundss(XMMRegister dst, Operand src, RoundingMode mode) {
  DCHECK(!IsEnabled(AVX));
  sse4_instr(dst, src, 0x66, 0x0F, 0x3A, 0x0A);

  // Bit 3 of the immediate masks the precision exception.
  const int imm8 = static_cast<byte>(mode) | 0x8;
  emit(imm8);
}
|
||||
|
||||
void Assembler::roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode) {
|
||||
DCHECK(!IsEnabled(AVX));
|
||||
sse4_instr(dst, src, 0x66, 0x0F, 0x3A, 0x0B);
|
||||
@ -3396,6 +3425,13 @@ void Assembler::roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode) {
|
||||
emit(static_cast<byte>(mode) | 0x8);
|
||||
}
|
||||
|
||||
// Emits the non-VEX roundsd instruction with a memory source operand.
// |mode| is encoded in the trailing immediate byte. Must not be used while
// AVX is enabled.
void Assembler::roundsd(XMMRegister dst, Operand src, RoundingMode mode) {
  DCHECK(!IsEnabled(AVX));
  sse4_instr(dst, src, 0x66, 0x0F, 0x3A, 0x0B);

  // Bit 3 of the immediate masks the precision exception.
  const int imm8 = static_cast<byte>(mode) | 0x8;
  emit(imm8);
}
|
||||
|
||||
void Assembler::roundps(XMMRegister dst, XMMRegister src, RoundingMode mode) {
|
||||
DCHECK(!IsEnabled(AVX));
|
||||
sse4_instr(dst, src, 0x66, 0x0F, 0x3A, 0x08);
|
||||
|
@ -1288,6 +1288,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
|
||||
void haddps(XMMRegister dst, XMMRegister src);
|
||||
void haddps(XMMRegister dst, Operand src);
|
||||
|
||||
// Scalar floating-point compares using the non-VEX encodings. The
// cmpeqsd/cmpeqss definitions emit the EQ predicate (immediate 0) and DCHECK
// that AVX is not enabled.
void cmpeqsd(XMMRegister dst, XMMRegister src);
|
||||
void cmpeqss(XMMRegister dst, XMMRegister src);
|
||||
void cmpltsd(XMMRegister dst, XMMRegister src);
|
||||
|
||||
void movmskpd(Register dst, XMMRegister src);
|
||||
@ -1309,7 +1311,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
|
||||
void pinsrq(XMMRegister dst, Operand src, uint8_t imm8);
|
||||
|
||||
// Non-VEX rounding instructions. |mode| is encoded in the immediate byte.
// The scalar forms (roundss/roundsd) accept either a register or a memory
// (Operand) source.
void roundss(XMMRegister dst, XMMRegister src, RoundingMode mode);
|
||||
void roundss(XMMRegister dst, Operand src, RoundingMode mode);
|
||||
void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);
|
||||
void roundsd(XMMRegister dst, Operand src, RoundingMode mode);
|
||||
void roundps(XMMRegister dst, XMMRegister src, RoundingMode mode);
|
||||
void roundpd(XMMRegister dst, XMMRegister src, RoundingMode mode);
|
||||
|
||||
@ -1556,11 +1560,21 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
|
||||
vinstr(0x0a, dst, src1, src2, k66, k0F3A, kWIG);
|
||||
emit(static_cast<byte>(mode) | 0x8); // Mask precision exception.
|
||||
}
|
||||
// VEX-encoded vroundss taking its second source from memory. |mode| is
// encoded in the trailing immediate byte.
void vroundss(XMMRegister dst, XMMRegister src1, Operand src2,
              RoundingMode mode) {
  vinstr(0x0a, dst, src1, src2, k66, k0F3A, kWIG);
  // Bit 3 of the immediate masks the precision exception.
  const int imm8 = static_cast<byte>(mode) | 0x8;
  emit(imm8);
}
|
||||
// VEX-encoded vroundsd with register sources. |mode| is encoded in the
// trailing immediate byte.
void vroundsd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
              RoundingMode mode) {
  vinstr(0x0b, dst, src1, src2, k66, k0F3A, kWIG);
  // Bit 3 of the immediate masks the precision exception.
  const int imm8 = static_cast<byte>(mode) | 0x8;
  emit(imm8);
}
|
||||
// VEX-encoded vroundsd taking its second source from memory. |mode| is
// encoded in the trailing immediate byte.
void vroundsd(XMMRegister dst, XMMRegister src1, Operand src2,
              RoundingMode mode) {
  vinstr(0x0b, dst, src1, src2, k66, k0F3A, kWIG);
  // Bit 3 of the immediate masks the precision exception.
  const int imm8 = static_cast<byte>(mode) | 0x8;
  emit(imm8);
}
|
||||
void vroundps(XMMRegister dst, XMMRegister src, RoundingMode mode) {
|
||||
vinstr(0x08, dst, xmm0, src, k66, k0F3A, kWIG);
|
||||
emit(static_cast<byte>(mode) | 0x8); // Mask precision exception.
|
||||
@ -1625,6 +1639,14 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
|
||||
vpd(0x50, idst, xmm0, src);
|
||||
}
|
||||
void vpmovmskb(Register dst, XMMRegister src);
|
||||
// VEX-encoded scalar single-precision compare with the "equal" predicate.
// |dst| is passed as both the destination and the first source operand.
void vcmpeqss(XMMRegister dst, XMMRegister src) {
  // Predicate immediate: EQ == 0.
  constexpr byte kEqPredicate = 0x00;
  vss(0xC2, dst, dst, src);
  emit(kEqPredicate);
}
|
||||
// VEX-encoded scalar double-precision compare with the "equal" predicate.
// |dst| is passed as both the destination and the first source operand.
void vcmpeqsd(XMMRegister dst, XMMRegister src) {
  // Predicate immediate: EQ == 0.
  constexpr byte kEqPredicate = 0x00;
  vsd(0xC2, dst, dst, src);
  emit(kEqPredicate);
}
|
||||
void vcmpps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int8_t cmp) {
|
||||
vps(0xC2, dst, src1, src2);
|
||||
emit(cmp);
|
||||
|
@ -1215,6 +1215,23 @@ void TurboAssembler::Cvttss2uiq(Register dst, XMMRegister src, Label* fail) {
|
||||
ConvertFloatToUint64<XMMRegister, false>(this, dst, src, fail);
|
||||
}
|
||||
|
||||
// Scalar single-precision compare-for-equality of |dst| and |src|. Uses the
// VEX-encoded vcmpeqss when the CPU supports AVX and the legacy cmpeqss
// encoding otherwise.
void TurboAssembler::Cmpeqss(XMMRegister dst, XMMRegister src) {
  if (!CpuFeatures::IsSupported(AVX)) {
    cmpeqss(dst, src);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vcmpeqss(dst, src);
}
|
||||
|
||||
// Scalar double-precision compare-for-equality of |dst| and |src|. Uses the
// VEX-encoded vcmpeqsd when the CPU supports AVX and the legacy cmpeqsd
// encoding otherwise.
void TurboAssembler::Cmpeqsd(XMMRegister dst, XMMRegister src) {
  if (!CpuFeatures::IsSupported(AVX)) {
    cmpeqsd(dst, src);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vcmpeqsd(dst, src);
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Smi tagging, untagging and tag detection.
|
||||
|
@ -169,6 +169,9 @@ class V8_EXPORT_PRIVATE TurboAssembler
|
||||
void Cvtlsi2sd(XMMRegister dst, Register src);
|
||||
void Cvtlsi2sd(XMMRegister dst, Operand src);
|
||||
|
||||
// Scalar compare-for-equality helpers. Each emits the VEX-encoded
// instruction (vcmpeqss/vcmpeqsd) when AVX is supported and the legacy
// encoding (cmpeqss/cmpeqsd) otherwise.
void Cmpeqss(XMMRegister dst, XMMRegister src);
|
||||
void Cmpeqsd(XMMRegister dst, XMMRegister src);
|
||||
|
||||
void PextrdPreSse41(Register dst, XMMRegister src, uint8_t imm8);
|
||||
void Pextrq(Register dst, XMMRegister src, int8_t imm8);
|
||||
|
||||
|
@ -2011,64 +2011,68 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kSSEFloat32ToInt64:
|
||||
if (instr->InputAt(0)->IsFPRegister()) {
|
||||
__ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
|
||||
} else {
|
||||
__ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
|
||||
}
|
||||
if (instr->OutputCount() > 1) {
|
||||
__ Move(i.OutputRegister(1), 1);
|
||||
Label done;
|
||||
Label fail;
|
||||
__ Move(kScratchDoubleReg, static_cast<float>(INT64_MIN));
|
||||
case kSSEFloat32ToInt64: {
|
||||
Register output_reg = i.OutputRegister(0);
|
||||
if (instr->OutputCount() == 1) {
|
||||
if (instr->InputAt(0)->IsFPRegister()) {
|
||||
__ Ucomiss(kScratchDoubleReg, i.InputDoubleRegister(0));
|
||||
__ Cvttss2siq(output_reg, i.InputDoubleRegister(0));
|
||||
} else {
|
||||
__ Ucomiss(kScratchDoubleReg, i.InputOperand(0));
|
||||
__ Cvttss2siq(output_reg, i.InputOperand(0));
|
||||
}
|
||||
// If the input is NaN, then the conversion fails.
|
||||
__ j(parity_even, &fail, Label::kNear);
|
||||
// If the input is INT64_MIN, then the conversion succeeds.
|
||||
__ j(equal, &done, Label::kNear);
|
||||
__ cmpq(i.OutputRegister(0), Immediate(1));
|
||||
// If the conversion results in INT64_MIN, but the input was not
|
||||
// INT64_MIN, then the conversion fails.
|
||||
__ j(no_overflow, &done, Label::kNear);
|
||||
__ bind(&fail);
|
||||
__ Move(i.OutputRegister(1), 0);
|
||||
__ bind(&done);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case kSSEFloat64ToInt64:
|
||||
DCHECK_EQ(2, instr->OutputCount());
|
||||
Register success_reg = i.OutputRegister(1);
|
||||
DoubleRegister rounded = kScratchDoubleReg;
|
||||
if (instr->InputAt(0)->IsFPRegister()) {
|
||||
__ Cvttsd2siq(i.OutputRegister(0), i.InputDoubleRegister(0));
|
||||
__ Roundss(rounded, i.InputDoubleRegister(0), kRoundToZero);
|
||||
__ Cvttss2siq(output_reg, i.InputDoubleRegister(0));
|
||||
} else {
|
||||
__ Cvttsd2siq(i.OutputRegister(0), i.InputOperand(0));
|
||||
}
|
||||
if (instr->OutputCount() > 1) {
|
||||
__ Move(i.OutputRegister(1), 1);
|
||||
Label done;
|
||||
Label fail;
|
||||
__ Move(kScratchDoubleReg, static_cast<double>(INT64_MIN));
|
||||
if (instr->InputAt(0)->IsFPRegister()) {
|
||||
__ Ucomisd(kScratchDoubleReg, i.InputDoubleRegister(0));
|
||||
} else {
|
||||
__ Ucomisd(kScratchDoubleReg, i.InputOperand(0));
|
||||
}
|
||||
// If the input is NaN, then the conversion fails.
|
||||
__ j(parity_even, &fail, Label::kNear);
|
||||
// If the input is INT64_MIN, then the conversion succeeds.
|
||||
__ j(equal, &done, Label::kNear);
|
||||
__ cmpq(i.OutputRegister(0), Immediate(1));
|
||||
// If the conversion results in INT64_MIN, but the input was not
|
||||
// INT64_MIN, then the conversion fails.
|
||||
__ j(no_overflow, &done, Label::kNear);
|
||||
__ bind(&fail);
|
||||
__ Move(i.OutputRegister(1), 0);
|
||||
__ bind(&done);
|
||||
__ Roundss(rounded, i.InputOperand(0), kRoundToZero);
|
||||
// Convert {rounded} instead of the input operand, to avoid another
|
||||
// load.
|
||||
__ Cvttss2siq(output_reg, rounded);
|
||||
}
|
||||
DoubleRegister converted_back = i.TempSimd128Register(0);
|
||||
__ Cvtqsi2ss(converted_back, output_reg);
|
||||
// Compare the converted back value to the rounded value, set success_reg
|
||||
// to 0 if they differ, or 1 on success.
|
||||
__ Cmpeqss(converted_back, rounded);
|
||||
__ Movq(success_reg, converted_back);
|
||||
__ And(success_reg, Immediate(1));
|
||||
break;
|
||||
}
|
||||
case kSSEFloat64ToInt64: {
|
||||
Register output_reg = i.OutputRegister(0);
|
||||
if (instr->OutputCount() == 1) {
|
||||
if (instr->InputAt(0)->IsFPRegister()) {
|
||||
__ Cvttsd2siq(output_reg, i.InputDoubleRegister(0));
|
||||
} else {
|
||||
__ Cvttsd2siq(output_reg, i.InputOperand(0));
|
||||
}
|
||||
break;
|
||||
}
|
||||
DCHECK_EQ(2, instr->OutputCount());
|
||||
Register success_reg = i.OutputRegister(1);
|
||||
DoubleRegister rounded = kScratchDoubleReg;
|
||||
if (instr->InputAt(0)->IsFPRegister()) {
|
||||
__ Roundsd(rounded, i.InputDoubleRegister(0), kRoundToZero);
|
||||
__ Cvttsd2siq(output_reg, i.InputDoubleRegister(0));
|
||||
} else {
|
||||
__ Roundsd(rounded, i.InputOperand(0), kRoundToZero);
|
||||
// Convert {rounded} instead of the input operand, to avoid another
|
||||
// load.
|
||||
__ Cvttsd2siq(output_reg, rounded);
|
||||
}
|
||||
DoubleRegister converted_back = i.TempSimd128Register(0);
|
||||
__ Cvtqsi2sd(converted_back, output_reg);
|
||||
// Compare the converted back value to the rounded value, set success_reg
|
||||
// to 0 if they differ, or 1 on success.
|
||||
__ Cmpeqsd(converted_back, rounded);
|
||||
__ Movq(success_reg, converted_back);
|
||||
__ And(success_reg, Immediate(1));
|
||||
break;
|
||||
}
|
||||
case kSSEFloat32ToUint64: {
|
||||
Label fail;
|
||||
if (instr->OutputCount() > 1) __ Move(i.OutputRegister(1), 0);
|
||||
|
@ -1445,30 +1445,36 @@ void InstructionSelector::VisitTryTruncateFloat32ToInt64(Node* node) {
|
||||
X64OperandGenerator g(this);
|
||||
InstructionOperand inputs[] = {g.UseRegister(node->InputAt(0))};
|
||||
InstructionOperand outputs[2];
|
||||
InstructionOperand temps[1];
|
||||
size_t output_count = 0;
|
||||
size_t temp_count = 0;
|
||||
outputs[output_count++] = g.DefineAsRegister(node);
|
||||
|
||||
Node* success_output = NodeProperties::FindProjection(node, 1);
|
||||
if (success_output) {
|
||||
outputs[output_count++] = g.DefineAsRegister(success_output);
|
||||
temps[temp_count++] = g.TempSimd128Register();
|
||||
}
|
||||
|
||||
Emit(kSSEFloat32ToInt64, output_count, outputs, 1, inputs);
|
||||
Emit(kSSEFloat32ToInt64, output_count, outputs, 1, inputs, temp_count, temps);
|
||||
}
|
||||
|
||||
// Selects kSSEFloat64ToInt64 for a checked float64 -> int64 truncation.
//
// Output 0 is always the converted integer. If the node's projection 1 (the
// success flag) is used, it is defined as a second register output and one
// SIMD temp register is reserved; the code generator reads that temp via
// TempSimd128Register(0) to hold the value converted back to floating point.
//
// Exactly one instruction is emitted, and it must carry the temps: emitting
// the older temp-less form as well would add a second instruction that has no
// temp register for the two-output code path.
void InstructionSelector::VisitTryTruncateFloat64ToInt64(Node* node) {
  X64OperandGenerator g(this);
  InstructionOperand inputs[] = {g.UseRegister(node->InputAt(0))};
  InstructionOperand outputs[2];
  InstructionOperand temps[1];
  size_t output_count = 0;
  size_t temp_count = 0;
  outputs[output_count++] = g.DefineAsRegister(node);

  Node* success_output = NodeProperties::FindProjection(node, 1);
  if (success_output) {
    outputs[output_count++] = g.DefineAsRegister(success_output);
    // Scratch register for the round-trip comparison in the code generator.
    temps[temp_count++] = g.TempSimd128Register();
  }

  Emit(kSSEFloat64ToInt64, output_count, outputs, 1, inputs, temp_count, temps);
}
|
||||
|
||||
void InstructionSelector::VisitTryTruncateFloat32ToUint64(Node* node) {
|
||||
|
@ -1158,6 +1158,13 @@ int DisassemblerX64::AVXInstruction(byte* data) {
|
||||
AppendToBuffer("vcvtdq2pd %s,", NameOfAVXRegister(regop));
|
||||
current += PrintRightAVXOperand(current);
|
||||
break;
|
||||
case 0xC2:
|
||||
AppendToBuffer("vcmpss %s,%s,", NameOfAVXRegister(regop),
|
||||
NameOfAVXRegister(vvvv));
|
||||
current += PrintRightAVXOperand(current);
|
||||
AppendToBuffer(", (%s)", cmp_pseudo_op[*current]);
|
||||
current += 1;
|
||||
break;
|
||||
default:
|
||||
UnimplementedInstruction();
|
||||
}
|
||||
@ -1213,6 +1220,13 @@ int DisassemblerX64::AVXInstruction(byte* data) {
|
||||
NameOfAVXRegister(vvvv));
|
||||
current += PrintRightAVXOperand(current);
|
||||
break;
|
||||
case 0xC2:
|
||||
AppendToBuffer("vcmpsd %s,%s,", NameOfAVXRegister(regop),
|
||||
NameOfAVXRegister(vvvv));
|
||||
current += PrintRightAVXOperand(current);
|
||||
AppendToBuffer(", (%s)", cmp_pseudo_op[*current]);
|
||||
current += 1;
|
||||
break;
|
||||
#define DISASM_SSE2_INSTRUCTION_LIST_SD(instruction, _1, _2, opcode) \
|
||||
case 0x##opcode: \
|
||||
AppendToBuffer("v" #instruction " %s,%s,", NameOfAVXRegister(regop), \
|
||||
@ -2296,6 +2310,8 @@ const char* DisassemblerX64::TwoByteMnemonic(byte opcode) {
|
||||
return "movsxb";
|
||||
case 0xBF:
|
||||
return "movsxw";
|
||||
case 0xC2:
|
||||
return "cmpss";
|
||||
default:
|
||||
return nullptr;
|
||||
}
|
||||
|
@ -780,6 +780,8 @@ UNINITIALIZED_TEST(DisasmX64CheckOutputSSE) {
|
||||
COMPARE("440f178c8b10270000 movhps [rbx+rcx*4+0x2710],xmm9",
|
||||
movhps(Operand(rbx, rcx, times_4, 10000), xmm9));
|
||||
COMPARE("410fc6c100 shufps xmm0, xmm9, 0", shufps(xmm0, xmm9, 0x0));
|
||||
COMPARE("f30fc2c100 cmpeqss xmm0,xmm1", cmpeqss(xmm0, xmm1));
|
||||
COMPARE("f20fc2c100 cmpeqsd xmm0,xmm1", cmpeqsd(xmm0, xmm1));
|
||||
COMPARE("0f2ec1 ucomiss xmm0,xmm1", ucomiss(xmm0, xmm1));
|
||||
COMPARE("0f2e848b10270000 ucomiss xmm0,[rbx+rcx*4+0x2710]",
|
||||
ucomiss(xmm0, Operand(rbx, rcx, times_4, 10000)));
|
||||
@ -1027,8 +1029,12 @@ UNINITIALIZED_TEST(DisasmX64CheckOutputSSE4_1) {
|
||||
roundpd(xmm8, xmm3, kRoundToNearest));
|
||||
COMPARE("66440f3a0ac309 roundss xmm8,xmm3,0x1",
|
||||
roundss(xmm8, xmm3, kRoundDown));
|
||||
COMPARE("66440f3a0a420b09 roundss xmm8,[rdx+0xb],0x1",
|
||||
roundss(xmm8, Operand(rdx, 11), kRoundDown));
|
||||
COMPARE("66440f3a0bc309 roundsd xmm8,xmm3,0x1",
|
||||
roundsd(xmm8, xmm3, kRoundDown));
|
||||
COMPARE("66440f3a0b420b09 roundsd xmm8,[rdx+0xb],0x1",
|
||||
roundsd(xmm8, Operand(rdx, 11), kRoundDown));
|
||||
|
||||
#define COMPARE_SSE4_1_INSTR(instruction, _, __, ___, ____) \
|
||||
exp = #instruction " xmm5,xmm1"; \
|
||||
@ -1167,6 +1173,10 @@ UNINITIALIZED_TEST(DisasmX64CheckOutputAVX) {
|
||||
vmovss(xmm9, Operand(r11, rcx, times_8, -10000)));
|
||||
COMPARE("c4a17a118c8b10270000 vmovss [rbx+r9*4+0x2710],xmm1",
|
||||
vmovss(Operand(rbx, r9, times_4, 10000), xmm1));
|
||||
COMPARE("c532c2c900 vcmpss xmm9,xmm9,xmm1, (eq)",
|
||||
vcmpeqss(xmm9, xmm1));
|
||||
COMPARE("c533c2c900 vcmpsd xmm9,xmm9,xmm1, (eq)",
|
||||
vcmpeqsd(xmm9, xmm1));
|
||||
COMPARE("c5782ec9 vucomiss xmm9,xmm1", vucomiss(xmm9, xmm1));
|
||||
COMPARE("c5782e8453e52a0000 vucomiss xmm8,[rbx+rdx*2+0x2ae5]",
|
||||
vucomiss(xmm8, Operand(rbx, rdx, times_2, 10981)));
|
||||
|
Loading…
Reference in New Issue
Block a user