[x64] Implement vmovd and vmovq AVX instructions.

BUG=v8:4406
LOG=N

Review URL: https://codereview.chromium.org/1409873002

Cr-Commit-Position: refs/heads/master@{#31322}
Author: alph (committed by Commit bot)
Date: 2015-10-15 22:20:31 -07:00
Parent: 07449ddc86
Commit: 5d9c7ab648
12 changed files with 184 additions and 28 deletions
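
In short: the patch adds VEX-encoded vmovd/vmovq emitters to the x64 Assembler, guards the legacy SSE2 movd/movq emitters with DCHECK(!IsEnabled(AVX)), and introduces capitalized MacroAssembler::Movd/Movq wrappers that choose the encoding at runtime; call sites, the disassembler, and the assembler/disassembler tests are updated to match. The usual motivation for this wrapper pattern (stated here as background, not taken from the commit message) is to avoid the penalty of mixing legacy-SSE and VEX-encoded instructions in AVX code regions. A minimal sketch of the convention, with illustrative registers:

  // Lower-case mnemonics always emit the legacy SSE2 encoding; the
  // capitalized wrapper dispatches on runtime CPU support (see the
  // macro-assembler hunks below).
  __ movq(xmm0, rax);  // always the SSE2 encoding (66 REX.W 0F 6E /r)
  __ Movq(xmm0, rax);  // vmovq when AVX is enabled, movq otherwise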

src/compiler/x64/code-generator-x64.cc

@@ -946,7 +946,7 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
if (instr->InputAt(0)->IsDoubleStackSlot()) {
__ movl(i.OutputRegister(), i.InputOperand(0));
} else {
- __ movd(i.OutputRegister(), i.InputDoubleRegister(0));
+ __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
}
break;
case kSSEFloat64ExtractHighWord32:
@@ -972,9 +972,9 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
break;
case kSSEFloat64LoadLowWord32:
if (instr->InputAt(0)->IsRegister()) {
- __ movd(i.OutputDoubleRegister(), i.InputRegister(0));
+ __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
} else {
- __ movd(i.OutputDoubleRegister(), i.InputOperand(0));
+ __ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
}
break;
case kAVXFloat32Cmp: {
@@ -1189,26 +1189,26 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
if (instr->InputAt(0)->IsDoubleStackSlot()) {
__ movl(i.OutputRegister(), i.InputOperand(0));
} else {
- __ movd(i.OutputRegister(), i.InputDoubleRegister(0));
+ __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
}
break;
case kX64BitcastDL:
if (instr->InputAt(0)->IsDoubleStackSlot()) {
__ movq(i.OutputRegister(), i.InputOperand(0));
} else {
- __ movq(i.OutputRegister(), i.InputDoubleRegister(0));
+ __ Movq(i.OutputRegister(), i.InputDoubleRegister(0));
}
break;
case kX64BitcastIF:
if (instr->InputAt(0)->IsRegister()) {
- __ movd(i.OutputDoubleRegister(), i.InputRegister(0));
+ __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
} else {
__ movss(i.OutputDoubleRegister(), i.InputOperand(0));
}
break;
case kX64BitcastLD:
if (instr->InputAt(0)->IsRegister()) {
- __ movq(i.OutputDoubleRegister(), i.InputRegister(0));
+ __ Movq(i.OutputDoubleRegister(), i.InputRegister(0));
} else {
__ Movsd(i.OutputDoubleRegister(), i.InputOperand(0));
}

src/x64/assembler-x64.cc

@@ -2541,6 +2541,7 @@ void Assembler::divps(XMMRegister dst, const Operand& src) {
// SSE 2 operations.
void Assembler::movd(XMMRegister dst, Register src) {
+ DCHECK(!IsEnabled(AVX));
EnsureSpace ensure_space(this);
emit(0x66);
emit_optional_rex_32(dst, src);
@@ -2551,6 +2552,7 @@ void Assembler::movd(XMMRegister dst, Register src) {
void Assembler::movd(XMMRegister dst, const Operand& src) {
+ DCHECK(!IsEnabled(AVX));
EnsureSpace ensure_space(this);
emit(0x66);
emit_optional_rex_32(dst, src);
@@ -2561,6 +2563,7 @@ void Assembler::movd(XMMRegister dst, const Operand& src) {
void Assembler::movd(Register dst, XMMRegister src) {
+ DCHECK(!IsEnabled(AVX));
EnsureSpace ensure_space(this);
emit(0x66);
emit_optional_rex_32(src, dst);
@@ -2571,6 +2574,7 @@ void Assembler::movd(Register dst, XMMRegister src) {
void Assembler::movq(XMMRegister dst, Register src) {
+ DCHECK(!IsEnabled(AVX));
EnsureSpace ensure_space(this);
emit(0x66);
emit_rex_64(dst, src);
@@ -2581,6 +2585,7 @@ void Assembler::movq(XMMRegister dst, Register src) {
void Assembler::movq(Register dst, XMMRegister src) {
+ DCHECK(!IsEnabled(AVX));
EnsureSpace ensure_space(this);
emit(0x66);
emit_rex_64(src, dst);
@@ -2591,6 +2596,7 @@ void Assembler::movq(Register dst, XMMRegister src) {
void Assembler::movq(XMMRegister dst, XMMRegister src) {
+ DCHECK(!IsEnabled(AVX));
EnsureSpace ensure_space(this);
if (dst.low_bits() == 4) {
// Avoid unnecessary SIB byte.
@@ -3481,6 +3487,64 @@ void Assembler::vfmass(byte op, XMMRegister dst, XMMRegister src1,
}
+ void Assembler::vmovd(XMMRegister dst, Register src) {
+ DCHECK(IsEnabled(AVX));
+ EnsureSpace ensure_space(this);
+ XMMRegister isrc = {src.code()};
+ emit_vex_prefix(dst, xmm0, isrc, kL128, k66, k0F, kW0);
+ emit(0x6e);
+ emit_sse_operand(dst, src);
+ }
+
+ void Assembler::vmovd(XMMRegister dst, const Operand& src) {
+ DCHECK(IsEnabled(AVX));
+ EnsureSpace ensure_space(this);
+ emit_vex_prefix(dst, xmm0, src, kL128, k66, k0F, kW0);
+ emit(0x6e);
+ emit_sse_operand(dst, src);
+ }
+
+ void Assembler::vmovd(Register dst, XMMRegister src) {
+ DCHECK(IsEnabled(AVX));
+ EnsureSpace ensure_space(this);
+ XMMRegister idst = {dst.code()};
+ emit_vex_prefix(src, xmm0, idst, kL128, k66, k0F, kW0);
+ emit(0x7e);
+ emit_sse_operand(src, dst);
+ }
+
+ void Assembler::vmovq(XMMRegister dst, Register src) {
+ DCHECK(IsEnabled(AVX));
+ EnsureSpace ensure_space(this);
+ XMMRegister isrc = {src.code()};
+ emit_vex_prefix(dst, xmm0, isrc, kL128, k66, k0F, kW1);
+ emit(0x6e);
+ emit_sse_operand(dst, src);
+ }
+
+ void Assembler::vmovq(XMMRegister dst, const Operand& src) {
+ DCHECK(IsEnabled(AVX));
+ EnsureSpace ensure_space(this);
+ emit_vex_prefix(dst, xmm0, src, kL128, k66, k0F, kW1);
+ emit(0x6e);
+ emit_sse_operand(dst, src);
+ }
+
+ void Assembler::vmovq(Register dst, XMMRegister src) {
+ DCHECK(IsEnabled(AVX));
+ EnsureSpace ensure_space(this);
+ XMMRegister idst = {dst.code()};
+ emit_vex_prefix(src, xmm0, idst, kL128, k66, k0F, kW1);
+ emit(0x7e);
+ emit_sse_operand(src, dst);
+ }
+
void Assembler::vmovapd(XMMRegister dst, XMMRegister src) {
DCHECK(IsEnabled(AVX));
EnsureSpace ensure_space(this);
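
A note on the new emitters: both directions share the opcode pair 0x6e (GPR or memory to XMM) and 0x7e (XMM to GPR); the only difference between vmovd and vmovq is the VEX.W bit, where kW0 selects a 32-bit and kW1 a 64-bit general-purpose operand. The temporary XMMRegister values (isrc/idst) exist only to funnel a GPR's register code through emit_vex_prefix and emit_sse_operand, which are typed for XMM operands. Worked register-form encodings, hand-assembled from the Intel SDM (illustrative, not output captured from this assembler, and assuming the compact two-byte VEX prefix is chosen whenever W=0 allows it):

  vmovd xmm5, edi  ->  C5 F9 6E EF     ; two-byte VEX, W implicitly 0
  vmovq xmm5, rdi  ->  C4 E1 F9 6E EF  ; three-byte VEX required for W=1
  vmovq rdi, xmm5  ->  C4 E1 F9 7E EF  ; opcode 7E reverses the direction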

src/x64/assembler-x64.h

@@ -1269,6 +1269,13 @@ class Assembler : public AssemblerBase {
void vfmass(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
void vfmass(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
+
+ void vmovd(XMMRegister dst, Register src);
+ void vmovd(XMMRegister dst, const Operand& src);
+ void vmovd(Register dst, XMMRegister src);
+ void vmovq(XMMRegister dst, Register src);
+ void vmovq(XMMRegister dst, const Operand& src);
+ void vmovq(Register dst, XMMRegister src);
void vmovapd(XMMRegister dst, XMMRegister src);
void vmovsd(XMMRegister dst, const Operand& src) {
vsd(0x10, dst, xmm0, src);

src/x64/code-stubs-x64.cc

@@ -337,7 +337,7 @@ void MathPowStub::Generate(MacroAssembler* masm) {
// Test for 0.5.
// Load double_scratch with 0.5.
__ movq(scratch, V8_UINT64_C(0x3FE0000000000000));
- __ movq(double_scratch, scratch);
+ __ Movq(double_scratch, scratch);
// Already ruled out NaNs for exponent.
__ ucomisd(double_scratch, double_exponent);
__ j(not_equal, &not_plus_half, Label::kNear);
@@ -347,7 +347,7 @@ void MathPowStub::Generate(MacroAssembler* masm) {
// According to IEEE-754, double-precision -Infinity has the highest
// 12 bits set and the lowest 52 bits cleared.
__ movq(scratch, V8_UINT64_C(0xFFF0000000000000));
- __ movq(double_scratch, scratch);
+ __ Movq(double_scratch, scratch);
__ ucomisd(double_scratch, double_base);
// Comparing -Infinity with NaN results in "unordered", which sets the
// zero flag as if both were equal. However, it also sets the carry flag.
@@ -379,7 +379,7 @@ void MathPowStub::Generate(MacroAssembler* masm) {
// According to IEEE-754, double-precision -Infinity has the highest
// 12 bits set and the lowest 52 bits cleared.
__ movq(scratch, V8_UINT64_C(0xFFF0000000000000));
- __ movq(double_scratch, scratch);
+ __ Movq(double_scratch, scratch);
__ ucomisd(double_scratch, double_base);
// Comparing -Infinity with NaN results in "unordered", which sets the
// zero flag as if both were equal. However, it also sets the carry flag.
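
The IEEE-754 claim in the comments above (and in the lithium-codegen hunks below) is easy to sanity-check outside V8; a standalone snippet, not project code:

  // 0xFFF0000000000000: sign = 1, exponent = all ones, mantissa = 0,
  // which IEEE-754 defines as double -Infinity.
  #include <cstdint>
  #include <cstdio>
  #include <cstring>

  int main() {
    uint64_t bits = 0xFFF0000000000000ull;
    double d;
    std::memcpy(&d, &bits, sizeof d);  // same reinterpretation Movq performs
    std::printf("%f\n", d);            // prints: -inf
    return 0;
  }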

src/x64/codegen-x64.cc

@@ -614,7 +614,7 @@ void MathExpGenerator::EmitMathExp(MacroAssembler* masm,
__ Movsd(result, Operand(kScratchRegister, 4 * kDoubleSize));
__ mulsd(double_scratch, input);
__ addsd(double_scratch, result);
- __ movq(temp2, double_scratch);
+ __ Movq(temp2, double_scratch);
__ subsd(double_scratch, result);
__ Movsd(result, Operand(kScratchRegister, 6 * kDoubleSize));
__ leaq(temp1, Operand(temp2, 0x1ff800));
@@ -630,7 +630,7 @@ void MathExpGenerator::EmitMathExp(MacroAssembler* masm,
__ subsd(result, double_scratch);
__ mulsd(input, double_scratch);
__ mulsd(result, input);
- __ movq(input, temp1);
+ __ Movq(input, temp1);
__ mulsd(result, Operand(kScratchRegister, 7 * kDoubleSize));
__ subsd(result, double_scratch);
__ addsd(result, Operand(kScratchRegister, 8 * kDoubleSize));

src/x64/disasm-x64.cc

@@ -351,6 +351,11 @@ class DisassemblerX64 {
bool rex_w() { return (rex_ & 0x08) != 0; }
+
+ bool vex_w() {
+ DCHECK(vex_byte0_ == VEX3_PREFIX || vex_byte0_ == VEX2_PREFIX);
+ return vex_byte0_ == VEX3_PREFIX ? (vex_byte2_ & 0x80) != 0 : false;
+ }
bool vex_128() {
DCHECK(vex_byte0_ == VEX3_PREFIX || vex_byte0_ == VEX2_PREFIX);
byte checked = vex_byte0_ == VEX3_PREFIX ? vex_byte2_ : vex_byte1_;
@@ -1191,6 +1196,16 @@ int DisassemblerX64::AVXInstruction(byte* data) {
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
+ case 0x6e:
+ AppendToBuffer("vmov%c %s,", vex_w() ? 'q' : 'd',
+ NameOfXMMRegister(regop));
+ current += PrintRightOperand(current);
+ break;
+ case 0x7e:
+ AppendToBuffer("vmov%c ", vex_w() ? 'q' : 'd');
+ current += PrintRightOperand(current);
+ AppendToBuffer(",%s", NameOfXMMRegister(regop));
+ break;
default:
UnimplementedInstruction();
}
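
The new vex_w() accessor is what lets a single opcode case print either mnemonic: in a three-byte VEX prefix the W bit is bit 7 of the third byte, while the two-byte form cannot encode W at all and therefore behaves as W=0 (hence vmovd, not vmovq). A self-contained model of that extraction (plain C++, not the V8 class):

  #include <cstdint>

  // byte0 is the first prefix byte: 0xC4 opens a three-byte VEX prefix
  // whose third byte (byte2) carries W in bit 7; the 0xC5 two-byte form
  // has no W field, so it is treated as W = 0.
  bool VexW(uint8_t byte0, uint8_t byte2) {
    return byte0 == 0xC4 ? (byte2 & 0x80) != 0 : false;
  }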

src/x64/lithium-codegen-x64.cc

@@ -3604,7 +3604,7 @@ void LCodeGen::DoMathFloor(LMathFloor* instr) {
CpuFeatureScope scope(masm(), SSE4_1);
if (instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) {
// Deoptimize if minus zero.
- __ movq(output_reg, input_reg);
+ __ Movq(output_reg, input_reg);
__ subq(output_reg, Immediate(1));
DeoptimizeIf(overflow, instr, Deoptimizer::kMinusZero);
}
@@ -3665,7 +3665,7 @@ void LCodeGen::DoMathRound(LMathRound* instr) {
Label done, round_to_zero, below_one_half;
Label::Distance dist = DeoptEveryNTimes() ? Label::kFar : Label::kNear;
__ movq(kScratchRegister, one_half);
- __ movq(xmm_scratch, kScratchRegister);
+ __ Movq(xmm_scratch, kScratchRegister);
__ ucomisd(xmm_scratch, input_reg);
__ j(above, &below_one_half, Label::kNear);
@@ -3679,13 +3679,13 @@ void LCodeGen::DoMathRound(LMathRound* instr) {
__ bind(&below_one_half);
__ movq(kScratchRegister, minus_one_half);
- __ movq(xmm_scratch, kScratchRegister);
+ __ Movq(xmm_scratch, kScratchRegister);
__ ucomisd(xmm_scratch, input_reg);
__ j(below_equal, &round_to_zero, Label::kNear);
// CVTTSD2SI rounds towards zero, we use ceil(x - (-0.5)) and then
// compare and compensate.
- __ movq(input_temp, input_reg); // Do not alter input_reg.
+ __ Movapd(input_temp, input_reg); // Do not alter input_reg.
__ subsd(input_temp, xmm_scratch);
__ cvttsd2si(output_reg, input_temp);
// Catch minint due to overflow, and to prevent overflow when compensating.
@@ -3703,7 +3703,7 @@ void LCodeGen::DoMathRound(LMathRound* instr) {
// We return 0 for the input range [+0, 0.5[, or [-0.5, 0.5[ if
// we can ignore the difference between a result of -0 and +0.
if (instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) {
- __ movq(output_reg, input_reg);
+ __ Movq(output_reg, input_reg);
__ testq(output_reg, output_reg);
DeoptimizeIf(negative, instr, Deoptimizer::kMinusZero);
}
@@ -3744,7 +3744,7 @@ void LCodeGen::DoMathPowHalf(LMathPowHalf* instr) {
// Check base for -Infinity. According to IEEE-754, double-precision
// -Infinity has the highest 12 bits set and the lowest 52 bits cleared.
__ movq(kScratchRegister, V8_INT64_C(0xFFF0000000000000));
- __ movq(xmm_scratch, kScratchRegister);
+ __ Movq(xmm_scratch, kScratchRegister);
__ ucomisd(xmm_scratch, input_reg);
// Comparing -Infinity with NaN results in "unordered", which sets the
// zero flag as if both were equal. However, it also sets the carry flag.
@@ -5322,10 +5322,10 @@ void LCodeGen::DoDoubleBits(LDoubleBits* instr) {
XMMRegister value_reg = ToDoubleRegister(instr->value());
Register result_reg = ToRegister(instr->result());
if (instr->hydrogen()->bits() == HDoubleBits::HIGH) {
- __ movq(result_reg, value_reg);
+ __ Movq(result_reg, value_reg);
__ shrq(result_reg, Immediate(32));
} else {
- __ movd(result_reg, value_reg);
+ __ Movd(result_reg, value_reg);
}
}
@@ -5335,9 +5335,9 @@ void LCodeGen::DoConstructDouble(LConstructDouble* instr) {
Register lo_reg = ToRegister(instr->lo());
XMMRegister result_reg = ToDoubleRegister(instr->result());
XMMRegister xmm_scratch = double_scratch0();
- __ movd(result_reg, hi_reg);
+ __ Movd(result_reg, hi_reg);
__ psllq(result_reg, 32);
- __ movd(xmm_scratch, lo_reg);
+ __ Movd(xmm_scratch, lo_reg);
__ orps(result_reg, xmm_scratch);
}
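
Several hunks above use Movq(gpr, xmm) so that a minus-zero bailout becomes a plain integer sign test: -0.0 compares equal to +0.0 in floating point, but its bit pattern is unambiguous. A standalone C++ illustration of the trick (not V8 code):

  #include <cstdint>
  #include <cstring>

  // -0.0 is the only double that is == 0.0 yet has the sign bit set, so
  // moving the raw bits into an integer register (the C++ analogue of
  // movq gpr, xmm) reduces the check to a single comparison.
  bool IsMinusZero(double x) {
    uint64_t bits;
    std::memcpy(&bits, &x, sizeof bits);
    return bits == 0x8000000000000000ull;
  }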

src/x64/lithium-gap-resolver-x64.cc

@@ -192,7 +192,7 @@ void LGapResolver::EmitMove(int index) {
__ xorps(dst, dst);
} else {
__ Set(kScratchRegister, int_val);
- __ movq(dst, kScratchRegister);
+ __ Movq(dst, kScratchRegister);
}
} else {
DCHECK(destination->IsStackSlot());

src/x64/macro-assembler-x64.cc

@@ -2413,7 +2413,7 @@ void MacroAssembler::Move(XMMRegister dst, uint32_t src) {
pcmpeqd(dst, dst);
} else {
movl(kScratchRegister, Immediate(src));
- movq(dst, kScratchRegister);
+ Movq(dst, kScratchRegister);
}
}
}
@@ -2442,7 +2442,7 @@ void MacroAssembler::Move(XMMRegister dst, uint64_t src) {
Move(dst, lower);
} else {
movq(kScratchRegister, src);
- movq(dst, kScratchRegister);
+ Movq(dst, kScratchRegister);
}
}
}
@@ -2489,6 +2489,56 @@ void MacroAssembler::Movsd(const Operand& dst, XMMRegister src) {
}
+ void MacroAssembler::Movd(XMMRegister dst, Register src) {
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope scope(this, AVX);
+ vmovd(dst, src);
+ } else {
+ movd(dst, src);
+ }
+ }
+
+ void MacroAssembler::Movd(XMMRegister dst, const Operand& src) {
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope scope(this, AVX);
+ vmovd(dst, src);
+ } else {
+ movd(dst, src);
+ }
+ }
+
+ void MacroAssembler::Movd(Register dst, XMMRegister src) {
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope scope(this, AVX);
+ vmovd(dst, src);
+ } else {
+ movd(dst, src);
+ }
+ }
+
+ void MacroAssembler::Movq(XMMRegister dst, Register src) {
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope scope(this, AVX);
+ vmovq(dst, src);
+ } else {
+ movq(dst, src);
+ }
+ }
+
+ void MacroAssembler::Movq(Register dst, XMMRegister src) {
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope scope(this, AVX);
+ vmovq(dst, src);
+ } else {
+ movq(dst, src);
+ }
+ }
+
void MacroAssembler::Cmp(Register dst, Handle<Object> source) {
AllowDeferredHandleDereference smi_check;
if (source->IsSmi()) {
@@ -2797,7 +2847,7 @@ void MacroAssembler::Call(Handle<Code> code_object,
void MacroAssembler::Pextrd(Register dst, XMMRegister src, int8_t imm8) {
if (imm8 == 0) {
- movd(dst, src);
+ Movd(dst, src);
return;
}
DCHECK_EQ(1, imm8);
@@ -2817,7 +2867,7 @@ void MacroAssembler::Pinsrd(XMMRegister dst, Register src, int8_t imm8) {
pinsrd(dst, src, imm8);
return;
}
- movd(xmm0, src);
+ Movd(xmm0, src);
if (imm8 == 1) {
punpckldq(dst, xmm0);
} else {
@@ -2836,7 +2886,7 @@ void MacroAssembler::Pinsrd(XMMRegister dst, const Operand& src, int8_t imm8) {
pinsrd(dst, src, imm8);
return;
}
- movd(xmm0, src);
+ Movd(xmm0, src);
if (imm8 == 1) {
punpckldq(dst, xmm0);
} else {
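
Note the knock-on effect in the Pextrd/Pinsrd hunks just above: their lane-0 fast paths and pre-SSE4.1 fallbacks now route GPR-to-XMM moves through the wrappers, so even non-SSE4.1 code picks up the VEX encoding when AVX is on. For reference, the two byte sequences Movq(xmm0, rax) can now emit, hand-assembled from the Intel SDM (illustrative only, not captured assembler output):

  AVX path : vmovq xmm0, rax  ->  C4 E1 F9 6E C0  ; VEX.128.66.0F.W1 6E /r
  SSE2 path: movq  xmm0, rax  ->  66 48 0F 6E C0  ; 66 REX.W 0F 6E /r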

src/x64/macro-assembler-x64.h

@@ -899,6 +899,12 @@ class MacroAssembler: public Assembler {
void Movsd(XMMRegister dst, const Operand& src);
void Movsd(const Operand& dst, XMMRegister src);
+
+ void Movd(XMMRegister dst, Register src);
+ void Movd(XMMRegister dst, const Operand& src);
+ void Movd(Register dst, XMMRegister src);
+ void Movq(XMMRegister dst, Register src);
+ void Movq(Register dst, XMMRegister src);
// Control Flow
void Jump(Address destination, RelocInfo::Mode rmode);
void Jump(ExternalReference ext);
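
A hedged usage sketch of the API declared here (registers chosen arbitrarily; note that this patch declares no Movq(XMMRegister, const Operand&) overload, only the register forms):

  // Each wrapper transparently upgrades to the VEX encoding when AVX is
  // enabled at runtime; '__' is the usual MacroAssembler macro.
  __ Movd(xmm1, rax);              // low 32 bits of rax -> xmm1
  __ Movd(xmm1, Operand(rsp, 0));  // 32-bit load into xmm1
  __ Movd(rax, xmm1);              // low 32 bits of xmm1 -> eax, zero-extended
  __ Movq(xmm1, rax);              // all 64 bits of rax -> xmm1
  __ Movq(rax, xmm1);              // low 64 bits of xmm1 -> rax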

test/cctest/test-assembler-x64.cc

@@ -1366,6 +1366,13 @@ TEST(AssemblerX64AVX_sd) {
__ vcvtlsi2sd(xmm7, xmm7, Operand(rsp, 0));
__ vsubsd(xmm7, xmm6, xmm7); // xmm7 is 1.0
__ vmulsd(xmm1, xmm1, xmm7);
+
+ __ movq(rdx, V8_INT64_C(0x3ff0000000000000)); // 1.0
+ __ vmovq(xmm7, rdx);
+ __ vmulsd(xmm1, xmm1, xmm7);
+ __ movq(Operand(rsp, 0), rdx);
+ __ vmovq(xmm6, Operand(rsp, 0));
+ __ vmulsd(xmm1, xmm1, xmm6);
__ addq(rsp, Immediate(kDoubleSize * 2));
__ vucomisd(xmm3, xmm1);

test/cctest/test-disasm-x64.cc

@@ -511,6 +511,13 @@ TEST(DisasmX64) {
__ vucomiss(xmm9, xmm1);
__ vucomiss(xmm8, Operand(rbx, rdx, times_2, 10981));
+
+ __ vmovd(xmm5, rdi);
+ __ vmovd(xmm9, Operand(rbx, rcx, times_4, 10000));
+ __ vmovd(r9, xmm6);
+ __ vmovq(xmm5, rdi);
+ __ vmovq(xmm9, Operand(rbx, rcx, times_4, 10000));
+ __ vmovq(r9, xmm6);
__ vmovapd(xmm7, xmm0);
__ vmovsd(xmm6, xmm2);
__ vmovsd(xmm9, Operand(rbx, rcx, times_4, 10000));