diff --git a/src/arm/macro-assembler-arm.cc b/src/arm/macro-assembler-arm.cc index 6f55d4ae32..3d760f0ad1 100644 --- a/src/arm/macro-assembler-arm.cc +++ b/src/arm/macro-assembler-arm.cc @@ -1051,6 +1051,69 @@ void MacroAssembler::VmovLow(DwVfpRegister dst, Register src) { } } +void MacroAssembler::VmovExtended(Register dst, int src_code) { + DCHECK_LE(32, src_code); + DCHECK_GT(64, src_code); + if (src_code & 0x1) { + VmovHigh(dst, DwVfpRegister::from_code(src_code / 2)); + } else { + VmovLow(dst, DwVfpRegister::from_code(src_code / 2)); + } +} + +void MacroAssembler::VmovExtended(int dst_code, Register src) { + DCHECK_LE(32, dst_code); + DCHECK_GT(64, dst_code); + if (dst_code & 0x1) { + VmovHigh(DwVfpRegister::from_code(dst_code / 2), src); + } else { + VmovLow(DwVfpRegister::from_code(dst_code / 2), src); + } +} + +void MacroAssembler::VmovExtended(int dst_code, int src_code, + Register scratch) { + if (src_code < 32 && dst_code < 32) { + // src and dst are both s-registers. + vmov(SwVfpRegister::from_code(dst_code), + SwVfpRegister::from_code(src_code)); + } else if (src_code < 32) { + // src is an s-register. + vmov(scratch, SwVfpRegister::from_code(src_code)); + VmovExtended(dst_code, scratch); + } else if (dst_code < 32) { + // dst is an s-register. + VmovExtended(scratch, src_code); + vmov(SwVfpRegister::from_code(dst_code), scratch); + } else { + // Neither src or dst are s-registers. + DCHECK_GT(64, src_code); + DCHECK_GT(64, dst_code); + VmovExtended(scratch, src_code); + VmovExtended(dst_code, scratch); + } +} + +void MacroAssembler::VmovExtended(int dst_code, const MemOperand& src, + Register scratch) { + if (dst_code >= 32) { + ldr(scratch, src); + VmovExtended(dst_code, scratch); + } else { + vldr(SwVfpRegister::from_code(dst_code), src); + } +} + +void MacroAssembler::VmovExtended(const MemOperand& dst, int src_code, + Register scratch) { + if (src_code >= 32) { + VmovExtended(scratch, src_code); + str(scratch, dst); + } else { + vstr(SwVfpRegister::from_code(src_code), dst); + } +} + void MacroAssembler::LslPair(Register dst_low, Register dst_high, Register src_low, Register src_high, Register scratch, Register shift) { diff --git a/src/arm/macro-assembler-arm.h b/src/arm/macro-assembler-arm.h index d4dca48a8b..78a65f0c5c 100644 --- a/src/arm/macro-assembler-arm.h +++ b/src/arm/macro-assembler-arm.h @@ -549,6 +549,14 @@ class MacroAssembler: public Assembler { void VmovLow(Register dst, DwVfpRegister src); void VmovLow(DwVfpRegister dst, Register src); + // Simulate s-register moves for imaginary s32 - s63 registers. + void VmovExtended(Register dst, int src_code); + void VmovExtended(int dst_code, Register src); + // Move between s-registers and imaginary s-registers. 
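// --- Reviewer sketch (illustrative, not part of this CL) ------------------
// The VmovExtended helpers above treat codes 32..63 as the "imaginary"
// single-precision halves of d16..d31, which have no real s-register
// encoding on ARM. Below is a minimal standalone sketch of the
// code -> backing d-register mapping they rely on; the helper name is
// made up here.
#include <cassert>
#include <utility>

// Returns {d-register code, is_high_half} for an extended s-register code,
// mirroring the `code & 0x1` / `code / 2` logic in VmovExtended.
std::pair<int, bool> BackingDRegister(int s_code) {
  assert(s_code >= 0 && s_code < 64);
  return {s_code / 2, (s_code & 0x1) != 0};
}
// Example: code 35 maps to the high half of d17, so moving it to a core
// register must go through VmovHigh(dst, d17) rather than a plain vmov.
// --- end sketch ------------------------------------------------------------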
+ void VmovExtended(int dst_code, int src_code, Register scratch); + void VmovExtended(int dst_code, const MemOperand& src, Register scratch); + void VmovExtended(const MemOperand& dst, int src_code, Register scratch); + void LslPair(Register dst_low, Register dst_high, Register src_low, Register src_high, Register scratch, Register shift); void LslPair(Register dst_low, Register dst_high, Register src_low, diff --git a/src/compiler/arm/code-generator-arm.cc b/src/compiler/arm/code-generator-arm.cc index 8d409d3e37..4cdb6da56c 100644 --- a/src/compiler/arm/code-generator-arm.cc +++ b/src/compiler/arm/code-generator-arm.cc @@ -136,25 +136,13 @@ class ArmOperandConverter final : public InstructionOperandConverter { FrameOffset offset = frame_access_state()->GetFrameOffset(slot); return MemOperand(offset.from_stack_pointer() ? sp : fp, offset.offset()); } - - FloatRegister InputFloat32Register(size_t index) { - return ToFloat32Register(instr_->InputAt(index)); - } - - FloatRegister OutputFloat32Register() { - return ToFloat32Register(instr_->Output()); - } - - FloatRegister ToFloat32Register(InstructionOperand* op) { - return LowDwVfpRegister::from_code(ToDoubleRegister(op).code()).low(); - } }; namespace { -class OutOfLineLoadFloat32 final : public OutOfLineCode { +class OutOfLineLoadFloat final : public OutOfLineCode { public: - OutOfLineLoadFloat32(CodeGenerator* gen, SwVfpRegister result) + OutOfLineLoadFloat(CodeGenerator* gen, SwVfpRegister result) : OutOfLineCode(gen), result_(result) {} void Generate() final { @@ -1119,54 +1107,54 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kArmVcmpF32: if (instr->InputAt(1)->IsFPRegister()) { - __ VFPCompareAndSetFlags(i.InputFloat32Register(0), - i.InputFloat32Register(1)); + __ VFPCompareAndSetFlags(i.InputFloatRegister(0), + i.InputFloatRegister(1)); } else { DCHECK(instr->InputAt(1)->IsImmediate()); // 0.0 is the only immediate supported by vcmp instructions. 
DCHECK(i.InputFloat32(1) == 0.0f); - __ VFPCompareAndSetFlags(i.InputFloat32Register(0), i.InputFloat32(1)); + __ VFPCompareAndSetFlags(i.InputFloatRegister(0), i.InputFloat32(1)); } DCHECK_EQ(SetCC, i.OutputSBit()); break; case kArmVaddF32: - __ vadd(i.OutputFloat32Register(), i.InputFloat32Register(0), - i.InputFloat32Register(1)); + __ vadd(i.OutputFloatRegister(), i.InputFloatRegister(0), + i.InputFloatRegister(1)); DCHECK_EQ(LeaveCC, i.OutputSBit()); break; case kArmVsubF32: - __ vsub(i.OutputFloat32Register(), i.InputFloat32Register(0), - i.InputFloat32Register(1)); + __ vsub(i.OutputFloatRegister(), i.InputFloatRegister(0), + i.InputFloatRegister(1)); DCHECK_EQ(LeaveCC, i.OutputSBit()); break; case kArmVmulF32: - __ vmul(i.OutputFloat32Register(), i.InputFloat32Register(0), - i.InputFloat32Register(1)); + __ vmul(i.OutputFloatRegister(), i.InputFloatRegister(0), + i.InputFloatRegister(1)); DCHECK_EQ(LeaveCC, i.OutputSBit()); break; case kArmVmlaF32: - __ vmla(i.OutputFloat32Register(), i.InputFloat32Register(1), - i.InputFloat32Register(2)); + __ vmla(i.OutputFloatRegister(), i.InputFloatRegister(1), + i.InputFloatRegister(2)); DCHECK_EQ(LeaveCC, i.OutputSBit()); break; case kArmVmlsF32: - __ vmls(i.OutputFloat32Register(), i.InputFloat32Register(1), - i.InputFloat32Register(2)); + __ vmls(i.OutputFloatRegister(), i.InputFloatRegister(1), + i.InputFloatRegister(2)); DCHECK_EQ(LeaveCC, i.OutputSBit()); break; case kArmVdivF32: - __ vdiv(i.OutputFloat32Register(), i.InputFloat32Register(0), - i.InputFloat32Register(1)); + __ vdiv(i.OutputFloatRegister(), i.InputFloatRegister(0), + i.InputFloatRegister(1)); DCHECK_EQ(LeaveCC, i.OutputSBit()); break; case kArmVsqrtF32: - __ vsqrt(i.OutputFloat32Register(), i.InputFloat32Register(0)); + __ vsqrt(i.OutputFloatRegister(), i.InputFloatRegister(0)); break; case kArmVabsF32: - __ vabs(i.OutputFloat32Register(), i.InputFloat32Register(0)); + __ vabs(i.OutputFloatRegister(), i.InputFloatRegister(0)); break; case kArmVnegF32: - __ vneg(i.OutputFloat32Register(), i.InputFloat32Register(0)); + __ vneg(i.OutputFloatRegister(), i.InputFloatRegister(0)); break; case kArmVcmpF64: if (instr->InputAt(1)->IsFPRegister()) { @@ -1235,7 +1223,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; case kArmVrintmF32: { CpuFeatureScope scope(masm(), ARMv8); - __ vrintm(i.OutputFloat32Register(), i.InputFloat32Register(0)); + __ vrintm(i.OutputFloatRegister(), i.InputFloatRegister(0)); break; } case kArmVrintmF64: { @@ -1245,7 +1233,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kArmVrintpF32: { CpuFeatureScope scope(masm(), ARMv8); - __ vrintp(i.OutputFloat32Register(), i.InputFloat32Register(0)); + __ vrintp(i.OutputFloatRegister(), i.InputFloatRegister(0)); break; } case kArmVrintpF64: { @@ -1255,7 +1243,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kArmVrintzF32: { CpuFeatureScope scope(masm(), ARMv8); - __ vrintz(i.OutputFloat32Register(), i.InputFloat32Register(0)); + __ vrintz(i.OutputFloatRegister(), i.InputFloatRegister(0)); break; } case kArmVrintzF64: { @@ -1270,7 +1258,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kArmVrintnF32: { CpuFeatureScope scope(masm(), ARMv8); - __ vrintn(i.OutputFloat32Register(), i.InputFloat32Register(0)); + __ vrintn(i.OutputFloatRegister(), i.InputFloatRegister(0)); break; } case kArmVrintnF64: { @@ -1279,26 +1267,26 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( 
break; } case kArmVcvtF32F64: { - __ vcvt_f32_f64(i.OutputFloat32Register(), i.InputDoubleRegister(0)); + __ vcvt_f32_f64(i.OutputFloatRegister(), i.InputDoubleRegister(0)); DCHECK_EQ(LeaveCC, i.OutputSBit()); break; } case kArmVcvtF64F32: { - __ vcvt_f64_f32(i.OutputDoubleRegister(), i.InputFloat32Register(0)); + __ vcvt_f64_f32(i.OutputDoubleRegister(), i.InputFloatRegister(0)); DCHECK_EQ(LeaveCC, i.OutputSBit()); break; } case kArmVcvtF32S32: { SwVfpRegister scratch = kScratchDoubleReg.low(); __ vmov(scratch, i.InputRegister(0)); - __ vcvt_f32_s32(i.OutputFloat32Register(), scratch); + __ vcvt_f32_s32(i.OutputFloatRegister(), scratch); DCHECK_EQ(LeaveCC, i.OutputSBit()); break; } case kArmVcvtF32U32: { SwVfpRegister scratch = kScratchDoubleReg.low(); __ vmov(scratch, i.InputRegister(0)); - __ vcvt_f32_u32(i.OutputFloat32Register(), scratch); + __ vcvt_f32_u32(i.OutputFloatRegister(), scratch); DCHECK_EQ(LeaveCC, i.OutputSBit()); break; } @@ -1318,7 +1306,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kArmVcvtS32F32: { SwVfpRegister scratch = kScratchDoubleReg.low(); - __ vcvt_s32_f32(scratch, i.InputFloat32Register(0)); + __ vcvt_s32_f32(scratch, i.InputFloatRegister(0)); __ vmov(i.OutputRegister(), scratch); // Avoid INT32_MAX as an overflow indicator and use INT32_MIN instead, // because INT32_MIN allows easier out-of-bounds detection. @@ -1329,7 +1317,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kArmVcvtU32F32: { SwVfpRegister scratch = kScratchDoubleReg.low(); - __ vcvt_u32_f32(scratch, i.InputFloat32Register(0)); + __ vcvt_u32_f32(scratch, i.InputFloatRegister(0)); __ vmov(i.OutputRegister(), scratch); // Avoid UINT32_MAX as an overflow indicator and use 0 instead, // because 0 allows easier out-of-bounds detection. 
@@ -1353,11 +1341,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kArmVmovU32F32: - __ vmov(i.OutputRegister(), i.InputFloat32Register(0)); + __ vmov(i.OutputRegister(), i.InputFloatRegister(0)); DCHECK_EQ(LeaveCC, i.OutputSBit()); break; case kArmVmovF32U32: - __ vmov(i.OutputFloat32Register(), i.InputRegister(0)); + __ vmov(i.OutputFloatRegister(), i.InputRegister(0)); DCHECK_EQ(LeaveCC, i.OutputSBit()); break; case kArmVmovLowU32F64: @@ -1415,12 +1403,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( DCHECK_EQ(LeaveCC, i.OutputSBit()); break; case kArmVldrF32: { - __ vldr(i.OutputFloat32Register(), i.InputOffset()); + __ vldr(i.OutputFloatRegister(), i.InputOffset()); DCHECK_EQ(LeaveCC, i.OutputSBit()); break; } case kArmVstrF32: - __ vstr(i.InputFloat32Register(0), i.InputOffset(1)); + __ vstr(i.InputFloatRegister(0), i.InputOffset(1)); DCHECK_EQ(LeaveCC, i.OutputSBit()); break; case kArmVldrF64: @@ -1432,9 +1420,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( DCHECK_EQ(LeaveCC, i.OutputSBit()); break; case kArmFloat32Max: { - SwVfpRegister result = i.OutputFloat32Register(); - SwVfpRegister left = i.InputFloat32Register(0); - SwVfpRegister right = i.InputFloat32Register(1); + SwVfpRegister result = i.OutputFloatRegister(); + SwVfpRegister left = i.InputFloatRegister(0); + SwVfpRegister right = i.InputFloatRegister(1); if (left.is(right)) { __ Move(result, left); } else { @@ -1460,9 +1448,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kArmFloat32Min: { - SwVfpRegister result = i.OutputFloat32Register(); - SwVfpRegister left = i.InputFloat32Register(0); - SwVfpRegister right = i.InputFloat32Register(1); + SwVfpRegister result = i.OutputFloatRegister(); + SwVfpRegister left = i.InputFloatRegister(0); + SwVfpRegister right = i.InputFloatRegister(1); if (left.is(right)) { __ Move(result, left); } else { @@ -1501,7 +1489,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( frame_access_state()->IncreaseSPDelta(kDoubleSize / kPointerSize); } else { DCHECK_EQ(MachineRepresentation::kFloat32, op->representation()); - __ vpush(i.InputFloat32Register(0)); + __ vpush(i.InputFloatRegister(0)); frame_access_state()->IncreaseSPDelta(1); } } else { @@ -1532,7 +1520,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ASSEMBLE_CHECKED_LOAD_INTEGER(ldr); break; case kCheckedLoadFloat32: - ASSEMBLE_CHECKED_LOAD_FP(Float32); + ASSEMBLE_CHECKED_LOAD_FP(Float); break; case kCheckedLoadFloat64: ASSEMBLE_CHECKED_LOAD_FP(Double); @@ -1547,7 +1535,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ASSEMBLE_CHECKED_STORE_INTEGER(str); break; case kCheckedStoreFloat32: - ASSEMBLE_CHECKED_STORE_FP(Float32); + ASSEMBLE_CHECKED_STORE_FP(Float); break; case kCheckedStoreFloat64: ASSEMBLE_CHECKED_STORE_FP(Double); @@ -1789,7 +1777,6 @@ void CodeGenerator::AssembleReturn() { __ Ret(pop_count); } - void CodeGenerator::AssembleMove(InstructionOperand* source, InstructionOperand* destination) { ArmOperandConverter g(this, nullptr); @@ -1858,12 +1845,12 @@ void CodeGenerator::AssembleMove(InstructionOperand* source, } if (destination->IsStackSlot()) __ str(dst, g.ToMemOperand(destination)); } else if (src.type() == Constant::kFloat32) { - if (destination->IsFPStackSlot()) { + if (destination->IsFloatStackSlot()) { MemOperand dst = g.ToMemOperand(destination); __ mov(ip, Operand(bit_cast(src.ToFloat32()))); __ str(ip, dst); } 
else { - SwVfpRegister dst = g.ToFloat32Register(destination); + SwVfpRegister dst = g.ToFloatRegister(destination); __ vmov(dst, src.ToFloat32()); } } else { @@ -1872,28 +1859,60 @@ void CodeGenerator::AssembleMove(InstructionOperand* source, ? g.ToDoubleRegister(destination) : kScratchDoubleReg; __ vmov(dst, src.ToFloat64(), kScratchReg); - if (destination->IsFPStackSlot()) { + if (destination->IsDoubleStackSlot()) { __ vstr(dst, g.ToMemOperand(destination)); } } } else if (source->IsFPRegister()) { - DwVfpRegister src = g.ToDoubleRegister(source); - if (destination->IsFPRegister()) { - DwVfpRegister dst = g.ToDoubleRegister(destination); - __ Move(dst, src); + MachineRepresentation rep = LocationOperand::cast(source)->representation(); + if (rep == MachineRepresentation::kFloat64) { + DwVfpRegister src = g.ToDoubleRegister(source); + if (destination->IsDoubleRegister()) { + DwVfpRegister dst = g.ToDoubleRegister(destination); + __ Move(dst, src); + } else { + DCHECK(destination->IsDoubleStackSlot()); + __ vstr(src, g.ToMemOperand(destination)); + } } else { - DCHECK(destination->IsFPStackSlot()); - __ vstr(src, g.ToMemOperand(destination)); + DCHECK_EQ(MachineRepresentation::kFloat32, rep); + // GapResolver may give us reg codes that don't map to actual s-registers. + // Generate code to work around those cases. + int src_code = LocationOperand::cast(source)->register_code(); + if (destination->IsFloatRegister()) { + int dst_code = LocationOperand::cast(destination)->register_code(); + __ VmovExtended(dst_code, src_code, kScratchReg); + } else { + DCHECK(destination->IsFloatStackSlot()); + __ VmovExtended(g.ToMemOperand(destination), src_code, kScratchReg); + } } } else if (source->IsFPStackSlot()) { MemOperand src = g.ToMemOperand(source); + MachineRepresentation rep = + LocationOperand::cast(destination)->representation(); if (destination->IsFPRegister()) { + if (rep == MachineRepresentation::kFloat64) { __ vldr(g.ToDoubleRegister(destination), src); + } else { + DCHECK_EQ(MachineRepresentation::kFloat32, rep); + // GapResolver may give us reg codes that don't map to actual + // s-registers. Generate code to work around those cases. 
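// --- Reviewer sketch (illustrative, not part of this CL) ------------------
// What the "work around" amounts to for a float32 load: real s-registers
// (codes 0..31) can use vldr directly, while imaginary codes 32..63 must
// round-trip through a core register. The function below only models that
// dispatch for illustration; it does not emit anything.
#include <string>

std::string LoadSequenceFor(int dst_code) {
  if (dst_code < 32) {
    return "vldr s" + std::to_string(dst_code) + ", [src]";
  }
  int d = dst_code / 2;
  const char* half = (dst_code & 0x1) ? "high" : "low";
  return "ldr scratch, [src]; vmov (" + std::string(half) + " half of d" +
         std::to_string(d) + "), scratch";
}
// --- end sketch ------------------------------------------------------------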
+ int dst_code = LocationOperand::cast(destination)->register_code(); + __ VmovExtended(dst_code, src, kScratchReg); + } } else { DCHECK(destination->IsFPStackSlot()); + if (rep == MachineRepresentation::kFloat64) { DwVfpRegister temp = kScratchDoubleReg; __ vldr(temp, src); __ vstr(temp, g.ToMemOperand(destination)); + } else { + DCHECK_EQ(MachineRepresentation::kFloat32, rep); + SwVfpRegister temp = kScratchDoubleReg.low(); + __ vldr(temp, src); + __ vstr(temp, g.ToMemOperand(destination)); + } } } else { UNREACHABLE(); @@ -1933,17 +1952,35 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source, __ str(temp_0, dst); __ vstr(temp_1, src); } else if (source->IsFPRegister()) { + MachineRepresentation rep = LocationOperand::cast(source)->representation(); LowDwVfpRegister temp = kScratchDoubleReg; - DwVfpRegister src = g.ToDoubleRegister(source); - if (destination->IsFPRegister()) { - DwVfpRegister dst = g.ToDoubleRegister(destination); - __ vswp(src, dst); + if (rep == MachineRepresentation::kFloat64) { + DwVfpRegister src = g.ToDoubleRegister(source); + if (destination->IsFPRegister()) { + DwVfpRegister dst = g.ToDoubleRegister(destination); + __ vswp(src, dst); + } else { + DCHECK(destination->IsFPStackSlot()); + MemOperand dst = g.ToMemOperand(destination); + __ Move(temp, src); + __ vldr(src, dst); + __ vstr(temp, dst); + } } else { - DCHECK(destination->IsFPStackSlot()); - MemOperand dst = g.ToMemOperand(destination); - __ Move(temp, src); - __ vldr(src, dst); - __ vstr(temp, dst); + DCHECK_EQ(MachineRepresentation::kFloat32, rep); + int src_code = LocationOperand::cast(source)->register_code(); + if (destination->IsFPRegister()) { + int dst_code = LocationOperand::cast(destination)->register_code(); + __ VmovExtended(temp.low().code(), src_code, kScratchReg); + __ VmovExtended(src_code, dst_code, kScratchReg); + __ VmovExtended(dst_code, temp.low().code(), kScratchReg); + } else { + DCHECK(destination->IsFPStackSlot()); + MemOperand dst = g.ToMemOperand(destination); + __ VmovExtended(temp.low().code(), src_code, kScratchReg); + __ VmovExtended(src_code, dst, kScratchReg); + __ vstr(temp.low(), dst); + } } } else if (source->IsFPStackSlot()) { DCHECK(destination->IsFPStackSlot()); @@ -1951,21 +1988,29 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source, LowDwVfpRegister temp_1 = kScratchDoubleReg; MemOperand src0 = g.ToMemOperand(source); MemOperand dst0 = g.ToMemOperand(destination); - MemOperand src1(src0.rn(), src0.offset() + kPointerSize); - MemOperand dst1(dst0.rn(), dst0.offset() + kPointerSize); - __ vldr(temp_1, dst0); // Save destination in temp_1. - __ ldr(temp_0, src0); // Then use temp_0 to copy source to destination. - __ str(temp_0, dst0); - __ ldr(temp_0, src1); - __ str(temp_0, dst1); - __ vstr(temp_1, src0); + MachineRepresentation rep = LocationOperand::cast(source)->representation(); + if (rep == MachineRepresentation::kFloat64) { + MemOperand src1(src0.rn(), src0.offset() + kPointerSize); + MemOperand dst1(dst0.rn(), dst0.offset() + kPointerSize); + __ vldr(temp_1, dst0); // Save destination in temp_1. + __ ldr(temp_0, src0); // Then use temp_0 to copy source to destination. + __ str(temp_0, dst0); + __ ldr(temp_0, src1); + __ str(temp_0, dst1); + __ vstr(temp_1, src0); + } else { + DCHECK_EQ(MachineRepresentation::kFloat32, rep); + __ vldr(temp_1.low(), dst0); // Save destination in temp_1. + __ ldr(temp_0, src0); // Then use temp_0 to copy source to destination. 
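// --- Reviewer sketch (illustrative, not part of this CL) ------------------
// The register-register float swap above is the classic three-move swap
// through a scratch location, expressed with VmovExtended so that it also
// works for imaginary s-register codes. Simulated here on a plain array
// standing in for the 64 extended single-precision slots.
#include <cassert>

void SwapExtended(float regs[64], int src_code, int dst_code,
                  int scratch_code) {
  assert(src_code != scratch_code && dst_code != scratch_code);
  regs[scratch_code] = regs[src_code];  // VmovExtended(temp, src_code)
  regs[src_code] = regs[dst_code];      // VmovExtended(src_code, dst_code)
  regs[dst_code] = regs[scratch_code];  // VmovExtended(dst_code, temp)
}
// --- end sketch ------------------------------------------------------------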
+ __ str(temp_0, dst0); + __ vstr(temp_1.low(), src0); + } } else { // No other combinations are possible. UNREACHABLE(); } } - void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) { // On 32-bit ARM we emit the jump tables inline. UNREACHABLE(); diff --git a/src/compiler/gap-resolver.cc b/src/compiler/gap-resolver.cc index 7b04198e81..1ba1044eab 100644 --- a/src/compiler/gap-resolver.cc +++ b/src/compiler/gap-resolver.cc @@ -14,27 +14,124 @@ namespace compiler { namespace { +#define REP_BIT(rep) (1 << static_cast(rep)) + +const int kFloat32Bit = REP_BIT(MachineRepresentation::kFloat32); +const int kFloat64Bit = REP_BIT(MachineRepresentation::kFloat64); + inline bool Blocks(MoveOperands* move, InstructionOperand destination) { - return move->Blocks(destination); + return !move->IsEliminated() && move->source().InterferesWith(destination); } +// Splits a FP move between two location operands into the equivalent series of +// moves between smaller sub-operands, e.g. a double move to two single moves. +// This helps reduce the number of cycles that would normally occur under FP +// aliasing, and makes swaps much easier to implement. +MoveOperands* Split(MoveOperands* move, MachineRepresentation smaller_rep, + ParallelMove* moves) { + DCHECK(!kSimpleFPAliasing); + // Splitting is only possible when the slot size is the same as float size. + DCHECK_EQ(kPointerSize, kFloatSize); + const LocationOperand& src_loc = LocationOperand::cast(move->source()); + const LocationOperand& dst_loc = LocationOperand::cast(move->destination()); + MachineRepresentation dst_rep = dst_loc.representation(); + DCHECK_NE(smaller_rep, dst_rep); + auto src_kind = src_loc.location_kind(); + auto dst_kind = dst_loc.location_kind(); -inline bool IsRedundant(MoveOperands* move) { return move->IsRedundant(); } + int aliases = + 1 << (ElementSizeLog2Of(dst_rep) - ElementSizeLog2Of(smaller_rep)); + int base = -1; + USE(base); + DCHECK_EQ(aliases, RegisterConfiguration::Turbofan()->GetAliases( + dst_rep, 0, smaller_rep, &base)); + + int src_index = -1; + int slot_size = (1 << ElementSizeLog2Of(smaller_rep)) / kPointerSize; + int src_step = 1; + if (src_kind == LocationOperand::REGISTER) { + src_index = src_loc.register_code() * aliases; + } else { + src_index = src_loc.index(); + // For operands that occuply multiple slots, the index refers to the last + // slot. On little-endian architectures, we start at the high slot and use a + // negative step so that register-to-slot moves are in the correct order. + src_step = -slot_size; + } + int dst_index = -1; + int dst_step = 1; + if (dst_kind == LocationOperand::REGISTER) { + dst_index = dst_loc.register_code() * aliases; + } else { + dst_index = dst_loc.index(); + dst_step = -slot_size; + } + + // Reuse 'move' for the first fragment. It is not pending. + move->set_source(AllocatedOperand(src_kind, smaller_rep, src_index)); + move->set_destination(AllocatedOperand(dst_kind, smaller_rep, dst_index)); + // Add the remaining fragment moves. + for (int i = 1; i < aliases; ++i) { + src_index += src_step; + dst_index += dst_step; + moves->AddMove(AllocatedOperand(src_kind, smaller_rep, src_index), + AllocatedOperand(dst_kind, smaller_rep, dst_index)); + } + // Return the first fragment. + return move; +} } // namespace +void GapResolver::Resolve(ParallelMove* moves) { + // Clear redundant moves, and collect FP move representations if aliasing + // is non-simple. 
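// --- Reviewer sketch (illustrative, not part of this CL) ------------------
// The fragment arithmetic used by Split(), pulled out for clarity. The log2
// element sizes are 2 for kFloat32, 3 for kFloat64 and 4 for kSimd128, so a
// double move splits into two float fragments and a SIMD move into four.
#include <cassert>

int FragmentCount(int large_size_log2, int small_size_log2) {
  assert(large_size_log2 > small_size_log2);
  return 1 << (large_size_log2 - small_size_log2);
}

int FirstFragmentRegisterCode(int large_reg_code, int fragments) {
  // Splitting a move of d3 into float fragments: 2 fragments covering s6, s7.
  return large_reg_code * fragments;
}
// --- end sketch ------------------------------------------------------------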
+ int reps = 0; + for (size_t i = 0; i < moves->size();) { + MoveOperands* move = (*moves)[i]; + if (move->IsRedundant()) { + (*moves)[i] = moves->back(); + moves->pop_back(); + continue; + } + i++; + if (!kSimpleFPAliasing && move->destination().IsFPRegister()) { + reps |= + REP_BIT(LocationOperand::cast(move->destination()).representation()); + } + } -void GapResolver::Resolve(ParallelMove* moves) const { - // Clear redundant moves. - auto it = - std::remove_if(moves->begin(), moves->end(), std::ptr_fun(IsRedundant)); - moves->erase(it, moves->end()); - for (MoveOperands* move : *moves) { + if (!kSimpleFPAliasing) { + if (reps && !base::bits::IsPowerOfTwo32(reps)) { + // Start with the smallest FP moves, so we never encounter smaller moves + // in the middle of a cycle of larger moves. + if ((reps & kFloat32Bit) != 0) { + split_rep_ = MachineRepresentation::kFloat32; + for (size_t i = 0; i < moves->size(); ++i) { + auto move = (*moves)[i]; + if (!move->IsEliminated() && move->destination().IsFloatRegister()) + PerformMove(moves, move); + } + } + if ((reps & kFloat64Bit) != 0) { + split_rep_ = MachineRepresentation::kFloat64; + for (size_t i = 0; i < moves->size(); ++i) { + auto move = (*moves)[i]; + if (!move->IsEliminated() && move->destination().IsDoubleRegister()) + PerformMove(moves, move); + } + } + } + split_rep_ = MachineRepresentation::kSimd128; + } + + for (size_t i = 0; i < moves->size(); ++i) { + auto move = (*moves)[i]; if (!move->IsEliminated()) PerformMove(moves, move); } } -void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) const { +void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) { // Each call to this function performs a move and deletes it from the move // graph. We first recursively perform any move blocking this one. We mark a // move as "pending" on entry to PerformMove in order to detect cycles in the @@ -45,15 +142,32 @@ void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) const { // Clear this move's destination to indicate a pending move. The actual // destination is saved on the side. - DCHECK(!move->source().IsInvalid()); // Or else it will look eliminated. + InstructionOperand source = move->source(); + DCHECK(!source.IsInvalid()); // Or else it will look eliminated. InstructionOperand destination = move->destination(); move->SetPending(); + // We may need to split moves between FP locations differently. + bool is_fp_loc_move = !kSimpleFPAliasing && destination.IsFPLocationOperand(); + // Perform a depth-first traversal of the move graph to resolve dependencies. // Any unperformed, unpending move with a source the same as this one's // destination blocks this one so recursively perform all such moves. - for (MoveOperands* other : *moves) { - if (other->Blocks(destination) && !other->IsPending()) { + for (size_t i = 0; i < moves->size(); ++i) { + auto other = (*moves)[i]; + if (other->IsEliminated()) continue; + if (other->IsPending()) continue; + if (other->source().InterferesWith(destination)) { + if (!kSimpleFPAliasing && is_fp_loc_move && + LocationOperand::cast(other->source()).representation() > + split_rep_) { + // 'other' must also be an FP location move. Break it into fragments + // of the same size as 'move'. 'other' is set to one of the fragments, + // and the rest are appended to 'moves'. + other = Split(other, split_rep_, moves); + // 'other' may not block destination now. 
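// --- Reviewer sketch (illustrative, not part of this CL) ------------------
// The "mixed representation" test in Resolve() boils down to a power-of-two
// check on the bitmask of FP representations seen among move destinations
// (the real code uses base::bits::IsPowerOfTwo32). With a single rep the
// one-pass algorithm suffices; with mixed reps, float32 moves are resolved
// first so that larger moves can be split around them.
#include <cstdint>

bool HasMixedFPReps(uint32_t rep_mask) {
  return rep_mask != 0 && (rep_mask & (rep_mask - 1)) != 0;
}
// --- end sketch ------------------------------------------------------------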
+ if (!other->source().InterferesWith(destination)) continue; + } // Though PerformMove can change any source operand in the move graph, // this call cannot create a blocking move via a swap (this loop does not // miss any). Assume there is a non-blocking move with source A and this @@ -67,18 +181,18 @@ void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) const { } } - // We are about to resolve this move and don't need it marked as pending, so - // restore its destination. - move->set_destination(destination); - // This move's source may have changed due to swaps to resolve cycles and so // it may now be the last move in the cycle. If so remove it. - InstructionOperand source = move->source(); - if (source.InterferesWith(destination)) { + source = move->source(); + if (source.EqualsCanonicalized(destination)) { move->Eliminate(); return; } + // We are about to resolve this move and don't need it marked as pending, so + // restore its destination. + move->set_destination(destination); + // The move may be blocked on a (at most one) pending move, in which case we // have a cycle. Search for such a blocking move and perform a swap to // resolve it. @@ -91,7 +205,6 @@ void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) const { return; } - DCHECK((*blocker)->IsPending()); // Ensure source is a register or both are stack slots, to limit swap cases. if (source.IsStackSlot() || source.IsFPStackSlot()) { std::swap(source, destination); @@ -99,14 +212,36 @@ void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) const { assembler_->AssembleSwap(&source, &destination); move->Eliminate(); - // Any unperformed (including pending) move with a source of either this - // move's source or destination needs to have their source changed to - // reflect the state of affairs after the swap. - for (MoveOperands* other : *moves) { - if (other->Blocks(source)) { - other->set_source(destination); - } else if (other->Blocks(destination)) { - other->set_source(source); + // Update outstanding moves whose source may now have been moved. + if (!kSimpleFPAliasing && is_fp_loc_move) { + // We may have to split larger moves. 
+ for (size_t i = 0; i < moves->size(); ++i) { + auto other = (*moves)[i]; + if (other->IsEliminated()) continue; + if (source.InterferesWith(other->source())) { + if (LocationOperand::cast(other->source()).representation() > + split_rep_) { + other = Split(other, split_rep_, moves); + if (!source.InterferesWith(other->source())) continue; + } + other->set_source(destination); + } else if (destination.InterferesWith(other->source())) { + if (LocationOperand::cast(other->source()).representation() > + split_rep_) { + other = Split(other, split_rep_, moves); + if (!destination.InterferesWith(other->source())) continue; + } + other->set_source(source); + } + } + } else { + for (auto other : *moves) { + if (other->IsEliminated()) continue; + if (source.EqualsCanonicalized(other->source())) { + other->set_source(destination); + } else if (destination.EqualsCanonicalized(other->source())) { + other->set_source(source); + } } } } diff --git a/src/compiler/gap-resolver.h b/src/compiler/gap-resolver.h index 19806f568a..d4c402587f 100644 --- a/src/compiler/gap-resolver.h +++ b/src/compiler/gap-resolver.h @@ -26,18 +26,24 @@ class GapResolver final { InstructionOperand* destination) = 0; }; - explicit GapResolver(Assembler* assembler) : assembler_(assembler) {} + explicit GapResolver(Assembler* assembler) + : assembler_(assembler), split_rep_(MachineRepresentation::kSimd128) {} // Resolve a set of parallel moves, emitting assembler instructions. - void Resolve(ParallelMove* parallel_move) const; + void Resolve(ParallelMove* parallel_move); private: - // Perform the given move, possibly requiring other moves to satisfy - // dependencies. - void PerformMove(ParallelMove* moves, MoveOperands* move) const; + // Performs the given move, possibly performing other moves to unblock the + // destination operand. + void PerformMove(ParallelMove* moves, MoveOperands* move); // Assembler used to emit moves and save registers. Assembler* const assembler_; + + // While resolving moves, the largest FP representation that can be moved. + // Any larger moves must be split into an equivalent series of moves of this + // representation. + MachineRepresentation split_rep_; }; } // namespace compiler diff --git a/src/compiler/instruction.cc b/src/compiler/instruction.cc index 39ea47689c..174ee300fe 100644 --- a/src/compiler/instruction.cc +++ b/src/compiler/instruction.cc @@ -64,8 +64,35 @@ FlagsCondition CommuteFlagsCondition(FlagsCondition condition) { return condition; } -bool InstructionOperand::InterferesWith(const InstructionOperand& that) const { - return EqualsCanonicalized(that); +bool InstructionOperand::InterferesWith(const InstructionOperand& other) const { + if (kSimpleFPAliasing || !this->IsFPLocationOperand() || + !other.IsFPLocationOperand()) + return EqualsCanonicalized(other); + // Aliasing is complex and both operands are fp locations. + const LocationOperand& loc = *LocationOperand::cast(this); + const LocationOperand& other_loc = LocationOperand::cast(other); + LocationOperand::LocationKind kind = loc.location_kind(); + LocationOperand::LocationKind other_kind = other_loc.location_kind(); + if (kind != other_kind) return false; + MachineRepresentation rep = loc.representation(); + MachineRepresentation other_rep = other_loc.representation(); + if (rep == other_rep) return EqualsCanonicalized(other); + if (kind == LocationOperand::REGISTER) { + // FP register-register interference. 
+ return GetRegConfig()->AreAliases(rep, loc.register_code(), other_rep, + other_loc.register_code()); + } else { + // FP slot-slot interference. Slots of different FP reps can alias because + // the gap resolver may break a move into 2 or 4 equivalent smaller moves. + DCHECK_EQ(LocationOperand::STACK_SLOT, kind); + int index_hi = loc.index(); + int index_lo = index_hi - (1 << ElementSizeLog2Of(rep)) / kPointerSize + 1; + int other_index_hi = other_loc.index(); + int other_index_lo = + other_index_hi - (1 << ElementSizeLog2Of(other_rep)) / kPointerSize + 1; + return other_index_hi >= index_lo && index_hi >= other_index_lo; + } + return false; } void InstructionOperand::Print(const RegisterConfiguration* config) const { @@ -232,28 +259,31 @@ bool ParallelMove::IsRedundant() const { return true; } - -MoveOperands* ParallelMove::PrepareInsertAfter(MoveOperands* move) const { +void ParallelMove::PrepareInsertAfter( + MoveOperands* move, ZoneVector* to_eliminate) const { + bool no_aliasing = + kSimpleFPAliasing || !move->destination().IsFPLocationOperand(); MoveOperands* replacement = nullptr; - MoveOperands* to_eliminate = nullptr; + MoveOperands* eliminated = nullptr; for (MoveOperands* curr : *this) { if (curr->IsEliminated()) continue; if (curr->destination().EqualsCanonicalized(move->source())) { + // We must replace move's source with curr's destination in order to + // insert it into this ParallelMove. DCHECK(!replacement); replacement = curr; - if (to_eliminate != nullptr) break; - } else if (curr->destination().EqualsCanonicalized(move->destination())) { - DCHECK(!to_eliminate); - to_eliminate = curr; - if (replacement != nullptr) break; + if (no_aliasing && eliminated != nullptr) break; + } else if (curr->destination().InterferesWith(move->destination())) { + // We can eliminate curr, since move overwrites at least a part of its + // destination, implying its value is no longer live. + eliminated = curr; + to_eliminate->push_back(curr); + if (no_aliasing && replacement != nullptr) break; } } - DCHECK_IMPLIES(replacement == to_eliminate, replacement == nullptr); if (replacement != nullptr) move->set_source(replacement->source()); - return to_eliminate; } - ExplicitOperand::ExplicitOperand(LocationKind kind, MachineRepresentation rep, int index) : LocationOperand(EXPLICIT, kind, rep, index) { diff --git a/src/compiler/instruction.h b/src/compiler/instruction.h index 9de68e0122..5d8864eea7 100644 --- a/src/compiler/instruction.h +++ b/src/compiler/instruction.h @@ -28,8 +28,7 @@ namespace compiler { // Forward declarations. class Schedule; - -class InstructionOperand { +class V8_EXPORT_PRIVATE InstructionOperand { public: static const int kInvalidVirtualRegister = -1; @@ -119,7 +118,7 @@ class InstructionOperand { return this->GetCanonicalizedValue() < that.GetCanonicalizedValue(); } - bool InterferesWith(const InstructionOperand& that) const; + bool InterferesWith(const InstructionOperand& other) const; // APIs to aid debugging. For general-stream APIs, use operator<< void Print(const RegisterConfiguration* config) const; @@ -641,8 +640,14 @@ uint64_t InstructionOperand::GetCanonicalizedValue() const { if (IsAnyLocationOperand()) { MachineRepresentation canonical = MachineRepresentation::kNone; if (IsFPRegister()) { - // We treat all FP register operands the same for simple aliasing. - canonical = MachineRepresentation::kFloat64; + if (kSimpleFPAliasing) { + // We treat all FP register operands the same for simple aliasing. 
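// --- Reviewer sketch (illustrative, not part of this CL) ------------------
// Standalone version of the FP stack-slot overlap test above. A slot
// operand's index names its highest slot, and a kFloat64 operand also covers
// the slot below it when slots are pointer-sized.
constexpr bool SlotsOverlap(int hi_a, int slots_a, int hi_b, int slots_b) {
  return hi_b >= hi_a - slots_a + 1 && hi_a >= hi_b - slots_b + 1;
}
static_assert(SlotsOverlap(5, 2, 4, 1), "double at slots 4..5 overlaps float at 4");
static_assert(!SlotsOverlap(5, 2, 3, 1), "double at slots 4..5 misses float at 3");
// --- end sketch ------------------------------------------------------------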
+ canonical = MachineRepresentation::kFloat64; + } else { + // We need to distinguish FP register operands of different reps when + // aliasing is not simple (e.g. ARM). + canonical = LocationOperand::cast(this)->representation(); + } } return InstructionOperand::KindField::update( LocationOperand::RepresentationField::update(this->value_, canonical), @@ -659,8 +664,8 @@ struct CompareOperandModuloType { } }; - -class MoveOperands final : public ZoneObject { +class V8_EXPORT_PRIVATE MoveOperands final + : public NON_EXPORTED_BASE(ZoneObject) { public: MoveOperands(const InstructionOperand& source, const InstructionOperand& destination) @@ -685,11 +690,6 @@ class MoveOperands final : public ZoneObject { } void SetPending() { destination_ = InstructionOperand(); } - // True if this move is a move into the given destination operand. - bool Blocks(const InstructionOperand& destination) const { - return !IsEliminated() && source().InterferesWith(destination); - } - // A move is redundant if it's been eliminated or if its source and // destination are the same. bool IsRedundant() const { @@ -724,8 +724,9 @@ struct PrintableMoveOperands { std::ostream& operator<<(std::ostream& os, const PrintableMoveOperands& mo); - -class ParallelMove final : public ZoneVector, public ZoneObject { +class V8_EXPORT_PRIVATE ParallelMove final + : public NON_EXPORTED_BASE(ZoneVector), + public NON_EXPORTED_BASE(ZoneObject) { public: explicit ParallelMove(Zone* zone) : ZoneVector(zone) { reserve(4); @@ -748,9 +749,10 @@ class ParallelMove final : public ZoneVector, public ZoneObject { bool IsRedundant() const; // Prepare this ParallelMove to insert move as if it happened in a subsequent - // ParallelMove. move->source() may be changed. The MoveOperand returned - // must be Eliminated. - MoveOperands* PrepareInsertAfter(MoveOperands* move) const; + // ParallelMove. move->source() may be changed. Any MoveOperands added to + // to_eliminate must be Eliminated. + void PrepareInsertAfter(MoveOperands* move, + ZoneVector* to_eliminate) const; private: DISALLOW_COPY_AND_ASSIGN(ParallelMove); diff --git a/src/compiler/move-optimizer.cc b/src/compiler/move-optimizer.cc index 953692dc13..b40b07ca86 100644 --- a/src/compiler/move-optimizer.cc +++ b/src/compiler/move-optimizer.cc @@ -25,11 +25,79 @@ struct MoveKeyCompare { }; typedef ZoneMap MoveMap; -typedef ZoneSet OperandSet; -bool Blocks(const OperandSet& set, const InstructionOperand& operand) { - return set.find(operand) != set.end(); -} +class OperandSet { + public: + explicit OperandSet(Zone* zone) : set_(zone), fp_reps_(0) {} + + void InsertOp(const InstructionOperand& op) { + set_.insert(op); + if (!kSimpleFPAliasing && op.IsFPRegister()) + fp_reps_ |= RepBit(LocationOperand::cast(op).representation()); + } + + bool ContainsOpOrAlias(const InstructionOperand& op) const { + if (set_.find(op) != set_.end()) return true; + + if (!kSimpleFPAliasing && op.IsFPRegister()) { + // Platforms where FP registers have complex aliasing need extra checks. + const LocationOperand& loc = LocationOperand::cast(op); + MachineRepresentation rep = loc.representation(); + // If haven't encountered mixed rep FP registers, skip the extra checks. + if (!HasMixedFPReps(fp_reps_ | RepBit(rep))) return false; + + // Check register against aliasing registers of other FP representations. 
+ MachineRepresentation other_rep1, other_rep2; + switch (rep) { + case MachineRepresentation::kFloat32: + other_rep1 = MachineRepresentation::kFloat64; + other_rep2 = MachineRepresentation::kSimd128; + break; + case MachineRepresentation::kFloat64: + other_rep1 = MachineRepresentation::kFloat32; + other_rep2 = MachineRepresentation::kSimd128; + break; + case MachineRepresentation::kSimd128: + other_rep1 = MachineRepresentation::kFloat32; + other_rep2 = MachineRepresentation::kFloat64; + break; + default: + UNREACHABLE(); + break; + } + const RegisterConfiguration* config = RegisterConfiguration::Turbofan(); + int base = -1; + int aliases = + config->GetAliases(rep, loc.register_code(), other_rep1, &base); + DCHECK(aliases > 0 || (aliases == 0 && base == -1)); + while (aliases--) { + if (set_.find(AllocatedOperand(LocationOperand::REGISTER, other_rep1, + base + aliases)) != set_.end()) + return true; + } + aliases = config->GetAliases(rep, loc.register_code(), other_rep2, &base); + DCHECK(aliases > 0 || (aliases == 0 && base == -1)); + while (aliases--) { + if (set_.find(AllocatedOperand(LocationOperand::REGISTER, other_rep2, + base + aliases)) != set_.end()) + return true; + } + } + return false; + } + + private: + static int RepBit(MachineRepresentation rep) { + return 1 << static_cast(rep); + } + + static bool HasMixedFPReps(int reps) { + return reps && !base::bits::IsPowerOfTwo32(reps); + } + + ZoneSet set_; + int fp_reps_; +}; int FindFirstNonEmptySlot(const Instruction* instr) { int i = Instruction::FIRST_GAP_POSITION; @@ -98,21 +166,21 @@ void MoveOptimizer::RemoveClobberedDestinations(Instruction* instruction) { // Outputs and temps are treated together as potentially clobbering a // destination operand. for (size_t i = 0; i < instruction->OutputCount(); ++i) { - outputs.insert(*instruction->OutputAt(i)); + outputs.InsertOp(*instruction->OutputAt(i)); } for (size_t i = 0; i < instruction->TempCount(); ++i) { - outputs.insert(*instruction->TempAt(i)); + outputs.InsertOp(*instruction->TempAt(i)); } // Input operands block elisions. for (size_t i = 0; i < instruction->InputCount(); ++i) { - inputs.insert(*instruction->InputAt(i)); + inputs.InsertOp(*instruction->InputAt(i)); } // Elide moves made redundant by the instruction. for (MoveOperands* move : *moves) { - if (outputs.find(move->destination()) != outputs.end() && - inputs.find(move->destination()) == inputs.end()) { + if (outputs.ContainsOpOrAlias(move->destination()) && + !inputs.ContainsOpOrAlias(move->destination())) { move->Eliminate(); } } @@ -121,7 +189,7 @@ void MoveOptimizer::RemoveClobberedDestinations(Instruction* instruction) { // the one for its input. if (instruction->IsRet() || instruction->IsTailCall()) { for (MoveOperands* move : *moves) { - if (inputs.find(move->destination()) == inputs.end()) { + if (!inputs.ContainsOpOrAlias(move->destination())) { move->Eliminate(); } } @@ -140,7 +208,7 @@ void MoveOptimizer::MigrateMoves(Instruction* to, Instruction* from) { // If an operand is an input to the instruction, we cannot move assignments // where it appears on the LHS. 
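// --- Reviewer sketch (illustrative, not part of this CL) ------------------
// The shape of the alias arithmetic that GetAliases performs on ARM-style
// register banks, shown without the bounds checks the real implementation
// does (it can also report zero aliases with base -1). size_log2 is 2 for
// float32, 3 for float64, 4 for simd128.
int GetAliasRange(int code, int size_log2, int other_size_log2, int* base) {
  if (other_size_log2 <= size_log2) {
    int count = 1 << (size_log2 - other_size_log2);
    *base = code * count;  // e.g. d3 aliases s6 and s7
    return count;
  }
  *base = code >> (other_size_log2 - size_log2);  // e.g. s7 aliases only d3
  return 1;
}
// --- end sketch ------------------------------------------------------------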
for (size_t i = 0; i < from->InputCount(); ++i) { - dst_cant_be.insert(*from->InputAt(i)); + dst_cant_be.InsertOp(*from->InputAt(i)); } // If an operand is output to the instruction, we cannot move assignments // where it appears on the RHS, because we would lose its value before the @@ -149,10 +217,10 @@ void MoveOptimizer::MigrateMoves(Instruction* to, Instruction* from) { // The output can't appear on the LHS because we performed // RemoveClobberedDestinations for the "from" instruction. for (size_t i = 0; i < from->OutputCount(); ++i) { - src_cant_be.insert(*from->OutputAt(i)); + src_cant_be.InsertOp(*from->OutputAt(i)); } for (size_t i = 0; i < from->TempCount(); ++i) { - src_cant_be.insert(*from->TempAt(i)); + src_cant_be.InsertOp(*from->TempAt(i)); } for (MoveOperands* move : *from_moves) { if (move->IsRedundant()) continue; @@ -160,7 +228,7 @@ void MoveOptimizer::MigrateMoves(Instruction* to, Instruction* from) { // move "z = dest", because z would become y rather than "V". // We assume CompressMoves has happened before this, which means we don't // have more than one assignment to dest. - src_cant_be.insert(move->destination()); + src_cant_be.InsertOp(move->destination()); } ZoneSet move_candidates(local_zone()); @@ -168,7 +236,7 @@ void MoveOptimizer::MigrateMoves(Instruction* to, Instruction* from) { // destination operands are eligible for being moved down. for (MoveOperands* move : *from_moves) { if (move->IsRedundant()) continue; - if (!Blocks(dst_cant_be, move->destination())) { + if (!dst_cant_be.ContainsOpOrAlias(move->destination())) { MoveKey key = {move->source(), move->destination()}; move_candidates.insert(key); } @@ -183,8 +251,8 @@ void MoveOptimizer::MigrateMoves(Instruction* to, Instruction* from) { auto current = iter; ++iter; InstructionOperand src = current->source; - if (Blocks(src_cant_be, src)) { - src_cant_be.insert(current->destination); + if (src_cant_be.ContainsOpOrAlias(src)) { + src_cant_be.InsertOp(current->destination); move_candidates.erase(current); changed = true; } @@ -223,8 +291,7 @@ void MoveOptimizer::CompressMoves(ParallelMove* left, MoveOpVector* right) { // merging the two gaps. for (MoveOperands* move : *right) { if (move->IsRedundant()) continue; - MoveOperands* to_eliminate = left->PrepareInsertAfter(move); - if (to_eliminate != nullptr) eliminated.push_back(to_eliminate); + left->PrepareInsertAfter(move, &eliminated); } // Eliminate dead moves. for (MoveOperands* to_eliminate : eliminated) { @@ -360,7 +427,7 @@ void MoveOptimizer::OptimizeMerge(InstructionBlock* block) { // there are such moves, we could move them, but the destination of the // moves staying behind can't appear as a source of a common move, // because the move staying behind will clobber this destination. 
- conflicting_srcs.insert(dest); + conflicting_srcs.InsertOp(dest); move_map.erase(current); } } @@ -374,9 +441,8 @@ void MoveOptimizer::OptimizeMerge(InstructionBlock* block) { auto current = iter; ++iter; DCHECK_EQ(block->PredecessorCount(), current->second); - if (conflicting_srcs.find(current->first.source) != - conflicting_srcs.end()) { - conflicting_srcs.insert(current->first.destination); + if (conflicting_srcs.ContainsOpOrAlias(current->first.source)) { + conflicting_srcs.InsertOp(current->first.destination); move_map.erase(current); changed = true; } diff --git a/src/compiler/register-allocator.cc b/src/compiler/register-allocator.cc index f3d558f964..977953509a 100644 --- a/src/compiler/register-allocator.cc +++ b/src/compiler/register-allocator.cc @@ -33,7 +33,7 @@ int GetRegisterCount(const RegisterConfiguration* cfg, RegisterKind kind) { int GetAllocatableRegisterCount(const RegisterConfiguration* cfg, RegisterKind kind) { - return kind == FP_REGISTERS ? cfg->num_allocatable_aliased_double_registers() + return kind == FP_REGISTERS ? cfg->num_allocatable_double_registers() : cfg->num_allocatable_general_registers(); } @@ -74,14 +74,8 @@ int GetByteWidth(MachineRepresentation rep) { case MachineRepresentation::kTaggedSigned: case MachineRepresentation::kTaggedPointer: case MachineRepresentation::kTagged: - return kPointerSize; case MachineRepresentation::kFloat32: -// TODO(bbudge) Eliminate this when FP register aliasing works. -#if V8_TARGET_ARCH_ARM - return kDoubleSize; -#else return kPointerSize; -#endif case MachineRepresentation::kWord64: case MachineRepresentation::kFloat64: return kDoubleSize; @@ -498,6 +492,12 @@ UsePosition* LiveRange::NextUsePositionRegisterIsBeneficial( return pos; } +LifetimePosition LiveRange::NextLifetimePositionRegisterIsBeneficial( + const LifetimePosition& start) const { + UsePosition* next_use = NextUsePositionRegisterIsBeneficial(start); + if (next_use == nullptr) return End(); + return next_use->pos(); +} UsePosition* LiveRange::PreviousUsePositionRegisterIsBeneficial( LifetimePosition start) const { @@ -1360,8 +1360,12 @@ RegisterAllocationData::RegisterAllocationData( allocation_zone()), fixed_live_ranges_(this->config()->num_general_registers(), nullptr, allocation_zone()), + fixed_float_live_ranges_(this->config()->num_float_registers(), nullptr, + allocation_zone()), fixed_double_live_ranges_(this->config()->num_double_registers(), nullptr, allocation_zone()), + fixed_simd128_live_ranges_(this->config()->num_simd128_registers(), + nullptr, allocation_zone()), spill_ranges_(code->VirtualRegisterCount(), nullptr, allocation_zone()), delayed_references_(allocation_zone()), assigned_registers_(nullptr), @@ -1539,8 +1543,21 @@ void RegisterAllocationData::MarkAllocated(MachineRepresentation rep, int index) { switch (rep) { case MachineRepresentation::kFloat32: - case MachineRepresentation::kFloat64: case MachineRepresentation::kSimd128: + if (kSimpleFPAliasing) { + assigned_double_registers_->Add(index); + } else { + int alias_base_index = -1; + int aliases = config()->GetAliases( + rep, index, MachineRepresentation::kFloat64, &alias_base_index); + DCHECK(aliases > 0 || (aliases == 0 && alias_base_index == -1)); + while (aliases--) { + int aliased_reg = alias_base_index + aliases; + assigned_double_registers_->Add(aliased_reg); + } + } + break; + case MachineRepresentation::kFloat64: assigned_double_registers_->Add(index); break; default: @@ -1867,7 +1884,11 @@ int LiveRangeBuilder::FixedFPLiveRangeID(int index, MachineRepresentation rep) { 
int result = -index - 1; switch (rep) { case MachineRepresentation::kSimd128: + result -= config()->num_float_registers(); + // Fall through. case MachineRepresentation::kFloat32: + result -= config()->num_double_registers(); + // Fall through. case MachineRepresentation::kFloat64: result -= config()->num_general_registers(); break; @@ -1894,25 +1915,35 @@ TopLevelLiveRange* LiveRangeBuilder::FixedLiveRangeFor(int index) { TopLevelLiveRange* LiveRangeBuilder::FixedFPLiveRangeFor( int index, MachineRepresentation rep) { - TopLevelLiveRange* result = nullptr; + int num_regs = -1; + ZoneVector* live_ranges = nullptr; switch (rep) { case MachineRepresentation::kFloat32: + num_regs = config()->num_float_registers(); + live_ranges = &data()->fixed_float_live_ranges(); + break; case MachineRepresentation::kFloat64: + num_regs = config()->num_double_registers(); + live_ranges = &data()->fixed_double_live_ranges(); + break; case MachineRepresentation::kSimd128: - DCHECK(index < config()->num_double_registers()); - result = data()->fixed_double_live_ranges()[index]; - if (result == nullptr) { - result = data()->NewLiveRange(FixedFPLiveRangeID(index, rep), rep); - DCHECK(result->IsFixed()); - result->set_assigned_register(index); - data()->MarkAllocated(rep, index); - data()->fixed_double_live_ranges()[index] = result; - } + num_regs = config()->num_simd128_registers(); + live_ranges = &data()->fixed_simd128_live_ranges(); break; default: UNREACHABLE(); break; } + + DCHECK(index < num_regs); + TopLevelLiveRange* result = (*live_ranges)[index]; + if (result == nullptr) { + result = data()->NewLiveRange(FixedFPLiveRangeID(index, rep), rep); + DCHECK(result->IsFixed()); + result->set_assigned_register(index); + data()->MarkAllocated(rep, index); + (*live_ranges)[index] = result; + } return result; } @@ -2035,8 +2066,7 @@ void LiveRangeBuilder::ProcessInstructions(const InstructionBlock* block, } if (instr->ClobbersDoubleRegisters()) { - for (int i = 0; i < config()->num_allocatable_aliased_double_registers(); - ++i) { + for (int i = 0; i < config()->num_allocatable_double_registers(); ++i) { // Add a UseInterval for all DoubleRegisters. See comment above for // general registers. int code = config()->GetAllocatableDoubleCode(i); @@ -2045,6 +2075,26 @@ void LiveRangeBuilder::ProcessInstructions(const InstructionBlock* block, range->AddUseInterval(curr_position, curr_position.End(), allocation_zone()); } + // Clobber fixed float registers on archs with non-simple aliasing. + if (!kSimpleFPAliasing) { + for (int i = 0; i < config()->num_allocatable_float_registers(); ++i) { + // Add a UseInterval for all FloatRegisters. See comment above for + // general registers. 
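// --- Reviewer sketch (illustrative, not part of this CL) ------------------
// The fall-through above packs fixed FP live-range IDs into disjoint negative
// blocks, one per representation, below the general-register block. Shown
// here with a plain int for the representation and illustrative count
// parameters instead of the RegisterConfiguration accessors.
int FixedFPLiveRangeId(int index, int rep,  // 0 = float64, 1 = float32, 2 = simd128
                       int num_general, int num_double, int num_float) {
  int result = -index - 1;
  if (rep == 2) result -= num_float;   // kSimd128: below the float32 block.
  if (rep >= 1) result -= num_double;  // kFloat32: below the float64 block.
  result -= num_general;               // kFloat64: below the general block.
  return result;
}
// --- end sketch ------------------------------------------------------------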
+ int code = config()->GetAllocatableFloatCode(i); + TopLevelLiveRange* range = + FixedFPLiveRangeFor(code, MachineRepresentation::kFloat32); + range->AddUseInterval(curr_position, curr_position.End(), + allocation_zone()); + } + for (int i = 0; i < config()->num_allocatable_simd128_registers(); + ++i) { + int code = config()->GetAllocatableSimd128Code(i); + TopLevelLiveRange* range = + FixedFPLiveRangeFor(code, MachineRepresentation::kSimd128); + range->AddUseInterval(curr_position, curr_position.End(), + allocation_zone()); + } + } } for (size_t i = 0; i < instr->InputCount(); i++) { @@ -2690,9 +2740,15 @@ void LinearScanAllocator::AllocateRegisters() { if (current != nullptr) AddToInactive(current); } } else { + for (TopLevelLiveRange* current : data()->fixed_float_live_ranges()) { + if (current != nullptr) AddToInactive(current); + } for (TopLevelLiveRange* current : data()->fixed_double_live_ranges()) { if (current != nullptr) AddToInactive(current); } + for (TopLevelLiveRange* current : data()->fixed_simd128_live_ranges()) { + if (current != nullptr) AddToInactive(current); + } } while (!unhandled_live_ranges().empty()) { @@ -2873,9 +2929,32 @@ void LinearScanAllocator::InactiveToActive(LiveRange* range) { range->TopLevel()->vreg(), range->relative_id()); } +void LinearScanAllocator::GetFPRegisterSet(MachineRepresentation rep, + int* num_regs, int* num_codes, + const int** codes) const { + DCHECK(!kSimpleFPAliasing); + if (rep == MachineRepresentation::kFloat32) { + *num_regs = data()->config()->num_float_registers(); + *num_codes = data()->config()->num_allocatable_float_registers(); + *codes = data()->config()->allocatable_float_codes(); + } else if (rep == MachineRepresentation::kSimd128) { + *num_regs = data()->config()->num_simd128_registers(); + *num_codes = data()->config()->num_allocatable_simd128_registers(); + *codes = data()->config()->allocatable_simd128_codes(); + } else { + UNREACHABLE(); + } +} + void LinearScanAllocator::FindFreeRegistersForRange( LiveRange* range, Vector positions) { int num_regs = num_registers(); + int num_codes = num_allocatable_registers(); + const int* codes = allocatable_register_codes(); + MachineRepresentation rep = range->representation(); + if (!kSimpleFPAliasing && (rep == MachineRepresentation::kFloat32 || + rep == MachineRepresentation::kSimd128)) + GetFPRegisterSet(rep, &num_regs, &num_codes, &codes); DCHECK_GE(positions.length(), num_regs); for (int i = 0; i < num_regs; i++) { @@ -2884,9 +2963,20 @@ void LinearScanAllocator::FindFreeRegistersForRange( for (LiveRange* cur_active : active_live_ranges()) { int cur_reg = cur_active->assigned_register(); - positions[cur_reg] = LifetimePosition::GapFromInstructionIndex(0); - TRACE("Register %s is free until pos %d (1)\n", RegisterName(cur_reg), - LifetimePosition::GapFromInstructionIndex(0).value()); + if (kSimpleFPAliasing || mode() == GENERAL_REGISTERS) { + positions[cur_reg] = LifetimePosition::GapFromInstructionIndex(0); + TRACE("Register %s is free until pos %d (1)\n", RegisterName(cur_reg), + LifetimePosition::GapFromInstructionIndex(0).value()); + } else { + int alias_base_index = -1; + int aliases = data()->config()->GetAliases( + cur_active->representation(), cur_reg, rep, &alias_base_index); + DCHECK(aliases > 0 || (aliases == 0 && alias_base_index == -1)); + while (aliases--) { + int aliased_reg = alias_base_index + aliases; + positions[aliased_reg] = LifetimePosition::GapFromInstructionIndex(0); + } + } } for (LiveRange* cur_inactive : inactive_live_ranges()) { @@ -2894,9 
+2984,20 @@ void LinearScanAllocator::FindFreeRegistersForRange( LifetimePosition next_intersection = cur_inactive->FirstIntersection(range); if (!next_intersection.IsValid()) continue; int cur_reg = cur_inactive->assigned_register(); - positions[cur_reg] = Min(positions[cur_reg], next_intersection); - TRACE("Register %s is free until pos %d (2)\n", RegisterName(cur_reg), - Min(positions[cur_reg], next_intersection).value()); + if (kSimpleFPAliasing || mode() == GENERAL_REGISTERS) { + positions[cur_reg] = Min(positions[cur_reg], next_intersection); + TRACE("Register %s is free until pos %d (2)\n", RegisterName(cur_reg), + Min(positions[cur_reg], next_intersection).value()); + } else { + int alias_base_index = -1; + int aliases = data()->config()->GetAliases( + cur_inactive->representation(), cur_reg, rep, &alias_base_index); + DCHECK(aliases > 0 || (aliases == 0 && alias_base_index == -1)); + while (aliases--) { + int aliased_reg = alias_base_index + aliases; + positions[aliased_reg] = Min(positions[aliased_reg], next_intersection); + } + } } } @@ -2977,8 +3078,14 @@ bool LinearScanAllocator::TryAllocatePreferredReg( bool LinearScanAllocator::TryAllocateFreeReg( LiveRange* current, const Vector& free_until_pos) { + int num_regs = 0; // used only for the call to GetFPRegisterSet. int num_codes = num_allocatable_registers(); const int* codes = allocatable_register_codes(); + MachineRepresentation rep = current->representation(); + if (!kSimpleFPAliasing && (rep == MachineRepresentation::kFloat32 || + rep == MachineRepresentation::kSimd128)) + GetFPRegisterSet(rep, &num_regs, &num_codes, &codes); + DCHECK_GE(free_until_pos.length(), num_codes); // Find the register which stays free for the longest time. @@ -3026,6 +3133,10 @@ void LinearScanAllocator::AllocateBlockedReg(LiveRange* current) { int num_regs = num_registers(); int num_codes = num_allocatable_registers(); const int* codes = allocatable_register_codes(); + MachineRepresentation rep = current->representation(); + if (!kSimpleFPAliasing && (rep == MachineRepresentation::kFloat32 || + rep == MachineRepresentation::kSimd128)) + GetFPRegisterSet(rep, &num_regs, &num_codes, &codes); LifetimePosition use_pos[RegisterConfiguration::kMaxFPRegisters]; LifetimePosition block_pos[RegisterConfiguration::kMaxFPRegisters]; @@ -3037,16 +3148,28 @@ void LinearScanAllocator::AllocateBlockedReg(LiveRange* current) { int cur_reg = range->assigned_register(); bool is_fixed_or_cant_spill = range->TopLevel()->IsFixed() || !range->CanBeSpilled(current->Start()); - if (is_fixed_or_cant_spill) { - block_pos[cur_reg] = use_pos[cur_reg] = - LifetimePosition::GapFromInstructionIndex(0); - } else { - UsePosition* next_use = - range->NextUsePositionRegisterIsBeneficial(current->Start()); - if (next_use == nullptr) { - use_pos[cur_reg] = range->End(); + if (kSimpleFPAliasing || mode() == GENERAL_REGISTERS) { + if (is_fixed_or_cant_spill) { + block_pos[cur_reg] = use_pos[cur_reg] = + LifetimePosition::GapFromInstructionIndex(0); } else { - use_pos[cur_reg] = next_use->pos(); + use_pos[cur_reg] = + range->NextLifetimePositionRegisterIsBeneficial(current->Start()); + } + } else { + int alias_base_index = -1; + int aliases = data()->config()->GetAliases( + range->representation(), cur_reg, rep, &alias_base_index); + DCHECK(aliases > 0 || (aliases == 0 && alias_base_index == -1)); + while (aliases--) { + int aliased_reg = alias_base_index + aliases; + if (is_fixed_or_cant_spill) { + block_pos[aliased_reg] = use_pos[aliased_reg] = + 
LifetimePosition::GapFromInstructionIndex(0); + } else { + use_pos[aliased_reg] = + range->NextLifetimePositionRegisterIsBeneficial(current->Start()); + } } } } @@ -3057,11 +3180,29 @@ void LinearScanAllocator::AllocateBlockedReg(LiveRange* current) { if (!next_intersection.IsValid()) continue; int cur_reg = range->assigned_register(); bool is_fixed = range->TopLevel()->IsFixed(); - if (is_fixed) { - block_pos[cur_reg] = Min(block_pos[cur_reg], next_intersection); - use_pos[cur_reg] = Min(block_pos[cur_reg], use_pos[cur_reg]); + if (kSimpleFPAliasing || mode() == GENERAL_REGISTERS) { + if (is_fixed) { + block_pos[cur_reg] = Min(block_pos[cur_reg], next_intersection); + use_pos[cur_reg] = Min(block_pos[cur_reg], use_pos[cur_reg]); + } else { + use_pos[cur_reg] = Min(use_pos[cur_reg], next_intersection); + } } else { - use_pos[cur_reg] = Min(use_pos[cur_reg], next_intersection); + int alias_base_index = -1; + int aliases = data()->config()->GetAliases( + range->representation(), cur_reg, rep, &alias_base_index); + DCHECK(aliases > 0 || (aliases == 0 && alias_base_index == -1)); + while (aliases--) { + int aliased_reg = alias_base_index + aliases; + if (is_fixed) { + block_pos[aliased_reg] = + Min(block_pos[aliased_reg], next_intersection); + use_pos[aliased_reg] = + Min(block_pos[aliased_reg], use_pos[aliased_reg]); + } else { + use_pos[aliased_reg] = Min(use_pos[aliased_reg], next_intersection); + } + } } } @@ -3113,7 +3254,15 @@ void LinearScanAllocator::SplitAndSpillIntersecting(LiveRange* current) { LifetimePosition split_pos = current->Start(); for (size_t i = 0; i < active_live_ranges().size(); ++i) { LiveRange* range = active_live_ranges()[i]; - if (range->assigned_register() != reg) continue; + if (kSimpleFPAliasing || mode() == GENERAL_REGISTERS) { + if (range->assigned_register() != reg) continue; + } else { + if (!data()->config()->AreAliases(current->representation(), reg, + range->representation(), + range->assigned_register())) { + continue; + } + } UsePosition* next_pos = range->NextRegisterPosition(current->Start()); LifetimePosition spill_pos = FindOptimalSpillingPos(range, split_pos); @@ -3140,7 +3289,14 @@ void LinearScanAllocator::SplitAndSpillIntersecting(LiveRange* current) { LiveRange* range = inactive_live_ranges()[i]; DCHECK(range->End() > current->Start()); if (range->TopLevel()->IsFixed()) continue; - if (range->assigned_register() != reg) continue; + if (kSimpleFPAliasing || mode() == GENERAL_REGISTERS) { + if (range->assigned_register() != reg) continue; + } else { + if (!data()->config()->AreAliases(current->representation(), reg, + range->representation(), + range->assigned_register())) + continue; + } LifetimePosition next_intersection = range->FirstIntersection(current); if (next_intersection.IsValid()) { @@ -3631,7 +3787,6 @@ int LiveRangeConnector::ResolveControlFlow(const InstructionBlock* block, return gap_index; } - void LiveRangeConnector::ConnectRanges(Zone* local_zone) { DelayedInsertionMap delayed_insertion_map(local_zone); for (TopLevelLiveRange* top_range : data()->live_ranges()) { @@ -3719,9 +3874,8 @@ void LiveRangeConnector::ConnectRanges(Zone* local_zone) { // Gather all MoveOperands for a single ParallelMove. 
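// Under combining FP aliasing a single new move can interfere with several
// pending moves, so PrepareInsertAfter now collects every move it eliminates
// into |to_eliminate| instead of returning at most one.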
MoveOperands* move = new (code_zone()) MoveOperands(it->first.second, it->second); - MoveOperands* eliminate = moves->PrepareInsertAfter(move); + moves->PrepareInsertAfter(move, &to_eliminate); to_insert.push_back(move); - if (eliminate != nullptr) to_eliminate.push_back(eliminate); } } diff --git a/src/compiler/register-allocator.h b/src/compiler/register-allocator.h index 2ea877c804..4d94f1eac0 100644 --- a/src/compiler/register-allocator.h +++ b/src/compiler/register-allocator.h @@ -357,6 +357,11 @@ class V8_EXPORT_PRIVATE LiveRange : public NON_EXPORTED_BASE(ZoneObject) { UsePosition* NextUsePositionRegisterIsBeneficial( LifetimePosition start) const; + // Returns lifetime position for which register is beneficial in this live + // range and which follows both start and last processed use position. + LifetimePosition NextLifetimePositionRegisterIsBeneficial( + const LifetimePosition& start) const; + // Returns use position for which register is beneficial in this live // range and which precedes start. UsePosition* PreviousUsePositionRegisterIsBeneficial( @@ -773,12 +778,24 @@ class RegisterAllocationData final : public ZoneObject { ZoneVector& fixed_live_ranges() { return fixed_live_ranges_; } + ZoneVector& fixed_float_live_ranges() { + return fixed_float_live_ranges_; + } + const ZoneVector& fixed_float_live_ranges() const { + return fixed_float_live_ranges_; + } ZoneVector& fixed_double_live_ranges() { return fixed_double_live_ranges_; } const ZoneVector& fixed_double_live_ranges() const { return fixed_double_live_ranges_; } + ZoneVector& fixed_simd128_live_ranges() { + return fixed_simd128_live_ranges_; + } + const ZoneVector& fixed_simd128_live_ranges() const { + return fixed_simd128_live_ranges_; + } ZoneVector& live_in_sets() { return live_in_sets_; } ZoneVector& live_out_sets() { return live_out_sets_; } ZoneVector& spill_ranges() { return spill_ranges_; } @@ -840,7 +857,9 @@ class RegisterAllocationData final : public ZoneObject { ZoneVector live_out_sets_; ZoneVector live_ranges_; ZoneVector fixed_live_ranges_; + ZoneVector fixed_float_live_ranges_; ZoneVector fixed_double_live_ranges_; + ZoneVector fixed_simd128_live_ranges_; ZoneVector spill_ranges_; DelayedReferences delayed_references_; BitVector* assigned_registers_; @@ -1058,6 +1077,8 @@ class LinearScanAllocator final : public RegisterAllocator { const Vector& free_until_pos); bool TryAllocatePreferredReg(LiveRange* range, const Vector& free_until_pos); + void GetFPRegisterSet(MachineRepresentation rep, int* num_regs, + int* num_codes, const int** codes) const; void FindFreeRegistersForRange(LiveRange* range, Vector free_until_pos); void ProcessCurrentRange(LiveRange* current); diff --git a/src/compiler/wasm-linkage.cc b/src/compiler/wasm-linkage.cc index 5dfcbcc552..a41c93ca35 100644 --- a/src/compiler/wasm-linkage.cc +++ b/src/compiler/wasm-linkage.cc @@ -178,6 +178,17 @@ struct Allocator { // Allocate a floating point register/stack location. if (fp_offset < fp_count) { DoubleRegister reg = fp_regs[fp_offset++]; +#if V8_TARGET_ARCH_ARM + // Allocate floats using a double register, but modify the code to + // reflect how ARM FP registers alias. + // TODO(bbudge) Modify wasm linkage to allow use of all float regs. 
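+      // For example, an f32 value assigned d3 here is actually passed in
+      // s6 (the low half of d3), so the location code is doubled below.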
+ if (type == kAstF32) { + int float_reg_code = reg.code() * 2; + DCHECK(float_reg_code < RegisterConfiguration::kMaxFPRegisters); + return regloc(DoubleRegister::from_code(float_reg_code), + MachineTypeFor(type)); + } +#endif return regloc(reg, MachineTypeFor(type)); } else { int offset = -1 - stack_offset; diff --git a/src/machine-type.h b/src/machine-type.h index 975b93a416..69cb6d79fa 100644 --- a/src/machine-type.h +++ b/src/machine-type.h @@ -239,7 +239,7 @@ inline bool IsAnyTagged(MachineRepresentation rep) { } // Gets the log2 of the element size in bytes of the machine type. -inline int ElementSizeLog2Of(MachineRepresentation rep) { +V8_EXPORT_PRIVATE inline int ElementSizeLog2Of(MachineRepresentation rep) { switch (rep) { case MachineRepresentation::kBit: case MachineRepresentation::kWord8: diff --git a/src/register-configuration.cc b/src/register-configuration.cc index 1a198ea42c..28d0ab2cc5 100644 --- a/src/register-configuration.cc +++ b/src/register-configuration.cc @@ -70,15 +70,12 @@ class ArchDefaultRegisterConfiguration : public RegisterConfiguration { #if V8_TARGET_ARCH_IA32 kMaxAllocatableGeneralRegisterCount, kMaxAllocatableDoubleRegisterCount, - kMaxAllocatableDoubleRegisterCount, #elif V8_TARGET_ARCH_X87 kMaxAllocatableGeneralRegisterCount, compiler == TURBOFAN ? 1 : kMaxAllocatableDoubleRegisterCount, - compiler == TURBOFAN ? 1 : kMaxAllocatableDoubleRegisterCount, #elif V8_TARGET_ARCH_X64 kMaxAllocatableGeneralRegisterCount, kMaxAllocatableDoubleRegisterCount, - kMaxAllocatableDoubleRegisterCount, #elif V8_TARGET_ARCH_ARM FLAG_enable_embedded_constant_pool ? (kMaxAllocatableGeneralRegisterCount - 1) @@ -86,27 +83,21 @@ class ArchDefaultRegisterConfiguration : public RegisterConfiguration { CpuFeatures::IsSupported(VFP32DREGS) ? kMaxAllocatableDoubleRegisterCount : (ALLOCATABLE_NO_VFP32_DOUBLE_REGISTERS(REGISTER_COUNT) 0), - ALLOCATABLE_NO_VFP32_DOUBLE_REGISTERS(REGISTER_COUNT) 0, #elif V8_TARGET_ARCH_ARM64 kMaxAllocatableGeneralRegisterCount, kMaxAllocatableDoubleRegisterCount, - kMaxAllocatableDoubleRegisterCount, #elif V8_TARGET_ARCH_MIPS kMaxAllocatableGeneralRegisterCount, kMaxAllocatableDoubleRegisterCount, - kMaxAllocatableDoubleRegisterCount, #elif V8_TARGET_ARCH_MIPS64 kMaxAllocatableGeneralRegisterCount, kMaxAllocatableDoubleRegisterCount, - kMaxAllocatableDoubleRegisterCount, #elif V8_TARGET_ARCH_PPC kMaxAllocatableGeneralRegisterCount, kMaxAllocatableDoubleRegisterCount, - kMaxAllocatableDoubleRegisterCount, #elif V8_TARGET_ARCH_S390 kMaxAllocatableGeneralRegisterCount, kMaxAllocatableDoubleRegisterCount, - kMaxAllocatableDoubleRegisterCount, #else #error Unsupported target architecture. 
#endif @@ -145,7 +136,6 @@ const RegisterConfiguration* RegisterConfiguration::Turbofan() { RegisterConfiguration::RegisterConfiguration( int num_general_registers, int num_double_registers, int num_allocatable_general_registers, int num_allocatable_double_registers, - int num_allocatable_aliased_double_registers, const int* allocatable_general_codes, const int* allocatable_double_codes, AliasingKind fp_aliasing_kind, const char* const* general_register_names, const char* const* float_register_names, @@ -158,8 +148,6 @@ RegisterConfiguration::RegisterConfiguration( num_allocatable_general_registers_(num_allocatable_general_registers), num_allocatable_float_registers_(0), num_allocatable_double_registers_(num_allocatable_double_registers), - num_allocatable_aliased_double_registers_( - num_allocatable_aliased_double_registers), num_allocatable_simd128_registers_(0), allocatable_general_codes_mask_(0), allocatable_float_codes_mask_(0), diff --git a/src/register-configuration.h b/src/register-configuration.h index cd09a88988..946c82e56c 100644 --- a/src/register-configuration.h +++ b/src/register-configuration.h @@ -36,7 +36,6 @@ class V8_EXPORT_PRIVATE RegisterConfiguration { RegisterConfiguration(int num_general_registers, int num_double_registers, int num_allocatable_general_registers, int num_allocatable_double_registers, - int num_allocatable_aliased_double_registers, const int* allocatable_general_codes, const int* allocatable_double_codes, AliasingKind fp_aliasing_kind, @@ -58,12 +57,6 @@ class V8_EXPORT_PRIVATE RegisterConfiguration { int num_allocatable_double_registers() const { return num_allocatable_double_registers_; } - // TODO(bbudge): This is a temporary work-around required because our - // register allocator does not yet support the aliasing of single/double - // registers on ARM. - int num_allocatable_aliased_double_registers() const { - return num_allocatable_aliased_double_registers_; - } int num_allocatable_simd128_registers() const { return num_allocatable_simd128_registers_; } @@ -143,7 +136,6 @@ class V8_EXPORT_PRIVATE RegisterConfiguration { int num_allocatable_general_registers_; int num_allocatable_float_registers_; int num_allocatable_double_registers_; - int num_allocatable_aliased_double_registers_; int num_allocatable_simd128_registers_; int32_t allocatable_general_codes_mask_; int32_t allocatable_float_codes_mask_; diff --git a/src/zone/zone-allocator.h b/src/zone/zone-allocator.h index 8370d73e49..1e2862a2c1 100644 --- a/src/zone/zone-allocator.h +++ b/src/zone/zone-allocator.h @@ -26,6 +26,8 @@ class zone_allocator { typedef zone_allocator other; }; + // TODO(bbudge) Remove when V8 updates to MSVS 2015. See crbug.com/603131. + zone_allocator() : zone_(nullptr) { UNREACHABLE(); } explicit zone_allocator(Zone* zone) throw() : zone_(zone) {} explicit zone_allocator(const zone_allocator& other) throw() : zone_(other.zone_) {} @@ -62,7 +64,6 @@ class zone_allocator { Zone* zone() { return zone_; } private: - zone_allocator(); Zone* zone_; }; diff --git a/test/cctest/compiler/test-gap-resolver.cc b/test/cctest/compiler/test-gap-resolver.cc index 3b1cdb6d81..1cceb9cd59 100644 --- a/test/cctest/compiler/test-gap-resolver.cc +++ b/test/cctest/compiler/test-gap-resolver.cc @@ -13,15 +13,32 @@ namespace compiler { const auto GetRegConfig = RegisterConfiguration::Turbofan; -// Fragments the given operand into an equivalent set of operands to simplify -// ParallelMove equivalence testing. 
+// Fragments the given FP operand into an equivalent set of FP operands to +// simplify ParallelMove equivalence testing. void GetCanonicalOperands(const InstructionOperand& op, std::vector* fragments) { CHECK(!kSimpleFPAliasing); CHECK(op.IsFPLocationOperand()); - // TODO(bbudge) Split into float operands on platforms with non-simple FP - // register aliasing. - fragments->push_back(op); + const LocationOperand& loc = LocationOperand::cast(op); + MachineRepresentation rep = loc.representation(); + int base = -1; + int aliases = GetRegConfig()->GetAliases( + rep, 0, MachineRepresentation::kFloat32, &base); + CHECK_LT(0, aliases); + CHECK_GE(4, aliases); + int index = -1; + int step = 1; + if (op.IsFPRegister()) { + index = loc.register_code() * aliases; + } else { + index = loc.index(); + step = -1; + } + for (int i = 0; i < aliases; i++) { + fragments->push_back(AllocatedOperand(loc.location_kind(), + MachineRepresentation::kFloat32, + index + i * step)); + } } // The state of our move interpreter is the mapping of operands to values. Note @@ -36,7 +53,9 @@ class InterpreterState { const InstructionOperand& dst = m->destination(); if (!kSimpleFPAliasing && src.IsFPLocationOperand() && dst.IsFPLocationOperand()) { - // Canonicalize FP location-location moves. + // Canonicalize FP location-location moves by fragmenting them into + // an equivalent sequence of float32 moves, to simplify state + // equivalence testing. std::vector src_fragments; GetCanonicalOperands(src, &src_fragments); CHECK(!src_fragments.empty()); @@ -115,9 +134,11 @@ class InterpreterState { int index; if (!is_constant) { const LocationOperand& loc_op = LocationOperand::cast(op); - // Canonicalize FP location operand representations to kFloat64. + // Preserve FP representation when FP register aliasing is complex. + // Otherwise, canonicalize to kFloat64. if (IsFloatingPoint(loc_op.representation())) { - rep = MachineRepresentation::kFloat64; + rep = kSimpleFPAliasing ? MachineRepresentation::kFloat64 + : loc_op.representation(); } if (loc_op.IsAnyRegister()) { index = loc_op.register_code(); @@ -321,9 +342,11 @@ class ParallelMoveCreator : public HandleAndZoneScope { auto GetValidRegisterCode = [&conf](MachineRepresentation rep, int index) { switch (rep) { case MachineRepresentation::kFloat32: + return conf->RegisterConfiguration::GetAllocatableFloatCode(index); case MachineRepresentation::kFloat64: - case MachineRepresentation::kSimd128: return conf->RegisterConfiguration::GetAllocatableDoubleCode(index); + case MachineRepresentation::kSimd128: + return conf->RegisterConfiguration::GetAllocatableSimd128Code(index); default: return conf->RegisterConfiguration::GetAllocatableGeneralCode(index); } @@ -368,6 +391,118 @@ void RunTest(ParallelMove* pm, Zone* zone) { CHECK_EQ(mi1.state(), mi2.state()); } +TEST(Aliasing) { + // On platforms with simple aliasing, these parallel moves are ill-formed. 
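+  // Register naming below follows ARM VFP: s0-s3 are the halves of d0 and
+  // d1, s4 lives in d2, and d16 has no single-precision aliases (only d0-d15
+  // overlap the s-registers).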
+  if (kSimpleFPAliasing) return;
+
+  ParallelMoveCreator pmc;
+  Zone* zone = pmc.main_zone();
+
+  auto s0 = AllocatedOperand(LocationOperand::REGISTER,
+                             MachineRepresentation::kFloat32, 0);
+  auto s1 = AllocatedOperand(LocationOperand::REGISTER,
+                             MachineRepresentation::kFloat32, 1);
+  auto s2 = AllocatedOperand(LocationOperand::REGISTER,
+                             MachineRepresentation::kFloat32, 2);
+  auto s3 = AllocatedOperand(LocationOperand::REGISTER,
+                             MachineRepresentation::kFloat32, 3);
+  auto s4 = AllocatedOperand(LocationOperand::REGISTER,
+                             MachineRepresentation::kFloat32, 4);
+
+  auto d0 = AllocatedOperand(LocationOperand::REGISTER,
+                             MachineRepresentation::kFloat64, 0);
+  auto d1 = AllocatedOperand(LocationOperand::REGISTER,
+                             MachineRepresentation::kFloat64, 1);
+  auto d16 = AllocatedOperand(LocationOperand::REGISTER,
+                              MachineRepresentation::kFloat64, 16);
+
+  // Double slots must be odd to match frame allocation.
+  auto dSlot = AllocatedOperand(LocationOperand::STACK_SLOT,
+                                MachineRepresentation::kFloat64, 3);
+
+  // Cycles involving s- and d-registers.
+  {
+    std::vector<InstructionOperand> moves = {
+        s2, s0,  // s2 <- s0
+        d0, d1   // d0 <- d1
+    };
+    RunTest(pmc.Create(moves), zone);
+  }
+  {
+    std::vector<InstructionOperand> moves = {
+        d0, d1,  // d0 <- d1
+        s2, s0   // s2 <- s0
+    };
+    RunTest(pmc.Create(moves), zone);
+  }
+  {
+    std::vector<InstructionOperand> moves = {
+        s2, s1,  // s2 <- s1
+        d0, d1   // d0 <- d1
+    };
+    RunTest(pmc.Create(moves), zone);
+  }
+  {
+    std::vector<InstructionOperand> moves = {
+        d0, d1,  // d0 <- d1
+        s2, s1   // s2 <- s1
+    };
+    RunTest(pmc.Create(moves), zone);
+  }
+  // Two cycles involving a single d-register.
+  {
+    std::vector<InstructionOperand> moves = {
+        d0, d1,  // d0 <- d1
+        s2, s1,  // s2 <- s1
+        s3, s0   // s3 <- s0
+    };
+    RunTest(pmc.Create(moves), zone);
+  }
+  // Cycle with a float move that must be deferred until after swaps.
+  {
+    std::vector<InstructionOperand> moves = {
+        d0, d1,  // d0 <- d1
+        s2, s0,  // s2 <- s0
+        s3, s4   // s3 <- s4 must be deferred
+    };
+    RunTest(pmc.Create(moves), zone);
+  }
+  // Cycles involving s-registers and a non-aliased d-register.
+  {
+    std::vector<InstructionOperand> moves = {
+        d16, d0,  // d16 <- d0
+        s1, s2,   // s1 <- s2
+        d1, d16   // d1 <- d16
+    };
+    RunTest(pmc.Create(moves), zone);
+  }
+  {
+    std::vector<InstructionOperand> moves = {
+        s2, s1,   // s2 <- s1
+        d0, d16,  // d0 <- d16
+        d16, d1   // d16 <- d1
+    };
+    RunTest(pmc.Create(moves), zone);
+  }
+  {
+    std::vector<InstructionOperand> moves = {
+        d0, d16,  // d0 <- d16
+        d16, d1,  // d16 <- d1
+        s3, s0    // s3 <- s0
+    };
+    RunTest(pmc.Create(moves), zone);
+  }
+  // Cycle involving aliasing registers and a slot.
+  {
+    std::vector<InstructionOperand> moves = {
+        dSlot, d0,  // dSlot <- d0
+        d1, dSlot,  // d1 <- dSlot
+        s0, s3      // s0 <- s3
+    };
+    RunTest(pmc.Create(moves), zone);
+  }
+}
+
 TEST(FuzzResolver) {
   ParallelMoveCreator pmc;
   for (int size = 0; size < 80; ++size) {
diff --git a/test/cctest/compiler/test-run-native-calls.cc b/test/cctest/compiler/test-run-native-calls.cc
index 4c271859f4..7885ba4014 100644
--- a/test/cctest/compiler/test-run-native-calls.cc
+++ b/test/cctest/compiler/test-run-native-calls.cc
@@ -87,8 +87,16 @@ class RegisterPairs : public Pairs {
 class Float32RegisterPairs : public Pairs {
  public:
   Float32RegisterPairs()
-      : Pairs(100, GetRegConfig()->num_allocatable_aliased_double_registers(),
-              GetRegConfig()->allocatable_double_codes()) {}
+      : Pairs(
+            100,
+#if V8_TARGET_ARCH_ARM
+            // TODO(bbudge) Modify wasm linkage to allow use of all float regs.
+ GetRegConfig()->num_allocatable_double_registers() / 2 - 2, +#else + GetRegConfig()->num_allocatable_double_registers(), +#endif + GetRegConfig()->allocatable_double_codes()) { + } }; @@ -127,6 +135,10 @@ struct Allocator { // Allocate a floating point register/stack location. if (fp_offset < fp_count) { int code = fp_regs[fp_offset++]; +#if V8_TARGET_ARCH_ARM + // TODO(bbudge) Modify wasm linkage to allow use of all float regs. + if (type.representation() == MachineRepresentation::kFloat32) code *= 2; +#endif return LinkageLocation::ForRegister(code, type); } else { int offset = -1 - stack_offset; diff --git a/test/unittests/BUILD.gn b/test/unittests/BUILD.gn index c005b144f0..ff88879ac8 100644 --- a/test/unittests/BUILD.gn +++ b/test/unittests/BUILD.gn @@ -50,6 +50,7 @@ v8_executable("unittests") { "compiler/instruction-selector-unittest.h", "compiler/instruction-sequence-unittest.cc", "compiler/instruction-sequence-unittest.h", + "compiler/instruction-unittest.cc", "compiler/int64-lowering-unittest.cc", "compiler/js-builtin-reducer-unittest.cc", "compiler/js-create-lowering-unittest.cc", diff --git a/test/unittests/compiler/instruction-sequence-unittest.cc b/test/unittests/compiler/instruction-sequence-unittest.cc index 0a17100f25..e61f690b4e 100644 --- a/test/unittests/compiler/instruction-sequence-unittest.cc +++ b/test/unittests/compiler/instruction-sequence-unittest.cc @@ -22,11 +22,8 @@ static char register_names_[10 * (RegisterConfiguration::kMaxGeneralRegisters + namespace { static int allocatable_codes[InstructionSequenceTest::kDefaultNRegs] = { 0, 1, 2, 3, 4, 5, 6, 7}; -static int allocatable_double_codes[InstructionSequenceTest::kDefaultNRegs] = { - 0, 1, 2, 3, 4, 5, 6, 7}; } - static void InitializeRegisterNames() { char* loc = register_names_; for (int i = 0; i < RegisterConfiguration::kMaxGeneralRegisters; ++i) { @@ -92,8 +89,7 @@ RegisterConfiguration* InstructionSequenceTest::config() { if (!config_) { config_.reset(new RegisterConfiguration( num_general_registers_, num_double_registers_, num_general_registers_, - num_double_registers_, num_double_registers_, allocatable_codes, - allocatable_double_codes, + num_double_registers_, allocatable_codes, allocatable_codes, kSimpleFPAliasing ? RegisterConfiguration::OVERLAP : RegisterConfiguration::COMBINE, general_register_names_, diff --git a/test/unittests/compiler/instruction-sequence-unittest.h b/test/unittests/compiler/instruction-sequence-unittest.h index 1aa552a1cc..8424834816 100644 --- a/test/unittests/compiler/instruction-sequence-unittest.h +++ b/test/unittests/compiler/instruction-sequence-unittest.h @@ -20,7 +20,9 @@ class InstructionSequenceTest : public TestWithIsolateAndZone { static const int kDefaultNRegs = 8; static const int kNoValue = kMinInt; static const MachineRepresentation kNoRep = MachineRepresentation::kNone; + static const MachineRepresentation kFloat32 = MachineRepresentation::kFloat32; static const MachineRepresentation kFloat64 = MachineRepresentation::kFloat64; + static const MachineRepresentation kSimd128 = MachineRepresentation::kSimd128; typedef RpoNumber Rpo; diff --git a/test/unittests/compiler/instruction-unittest.cc b/test/unittests/compiler/instruction-unittest.cc new file mode 100644 index 0000000000..443c42b62a --- /dev/null +++ b/test/unittests/compiler/instruction-unittest.cc @@ -0,0 +1,175 @@ +// Copyright 2016 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "src/compiler/instruction.h" +#include "src/register-configuration.h" +#include "test/unittests/test-utils.h" +#include "testing/gtest-support.h" + +namespace v8 { +namespace internal { +namespace compiler { + +namespace { + +const MachineRepresentation kWord = MachineRepresentation::kWord32; +const MachineRepresentation kFloat = MachineRepresentation::kFloat32; +const MachineRepresentation kDouble = MachineRepresentation::kFloat64; + +bool Interfere(LocationOperand::LocationKind kind, MachineRepresentation rep1, + int index1, MachineRepresentation rep2, int index2) { + return AllocatedOperand(kind, rep1, index1) + .InterferesWith(AllocatedOperand(kind, rep2, index2)); +} + +bool Contains(const ZoneVector* moves, + const InstructionOperand& to, const InstructionOperand& from) { + for (auto move : *moves) { + if (move->destination().Equals(to) && move->source().Equals(from)) { + return true; + } + } + return false; +} + +} // namespace + +class InstructionTest : public TestWithZone { + public: + InstructionTest() {} + virtual ~InstructionTest() {} + + ParallelMove* CreateParallelMove( + const std::vector& operand_pairs) { + ParallelMove* parallel_move = new (zone()) ParallelMove(zone()); + for (size_t i = 0; i < operand_pairs.size(); i += 2) + parallel_move->AddMove(operand_pairs[i + 1], operand_pairs[i]); + return parallel_move; + } +}; + +TEST_F(InstructionTest, OperandInterference) { + // All general registers and slots interfere only with themselves. + for (int i = 0; i < RegisterConfiguration::kMaxGeneralRegisters; ++i) { + EXPECT_TRUE(Interfere(LocationOperand::REGISTER, kWord, i, kWord, i)); + EXPECT_TRUE(Interfere(LocationOperand::STACK_SLOT, kWord, i, kWord, i)); + for (int j = i + 1; j < RegisterConfiguration::kMaxGeneralRegisters; ++j) { + EXPECT_FALSE(Interfere(LocationOperand::REGISTER, kWord, i, kWord, j)); + EXPECT_FALSE(Interfere(LocationOperand::STACK_SLOT, kWord, i, kWord, j)); + } + } + + // All FP registers interfere with themselves. + for (int i = 0; i < RegisterConfiguration::kMaxFPRegisters; ++i) { + EXPECT_TRUE(Interfere(LocationOperand::REGISTER, kFloat, i, kFloat, i)); + EXPECT_TRUE(Interfere(LocationOperand::STACK_SLOT, kFloat, i, kFloat, i)); + EXPECT_TRUE(Interfere(LocationOperand::REGISTER, kDouble, i, kDouble, i)); + EXPECT_TRUE(Interfere(LocationOperand::STACK_SLOT, kDouble, i, kDouble, i)); + } + + if (kSimpleFPAliasing) { + // Simple FP aliasing: interfering registers of different reps have the same + // index. + for (int i = 0; i < RegisterConfiguration::kMaxFPRegisters; ++i) { + EXPECT_TRUE(Interfere(LocationOperand::REGISTER, kFloat, i, kDouble, i)); + EXPECT_TRUE(Interfere(LocationOperand::REGISTER, kDouble, i, kFloat, i)); + for (int j = i + 1; j < RegisterConfiguration::kMaxFPRegisters; ++j) { + EXPECT_FALSE(Interfere(LocationOperand::REGISTER, kWord, i, kWord, j)); + EXPECT_FALSE( + Interfere(LocationOperand::STACK_SLOT, kWord, i, kWord, j)); + } + } + } else { + // Complex FP aliasing: sub-registers intefere with containing registers. + // Test sub-register indices which may not exist on the platform. This is + // necessary since the GapResolver may split large moves into smaller ones. 
+ for (int i = 0; i < RegisterConfiguration::kMaxFPRegisters; ++i) { + EXPECT_TRUE( + Interfere(LocationOperand::REGISTER, kFloat, i * 2, kDouble, i)); + EXPECT_TRUE( + Interfere(LocationOperand::REGISTER, kFloat, i * 2 + 1, kDouble, i)); + EXPECT_TRUE( + Interfere(LocationOperand::REGISTER, kDouble, i, kFloat, i * 2)); + EXPECT_TRUE( + Interfere(LocationOperand::REGISTER, kDouble, i, kFloat, i * 2 + 1)); + + for (int j = i + 1; j < RegisterConfiguration::kMaxFPRegisters; ++j) { + EXPECT_FALSE( + Interfere(LocationOperand::REGISTER, kFloat, i * 2, kDouble, j)); + EXPECT_FALSE(Interfere(LocationOperand::REGISTER, kFloat, i * 2 + 1, + kDouble, j)); + EXPECT_FALSE( + Interfere(LocationOperand::REGISTER, kDouble, i, kFloat, j * 2)); + EXPECT_FALSE(Interfere(LocationOperand::REGISTER, kDouble, i, kFloat, + j * 2 + 1)); + } + } + } +} + +TEST_F(InstructionTest, PrepareInsertAfter) { + InstructionOperand r0 = AllocatedOperand(LocationOperand::REGISTER, + MachineRepresentation::kWord32, 0); + InstructionOperand r1 = AllocatedOperand(LocationOperand::REGISTER, + MachineRepresentation::kWord32, 1); + InstructionOperand r2 = AllocatedOperand(LocationOperand::REGISTER, + MachineRepresentation::kWord32, 2); + + InstructionOperand d0 = AllocatedOperand(LocationOperand::REGISTER, + MachineRepresentation::kFloat64, 0); + InstructionOperand d1 = AllocatedOperand(LocationOperand::REGISTER, + MachineRepresentation::kFloat64, 1); + InstructionOperand d2 = AllocatedOperand(LocationOperand::REGISTER, + MachineRepresentation::kFloat64, 2); + + { + // Moves inserted after should pick up assignments to their sources. + // Moves inserted after should cause interfering moves to be eliminated. + ZoneVector to_eliminate(zone()); + std::vector moves = { + r1, r0, // r1 <- r0 + r2, r0, // r2 <- r0 + d1, d0, // d1 <- d0 + d2, d0 // d2 <- d0 + }; + + ParallelMove* pm = CreateParallelMove(moves); + MoveOperands m1(r1, r2); // r2 <- r1 + pm->PrepareInsertAfter(&m1, &to_eliminate); + CHECK(m1.source().Equals(r0)); + CHECK(Contains(&to_eliminate, r2, r0)); + MoveOperands m2(d1, d2); // d2 <- d1 + pm->PrepareInsertAfter(&m2, &to_eliminate); + CHECK(m2.source().Equals(d0)); + CHECK(Contains(&to_eliminate, d2, d0)); + } + + if (!kSimpleFPAliasing) { + // Moves inserted after should cause all interfering moves to be eliminated. + auto s0 = AllocatedOperand(LocationOperand::REGISTER, + MachineRepresentation::kFloat32, 0); + auto s1 = AllocatedOperand(LocationOperand::REGISTER, + MachineRepresentation::kFloat32, 1); + auto s2 = AllocatedOperand(LocationOperand::REGISTER, + MachineRepresentation::kFloat32, 2); + + { + ZoneVector to_eliminate(zone()); + std::vector moves = { + s0, s2, // s0 <- s2 + s1, s2 // s1 <- s2 + }; + + ParallelMove* pm = CreateParallelMove(moves); + MoveOperands m1(d1, d0); // d0 <- d1 + pm->PrepareInsertAfter(&m1, &to_eliminate); + CHECK(Contains(&to_eliminate, s0, s2)); + CHECK(Contains(&to_eliminate, s1, s2)); + } + } +} + +} // namespace compiler +} // namespace internal +} // namespace v8 diff --git a/test/unittests/compiler/move-optimizer-unittest.cc b/test/unittests/compiler/move-optimizer-unittest.cc index abbbc8fd10..71571488e1 100644 --- a/test/unittests/compiler/move-optimizer-unittest.cc +++ b/test/unittests/compiler/move-optimizer-unittest.cc @@ -12,6 +12,14 @@ namespace compiler { class MoveOptimizerTest : public InstructionSequenceTest { public: + // FP register indices which don't interfere under simple or complex aliasing. 
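+  // With ARM-style aliasing, d0/d1 cover s0-s3 (q0), s4/s5 live in d2 (q1),
+  // and q2/q3 cover d4-d7 (s8-s15), so the pairs chosen below occupy disjoint
+  // underlying registers.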
+ static const int kF64_1 = 0; + static const int kF64_2 = 1; + static const int kF32_1 = 4; + static const int kF32_2 = 5; + static const int kS128_1 = 2; + static const int kS128_2 = 3; + Instruction* LastInstruction() { return sequence()->instructions().back(); } void AddMove(Instruction* instr, TestOperand from, TestOperand to, @@ -99,8 +107,12 @@ TEST_F(MoveOptimizerTest, RemovesRedundant) { AddMove(first_instr, Reg(0), Reg(1)); AddMove(last_instr, Reg(1), Reg(0)); - AddMove(first_instr, FPReg(0), FPReg(1)); - AddMove(last_instr, FPReg(1), FPReg(0)); + AddMove(first_instr, FPReg(kS128_1, kSimd128), FPReg(kS128_2, kSimd128)); + AddMove(last_instr, FPReg(kS128_2, kSimd128), FPReg(kS128_1, kSimd128)); + AddMove(first_instr, FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64)); + AddMove(last_instr, FPReg(kF64_2, kFloat64), FPReg(kF64_1, kFloat64)); + AddMove(first_instr, FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32)); + AddMove(last_instr, FPReg(kF32_2, kFloat32), FPReg(kF32_1, kFloat32)); EndBlock(Last()); @@ -108,22 +120,38 @@ TEST_F(MoveOptimizerTest, RemovesRedundant) { CHECK_EQ(0, NonRedundantSize(first_instr->parallel_moves()[0])); auto move = last_instr->parallel_moves()[0]; - CHECK_EQ(2, NonRedundantSize(move)); + CHECK_EQ(4, NonRedundantSize(move)); CHECK(Contains(move, Reg(0), Reg(1))); - CHECK(Contains(move, FPReg(0), FPReg(1))); + CHECK(Contains(move, FPReg(kS128_1, kSimd128), FPReg(kS128_2, kSimd128))); + CHECK(Contains(move, FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64))); + CHECK(Contains(move, FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32))); } TEST_F(MoveOptimizerTest, RemovesRedundantExplicit) { - int first_reg_index = GetAllocatableCode(0); - int second_reg_index = GetAllocatableCode(1); + int index1 = GetAllocatableCode(0); + int index2 = GetAllocatableCode(1); + int s128_1 = GetAllocatableCode(kS128_1, kSimd128); + int s128_2 = GetAllocatableCode(kS128_2, kSimd128); + int f64_1 = GetAllocatableCode(kF64_1, kFloat64); + int f64_2 = GetAllocatableCode(kF64_2, kFloat64); + int f32_1 = GetAllocatableCode(kF32_1, kFloat32); + int f32_2 = GetAllocatableCode(kF32_2, kFloat32); StartBlock(); auto first_instr = EmitNop(); auto last_instr = EmitNop(); - AddMove(first_instr, Reg(first_reg_index), ExplicitReg(second_reg_index)); - AddMove(last_instr, Reg(second_reg_index), Reg(first_reg_index)); + AddMove(first_instr, Reg(index1), ExplicitReg(index2)); + AddMove(last_instr, Reg(index2), Reg(index1)); + + AddMove(first_instr, FPReg(s128_1, kSimd128), + ExplicitFPReg(s128_2, kSimd128)); + AddMove(last_instr, FPReg(s128_2, kSimd128), FPReg(s128_1, kSimd128)); + AddMove(first_instr, FPReg(f64_1, kFloat64), ExplicitFPReg(f64_2, kFloat64)); + AddMove(last_instr, FPReg(f64_2, kFloat64), FPReg(f64_1, kFloat64)); + AddMove(first_instr, FPReg(f32_1, kFloat32), ExplicitFPReg(f32_2, kFloat32)); + AddMove(last_instr, FPReg(f32_2, kFloat32), FPReg(f32_1, kFloat32)); EndBlock(Last()); @@ -131,8 +159,12 @@ TEST_F(MoveOptimizerTest, RemovesRedundantExplicit) { CHECK_EQ(0, NonRedundantSize(first_instr->parallel_moves()[0])); auto move = last_instr->parallel_moves()[0]; - CHECK_EQ(1, NonRedundantSize(move)); - CHECK(Contains(move, Reg(first_reg_index), ExplicitReg(second_reg_index))); + CHECK_EQ(4, NonRedundantSize(move)); + CHECK(Contains(move, Reg(index1), ExplicitReg(index2))); + CHECK( + Contains(move, FPReg(s128_1, kSimd128), ExplicitFPReg(s128_2, kSimd128))); + CHECK(Contains(move, FPReg(f64_1, kFloat64), ExplicitFPReg(f64_2, kFloat64))); + CHECK(Contains(move, FPReg(f32_1, kFloat32), 
ExplicitFPReg(f32_2, kFloat32))); } @@ -167,10 +199,18 @@ TEST_F(MoveOptimizerTest, SimpleMerge) { StartBlock(); EndBlock(Jump(2)); AddMove(LastInstruction(), Reg(0), Reg(1)); + AddMove(LastInstruction(), FPReg(kS128_1, kSimd128), + FPReg(kS128_2, kSimd128)); + AddMove(LastInstruction(), FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64)); + AddMove(LastInstruction(), FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32)); StartBlock(); EndBlock(Jump(1)); AddMove(LastInstruction(), Reg(0), Reg(1)); + AddMove(LastInstruction(), FPReg(kS128_1, kSimd128), + FPReg(kS128_2, kSimd128)); + AddMove(LastInstruction(), FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64)); + AddMove(LastInstruction(), FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32)); StartBlock(); EndBlock(Last()); @@ -180,8 +220,11 @@ TEST_F(MoveOptimizerTest, SimpleMerge) { Optimize(); auto move = last->parallel_moves()[0]; - CHECK_EQ(1, NonRedundantSize(move)); + CHECK_EQ(4, NonRedundantSize(move)); CHECK(Contains(move, Reg(0), Reg(1))); + CHECK(Contains(move, FPReg(kS128_1, kSimd128), FPReg(kS128_2, kSimd128))); + CHECK(Contains(move, FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64))); + CHECK(Contains(move, FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32))); } @@ -195,16 +238,25 @@ TEST_F(MoveOptimizerTest, SimpleMergeCycle) { AddMove(gap_0, Reg(0), Reg(1)); AddMove(LastInstruction(), Reg(1), Reg(0)); - AddMove(gap_0, FPReg(0), FPReg(1)); - AddMove(LastInstruction(), FPReg(1), FPReg(0)); + AddMove(gap_0, FPReg(kS128_1, kSimd128), FPReg(kS128_2, kSimd128)); + AddMove(LastInstruction(), FPReg(kS128_2, kSimd128), + FPReg(kS128_1, kSimd128)); + AddMove(gap_0, FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64)); + AddMove(LastInstruction(), FPReg(kF64_2, kFloat64), FPReg(kF64_1, kFloat64)); + AddMove(gap_0, FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32)); + AddMove(LastInstruction(), FPReg(kF32_2, kFloat32), FPReg(kF32_1, kFloat32)); StartBlock(); EndBlock(Jump(1)); auto gap_1 = LastInstruction(); AddMove(gap_1, Reg(0), Reg(1)); AddMove(gap_1, Reg(1), Reg(0)); - AddMove(gap_1, FPReg(0), FPReg(1)); - AddMove(gap_1, FPReg(1), FPReg(0)); + AddMove(gap_1, FPReg(kS128_1, kSimd128), FPReg(kS128_2, kSimd128)); + AddMove(gap_1, FPReg(kS128_2, kSimd128), FPReg(kS128_1, kSimd128)); + AddMove(gap_1, FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64)); + AddMove(gap_1, FPReg(kF64_2, kFloat64), FPReg(kF64_1, kFloat64)); + AddMove(gap_1, FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32)); + AddMove(gap_1, FPReg(kF32_2, kFloat32), FPReg(kF32_1, kFloat32)); StartBlock(); EndBlock(Last()); @@ -216,11 +268,15 @@ TEST_F(MoveOptimizerTest, SimpleMergeCycle) { CHECK(gap_0->AreMovesRedundant()); CHECK(gap_1->AreMovesRedundant()); auto move = last->parallel_moves()[0]; - CHECK_EQ(4, NonRedundantSize(move)); + CHECK_EQ(8, NonRedundantSize(move)); CHECK(Contains(move, Reg(0), Reg(1))); CHECK(Contains(move, Reg(1), Reg(0))); - CHECK(Contains(move, FPReg(0), FPReg(1))); - CHECK(Contains(move, FPReg(1), FPReg(0))); + CHECK(Contains(move, FPReg(kS128_1, kSimd128), FPReg(kS128_2, kSimd128))); + CHECK(Contains(move, FPReg(kS128_2, kSimd128), FPReg(kS128_1, kSimd128))); + CHECK(Contains(move, FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64))); + CHECK(Contains(move, FPReg(kF64_2, kFloat64), FPReg(kF64_1, kFloat64))); + CHECK(Contains(move, FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32))); + CHECK(Contains(move, FPReg(kF32_2, kFloat32), FPReg(kF32_1, kFloat32))); } @@ -342,8 +398,31 @@ TEST_F(MoveOptimizerTest, ClobberedDestinationsAreEliminated) { EmitNop(); Instruction* 
first_instr = LastInstruction(); AddMove(first_instr, Reg(0), Reg(1)); - AddMove(first_instr, FPReg(0), FPReg(1)); - EmitOOI(Reg(1), FPReg(1), 0, nullptr); + EmitOI(Reg(1), 0, nullptr); + Instruction* last_instr = LastInstruction(); + EndBlock(); + Optimize(); + + ParallelMove* first_move = first_instr->parallel_moves()[0]; + CHECK_EQ(0, NonRedundantSize(first_move)); + + ParallelMove* last_move = last_instr->parallel_moves()[0]; + CHECK_EQ(0, NonRedundantSize(last_move)); +} + +TEST_F(MoveOptimizerTest, ClobberedFPDestinationsAreEliminated) { + StartBlock(); + EmitNop(); + Instruction* first_instr = LastInstruction(); + AddMove(first_instr, FPReg(4, kFloat64), FPReg(1, kFloat64)); + if (!kSimpleFPAliasing) { + // We clobber q0 below. This is aliased by d0, d1, s0, s1, s2, and s3. + // Add moves to registers s2 and s3. + AddMove(first_instr, FPReg(10, kFloat32), FPReg(0, kFloat32)); + AddMove(first_instr, FPReg(11, kFloat32), FPReg(1, kFloat32)); + } + // Clobbers output register 0. + EmitOI(FPReg(0, kSimd128), 0, nullptr); Instruction* last_instr = LastInstruction(); EndBlock(); Optimize(); diff --git a/test/unittests/compiler/register-allocator-unittest.cc b/test/unittests/compiler/register-allocator-unittest.cc index 22b1b44485..ed3558dc60 100644 --- a/test/unittests/compiler/register-allocator-unittest.cc +++ b/test/unittests/compiler/register-allocator-unittest.cc @@ -101,13 +101,14 @@ TEST_F(RegisterAllocatorTest, CanAllocateThreeRegisters) { Allocate(); } -TEST_F(RegisterAllocatorTest, CanAllocateThreeFPRegisters) { - // return p0 + p1; +TEST_F(RegisterAllocatorTest, CanAllocateFPRegisters) { StartBlock(); - VReg a_reg = FPParameter(); - VReg b_reg = FPParameter(); - VReg c_reg = EmitOI(FPReg(1), Reg(a_reg, 1), Reg(b_reg, 0)); - Return(c_reg); + TestOperand inputs[] = { + Reg(FPParameter(kFloat64)), Reg(FPParameter(kFloat64)), + Reg(FPParameter(kFloat32)), Reg(FPParameter(kFloat32)), + Reg(FPParameter(kSimd128)), Reg(FPParameter(kSimd128))}; + VReg out1 = EmitOI(FPReg(1, kFloat64), arraysize(inputs), inputs); + Return(out1); EndBlock(Last()); Allocate(); diff --git a/test/unittests/register-configuration-unittest.cc b/test/unittests/register-configuration-unittest.cc index 33453ce9bb..0688a5e54e 100644 --- a/test/unittests/register-configuration-unittest.cc +++ b/test/unittests/register-configuration-unittest.cc @@ -16,8 +16,6 @@ class RegisterConfigurationUnitTest : public ::testing::Test { public: RegisterConfigurationUnitTest() {} virtual ~RegisterConfigurationUnitTest() {} - - private: }; TEST_F(RegisterConfigurationUnitTest, BasicProperties) { @@ -30,9 +28,8 @@ TEST_F(RegisterConfigurationUnitTest, BasicProperties) { RegisterConfiguration test( kNumGeneralRegs, kNumDoubleRegs, kNumAllocatableGeneralRegs, - kNumAllocatableDoubleRegs, kNumAllocatableDoubleRegs, general_codes, - double_codes, RegisterConfiguration::OVERLAP, nullptr, nullptr, nullptr, - nullptr); + kNumAllocatableDoubleRegs, general_codes, double_codes, + RegisterConfiguration::OVERLAP, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(test.num_general_registers(), kNumGeneralRegs); EXPECT_EQ(test.num_double_registers(), kNumDoubleRegs); @@ -67,9 +64,8 @@ TEST_F(RegisterConfigurationUnitTest, CombineAliasing) { RegisterConfiguration test( kNumGeneralRegs, kNumDoubleRegs, kNumAllocatableGeneralRegs, - kNumAllocatableDoubleRegs, kNumAllocatableDoubleRegs, general_codes, - double_codes, RegisterConfiguration::COMBINE, nullptr, nullptr, nullptr, - nullptr); + kNumAllocatableDoubleRegs, general_codes, double_codes, + 
RegisterConfiguration::COMBINE, nullptr, nullptr, nullptr, nullptr); // There are 3 allocatable double regs, but only 2 can alias float regs. EXPECT_EQ(test.num_allocatable_float_registers(), 4); @@ -157,9 +153,10 @@ TEST_F(RegisterConfigurationUnitTest, CombineAliasing) { test.GetAliases(kFloat64, RegisterConfiguration::kMaxFPRegisters / 2 + 1, kFloat32, &alias_base_index), 0); - EXPECT_EQ(test.GetAliases(kFloat64, RegisterConfiguration::kMaxFPRegisters, - kFloat32, &alias_base_index), - 0); + EXPECT_EQ( + test.GetAliases(kFloat64, RegisterConfiguration::kMaxFPRegisters - 1, + kFloat32, &alias_base_index), + 0); } } // namespace internal diff --git a/test/unittests/unittests.gyp b/test/unittests/unittests.gyp index 0b3ef19481..1fae5307ec 100644 --- a/test/unittests/unittests.gyp +++ b/test/unittests/unittests.gyp @@ -42,6 +42,7 @@ 'compiler/graph-trimmer-unittest.cc', 'compiler/graph-unittest.cc', 'compiler/graph-unittest.h', + 'compiler/instruction-unittest.cc', 'compiler/instruction-selector-unittest.cc', 'compiler/instruction-selector-unittest.h', 'compiler/instruction-sequence-unittest.cc',
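
A note on the aliasing arithmetic used throughout this patch: with ARM's combining FP aliasing (kSimpleFPAliasing is false), s2n and s2n+1 make up dn, and d2n and d2n+1 make up qn, so a register code in one representation maps to a contiguous run of codes in a narrower one. The sketch below is a minimal, self-contained illustration of that mapping; the names are invented for this note, and it only roughly mirrors RegisterConfiguration::GetAliases, which additionally checks that both representations are floating point.

// fp_aliasing_sketch.cc -- illustrative only; not part of the V8 sources.
#include <cassert>
#include <cstdio>

// Enumerator values are the log2 of each representation's size in float32
// units (s-registers are 1 unit, d-registers 2, q-registers 4).
enum Rep { kFloat32 = 0, kFloat64 = 1, kSimd128 = 2 };

// No FP representation has more than 32 register codes, so an alias whose
// base code would land at or beyond 32 does not exist (e.g. d16 has no
// single-precision aliases).
const int kMaxAliasedCode = 32;

// Returns how many registers of representation |to| overlap register |code|
// of representation |from|, writing the first overlapping code to |*base|.
int GetAliasesSketch(Rep from, int code, Rep to, int* base) {
  if (from == to) {
    *base = code;
    return 1;
  }
  if (from > to) {
    // A wider register covers two or four narrower ones.
    int shift = from - to;
    int first = code << shift;
    if (first >= kMaxAliasedCode) {
      *base = -1;
      return 0;
    }
    *base = first;
    return 1 << shift;
  }
  // A narrower register lies inside exactly one wider register.
  *base = code >> (to - from);
  return 1;
}

int main() {
  int base = -1;
  assert(GetAliasesSketch(kFloat64, 1, kFloat32, &base) == 2 && base == 2);    // d1 = {s2, s3}
  assert(GetAliasesSketch(kSimd128, 0, kFloat32, &base) == 4 && base == 0);    // q0 = {s0..s3}
  assert(GetAliasesSketch(kFloat32, 7, kFloat64, &base) == 1 && base == 3);    // s7 lies in d3
  assert(GetAliasesSketch(kFloat64, 3, kSimd128, &base) == 1 && base == 1);    // d3 lies in q1
  assert(GetAliasesSketch(kFloat64, 16, kFloat32, &base) == 0 && base == -1);  // d16: no s alias
  std::printf("all alias checks passed\n");
  return 0;
}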