[Turbofan] Add concept of FP register aliasing on ARM 32.

- Modifies RegisterConfiguration to specify complex aliasing on ARM 32.
- Modifies RegisterAllocator to consider aliasing.
- Modifies ParallelMove::PrepareInsertAfter to handle aliasing.
- Modifies GapResolver to split wider register moves when interference
with smaller moves is detected.
- Modifies MoveOptimizer to handle aliasing.
- Adds ARM 32 macro-assembler pseudo move instructions to handle cases where
  split moves don't correspond to actual s-registers.
- Modifies CodeGenerator::AssembleMove and AssembleSwap to handle moves of
  different widths, and moves involving pseudo-s-registers.
- Adds unit tests for FP operand interference checking and PrepareInsertAfter.
- Adds more tests of FP for the move optimizer and register allocator.

LOG=N
BUG=v8:4124

Review-Url: https://codereview.chromium.org/2410673002
Cr-Commit-Position: refs/heads/master@{#40597}
Authored by bbudge on 2016-10-26 09:04:11 -07:00; committed by Commit bot
commit 09ab8e6ad9 (parent f6c3fd0a74)
25 changed files with 1208 additions and 287 deletions
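
For orientation, and not part of this CL: on ARM 32, VFP registers alias, with s(2n) and s(2n+1) overlaying d(n) for n < 16, while d16-d31 have no architectural s-register views. The CL models the missing views with "imaginary" codes s32-s63. A minimal standalone C++ sketch of that mapping:

// Standalone sketch (not part of this CL) of the ARM 32 VFP aliasing the CL
// models: s(2n) and s(2n+1) overlay d(n) for n < 16; d16-d31 have no
// architectural s-register views, hence the "imaginary" codes s32-s63.
#include <cstdio>

int main() {
  for (int d = 0; d < 32; ++d) {
    int s_lo = d * 2;      // low half of d<d>
    int s_hi = d * 2 + 1;  // high half of d<d>
    if (d < 16) {
      std::printf("d%-2d aliases s%d (low) and s%d (high)\n", d, s_lo, s_hi);
    } else {
      // Codes 32-63 name no real s-registers; the macro-assembler's
      // VmovExtended pseudo-moves route them through a core register.
      std::printf("d%-2d -> imaginary s%d/s%d\n", d, s_lo, s_hi);
    }
  }
  return 0;
}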

View File

@ -1051,6 +1051,69 @@ void MacroAssembler::VmovLow(DwVfpRegister dst, Register src) {
}
}
void MacroAssembler::VmovExtended(Register dst, int src_code) {
DCHECK_LE(32, src_code);
DCHECK_GT(64, src_code);
if (src_code & 0x1) {
VmovHigh(dst, DwVfpRegister::from_code(src_code / 2));
} else {
VmovLow(dst, DwVfpRegister::from_code(src_code / 2));
}
}
void MacroAssembler::VmovExtended(int dst_code, Register src) {
DCHECK_LE(32, dst_code);
DCHECK_GT(64, dst_code);
if (dst_code & 0x1) {
VmovHigh(DwVfpRegister::from_code(dst_code / 2), src);
} else {
VmovLow(DwVfpRegister::from_code(dst_code / 2), src);
}
}
void MacroAssembler::VmovExtended(int dst_code, int src_code,
Register scratch) {
if (src_code < 32 && dst_code < 32) {
// src and dst are both s-registers.
vmov(SwVfpRegister::from_code(dst_code),
SwVfpRegister::from_code(src_code));
} else if (src_code < 32) {
// src is an s-register.
vmov(scratch, SwVfpRegister::from_code(src_code));
VmovExtended(dst_code, scratch);
} else if (dst_code < 32) {
// dst is an s-register.
VmovExtended(scratch, src_code);
vmov(SwVfpRegister::from_code(dst_code), scratch);
} else {
// Neither src nor dst is an s-register.
DCHECK_GT(64, src_code);
DCHECK_GT(64, dst_code);
VmovExtended(scratch, src_code);
VmovExtended(dst_code, scratch);
}
}
void MacroAssembler::VmovExtended(int dst_code, const MemOperand& src,
Register scratch) {
if (dst_code >= 32) {
ldr(scratch, src);
VmovExtended(dst_code, scratch);
} else {
vldr(SwVfpRegister::from_code(dst_code), src);
}
}
void MacroAssembler::VmovExtended(const MemOperand& dst, int src_code,
Register scratch) {
if (src_code >= 32) {
VmovExtended(scratch, src_code);
str(scratch, dst);
} else {
vstr(SwVfpRegister::from_code(src_code), dst);
}
}
void MacroAssembler::LslPair(Register dst_low, Register dst_high,
Register src_low, Register src_high,
Register scratch, Register shift) {
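
For illustration only (standalone, not code from this CL): how an extended s-register code resolves to one half of a high d-register, matching the even/odd check in the VmovExtended overloads above.

// Illustrative only: an s-register code >= 32 maps to d(code / 2), with the
// low bit selecting the low or high half, mirroring VmovExtended's checks.
#include <cassert>
#include <cstdio>

void DescribeExtendedSReg(int code) {
  assert(code >= 32 && code < 64);
  int d_reg = code / 2;                         // d16..d31
  const char* half = (code & 1) ? "high" : "low";
  std::printf("s%d is the %s half of d%d\n", code, half, d_reg);
}

int main() {
  DescribeExtendedSReg(33);  // high half of d16 -> handled via VmovHigh
  DescribeExtendedSReg(62);  // low half of d31  -> handled via VmovLow
  return 0;
}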

View File

@ -549,6 +549,14 @@ class MacroAssembler: public Assembler {
void VmovLow(Register dst, DwVfpRegister src);
void VmovLow(DwVfpRegister dst, Register src);
// Simulate s-register moves for imaginary s32 - s63 registers.
void VmovExtended(Register dst, int src_code);
void VmovExtended(int dst_code, Register src);
// Move between s-registers and imaginary s-registers.
void VmovExtended(int dst_code, int src_code, Register scratch);
void VmovExtended(int dst_code, const MemOperand& src, Register scratch);
void VmovExtended(const MemOperand& dst, int src_code, Register scratch);
void LslPair(Register dst_low, Register dst_high, Register src_low,
Register src_high, Register scratch, Register shift);
void LslPair(Register dst_low, Register dst_high, Register src_low,

View File

@ -136,25 +136,13 @@ class ArmOperandConverter final : public InstructionOperandConverter {
FrameOffset offset = frame_access_state()->GetFrameOffset(slot);
return MemOperand(offset.from_stack_pointer() ? sp : fp, offset.offset());
}
FloatRegister InputFloat32Register(size_t index) {
return ToFloat32Register(instr_->InputAt(index));
}
FloatRegister OutputFloat32Register() {
return ToFloat32Register(instr_->Output());
}
FloatRegister ToFloat32Register(InstructionOperand* op) {
return LowDwVfpRegister::from_code(ToDoubleRegister(op).code()).low();
}
};
namespace {
class OutOfLineLoadFloat32 final : public OutOfLineCode {
class OutOfLineLoadFloat final : public OutOfLineCode {
public:
OutOfLineLoadFloat32(CodeGenerator* gen, SwVfpRegister result)
OutOfLineLoadFloat(CodeGenerator* gen, SwVfpRegister result)
: OutOfLineCode(gen), result_(result) {}
void Generate() final {
@ -1119,54 +1107,54 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmVcmpF32:
if (instr->InputAt(1)->IsFPRegister()) {
__ VFPCompareAndSetFlags(i.InputFloat32Register(0),
i.InputFloat32Register(1));
__ VFPCompareAndSetFlags(i.InputFloatRegister(0),
i.InputFloatRegister(1));
} else {
DCHECK(instr->InputAt(1)->IsImmediate());
// 0.0 is the only immediate supported by vcmp instructions.
DCHECK(i.InputFloat32(1) == 0.0f);
__ VFPCompareAndSetFlags(i.InputFloat32Register(0), i.InputFloat32(1));
__ VFPCompareAndSetFlags(i.InputFloatRegister(0), i.InputFloat32(1));
}
DCHECK_EQ(SetCC, i.OutputSBit());
break;
case kArmVaddF32:
__ vadd(i.OutputFloat32Register(), i.InputFloat32Register(0),
i.InputFloat32Register(1));
__ vadd(i.OutputFloatRegister(), i.InputFloatRegister(0),
i.InputFloatRegister(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVsubF32:
__ vsub(i.OutputFloat32Register(), i.InputFloat32Register(0),
i.InputFloat32Register(1));
__ vsub(i.OutputFloatRegister(), i.InputFloatRegister(0),
i.InputFloatRegister(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVmulF32:
__ vmul(i.OutputFloat32Register(), i.InputFloat32Register(0),
i.InputFloat32Register(1));
__ vmul(i.OutputFloatRegister(), i.InputFloatRegister(0),
i.InputFloatRegister(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVmlaF32:
__ vmla(i.OutputFloat32Register(), i.InputFloat32Register(1),
i.InputFloat32Register(2));
__ vmla(i.OutputFloatRegister(), i.InputFloatRegister(1),
i.InputFloatRegister(2));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVmlsF32:
__ vmls(i.OutputFloat32Register(), i.InputFloat32Register(1),
i.InputFloat32Register(2));
__ vmls(i.OutputFloatRegister(), i.InputFloatRegister(1),
i.InputFloatRegister(2));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVdivF32:
__ vdiv(i.OutputFloat32Register(), i.InputFloat32Register(0),
i.InputFloat32Register(1));
__ vdiv(i.OutputFloatRegister(), i.InputFloatRegister(0),
i.InputFloatRegister(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVsqrtF32:
__ vsqrt(i.OutputFloat32Register(), i.InputFloat32Register(0));
__ vsqrt(i.OutputFloatRegister(), i.InputFloatRegister(0));
break;
case kArmVabsF32:
__ vabs(i.OutputFloat32Register(), i.InputFloat32Register(0));
__ vabs(i.OutputFloatRegister(), i.InputFloatRegister(0));
break;
case kArmVnegF32:
__ vneg(i.OutputFloat32Register(), i.InputFloat32Register(0));
__ vneg(i.OutputFloatRegister(), i.InputFloatRegister(0));
break;
case kArmVcmpF64:
if (instr->InputAt(1)->IsFPRegister()) {
@ -1235,7 +1223,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
case kArmVrintmF32: {
CpuFeatureScope scope(masm(), ARMv8);
__ vrintm(i.OutputFloat32Register(), i.InputFloat32Register(0));
__ vrintm(i.OutputFloatRegister(), i.InputFloatRegister(0));
break;
}
case kArmVrintmF64: {
@ -1245,7 +1233,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmVrintpF32: {
CpuFeatureScope scope(masm(), ARMv8);
__ vrintp(i.OutputFloat32Register(), i.InputFloat32Register(0));
__ vrintp(i.OutputFloatRegister(), i.InputFloatRegister(0));
break;
}
case kArmVrintpF64: {
@ -1255,7 +1243,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmVrintzF32: {
CpuFeatureScope scope(masm(), ARMv8);
__ vrintz(i.OutputFloat32Register(), i.InputFloat32Register(0));
__ vrintz(i.OutputFloatRegister(), i.InputFloatRegister(0));
break;
}
case kArmVrintzF64: {
@ -1270,7 +1258,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmVrintnF32: {
CpuFeatureScope scope(masm(), ARMv8);
__ vrintn(i.OutputFloat32Register(), i.InputFloat32Register(0));
__ vrintn(i.OutputFloatRegister(), i.InputFloatRegister(0));
break;
}
case kArmVrintnF64: {
@ -1279,26 +1267,26 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArmVcvtF32F64: {
__ vcvt_f32_f64(i.OutputFloat32Register(), i.InputDoubleRegister(0));
__ vcvt_f32_f64(i.OutputFloatRegister(), i.InputDoubleRegister(0));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmVcvtF64F32: {
__ vcvt_f64_f32(i.OutputDoubleRegister(), i.InputFloat32Register(0));
__ vcvt_f64_f32(i.OutputDoubleRegister(), i.InputFloatRegister(0));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmVcvtF32S32: {
SwVfpRegister scratch = kScratchDoubleReg.low();
__ vmov(scratch, i.InputRegister(0));
__ vcvt_f32_s32(i.OutputFloat32Register(), scratch);
__ vcvt_f32_s32(i.OutputFloatRegister(), scratch);
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmVcvtF32U32: {
SwVfpRegister scratch = kScratchDoubleReg.low();
__ vmov(scratch, i.InputRegister(0));
__ vcvt_f32_u32(i.OutputFloat32Register(), scratch);
__ vcvt_f32_u32(i.OutputFloatRegister(), scratch);
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
@ -1318,7 +1306,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmVcvtS32F32: {
SwVfpRegister scratch = kScratchDoubleReg.low();
__ vcvt_s32_f32(scratch, i.InputFloat32Register(0));
__ vcvt_s32_f32(scratch, i.InputFloatRegister(0));
__ vmov(i.OutputRegister(), scratch);
// Avoid INT32_MAX as an overflow indicator and use INT32_MIN instead,
// because INT32_MIN allows easier out-of-bounds detection.
@ -1329,7 +1317,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmVcvtU32F32: {
SwVfpRegister scratch = kScratchDoubleReg.low();
__ vcvt_u32_f32(scratch, i.InputFloat32Register(0));
__ vcvt_u32_f32(scratch, i.InputFloatRegister(0));
__ vmov(i.OutputRegister(), scratch);
// Avoid UINT32_MAX as an overflow indicator and use 0 instead,
// because 0 allows easier out-of-bounds detection.
@ -1353,11 +1341,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArmVmovU32F32:
__ vmov(i.OutputRegister(), i.InputFloat32Register(0));
__ vmov(i.OutputRegister(), i.InputFloatRegister(0));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVmovF32U32:
__ vmov(i.OutputFloat32Register(), i.InputRegister(0));
__ vmov(i.OutputFloatRegister(), i.InputRegister(0));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVmovLowU32F64:
@ -1415,12 +1403,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVldrF32: {
__ vldr(i.OutputFloat32Register(), i.InputOffset());
__ vldr(i.OutputFloatRegister(), i.InputOffset());
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmVstrF32:
__ vstr(i.InputFloat32Register(0), i.InputOffset(1));
__ vstr(i.InputFloatRegister(0), i.InputOffset(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVldrF64:
@ -1432,9 +1420,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmFloat32Max: {
SwVfpRegister result = i.OutputFloat32Register();
SwVfpRegister left = i.InputFloat32Register(0);
SwVfpRegister right = i.InputFloat32Register(1);
SwVfpRegister result = i.OutputFloatRegister();
SwVfpRegister left = i.InputFloatRegister(0);
SwVfpRegister right = i.InputFloatRegister(1);
if (left.is(right)) {
__ Move(result, left);
} else {
@ -1460,9 +1448,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArmFloat32Min: {
SwVfpRegister result = i.OutputFloat32Register();
SwVfpRegister left = i.InputFloat32Register(0);
SwVfpRegister right = i.InputFloat32Register(1);
SwVfpRegister result = i.OutputFloatRegister();
SwVfpRegister left = i.InputFloatRegister(0);
SwVfpRegister right = i.InputFloatRegister(1);
if (left.is(right)) {
__ Move(result, left);
} else {
@ -1501,7 +1489,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
frame_access_state()->IncreaseSPDelta(kDoubleSize / kPointerSize);
} else {
DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
__ vpush(i.InputFloat32Register(0));
__ vpush(i.InputFloatRegister(0));
frame_access_state()->IncreaseSPDelta(1);
}
} else {
@ -1532,7 +1520,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
ASSEMBLE_CHECKED_LOAD_INTEGER(ldr);
break;
case kCheckedLoadFloat32:
ASSEMBLE_CHECKED_LOAD_FP(Float32);
ASSEMBLE_CHECKED_LOAD_FP(Float);
break;
case kCheckedLoadFloat64:
ASSEMBLE_CHECKED_LOAD_FP(Double);
@ -1547,7 +1535,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
ASSEMBLE_CHECKED_STORE_INTEGER(str);
break;
case kCheckedStoreFloat32:
ASSEMBLE_CHECKED_STORE_FP(Float32);
ASSEMBLE_CHECKED_STORE_FP(Float);
break;
case kCheckedStoreFloat64:
ASSEMBLE_CHECKED_STORE_FP(Double);
@ -1789,7 +1777,6 @@ void CodeGenerator::AssembleReturn() {
__ Ret(pop_count);
}
void CodeGenerator::AssembleMove(InstructionOperand* source,
InstructionOperand* destination) {
ArmOperandConverter g(this, nullptr);
@ -1858,12 +1845,12 @@ void CodeGenerator::AssembleMove(InstructionOperand* source,
}
if (destination->IsStackSlot()) __ str(dst, g.ToMemOperand(destination));
} else if (src.type() == Constant::kFloat32) {
if (destination->IsFPStackSlot()) {
if (destination->IsFloatStackSlot()) {
MemOperand dst = g.ToMemOperand(destination);
__ mov(ip, Operand(bit_cast<int32_t>(src.ToFloat32())));
__ str(ip, dst);
} else {
SwVfpRegister dst = g.ToFloat32Register(destination);
SwVfpRegister dst = g.ToFloatRegister(destination);
__ vmov(dst, src.ToFloat32());
}
} else {
@ -1872,28 +1859,60 @@ void CodeGenerator::AssembleMove(InstructionOperand* source,
? g.ToDoubleRegister(destination)
: kScratchDoubleReg;
__ vmov(dst, src.ToFloat64(), kScratchReg);
if (destination->IsFPStackSlot()) {
if (destination->IsDoubleStackSlot()) {
__ vstr(dst, g.ToMemOperand(destination));
}
}
} else if (source->IsFPRegister()) {
DwVfpRegister src = g.ToDoubleRegister(source);
if (destination->IsFPRegister()) {
DwVfpRegister dst = g.ToDoubleRegister(destination);
__ Move(dst, src);
MachineRepresentation rep = LocationOperand::cast(source)->representation();
if (rep == MachineRepresentation::kFloat64) {
DwVfpRegister src = g.ToDoubleRegister(source);
if (destination->IsDoubleRegister()) {
DwVfpRegister dst = g.ToDoubleRegister(destination);
__ Move(dst, src);
} else {
DCHECK(destination->IsDoubleStackSlot());
__ vstr(src, g.ToMemOperand(destination));
}
} else {
DCHECK(destination->IsFPStackSlot());
__ vstr(src, g.ToMemOperand(destination));
DCHECK_EQ(MachineRepresentation::kFloat32, rep);
// GapResolver may give us reg codes that don't map to actual s-registers.
// Generate code to work around those cases.
int src_code = LocationOperand::cast(source)->register_code();
if (destination->IsFloatRegister()) {
int dst_code = LocationOperand::cast(destination)->register_code();
__ VmovExtended(dst_code, src_code, kScratchReg);
} else {
DCHECK(destination->IsFloatStackSlot());
__ VmovExtended(g.ToMemOperand(destination), src_code, kScratchReg);
}
}
} else if (source->IsFPStackSlot()) {
MemOperand src = g.ToMemOperand(source);
MachineRepresentation rep =
LocationOperand::cast(destination)->representation();
if (destination->IsFPRegister()) {
if (rep == MachineRepresentation::kFloat64) {
__ vldr(g.ToDoubleRegister(destination), src);
} else {
DCHECK_EQ(MachineRepresentation::kFloat32, rep);
// GapResolver may give us reg codes that don't map to actual
// s-registers. Generate code to work around those cases.
int dst_code = LocationOperand::cast(destination)->register_code();
__ VmovExtended(dst_code, src, kScratchReg);
}
} else {
DCHECK(destination->IsFPStackSlot());
if (rep == MachineRepresentation::kFloat64) {
DwVfpRegister temp = kScratchDoubleReg;
__ vldr(temp, src);
__ vstr(temp, g.ToMemOperand(destination));
} else {
DCHECK_EQ(MachineRepresentation::kFloat32, rep);
SwVfpRegister temp = kScratchDoubleReg.low();
__ vldr(temp, src);
__ vstr(temp, g.ToMemOperand(destination));
}
}
} else {
UNREACHABLE();
@ -1933,17 +1952,35 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
__ str(temp_0, dst);
__ vstr(temp_1, src);
} else if (source->IsFPRegister()) {
MachineRepresentation rep = LocationOperand::cast(source)->representation();
LowDwVfpRegister temp = kScratchDoubleReg;
DwVfpRegister src = g.ToDoubleRegister(source);
if (destination->IsFPRegister()) {
DwVfpRegister dst = g.ToDoubleRegister(destination);
__ vswp(src, dst);
if (rep == MachineRepresentation::kFloat64) {
DwVfpRegister src = g.ToDoubleRegister(source);
if (destination->IsFPRegister()) {
DwVfpRegister dst = g.ToDoubleRegister(destination);
__ vswp(src, dst);
} else {
DCHECK(destination->IsFPStackSlot());
MemOperand dst = g.ToMemOperand(destination);
__ Move(temp, src);
__ vldr(src, dst);
__ vstr(temp, dst);
}
} else {
DCHECK(destination->IsFPStackSlot());
MemOperand dst = g.ToMemOperand(destination);
__ Move(temp, src);
__ vldr(src, dst);
__ vstr(temp, dst);
DCHECK_EQ(MachineRepresentation::kFloat32, rep);
int src_code = LocationOperand::cast(source)->register_code();
if (destination->IsFPRegister()) {
int dst_code = LocationOperand::cast(destination)->register_code();
__ VmovExtended(temp.low().code(), src_code, kScratchReg);
__ VmovExtended(src_code, dst_code, kScratchReg);
__ VmovExtended(dst_code, temp.low().code(), kScratchReg);
} else {
DCHECK(destination->IsFPStackSlot());
MemOperand dst = g.ToMemOperand(destination);
__ VmovExtended(temp.low().code(), src_code, kScratchReg);
__ VmovExtended(src_code, dst, kScratchReg);
__ vstr(temp.low(), dst);
}
}
} else if (source->IsFPStackSlot()) {
DCHECK(destination->IsFPStackSlot());
@ -1951,21 +1988,29 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
LowDwVfpRegister temp_1 = kScratchDoubleReg;
MemOperand src0 = g.ToMemOperand(source);
MemOperand dst0 = g.ToMemOperand(destination);
MemOperand src1(src0.rn(), src0.offset() + kPointerSize);
MemOperand dst1(dst0.rn(), dst0.offset() + kPointerSize);
__ vldr(temp_1, dst0); // Save destination in temp_1.
__ ldr(temp_0, src0); // Then use temp_0 to copy source to destination.
__ str(temp_0, dst0);
__ ldr(temp_0, src1);
__ str(temp_0, dst1);
__ vstr(temp_1, src0);
MachineRepresentation rep = LocationOperand::cast(source)->representation();
if (rep == MachineRepresentation::kFloat64) {
MemOperand src1(src0.rn(), src0.offset() + kPointerSize);
MemOperand dst1(dst0.rn(), dst0.offset() + kPointerSize);
__ vldr(temp_1, dst0); // Save destination in temp_1.
__ ldr(temp_0, src0); // Then use temp_0 to copy source to destination.
__ str(temp_0, dst0);
__ ldr(temp_0, src1);
__ str(temp_0, dst1);
__ vstr(temp_1, src0);
} else {
DCHECK_EQ(MachineRepresentation::kFloat32, rep);
__ vldr(temp_1.low(), dst0); // Save destination in temp_1.
__ ldr(temp_0, src0); // Then use temp_0 to copy source to destination.
__ str(temp_0, dst0);
__ vstr(temp_1.low(), src0);
}
} else {
// No other combinations are possible.
UNREACHABLE();
}
}
void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
// On 32-bit ARM we emit the jump tables inline.
UNREACHABLE();

View File

@ -14,27 +14,124 @@ namespace compiler {
namespace {
#define REP_BIT(rep) (1 << static_cast<int>(rep))
const int kFloat32Bit = REP_BIT(MachineRepresentation::kFloat32);
const int kFloat64Bit = REP_BIT(MachineRepresentation::kFloat64);
inline bool Blocks(MoveOperands* move, InstructionOperand destination) {
return move->Blocks(destination);
return !move->IsEliminated() && move->source().InterferesWith(destination);
}
// Splits an FP move between two location operands into the equivalent series of
// moves between smaller sub-operands, e.g. a double move to two single moves.
// This helps reduce the number of cycles that would normally occur under FP
// aliasing, and makes swaps much easier to implement.
MoveOperands* Split(MoveOperands* move, MachineRepresentation smaller_rep,
ParallelMove* moves) {
DCHECK(!kSimpleFPAliasing);
// Splitting is only possible when the slot size is the same as float size.
DCHECK_EQ(kPointerSize, kFloatSize);
const LocationOperand& src_loc = LocationOperand::cast(move->source());
const LocationOperand& dst_loc = LocationOperand::cast(move->destination());
MachineRepresentation dst_rep = dst_loc.representation();
DCHECK_NE(smaller_rep, dst_rep);
auto src_kind = src_loc.location_kind();
auto dst_kind = dst_loc.location_kind();
inline bool IsRedundant(MoveOperands* move) { return move->IsRedundant(); }
int aliases =
1 << (ElementSizeLog2Of(dst_rep) - ElementSizeLog2Of(smaller_rep));
int base = -1;
USE(base);
DCHECK_EQ(aliases, RegisterConfiguration::Turbofan()->GetAliases(
dst_rep, 0, smaller_rep, &base));
int src_index = -1;
int slot_size = (1 << ElementSizeLog2Of(smaller_rep)) / kPointerSize;
int src_step = 1;
if (src_kind == LocationOperand::REGISTER) {
src_index = src_loc.register_code() * aliases;
} else {
src_index = src_loc.index();
// For operands that occupy multiple slots, the index refers to the last
// slot. On little-endian architectures, we start at the high slot and use a
// negative step so that register-to-slot moves are in the correct order.
src_step = -slot_size;
}
int dst_index = -1;
int dst_step = 1;
if (dst_kind == LocationOperand::REGISTER) {
dst_index = dst_loc.register_code() * aliases;
} else {
dst_index = dst_loc.index();
dst_step = -slot_size;
}
// Reuse 'move' for the first fragment. It is not pending.
move->set_source(AllocatedOperand(src_kind, smaller_rep, src_index));
move->set_destination(AllocatedOperand(dst_kind, smaller_rep, dst_index));
// Add the remaining fragment moves.
for (int i = 1; i < aliases; ++i) {
src_index += src_step;
dst_index += dst_step;
moves->AddMove(AllocatedOperand(src_kind, smaller_rep, src_index),
AllocatedOperand(dst_kind, smaller_rep, dst_index));
}
// Return the first fragment.
return move;
}
} // namespace
void GapResolver::Resolve(ParallelMove* moves) {
// Clear redundant moves, and collect FP move representations if aliasing
// is non-simple.
int reps = 0;
for (size_t i = 0; i < moves->size();) {
MoveOperands* move = (*moves)[i];
if (move->IsRedundant()) {
(*moves)[i] = moves->back();
moves->pop_back();
continue;
}
i++;
if (!kSimpleFPAliasing && move->destination().IsFPRegister()) {
reps |=
REP_BIT(LocationOperand::cast(move->destination()).representation());
}
}
void GapResolver::Resolve(ParallelMove* moves) const {
// Clear redundant moves.
auto it =
std::remove_if(moves->begin(), moves->end(), std::ptr_fun(IsRedundant));
moves->erase(it, moves->end());
for (MoveOperands* move : *moves) {
if (!kSimpleFPAliasing) {
if (reps && !base::bits::IsPowerOfTwo32(reps)) {
// Start with the smallest FP moves, so we never encounter smaller moves
// in the middle of a cycle of larger moves.
if ((reps & kFloat32Bit) != 0) {
split_rep_ = MachineRepresentation::kFloat32;
for (size_t i = 0; i < moves->size(); ++i) {
auto move = (*moves)[i];
if (!move->IsEliminated() && move->destination().IsFloatRegister())
PerformMove(moves, move);
}
}
if ((reps & kFloat64Bit) != 0) {
split_rep_ = MachineRepresentation::kFloat64;
for (size_t i = 0; i < moves->size(); ++i) {
auto move = (*moves)[i];
if (!move->IsEliminated() && move->destination().IsDoubleRegister())
PerformMove(moves, move);
}
}
}
split_rep_ = MachineRepresentation::kSimd128;
}
for (size_t i = 0; i < moves->size(); ++i) {
auto move = (*moves)[i];
if (!move->IsEliminated()) PerformMove(moves, move);
}
}
void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) const {
void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) {
// Each call to this function performs a move and deletes it from the move
// graph. We first recursively perform any move blocking this one. We mark a
// move as "pending" on entry to PerformMove in order to detect cycles in the
@ -45,15 +142,32 @@ void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) const {
// Clear this move's destination to indicate a pending move. The actual
// destination is saved on the side.
DCHECK(!move->source().IsInvalid()); // Or else it will look eliminated.
InstructionOperand source = move->source();
DCHECK(!source.IsInvalid()); // Or else it will look eliminated.
InstructionOperand destination = move->destination();
move->SetPending();
// We may need to split moves between FP locations differently.
bool is_fp_loc_move = !kSimpleFPAliasing && destination.IsFPLocationOperand();
// Perform a depth-first traversal of the move graph to resolve dependencies.
// Any unperformed, unpending move with a source the same as this one's
// destination blocks this one so recursively perform all such moves.
for (MoveOperands* other : *moves) {
if (other->Blocks(destination) && !other->IsPending()) {
for (size_t i = 0; i < moves->size(); ++i) {
auto other = (*moves)[i];
if (other->IsEliminated()) continue;
if (other->IsPending()) continue;
if (other->source().InterferesWith(destination)) {
if (!kSimpleFPAliasing && is_fp_loc_move &&
LocationOperand::cast(other->source()).representation() >
split_rep_) {
// 'other' must also be an FP location move. Break it into fragments
// of the same size as 'move'. 'other' is set to one of the fragments,
// and the rest are appended to 'moves'.
other = Split(other, split_rep_, moves);
// 'other' may not block destination now.
if (!other->source().InterferesWith(destination)) continue;
}
// Though PerformMove can change any source operand in the move graph,
// this call cannot create a blocking move via a swap (this loop does not
// miss any). Assume there is a non-blocking move with source A and this
@ -67,18 +181,18 @@ void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) const {
}
}
// We are about to resolve this move and don't need it marked as pending, so
// restore its destination.
move->set_destination(destination);
// This move's source may have changed due to swaps to resolve cycles and so
// it may now be the last move in the cycle. If so remove it.
InstructionOperand source = move->source();
if (source.InterferesWith(destination)) {
source = move->source();
if (source.EqualsCanonicalized(destination)) {
move->Eliminate();
return;
}
// We are about to resolve this move and don't need it marked as pending, so
// restore its destination.
move->set_destination(destination);
// The move may be blocked on a (at most one) pending move, in which case we
// have a cycle. Search for such a blocking move and perform a swap to
// resolve it.
@ -91,7 +205,6 @@ void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) const {
return;
}
DCHECK((*blocker)->IsPending());
// Ensure source is a register or both are stack slots, to limit swap cases.
if (source.IsStackSlot() || source.IsFPStackSlot()) {
std::swap(source, destination);
@ -99,14 +212,36 @@ void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) const {
assembler_->AssembleSwap(&source, &destination);
move->Eliminate();
// Any unperformed (including pending) move with a source of either this
// move's source or destination needs to have their source changed to
// reflect the state of affairs after the swap.
for (MoveOperands* other : *moves) {
if (other->Blocks(source)) {
other->set_source(destination);
} else if (other->Blocks(destination)) {
other->set_source(source);
// Update outstanding moves whose source may now have been moved.
if (!kSimpleFPAliasing && is_fp_loc_move) {
// We may have to split larger moves.
for (size_t i = 0; i < moves->size(); ++i) {
auto other = (*moves)[i];
if (other->IsEliminated()) continue;
if (source.InterferesWith(other->source())) {
if (LocationOperand::cast(other->source()).representation() >
split_rep_) {
other = Split(other, split_rep_, moves);
if (!source.InterferesWith(other->source())) continue;
}
other->set_source(destination);
} else if (destination.InterferesWith(other->source())) {
if (LocationOperand::cast(other->source()).representation() >
split_rep_) {
other = Split(other, split_rep_, moves);
if (!destination.InterferesWith(other->source())) continue;
}
other->set_source(source);
}
}
} else {
for (auto other : *moves) {
if (other->IsEliminated()) continue;
if (source.EqualsCanonicalized(other->source())) {
other->set_source(destination);
} else if (destination.EqualsCanonicalized(other->source())) {
other->set_source(source);
}
}
}
}
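
Not part of the CL: a worked sketch of the fragment indices Split() computes for a register-to-register kFloat64 move under kFloat32 splitting. The d3 -> d7 example and the 2x code mapping are assumed from the ARM aliasing scheme above.

// Sketch of Split()'s index arithmetic for a double register move broken
// into float fragments: double code d maps to float codes 2d and 2d+1.
#include <cstdio>

int main() {
  const int kDoubleSizeLog2 = 3, kFloatSizeLog2 = 2;
  int aliases = 1 << (kDoubleSizeLog2 - kFloatSizeLog2);  // 2 fragments
  int src_d = 3, dst_d = 7;                               // d3 -> d7
  int src_s = src_d * aliases, dst_s = dst_d * aliases;
  for (int i = 0; i < aliases; ++i) {
    std::printf("fragment %d: s%d -> s%d\n", i, src_s + i, dst_s + i);
  }
  // Prints s6 -> s14 and s7 -> s15 for the register-to-register case;
  // stack-slot operands instead step downward from the highest slot index.
  return 0;
}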

View File

@ -26,18 +26,24 @@ class GapResolver final {
InstructionOperand* destination) = 0;
};
explicit GapResolver(Assembler* assembler) : assembler_(assembler) {}
explicit GapResolver(Assembler* assembler)
: assembler_(assembler), split_rep_(MachineRepresentation::kSimd128) {}
// Resolve a set of parallel moves, emitting assembler instructions.
void Resolve(ParallelMove* parallel_move) const;
void Resolve(ParallelMove* parallel_move);
private:
// Perform the given move, possibly requiring other moves to satisfy
// dependencies.
void PerformMove(ParallelMove* moves, MoveOperands* move) const;
// Performs the given move, possibly performing other moves to unblock the
// destination operand.
void PerformMove(ParallelMove* moves, MoveOperands* move);
// Assembler used to emit moves and save registers.
Assembler* const assembler_;
// While resolving moves, the largest FP representation that can be moved.
// Any larger moves must be split into an equivalent series of moves of this
// representation.
MachineRepresentation split_rep_;
};
} // namespace compiler

View File

@ -64,8 +64,35 @@ FlagsCondition CommuteFlagsCondition(FlagsCondition condition) {
return condition;
}
bool InstructionOperand::InterferesWith(const InstructionOperand& that) const {
return EqualsCanonicalized(that);
bool InstructionOperand::InterferesWith(const InstructionOperand& other) const {
if (kSimpleFPAliasing || !this->IsFPLocationOperand() ||
!other.IsFPLocationOperand())
return EqualsCanonicalized(other);
// Aliasing is complex and both operands are fp locations.
const LocationOperand& loc = *LocationOperand::cast(this);
const LocationOperand& other_loc = LocationOperand::cast(other);
LocationOperand::LocationKind kind = loc.location_kind();
LocationOperand::LocationKind other_kind = other_loc.location_kind();
if (kind != other_kind) return false;
MachineRepresentation rep = loc.representation();
MachineRepresentation other_rep = other_loc.representation();
if (rep == other_rep) return EqualsCanonicalized(other);
if (kind == LocationOperand::REGISTER) {
// FP register-register interference.
return GetRegConfig()->AreAliases(rep, loc.register_code(), other_rep,
other_loc.register_code());
} else {
// FP slot-slot interference. Slots of different FP reps can alias because
// the gap resolver may break a move into 2 or 4 equivalent smaller moves.
DCHECK_EQ(LocationOperand::STACK_SLOT, kind);
int index_hi = loc.index();
int index_lo = index_hi - (1 << ElementSizeLog2Of(rep)) / kPointerSize + 1;
int other_index_hi = other_loc.index();
int other_index_lo =
other_index_hi - (1 << ElementSizeLog2Of(other_rep)) / kPointerSize + 1;
return other_index_hi >= index_lo && index_hi >= other_index_lo;
}
return false;
}
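
A worked example of the slot-overlap test above (indices assumed for illustration; kPointerSize is 4 on ARM 32, so a kFloat64 slot spans two word slots): a double at index 5 also covers index 4 and therefore interferes with a float slot at index 4, but not with one at index 3.

// Minimal model of the FP slot interference check: an FP slot's index names
// its highest word, so two slots interfere when their word ranges overlap.
#include <cstdio>

bool SlotsOverlap(int index_hi, int words, int other_index_hi, int other_words) {
  int index_lo = index_hi - words + 1;
  int other_index_lo = other_index_hi - other_words + 1;
  return other_index_hi >= index_lo && index_hi >= other_index_lo;
}

int main() {
  std::printf("%d\n", SlotsOverlap(5, 2, 4, 1));  // double@5 vs float@4 -> 1
  std::printf("%d\n", SlotsOverlap(5, 2, 3, 1));  // double@5 vs float@3 -> 0
  return 0;
}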
void InstructionOperand::Print(const RegisterConfiguration* config) const {
@ -232,28 +259,31 @@ bool ParallelMove::IsRedundant() const {
return true;
}
MoveOperands* ParallelMove::PrepareInsertAfter(MoveOperands* move) const {
void ParallelMove::PrepareInsertAfter(
MoveOperands* move, ZoneVector<MoveOperands*>* to_eliminate) const {
bool no_aliasing =
kSimpleFPAliasing || !move->destination().IsFPLocationOperand();
MoveOperands* replacement = nullptr;
MoveOperands* to_eliminate = nullptr;
MoveOperands* eliminated = nullptr;
for (MoveOperands* curr : *this) {
if (curr->IsEliminated()) continue;
if (curr->destination().EqualsCanonicalized(move->source())) {
// We must replace move's source with curr's destination in order to
// insert it into this ParallelMove.
DCHECK(!replacement);
replacement = curr;
if (to_eliminate != nullptr) break;
} else if (curr->destination().EqualsCanonicalized(move->destination())) {
DCHECK(!to_eliminate);
to_eliminate = curr;
if (replacement != nullptr) break;
if (no_aliasing && eliminated != nullptr) break;
} else if (curr->destination().InterferesWith(move->destination())) {
// We can eliminate curr, since move overwrites at least a part of its
// destination, implying its value is no longer live.
eliminated = curr;
to_eliminate->push_back(curr);
if (no_aliasing && replacement != nullptr) break;
}
}
DCHECK_IMPLIES(replacement == to_eliminate, replacement == nullptr);
if (replacement != nullptr) move->set_source(replacement->source());
return to_eliminate;
}
ExplicitOperand::ExplicitOperand(LocationKind kind, MachineRepresentation rep,
int index)
: LocationOperand(EXPLICIT, kind, rep, index) {

View File

@ -28,8 +28,7 @@ namespace compiler {
// Forward declarations.
class Schedule;
class InstructionOperand {
class V8_EXPORT_PRIVATE InstructionOperand {
public:
static const int kInvalidVirtualRegister = -1;
@ -119,7 +118,7 @@ class InstructionOperand {
return this->GetCanonicalizedValue() < that.GetCanonicalizedValue();
}
bool InterferesWith(const InstructionOperand& that) const;
bool InterferesWith(const InstructionOperand& other) const;
// APIs to aid debugging. For general-stream APIs, use operator<<
void Print(const RegisterConfiguration* config) const;
@ -641,8 +640,14 @@ uint64_t InstructionOperand::GetCanonicalizedValue() const {
if (IsAnyLocationOperand()) {
MachineRepresentation canonical = MachineRepresentation::kNone;
if (IsFPRegister()) {
// We treat all FP register operands the same for simple aliasing.
canonical = MachineRepresentation::kFloat64;
if (kSimpleFPAliasing) {
// We treat all FP register operands the same for simple aliasing.
canonical = MachineRepresentation::kFloat64;
} else {
// We need to distinguish FP register operands of different reps when
// aliasing is not simple (e.g. ARM).
canonical = LocationOperand::cast(this)->representation();
}
}
return InstructionOperand::KindField::update(
LocationOperand::RepresentationField::update(this->value_, canonical),
@ -659,8 +664,8 @@ struct CompareOperandModuloType {
}
};
class MoveOperands final : public ZoneObject {
class V8_EXPORT_PRIVATE MoveOperands final
: public NON_EXPORTED_BASE(ZoneObject) {
public:
MoveOperands(const InstructionOperand& source,
const InstructionOperand& destination)
@ -685,11 +690,6 @@ class MoveOperands final : public ZoneObject {
}
void SetPending() { destination_ = InstructionOperand(); }
// True if this move is a move into the given destination operand.
bool Blocks(const InstructionOperand& destination) const {
return !IsEliminated() && source().InterferesWith(destination);
}
// A move is redundant if it's been eliminated or if its source and
// destination are the same.
bool IsRedundant() const {
@ -724,8 +724,9 @@ struct PrintableMoveOperands {
std::ostream& operator<<(std::ostream& os, const PrintableMoveOperands& mo);
class ParallelMove final : public ZoneVector<MoveOperands*>, public ZoneObject {
class V8_EXPORT_PRIVATE ParallelMove final
: public NON_EXPORTED_BASE(ZoneVector<MoveOperands *>),
public NON_EXPORTED_BASE(ZoneObject) {
public:
explicit ParallelMove(Zone* zone) : ZoneVector<MoveOperands*>(zone) {
reserve(4);
@ -748,9 +749,10 @@ class ParallelMove final : public ZoneVector<MoveOperands*>, public ZoneObject {
bool IsRedundant() const;
// Prepare this ParallelMove to insert move as if it happened in a subsequent
// ParallelMove. move->source() may be changed. The MoveOperand returned
// must be Eliminated.
MoveOperands* PrepareInsertAfter(MoveOperands* move) const;
// ParallelMove. move->source() may be changed. Any MoveOperands added to
// to_eliminate must be Eliminated.
void PrepareInsertAfter(MoveOperands* move,
ZoneVector<MoveOperands*>* to_eliminate) const;
private:
DISALLOW_COPY_AND_ASSIGN(ParallelMove);

View File

@ -25,11 +25,79 @@ struct MoveKeyCompare {
};
typedef ZoneMap<MoveKey, unsigned, MoveKeyCompare> MoveMap;
typedef ZoneSet<InstructionOperand, CompareOperandModuloType> OperandSet;
bool Blocks(const OperandSet& set, const InstructionOperand& operand) {
return set.find(operand) != set.end();
}
class OperandSet {
public:
explicit OperandSet(Zone* zone) : set_(zone), fp_reps_(0) {}
void InsertOp(const InstructionOperand& op) {
set_.insert(op);
if (!kSimpleFPAliasing && op.IsFPRegister())
fp_reps_ |= RepBit(LocationOperand::cast(op).representation());
}
bool ContainsOpOrAlias(const InstructionOperand& op) const {
if (set_.find(op) != set_.end()) return true;
if (!kSimpleFPAliasing && op.IsFPRegister()) {
// Platforms where FP registers have complex aliasing need extra checks.
const LocationOperand& loc = LocationOperand::cast(op);
MachineRepresentation rep = loc.representation();
// If we haven't encountered mixed rep FP registers, skip the extra checks.
if (!HasMixedFPReps(fp_reps_ | RepBit(rep))) return false;
// Check register against aliasing registers of other FP representations.
MachineRepresentation other_rep1, other_rep2;
switch (rep) {
case MachineRepresentation::kFloat32:
other_rep1 = MachineRepresentation::kFloat64;
other_rep2 = MachineRepresentation::kSimd128;
break;
case MachineRepresentation::kFloat64:
other_rep1 = MachineRepresentation::kFloat32;
other_rep2 = MachineRepresentation::kSimd128;
break;
case MachineRepresentation::kSimd128:
other_rep1 = MachineRepresentation::kFloat32;
other_rep2 = MachineRepresentation::kFloat64;
break;
default:
UNREACHABLE();
break;
}
const RegisterConfiguration* config = RegisterConfiguration::Turbofan();
int base = -1;
int aliases =
config->GetAliases(rep, loc.register_code(), other_rep1, &base);
DCHECK(aliases > 0 || (aliases == 0 && base == -1));
while (aliases--) {
if (set_.find(AllocatedOperand(LocationOperand::REGISTER, other_rep1,
base + aliases)) != set_.end())
return true;
}
aliases = config->GetAliases(rep, loc.register_code(), other_rep2, &base);
DCHECK(aliases > 0 || (aliases == 0 && base == -1));
while (aliases--) {
if (set_.find(AllocatedOperand(LocationOperand::REGISTER, other_rep2,
base + aliases)) != set_.end())
return true;
}
}
return false;
}
private:
static int RepBit(MachineRepresentation rep) {
return 1 << static_cast<int>(rep);
}
static bool HasMixedFPReps(int reps) {
return reps && !base::bits::IsPowerOfTwo32(reps);
}
ZoneSet<InstructionOperand, CompareOperandModuloType> set_;
int fp_reps_;
};
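
A standalone model of what the extra probing in ContainsOpOrAlias buys (not the CL's code; the float/double code correspondence is assumed from the ARM scheme): querying a float register also hits a previously inserted double register that overlaps it.

// Toy model: (kind, code) pairs stand in for canonicalized register operands,
// kind 0 = float, kind 1 = double. A float code c overlaps double code c/2.
#include <cstdio>
#include <set>
#include <utility>

using Op = std::pair<int, int>;

bool ContainsOrAliases(const std::set<Op>& set, Op op) {
  if (set.count(op)) return true;
  if (op.first == 0) return set.count({1, op.second / 2}) != 0;
  return set.count({0, op.second * 2}) || set.count({0, op.second * 2 + 1});
}

int main() {
  std::set<Op> ops = {{1, 3}};                          // set contains d3
  std::printf("%d\n", ContainsOrAliases(ops, {0, 7}));  // s7 overlaps d3 -> 1
  std::printf("%d\n", ContainsOrAliases(ops, {0, 8}));  // s8 lies in d4  -> 0
  return 0;
}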
int FindFirstNonEmptySlot(const Instruction* instr) {
int i = Instruction::FIRST_GAP_POSITION;
@ -98,21 +166,21 @@ void MoveOptimizer::RemoveClobberedDestinations(Instruction* instruction) {
// Outputs and temps are treated together as potentially clobbering a
// destination operand.
for (size_t i = 0; i < instruction->OutputCount(); ++i) {
outputs.insert(*instruction->OutputAt(i));
outputs.InsertOp(*instruction->OutputAt(i));
}
for (size_t i = 0; i < instruction->TempCount(); ++i) {
outputs.insert(*instruction->TempAt(i));
outputs.InsertOp(*instruction->TempAt(i));
}
// Input operands block elisions.
for (size_t i = 0; i < instruction->InputCount(); ++i) {
inputs.insert(*instruction->InputAt(i));
inputs.InsertOp(*instruction->InputAt(i));
}
// Elide moves made redundant by the instruction.
for (MoveOperands* move : *moves) {
if (outputs.find(move->destination()) != outputs.end() &&
inputs.find(move->destination()) == inputs.end()) {
if (outputs.ContainsOpOrAlias(move->destination()) &&
!inputs.ContainsOpOrAlias(move->destination())) {
move->Eliminate();
}
}
@ -121,7 +189,7 @@ void MoveOptimizer::RemoveClobberedDestinations(Instruction* instruction) {
// the one for its input.
if (instruction->IsRet() || instruction->IsTailCall()) {
for (MoveOperands* move : *moves) {
if (inputs.find(move->destination()) == inputs.end()) {
if (!inputs.ContainsOpOrAlias(move->destination())) {
move->Eliminate();
}
}
@ -140,7 +208,7 @@ void MoveOptimizer::MigrateMoves(Instruction* to, Instruction* from) {
// If an operand is an input to the instruction, we cannot move assignments
// where it appears on the LHS.
for (size_t i = 0; i < from->InputCount(); ++i) {
dst_cant_be.insert(*from->InputAt(i));
dst_cant_be.InsertOp(*from->InputAt(i));
}
// If an operand is output to the instruction, we cannot move assignments
// where it appears on the RHS, because we would lose its value before the
@ -149,10 +217,10 @@ void MoveOptimizer::MigrateMoves(Instruction* to, Instruction* from) {
// The output can't appear on the LHS because we performed
// RemoveClobberedDestinations for the "from" instruction.
for (size_t i = 0; i < from->OutputCount(); ++i) {
src_cant_be.insert(*from->OutputAt(i));
src_cant_be.InsertOp(*from->OutputAt(i));
}
for (size_t i = 0; i < from->TempCount(); ++i) {
src_cant_be.insert(*from->TempAt(i));
src_cant_be.InsertOp(*from->TempAt(i));
}
for (MoveOperands* move : *from_moves) {
if (move->IsRedundant()) continue;
@ -160,7 +228,7 @@ void MoveOptimizer::MigrateMoves(Instruction* to, Instruction* from) {
// move "z = dest", because z would become y rather than "V".
// We assume CompressMoves has happened before this, which means we don't
// have more than one assignment to dest.
src_cant_be.insert(move->destination());
src_cant_be.InsertOp(move->destination());
}
ZoneSet<MoveKey, MoveKeyCompare> move_candidates(local_zone());
@ -168,7 +236,7 @@ void MoveOptimizer::MigrateMoves(Instruction* to, Instruction* from) {
// destination operands are eligible for being moved down.
for (MoveOperands* move : *from_moves) {
if (move->IsRedundant()) continue;
if (!Blocks(dst_cant_be, move->destination())) {
if (!dst_cant_be.ContainsOpOrAlias(move->destination())) {
MoveKey key = {move->source(), move->destination()};
move_candidates.insert(key);
}
@ -183,8 +251,8 @@ void MoveOptimizer::MigrateMoves(Instruction* to, Instruction* from) {
auto current = iter;
++iter;
InstructionOperand src = current->source;
if (Blocks(src_cant_be, src)) {
src_cant_be.insert(current->destination);
if (src_cant_be.ContainsOpOrAlias(src)) {
src_cant_be.InsertOp(current->destination);
move_candidates.erase(current);
changed = true;
}
@ -223,8 +291,7 @@ void MoveOptimizer::CompressMoves(ParallelMove* left, MoveOpVector* right) {
// merging the two gaps.
for (MoveOperands* move : *right) {
if (move->IsRedundant()) continue;
MoveOperands* to_eliminate = left->PrepareInsertAfter(move);
if (to_eliminate != nullptr) eliminated.push_back(to_eliminate);
left->PrepareInsertAfter(move, &eliminated);
}
// Eliminate dead moves.
for (MoveOperands* to_eliminate : eliminated) {
@ -360,7 +427,7 @@ void MoveOptimizer::OptimizeMerge(InstructionBlock* block) {
// there are such moves, we could move them, but the destination of the
// moves staying behind can't appear as a source of a common move,
// because the move staying behind will clobber this destination.
conflicting_srcs.insert(dest);
conflicting_srcs.InsertOp(dest);
move_map.erase(current);
}
}
@ -374,9 +441,8 @@ void MoveOptimizer::OptimizeMerge(InstructionBlock* block) {
auto current = iter;
++iter;
DCHECK_EQ(block->PredecessorCount(), current->second);
if (conflicting_srcs.find(current->first.source) !=
conflicting_srcs.end()) {
conflicting_srcs.insert(current->first.destination);
if (conflicting_srcs.ContainsOpOrAlias(current->first.source)) {
conflicting_srcs.InsertOp(current->first.destination);
move_map.erase(current);
changed = true;
}

View File

@ -33,7 +33,7 @@ int GetRegisterCount(const RegisterConfiguration* cfg, RegisterKind kind) {
int GetAllocatableRegisterCount(const RegisterConfiguration* cfg,
RegisterKind kind) {
return kind == FP_REGISTERS ? cfg->num_allocatable_aliased_double_registers()
return kind == FP_REGISTERS ? cfg->num_allocatable_double_registers()
: cfg->num_allocatable_general_registers();
}
@ -74,14 +74,8 @@ int GetByteWidth(MachineRepresentation rep) {
case MachineRepresentation::kTaggedSigned:
case MachineRepresentation::kTaggedPointer:
case MachineRepresentation::kTagged:
return kPointerSize;
case MachineRepresentation::kFloat32:
// TODO(bbudge) Eliminate this when FP register aliasing works.
#if V8_TARGET_ARCH_ARM
return kDoubleSize;
#else
return kPointerSize;
#endif
case MachineRepresentation::kWord64:
case MachineRepresentation::kFloat64:
return kDoubleSize;
@ -498,6 +492,12 @@ UsePosition* LiveRange::NextUsePositionRegisterIsBeneficial(
return pos;
}
LifetimePosition LiveRange::NextLifetimePositionRegisterIsBeneficial(
const LifetimePosition& start) const {
UsePosition* next_use = NextUsePositionRegisterIsBeneficial(start);
if (next_use == nullptr) return End();
return next_use->pos();
}
UsePosition* LiveRange::PreviousUsePositionRegisterIsBeneficial(
LifetimePosition start) const {
@ -1360,8 +1360,12 @@ RegisterAllocationData::RegisterAllocationData(
allocation_zone()),
fixed_live_ranges_(this->config()->num_general_registers(), nullptr,
allocation_zone()),
fixed_float_live_ranges_(this->config()->num_float_registers(), nullptr,
allocation_zone()),
fixed_double_live_ranges_(this->config()->num_double_registers(), nullptr,
allocation_zone()),
fixed_simd128_live_ranges_(this->config()->num_simd128_registers(),
nullptr, allocation_zone()),
spill_ranges_(code->VirtualRegisterCount(), nullptr, allocation_zone()),
delayed_references_(allocation_zone()),
assigned_registers_(nullptr),
@ -1539,8 +1543,21 @@ void RegisterAllocationData::MarkAllocated(MachineRepresentation rep,
int index) {
switch (rep) {
case MachineRepresentation::kFloat32:
case MachineRepresentation::kFloat64:
case MachineRepresentation::kSimd128:
if (kSimpleFPAliasing) {
assigned_double_registers_->Add(index);
} else {
int alias_base_index = -1;
int aliases = config()->GetAliases(
rep, index, MachineRepresentation::kFloat64, &alias_base_index);
DCHECK(aliases > 0 || (aliases == 0 && alias_base_index == -1));
while (aliases--) {
int aliased_reg = alias_base_index + aliases;
assigned_double_registers_->Add(aliased_reg);
}
}
break;
case MachineRepresentation::kFloat64:
assigned_double_registers_->Add(index);
break;
default:
@ -1867,7 +1884,11 @@ int LiveRangeBuilder::FixedFPLiveRangeID(int index, MachineRepresentation rep) {
int result = -index - 1;
switch (rep) {
case MachineRepresentation::kSimd128:
result -= config()->num_float_registers();
// Fall through.
case MachineRepresentation::kFloat32:
result -= config()->num_double_registers();
// Fall through.
case MachineRepresentation::kFloat64:
result -= config()->num_general_registers();
break;
@ -1894,25 +1915,35 @@ TopLevelLiveRange* LiveRangeBuilder::FixedLiveRangeFor(int index) {
TopLevelLiveRange* LiveRangeBuilder::FixedFPLiveRangeFor(
int index, MachineRepresentation rep) {
TopLevelLiveRange* result = nullptr;
int num_regs = -1;
ZoneVector<TopLevelLiveRange*>* live_ranges = nullptr;
switch (rep) {
case MachineRepresentation::kFloat32:
num_regs = config()->num_float_registers();
live_ranges = &data()->fixed_float_live_ranges();
break;
case MachineRepresentation::kFloat64:
num_regs = config()->num_double_registers();
live_ranges = &data()->fixed_double_live_ranges();
break;
case MachineRepresentation::kSimd128:
DCHECK(index < config()->num_double_registers());
result = data()->fixed_double_live_ranges()[index];
if (result == nullptr) {
result = data()->NewLiveRange(FixedFPLiveRangeID(index, rep), rep);
DCHECK(result->IsFixed());
result->set_assigned_register(index);
data()->MarkAllocated(rep, index);
data()->fixed_double_live_ranges()[index] = result;
}
num_regs = config()->num_simd128_registers();
live_ranges = &data()->fixed_simd128_live_ranges();
break;
default:
UNREACHABLE();
break;
}
DCHECK(index < num_regs);
TopLevelLiveRange* result = (*live_ranges)[index];
if (result == nullptr) {
result = data()->NewLiveRange(FixedFPLiveRangeID(index, rep), rep);
DCHECK(result->IsFixed());
result->set_assigned_register(index);
data()->MarkAllocated(rep, index);
(*live_ranges)[index] = result;
}
return result;
}
@ -2035,8 +2066,7 @@ void LiveRangeBuilder::ProcessInstructions(const InstructionBlock* block,
}
if (instr->ClobbersDoubleRegisters()) {
for (int i = 0; i < config()->num_allocatable_aliased_double_registers();
++i) {
for (int i = 0; i < config()->num_allocatable_double_registers(); ++i) {
// Add a UseInterval for all DoubleRegisters. See comment above for
// general registers.
int code = config()->GetAllocatableDoubleCode(i);
@ -2045,6 +2075,26 @@ void LiveRangeBuilder::ProcessInstructions(const InstructionBlock* block,
range->AddUseInterval(curr_position, curr_position.End(),
allocation_zone());
}
// Clobber fixed float registers on archs with non-simple aliasing.
if (!kSimpleFPAliasing) {
for (int i = 0; i < config()->num_allocatable_float_registers(); ++i) {
// Add a UseInterval for all FloatRegisters. See comment above for
// general registers.
int code = config()->GetAllocatableFloatCode(i);
TopLevelLiveRange* range =
FixedFPLiveRangeFor(code, MachineRepresentation::kFloat32);
range->AddUseInterval(curr_position, curr_position.End(),
allocation_zone());
}
for (int i = 0; i < config()->num_allocatable_simd128_registers();
++i) {
int code = config()->GetAllocatableSimd128Code(i);
TopLevelLiveRange* range =
FixedFPLiveRangeFor(code, MachineRepresentation::kSimd128);
range->AddUseInterval(curr_position, curr_position.End(),
allocation_zone());
}
}
}
for (size_t i = 0; i < instr->InputCount(); i++) {
@ -2690,9 +2740,15 @@ void LinearScanAllocator::AllocateRegisters() {
if (current != nullptr) AddToInactive(current);
}
} else {
for (TopLevelLiveRange* current : data()->fixed_float_live_ranges()) {
if (current != nullptr) AddToInactive(current);
}
for (TopLevelLiveRange* current : data()->fixed_double_live_ranges()) {
if (current != nullptr) AddToInactive(current);
}
for (TopLevelLiveRange* current : data()->fixed_simd128_live_ranges()) {
if (current != nullptr) AddToInactive(current);
}
}
while (!unhandled_live_ranges().empty()) {
@ -2873,9 +2929,32 @@ void LinearScanAllocator::InactiveToActive(LiveRange* range) {
range->TopLevel()->vreg(), range->relative_id());
}
void LinearScanAllocator::GetFPRegisterSet(MachineRepresentation rep,
int* num_regs, int* num_codes,
const int** codes) const {
DCHECK(!kSimpleFPAliasing);
if (rep == MachineRepresentation::kFloat32) {
*num_regs = data()->config()->num_float_registers();
*num_codes = data()->config()->num_allocatable_float_registers();
*codes = data()->config()->allocatable_float_codes();
} else if (rep == MachineRepresentation::kSimd128) {
*num_regs = data()->config()->num_simd128_registers();
*num_codes = data()->config()->num_allocatable_simd128_registers();
*codes = data()->config()->allocatable_simd128_codes();
} else {
UNREACHABLE();
}
}
void LinearScanAllocator::FindFreeRegistersForRange(
LiveRange* range, Vector<LifetimePosition> positions) {
int num_regs = num_registers();
int num_codes = num_allocatable_registers();
const int* codes = allocatable_register_codes();
MachineRepresentation rep = range->representation();
if (!kSimpleFPAliasing && (rep == MachineRepresentation::kFloat32 ||
rep == MachineRepresentation::kSimd128))
GetFPRegisterSet(rep, &num_regs, &num_codes, &codes);
DCHECK_GE(positions.length(), num_regs);
for (int i = 0; i < num_regs; i++) {
@ -2884,9 +2963,20 @@ void LinearScanAllocator::FindFreeRegistersForRange(
for (LiveRange* cur_active : active_live_ranges()) {
int cur_reg = cur_active->assigned_register();
positions[cur_reg] = LifetimePosition::GapFromInstructionIndex(0);
TRACE("Register %s is free until pos %d (1)\n", RegisterName(cur_reg),
LifetimePosition::GapFromInstructionIndex(0).value());
if (kSimpleFPAliasing || mode() == GENERAL_REGISTERS) {
positions[cur_reg] = LifetimePosition::GapFromInstructionIndex(0);
TRACE("Register %s is free until pos %d (1)\n", RegisterName(cur_reg),
LifetimePosition::GapFromInstructionIndex(0).value());
} else {
int alias_base_index = -1;
int aliases = data()->config()->GetAliases(
cur_active->representation(), cur_reg, rep, &alias_base_index);
DCHECK(aliases > 0 || (aliases == 0 && alias_base_index == -1));
while (aliases--) {
int aliased_reg = alias_base_index + aliases;
positions[aliased_reg] = LifetimePosition::GapFromInstructionIndex(0);
}
}
}
for (LiveRange* cur_inactive : inactive_live_ranges()) {
@ -2894,9 +2984,20 @@ void LinearScanAllocator::FindFreeRegistersForRange(
LifetimePosition next_intersection = cur_inactive->FirstIntersection(range);
if (!next_intersection.IsValid()) continue;
int cur_reg = cur_inactive->assigned_register();
positions[cur_reg] = Min(positions[cur_reg], next_intersection);
TRACE("Register %s is free until pos %d (2)\n", RegisterName(cur_reg),
Min(positions[cur_reg], next_intersection).value());
if (kSimpleFPAliasing || mode() == GENERAL_REGISTERS) {
positions[cur_reg] = Min(positions[cur_reg], next_intersection);
TRACE("Register %s is free until pos %d (2)\n", RegisterName(cur_reg),
Min(positions[cur_reg], next_intersection).value());
} else {
int alias_base_index = -1;
int aliases = data()->config()->GetAliases(
cur_inactive->representation(), cur_reg, rep, &alias_base_index);
DCHECK(aliases > 0 || (aliases == 0 && alias_base_index == -1));
while (aliases--) {
int aliased_reg = alias_base_index + aliases;
positions[aliased_reg] = Min(positions[aliased_reg], next_intersection);
}
}
}
}
@ -2977,8 +3078,14 @@ bool LinearScanAllocator::TryAllocatePreferredReg(
bool LinearScanAllocator::TryAllocateFreeReg(
LiveRange* current, const Vector<LifetimePosition>& free_until_pos) {
int num_regs = 0; // used only for the call to GetFPRegisterSet.
int num_codes = num_allocatable_registers();
const int* codes = allocatable_register_codes();
MachineRepresentation rep = current->representation();
if (!kSimpleFPAliasing && (rep == MachineRepresentation::kFloat32 ||
rep == MachineRepresentation::kSimd128))
GetFPRegisterSet(rep, &num_regs, &num_codes, &codes);
DCHECK_GE(free_until_pos.length(), num_codes);
// Find the register which stays free for the longest time.
@ -3026,6 +3133,10 @@ void LinearScanAllocator::AllocateBlockedReg(LiveRange* current) {
int num_regs = num_registers();
int num_codes = num_allocatable_registers();
const int* codes = allocatable_register_codes();
MachineRepresentation rep = current->representation();
if (!kSimpleFPAliasing && (rep == MachineRepresentation::kFloat32 ||
rep == MachineRepresentation::kSimd128))
GetFPRegisterSet(rep, &num_regs, &num_codes, &codes);
LifetimePosition use_pos[RegisterConfiguration::kMaxFPRegisters];
LifetimePosition block_pos[RegisterConfiguration::kMaxFPRegisters];
@ -3037,16 +3148,28 @@ void LinearScanAllocator::AllocateBlockedReg(LiveRange* current) {
int cur_reg = range->assigned_register();
bool is_fixed_or_cant_spill =
range->TopLevel()->IsFixed() || !range->CanBeSpilled(current->Start());
if (is_fixed_or_cant_spill) {
block_pos[cur_reg] = use_pos[cur_reg] =
LifetimePosition::GapFromInstructionIndex(0);
} else {
UsePosition* next_use =
range->NextUsePositionRegisterIsBeneficial(current->Start());
if (next_use == nullptr) {
use_pos[cur_reg] = range->End();
if (kSimpleFPAliasing || mode() == GENERAL_REGISTERS) {
if (is_fixed_or_cant_spill) {
block_pos[cur_reg] = use_pos[cur_reg] =
LifetimePosition::GapFromInstructionIndex(0);
} else {
use_pos[cur_reg] = next_use->pos();
use_pos[cur_reg] =
range->NextLifetimePositionRegisterIsBeneficial(current->Start());
}
} else {
int alias_base_index = -1;
int aliases = data()->config()->GetAliases(
range->representation(), cur_reg, rep, &alias_base_index);
DCHECK(aliases > 0 || (aliases == 0 && alias_base_index == -1));
while (aliases--) {
int aliased_reg = alias_base_index + aliases;
if (is_fixed_or_cant_spill) {
block_pos[aliased_reg] = use_pos[aliased_reg] =
LifetimePosition::GapFromInstructionIndex(0);
} else {
use_pos[aliased_reg] =
range->NextLifetimePositionRegisterIsBeneficial(current->Start());
}
}
}
}
@ -3057,11 +3180,29 @@ void LinearScanAllocator::AllocateBlockedReg(LiveRange* current) {
if (!next_intersection.IsValid()) continue;
int cur_reg = range->assigned_register();
bool is_fixed = range->TopLevel()->IsFixed();
if (is_fixed) {
block_pos[cur_reg] = Min(block_pos[cur_reg], next_intersection);
use_pos[cur_reg] = Min(block_pos[cur_reg], use_pos[cur_reg]);
if (kSimpleFPAliasing || mode() == GENERAL_REGISTERS) {
if (is_fixed) {
block_pos[cur_reg] = Min(block_pos[cur_reg], next_intersection);
use_pos[cur_reg] = Min(block_pos[cur_reg], use_pos[cur_reg]);
} else {
use_pos[cur_reg] = Min(use_pos[cur_reg], next_intersection);
}
} else {
use_pos[cur_reg] = Min(use_pos[cur_reg], next_intersection);
int alias_base_index = -1;
int aliases = data()->config()->GetAliases(
range->representation(), cur_reg, rep, &alias_base_index);
DCHECK(aliases > 0 || (aliases == 0 && alias_base_index == -1));
while (aliases--) {
int aliased_reg = alias_base_index + aliases;
if (is_fixed) {
block_pos[aliased_reg] =
Min(block_pos[aliased_reg], next_intersection);
use_pos[aliased_reg] =
Min(block_pos[aliased_reg], use_pos[aliased_reg]);
} else {
use_pos[aliased_reg] = Min(use_pos[aliased_reg], next_intersection);
}
}
}
}
@ -3113,7 +3254,15 @@ void LinearScanAllocator::SplitAndSpillIntersecting(LiveRange* current) {
LifetimePosition split_pos = current->Start();
for (size_t i = 0; i < active_live_ranges().size(); ++i) {
LiveRange* range = active_live_ranges()[i];
if (range->assigned_register() != reg) continue;
if (kSimpleFPAliasing || mode() == GENERAL_REGISTERS) {
if (range->assigned_register() != reg) continue;
} else {
if (!data()->config()->AreAliases(current->representation(), reg,
range->representation(),
range->assigned_register())) {
continue;
}
}
UsePosition* next_pos = range->NextRegisterPosition(current->Start());
LifetimePosition spill_pos = FindOptimalSpillingPos(range, split_pos);
@ -3140,7 +3289,14 @@ void LinearScanAllocator::SplitAndSpillIntersecting(LiveRange* current) {
LiveRange* range = inactive_live_ranges()[i];
DCHECK(range->End() > current->Start());
if (range->TopLevel()->IsFixed()) continue;
if (range->assigned_register() != reg) continue;
if (kSimpleFPAliasing || mode() == GENERAL_REGISTERS) {
if (range->assigned_register() != reg) continue;
} else {
if (!data()->config()->AreAliases(current->representation(), reg,
range->representation(),
range->assigned_register()))
continue;
}
LifetimePosition next_intersection = range->FirstIntersection(current);
if (next_intersection.IsValid()) {
@ -3631,7 +3787,6 @@ int LiveRangeConnector::ResolveControlFlow(const InstructionBlock* block,
return gap_index;
}
void LiveRangeConnector::ConnectRanges(Zone* local_zone) {
DelayedInsertionMap delayed_insertion_map(local_zone);
for (TopLevelLiveRange* top_range : data()->live_ranges()) {
@ -3719,9 +3874,8 @@ void LiveRangeConnector::ConnectRanges(Zone* local_zone) {
// Gather all MoveOperands for a single ParallelMove.
MoveOperands* move =
new (code_zone()) MoveOperands(it->first.second, it->second);
MoveOperands* eliminate = moves->PrepareInsertAfter(move);
moves->PrepareInsertAfter(move, &to_eliminate);
to_insert.push_back(move);
if (eliminate != nullptr) to_eliminate.push_back(eliminate);
}
}
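The call-site change above reflects the new ParallelMove::PrepareInsertAfter contract: under complex FP aliasing, inserting one wide move can make several narrower pending moves redundant, so eliminated moves are now appended to a caller-supplied vector instead of being returned as a single pointer. A minimal sketch of the elimination half of that contract, using a hypothetical Move struct rather than the Instruction/MoveOperands API:

#include <cstdio>
#include <vector>

// A "move" is a destination and source expressed in float32-sized lanes.
struct Move { int dst, src, lanes; };  // {6, 0, 2} reads as "d3 <- d0".

bool DestinationsOverlap(const Move& a, const Move& b) {
  return a.dst < b.dst + b.lanes && b.dst < a.dst + a.lanes;
}

// Drop every pending move whose destination is (partly) overwritten by the
// move about to be inserted, collecting the dropped moves the way the new
// PrepareInsertAfter out-parameter does.
void EliminateInterfering(std::vector<Move>* pending, const Move& inserted,
                          std::vector<Move>* eliminated) {
  for (auto it = pending->begin(); it != pending->end();) {
    if (DestinationsOverlap(*it, inserted)) {
      eliminated->push_back(*it);
      it = pending->erase(it);
    } else {
      ++it;
    }
  }
}

int main() {
  std::vector<Move> pending = {{6, 0, 1}, {7, 2, 1}};  // s6 <- s0, s7 <- s2
  std::vector<Move> eliminated;
  EliminateInterfering(&pending, {6, 4, 2}, &eliminated);  // insert d3 <- d2
  std::printf("eliminated %zu narrower moves\n", eliminated.size());  // 2
  return 0;
}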

View File

@ -357,6 +357,11 @@ class V8_EXPORT_PRIVATE LiveRange : public NON_EXPORTED_BASE(ZoneObject) {
UsePosition* NextUsePositionRegisterIsBeneficial(
LifetimePosition start) const;
// Returns lifetime position for which register is beneficial in this live
// range and which follows both start and last processed use position.
LifetimePosition NextLifetimePositionRegisterIsBeneficial(
const LifetimePosition& start) const;
// Returns use position for which register is beneficial in this live
// range and which precedes start.
UsePosition* PreviousUsePositionRegisterIsBeneficial(
@ -773,12 +778,24 @@ class RegisterAllocationData final : public ZoneObject {
ZoneVector<TopLevelLiveRange*>& fixed_live_ranges() {
return fixed_live_ranges_;
}
ZoneVector<TopLevelLiveRange*>& fixed_float_live_ranges() {
return fixed_float_live_ranges_;
}
const ZoneVector<TopLevelLiveRange*>& fixed_float_live_ranges() const {
return fixed_float_live_ranges_;
}
ZoneVector<TopLevelLiveRange*>& fixed_double_live_ranges() {
return fixed_double_live_ranges_;
}
const ZoneVector<TopLevelLiveRange*>& fixed_double_live_ranges() const {
return fixed_double_live_ranges_;
}
ZoneVector<TopLevelLiveRange*>& fixed_simd128_live_ranges() {
return fixed_simd128_live_ranges_;
}
const ZoneVector<TopLevelLiveRange*>& fixed_simd128_live_ranges() const {
return fixed_simd128_live_ranges_;
}
ZoneVector<BitVector*>& live_in_sets() { return live_in_sets_; }
ZoneVector<BitVector*>& live_out_sets() { return live_out_sets_; }
ZoneVector<SpillRange*>& spill_ranges() { return spill_ranges_; }
@ -840,7 +857,9 @@ class RegisterAllocationData final : public ZoneObject {
ZoneVector<BitVector*> live_out_sets_;
ZoneVector<TopLevelLiveRange*> live_ranges_;
ZoneVector<TopLevelLiveRange*> fixed_live_ranges_;
ZoneVector<TopLevelLiveRange*> fixed_float_live_ranges_;
ZoneVector<TopLevelLiveRange*> fixed_double_live_ranges_;
ZoneVector<TopLevelLiveRange*> fixed_simd128_live_ranges_;
ZoneVector<SpillRange*> spill_ranges_;
DelayedReferences delayed_references_;
BitVector* assigned_registers_;
@ -1058,6 +1077,8 @@ class LinearScanAllocator final : public RegisterAllocator {
const Vector<LifetimePosition>& free_until_pos);
bool TryAllocatePreferredReg(LiveRange* range,
const Vector<LifetimePosition>& free_until_pos);
void GetFPRegisterSet(MachineRepresentation rep, int* num_regs,
int* num_codes, const int** codes) const;
void FindFreeRegistersForRange(LiveRange* range,
Vector<LifetimePosition> free_until_pos);
void ProcessCurrentRange(LiveRange* current);
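GetFPRegisterSet, declared above, is what lets TryAllocateFreeReg and AllocateBlockedReg iterate a representation-specific register set when aliasing is complex. A rough sketch of the selection it performs, against a hypothetical table struct rather than the RegisterAllocationData/RegisterConfiguration interfaces:

#include <vector>

enum class Rep { kFloat32, kFloat64, kSimd128 };

// Hypothetical stand-in for the per-representation register tables.
struct FPRegisterTables {
  std::vector<int> float_codes, double_codes, simd128_codes;
  int num_float_regs, num_double_regs, num_simd128_regs;
};

// Selects the register count and allocatable-code array for `rep`, mirroring
// what a GetFPRegisterSet-style helper hands back to the allocation loops.
void GetFPRegisterSetSketch(const FPRegisterTables& t, Rep rep, int* num_regs,
                            int* num_codes, const int** codes) {
  const std::vector<int>& v = rep == Rep::kFloat32   ? t.float_codes
                              : rep == Rep::kSimd128 ? t.simd128_codes
                                                     : t.double_codes;
  *num_regs = rep == Rep::kFloat32   ? t.num_float_regs
              : rep == Rep::kSimd128 ? t.num_simd128_regs
                                     : t.num_double_regs;
  *num_codes = static_cast<int>(v.size());
  *codes = v.data();
}

int main() {
  FPRegisterTables t{{0, 1, 2, 3}, {0, 1}, {0}, 4, 2, 1};
  int num_regs = 0, num_codes = 0;
  const int* codes = nullptr;
  GetFPRegisterSetSketch(t, Rep::kFloat32, &num_regs, &num_codes, &codes);
  // Four allocatable float codes, first one is 0.
  return (num_regs == 4 && num_codes == 4 && codes[0] == 0) ? 0 : 1;
}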

View File

@ -178,6 +178,17 @@ struct Allocator {
// Allocate a floating point register/stack location.
if (fp_offset < fp_count) {
DoubleRegister reg = fp_regs[fp_offset++];
#if V8_TARGET_ARCH_ARM
// Allocate floats using a double register, but modify the code to
// reflect how ARM FP registers alias.
// TODO(bbudge) Modify wasm linkage to allow use of all float regs.
if (type == kAstF32) {
int float_reg_code = reg.code() * 2;
DCHECK(float_reg_code < RegisterConfiguration::kMaxFPRegisters);
return regloc(DoubleRegister::from_code(float_reg_code),
MachineTypeFor(type));
}
#endif
return regloc(reg, MachineTypeFor(type));
} else {
int offset = -1 - stack_offset;

View File

@ -239,7 +239,7 @@ inline bool IsAnyTagged(MachineRepresentation rep) {
}
// Gets the log2 of the element size in bytes of the machine type.
inline int ElementSizeLog2Of(MachineRepresentation rep) {
V8_EXPORT_PRIVATE inline int ElementSizeLog2Of(MachineRepresentation rep) {
switch (rep) {
case MachineRepresentation::kBit:
case MachineRepresentation::kWord8:

View File

@ -70,15 +70,12 @@ class ArchDefaultRegisterConfiguration : public RegisterConfiguration {
#if V8_TARGET_ARCH_IA32
kMaxAllocatableGeneralRegisterCount,
kMaxAllocatableDoubleRegisterCount,
kMaxAllocatableDoubleRegisterCount,
#elif V8_TARGET_ARCH_X87
kMaxAllocatableGeneralRegisterCount,
compiler == TURBOFAN ? 1 : kMaxAllocatableDoubleRegisterCount,
compiler == TURBOFAN ? 1 : kMaxAllocatableDoubleRegisterCount,
#elif V8_TARGET_ARCH_X64
kMaxAllocatableGeneralRegisterCount,
kMaxAllocatableDoubleRegisterCount,
kMaxAllocatableDoubleRegisterCount,
#elif V8_TARGET_ARCH_ARM
FLAG_enable_embedded_constant_pool
? (kMaxAllocatableGeneralRegisterCount - 1)
@ -86,27 +83,21 @@ class ArchDefaultRegisterConfiguration : public RegisterConfiguration {
CpuFeatures::IsSupported(VFP32DREGS)
? kMaxAllocatableDoubleRegisterCount
: (ALLOCATABLE_NO_VFP32_DOUBLE_REGISTERS(REGISTER_COUNT) 0),
ALLOCATABLE_NO_VFP32_DOUBLE_REGISTERS(REGISTER_COUNT) 0,
#elif V8_TARGET_ARCH_ARM64
kMaxAllocatableGeneralRegisterCount,
kMaxAllocatableDoubleRegisterCount,
kMaxAllocatableDoubleRegisterCount,
#elif V8_TARGET_ARCH_MIPS
kMaxAllocatableGeneralRegisterCount,
kMaxAllocatableDoubleRegisterCount,
kMaxAllocatableDoubleRegisterCount,
#elif V8_TARGET_ARCH_MIPS64
kMaxAllocatableGeneralRegisterCount,
kMaxAllocatableDoubleRegisterCount,
kMaxAllocatableDoubleRegisterCount,
#elif V8_TARGET_ARCH_PPC
kMaxAllocatableGeneralRegisterCount,
kMaxAllocatableDoubleRegisterCount,
kMaxAllocatableDoubleRegisterCount,
#elif V8_TARGET_ARCH_S390
kMaxAllocatableGeneralRegisterCount,
kMaxAllocatableDoubleRegisterCount,
kMaxAllocatableDoubleRegisterCount,
#else
#error Unsupported target architecture.
#endif
@ -145,7 +136,6 @@ const RegisterConfiguration* RegisterConfiguration::Turbofan() {
RegisterConfiguration::RegisterConfiguration(
int num_general_registers, int num_double_registers,
int num_allocatable_general_registers, int num_allocatable_double_registers,
int num_allocatable_aliased_double_registers,
const int* allocatable_general_codes, const int* allocatable_double_codes,
AliasingKind fp_aliasing_kind, const char* const* general_register_names,
const char* const* float_register_names,
@ -158,8 +148,6 @@ RegisterConfiguration::RegisterConfiguration(
num_allocatable_general_registers_(num_allocatable_general_registers),
num_allocatable_float_registers_(0),
num_allocatable_double_registers_(num_allocatable_double_registers),
num_allocatable_aliased_double_registers_(
num_allocatable_aliased_double_registers),
num_allocatable_simd128_registers_(0),
allocatable_general_codes_mask_(0),
allocatable_float_codes_mask_(0),

View File

@ -36,7 +36,6 @@ class V8_EXPORT_PRIVATE RegisterConfiguration {
RegisterConfiguration(int num_general_registers, int num_double_registers,
int num_allocatable_general_registers,
int num_allocatable_double_registers,
int num_allocatable_aliased_double_registers,
const int* allocatable_general_codes,
const int* allocatable_double_codes,
AliasingKind fp_aliasing_kind,
@ -58,12 +57,6 @@ class V8_EXPORT_PRIVATE RegisterConfiguration {
int num_allocatable_double_registers() const {
return num_allocatable_double_registers_;
}
// TODO(bbudge): This is a temporary work-around required because our
// register allocator does not yet support the aliasing of single/double
// registers on ARM.
int num_allocatable_aliased_double_registers() const {
return num_allocatable_aliased_double_registers_;
}
int num_allocatable_simd128_registers() const {
return num_allocatable_simd128_registers_;
}
@ -143,7 +136,6 @@ class V8_EXPORT_PRIVATE RegisterConfiguration {
int num_allocatable_general_registers_;
int num_allocatable_float_registers_;
int num_allocatable_double_registers_;
int num_allocatable_aliased_double_registers_;
int num_allocatable_simd128_registers_;
int32_t allocatable_general_codes_mask_;
int32_t allocatable_float_codes_mask_;

View File

@ -26,6 +26,8 @@ class zone_allocator {
typedef zone_allocator<O> other;
};
// TODO(bbudge) Remove when V8 updates to MSVS 2015. See crbug.com/603131.
zone_allocator() : zone_(nullptr) { UNREACHABLE(); }
explicit zone_allocator(Zone* zone) throw() : zone_(zone) {}
explicit zone_allocator(const zone_allocator& other) throw()
: zone_(other.zone_) {}
@ -62,7 +64,6 @@ class zone_allocator {
Zone* zone() { return zone_; }
private:
zone_allocator();
Zone* zone_;
};

View File

@ -13,15 +13,32 @@ namespace compiler {
const auto GetRegConfig = RegisterConfiguration::Turbofan;
// Fragments the given operand into an equivalent set of operands to simplify
// ParallelMove equivalence testing.
// Fragments the given FP operand into an equivalent set of FP operands to
// simplify ParallelMove equivalence testing.
void GetCanonicalOperands(const InstructionOperand& op,
std::vector<InstructionOperand>* fragments) {
CHECK(!kSimpleFPAliasing);
CHECK(op.IsFPLocationOperand());
// TODO(bbudge) Split into float operands on platforms with non-simple FP
// register aliasing.
fragments->push_back(op);
const LocationOperand& loc = LocationOperand::cast(op);
MachineRepresentation rep = loc.representation();
int base = -1;
int aliases = GetRegConfig()->GetAliases(
rep, 0, MachineRepresentation::kFloat32, &base);
CHECK_LT(0, aliases);
CHECK_GE(4, aliases);
int index = -1;
int step = 1;
if (op.IsFPRegister()) {
index = loc.register_code() * aliases;
} else {
index = loc.index();
step = -1;
}
for (int i = 0; i < aliases; i++) {
fragments->push_back(AllocatedOperand(loc.location_kind(),
MachineRepresentation::kFloat32,
index + i * step));
}
}
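To make the fragmentation rule concrete: a float64 register with code k fragments into float32 registers 2k and 2k+1, while a float64 stack slot at index n covers float32 slots n and n-1, since FP slots are indexed from the high end. A standalone sketch with simplified structs, not the InstructionOperand classes:

#include <cstdio>
#include <vector>

struct Frag { bool is_reg; int index; };

// `aliases` is the number of float32 pieces (2 for float64, 4 for simd128).
std::vector<Frag> Fragment(bool is_reg, int index, int aliases) {
  std::vector<Frag> out;
  int base = is_reg ? index * aliases : index;
  int step = is_reg ? 1 : -1;
  for (int i = 0; i < aliases; ++i) out.push_back({is_reg, base + i * step});
  return out;
}

int main() {
  for (const Frag& f : Fragment(true, 3, 2))   // register d3
    std::printf("reg s%d\n", f.index);         // s6, s7
  for (const Frag& f : Fragment(false, 3, 2))  // float64 stack slot 3
    std::printf("slot %d\n", f.index);         // slots 3 and 2
  return 0;
}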
// The state of our move interpreter is the mapping of operands to values. Note
@ -36,7 +53,9 @@ class InterpreterState {
const InstructionOperand& dst = m->destination();
if (!kSimpleFPAliasing && src.IsFPLocationOperand() &&
dst.IsFPLocationOperand()) {
// Canonicalize FP location-location moves.
// Canonicalize FP location-location moves by fragmenting them into
// an equivalent sequence of float32 moves, to simplify state
// equivalence testing.
std::vector<InstructionOperand> src_fragments;
GetCanonicalOperands(src, &src_fragments);
CHECK(!src_fragments.empty());
@ -115,9 +134,11 @@ class InterpreterState {
int index;
if (!is_constant) {
const LocationOperand& loc_op = LocationOperand::cast(op);
// Canonicalize FP location operand representations to kFloat64.
// Preserve FP representation when FP register aliasing is complex.
// Otherwise, canonicalize to kFloat64.
if (IsFloatingPoint(loc_op.representation())) {
rep = MachineRepresentation::kFloat64;
rep = kSimpleFPAliasing ? MachineRepresentation::kFloat64
: loc_op.representation();
}
if (loc_op.IsAnyRegister()) {
index = loc_op.register_code();
@ -321,9 +342,11 @@ class ParallelMoveCreator : public HandleAndZoneScope {
auto GetValidRegisterCode = [&conf](MachineRepresentation rep, int index) {
switch (rep) {
case MachineRepresentation::kFloat32:
return conf->RegisterConfiguration::GetAllocatableFloatCode(index);
case MachineRepresentation::kFloat64:
case MachineRepresentation::kSimd128:
return conf->RegisterConfiguration::GetAllocatableDoubleCode(index);
case MachineRepresentation::kSimd128:
return conf->RegisterConfiguration::GetAllocatableSimd128Code(index);
default:
return conf->RegisterConfiguration::GetAllocatableGeneralCode(index);
}
@ -368,6 +391,118 @@ void RunTest(ParallelMove* pm, Zone* zone) {
CHECK_EQ(mi1.state(), mi2.state());
}
TEST(Aliasing) {
// On platforms with simple aliasing, these parallel moves are ill-formed.
if (kSimpleFPAliasing) return;
ParallelMoveCreator pmc;
Zone* zone = pmc.main_zone();
auto s0 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat32, 0);
auto s1 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat32, 1);
auto s2 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat32, 2);
auto s3 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat32, 3);
auto s4 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat32, 4);
auto d0 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat64, 0);
auto d1 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat64, 1);
auto d16 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat64, 16);
// Double slots must be odd to match frame allocation.
auto dSlot = AllocatedOperand(LocationOperand::STACK_SLOT,
MachineRepresentation::kFloat64, 3);
// Cycles involving s- and d-registers.
{
std::vector<InstructionOperand> moves = {
s2, s0, // s2 <- s0
d0, d1 // d0 <- d1
};
RunTest(pmc.Create(moves), zone);
}
{
std::vector<InstructionOperand> moves = {
d0, d1, // d0 <- d1
s2, s0 // s2 <- s0
};
RunTest(pmc.Create(moves), zone);
}
{
std::vector<InstructionOperand> moves = {
s2, s1, // s2 <- s1
d0, d1 // d0 <- d1
};
RunTest(pmc.Create(moves), zone);
}
{
std::vector<InstructionOperand> moves = {
d0, d1, // d0 <- d1
s2, s1 // s2 <- s1
};
RunTest(pmc.Create(moves), zone);
}
// Two cycles involving a single d-register.
{
std::vector<InstructionOperand> moves = {
d0, d1, // d0 <- d1
s2, s1, // s2 <- s1
s3, s0 // s3 <- s0
};
RunTest(pmc.Create(moves), zone);
}
// Cycle with a float move that must be deferred until after swaps.
{
std::vector<InstructionOperand> moves = {
d0, d1, // d0 <- d1
s2, s0, // s2 <- s0
s3, s4 // s3 <- s4 must be deferred
};
RunTest(pmc.Create(moves), zone);
}
// Cycles involving s-registers and a non-aliased d-register.
{
std::vector<InstructionOperand> moves = {
d16, d0, // d16 <- d0
s1, s2, // s1 <- s2
d1, d16 // d1 <- d16
};
RunTest(pmc.Create(moves), zone);
}
{
std::vector<InstructionOperand> moves = {
        s2, s1,   // s2 <- s1
        d0, d16,  // d0 <- d16
        d16, d1   // d16 <- d1
};
RunTest(pmc.Create(moves), zone);
}
{
std::vector<InstructionOperand> moves = {
d0, d16, // d0 <- d16
        d16, d1,  // d16 <- d1
        s3, s0    // s3 <- s0
};
RunTest(pmc.Create(moves), zone);
}
// Cycle involving aliasing registers and a slot.
{
std::vector<InstructionOperand> moves = {
dSlot, d0, // dSlot <- d0
d1, dSlot, // d1 <- dSlot
s0, s3 // s0 <- s3
};
RunTest(pmc.Create(moves), zone);
}
}
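Several of the cycles above only resolve because the GapResolver can split a wide FP move into float32-sized pieces when it interferes with narrower moves. A minimal sketch of that decomposition, using a hypothetical Move struct rather than the GapResolver itself:

#include <cstdio>
#include <vector>

struct Move { int dst, src, lanes; };  // codes expressed in float32 lanes

// Decompose a wide move into single-lane (float32) moves.
std::vector<Move> SplitWideMove(const Move& m) {
  std::vector<Move> parts;
  for (int i = 0; i < m.lanes; ++i) parts.push_back({m.dst + i, m.src + i, 1});
  return parts;
}

int main() {
  // d0 <- d1 expressed in float32 lanes is {0, 2, 2}; it splits into
  // s0 <- s2 and s1 <- s3, which can then be swapped lane by lane.
  for (const Move& part : SplitWideMove({0, 2, 2}))
    std::printf("s%d <- s%d\n", part.dst, part.src);
  return 0;
}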
TEST(FuzzResolver) {
ParallelMoveCreator pmc;
for (int size = 0; size < 80; ++size) {

View File

@ -87,8 +87,16 @@ class RegisterPairs : public Pairs {
class Float32RegisterPairs : public Pairs {
public:
Float32RegisterPairs()
: Pairs(100, GetRegConfig()->num_allocatable_aliased_double_registers(),
GetRegConfig()->allocatable_double_codes()) {}
: Pairs(
100,
#if V8_TARGET_ARCH_ARM
// TODO(bbudge) Modify wasm linkage to allow use of all float regs.
GetRegConfig()->num_allocatable_double_registers() / 2 - 2,
#else
GetRegConfig()->num_allocatable_double_registers(),
#endif
GetRegConfig()->allocatable_double_codes()) {
}
};
@ -127,6 +135,10 @@ struct Allocator {
// Allocate a floating point register/stack location.
if (fp_offset < fp_count) {
int code = fp_regs[fp_offset++];
#if V8_TARGET_ARCH_ARM
// TODO(bbudge) Modify wasm linkage to allow use of all float regs.
if (type.representation() == MachineRepresentation::kFloat32) code *= 2;
#endif
return LinkageLocation::ForRegister(code, type);
} else {
int offset = -1 - stack_offset;

View File

@ -50,6 +50,7 @@ v8_executable("unittests") {
"compiler/instruction-selector-unittest.h",
"compiler/instruction-sequence-unittest.cc",
"compiler/instruction-sequence-unittest.h",
"compiler/instruction-unittest.cc",
"compiler/int64-lowering-unittest.cc",
"compiler/js-builtin-reducer-unittest.cc",
"compiler/js-create-lowering-unittest.cc",

View File

@ -22,11 +22,8 @@ static char register_names_[10 * (RegisterConfiguration::kMaxGeneralRegisters +
namespace {
static int allocatable_codes[InstructionSequenceTest::kDefaultNRegs] = {
0, 1, 2, 3, 4, 5, 6, 7};
static int allocatable_double_codes[InstructionSequenceTest::kDefaultNRegs] = {
0, 1, 2, 3, 4, 5, 6, 7};
}
static void InitializeRegisterNames() {
char* loc = register_names_;
for (int i = 0; i < RegisterConfiguration::kMaxGeneralRegisters; ++i) {
@ -92,8 +89,7 @@ RegisterConfiguration* InstructionSequenceTest::config() {
if (!config_) {
config_.reset(new RegisterConfiguration(
num_general_registers_, num_double_registers_, num_general_registers_,
num_double_registers_, num_double_registers_, allocatable_codes,
allocatable_double_codes,
num_double_registers_, allocatable_codes, allocatable_codes,
kSimpleFPAliasing ? RegisterConfiguration::OVERLAP
: RegisterConfiguration::COMBINE,
general_register_names_,

View File

@ -20,7 +20,9 @@ class InstructionSequenceTest : public TestWithIsolateAndZone {
static const int kDefaultNRegs = 8;
static const int kNoValue = kMinInt;
static const MachineRepresentation kNoRep = MachineRepresentation::kNone;
static const MachineRepresentation kFloat32 = MachineRepresentation::kFloat32;
static const MachineRepresentation kFloat64 = MachineRepresentation::kFloat64;
static const MachineRepresentation kSimd128 = MachineRepresentation::kSimd128;
typedef RpoNumber Rpo;

View File

@ -0,0 +1,175 @@
// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/compiler/instruction.h"
#include "src/register-configuration.h"
#include "test/unittests/test-utils.h"
#include "testing/gtest-support.h"
namespace v8 {
namespace internal {
namespace compiler {
namespace {
const MachineRepresentation kWord = MachineRepresentation::kWord32;
const MachineRepresentation kFloat = MachineRepresentation::kFloat32;
const MachineRepresentation kDouble = MachineRepresentation::kFloat64;
bool Interfere(LocationOperand::LocationKind kind, MachineRepresentation rep1,
int index1, MachineRepresentation rep2, int index2) {
return AllocatedOperand(kind, rep1, index1)
.InterferesWith(AllocatedOperand(kind, rep2, index2));
}
bool Contains(const ZoneVector<MoveOperands*>* moves,
const InstructionOperand& to, const InstructionOperand& from) {
for (auto move : *moves) {
if (move->destination().Equals(to) && move->source().Equals(from)) {
return true;
}
}
return false;
}
} // namespace
class InstructionTest : public TestWithZone {
public:
InstructionTest() {}
virtual ~InstructionTest() {}
ParallelMove* CreateParallelMove(
const std::vector<InstructionOperand>& operand_pairs) {
ParallelMove* parallel_move = new (zone()) ParallelMove(zone());
for (size_t i = 0; i < operand_pairs.size(); i += 2)
parallel_move->AddMove(operand_pairs[i + 1], operand_pairs[i]);
return parallel_move;
}
};
TEST_F(InstructionTest, OperandInterference) {
// All general registers and slots interfere only with themselves.
for (int i = 0; i < RegisterConfiguration::kMaxGeneralRegisters; ++i) {
EXPECT_TRUE(Interfere(LocationOperand::REGISTER, kWord, i, kWord, i));
EXPECT_TRUE(Interfere(LocationOperand::STACK_SLOT, kWord, i, kWord, i));
for (int j = i + 1; j < RegisterConfiguration::kMaxGeneralRegisters; ++j) {
EXPECT_FALSE(Interfere(LocationOperand::REGISTER, kWord, i, kWord, j));
EXPECT_FALSE(Interfere(LocationOperand::STACK_SLOT, kWord, i, kWord, j));
}
}
// All FP registers interfere with themselves.
for (int i = 0; i < RegisterConfiguration::kMaxFPRegisters; ++i) {
EXPECT_TRUE(Interfere(LocationOperand::REGISTER, kFloat, i, kFloat, i));
EXPECT_TRUE(Interfere(LocationOperand::STACK_SLOT, kFloat, i, kFloat, i));
EXPECT_TRUE(Interfere(LocationOperand::REGISTER, kDouble, i, kDouble, i));
EXPECT_TRUE(Interfere(LocationOperand::STACK_SLOT, kDouble, i, kDouble, i));
}
if (kSimpleFPAliasing) {
// Simple FP aliasing: interfering registers of different reps have the same
// index.
for (int i = 0; i < RegisterConfiguration::kMaxFPRegisters; ++i) {
EXPECT_TRUE(Interfere(LocationOperand::REGISTER, kFloat, i, kDouble, i));
EXPECT_TRUE(Interfere(LocationOperand::REGISTER, kDouble, i, kFloat, i));
for (int j = i + 1; j < RegisterConfiguration::kMaxFPRegisters; ++j) {
EXPECT_FALSE(Interfere(LocationOperand::REGISTER, kWord, i, kWord, j));
EXPECT_FALSE(
Interfere(LocationOperand::STACK_SLOT, kWord, i, kWord, j));
}
}
} else {
    // Complex FP aliasing: sub-registers interfere with containing registers.
// Test sub-register indices which may not exist on the platform. This is
// necessary since the GapResolver may split large moves into smaller ones.
for (int i = 0; i < RegisterConfiguration::kMaxFPRegisters; ++i) {
EXPECT_TRUE(
Interfere(LocationOperand::REGISTER, kFloat, i * 2, kDouble, i));
EXPECT_TRUE(
Interfere(LocationOperand::REGISTER, kFloat, i * 2 + 1, kDouble, i));
EXPECT_TRUE(
Interfere(LocationOperand::REGISTER, kDouble, i, kFloat, i * 2));
EXPECT_TRUE(
Interfere(LocationOperand::REGISTER, kDouble, i, kFloat, i * 2 + 1));
for (int j = i + 1; j < RegisterConfiguration::kMaxFPRegisters; ++j) {
EXPECT_FALSE(
Interfere(LocationOperand::REGISTER, kFloat, i * 2, kDouble, j));
EXPECT_FALSE(Interfere(LocationOperand::REGISTER, kFloat, i * 2 + 1,
kDouble, j));
EXPECT_FALSE(
Interfere(LocationOperand::REGISTER, kDouble, i, kFloat, j * 2));
EXPECT_FALSE(Interfere(LocationOperand::REGISTER, kDouble, i, kFloat,
j * 2 + 1));
}
}
}
}
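The expectations above boil down to a lane-overlap rule: under complex aliasing, two FP operands interfere exactly when the float32-sized lane ranges they cover overlap. A standalone restatement of that rule (hypothetical helper, not AllocatedOperand::InterferesWith):

#include <cassert>

struct Op { int code; int lanes; };  // lanes: 1 = float32, 2 = float64

bool InterferesSketch(Op a, Op b) {
  int a_base = a.code * a.lanes, b_base = b.code * b.lanes;
  return a_base < b_base + b.lanes && b_base < a_base + a.lanes;
}

int main() {
  assert(InterferesSketch({2, 1}, {1, 2}));   // s2 overlaps d1 (s2, s3)
  assert(InterferesSketch({3, 1}, {1, 2}));   // s3 overlaps d1
  assert(!InterferesSketch({4, 1}, {1, 2}));  // s4 does not overlap d1
  assert(!InterferesSketch({0, 2}, {1, 2}));  // d0 and d1 are disjoint
  return 0;
}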
TEST_F(InstructionTest, PrepareInsertAfter) {
InstructionOperand r0 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kWord32, 0);
InstructionOperand r1 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kWord32, 1);
InstructionOperand r2 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kWord32, 2);
InstructionOperand d0 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat64, 0);
InstructionOperand d1 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat64, 1);
InstructionOperand d2 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat64, 2);
{
// Moves inserted after should pick up assignments to their sources.
// Moves inserted after should cause interfering moves to be eliminated.
ZoneVector<MoveOperands*> to_eliminate(zone());
std::vector<InstructionOperand> moves = {
r1, r0, // r1 <- r0
r2, r0, // r2 <- r0
d1, d0, // d1 <- d0
d2, d0 // d2 <- d0
};
ParallelMove* pm = CreateParallelMove(moves);
MoveOperands m1(r1, r2); // r2 <- r1
pm->PrepareInsertAfter(&m1, &to_eliminate);
CHECK(m1.source().Equals(r0));
CHECK(Contains(&to_eliminate, r2, r0));
MoveOperands m2(d1, d2); // d2 <- d1
pm->PrepareInsertAfter(&m2, &to_eliminate);
CHECK(m2.source().Equals(d0));
CHECK(Contains(&to_eliminate, d2, d0));
}
if (!kSimpleFPAliasing) {
// Moves inserted after should cause all interfering moves to be eliminated.
auto s0 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat32, 0);
auto s1 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat32, 1);
auto s2 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat32, 2);
{
ZoneVector<MoveOperands*> to_eliminate(zone());
std::vector<InstructionOperand> moves = {
s0, s2, // s0 <- s2
s1, s2 // s1 <- s2
};
ParallelMove* pm = CreateParallelMove(moves);
MoveOperands m1(d1, d0); // d0 <- d1
pm->PrepareInsertAfter(&m1, &to_eliminate);
CHECK(Contains(&to_eliminate, s0, s2));
CHECK(Contains(&to_eliminate, s1, s2));
}
}
}
} // namespace compiler
} // namespace internal
} // namespace v8

View File

@ -12,6 +12,14 @@ namespace compiler {
class MoveOptimizerTest : public InstructionSequenceTest {
public:
// FP register indices which don't interfere under simple or complex aliasing.
static const int kF64_1 = 0;
static const int kF64_2 = 1;
static const int kF32_1 = 4;
static const int kF32_2 = 5;
static const int kS128_1 = 2;
static const int kS128_2 = 3;
Instruction* LastInstruction() { return sequence()->instructions().back(); }
void AddMove(Instruction* instr, TestOperand from, TestOperand to,
@ -99,8 +107,12 @@ TEST_F(MoveOptimizerTest, RemovesRedundant) {
AddMove(first_instr, Reg(0), Reg(1));
AddMove(last_instr, Reg(1), Reg(0));
AddMove(first_instr, FPReg(0), FPReg(1));
AddMove(last_instr, FPReg(1), FPReg(0));
AddMove(first_instr, FPReg(kS128_1, kSimd128), FPReg(kS128_2, kSimd128));
AddMove(last_instr, FPReg(kS128_2, kSimd128), FPReg(kS128_1, kSimd128));
AddMove(first_instr, FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64));
AddMove(last_instr, FPReg(kF64_2, kFloat64), FPReg(kF64_1, kFloat64));
AddMove(first_instr, FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32));
AddMove(last_instr, FPReg(kF32_2, kFloat32), FPReg(kF32_1, kFloat32));
EndBlock(Last());
@ -108,22 +120,38 @@ TEST_F(MoveOptimizerTest, RemovesRedundant) {
CHECK_EQ(0, NonRedundantSize(first_instr->parallel_moves()[0]));
auto move = last_instr->parallel_moves()[0];
CHECK_EQ(2, NonRedundantSize(move));
CHECK_EQ(4, NonRedundantSize(move));
CHECK(Contains(move, Reg(0), Reg(1)));
CHECK(Contains(move, FPReg(0), FPReg(1)));
CHECK(Contains(move, FPReg(kS128_1, kSimd128), FPReg(kS128_2, kSimd128)));
CHECK(Contains(move, FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64)));
CHECK(Contains(move, FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32)));
}
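The FP indices defined at the top of this fixture (kF64_1/kF64_2 = 0/1, kS128_1/kS128_2 = 2/3, kF32_1/kF32_2 = 4/5) are chosen so that no two operands alias even under ARM-style combine aliasing, assuming the test indices map directly onto s/d/q register codes. A small self-check of that claim using plain lane arithmetic:

#include <cassert>

// Float32 lanes covered by an FP register of a given width under combine
// aliasing: kFloat32 covers 1 lane, kFloat64 covers 2, kSimd128 covers 4.
struct LaneRange { int first, count; };

LaneRange Lanes(int code, int lanes_per_reg) {
  return {code * lanes_per_reg, lanes_per_reg};
}

bool Disjoint(LaneRange a, LaneRange b) {
  return a.first + a.count <= b.first || b.first + b.count <= a.first;
}

int main() {
  // The fixture's constants: d0/d1, q2/q3, s4/s5.
  LaneRange all[] = {Lanes(0, 2), Lanes(1, 2),   // s0-s1, s2-s3
                     Lanes(2, 4), Lanes(3, 4),   // s8-s11, s12-s15
                     Lanes(4, 1), Lanes(5, 1)};  // s4, s5
  for (int i = 0; i < 6; ++i)
    for (int j = i + 1; j < 6; ++j) assert(Disjoint(all[i], all[j]));
  return 0;
}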
TEST_F(MoveOptimizerTest, RemovesRedundantExplicit) {
int first_reg_index = GetAllocatableCode(0);
int second_reg_index = GetAllocatableCode(1);
int index1 = GetAllocatableCode(0);
int index2 = GetAllocatableCode(1);
int s128_1 = GetAllocatableCode(kS128_1, kSimd128);
int s128_2 = GetAllocatableCode(kS128_2, kSimd128);
int f64_1 = GetAllocatableCode(kF64_1, kFloat64);
int f64_2 = GetAllocatableCode(kF64_2, kFloat64);
int f32_1 = GetAllocatableCode(kF32_1, kFloat32);
int f32_2 = GetAllocatableCode(kF32_2, kFloat32);
StartBlock();
auto first_instr = EmitNop();
auto last_instr = EmitNop();
AddMove(first_instr, Reg(first_reg_index), ExplicitReg(second_reg_index));
AddMove(last_instr, Reg(second_reg_index), Reg(first_reg_index));
AddMove(first_instr, Reg(index1), ExplicitReg(index2));
AddMove(last_instr, Reg(index2), Reg(index1));
AddMove(first_instr, FPReg(s128_1, kSimd128),
ExplicitFPReg(s128_2, kSimd128));
AddMove(last_instr, FPReg(s128_2, kSimd128), FPReg(s128_1, kSimd128));
AddMove(first_instr, FPReg(f64_1, kFloat64), ExplicitFPReg(f64_2, kFloat64));
AddMove(last_instr, FPReg(f64_2, kFloat64), FPReg(f64_1, kFloat64));
AddMove(first_instr, FPReg(f32_1, kFloat32), ExplicitFPReg(f32_2, kFloat32));
AddMove(last_instr, FPReg(f32_2, kFloat32), FPReg(f32_1, kFloat32));
EndBlock(Last());
@ -131,8 +159,12 @@ TEST_F(MoveOptimizerTest, RemovesRedundantExplicit) {
CHECK_EQ(0, NonRedundantSize(first_instr->parallel_moves()[0]));
auto move = last_instr->parallel_moves()[0];
CHECK_EQ(1, NonRedundantSize(move));
CHECK(Contains(move, Reg(first_reg_index), ExplicitReg(second_reg_index)));
CHECK_EQ(4, NonRedundantSize(move));
CHECK(Contains(move, Reg(index1), ExplicitReg(index2)));
CHECK(
Contains(move, FPReg(s128_1, kSimd128), ExplicitFPReg(s128_2, kSimd128)));
CHECK(Contains(move, FPReg(f64_1, kFloat64), ExplicitFPReg(f64_2, kFloat64)));
CHECK(Contains(move, FPReg(f32_1, kFloat32), ExplicitFPReg(f32_2, kFloat32)));
}
@ -167,10 +199,18 @@ TEST_F(MoveOptimizerTest, SimpleMerge) {
StartBlock();
EndBlock(Jump(2));
AddMove(LastInstruction(), Reg(0), Reg(1));
AddMove(LastInstruction(), FPReg(kS128_1, kSimd128),
FPReg(kS128_2, kSimd128));
AddMove(LastInstruction(), FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64));
AddMove(LastInstruction(), FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32));
StartBlock();
EndBlock(Jump(1));
AddMove(LastInstruction(), Reg(0), Reg(1));
AddMove(LastInstruction(), FPReg(kS128_1, kSimd128),
FPReg(kS128_2, kSimd128));
AddMove(LastInstruction(), FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64));
AddMove(LastInstruction(), FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32));
StartBlock();
EndBlock(Last());
@ -180,8 +220,11 @@ TEST_F(MoveOptimizerTest, SimpleMerge) {
Optimize();
auto move = last->parallel_moves()[0];
CHECK_EQ(1, NonRedundantSize(move));
CHECK_EQ(4, NonRedundantSize(move));
CHECK(Contains(move, Reg(0), Reg(1)));
CHECK(Contains(move, FPReg(kS128_1, kSimd128), FPReg(kS128_2, kSimd128)));
CHECK(Contains(move, FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64)));
CHECK(Contains(move, FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32)));
}
@ -195,16 +238,25 @@ TEST_F(MoveOptimizerTest, SimpleMergeCycle) {
AddMove(gap_0, Reg(0), Reg(1));
AddMove(LastInstruction(), Reg(1), Reg(0));
AddMove(gap_0, FPReg(0), FPReg(1));
AddMove(LastInstruction(), FPReg(1), FPReg(0));
AddMove(gap_0, FPReg(kS128_1, kSimd128), FPReg(kS128_2, kSimd128));
AddMove(LastInstruction(), FPReg(kS128_2, kSimd128),
FPReg(kS128_1, kSimd128));
AddMove(gap_0, FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64));
AddMove(LastInstruction(), FPReg(kF64_2, kFloat64), FPReg(kF64_1, kFloat64));
AddMove(gap_0, FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32));
AddMove(LastInstruction(), FPReg(kF32_2, kFloat32), FPReg(kF32_1, kFloat32));
StartBlock();
EndBlock(Jump(1));
auto gap_1 = LastInstruction();
AddMove(gap_1, Reg(0), Reg(1));
AddMove(gap_1, Reg(1), Reg(0));
AddMove(gap_1, FPReg(0), FPReg(1));
AddMove(gap_1, FPReg(1), FPReg(0));
AddMove(gap_1, FPReg(kS128_1, kSimd128), FPReg(kS128_2, kSimd128));
AddMove(gap_1, FPReg(kS128_2, kSimd128), FPReg(kS128_1, kSimd128));
AddMove(gap_1, FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64));
AddMove(gap_1, FPReg(kF64_2, kFloat64), FPReg(kF64_1, kFloat64));
AddMove(gap_1, FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32));
AddMove(gap_1, FPReg(kF32_2, kFloat32), FPReg(kF32_1, kFloat32));
StartBlock();
EndBlock(Last());
@ -216,11 +268,15 @@ TEST_F(MoveOptimizerTest, SimpleMergeCycle) {
CHECK(gap_0->AreMovesRedundant());
CHECK(gap_1->AreMovesRedundant());
auto move = last->parallel_moves()[0];
CHECK_EQ(4, NonRedundantSize(move));
CHECK_EQ(8, NonRedundantSize(move));
CHECK(Contains(move, Reg(0), Reg(1)));
CHECK(Contains(move, Reg(1), Reg(0)));
CHECK(Contains(move, FPReg(0), FPReg(1)));
CHECK(Contains(move, FPReg(1), FPReg(0)));
CHECK(Contains(move, FPReg(kS128_1, kSimd128), FPReg(kS128_2, kSimd128)));
CHECK(Contains(move, FPReg(kS128_2, kSimd128), FPReg(kS128_1, kSimd128)));
CHECK(Contains(move, FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64)));
CHECK(Contains(move, FPReg(kF64_2, kFloat64), FPReg(kF64_1, kFloat64)));
CHECK(Contains(move, FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32)));
CHECK(Contains(move, FPReg(kF32_2, kFloat32), FPReg(kF32_1, kFloat32)));
}
@ -342,8 +398,31 @@ TEST_F(MoveOptimizerTest, ClobberedDestinationsAreEliminated) {
EmitNop();
Instruction* first_instr = LastInstruction();
AddMove(first_instr, Reg(0), Reg(1));
AddMove(first_instr, FPReg(0), FPReg(1));
EmitOOI(Reg(1), FPReg(1), 0, nullptr);
EmitOI(Reg(1), 0, nullptr);
Instruction* last_instr = LastInstruction();
EndBlock();
Optimize();
ParallelMove* first_move = first_instr->parallel_moves()[0];
CHECK_EQ(0, NonRedundantSize(first_move));
ParallelMove* last_move = last_instr->parallel_moves()[0];
CHECK_EQ(0, NonRedundantSize(last_move));
}
TEST_F(MoveOptimizerTest, ClobberedFPDestinationsAreEliminated) {
StartBlock();
EmitNop();
Instruction* first_instr = LastInstruction();
AddMove(first_instr, FPReg(4, kFloat64), FPReg(1, kFloat64));
if (!kSimpleFPAliasing) {
// We clobber q0 below. This is aliased by d0, d1, s0, s1, s2, and s3.
// Add moves to registers s2 and s3.
AddMove(first_instr, FPReg(10, kFloat32), FPReg(0, kFloat32));
AddMove(first_instr, FPReg(11, kFloat32), FPReg(1, kFloat32));
}
// Clobbers output register 0.
EmitOI(FPReg(0, kSimd128), 0, nullptr);
Instruction* last_instr = LastInstruction();
EndBlock();
Optimize();

View File

@ -101,13 +101,14 @@ TEST_F(RegisterAllocatorTest, CanAllocateThreeRegisters) {
Allocate();
}
TEST_F(RegisterAllocatorTest, CanAllocateThreeFPRegisters) {
// return p0 + p1;
TEST_F(RegisterAllocatorTest, CanAllocateFPRegisters) {
StartBlock();
VReg a_reg = FPParameter();
VReg b_reg = FPParameter();
VReg c_reg = EmitOI(FPReg(1), Reg(a_reg, 1), Reg(b_reg, 0));
Return(c_reg);
TestOperand inputs[] = {
Reg(FPParameter(kFloat64)), Reg(FPParameter(kFloat64)),
Reg(FPParameter(kFloat32)), Reg(FPParameter(kFloat32)),
Reg(FPParameter(kSimd128)), Reg(FPParameter(kSimd128))};
VReg out1 = EmitOI(FPReg(1, kFloat64), arraysize(inputs), inputs);
Return(out1);
EndBlock(Last());
Allocate();

View File

@ -16,8 +16,6 @@ class RegisterConfigurationUnitTest : public ::testing::Test {
public:
RegisterConfigurationUnitTest() {}
virtual ~RegisterConfigurationUnitTest() {}
private:
};
TEST_F(RegisterConfigurationUnitTest, BasicProperties) {
@ -30,9 +28,8 @@ TEST_F(RegisterConfigurationUnitTest, BasicProperties) {
RegisterConfiguration test(
kNumGeneralRegs, kNumDoubleRegs, kNumAllocatableGeneralRegs,
kNumAllocatableDoubleRegs, kNumAllocatableDoubleRegs, general_codes,
double_codes, RegisterConfiguration::OVERLAP, nullptr, nullptr, nullptr,
nullptr);
kNumAllocatableDoubleRegs, general_codes, double_codes,
RegisterConfiguration::OVERLAP, nullptr, nullptr, nullptr, nullptr);
EXPECT_EQ(test.num_general_registers(), kNumGeneralRegs);
EXPECT_EQ(test.num_double_registers(), kNumDoubleRegs);
@ -67,9 +64,8 @@ TEST_F(RegisterConfigurationUnitTest, CombineAliasing) {
RegisterConfiguration test(
kNumGeneralRegs, kNumDoubleRegs, kNumAllocatableGeneralRegs,
kNumAllocatableDoubleRegs, kNumAllocatableDoubleRegs, general_codes,
double_codes, RegisterConfiguration::COMBINE, nullptr, nullptr, nullptr,
nullptr);
kNumAllocatableDoubleRegs, general_codes, double_codes,
RegisterConfiguration::COMBINE, nullptr, nullptr, nullptr, nullptr);
// There are 3 allocatable double regs, but only 2 can alias float regs.
EXPECT_EQ(test.num_allocatable_float_registers(), 4);
@ -157,9 +153,10 @@ TEST_F(RegisterConfigurationUnitTest, CombineAliasing) {
test.GetAliases(kFloat64, RegisterConfiguration::kMaxFPRegisters / 2 + 1,
kFloat32, &alias_base_index),
0);
EXPECT_EQ(test.GetAliases(kFloat64, RegisterConfiguration::kMaxFPRegisters,
kFloat32, &alias_base_index),
0);
EXPECT_EQ(
test.GetAliases(kFloat64, RegisterConfiguration::kMaxFPRegisters - 1,
kFloat32, &alias_base_index),
0);
}
} // namespace internal
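With the num_allocatable_aliased_double_registers argument gone, the allocatable float registers checked above are derived from the allocatable double registers by the configuration itself. A sketch of that derivation under combine aliasing, with a hypothetical helper rather than the RegisterConfiguration constructor; the register counts in main are made up for illustration:

#include <cstdint>
#include <cstdio>
#include <vector>

// Each allocatable double register d<k> contributes float codes 2k and 2k + 1,
// as long as they stay below the float register count.
std::vector<int> FloatCodesFromDoubleCodes(const std::vector<int>& double_codes,
                                           int num_float_registers,
                                           uint32_t* float_mask) {
  std::vector<int> float_codes;
  *float_mask = 0;
  for (int d : double_codes) {
    for (int s = d * 2; s <= d * 2 + 1; ++s) {
      if (s >= num_float_registers) continue;
      float_codes.push_back(s);
      *float_mask |= uint32_t{1} << s;
    }
  }
  return float_codes;
}

int main() {
  uint32_t mask = 0;
  // Doubles d0-d2 and d8 allocatable, 16 single-precision registers available.
  auto floats = FloatCodesFromDoubleCodes({0, 1, 2, 8}, 16, &mask);
  std::printf("%zu float codes, mask 0x%x\n", floats.size(), mask);  // 6, 0x3f
  return 0;
}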

View File

@ -42,6 +42,7 @@
'compiler/graph-trimmer-unittest.cc',
'compiler/graph-unittest.cc',
'compiler/graph-unittest.h',
'compiler/instruction-unittest.cc',
'compiler/instruction-selector-unittest.cc',
'compiler/instruction-selector-unittest.h',
'compiler/instruction-sequence-unittest.cc',