[Turbofan] Add concept of FP register aliasing on ARM 32.

- Modifies RegisterConfiguration to specify complex aliasing on ARM 32.
- Modifies RegisterAllocator to consider aliasing.
- Modifies ParallelMove::PrepareInsertAfter to handle aliasing.
- Modifies GapResolver to split wider register moves when interference
with smaller moves is detected.
- Modifies MoveOptimizer to handle aliasing.
- Adds ARM 32 macro-assembler pseudo move instructions to handle cases where
  split moves don't correspond to actual s-registers.
- Modifies CodeGenerator::AssembleMove and AssembleSwap to handle moves of
  different widths, and moves involving pseudo-s-registers.
- Adds unit tests for FP operand interference checking and PrepareInsertAfter.
- Adds more tests of FP for the move optimizer and register allocator.

LOG=N
BUG=v8:4124

Review-Url: https://codereview.chromium.org/2410673002
Cr-Commit-Position: refs/heads/master@{#40597}
Authored by bbudge on 2016-10-26 09:04:11 -07:00; committed by Commit bot
commit 09ab8e6ad9 (parent f6c3fd0a74)
25 changed files with 1208 additions and 287 deletions
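
For orientation, and not part of this CL: on ARM 32, VFP registers alias, with s(2n) and s(2n+1) overlaying d(n) for n < 16, while d16-d31 have no architectural s-register views. The CL models the missing views with "imaginary" codes s32-s63. A minimal standalone C++ sketch of that mapping:

// Standalone sketch (not part of this CL) of the ARM 32 VFP aliasing the CL
// models: s(2n) and s(2n+1) overlay d(n) for n < 16; d16-d31 have no
// architectural s-register views, hence the "imaginary" codes s32-s63.
#include <cstdio>

int main() {
  for (int d = 0; d < 32; ++d) {
    int s_lo = d * 2;      // low half of d<d>
    int s_hi = d * 2 + 1;  // high half of d<d>
    if (d < 16) {
      std::printf("d%-2d aliases s%d (low) and s%d (high)\n", d, s_lo, s_hi);
    } else {
      // Codes 32-63 name no real s-registers; the macro-assembler's
      // VmovExtended pseudo-moves route them through a core register.
      std::printf("d%-2d -> imaginary s%d/s%d\n", d, s_lo, s_hi);
    }
  }
  return 0;
}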

View File

@ -1051,6 +1051,69 @@ void MacroAssembler::VmovLow(DwVfpRegister dst, Register src) {
}
}
void MacroAssembler::VmovExtended(Register dst, int src_code) {
DCHECK_LE(32, src_code);
DCHECK_GT(64, src_code);
if (src_code & 0x1) {
VmovHigh(dst, DwVfpRegister::from_code(src_code / 2));
} else {
VmovLow(dst, DwVfpRegister::from_code(src_code / 2));
}
}
void MacroAssembler::VmovExtended(int dst_code, Register src) {
DCHECK_LE(32, dst_code);
DCHECK_GT(64, dst_code);
if (dst_code & 0x1) {
VmovHigh(DwVfpRegister::from_code(dst_code / 2), src);
} else {
VmovLow(DwVfpRegister::from_code(dst_code / 2), src);
}
}
void MacroAssembler::VmovExtended(int dst_code, int src_code,
Register scratch) {
if (src_code < 32 && dst_code < 32) {
// src and dst are both s-registers.
vmov(SwVfpRegister::from_code(dst_code),
SwVfpRegister::from_code(src_code));
} else if (src_code < 32) {
// src is an s-register.
vmov(scratch, SwVfpRegister::from_code(src_code));
VmovExtended(dst_code, scratch);
} else if (dst_code < 32) {
// dst is an s-register.
VmovExtended(scratch, src_code);
vmov(SwVfpRegister::from_code(dst_code), scratch);
} else {
// Neither src nor dst is an s-register.
DCHECK_GT(64, src_code);
DCHECK_GT(64, dst_code);
VmovExtended(scratch, src_code);
VmovExtended(dst_code, scratch);
}
}
void MacroAssembler::VmovExtended(int dst_code, const MemOperand& src,
Register scratch) {
if (dst_code >= 32) {
ldr(scratch, src);
VmovExtended(dst_code, scratch);
} else {
vldr(SwVfpRegister::from_code(dst_code), src);
}
}
void MacroAssembler::VmovExtended(const MemOperand& dst, int src_code,
Register scratch) {
if (src_code >= 32) {
VmovExtended(scratch, src_code);
str(scratch, dst);
} else {
vstr(SwVfpRegister::from_code(src_code), dst);
}
}
void MacroAssembler::LslPair(Register dst_low, Register dst_high,
Register src_low, Register src_high,
Register scratch, Register shift) {
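
For illustration only (standalone, not code from this CL): how an extended s-register code resolves to one half of a high d-register, matching the even/odd check in the VmovExtended overloads above.

// Illustrative only: an s-register code >= 32 maps to d(code / 2), with the
// low bit selecting the low or high half, mirroring VmovExtended's checks.
#include <cassert>
#include <cstdio>

void DescribeExtendedSReg(int code) {
  assert(code >= 32 && code < 64);
  int d_reg = code / 2;                         // d16..d31
  const char* half = (code & 1) ? "high" : "low";
  std::printf("s%d is the %s half of d%d\n", code, half, d_reg);
}

int main() {
  DescribeExtendedSReg(33);  // high half of d16 -> handled via VmovHigh
  DescribeExtendedSReg(62);  // low half of d31  -> handled via VmovLow
  return 0;
}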

View File

@ -549,6 +549,14 @@ class MacroAssembler: public Assembler {
void VmovLow(Register dst, DwVfpRegister src);
void VmovLow(DwVfpRegister dst, Register src);
// Simulate s-register moves for imaginary s32 - s63 registers.
void VmovExtended(Register dst, int src_code);
void VmovExtended(int dst_code, Register src);
// Move between s-registers and imaginary s-registers.
void VmovExtended(int dst_code, int src_code, Register scratch);
void VmovExtended(int dst_code, const MemOperand& src, Register scratch);
void VmovExtended(const MemOperand& dst, int src_code, Register scratch);
void LslPair(Register dst_low, Register dst_high, Register src_low,
Register src_high, Register scratch, Register shift);
void LslPair(Register dst_low, Register dst_high, Register src_low,

View File

@ -136,25 +136,13 @@ class ArmOperandConverter final : public InstructionOperandConverter {
FrameOffset offset = frame_access_state()->GetFrameOffset(slot);
return MemOperand(offset.from_stack_pointer() ? sp : fp, offset.offset());
}
FloatRegister InputFloat32Register(size_t index) {
return ToFloat32Register(instr_->InputAt(index));
}
FloatRegister OutputFloat32Register() {
return ToFloat32Register(instr_->Output());
}
FloatRegister ToFloat32Register(InstructionOperand* op) {
return LowDwVfpRegister::from_code(ToDoubleRegister(op).code()).low();
}
};
namespace {
class OutOfLineLoadFloat32 final : public OutOfLineCode {
class OutOfLineLoadFloat final : public OutOfLineCode {
public:
OutOfLineLoadFloat32(CodeGenerator* gen, SwVfpRegister result)
OutOfLineLoadFloat(CodeGenerator* gen, SwVfpRegister result)
: OutOfLineCode(gen), result_(result) {}
void Generate() final {
@ -1119,54 +1107,54 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmVcmpF32:
if (instr->InputAt(1)->IsFPRegister()) {
__ VFPCompareAndSetFlags(i.InputFloat32Register(0),
i.InputFloat32Register(1));
__ VFPCompareAndSetFlags(i.InputFloatRegister(0),
i.InputFloatRegister(1));
} else {
DCHECK(instr->InputAt(1)->IsImmediate());
// 0.0 is the only immediate supported by vcmp instructions.
DCHECK(i.InputFloat32(1) == 0.0f);
__ VFPCompareAndSetFlags(i.InputFloat32Register(0), i.InputFloat32(1));
__ VFPCompareAndSetFlags(i.InputFloatRegister(0), i.InputFloat32(1));
}
DCHECK_EQ(SetCC, i.OutputSBit());
break;
case kArmVaddF32:
__ vadd(i.OutputFloat32Register(), i.InputFloat32Register(0),
i.InputFloat32Register(1));
__ vadd(i.OutputFloatRegister(), i.InputFloatRegister(0),
i.InputFloatRegister(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVsubF32:
__ vsub(i.OutputFloat32Register(), i.InputFloat32Register(0),
i.InputFloat32Register(1));
__ vsub(i.OutputFloatRegister(), i.InputFloatRegister(0),
i.InputFloatRegister(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVmulF32:
__ vmul(i.OutputFloat32Register(), i.InputFloat32Register(0),
i.InputFloat32Register(1));
__ vmul(i.OutputFloatRegister(), i.InputFloatRegister(0),
i.InputFloatRegister(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVmlaF32:
__ vmla(i.OutputFloat32Register(), i.InputFloat32Register(1),
i.InputFloat32Register(2));
__ vmla(i.OutputFloatRegister(), i.InputFloatRegister(1),
i.InputFloatRegister(2));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVmlsF32:
__ vmls(i.OutputFloat32Register(), i.InputFloat32Register(1),
i.InputFloat32Register(2));
__ vmls(i.OutputFloatRegister(), i.InputFloatRegister(1),
i.InputFloatRegister(2));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVdivF32:
__ vdiv(i.OutputFloat32Register(), i.InputFloat32Register(0),
i.InputFloat32Register(1));
__ vdiv(i.OutputFloatRegister(), i.InputFloatRegister(0),
i.InputFloatRegister(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVsqrtF32:
__ vsqrt(i.OutputFloat32Register(), i.InputFloat32Register(0));
__ vsqrt(i.OutputFloatRegister(), i.InputFloatRegister(0));
break;
case kArmVabsF32:
__ vabs(i.OutputFloat32Register(), i.InputFloat32Register(0));
__ vabs(i.OutputFloatRegister(), i.InputFloatRegister(0));
break;
case kArmVnegF32:
__ vneg(i.OutputFloat32Register(), i.InputFloat32Register(0));
__ vneg(i.OutputFloatRegister(), i.InputFloatRegister(0));
break;
case kArmVcmpF64:
if (instr->InputAt(1)->IsFPRegister()) {
@ -1235,7 +1223,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
case kArmVrintmF32: {
CpuFeatureScope scope(masm(), ARMv8);
__ vrintm(i.OutputFloat32Register(), i.InputFloat32Register(0));
__ vrintm(i.OutputFloatRegister(), i.InputFloatRegister(0));
break;
}
case kArmVrintmF64: {
@ -1245,7 +1233,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmVrintpF32: {
CpuFeatureScope scope(masm(), ARMv8);
__ vrintp(i.OutputFloat32Register(), i.InputFloat32Register(0));
__ vrintp(i.OutputFloatRegister(), i.InputFloatRegister(0));
break;
}
case kArmVrintpF64: {
@ -1255,7 +1243,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmVrintzF32: {
CpuFeatureScope scope(masm(), ARMv8);
__ vrintz(i.OutputFloat32Register(), i.InputFloat32Register(0));
__ vrintz(i.OutputFloatRegister(), i.InputFloatRegister(0));
break;
}
case kArmVrintzF64: {
@ -1270,7 +1258,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmVrintnF32: {
CpuFeatureScope scope(masm(), ARMv8);
__ vrintn(i.OutputFloat32Register(), i.InputFloat32Register(0));
__ vrintn(i.OutputFloatRegister(), i.InputFloatRegister(0));
break;
}
case kArmVrintnF64: {
@ -1279,26 +1267,26 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArmVcvtF32F64: {
__ vcvt_f32_f64(i.OutputFloat32Register(), i.InputDoubleRegister(0));
__ vcvt_f32_f64(i.OutputFloatRegister(), i.InputDoubleRegister(0));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmVcvtF64F32: {
__ vcvt_f64_f32(i.OutputDoubleRegister(), i.InputFloat32Register(0));
__ vcvt_f64_f32(i.OutputDoubleRegister(), i.InputFloatRegister(0));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmVcvtF32S32: {
SwVfpRegister scratch = kScratchDoubleReg.low();
__ vmov(scratch, i.InputRegister(0));
__ vcvt_f32_s32(i.OutputFloat32Register(), scratch);
__ vcvt_f32_s32(i.OutputFloatRegister(), scratch);
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmVcvtF32U32: {
SwVfpRegister scratch = kScratchDoubleReg.low();
__ vmov(scratch, i.InputRegister(0));
__ vcvt_f32_u32(i.OutputFloat32Register(), scratch);
__ vcvt_f32_u32(i.OutputFloatRegister(), scratch);
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
@ -1318,7 +1306,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmVcvtS32F32: {
SwVfpRegister scratch = kScratchDoubleReg.low();
__ vcvt_s32_f32(scratch, i.InputFloat32Register(0));
__ vcvt_s32_f32(scratch, i.InputFloatRegister(0));
__ vmov(i.OutputRegister(), scratch);
// Avoid INT32_MAX as an overflow indicator and use INT32_MIN instead,
// because INT32_MIN allows easier out-of-bounds detection.
@ -1329,7 +1317,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmVcvtU32F32: {
SwVfpRegister scratch = kScratchDoubleReg.low();
__ vcvt_u32_f32(scratch, i.InputFloat32Register(0));
__ vcvt_u32_f32(scratch, i.InputFloatRegister(0));
__ vmov(i.OutputRegister(), scratch);
// Avoid UINT32_MAX as an overflow indicator and use 0 instead,
// because 0 allows easier out-of-bounds detection.
@ -1353,11 +1341,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArmVmovU32F32:
__ vmov(i.OutputRegister(), i.InputFloat32Register(0));
__ vmov(i.OutputRegister(), i.InputFloatRegister(0));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVmovF32U32:
__ vmov(i.OutputFloat32Register(), i.InputRegister(0));
__ vmov(i.OutputFloatRegister(), i.InputRegister(0));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVmovLowU32F64:
@ -1415,12 +1403,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVldrF32: {
__ vldr(i.OutputFloat32Register(), i.InputOffset());
__ vldr(i.OutputFloatRegister(), i.InputOffset());
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmVstrF32:
__ vstr(i.InputFloat32Register(0), i.InputOffset(1));
__ vstr(i.InputFloatRegister(0), i.InputOffset(1));
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmVldrF64:
@ -1432,9 +1420,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
case kArmFloat32Max: {
SwVfpRegister result = i.OutputFloat32Register();
SwVfpRegister left = i.InputFloat32Register(0);
SwVfpRegister right = i.InputFloat32Register(1);
SwVfpRegister result = i.OutputFloatRegister();
SwVfpRegister left = i.InputFloatRegister(0);
SwVfpRegister right = i.InputFloatRegister(1);
if (left.is(right)) {
__ Move(result, left);
} else {
@ -1460,9 +1448,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArmFloat32Min: {
SwVfpRegister result = i.OutputFloat32Register();
SwVfpRegister left = i.InputFloat32Register(0);
SwVfpRegister right = i.InputFloat32Register(1);
SwVfpRegister result = i.OutputFloatRegister();
SwVfpRegister left = i.InputFloatRegister(0);
SwVfpRegister right = i.InputFloatRegister(1);
if (left.is(right)) {
__ Move(result, left);
} else {
@ -1501,7 +1489,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
frame_access_state()->IncreaseSPDelta(kDoubleSize / kPointerSize);
} else {
DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
__ vpush(i.InputFloat32Register(0));
__ vpush(i.InputFloatRegister(0));
frame_access_state()->IncreaseSPDelta(1);
}
} else {
@ -1532,7 +1520,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
ASSEMBLE_CHECKED_LOAD_INTEGER(ldr);
break;
case kCheckedLoadFloat32:
ASSEMBLE_CHECKED_LOAD_FP(Float32);
ASSEMBLE_CHECKED_LOAD_FP(Float);
break;
case kCheckedLoadFloat64:
ASSEMBLE_CHECKED_LOAD_FP(Double);
@ -1547,7 +1535,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
ASSEMBLE_CHECKED_STORE_INTEGER(str);
break;
case kCheckedStoreFloat32:
ASSEMBLE_CHECKED_STORE_FP(Float32);
ASSEMBLE_CHECKED_STORE_FP(Float);
break;
case kCheckedStoreFloat64:
ASSEMBLE_CHECKED_STORE_FP(Double);
@ -1789,7 +1777,6 @@ void CodeGenerator::AssembleReturn() {
__ Ret(pop_count);
}
void CodeGenerator::AssembleMove(InstructionOperand* source,
InstructionOperand* destination) {
ArmOperandConverter g(this, nullptr);
@ -1858,12 +1845,12 @@ void CodeGenerator::AssembleMove(InstructionOperand* source,
}
if (destination->IsStackSlot()) __ str(dst, g.ToMemOperand(destination));
} else if (src.type() == Constant::kFloat32) {
if (destination->IsFPStackSlot()) {
if (destination->IsFloatStackSlot()) {
MemOperand dst = g.ToMemOperand(destination);
__ mov(ip, Operand(bit_cast<int32_t>(src.ToFloat32())));
__ str(ip, dst);
} else {
SwVfpRegister dst = g.ToFloat32Register(destination);
SwVfpRegister dst = g.ToFloatRegister(destination);
__ vmov(dst, src.ToFloat32());
}
} else {
@ -1872,28 +1859,60 @@ void CodeGenerator::AssembleMove(InstructionOperand* source,
? g.ToDoubleRegister(destination)
: kScratchDoubleReg;
__ vmov(dst, src.ToFloat64(), kScratchReg);
if (destination->IsFPStackSlot()) {
if (destination->IsDoubleStackSlot()) {
__ vstr(dst, g.ToMemOperand(destination));
}
}
} else if (source->IsFPRegister()) {
DwVfpRegister src = g.ToDoubleRegister(source);
if (destination->IsFPRegister()) {
DwVfpRegister dst = g.ToDoubleRegister(destination);
__ Move(dst, src);
MachineRepresentation rep = LocationOperand::cast(source)->representation();
if (rep == MachineRepresentation::kFloat64) {
DwVfpRegister src = g.ToDoubleRegister(source);
if (destination->IsDoubleRegister()) {
DwVfpRegister dst = g.ToDoubleRegister(destination);
__ Move(dst, src);
} else {
DCHECK(destination->IsDoubleStackSlot());
__ vstr(src, g.ToMemOperand(destination));
}
} else {
DCHECK(destination->IsFPStackSlot());
__ vstr(src, g.ToMemOperand(destination));
DCHECK_EQ(MachineRepresentation::kFloat32, rep);
// GapResolver may give us reg codes that don't map to actual s-registers.
// Generate code to work around those cases.
int src_code = LocationOperand::cast(source)->register_code();
if (destination->IsFloatRegister()) {
int dst_code = LocationOperand::cast(destination)->register_code();
__ VmovExtended(dst_code, src_code, kScratchReg);
} else {
DCHECK(destination->IsFloatStackSlot());
__ VmovExtended(g.ToMemOperand(destination), src_code, kScratchReg);
}
}
} else if (source->IsFPStackSlot()) {
MemOperand src = g.ToMemOperand(source);
MachineRepresentation rep =
LocationOperand::cast(destination)->representation();
if (destination->IsFPRegister()) {
if (rep == MachineRepresentation::kFloat64) {
__ vldr(g.ToDoubleRegister(destination), src);
} else {
DCHECK_EQ(MachineRepresentation::kFloat32, rep);
// GapResolver may give us reg codes that don't map to actual
// s-registers. Generate code to work around those cases.
int dst_code = LocationOperand::cast(destination)->register_code();
__ VmovExtended(dst_code, src, kScratchReg);
}
} else {
DCHECK(destination->IsFPStackSlot());
if (rep == MachineRepresentation::kFloat64) {
DwVfpRegister temp = kScratchDoubleReg;
__ vldr(temp, src);
__ vstr(temp, g.ToMemOperand(destination));
} else {
DCHECK_EQ(MachineRepresentation::kFloat32, rep);
SwVfpRegister temp = kScratchDoubleReg.low();
__ vldr(temp, src);
__ vstr(temp, g.ToMemOperand(destination));
}
}
} else {
UNREACHABLE();
@ -1933,17 +1952,35 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
__ str(temp_0, dst);
__ vstr(temp_1, src);
} else if (source->IsFPRegister()) {
MachineRepresentation rep = LocationOperand::cast(source)->representation();
LowDwVfpRegister temp = kScratchDoubleReg;
DwVfpRegister src = g.ToDoubleRegister(source);
if (destination->IsFPRegister()) {
DwVfpRegister dst = g.ToDoubleRegister(destination);
__ vswp(src, dst);
if (rep == MachineRepresentation::kFloat64) {
DwVfpRegister src = g.ToDoubleRegister(source);
if (destination->IsFPRegister()) {
DwVfpRegister dst = g.ToDoubleRegister(destination);
__ vswp(src, dst);
} else {
DCHECK(destination->IsFPStackSlot());
MemOperand dst = g.ToMemOperand(destination);
__ Move(temp, src);
__ vldr(src, dst);
__ vstr(temp, dst);
}
} else {
DCHECK(destination->IsFPStackSlot());
MemOperand dst = g.ToMemOperand(destination);
__ Move(temp, src);
__ vldr(src, dst);
__ vstr(temp, dst);
DCHECK_EQ(MachineRepresentation::kFloat32, rep);
int src_code = LocationOperand::cast(source)->register_code();
if (destination->IsFPRegister()) {
int dst_code = LocationOperand::cast(destination)->register_code();
__ VmovExtended(temp.low().code(), src_code, kScratchReg);
__ VmovExtended(src_code, dst_code, kScratchReg);
__ VmovExtended(dst_code, temp.low().code(), kScratchReg);
} else {
DCHECK(destination->IsFPStackSlot());
MemOperand dst = g.ToMemOperand(destination);
__ VmovExtended(temp.low().code(), src_code, kScratchReg);
__ VmovExtended(src_code, dst, kScratchReg);
__ vstr(temp.low(), dst);
}
}
} else if (source->IsFPStackSlot()) {
DCHECK(destination->IsFPStackSlot());
@ -1951,21 +1988,29 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
LowDwVfpRegister temp_1 = kScratchDoubleReg;
MemOperand src0 = g.ToMemOperand(source);
MemOperand dst0 = g.ToMemOperand(destination);
MemOperand src1(src0.rn(), src0.offset() + kPointerSize);
MemOperand dst1(dst0.rn(), dst0.offset() + kPointerSize);
__ vldr(temp_1, dst0); // Save destination in temp_1.
__ ldr(temp_0, src0); // Then use temp_0 to copy source to destination.
__ str(temp_0, dst0);
__ ldr(temp_0, src1);
__ str(temp_0, dst1);
__ vstr(temp_1, src0);
MachineRepresentation rep = LocationOperand::cast(source)->representation();
if (rep == MachineRepresentation::kFloat64) {
MemOperand src1(src0.rn(), src0.offset() + kPointerSize);
MemOperand dst1(dst0.rn(), dst0.offset() + kPointerSize);
__ vldr(temp_1, dst0); // Save destination in temp_1.
__ ldr(temp_0, src0); // Then use temp_0 to copy source to destination.
__ str(temp_0, dst0);
__ ldr(temp_0, src1);
__ str(temp_0, dst1);
__ vstr(temp_1, src0);
} else {
DCHECK_EQ(MachineRepresentation::kFloat32, rep);
__ vldr(temp_1.low(), dst0); // Save destination in temp_1.
__ ldr(temp_0, src0); // Then use temp_0 to copy source to destination.
__ str(temp_0, dst0);
__ vstr(temp_1.low(), src0);
}
} else {
// No other combinations are possible.
UNREACHABLE();
}
}
void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
// On 32-bit ARM we emit the jump tables inline.
UNREACHABLE();

View File

@ -14,27 +14,124 @@ namespace compiler {
namespace {
#define REP_BIT(rep) (1 << static_cast<int>(rep))
const int kFloat32Bit = REP_BIT(MachineRepresentation::kFloat32);
const int kFloat64Bit = REP_BIT(MachineRepresentation::kFloat64);
inline bool Blocks(MoveOperands* move, InstructionOperand destination) {
return move->Blocks(destination);
return !move->IsEliminated() && move->source().InterferesWith(destination);
}
// Splits an FP move between two location operands into the equivalent series of
// moves between smaller sub-operands, e.g. a double move to two single moves.
// This helps reduce the number of cycles that would normally occur under FP
// aliasing, and makes swaps much easier to implement.
MoveOperands* Split(MoveOperands* move, MachineRepresentation smaller_rep,
ParallelMove* moves) {
DCHECK(!kSimpleFPAliasing);
// Splitting is only possible when the slot size is the same as float size.
DCHECK_EQ(kPointerSize, kFloatSize);
const LocationOperand& src_loc = LocationOperand::cast(move->source());
const LocationOperand& dst_loc = LocationOperand::cast(move->destination());
MachineRepresentation dst_rep = dst_loc.representation();
DCHECK_NE(smaller_rep, dst_rep);
auto src_kind = src_loc.location_kind();
auto dst_kind = dst_loc.location_kind();
inline bool IsRedundant(MoveOperands* move) { return move->IsRedundant(); }
int aliases =
1 << (ElementSizeLog2Of(dst_rep) - ElementSizeLog2Of(smaller_rep));
int base = -1;
USE(base);
DCHECK_EQ(aliases, RegisterConfiguration::Turbofan()->GetAliases(
dst_rep, 0, smaller_rep, &base));
int src_index = -1;
int slot_size = (1 << ElementSizeLog2Of(smaller_rep)) / kPointerSize;
int src_step = 1;
if (src_kind == LocationOperand::REGISTER) {
src_index = src_loc.register_code() * aliases;
} else {
src_index = src_loc.index();
// For operands that occupy multiple slots, the index refers to the last
// slot. On little-endian architectures, we start at the high slot and use a
// negative step so that register-to-slot moves are in the correct order.
src_step = -slot_size;
}
int dst_index = -1;
int dst_step = 1;
if (dst_kind == LocationOperand::REGISTER) {
dst_index = dst_loc.register_code() * aliases;
} else {
dst_index = dst_loc.index();
dst_step = -slot_size;
}
// Reuse 'move' for the first fragment. It is not pending.
move->set_source(AllocatedOperand(src_kind, smaller_rep, src_index));
move->set_destination(AllocatedOperand(dst_kind, smaller_rep, dst_index));
// Add the remaining fragment moves.
for (int i = 1; i < aliases; ++i) {
src_index += src_step;
dst_index += dst_step;
moves->AddMove(AllocatedOperand(src_kind, smaller_rep, src_index),
AllocatedOperand(dst_kind, smaller_rep, dst_index));
}
// Return the first fragment.
return move;
}
} // namespace
void GapResolver::Resolve(ParallelMove* moves) {
// Clear redundant moves, and collect FP move representations if aliasing
// is non-simple.
int reps = 0;
for (size_t i = 0; i < moves->size();) {
MoveOperands* move = (*moves)[i];
if (move->IsRedundant()) {
(*moves)[i] = moves->back();
moves->pop_back();
continue;
}
i++;
if (!kSimpleFPAliasing && move->destination().IsFPRegister()) {
reps |=
REP_BIT(LocationOperand::cast(move->destination()).representation());
}
}
void GapResolver::Resolve(ParallelMove* moves) const {
// Clear redundant moves.
auto it =
std::remove_if(moves->begin(), moves->end(), std::ptr_fun(IsRedundant));
moves->erase(it, moves->end());
for (MoveOperands* move : *moves) {
if (!kSimpleFPAliasing) {
if (reps && !base::bits::IsPowerOfTwo32(reps)) {
// Start with the smallest FP moves, so we never encounter smaller moves
// in the middle of a cycle of larger moves.
if ((reps & kFloat32Bit) != 0) {
split_rep_ = MachineRepresentation::kFloat32;
for (size_t i = 0; i < moves->size(); ++i) {
auto move = (*moves)[i];
if (!move->IsEliminated() && move->destination().IsFloatRegister())
PerformMove(moves, move);
}
}
if ((reps & kFloat64Bit) != 0) {
split_rep_ = MachineRepresentation::kFloat64;
for (size_t i = 0; i < moves->size(); ++i) {
auto move = (*moves)[i];
if (!move->IsEliminated() && move->destination().IsDoubleRegister())
PerformMove(moves, move);
}
}
}
split_rep_ = MachineRepresentation::kSimd128;
}
for (size_t i = 0; i < moves->size(); ++i) {
auto move = (*moves)[i];
if (!move->IsEliminated()) PerformMove(moves, move);
}
}
void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) const {
void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) {
// Each call to this function performs a move and deletes it from the move
// graph. We first recursively perform any move blocking this one. We mark a
// move as "pending" on entry to PerformMove in order to detect cycles in the
@ -45,15 +142,32 @@ void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) const {
// Clear this move's destination to indicate a pending move. The actual
// destination is saved on the side.
DCHECK(!move->source().IsInvalid()); // Or else it will look eliminated.
InstructionOperand source = move->source();
DCHECK(!source.IsInvalid()); // Or else it will look eliminated.
InstructionOperand destination = move->destination();
move->SetPending();
// We may need to split moves between FP locations differently.
bool is_fp_loc_move = !kSimpleFPAliasing && destination.IsFPLocationOperand();
// Perform a depth-first traversal of the move graph to resolve dependencies.
// Any unperformed, unpending move with a source the same as this one's
// destination blocks this one so recursively perform all such moves.
for (MoveOperands* other : *moves) {
if (other->Blocks(destination) && !other->IsPending()) {
for (size_t i = 0; i < moves->size(); ++i) {
auto other = (*moves)[i];
if (other->IsEliminated()) continue;
if (other->IsPending()) continue;
if (other->source().InterferesWith(destination)) {
if (!kSimpleFPAliasing && is_fp_loc_move &&
LocationOperand::cast(other->source()).representation() >
split_rep_) {
// 'other' must also be an FP location move. Break it into fragments
// of the same size as 'move'. 'other' is set to one of the fragments,
// and the rest are appended to 'moves'.
other = Split(other, split_rep_, moves);
// 'other' may not block destination now.
if (!other->source().InterferesWith(destination)) continue;
}
// Though PerformMove can change any source operand in the move graph,
// this call cannot create a blocking move via a swap (this loop does not
// miss any). Assume there is a non-blocking move with source A and this
@ -67,18 +181,18 @@ void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) const {
}
}
// We are about to resolve this move and don't need it marked as pending, so
// restore its destination.
move->set_destination(destination);
// This move's source may have changed due to swaps to resolve cycles and so
// it may now be the last move in the cycle. If so remove it.
InstructionOperand source = move->source();
if (source.InterferesWith(destination)) {
source = move->source();
if (source.EqualsCanonicalized(destination)) {
move->Eliminate();
return;
}
// We are about to resolve this move and don't need it marked as pending, so
// restore its destination.
move->set_destination(destination);
// The move may be blocked on a (at most one) pending move, in which case we
// have a cycle. Search for such a blocking move and perform a swap to
// resolve it.
@ -91,7 +205,6 @@ void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) const {
return;
}
DCHECK((*blocker)->IsPending());
// Ensure source is a register or both are stack slots, to limit swap cases.
if (source.IsStackSlot() || source.IsFPStackSlot()) {
std::swap(source, destination);
@ -99,14 +212,36 @@ void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) const {
assembler_->AssembleSwap(&source, &destination);
move->Eliminate();
// Any unperformed (including pending) move with a source of either this
// move's source or destination needs to have their source changed to
// reflect the state of affairs after the swap.
for (MoveOperands* other : *moves) {
if (other->Blocks(source)) {
other->set_source(destination);
} else if (other->Blocks(destination)) {
other->set_source(source);
// Update outstanding moves whose source may now have been moved.
if (!kSimpleFPAliasing && is_fp_loc_move) {
// We may have to split larger moves.
for (size_t i = 0; i < moves->size(); ++i) {
auto other = (*moves)[i];
if (other->IsEliminated()) continue;
if (source.InterferesWith(other->source())) {
if (LocationOperand::cast(other->source()).representation() >
split_rep_) {
other = Split(other, split_rep_, moves);
if (!source.InterferesWith(other->source())) continue;
}
other->set_source(destination);
} else if (destination.InterferesWith(other->source())) {
if (LocationOperand::cast(other->source()).representation() >
split_rep_) {
other = Split(other, split_rep_, moves);
if (!destination.InterferesWith(other->source())) continue;
}
other->set_source(source);
}
}
} else {
for (auto other : *moves) {
if (other->IsEliminated()) continue;
if (source.EqualsCanonicalized(other->source())) {
other->set_source(destination);
} else if (destination.EqualsCanonicalized(other->source())) {
other->set_source(source);
}
}
}
}
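
Not part of the CL: a worked sketch of the fragment indices Split() computes for a register-to-register kFloat64 move under kFloat32 splitting. The d3 -> d7 example and the 2x code mapping are assumed from the ARM aliasing scheme above.

// Sketch of Split()'s index arithmetic for a double register move broken
// into float fragments: double code d maps to float codes 2d and 2d+1.
#include <cstdio>

int main() {
  const int kDoubleSizeLog2 = 3, kFloatSizeLog2 = 2;
  int aliases = 1 << (kDoubleSizeLog2 - kFloatSizeLog2);  // 2 fragments
  int src_d = 3, dst_d = 7;                               // d3 -> d7
  int src_s = src_d * aliases, dst_s = dst_d * aliases;
  for (int i = 0; i < aliases; ++i) {
    std::printf("fragment %d: s%d -> s%d\n", i, src_s + i, dst_s + i);
  }
  // Prints s6 -> s14 and s7 -> s15 for the register-to-register case;
  // stack-slot operands instead step downward from the highest slot index.
  return 0;
}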

View File

@ -26,18 +26,24 @@ class GapResolver final {
InstructionOperand* destination) = 0;
};
explicit GapResolver(Assembler* assembler) : assembler_(assembler) {}
explicit GapResolver(Assembler* assembler)
: assembler_(assembler), split_rep_(MachineRepresentation::kSimd128) {}
// Resolve a set of parallel moves, emitting assembler instructions.
void Resolve(ParallelMove* parallel_move) const;
void Resolve(ParallelMove* parallel_move);
private:
// Perform the given move, possibly requiring other moves to satisfy
// dependencies.
void PerformMove(ParallelMove* moves, MoveOperands* move) const;
// Performs the given move, possibly performing other moves to unblock the
// destination operand.
void PerformMove(ParallelMove* moves, MoveOperands* move);
// Assembler used to emit moves and save registers.
Assembler* const assembler_;
// While resolving moves, the largest FP representation that can be moved.
// Any larger moves must be split into an equivalent series of moves of this
// representation.
MachineRepresentation split_rep_;
};
} // namespace compiler

View File

@ -64,8 +64,35 @@ FlagsCondition CommuteFlagsCondition(FlagsCondition condition) {
return condition;
}
bool InstructionOperand::InterferesWith(const InstructionOperand& that) const {
return EqualsCanonicalized(that);
bool InstructionOperand::InterferesWith(const InstructionOperand& other) const {
if (kSimpleFPAliasing || !this->IsFPLocationOperand() ||
!other.IsFPLocationOperand())
return EqualsCanonicalized(other);
// Aliasing is complex and both operands are fp locations.
const LocationOperand& loc = *LocationOperand::cast(this);
const LocationOperand& other_loc = LocationOperand::cast(other);
LocationOperand::LocationKind kind = loc.location_kind();
LocationOperand::LocationKind other_kind = other_loc.location_kind();
if (kind != other_kind) return false;
MachineRepresentation rep = loc.representation();
MachineRepresentation other_rep = other_loc.representation();
if (rep == other_rep) return EqualsCanonicalized(other);
if (kind == LocationOperand::REGISTER) {
// FP register-register interference.
return GetRegConfig()->AreAliases(rep, loc.register_code(), other_rep,
other_loc.register_code());
} else {
// FP slot-slot interference. Slots of different FP reps can alias because
// the gap resolver may break a move into 2 or 4 equivalent smaller moves.
DCHECK_EQ(LocationOperand::STACK_SLOT, kind);
int index_hi = loc.index();
int index_lo = index_hi - (1 << ElementSizeLog2Of(rep)) / kPointerSize + 1;
int other_index_hi = other_loc.index();
int other_index_lo =
other_index_hi - (1 << ElementSizeLog2Of(other_rep)) / kPointerSize + 1;
return other_index_hi >= index_lo && index_hi >= other_index_lo;
}
return false;
}
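
A worked example of the slot-overlap test above (indices assumed for illustration; kPointerSize is 4 on ARM 32, so a kFloat64 slot spans two word slots): a double at index 5 also covers index 4 and therefore interferes with a float slot at index 4, but not with one at index 3.

// Minimal model of the FP slot interference check: an FP slot's index names
// its highest word, so two slots interfere when their word ranges overlap.
#include <cstdio>

bool SlotsOverlap(int index_hi, int words, int other_index_hi, int other_words) {
  int index_lo = index_hi - words + 1;
  int other_index_lo = other_index_hi - other_words + 1;
  return other_index_hi >= index_lo && index_hi >= other_index_lo;
}

int main() {
  std::printf("%d\n", SlotsOverlap(5, 2, 4, 1));  // double@5 vs float@4 -> 1
  std::printf("%d\n", SlotsOverlap(5, 2, 3, 1));  // double@5 vs float@3 -> 0
  return 0;
}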
void InstructionOperand::Print(const RegisterConfiguration* config) const {
@ -232,28 +259,31 @@ bool ParallelMove::IsRedundant() const {
return true;
}
MoveOperands* ParallelMove::PrepareInsertAfter(MoveOperands* move) const {
void ParallelMove::PrepareInsertAfter(
MoveOperands* move, ZoneVector<MoveOperands*>* to_eliminate) const {
bool no_aliasing =
kSimpleFPAliasing || !move->destination().IsFPLocationOperand();
MoveOperands* replacement = nullptr;
MoveOperands* to_eliminate = nullptr;
MoveOperands* eliminated = nullptr;
for (MoveOperands* curr : *this) {
if (curr->IsEliminated()) continue;
if (curr->destination().EqualsCanonicalized(move->source())) {
// We must replace move's source with curr's destination in order to
// insert it into this ParallelMove.
DCHECK(!replacement);
replacement = curr;
if (to_eliminate != nullptr) break;
} else if (curr->destination().EqualsCanonicalized(move->destination())) {
DCHECK(!to_eliminate);
to_eliminate = curr;
if (replacement != nullptr) break;
if (no_aliasing && eliminated != nullptr) break;
} else if (curr->destination().InterferesWith(move->destination())) {
// We can eliminate curr, since move overwrites at least a part of its
// destination, implying its value is no longer live.
eliminated = curr;
to_eliminate->push_back(curr);
if (no_aliasing && replacement != nullptr) break;
}
}
DCHECK_IMPLIES(replacement == to_eliminate, replacement == nullptr);
if (replacement != nullptr) move->set_source(replacement->source());
return to_eliminate;
}
ExplicitOperand::ExplicitOperand(LocationKind kind, MachineRepresentation rep,
int index)
: LocationOperand(EXPLICIT, kind, rep, index) {

View File

@ -28,8 +28,7 @@ namespace compiler {
// Forward declarations.
class Schedule;
class InstructionOperand {
class V8_EXPORT_PRIVATE InstructionOperand {
public:
static const int kInvalidVirtualRegister = -1;
@ -119,7 +118,7 @@ class InstructionOperand {
return this->GetCanonicalizedValue() < that.GetCanonicalizedValue();
}
bool InterferesWith(const InstructionOperand& that) const;
bool InterferesWith(const InstructionOperand& other) const;
// APIs to aid debugging. For general-stream APIs, use operator<<
void Print(const RegisterConfiguration* config) const;
@ -641,8 +640,14 @@ uint64_t InstructionOperand::GetCanonicalizedValue() const {
if (IsAnyLocationOperand()) {
MachineRepresentation canonical = MachineRepresentation::kNone;
if (IsFPRegister()) {
// We treat all FP register operands the same for simple aliasing.
canonical = MachineRepresentation::kFloat64;
if (kSimpleFPAliasing) {
// We treat all FP register operands the same for simple aliasing.
canonical = MachineRepresentation::kFloat64;
} else {
// We need to distinguish FP register operands of different reps when
// aliasing is not simple (e.g. ARM).
canonical = LocationOperand::cast(this)->representation();
}
}
return InstructionOperand::KindField::update(
LocationOperand::RepresentationField::update(this->value_, canonical),
@ -659,8 +664,8 @@ struct CompareOperandModuloType {
}
};
class MoveOperands final : public ZoneObject {
class V8_EXPORT_PRIVATE MoveOperands final
: public NON_EXPORTED_BASE(ZoneObject) {
public:
MoveOperands(const InstructionOperand& source,
const InstructionOperand& destination)
@ -685,11 +690,6 @@ class MoveOperands final : public ZoneObject {
}
void SetPending() { destination_ = InstructionOperand(); }
// True if this move is a move into the given destination operand.
bool Blocks(const InstructionOperand& destination) const {
return !IsEliminated() && source().InterferesWith(destination);
}
// A move is redundant if it's been eliminated or if its source and
// destination are the same.
bool IsRedundant() const {
@ -724,8 +724,9 @@ struct PrintableMoveOperands {
std::ostream& operator<<(std::ostream& os, const PrintableMoveOperands& mo);
class ParallelMove final : public ZoneVector<MoveOperands*>, public ZoneObject {
class V8_EXPORT_PRIVATE ParallelMove final
: public NON_EXPORTED_BASE(ZoneVector<MoveOperands *>),
public NON_EXPORTED_BASE(ZoneObject) {
public:
explicit ParallelMove(Zone* zone) : ZoneVector<MoveOperands*>(zone) {
reserve(4);
@ -748,9 +749,10 @@ class ParallelMove final : public ZoneVector<MoveOperands*>, public ZoneObject {
bool IsRedundant() const;
// Prepare this ParallelMove to insert move as if it happened in a subsequent
// ParallelMove. move->source() may be changed. The MoveOperand returned
// must be Eliminated.
MoveOperands* PrepareInsertAfter(MoveOperands* move) const;
// ParallelMove. move->source() may be changed. Any MoveOperands added to
// to_eliminate must be Eliminated.
void PrepareInsertAfter(MoveOperands* move,
ZoneVector<MoveOperands*>* to_eliminate) const;
private:
DISALLOW_COPY_AND_ASSIGN(ParallelMove);

View File

@ -25,11 +25,79 @@ struct MoveKeyCompare {
};
typedef ZoneMap<MoveKey, unsigned, MoveKeyCompare> MoveMap;
typedef ZoneSet<InstructionOperand, CompareOperandModuloType> OperandSet;
bool Blocks(const OperandSet& set, const InstructionOperand& operand) {
return set.find(operand) != set.end();
}
class OperandSet {
public:
explicit OperandSet(Zone* zone) : set_(zone), fp_reps_(0) {}
void InsertOp(const InstructionOperand& op) {
set_.insert(op);
if (!kSimpleFPAliasing && op.IsFPRegister())
fp_reps_ |= RepBit(LocationOperand::cast(op).representation());
}
bool ContainsOpOrAlias(const InstructionOperand& op) const {
if (set_.find(op) != set_.end()) return true;
if (!kSimpleFPAliasing && op.IsFPRegister()) {
// Platforms where FP registers have complex aliasing need extra checks.
const LocationOperand& loc = LocationOperand::cast(op);
MachineRepresentation rep = loc.representation();
// If we haven't encountered mixed rep FP registers, skip the extra checks.
if (!HasMixedFPReps(fp_reps_ | RepBit(rep))) return false;
// Check register against aliasing registers of other FP representations.
MachineRepresentation other_rep1, other_rep2;
switch (rep) {
case MachineRepresentation::kFloat32:
other_rep1 = MachineRepresentation::kFloat64;
other_rep2 = MachineRepresentation::kSimd128;
break;
case MachineRepresentation::kFloat64:
other_rep1 = MachineRepresentation::kFloat32;
other_rep2 = MachineRepresentation::kSimd128;
break;
case MachineRepresentation::kSimd128:
other_rep1 = MachineRepresentation::kFloat32;
other_rep2 = MachineRepresentation::kFloat64;
break;
default:
UNREACHABLE();
break;
}
const RegisterConfiguration* config = RegisterConfiguration::Turbofan();
int base = -1;
int aliases =
config->GetAliases(rep, loc.register_code(), other_rep1, &base);
DCHECK(aliases > 0 || (aliases == 0 && base == -1));
while (aliases--) {
if (set_.find(AllocatedOperand(LocationOperand::REGISTER, other_rep1,
base + aliases)) != set_.end())
return true;
}
aliases = config->GetAliases(rep, loc.register_code(), other_rep2, &base);
DCHECK(aliases > 0 || (aliases == 0 && base == -1));
while (aliases--) {
if (set_.find(AllocatedOperand(LocationOperand::REGISTER, other_rep2,
base + aliases)) != set_.end())
return true;
}
}
return false;
}
private:
static int RepBit(MachineRepresentation rep) {
return 1 << static_cast<int>(rep);
}
static bool HasMixedFPReps(int reps) {
return reps && !base::bits::IsPowerOfTwo32(reps);
}
ZoneSet<InstructionOperand, CompareOperandModuloType> set_;
int fp_reps_;
};
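
A standalone model of what the extra probing in ContainsOpOrAlias buys (not the CL's code; the float/double code correspondence is assumed from the ARM scheme): querying a float register also hits a previously inserted double register that overlaps it.

// Toy model: (kind, code) pairs stand in for canonicalized register operands,
// kind 0 = float, kind 1 = double. A float code c overlaps double code c/2.
#include <cstdio>
#include <set>
#include <utility>

using Op = std::pair<int, int>;

bool ContainsOrAliases(const std::set<Op>& set, Op op) {
  if (set.count(op)) return true;
  if (op.first == 0) return set.count({1, op.second / 2}) != 0;
  return set.count({0, op.second * 2}) || set.count({0, op.second * 2 + 1});
}

int main() {
  std::set<Op> ops = {{1, 3}};                          // set contains d3
  std::printf("%d\n", ContainsOrAliases(ops, {0, 7}));  // s7 overlaps d3 -> 1
  std::printf("%d\n", ContainsOrAliases(ops, {0, 8}));  // s8 lies in d4  -> 0
  return 0;
}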
int FindFirstNonEmptySlot(const Instruction* instr) {
int i = Instruction::FIRST_GAP_POSITION;
@ -98,21 +166,21 @@ void MoveOptimizer::RemoveClobberedDestinations(Instruction* instruction) {
// Outputs and temps are treated together as potentially clobbering a
// destination operand.
for (size_t i = 0; i < instruction->OutputCount(); ++i) {
outputs.insert(*instruction->OutputAt(i));
outputs.InsertOp(*instruction->OutputAt(i));
}
for (size_t i = 0; i < instruction->TempCount(); ++i) {
outputs.insert(*instruction->TempAt(i));
outputs.InsertOp(*instruction->TempAt(i));
}
// Input operands block elisions.
for (size_t i = 0; i < instruction->InputCount(); ++i) {
inputs.insert(*instruction->InputAt(i));
inputs.InsertOp(*instruction->InputAt(i));
}
// Elide moves made redundant by the instruction.
for (MoveOperands* move : *moves) {
if (outputs.find(move->destination()) != outputs.end() &&
inputs.find(move->destination()) == inputs.end()) {
if (outputs.ContainsOpOrAlias(move->destination()) &&
!inputs.ContainsOpOrAlias(move->destination())) {
move->Eliminate();
}
}
@ -121,7 +189,7 @@ void MoveOptimizer::RemoveClobberedDestinations(Instruction* instruction) {
// the one for its input.
if (instruction->IsRet() || instruction->IsTailCall()) {
for (MoveOperands* move : *moves) {
if (inputs.find(move->destination()) == inputs.end()) {
if (!inputs.ContainsOpOrAlias(move->destination())) {
move->Eliminate();
}
}
@ -140,7 +208,7 @@ void MoveOptimizer::MigrateMoves(Instruction* to, Instruction* from) {
// If an operand is an input to the instruction, we cannot move assignments
// where it appears on the LHS.
for (size_t i = 0; i < from->InputCount(); ++i) {
dst_cant_be.insert(*from->InputAt(i));
dst_cant_be.InsertOp(*from->InputAt(i));
}
// If an operand is output to the instruction, we cannot move assignments
// where it appears on the RHS, because we would lose its value before the
@ -149,10 +217,10 @@ void MoveOptimizer::MigrateMoves(Instruction* to, Instruction* from) {
// The output can't appear on the LHS because we performed
// RemoveClobberedDestinations for the "from" instruction.
for (size_t i = 0; i < from->OutputCount(); ++i) {
src_cant_be.insert(*from->OutputAt(i));
src_cant_be.InsertOp(*from->OutputAt(i));
}
for (size_t i = 0; i < from->TempCount(); ++i) {
src_cant_be.insert(*from->TempAt(i));
src_cant_be.InsertOp(*from->TempAt(i));
}
for (MoveOperands* move : *from_moves) {
if (move->IsRedundant()) continue;
@ -160,7 +228,7 @@ void MoveOptimizer::MigrateMoves(Instruction* to, Instruction* from) {
// move "z = dest", because z would become y rather than "V".
// We assume CompressMoves has happened before this, which means we don't
// have more than one assignment to dest.
src_cant_be.insert(move->destination());
src_cant_be.InsertOp(move->destination());
}
ZoneSet<MoveKey, MoveKeyCompare> move_candidates(local_zone());
@ -168,7 +236,7 @@ void MoveOptimizer::MigrateMoves(Instruction* to, Instruction* from) {
// destination operands are eligible for being moved down.
for (MoveOperands* move : *from_moves) {
if (move->IsRedundant()) continue;
if (!Blocks(dst_cant_be, move->destination())) {
if (!dst_cant_be.ContainsOpOrAlias(move->destination())) {
MoveKey key = {move->source(), move->destination()};
move_candidates.insert(key);
}
@ -183,8 +251,8 @@ void MoveOptimizer::MigrateMoves(Instruction* to, Instruction* from) {
auto current = iter;
++iter;
InstructionOperand src = current->source;
if (Blocks(src_cant_be, src)) {
src_cant_be.insert(current->destination);
if (src_cant_be.ContainsOpOrAlias(src)) {
src_cant_be.InsertOp(current->destination);
move_candidates.erase(current);
changed = true;
}
@ -223,8 +291,7 @@ void MoveOptimizer::CompressMoves(ParallelMove* left, MoveOpVector* right) {
// merging the two gaps.
for (MoveOperands* move : *right) {
if (move->IsRedundant()) continue;
MoveOperands* to_eliminate = left->PrepareInsertAfter(move);
if (to_eliminate != nullptr) eliminated.push_back(to_eliminate);
left->PrepareInsertAfter(move, &eliminated);
}
// Eliminate dead moves.
for (MoveOperands* to_eliminate : eliminated) {
@ -360,7 +427,7 @@ void MoveOptimizer::OptimizeMerge(InstructionBlock* block) {
// there are such moves, we could move them, but the destination of the
// moves staying behind can't appear as a source of a common move,
// because the move staying behind will clobber this destination.
conflicting_srcs.insert(dest);
conflicting_srcs.InsertOp(dest);
move_map.erase(current);
}
}
@ -374,9 +441,8 @@ void MoveOptimizer::OptimizeMerge(InstructionBlock* block) {
auto current = iter;
++iter;
DCHECK_EQ(block->PredecessorCount(), current->second);
if (conflicting_srcs.find(current->first.source) !=
conflicting_srcs.end()) {
conflicting_srcs.insert(current->first.destination);
if (conflicting_srcs.ContainsOpOrAlias(current->first.source)) {
conflicting_srcs.InsertOp(current->first.destination);
move_map.erase(current);
changed = true;
}

View File

@ -33,7 +33,7 @@ int GetRegisterCount(const RegisterConfiguration* cfg, RegisterKind kind) {
int GetAllocatableRegisterCount(const RegisterConfiguration* cfg,
RegisterKind kind) {
return kind == FP_REGISTERS ? cfg->num_allocatable_aliased_double_registers()
return kind == FP_REGISTERS ? cfg->num_allocatable_double_registers()
: cfg->num_allocatable_general_registers();
}
@ -74,14 +74,8 @@ int GetByteWidth(MachineRepresentation rep) {
case MachineRepresentation::kTaggedSigned:
case MachineRepresentation::kTaggedPointer:
case MachineRepresentation::kTagged:
return kPointerSize;
case MachineRepresentation::kFloat32:
// TODO(bbudge) Eliminate this when FP register aliasing works.
#if V8_TARGET_ARCH_ARM
return kDoubleSize;
#else
return kPointerSize;
#endif
case MachineRepresentation::kWord64:
case MachineRepresentation::kFloat64:
return kDoubleSize;
@ -498,6 +492,12 @@ UsePosition* LiveRange::NextUsePositionRegisterIsBeneficial(
return pos;
}
LifetimePosition LiveRange::NextLifetimePositionRegisterIsBeneficial(
const LifetimePosition& start) const {
UsePosition* next_use = NextUsePositionRegisterIsBeneficial(start);
if (next_use == nullptr) return End();
return next_use->pos();
}
UsePosition* LiveRange::PreviousUsePositionRegisterIsBeneficial(
LifetimePosition start) const {
@ -1360,8 +1360,12 @@ RegisterAllocationData::RegisterAllocationData(
allocation_zone()),
fixed_live_ranges_(this->config()->num_general_registers(), nullptr,
allocation_zone()),
fixed_float_live_ranges_(this->config()->num_float_registers(), nullptr,
allocation_zone()),
fixed_double_live_ranges_(this->config()->num_double_registers(), nullptr,
allocation_zone()),
fixed_simd128_live_ranges_(this->config()->num_simd128_registers(),
nullptr, allocation_zone()),
spill_ranges_(code->VirtualRegisterCount(), nullptr, allocation_zone()),
delayed_references_(allocation_zone()),
assigned_registers_(nullptr),
@ -1539,8 +1543,21 @@ void RegisterAllocationData::MarkAllocated(MachineRepresentation rep,
int index) {
switch (rep) {
case MachineRepresentation::kFloat32:
case MachineRepresentation::kFloat64:
case MachineRepresentation::kSimd128:
if (kSimpleFPAliasing) {
assigned_double_registers_->Add(index);
} else {
int alias_base_index = -1;
int aliases = config()->GetAliases(
rep, index, MachineRepresentation::kFloat64, &alias_base_index);
DCHECK(aliases > 0 || (aliases == 0 && alias_base_index == -1));
while (aliases--) {
int aliased_reg = alias_base_index + aliases;
assigned_double_registers_->Add(aliased_reg);
}
}
break;
case MachineRepresentation::kFloat64:
assigned_double_registers_->Add(index);
break;
default:
@ -1867,7 +1884,11 @@ int LiveRangeBuilder::FixedFPLiveRangeID(int index, MachineRepresentation rep) {
int result = -index - 1;
switch (rep) {
case MachineRepresentation::kSimd128:
result -= config()->num_float_registers();
// Fall through.
case MachineRepresentation::kFloat32:
result -= config()->num_double_registers();
// Fall through.
case MachineRepresentation::kFloat64:
result -= config()->num_general_registers();
break;
@ -1894,25 +1915,35 @@ TopLevelLiveRange* LiveRangeBuilder::FixedLiveRangeFor(int index) {
TopLevelLiveRange* LiveRangeBuilder::FixedFPLiveRangeFor(
int index, MachineRepresentation rep) {
TopLevelLiveRange* result = nullptr;
int num_regs = -1;
ZoneVector<TopLevelLiveRange*>* live_ranges = nullptr;
switch (rep) {
case MachineRepresentation::kFloat32:
num_regs = config()->num_float_registers();
live_ranges = &data()->fixed_float_live_ranges();
break;
case MachineRepresentation::kFloat64:
num_regs = config()->num_double_registers();
live_ranges = &data()->fixed_double_live_ranges();
break;
case MachineRepresentation::kSimd128:
DCHECK(index < config()->num_double_registers());
result = data()->fixed_double_live_ranges()[index];
if (result == nullptr) {
result = data()->NewLiveRange(FixedFPLiveRangeID(index, rep), rep);
DCHECK(result->IsFixed());
result->set_assigned_register(index);
data()->MarkAllocated(rep, index);
data()->fixed_double_live_ranges()[index] = result;
}
num_regs = config()->num_simd128_registers();
live_ranges = &data()->fixed_simd128_live_ranges();
break;
default:
UNREACHABLE();
break;
}
DCHECK(index < num_regs);
TopLevelLiveRange* result = (*live_ranges)[index];
if (result == nullptr) {
result = data()->NewLiveRange(FixedFPLiveRangeID(index, rep), rep);
DCHECK(result->IsFixed());
result->set_assigned_register(index);
data()->MarkAllocated(rep, index);
(*live_ranges)[index] = result;
}
return result;
}
@ -2035,8 +2066,7 @@ void LiveRangeBuilder::ProcessInstructions(const InstructionBlock* block,
}
if (instr->ClobbersDoubleRegisters()) {
for (int i = 0; i < config()->num_allocatable_aliased_double_registers();
++i) {
for (int i = 0; i < config()->num_allocatable_double_registers(); ++i) {
// Add a UseInterval for all DoubleRegisters. See comment above for
// general registers.
int code = config()->GetAllocatableDoubleCode(i);
@ -2045,6 +2075,26 @@ void LiveRangeBuilder::ProcessInstructions(const InstructionBlock* block,
range->AddUseInterval(curr_position, curr_position.End(),
allocation_zone());
}
// Clobber fixed float registers on archs with non-simple aliasing.
if (!kSimpleFPAliasing) {
for (int i = 0; i < config()->num_allocatable_float_registers(); ++i) {
// Add a UseInterval for all FloatRegisters. See comment above for
// general registers.
int code = config()->GetAllocatableFloatCode(i);
TopLevelLiveRange* range =
FixedFPLiveRangeFor(code, MachineRepresentation::kFloat32);
range->AddUseInterval(curr_position, curr_position.End(),
allocation_zone());
}
for (int i = 0; i < config()->num_allocatable_simd128_registers();
++i) {
int code = config()->GetAllocatableSimd128Code(i);
TopLevelLiveRange* range =
FixedFPLiveRangeFor(code, MachineRepresentation::kSimd128);
range->AddUseInterval(curr_position, curr_position.End(),
allocation_zone());
}
}
}
for (size_t i = 0; i < instr->InputCount(); i++) {
@ -2690,9 +2740,15 @@ void LinearScanAllocator::AllocateRegisters() {
if (current != nullptr) AddToInactive(current);
}
} else {
for (TopLevelLiveRange* current : data()->fixed_float_live_ranges()) {
if (current != nullptr) AddToInactive(current);
}
for (TopLevelLiveRange* current : data()->fixed_double_live_ranges()) {
if (current != nullptr) AddToInactive(current);
}
for (TopLevelLiveRange* current : data()->fixed_simd128_live_ranges()) {
if (current != nullptr) AddToInactive(current);
}
}
while (!unhandled_live_ranges().empty()) {
@ -2873,9 +2929,32 @@ void LinearScanAllocator::InactiveToActive(LiveRange* range) {
range->TopLevel()->vreg(), range->relative_id());
}
void LinearScanAllocator::GetFPRegisterSet(MachineRepresentation rep,
int* num_regs, int* num_codes,
const int** codes) const {
DCHECK(!kSimpleFPAliasing);
if (rep == MachineRepresentation::kFloat32) {
*num_regs = data()->config()->num_float_registers();
*num_codes = data()->config()->num_allocatable_float_registers();
*codes = data()->config()->allocatable_float_codes();
} else if (rep == MachineRepresentation::kSimd128) {
*num_regs = data()->config()->num_simd128_registers();
*num_codes = data()->config()->num_allocatable_simd128_registers();
*codes = data()->config()->allocatable_simd128_codes();
} else {
UNREACHABLE();
}
}
void LinearScanAllocator::FindFreeRegistersForRange(
LiveRange* range, Vector<LifetimePosition> positions) {
int num_regs = num_registers();
int num_codes = num_allocatable_registers();
const int* codes = allocatable_register_codes();
MachineRepresentation rep = range->representation();
if (!kSimpleFPAliasing && (rep == MachineRepresentation::kFloat32 ||
rep == MachineRepresentation::kSimd128))
GetFPRegisterSet(rep, &num_regs, &num_codes, &codes);
DCHECK_GE(positions.length(), num_regs);
for (int i = 0; i < num_regs; i++) {
@ -2884,9 +2963,20 @@ void LinearScanAllocator::FindFreeRegistersForRange(
for (LiveRange* cur_active : active_live_ranges()) {
int cur_reg = cur_active->assigned_register();
positions[cur_reg] = LifetimePosition::GapFromInstructionIndex(0);
TRACE("Register %s is free until pos %d (1)\n", RegisterName(cur_reg),
LifetimePosition::GapFromInstructionIndex(0).value());
if (kSimpleFPAliasing || mode() == GENERAL_REGISTERS) {
positions[cur_reg] = LifetimePosition::GapFromInstructionIndex(0);
TRACE("Register %s is free until pos %d (1)\n", RegisterName(cur_reg),
LifetimePosition::GapFromInstructionIndex(0).value());
} else {
int alias_base_index = -1;
int aliases = data()->config()->GetAliases(
cur_active->representation(), cur_reg, rep, &alias_base_index);
DCHECK(aliases > 0 || (aliases == 0 && alias_base_index == -1));
while (aliases--) {
int aliased_reg = alias_base_index + aliases;
positions[aliased_reg] = LifetimePosition::GapFromInstructionIndex(0);
}
}
}
for (LiveRange* cur_inactive : inactive_live_ranges()) {
@ -2894,9 +2984,20 @@ void LinearScanAllocator::FindFreeRegistersForRange(
LifetimePosition next_intersection = cur_inactive->FirstIntersection(range);
if (!next_intersection.IsValid()) continue;
int cur_reg = cur_inactive->assigned_register();
positions[cur_reg] = Min(positions[cur_reg], next_intersection);
TRACE("Register %s is free until pos %d (2)\n", RegisterName(cur_reg),
Min(positions[cur_reg], next_intersection).value());
if (kSimpleFPAliasing || mode() == GENERAL_REGISTERS) {
positions[cur_reg] = Min(positions[cur_reg], next_intersection);
TRACE("Register %s is free until pos %d (2)\n", RegisterName(cur_reg),
Min(positions[cur_reg], next_intersection).value());
} else {
int alias_base_index = -1;
int aliases = data()->config()->GetAliases(
cur_inactive->representation(), cur_reg, rep, &alias_base_index);
DCHECK(aliases > 0 || (aliases == 0 && alias_base_index == -1));
while (aliases--) {
int aliased_reg = alias_base_index + aliases;
positions[aliased_reg] = Min(positions[aliased_reg], next_intersection);
}
}
}
}
@ -2977,8 +3078,14 @@ bool LinearScanAllocator::TryAllocatePreferredReg(
bool LinearScanAllocator::TryAllocateFreeReg(
LiveRange* current, const Vector<LifetimePosition>& free_until_pos) {
int num_regs = 0; // used only for the call to GetFPRegisterSet.
int num_codes = num_allocatable_registers();
const int* codes = allocatable_register_codes();
MachineRepresentation rep = current->representation();
if (!kSimpleFPAliasing && (rep == MachineRepresentation::kFloat32 ||
rep == MachineRepresentation::kSimd128))
GetFPRegisterSet(rep, &num_regs, &num_codes, &codes);
DCHECK_GE(free_until_pos.length(), num_codes);
// Find the register which stays free for the longest time.
@ -3026,6 +3133,10 @@ void LinearScanAllocator::AllocateBlockedReg(LiveRange* current) {
int num_regs = num_registers();
int num_codes = num_allocatable_registers();
const int* codes = allocatable_register_codes();
MachineRepresentation rep = current->representation();
if (!kSimpleFPAliasing && (rep == MachineRepresentation::kFloat32 ||
rep == MachineRepresentation::kSimd128))
GetFPRegisterSet(rep, &num_regs, &num_codes, &codes);
LifetimePosition use_pos[RegisterConfiguration::kMaxFPRegisters];
LifetimePosition block_pos[RegisterConfiguration::kMaxFPRegisters];
@ -3037,16 +3148,28 @@ void LinearScanAllocator::AllocateBlockedReg(LiveRange* current) {
int cur_reg = range->assigned_register();
bool is_fixed_or_cant_spill =
range->TopLevel()->IsFixed() || !range->CanBeSpilled(current->Start());
if (is_fixed_or_cant_spill) {
block_pos[cur_reg] = use_pos[cur_reg] =
LifetimePosition::GapFromInstructionIndex(0);
} else {
UsePosition* next_use =
range->NextUsePositionRegisterIsBeneficial(current->Start());
if (next_use == nullptr) {
use_pos[cur_reg] = range->End();
if (kSimpleFPAliasing || mode() == GENERAL_REGISTERS) {
if (is_fixed_or_cant_spill) {
block_pos[cur_reg] = use_pos[cur_reg] =
LifetimePosition::GapFromInstructionIndex(0);
} else {
use_pos[cur_reg] = next_use->pos();
use_pos[cur_reg] =
range->NextLifetimePositionRegisterIsBeneficial(current->Start());
}
} else {
int alias_base_index = -1;
int aliases = data()->config()->GetAliases(
range->representation(), cur_reg, rep, &alias_base_index);
DCHECK(aliases > 0 || (aliases == 0 && alias_base_index == -1));
while (aliases--) {
int aliased_reg = alias_base_index + aliases;
if (is_fixed_or_cant_spill) {
block_pos[aliased_reg] = use_pos[aliased_reg] =
LifetimePosition::GapFromInstructionIndex(0);
} else {
use_pos[aliased_reg] =
range->NextLifetimePositionRegisterIsBeneficial(current->Start());
}
}
}
}
@ -3057,11 +3180,29 @@ void LinearScanAllocator::AllocateBlockedReg(LiveRange* current) {
if (!next_intersection.IsValid()) continue;
int cur_reg = range->assigned_register();
bool is_fixed = range->TopLevel()->IsFixed();
if (is_fixed) {
block_pos[cur_reg] = Min(block_pos[cur_reg], next_intersection);
use_pos[cur_reg] = Min(block_pos[cur_reg], use_pos[cur_reg]);
if (kSimpleFPAliasing || mode() == GENERAL_REGISTERS) {
if (is_fixed) {
block_pos[cur_reg] = Min(block_pos[cur_reg], next_intersection);
use_pos[cur_reg] = Min(block_pos[cur_reg], use_pos[cur_reg]);
} else {
use_pos[cur_reg] = Min(use_pos[cur_reg], next_intersection);
}
} else {
use_pos[cur_reg] = Min(use_pos[cur_reg], next_intersection);
int alias_base_index = -1;
int aliases = data()->config()->GetAliases(
range->representation(), cur_reg, rep, &alias_base_index);
DCHECK(aliases > 0 || (aliases == 0 && alias_base_index == -1));
while (aliases--) {
int aliased_reg = alias_base_index + aliases;
if (is_fixed) {
block_pos[aliased_reg] =
Min(block_pos[aliased_reg], next_intersection);
use_pos[aliased_reg] =
Min(block_pos[aliased_reg], use_pos[aliased_reg]);
} else {
use_pos[aliased_reg] = Min(use_pos[aliased_reg], next_intersection);
}
}
}
}
@ -3113,7 +3254,15 @@ void LinearScanAllocator::SplitAndSpillIntersecting(LiveRange* current) {
LifetimePosition split_pos = current->Start();
for (size_t i = 0; i < active_live_ranges().size(); ++i) {
LiveRange* range = active_live_ranges()[i];
if (range->assigned_register() != reg) continue;
if (kSimpleFPAliasing || mode() == GENERAL_REGISTERS) {
if (range->assigned_register() != reg) continue;
} else {
if (!data()->config()->AreAliases(current->representation(), reg,
range->representation(),
range->assigned_register())) {
continue;
}
}
UsePosition* next_pos = range->NextRegisterPosition(current->Start());
LifetimePosition spill_pos = FindOptimalSpillingPos(range, split_pos);
@ -3140,7 +3289,14 @@ void LinearScanAllocator::SplitAndSpillIntersecting(LiveRange* current) {
LiveRange* range = inactive_live_ranges()[i];
DCHECK(range->End() > current->Start());
if (range->TopLevel()->IsFixed()) continue;
if (range->assigned_register() != reg) continue;
if (kSimpleFPAliasing || mode() == GENERAL_REGISTERS) {
if (range->assigned_register() != reg) continue;
} else {
if (!data()->config()->AreAliases(current->representation(), reg,
range->representation(),
range->assigned_register()))
continue;
}
LifetimePosition next_intersection = range->FirstIntersection(current);
if (next_intersection.IsValid()) {
@ -3631,7 +3787,6 @@ int LiveRangeConnector::ResolveControlFlow(const InstructionBlock* block,
return gap_index;
}
void LiveRangeConnector::ConnectRanges(Zone* local_zone) {
DelayedInsertionMap delayed_insertion_map(local_zone);
for (TopLevelLiveRange* top_range : data()->live_ranges()) {
@ -3719,9 +3874,8 @@ void LiveRangeConnector::ConnectRanges(Zone* local_zone) {
// Gather all MoveOperands for a single ParallelMove.
MoveOperands* move =
new (code_zone()) MoveOperands(it->first.second, it->second);
MoveOperands* eliminate = moves->PrepareInsertAfter(move);
moves->PrepareInsertAfter(move, &to_eliminate);
to_insert.push_back(move);
if (eliminate != nullptr) to_eliminate.push_back(eliminate);
}
}
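The call-site change above reflects the new ParallelMove::PrepareInsertAfter contract: under complex FP aliasing, inserting one wide move can make several narrower pending moves redundant, so eliminated moves are now appended to a caller-supplied vector instead of being returned as a single pointer. A minimal sketch of the elimination half of that contract, using a hypothetical Move struct rather than the Instruction/MoveOperands API:

#include <cstdio>
#include <vector>

// A "move" is a destination and source expressed in float32-sized lanes.
struct Move { int dst, src, lanes; };  // {6, 0, 2} reads as "d3 <- d0".

bool DestinationsOverlap(const Move& a, const Move& b) {
  return a.dst < b.dst + b.lanes && b.dst < a.dst + a.lanes;
}

// Drop every pending move whose destination is (partly) overwritten by the
// move about to be inserted, collecting the dropped moves the way the new
// PrepareInsertAfter out-parameter does.
void EliminateInterfering(std::vector<Move>* pending, const Move& inserted,
                          std::vector<Move>* eliminated) {
  for (auto it = pending->begin(); it != pending->end();) {
    if (DestinationsOverlap(*it, inserted)) {
      eliminated->push_back(*it);
      it = pending->erase(it);
    } else {
      ++it;
    }
  }
}

int main() {
  std::vector<Move> pending = {{6, 0, 1}, {7, 2, 1}};  // s6 <- s0, s7 <- s2
  std::vector<Move> eliminated;
  EliminateInterfering(&pending, {6, 4, 2}, &eliminated);  // insert d3 <- d2
  std::printf("eliminated %zu narrower moves\n", eliminated.size());  // 2
  return 0;
}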

View File

@ -357,6 +357,11 @@ class V8_EXPORT_PRIVATE LiveRange : public NON_EXPORTED_BASE(ZoneObject) {
UsePosition* NextUsePositionRegisterIsBeneficial(
LifetimePosition start) const;
// Returns lifetime position for which register is beneficial in this live
// range and which follows both start and last processed use position.
LifetimePosition NextLifetimePositionRegisterIsBeneficial(
const LifetimePosition& start) const;
// Returns use position for which register is beneficial in this live
// range and which precedes start.
UsePosition* PreviousUsePositionRegisterIsBeneficial(
@ -773,12 +778,24 @@ class RegisterAllocationData final : public ZoneObject {
ZoneVector<TopLevelLiveRange*>& fixed_live_ranges() {
return fixed_live_ranges_;
}
ZoneVector<TopLevelLiveRange*>& fixed_float_live_ranges() {
return fixed_float_live_ranges_;
}
const ZoneVector<TopLevelLiveRange*>& fixed_float_live_ranges() const {
return fixed_float_live_ranges_;
}
ZoneVector<TopLevelLiveRange*>& fixed_double_live_ranges() {
return fixed_double_live_ranges_;
}
const ZoneVector<TopLevelLiveRange*>& fixed_double_live_ranges() const {
return fixed_double_live_ranges_;
}
ZoneVector<TopLevelLiveRange*>& fixed_simd128_live_ranges() {
return fixed_simd128_live_ranges_;
}
const ZoneVector<TopLevelLiveRange*>& fixed_simd128_live_ranges() const {
return fixed_simd128_live_ranges_;
}
ZoneVector<BitVector*>& live_in_sets() { return live_in_sets_; }
ZoneVector<BitVector*>& live_out_sets() { return live_out_sets_; }
ZoneVector<SpillRange*>& spill_ranges() { return spill_ranges_; }
@ -840,7 +857,9 @@ class RegisterAllocationData final : public ZoneObject {
ZoneVector<BitVector*> live_out_sets_;
ZoneVector<TopLevelLiveRange*> live_ranges_;
ZoneVector<TopLevelLiveRange*> fixed_live_ranges_;
ZoneVector<TopLevelLiveRange*> fixed_float_live_ranges_;
ZoneVector<TopLevelLiveRange*> fixed_double_live_ranges_;
ZoneVector<TopLevelLiveRange*> fixed_simd128_live_ranges_;
ZoneVector<SpillRange*> spill_ranges_;
DelayedReferences delayed_references_;
BitVector* assigned_registers_;
@ -1058,6 +1077,8 @@ class LinearScanAllocator final : public RegisterAllocator {
const Vector<LifetimePosition>& free_until_pos);
bool TryAllocatePreferredReg(LiveRange* range,
const Vector<LifetimePosition>& free_until_pos);
void GetFPRegisterSet(MachineRepresentation rep, int* num_regs,
int* num_codes, const int** codes) const;
void FindFreeRegistersForRange(LiveRange* range,
Vector<LifetimePosition> free_until_pos);
void ProcessCurrentRange(LiveRange* current);
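GetFPRegisterSet, declared above, is what lets TryAllocateFreeReg and AllocateBlockedReg iterate a representation-specific register set when aliasing is complex. A rough sketch of the selection it performs, against a hypothetical table struct rather than the RegisterAllocationData/RegisterConfiguration interfaces:

#include <vector>

enum class Rep { kFloat32, kFloat64, kSimd128 };

// Hypothetical stand-in for the per-representation register tables.
struct FPRegisterTables {
  std::vector<int> float_codes, double_codes, simd128_codes;
  int num_float_regs, num_double_regs, num_simd128_regs;
};

// Selects the register count and allocatable-code array for `rep`, mirroring
// what a GetFPRegisterSet-style helper hands back to the allocation loops.
void GetFPRegisterSetSketch(const FPRegisterTables& t, Rep rep, int* num_regs,
                            int* num_codes, const int** codes) {
  const std::vector<int>& v = rep == Rep::kFloat32   ? t.float_codes
                              : rep == Rep::kSimd128 ? t.simd128_codes
                                                     : t.double_codes;
  *num_regs = rep == Rep::kFloat32   ? t.num_float_regs
              : rep == Rep::kSimd128 ? t.num_simd128_regs
                                     : t.num_double_regs;
  *num_codes = static_cast<int>(v.size());
  *codes = v.data();
}

int main() {
  FPRegisterTables t{{0, 1, 2, 3}, {0, 1}, {0}, 4, 2, 1};
  int num_regs = 0, num_codes = 0;
  const int* codes = nullptr;
  GetFPRegisterSetSketch(t, Rep::kFloat32, &num_regs, &num_codes, &codes);
  // Four allocatable float codes, first one is 0.
  return (num_regs == 4 && num_codes == 4 && codes[0] == 0) ? 0 : 1;
}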

View File

@ -178,6 +178,17 @@ struct Allocator {
// Allocate a floating point register/stack location.
if (fp_offset < fp_count) {
DoubleRegister reg = fp_regs[fp_offset++];
#if V8_TARGET_ARCH_ARM
// Allocate floats using a double register, but modify the code to
// reflect how ARM FP registers alias.
// TODO(bbudge) Modify wasm linkage to allow use of all float regs.
if (type == kAstF32) {
int float_reg_code = reg.code() * 2;
DCHECK(float_reg_code < RegisterConfiguration::kMaxFPRegisters);
return regloc(DoubleRegister::from_code(float_reg_code),
MachineTypeFor(type));
}
#endif
return regloc(reg, MachineTypeFor(type));
} else {
int offset = -1 - stack_offset;

View File

@ -239,7 +239,7 @@ inline bool IsAnyTagged(MachineRepresentation rep) {
}
// Gets the log2 of the element size in bytes of the machine type.
inline int ElementSizeLog2Of(MachineRepresentation rep) {
V8_EXPORT_PRIVATE inline int ElementSizeLog2Of(MachineRepresentation rep) {
switch (rep) {
case MachineRepresentation::kBit:
case MachineRepresentation::kWord8:

View File

@ -70,15 +70,12 @@ class ArchDefaultRegisterConfiguration : public RegisterConfiguration {
#if V8_TARGET_ARCH_IA32
kMaxAllocatableGeneralRegisterCount,
kMaxAllocatableDoubleRegisterCount,
kMaxAllocatableDoubleRegisterCount,
#elif V8_TARGET_ARCH_X87
kMaxAllocatableGeneralRegisterCount,
compiler == TURBOFAN ? 1 : kMaxAllocatableDoubleRegisterCount,
compiler == TURBOFAN ? 1 : kMaxAllocatableDoubleRegisterCount,
#elif V8_TARGET_ARCH_X64
kMaxAllocatableGeneralRegisterCount,
kMaxAllocatableDoubleRegisterCount,
kMaxAllocatableDoubleRegisterCount,
#elif V8_TARGET_ARCH_ARM
FLAG_enable_embedded_constant_pool
? (kMaxAllocatableGeneralRegisterCount - 1)
@ -86,27 +83,21 @@ class ArchDefaultRegisterConfiguration : public RegisterConfiguration {
CpuFeatures::IsSupported(VFP32DREGS)
? kMaxAllocatableDoubleRegisterCount
: (ALLOCATABLE_NO_VFP32_DOUBLE_REGISTERS(REGISTER_COUNT) 0),
ALLOCATABLE_NO_VFP32_DOUBLE_REGISTERS(REGISTER_COUNT) 0,
#elif V8_TARGET_ARCH_ARM64
kMaxAllocatableGeneralRegisterCount,
kMaxAllocatableDoubleRegisterCount,
kMaxAllocatableDoubleRegisterCount,
#elif V8_TARGET_ARCH_MIPS
kMaxAllocatableGeneralRegisterCount,
kMaxAllocatableDoubleRegisterCount,
kMaxAllocatableDoubleRegisterCount,
#elif V8_TARGET_ARCH_MIPS64
kMaxAllocatableGeneralRegisterCount,
kMaxAllocatableDoubleRegisterCount,
kMaxAllocatableDoubleRegisterCount,
#elif V8_TARGET_ARCH_PPC
kMaxAllocatableGeneralRegisterCount,
kMaxAllocatableDoubleRegisterCount,
kMaxAllocatableDoubleRegisterCount,
#elif V8_TARGET_ARCH_S390
kMaxAllocatableGeneralRegisterCount,
kMaxAllocatableDoubleRegisterCount,
kMaxAllocatableDoubleRegisterCount,
#else
#error Unsupported target architecture.
#endif
@ -145,7 +136,6 @@ const RegisterConfiguration* RegisterConfiguration::Turbofan() {
RegisterConfiguration::RegisterConfiguration(
int num_general_registers, int num_double_registers,
int num_allocatable_general_registers, int num_allocatable_double_registers,
int num_allocatable_aliased_double_registers,
const int* allocatable_general_codes, const int* allocatable_double_codes,
AliasingKind fp_aliasing_kind, const char* const* general_register_names,
const char* const* float_register_names,
@ -158,8 +148,6 @@ RegisterConfiguration::RegisterConfiguration(
num_allocatable_general_registers_(num_allocatable_general_registers),
num_allocatable_float_registers_(0),
num_allocatable_double_registers_(num_allocatable_double_registers),
num_allocatable_aliased_double_registers_(
num_allocatable_aliased_double_registers),
num_allocatable_simd128_registers_(0),
allocatable_general_codes_mask_(0),
allocatable_float_codes_mask_(0),

View File

@ -36,7 +36,6 @@ class V8_EXPORT_PRIVATE RegisterConfiguration {
RegisterConfiguration(int num_general_registers, int num_double_registers,
int num_allocatable_general_registers,
int num_allocatable_double_registers,
int num_allocatable_aliased_double_registers,
const int* allocatable_general_codes,
const int* allocatable_double_codes,
AliasingKind fp_aliasing_kind,
@ -58,12 +57,6 @@ class V8_EXPORT_PRIVATE RegisterConfiguration {
int num_allocatable_double_registers() const {
return num_allocatable_double_registers_;
}
// TODO(bbudge): This is a temporary work-around required because our
// register allocator does not yet support the aliasing of single/double
// registers on ARM.
int num_allocatable_aliased_double_registers() const {
return num_allocatable_aliased_double_registers_;
}
int num_allocatable_simd128_registers() const {
return num_allocatable_simd128_registers_;
}
@ -143,7 +136,6 @@ class V8_EXPORT_PRIVATE RegisterConfiguration {
int num_allocatable_general_registers_;
int num_allocatable_float_registers_;
int num_allocatable_double_registers_;
int num_allocatable_aliased_double_registers_;
int num_allocatable_simd128_registers_;
int32_t allocatable_general_codes_mask_;
int32_t allocatable_float_codes_mask_;

View File

@ -26,6 +26,8 @@ class zone_allocator {
typedef zone_allocator<O> other;
};
// TODO(bbudge) Remove when V8 updates to MSVS 2015. See crbug.com/603131.
zone_allocator() : zone_(nullptr) { UNREACHABLE(); }
explicit zone_allocator(Zone* zone) throw() : zone_(zone) {}
explicit zone_allocator(const zone_allocator& other) throw()
: zone_(other.zone_) {}
@ -62,7 +64,6 @@ class zone_allocator {
Zone* zone() { return zone_; }
private:
zone_allocator();
Zone* zone_;
};

View File

@ -13,15 +13,32 @@ namespace compiler {
const auto GetRegConfig = RegisterConfiguration::Turbofan;
// Fragments the given operand into an equivalent set of operands to simplify
// ParallelMove equivalence testing.
// Fragments the given FP operand into an equivalent set of FP operands to
// simplify ParallelMove equivalence testing.
void GetCanonicalOperands(const InstructionOperand& op,
std::vector<InstructionOperand>* fragments) {
CHECK(!kSimpleFPAliasing);
CHECK(op.IsFPLocationOperand());
// TODO(bbudge) Split into float operands on platforms with non-simple FP
// register aliasing.
fragments->push_back(op);
const LocationOperand& loc = LocationOperand::cast(op);
MachineRepresentation rep = loc.representation();
int base = -1;
int aliases = GetRegConfig()->GetAliases(
rep, 0, MachineRepresentation::kFloat32, &base);
CHECK_LT(0, aliases);
CHECK_GE(4, aliases);
int index = -1;
int step = 1;
if (op.IsFPRegister()) {
index = loc.register_code() * aliases;
} else {
index = loc.index();
step = -1;
}
for (int i = 0; i < aliases; i++) {
fragments->push_back(AllocatedOperand(loc.location_kind(),
MachineRepresentation::kFloat32,
index + i * step));
}
}
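To make the fragmentation rule concrete: a float64 register with code k fragments into float32 registers 2k and 2k+1, while a float64 stack slot at index n covers float32 slots n and n-1, since FP slots are indexed from the high end. A standalone sketch with simplified structs, not the InstructionOperand classes:

#include <cstdio>
#include <vector>

struct Frag { bool is_reg; int index; };

// `aliases` is the number of float32 pieces (2 for float64, 4 for simd128).
std::vector<Frag> Fragment(bool is_reg, int index, int aliases) {
  std::vector<Frag> out;
  int base = is_reg ? index * aliases : index;
  int step = is_reg ? 1 : -1;
  for (int i = 0; i < aliases; ++i) out.push_back({is_reg, base + i * step});
  return out;
}

int main() {
  for (const Frag& f : Fragment(true, 3, 2))   // register d3
    std::printf("reg s%d\n", f.index);         // s6, s7
  for (const Frag& f : Fragment(false, 3, 2))  // float64 stack slot 3
    std::printf("slot %d\n", f.index);         // slots 3 and 2
  return 0;
}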
// The state of our move interpreter is the mapping of operands to values. Note
@ -36,7 +53,9 @@ class InterpreterState {
const InstructionOperand& dst = m->destination();
if (!kSimpleFPAliasing && src.IsFPLocationOperand() &&
dst.IsFPLocationOperand()) {
// Canonicalize FP location-location moves.
// Canonicalize FP location-location moves by fragmenting them into
// an equivalent sequence of float32 moves, to simplify state
// equivalence testing.
std::vector<InstructionOperand> src_fragments;
GetCanonicalOperands(src, &src_fragments);
CHECK(!src_fragments.empty());
@ -115,9 +134,11 @@ class InterpreterState {
int index;
if (!is_constant) {
const LocationOperand& loc_op = LocationOperand::cast(op);
// Canonicalize FP location operand representations to kFloat64.
// Preserve FP representation when FP register aliasing is complex.
// Otherwise, canonicalize to kFloat64.
if (IsFloatingPoint(loc_op.representation())) {
rep = MachineRepresentation::kFloat64;
rep = kSimpleFPAliasing ? MachineRepresentation::kFloat64
: loc_op.representation();
}
if (loc_op.IsAnyRegister()) {
index = loc_op.register_code();
@ -321,9 +342,11 @@ class ParallelMoveCreator : public HandleAndZoneScope {
auto GetValidRegisterCode = [&conf](MachineRepresentation rep, int index) {
switch (rep) {
case MachineRepresentation::kFloat32:
return conf->RegisterConfiguration::GetAllocatableFloatCode(index);
case MachineRepresentation::kFloat64:
case MachineRepresentation::kSimd128:
return conf->RegisterConfiguration::GetAllocatableDoubleCode(index);
case MachineRepresentation::kSimd128:
return conf->RegisterConfiguration::GetAllocatableSimd128Code(index);
default:
return conf->RegisterConfiguration::GetAllocatableGeneralCode(index);
}
@ -368,6 +391,118 @@ void RunTest(ParallelMove* pm, Zone* zone) {
CHECK_EQ(mi1.state(), mi2.state());
}
TEST(Aliasing) {
// On platforms with simple aliasing, these parallel moves are ill-formed.
if (kSimpleFPAliasing) return;
ParallelMoveCreator pmc;
Zone* zone = pmc.main_zone();
auto s0 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat32, 0);
auto s1 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat32, 1);
auto s2 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat32, 2);
auto s3 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat32, 3);
auto s4 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat32, 4);
auto d0 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat64, 0);
auto d1 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat64, 1);
auto d16 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat64, 16);
// Double slots must be odd to match frame allocation.
auto dSlot = AllocatedOperand(LocationOperand::STACK_SLOT,
MachineRepresentation::kFloat64, 3);
// Cycles involving s- and d-registers.
{
std::vector<InstructionOperand> moves = {
s2, s0, // s2 <- s0
d0, d1 // d0 <- d1
};
RunTest(pmc.Create(moves), zone);
}
{
std::vector<InstructionOperand> moves = {
d0, d1, // d0 <- d1
s2, s0 // s2 <- s0
};
RunTest(pmc.Create(moves), zone);
}
{
std::vector<InstructionOperand> moves = {
s2, s1, // s2 <- s1
d0, d1 // d0 <- d1
};
RunTest(pmc.Create(moves), zone);
}
{
std::vector<InstructionOperand> moves = {
d0, d1, // d0 <- d1
s2, s1 // s2 <- s1
};
RunTest(pmc.Create(moves), zone);
}
// Two cycles involving a single d-register.
{
std::vector<InstructionOperand> moves = {
d0, d1, // d0 <- d1
s2, s1, // s2 <- s1
s3, s0 // s3 <- s0
};
RunTest(pmc.Create(moves), zone);
}
// Cycle with a float move that must be deferred until after swaps.
{
std::vector<InstructionOperand> moves = {
d0, d1, // d0 <- d1
s2, s0, // s2 <- s0
s3, s4 // s3 <- s4 must be deferred
};
RunTest(pmc.Create(moves), zone);
}
// Cycles involving s-registers and a non-aliased d-register.
{
std::vector<InstructionOperand> moves = {
d16, d0, // d16 <- d0
s1, s2, // s1 <- s2
d1, d16 // d1 <- d16
};
RunTest(pmc.Create(moves), zone);
}
{
std::vector<InstructionOperand> moves = {
        s2, s1,   // s2 <- s1
        d0, d16,  // d0 <- d16
        d16, d1   // d16 <- d1
};
RunTest(pmc.Create(moves), zone);
}
{
std::vector<InstructionOperand> moves = {
d0, d16, // d0 <- d16
        d16, d1,  // d16 <- d1
        s3, s0    // s3 <- s0
};
RunTest(pmc.Create(moves), zone);
}
// Cycle involving aliasing registers and a slot.
{
std::vector<InstructionOperand> moves = {
dSlot, d0, // dSlot <- d0
d1, dSlot, // d1 <- dSlot
s0, s3 // s0 <- s3
};
RunTest(pmc.Create(moves), zone);
}
}
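Several of the cycles above only resolve because the GapResolver can split a wide FP move into float32-sized pieces when it interferes with narrower moves. A minimal sketch of that decomposition, using a hypothetical Move struct rather than the GapResolver itself:

#include <cstdio>
#include <vector>

struct Move { int dst, src, lanes; };  // codes expressed in float32 lanes

// Decompose a wide move into single-lane (float32) moves.
std::vector<Move> SplitWideMove(const Move& m) {
  std::vector<Move> parts;
  for (int i = 0; i < m.lanes; ++i) parts.push_back({m.dst + i, m.src + i, 1});
  return parts;
}

int main() {
  // d0 <- d1 expressed in float32 lanes is {0, 2, 2}; it splits into
  // s0 <- s2 and s1 <- s3, which can then be swapped lane by lane.
  for (const Move& part : SplitWideMove({0, 2, 2}))
    std::printf("s%d <- s%d\n", part.dst, part.src);
  return 0;
}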
TEST(FuzzResolver) {
ParallelMoveCreator pmc;
for (int size = 0; size < 80; ++size) {

View File

@ -87,8 +87,16 @@ class RegisterPairs : public Pairs {
class Float32RegisterPairs : public Pairs {
public:
Float32RegisterPairs()
: Pairs(100, GetRegConfig()->num_allocatable_aliased_double_registers(),
GetRegConfig()->allocatable_double_codes()) {}
: Pairs(
100,
#if V8_TARGET_ARCH_ARM
// TODO(bbudge) Modify wasm linkage to allow use of all float regs.
GetRegConfig()->num_allocatable_double_registers() / 2 - 2,
#else
GetRegConfig()->num_allocatable_double_registers(),
#endif
GetRegConfig()->allocatable_double_codes()) {
}
};
@ -127,6 +135,10 @@ struct Allocator {
// Allocate a floating point register/stack location.
if (fp_offset < fp_count) {
int code = fp_regs[fp_offset++];
#if V8_TARGET_ARCH_ARM
// TODO(bbudge) Modify wasm linkage to allow use of all float regs.
if (type.representation() == MachineRepresentation::kFloat32) code *= 2;
#endif
return LinkageLocation::ForRegister(code, type);
} else {
int offset = -1 - stack_offset;

View File

@ -50,6 +50,7 @@ v8_executable("unittests") {
"compiler/instruction-selector-unittest.h",
"compiler/instruction-sequence-unittest.cc",
"compiler/instruction-sequence-unittest.h",
"compiler/instruction-unittest.cc",
"compiler/int64-lowering-unittest.cc",
"compiler/js-builtin-reducer-unittest.cc",
"compiler/js-create-lowering-unittest.cc",

View File

@ -22,11 +22,8 @@ static char register_names_[10 * (RegisterConfiguration::kMaxGeneralRegisters +
namespace {
static int allocatable_codes[InstructionSequenceTest::kDefaultNRegs] = {
0, 1, 2, 3, 4, 5, 6, 7};
static int allocatable_double_codes[InstructionSequenceTest::kDefaultNRegs] = {
0, 1, 2, 3, 4, 5, 6, 7};
}
static void InitializeRegisterNames() {
char* loc = register_names_;
for (int i = 0; i < RegisterConfiguration::kMaxGeneralRegisters; ++i) {
@ -92,8 +89,7 @@ RegisterConfiguration* InstructionSequenceTest::config() {
if (!config_) {
config_.reset(new RegisterConfiguration(
num_general_registers_, num_double_registers_, num_general_registers_,
num_double_registers_, num_double_registers_, allocatable_codes,
allocatable_double_codes,
num_double_registers_, allocatable_codes, allocatable_codes,
kSimpleFPAliasing ? RegisterConfiguration::OVERLAP
: RegisterConfiguration::COMBINE,
general_register_names_,

View File

@ -20,7 +20,9 @@ class InstructionSequenceTest : public TestWithIsolateAndZone {
static const int kDefaultNRegs = 8;
static const int kNoValue = kMinInt;
static const MachineRepresentation kNoRep = MachineRepresentation::kNone;
static const MachineRepresentation kFloat32 = MachineRepresentation::kFloat32;
static const MachineRepresentation kFloat64 = MachineRepresentation::kFloat64;
static const MachineRepresentation kSimd128 = MachineRepresentation::kSimd128;
typedef RpoNumber Rpo;

View File

@ -0,0 +1,175 @@
// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/compiler/instruction.h"
#include "src/register-configuration.h"
#include "test/unittests/test-utils.h"
#include "testing/gtest-support.h"
namespace v8 {
namespace internal {
namespace compiler {
namespace {
const MachineRepresentation kWord = MachineRepresentation::kWord32;
const MachineRepresentation kFloat = MachineRepresentation::kFloat32;
const MachineRepresentation kDouble = MachineRepresentation::kFloat64;
bool Interfere(LocationOperand::LocationKind kind, MachineRepresentation rep1,
int index1, MachineRepresentation rep2, int index2) {
return AllocatedOperand(kind, rep1, index1)
.InterferesWith(AllocatedOperand(kind, rep2, index2));
}
bool Contains(const ZoneVector<MoveOperands*>* moves,
const InstructionOperand& to, const InstructionOperand& from) {
for (auto move : *moves) {
if (move->destination().Equals(to) && move->source().Equals(from)) {
return true;
}
}
return false;
}
} // namespace
class InstructionTest : public TestWithZone {
public:
InstructionTest() {}
virtual ~InstructionTest() {}
ParallelMove* CreateParallelMove(
const std::vector<InstructionOperand>& operand_pairs) {
ParallelMove* parallel_move = new (zone()) ParallelMove(zone());
for (size_t i = 0; i < operand_pairs.size(); i += 2)
parallel_move->AddMove(operand_pairs[i + 1], operand_pairs[i]);
return parallel_move;
}
};
TEST_F(InstructionTest, OperandInterference) {
// All general registers and slots interfere only with themselves.
for (int i = 0; i < RegisterConfiguration::kMaxGeneralRegisters; ++i) {
EXPECT_TRUE(Interfere(LocationOperand::REGISTER, kWord, i, kWord, i));
EXPECT_TRUE(Interfere(LocationOperand::STACK_SLOT, kWord, i, kWord, i));
for (int j = i + 1; j < RegisterConfiguration::kMaxGeneralRegisters; ++j) {
EXPECT_FALSE(Interfere(LocationOperand::REGISTER, kWord, i, kWord, j));
EXPECT_FALSE(Interfere(LocationOperand::STACK_SLOT, kWord, i, kWord, j));
}
}
// All FP registers interfere with themselves.
for (int i = 0; i < RegisterConfiguration::kMaxFPRegisters; ++i) {
EXPECT_TRUE(Interfere(LocationOperand::REGISTER, kFloat, i, kFloat, i));
EXPECT_TRUE(Interfere(LocationOperand::STACK_SLOT, kFloat, i, kFloat, i));
EXPECT_TRUE(Interfere(LocationOperand::REGISTER, kDouble, i, kDouble, i));
EXPECT_TRUE(Interfere(LocationOperand::STACK_SLOT, kDouble, i, kDouble, i));
}
if (kSimpleFPAliasing) {
// Simple FP aliasing: interfering registers of different reps have the same
// index.
for (int i = 0; i < RegisterConfiguration::kMaxFPRegisters; ++i) {
EXPECT_TRUE(Interfere(LocationOperand::REGISTER, kFloat, i, kDouble, i));
EXPECT_TRUE(Interfere(LocationOperand::REGISTER, kDouble, i, kFloat, i));
for (int j = i + 1; j < RegisterConfiguration::kMaxFPRegisters; ++j) {
EXPECT_FALSE(Interfere(LocationOperand::REGISTER, kWord, i, kWord, j));
EXPECT_FALSE(
Interfere(LocationOperand::STACK_SLOT, kWord, i, kWord, j));
}
}
} else {
    // Complex FP aliasing: sub-registers interfere with containing registers.
// Test sub-register indices which may not exist on the platform. This is
// necessary since the GapResolver may split large moves into smaller ones.
for (int i = 0; i < RegisterConfiguration::kMaxFPRegisters; ++i) {
EXPECT_TRUE(
Interfere(LocationOperand::REGISTER, kFloat, i * 2, kDouble, i));
EXPECT_TRUE(
Interfere(LocationOperand::REGISTER, kFloat, i * 2 + 1, kDouble, i));
EXPECT_TRUE(
Interfere(LocationOperand::REGISTER, kDouble, i, kFloat, i * 2));
EXPECT_TRUE(
Interfere(LocationOperand::REGISTER, kDouble, i, kFloat, i * 2 + 1));
for (int j = i + 1; j < RegisterConfiguration::kMaxFPRegisters; ++j) {
EXPECT_FALSE(
Interfere(LocationOperand::REGISTER, kFloat, i * 2, kDouble, j));
EXPECT_FALSE(Interfere(LocationOperand::REGISTER, kFloat, i * 2 + 1,
kDouble, j));
EXPECT_FALSE(
Interfere(LocationOperand::REGISTER, kDouble, i, kFloat, j * 2));
EXPECT_FALSE(Interfere(LocationOperand::REGISTER, kDouble, i, kFloat,
j * 2 + 1));
}
}
}
}
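The expectations above boil down to a lane-overlap rule: under complex aliasing, two FP operands interfere exactly when the float32-sized lane ranges they cover overlap. A standalone restatement of that rule (hypothetical helper, not AllocatedOperand::InterferesWith):

#include <cassert>

struct Op { int code; int lanes; };  // lanes: 1 = float32, 2 = float64

bool InterferesSketch(Op a, Op b) {
  int a_base = a.code * a.lanes, b_base = b.code * b.lanes;
  return a_base < b_base + b.lanes && b_base < a_base + a.lanes;
}

int main() {
  assert(InterferesSketch({2, 1}, {1, 2}));   // s2 overlaps d1 (s2, s3)
  assert(InterferesSketch({3, 1}, {1, 2}));   // s3 overlaps d1
  assert(!InterferesSketch({4, 1}, {1, 2}));  // s4 does not overlap d1
  assert(!InterferesSketch({0, 2}, {1, 2}));  // d0 and d1 are disjoint
  return 0;
}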
TEST_F(InstructionTest, PrepareInsertAfter) {
InstructionOperand r0 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kWord32, 0);
InstructionOperand r1 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kWord32, 1);
InstructionOperand r2 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kWord32, 2);
InstructionOperand d0 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat64, 0);
InstructionOperand d1 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat64, 1);
InstructionOperand d2 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat64, 2);
{
// Moves inserted after should pick up assignments to their sources.
// Moves inserted after should cause interfering moves to be eliminated.
ZoneVector<MoveOperands*> to_eliminate(zone());
std::vector<InstructionOperand> moves = {
r1, r0, // r1 <- r0
r2, r0, // r2 <- r0
d1, d0, // d1 <- d0
d2, d0 // d2 <- d0
};
ParallelMove* pm = CreateParallelMove(moves);
MoveOperands m1(r1, r2); // r2 <- r1
pm->PrepareInsertAfter(&m1, &to_eliminate);
CHECK(m1.source().Equals(r0));
CHECK(Contains(&to_eliminate, r2, r0));
MoveOperands m2(d1, d2); // d2 <- d1
pm->PrepareInsertAfter(&m2, &to_eliminate);
CHECK(m2.source().Equals(d0));
CHECK(Contains(&to_eliminate, d2, d0));
}
if (!kSimpleFPAliasing) {
// Moves inserted after should cause all interfering moves to be eliminated.
auto s0 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat32, 0);
auto s1 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat32, 1);
auto s2 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat32, 2);
{
ZoneVector<MoveOperands*> to_eliminate(zone());
std::vector<InstructionOperand> moves = {
s0, s2, // s0 <- s2
s1, s2 // s1 <- s2
};
ParallelMove* pm = CreateParallelMove(moves);
MoveOperands m1(d1, d0); // d0 <- d1
pm->PrepareInsertAfter(&m1, &to_eliminate);
CHECK(Contains(&to_eliminate, s0, s2));
CHECK(Contains(&to_eliminate, s1, s2));
}
}
}
} // namespace compiler
} // namespace internal
} // namespace v8

View File

@ -12,6 +12,14 @@ namespace compiler {
class MoveOptimizerTest : public InstructionSequenceTest {
public:
// FP register indices which don't interfere under simple or complex aliasing.
static const int kF64_1 = 0;
static const int kF64_2 = 1;
static const int kF32_1 = 4;
static const int kF32_2 = 5;
static const int kS128_1 = 2;
static const int kS128_2 = 3;
Instruction* LastInstruction() { return sequence()->instructions().back(); }
void AddMove(Instruction* instr, TestOperand from, TestOperand to,
@ -99,8 +107,12 @@ TEST_F(MoveOptimizerTest, RemovesRedundant) {
AddMove(first_instr, Reg(0), Reg(1));
AddMove(last_instr, Reg(1), Reg(0));
AddMove(first_instr, FPReg(0), FPReg(1));
AddMove(last_instr, FPReg(1), FPReg(0));
AddMove(first_instr, FPReg(kS128_1, kSimd128), FPReg(kS128_2, kSimd128));
AddMove(last_instr, FPReg(kS128_2, kSimd128), FPReg(kS128_1, kSimd128));
AddMove(first_instr, FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64));
AddMove(last_instr, FPReg(kF64_2, kFloat64), FPReg(kF64_1, kFloat64));
AddMove(first_instr, FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32));
AddMove(last_instr, FPReg(kF32_2, kFloat32), FPReg(kF32_1, kFloat32));
EndBlock(Last());
@ -108,22 +120,38 @@ TEST_F(MoveOptimizerTest, RemovesRedundant) {
CHECK_EQ(0, NonRedundantSize(first_instr->parallel_moves()[0]));
auto move = last_instr->parallel_moves()[0];
CHECK_EQ(2, NonRedundantSize(move));
CHECK_EQ(4, NonRedundantSize(move));
CHECK(Contains(move, Reg(0), Reg(1)));
CHECK(Contains(move, FPReg(0), FPReg(1)));
CHECK(Contains(move, FPReg(kS128_1, kSimd128), FPReg(kS128_2, kSimd128)));
CHECK(Contains(move, FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64)));
CHECK(Contains(move, FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32)));
}
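The FP indices defined at the top of this fixture (kF64_1/kF64_2 = 0/1, kS128_1/kS128_2 = 2/3, kF32_1/kF32_2 = 4/5) are chosen so that no two operands alias even under ARM-style combine aliasing, assuming the test indices map directly onto s/d/q register codes. A small self-check of that claim using plain lane arithmetic:

#include <cassert>

// Float32 lanes covered by an FP register of a given width under combine
// aliasing: kFloat32 covers 1 lane, kFloat64 covers 2, kSimd128 covers 4.
struct LaneRange { int first, count; };

LaneRange Lanes(int code, int lanes_per_reg) {
  return {code * lanes_per_reg, lanes_per_reg};
}

bool Disjoint(LaneRange a, LaneRange b) {
  return a.first + a.count <= b.first || b.first + b.count <= a.first;
}

int main() {
  // The fixture's constants: d0/d1, q2/q3, s4/s5.
  LaneRange all[] = {Lanes(0, 2), Lanes(1, 2),   // s0-s1, s2-s3
                     Lanes(2, 4), Lanes(3, 4),   // s8-s11, s12-s15
                     Lanes(4, 1), Lanes(5, 1)};  // s4, s5
  for (int i = 0; i < 6; ++i)
    for (int j = i + 1; j < 6; ++j) assert(Disjoint(all[i], all[j]));
  return 0;
}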
TEST_F(MoveOptimizerTest, RemovesRedundantExplicit) {
int first_reg_index = GetAllocatableCode(0);
int second_reg_index = GetAllocatableCode(1);
int index1 = GetAllocatableCode(0);
int index2 = GetAllocatableCode(1);
int s128_1 = GetAllocatableCode(kS128_1, kSimd128);
int s128_2 = GetAllocatableCode(kS128_2, kSimd128);
int f64_1 = GetAllocatableCode(kF64_1, kFloat64);
int f64_2 = GetAllocatableCode(kF64_2, kFloat64);
int f32_1 = GetAllocatableCode(kF32_1, kFloat32);
int f32_2 = GetAllocatableCode(kF32_2, kFloat32);
StartBlock();
auto first_instr = EmitNop();
auto last_instr = EmitNop();
AddMove(first_instr, Reg(first_reg_index), ExplicitReg(second_reg_index));
AddMove(last_instr, Reg(second_reg_index), Reg(first_reg_index));
AddMove(first_instr, Reg(index1), ExplicitReg(index2));
AddMove(last_instr, Reg(index2), Reg(index1));
AddMove(first_instr, FPReg(s128_1, kSimd128),
ExplicitFPReg(s128_2, kSimd128));
AddMove(last_instr, FPReg(s128_2, kSimd128), FPReg(s128_1, kSimd128));
AddMove(first_instr, FPReg(f64_1, kFloat64), ExplicitFPReg(f64_2, kFloat64));
AddMove(last_instr, FPReg(f64_2, kFloat64), FPReg(f64_1, kFloat64));
AddMove(first_instr, FPReg(f32_1, kFloat32), ExplicitFPReg(f32_2, kFloat32));
AddMove(last_instr, FPReg(f32_2, kFloat32), FPReg(f32_1, kFloat32));
EndBlock(Last());
@ -131,8 +159,12 @@ TEST_F(MoveOptimizerTest, RemovesRedundantExplicit) {
CHECK_EQ(0, NonRedundantSize(first_instr->parallel_moves()[0]));
auto move = last_instr->parallel_moves()[0];
CHECK_EQ(1, NonRedundantSize(move));
CHECK(Contains(move, Reg(first_reg_index), ExplicitReg(second_reg_index)));
CHECK_EQ(4, NonRedundantSize(move));
CHECK(Contains(move, Reg(index1), ExplicitReg(index2)));
CHECK(
Contains(move, FPReg(s128_1, kSimd128), ExplicitFPReg(s128_2, kSimd128)));
CHECK(Contains(move, FPReg(f64_1, kFloat64), ExplicitFPReg(f64_2, kFloat64)));
CHECK(Contains(move, FPReg(f32_1, kFloat32), ExplicitFPReg(f32_2, kFloat32)));
}
@ -167,10 +199,18 @@ TEST_F(MoveOptimizerTest, SimpleMerge) {
StartBlock();
EndBlock(Jump(2));
AddMove(LastInstruction(), Reg(0), Reg(1));
AddMove(LastInstruction(), FPReg(kS128_1, kSimd128),
FPReg(kS128_2, kSimd128));
AddMove(LastInstruction(), FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64));
AddMove(LastInstruction(), FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32));
StartBlock();
EndBlock(Jump(1));
AddMove(LastInstruction(), Reg(0), Reg(1));
AddMove(LastInstruction(), FPReg(kS128_1, kSimd128),
FPReg(kS128_2, kSimd128));
AddMove(LastInstruction(), FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64));
AddMove(LastInstruction(), FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32));
StartBlock();
EndBlock(Last());
@ -180,8 +220,11 @@ TEST_F(MoveOptimizerTest, SimpleMerge) {
Optimize();
auto move = last->parallel_moves()[0];
CHECK_EQ(1, NonRedundantSize(move));
CHECK_EQ(4, NonRedundantSize(move));
CHECK(Contains(move, Reg(0), Reg(1)));
CHECK(Contains(move, FPReg(kS128_1, kSimd128), FPReg(kS128_2, kSimd128)));
CHECK(Contains(move, FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64)));
CHECK(Contains(move, FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32)));
}
@ -195,16 +238,25 @@ TEST_F(MoveOptimizerTest, SimpleMergeCycle) {
AddMove(gap_0, Reg(0), Reg(1));
AddMove(LastInstruction(), Reg(1), Reg(0));
AddMove(gap_0, FPReg(0), FPReg(1));
AddMove(LastInstruction(), FPReg(1), FPReg(0));
AddMove(gap_0, FPReg(kS128_1, kSimd128), FPReg(kS128_2, kSimd128));
AddMove(LastInstruction(), FPReg(kS128_2, kSimd128),
FPReg(kS128_1, kSimd128));
AddMove(gap_0, FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64));
AddMove(LastInstruction(), FPReg(kF64_2, kFloat64), FPReg(kF64_1, kFloat64));
AddMove(gap_0, FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32));
AddMove(LastInstruction(), FPReg(kF32_2, kFloat32), FPReg(kF32_1, kFloat32));
StartBlock();
EndBlock(Jump(1));
auto gap_1 = LastInstruction();
AddMove(gap_1, Reg(0), Reg(1));
AddMove(gap_1, Reg(1), Reg(0));
AddMove(gap_1, FPReg(0), FPReg(1));
AddMove(gap_1, FPReg(1), FPReg(0));
AddMove(gap_1, FPReg(kS128_1, kSimd128), FPReg(kS128_2, kSimd128));
AddMove(gap_1, FPReg(kS128_2, kSimd128), FPReg(kS128_1, kSimd128));
AddMove(gap_1, FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64));
AddMove(gap_1, FPReg(kF64_2, kFloat64), FPReg(kF64_1, kFloat64));
AddMove(gap_1, FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32));
AddMove(gap_1, FPReg(kF32_2, kFloat32), FPReg(kF32_1, kFloat32));
StartBlock();
EndBlock(Last());
@ -216,11 +268,15 @@ TEST_F(MoveOptimizerTest, SimpleMergeCycle) {
CHECK(gap_0->AreMovesRedundant());
CHECK(gap_1->AreMovesRedundant());
auto move = last->parallel_moves()[0];
CHECK_EQ(4, NonRedundantSize(move));
CHECK_EQ(8, NonRedundantSize(move));
CHECK(Contains(move, Reg(0), Reg(1)));
CHECK(Contains(move, Reg(1), Reg(0)));
CHECK(Contains(move, FPReg(0), FPReg(1)));
CHECK(Contains(move, FPReg(1), FPReg(0)));
CHECK(Contains(move, FPReg(kS128_1, kSimd128), FPReg(kS128_2, kSimd128)));
CHECK(Contains(move, FPReg(kS128_2, kSimd128), FPReg(kS128_1, kSimd128)));
CHECK(Contains(move, FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64)));
CHECK(Contains(move, FPReg(kF64_2, kFloat64), FPReg(kF64_1, kFloat64)));
CHECK(Contains(move, FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32)));
CHECK(Contains(move, FPReg(kF32_2, kFloat32), FPReg(kF32_1, kFloat32)));
}
@ -342,8 +398,31 @@ TEST_F(MoveOptimizerTest, ClobberedDestinationsAreEliminated) {
EmitNop();
Instruction* first_instr = LastInstruction();
AddMove(first_instr, Reg(0), Reg(1));
AddMove(first_instr, FPReg(0), FPReg(1));
EmitOOI(Reg(1), FPReg(1), 0, nullptr);
EmitOI(Reg(1), 0, nullptr);
Instruction* last_instr = LastInstruction();
EndBlock();
Optimize();
ParallelMove* first_move = first_instr->parallel_moves()[0];
CHECK_EQ(0, NonRedundantSize(first_move));
ParallelMove* last_move = last_instr->parallel_moves()[0];
CHECK_EQ(0, NonRedundantSize(last_move));
}
TEST_F(MoveOptimizerTest, ClobberedFPDestinationsAreEliminated) {
StartBlock();
EmitNop();
Instruction* first_instr = LastInstruction();
AddMove(first_instr, FPReg(4, kFloat64), FPReg(1, kFloat64));
if (!kSimpleFPAliasing) {
// We clobber q0 below. This is aliased by d0, d1, s0, s1, s2, and s3.
// Add moves to registers s2 and s3.
AddMove(first_instr, FPReg(10, kFloat32), FPReg(0, kFloat32));
AddMove(first_instr, FPReg(11, kFloat32), FPReg(1, kFloat32));
}
// Clobbers output register 0.
EmitOI(FPReg(0, kSimd128), 0, nullptr);
Instruction* last_instr = LastInstruction();
EndBlock();
Optimize();

View File

@ -101,13 +101,14 @@ TEST_F(RegisterAllocatorTest, CanAllocateThreeRegisters) {
Allocate();
}
TEST_F(RegisterAllocatorTest, CanAllocateThreeFPRegisters) {
// return p0 + p1;
TEST_F(RegisterAllocatorTest, CanAllocateFPRegisters) {
StartBlock();
VReg a_reg = FPParameter();
VReg b_reg = FPParameter();
VReg c_reg = EmitOI(FPReg(1), Reg(a_reg, 1), Reg(b_reg, 0));
Return(c_reg);
TestOperand inputs[] = {
Reg(FPParameter(kFloat64)), Reg(FPParameter(kFloat64)),
Reg(FPParameter(kFloat32)), Reg(FPParameter(kFloat32)),
Reg(FPParameter(kSimd128)), Reg(FPParameter(kSimd128))};
VReg out1 = EmitOI(FPReg(1, kFloat64), arraysize(inputs), inputs);
Return(out1);
EndBlock(Last());
Allocate();

View File

@ -16,8 +16,6 @@ class RegisterConfigurationUnitTest : public ::testing::Test {
public:
RegisterConfigurationUnitTest() {}
virtual ~RegisterConfigurationUnitTest() {}
private:
};
TEST_F(RegisterConfigurationUnitTest, BasicProperties) {
@ -30,9 +28,8 @@ TEST_F(RegisterConfigurationUnitTest, BasicProperties) {
RegisterConfiguration test(
kNumGeneralRegs, kNumDoubleRegs, kNumAllocatableGeneralRegs,
kNumAllocatableDoubleRegs, kNumAllocatableDoubleRegs, general_codes,
double_codes, RegisterConfiguration::OVERLAP, nullptr, nullptr, nullptr,
nullptr);
kNumAllocatableDoubleRegs, general_codes, double_codes,
RegisterConfiguration::OVERLAP, nullptr, nullptr, nullptr, nullptr);
EXPECT_EQ(test.num_general_registers(), kNumGeneralRegs);
EXPECT_EQ(test.num_double_registers(), kNumDoubleRegs);
@ -67,9 +64,8 @@ TEST_F(RegisterConfigurationUnitTest, CombineAliasing) {
RegisterConfiguration test(
kNumGeneralRegs, kNumDoubleRegs, kNumAllocatableGeneralRegs,
kNumAllocatableDoubleRegs, kNumAllocatableDoubleRegs, general_codes,
double_codes, RegisterConfiguration::COMBINE, nullptr, nullptr, nullptr,
nullptr);
kNumAllocatableDoubleRegs, general_codes, double_codes,
RegisterConfiguration::COMBINE, nullptr, nullptr, nullptr, nullptr);
// There are 3 allocatable double regs, but only 2 can alias float regs.
EXPECT_EQ(test.num_allocatable_float_registers(), 4);
@ -157,9 +153,10 @@ TEST_F(RegisterConfigurationUnitTest, CombineAliasing) {
test.GetAliases(kFloat64, RegisterConfiguration::kMaxFPRegisters / 2 + 1,
kFloat32, &alias_base_index),
0);
EXPECT_EQ(test.GetAliases(kFloat64, RegisterConfiguration::kMaxFPRegisters,
kFloat32, &alias_base_index),
0);
EXPECT_EQ(
test.GetAliases(kFloat64, RegisterConfiguration::kMaxFPRegisters - 1,
kFloat32, &alias_base_index),
0);
}
} // namespace internal
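With the num_allocatable_aliased_double_registers argument gone, the allocatable float registers checked above are derived from the allocatable double registers by the configuration itself. A sketch of that derivation under combine aliasing, with a hypothetical helper rather than the RegisterConfiguration constructor; the register counts in main are made up for illustration:

#include <cstdint>
#include <cstdio>
#include <vector>

// Each allocatable double register d<k> contributes float codes 2k and 2k + 1,
// as long as they stay below the float register count.
std::vector<int> FloatCodesFromDoubleCodes(const std::vector<int>& double_codes,
                                           int num_float_registers,
                                           uint32_t* float_mask) {
  std::vector<int> float_codes;
  *float_mask = 0;
  for (int d : double_codes) {
    for (int s = d * 2; s <= d * 2 + 1; ++s) {
      if (s >= num_float_registers) continue;
      float_codes.push_back(s);
      *float_mask |= uint32_t{1} << s;
    }
  }
  return float_codes;
}

int main() {
  uint32_t mask = 0;
  // Doubles d0-d2 and d8 allocatable, 16 single-precision registers available.
  auto floats = FloatCodesFromDoubleCodes({0, 1, 2, 8}, 16, &mask);
  std::printf("%zu float codes, mask 0x%x\n", floats.size(), mask);  // 6, 0x3f
  return 0;
}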

View File

@ -42,6 +42,7 @@
'compiler/graph-trimmer-unittest.cc',
'compiler/graph-unittest.cc',
'compiler/graph-unittest.h',
'compiler/instruction-unittest.cc',
'compiler/instruction-selector-unittest.cc',
'compiler/instruction-selector-unittest.h',
'compiler/instruction-sequence-unittest.cc',