diff --git a/src/wasm/baseline/liftoff-assembler.cc b/src/wasm/baseline/liftoff-assembler.cc
index ff2ab4a3f1..ba5ca6b2d9 100644
--- a/src/wasm/baseline/liftoff-assembler.cc
+++ b/src/wasm/baseline/liftoff-assembler.cc
@@ -807,6 +807,58 @@ LiftoffRegister LiftoffAssembler::SpillOneRegister(LiftoffRegList candidates,
   return spill_reg;
 }
 
+LiftoffRegister LiftoffAssembler::SpillAdjacentFpRegisters(
+    LiftoffRegList pinned) {
+  // We end up in this call only when:
+  // [1] kNeedS128RegPair, and
+  // [2] there is no pair of adjacent FP registers that is free.
+  CHECK(kNeedS128RegPair);
+  DCHECK(!kFpCacheRegList.MaskOut(pinned)
+              .MaskOut(cache_state_.used_registers)
+              .HasAdjacentFpRegsSet());
+
+  // If the last fp cache register has an even code, its odd partner is not a
+  // cache register. Pin it so that it is never picked as {first_reg} below,
+  // which would make {second_reg} an invalid register.
+  LiftoffRegister last_fp = kFpCacheRegList.GetLastRegSet();
+  if (last_fp.fp().code() % 2 == 0) {
+    pinned.set(last_fp);
+  }
+
+  // If half of an adjacent pair is pinned, consider the whole pair pinned.
+  // Otherwise the code below could pick the unpinned half as {first_reg} and
+  // then spill and hand out its pinned partner as {second_reg}.
+  pinned = pinned.SpreadSetBitsToAdjacentFpRegs();
+
+  // We can try to optimize the spilling here:
+  // 1. Try to get a free fp register, either:
+  //  a. This register is already free, or
+  //  b. it had to be spilled.
+  // 2. If 1a, the adjacent register is used (invariant [2]), spill it.
+  // 3. If 1b, check the adjacent register:
+  //  a. If free, done!
+  //  b. If used, spill it.
+  // We spill one register in 2 and 3a, and two registers in 3b.
+
+  LiftoffRegister first_reg = GetUnusedRegister(kFpCacheRegList, pinned);
+  LiftoffRegister second_reg = first_reg, low_reg = first_reg;
+
+  if (first_reg.fp().code() % 2 == 0) {
+    second_reg =
+        LiftoffRegister::from_liftoff_code(first_reg.liftoff_code() + 1);
+  } else {
+    second_reg =
+        LiftoffRegister::from_liftoff_code(first_reg.liftoff_code() - 1);
+    low_reg = second_reg;
+  }
+
+  if (cache_state_.is_used(second_reg)) {
+    SpillRegister(second_reg);
+  }
+
+  return low_reg;
+}
+
 void LiftoffAssembler::SpillRegister(LiftoffRegister reg) {
   int remaining_uses = cache_state_.get_use_count(reg);
   DCHECK_LT(0, remaining_uses);
@@ -814,7 +866,7 @@ void LiftoffAssembler::SpillRegister(LiftoffRegister reg) {
     DCHECK_GT(cache_state_.stack_height(), idx);
     auto* slot = &cache_state_.stack_state[idx];
     if (!slot->is_reg() || !slot->reg().overlaps(reg)) continue;
-    if (slot->reg().is_gp_pair()) {
+    if (slot->reg().is_pair()) {
       // Make sure to decrement *both* registers in a pair, because the
       // {clear_used} call below only clears one of them.
       cache_state_.dec_used(slot->reg().low());
diff --git a/src/wasm/baseline/liftoff-assembler.h b/src/wasm/baseline/liftoff-assembler.h
index a438a81484..001a34e51e 100644
--- a/src/wasm/baseline/liftoff-assembler.h
+++ b/src/wasm/baseline/liftoff-assembler.h
@@ -146,6 +146,10 @@ class LiftoffAssembler : public TurboAssembler {
         LiftoffRegList available_regs =
             kGpCacheRegList.MaskOut(used_registers).MaskOut(pinned);
         return available_regs.GetNumRegsSet() >= 2;
+      } else if (kNeedS128RegPair && rc == kFpRegPair) {
+        LiftoffRegList available_regs =
+            kFpCacheRegList.MaskOut(used_registers).MaskOut(pinned);
+        return available_regs.HasAdjacentFpRegsSet();
       }
       DCHECK(rc == kGpReg || rc == kFpReg);
       LiftoffRegList candidates = GetCacheRegList(rc);
@@ -165,6 +169,13 @@ class LiftoffAssembler : public TurboAssembler {
         Register low = pinned.set(unused_register(kGpReg, pinned)).gp();
         Register high = unused_register(kGpReg, pinned).gp();
         return LiftoffRegister::ForPair(low, high);
+      } else if (kNeedS128RegPair && rc == kFpRegPair) {
+        LiftoffRegList available_regs =
+            kFpCacheRegList.MaskOut(used_registers).MaskOut(pinned);
+        DoubleRegister low =
+            available_regs.GetAdjacentFpRegsSet().GetFirstRegSet().fp();
+        DCHECK(is_free(LiftoffRegister::ForFpPair(low)));
+        return LiftoffRegister::ForFpPair(low);
       }
       DCHECK(rc == kGpReg || rc == kFpReg);
       LiftoffRegList candidates = GetCacheRegList(rc);
@@ -323,6 +334,14 @@ class LiftoffAssembler : public TurboAssembler {
       Register low = pinned.set(GetUnusedRegister(candidates, pinned)).gp();
       Register high = GetUnusedRegister(candidates, pinned).gp();
       return LiftoffRegister::ForPair(low, high);
+    } else if (kNeedS128RegPair && rc == kFpRegPair) {
+      // kFpRegPair specific logic here because we need adjacent registers, not
+      // just any two registers (like kGpRegPair).
+      if (cache_state_.has_unused_register(rc, pinned)) {
+        return cache_state_.unused_register(rc, pinned);
+      }
+      DoubleRegister low_fp = SpillAdjacentFpRegisters(pinned).fp();
+      return LiftoffRegister::ForFpPair(low_fp);
     }
     DCHECK(rc == kGpReg || rc == kFpReg);
     LiftoffRegList candidates = GetCacheRegList(rc);
@@ -717,6 +736,9 @@ class LiftoffAssembler : public TurboAssembler {
 
   LiftoffRegister SpillOneRegister(LiftoffRegList candidates,
                                    LiftoffRegList pinned);
+  // Spill one or two fp registers to get a pair of adjacent fp registers.
+  // Returns the low register of that pair.
+  LiftoffRegister SpillAdjacentFpRegisters(LiftoffRegList pinned);
 };
 
 std::ostream& operator<<(std::ostream& os, LiftoffAssembler::VarState);
diff --git a/src/wasm/baseline/liftoff-register.h b/src/wasm/baseline/liftoff-register.h
index 32188139bb..65fca07214 100644
--- a/src/wasm/baseline/liftoff-register.h
+++ b/src/wasm/baseline/liftoff-register.h
@@ -306,6 +306,9 @@ class LiftoffRegList {
   static constexpr storage_t kGpMask = storage_t{kLiftoffAssemblerGpCacheRegs};
   static constexpr storage_t kFpMask = storage_t{kLiftoffAssemblerFpCacheRegs}
                                        << kAfterMaxLiftoffGpRegCode;
+  // Mask with the bit of every even numbered fp register set.
+  static constexpr uint64_t kEvenFpSetMask = uint64_t{0x5555555555555555}
+                                             << kAfterMaxLiftoffGpRegCode;
 
   constexpr LiftoffRegList() = default;
 
@@ -358,6 +361,30 @@ class LiftoffRegList {
     return LiftoffRegList(regs_ | other.regs_);
   }
 
+  // Returns the low (even) register of every aligned fp pair whose two
+  // registers are both in this list.
+  constexpr LiftoffRegList GetAdjacentFpRegsSet() const {
+    // And regs_ with a right shifted version of itself, so bit i survives only
+    // if bit i+1 is set as well. We only care about the even fp registers.
+    storage_t available = (regs_ >> 1) & regs_ & kEvenFpSetMask;
+    return LiftoffRegList(available);
+  }
+
+  // Returns whether this list contains both halves of an aligned fp pair.
+  constexpr bool HasAdjacentFpRegsSet() const {
+    return !GetAdjacentFpRegsSet().is_empty();
+  }
+
+  // Returns a list where, if either half of an aligned fp pair is set, both
+  // halves are set. For example, fp registers [1, 4] become [0, 1, 4, 5].
+  // Registers outside of kFpMask are never added.
+  constexpr LiftoffRegList SpreadSetBitsToAdjacentFpRegs() const {
+    storage_t even_regs = regs_ & kEvenFpSetMask;
+    storage_t odd_regs = regs_ & (kEvenFpSetMask << 1);
+    storage_t spread = ((even_regs << 1) | (odd_regs >> 1)) & kFpMask;
+    return LiftoffRegList(regs_ | spread);
+  }
+
   constexpr bool operator==(const LiftoffRegList other) const {
     return regs_ == other.regs_;
   }