[Liftoff] Fix 64bit shift on ia32

With just five cache registers, Liftoff can run out of memory on a
64bit shift. This CL solves this by using a parallel register move and
pinning less registers.

R=ahaas@chromium.org

Bug: chromium:894307
Change-Id: I91ed0fee00ceb452841e5d1bb10905be6702dcce
Reviewed-on: https://chromium-review.googlesource.com/c/1337580
Commit-Queue: Clemens Hammacher <clemensh@chromium.org>
Reviewed-by: Andreas Haas <ahaas@chromium.org>
Cr-Commit-Position: refs/heads/master@{#57552}
This commit is contained in:
Clemens Hammacher 2018-11-15 16:11:42 +01:00 committed by Commit Bot
parent 01079cb82f
commit 59a8eba8d7
4 changed files with 36 additions and 13 deletions

View File

@ -777,9 +777,10 @@ void LiftoffAssembler::emit_i64_mul(LiftoffRegister dst, LiftoffRegister lhs,
liftoff::SpillRegisters(this, dst_hi, dst_lo, lhs_hi, rhs_lo);
// Move lhs and rhs into the respective registers.
ParallelRegisterMove(
{{LiftoffRegister::ForPair(lhs_lo, lhs_hi), lhs, kWasmI64},
{LiftoffRegister::ForPair(rhs_lo, rhs_hi), rhs, kWasmI64}});
ParallelRegisterMoveTuple reg_moves[]{
{LiftoffRegister::ForPair(lhs_lo, lhs_hi), lhs, kWasmI64},
{LiftoffRegister::ForPair(rhs_lo, rhs_hi), rhs, kWasmI64}};
ParallelRegisterMove(ArrayVector(reg_moves));
// First mul: lhs_hi' = lhs_hi * rhs_lo.
imul(lhs_hi, rhs_lo);
@ -842,27 +843,30 @@ inline void Emit64BitShiftOperation(
LiftoffAssembler* assm, LiftoffRegister dst, LiftoffRegister src,
Register amount, void (TurboAssembler::*emit_shift)(Register, Register),
LiftoffRegList pinned) {
// Temporary registers cannot overlap with {dst}.
pinned.set(dst);
pinned.set(src);
pinned.set(amount);
std::vector<LiftoffAssembler::ParallelRegisterMoveTuple> reg_moves;
// If {dst} contains {ecx}, replace it by an unused register, which is then
// moved to {ecx} in the end.
Register ecx_replace = no_reg;
if (PairContains(dst, ecx)) {
ecx_replace = pinned.set(assm->GetUnusedRegister(kGpReg, pinned)).gp();
ecx_replace = assm->GetUnusedRegister(kGpReg, pinned).gp();
dst = ReplaceInPair(dst, ecx, ecx_replace);
// If {amount} needs to be moved to {ecx}, but {ecx} is in use (and not part
// of {dst}, hence overwritten anyway), move {ecx} to a tmp register and
// restore it at the end.
} else if (amount != ecx &&
assm->cache_state()->is_used(LiftoffRegister(ecx))) {
(assm->cache_state()->is_used(LiftoffRegister(ecx)) ||
pinned.has(LiftoffRegister(ecx)))) {
ecx_replace = assm->GetUnusedRegister(kGpReg, pinned).gp();
assm->mov(ecx_replace, ecx);
reg_moves.emplace_back(ecx_replace, ecx, kWasmI32);
}
assm->ParallelRegisterMove(
{{dst, src, kWasmI64},
{LiftoffRegister{ecx}, LiftoffRegister{amount}, kWasmI32}});
reg_moves.emplace_back(dst, src, kWasmI64);
reg_moves.emplace_back(ecx, amount, kWasmI32);
assm->ParallelRegisterMove({reg_moves.data(), reg_moves.size()});
// Do the actual shift.
(assm->*emit_shift)(dst.high_gp(), dst.low_gp());

View File

@ -596,7 +596,7 @@ void LiftoffAssembler::Move(LiftoffRegister dst, LiftoffRegister src,
}
void LiftoffAssembler::ParallelRegisterMove(
std::initializer_list<ParallelRegisterMoveTuple> tuples) {
Vector<ParallelRegisterMoveTuple> tuples) {
StackTransferRecipe stack_transfers(this);
for (auto tuple : tuples) {
if (tuple.dst == tuple.src) continue;

View File

@ -332,8 +332,11 @@ class LiftoffAssembler : public TurboAssembler {
LiftoffRegister dst;
LiftoffRegister src;
ValueType type;
template <typename Dst, typename Src>
ParallelRegisterMoveTuple(Dst dst, Src src, ValueType type)
: dst(dst), src(src), type(type) {}
};
void ParallelRegisterMove(std::initializer_list<ParallelRegisterMoveTuple>);
void ParallelRegisterMove(Vector<ParallelRegisterMoveTuple>);
// Validate that the register use counts reflect the state of the cache.
bool ValidateCacheState() const;

View File

@ -0,0 +1,16 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
load('test/mjsunit/wasm/wasm-constants.js');
load('test/mjsunit/wasm/wasm-module-builder.js');
const builder = new WasmModuleBuilder();
const sig = makeSig([kWasmI32, kWasmI64, kWasmI64], [kWasmI64]);
builder.addFunction(undefined, sig)
.addBody([
kExprGetLocal, 2,
kExprGetLocal, 1,
kExprI64Shl,
]);
builder.instantiate();