PPC/s390: [Liftoff] Improve initialization for many locals
Port a8cdda9947
Original Commit Message:
WebAssembly locals are specified to be zero on function entry. Liftoff
implements this by just storing the constant 0 in the virtual stack for
integer types, and using one floating point register initialized to
zero for all floating point types.
For big counts of locals this leads to problems (manifesting as huge
blocks of code being generated) once we hit a merge point: All those
constants (for int) and all duplicate register uses (for floats) need to
be fixed up, by using separate registers for the locals or spilling to
the stack if no more registers are available. All this spilling
generates a lot of code, and can even happen multiple times within a
function.
This CL optimizes for such cases by spilling all locals to the stack
initially. All merges within the function body get much smaller then.
The spilled values rarely have to be loaded anyway, because the initial
zero value is usually overwritten before the first use.
To optimize the code size for initializing big numbers of locals on the
stack, this CL also introduces the platform-specific
{FillStackSlotsWithZero} method which uses a loop for bigger local
counts.
This often saves dozens of kilobytes for very big functions, and shows
an overall code size reduction of 4-5 percent for big modules.
R=clemensb@chromium.org, joransiu@ca.ibm.com, jyan@ca.ibm.com, michael_dawson@ca.ibm.com
BUG=
LOG=N
Change-Id: I2459080a1f6acfdd212e9a93a868d028980c5554
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1863370
Reviewed-by: Junliang Yan <jyan@ca.ibm.com>
Reviewed-by: Milad Farazmand <miladfar@ca.ibm.com>
Commit-Queue: Milad Farazmand <miladfar@ca.ibm.com>
Cr-Commit-Position: refs/heads/master@{#64301}
This commit is contained in:
parent
e359c49b9d
commit
c314cf7466
@ -12,6 +12,49 @@ namespace v8 {
|
||||
namespace internal {
|
||||
namespace wasm {
|
||||
|
||||
namespace liftoff {
|
||||
|
||||
// half
|
||||
// slot Frame
|
||||
// -----+--------------------+---------------------------
|
||||
// n+3 | parameter n |
|
||||
// ... | ... |
|
||||
// 4 | parameter 1 | or parameter 2
|
||||
// 3 | parameter 0 | or parameter 1
|
||||
// 2 | (result address) | or parameter 0
|
||||
// -----+--------------------+---------------------------
|
||||
// 1 | return addr (lr) |
|
||||
// 0 | previous frame (fp)|
|
||||
// -----+--------------------+ <-- frame ptr (fp)
|
||||
// -1 | 0xa: WASM_COMPILED |
|
||||
// -2 | instance |
|
||||
// -----+--------------------+---------------------------
|
||||
// -3 | slot 0 (high) | ^
|
||||
// -4 | slot 0 (low) | |
|
||||
// -5 | slot 1 (high) | Frame slots
|
||||
// -6 | slot 1 (low) | |
|
||||
// | | v
|
||||
// -----+--------------------+ <-- stack ptr (sp)
|
||||
//
|
||||
|
||||
constexpr int32_t kInstanceOffset = 2 * kSystemPointerSize;
|
||||
constexpr int32_t kFirstStackSlotOffset =
|
||||
kInstanceOffset + 2 * kSystemPointerSize;
|
||||
|
||||
inline int GetStackSlotOffset(uint32_t index) {
|
||||
return kFirstStackSlotOffset + index * LiftoffAssembler::kStackSlotSize;
|
||||
}
|
||||
|
||||
inline MemOperand GetHalfStackSlot(uint32_t index, RegPairHalf half) {
|
||||
int32_t half_offset =
|
||||
half == kLowWord ? 0 : LiftoffAssembler::kStackSlotSize / 2;
|
||||
int32_t offset = kFirstStackSlotOffset +
|
||||
index * LiftoffAssembler::kStackSlotSize - half_offset;
|
||||
return MemOperand(fp, -offset);
|
||||
}
|
||||
|
||||
} // namespace liftoff
|
||||
|
||||
int LiftoffAssembler::PrepareStackFrame() {
|
||||
bailout(kUnsupportedArchitecture, "PrepareStackFrame");
|
||||
return 0;
|
||||
@ -108,6 +151,45 @@ void LiftoffAssembler::FillI64Half(Register, uint32_t index, RegPairHalf) {
|
||||
bailout(kUnsupportedArchitecture, "FillI64Half");
|
||||
}
|
||||
|
||||
// Emits code that writes zero to the {count} consecutive stack slots
// [index, index + count). {count} must be greater than zero. All registers
// used as temporaries (r0, r4, r5) are saved and restored around the fill.
void LiftoffAssembler::FillStackSlotsWithZero(uint32_t index, uint32_t count) {
  DCHECK_LT(0, count);
  uint32_t last_stack_slot = index + count - 1;
  RecordUsedSpillSlot(last_stack_slot);

  // We need a zero reg. Always use r0 for that, and push it before to restore
  // its value afterwards.
  push(r0);
  mov(r0, Operand(0));

  if (count <= 5) {
    // Special straight-line code for up to five slots. Generates two
    // stores per slot (low half, then high half).
    // NOTE(review): if StoreP writes kSystemPointerSize bytes and that is
    // larger than kStackSlotSize / 2, the two stores overlap and the second
    // one reaches past the slot -- confirm for 64-bit targets.
    for (uint32_t offset = 0; offset < count; ++offset) {
      StoreP(r0, liftoff::GetHalfStackSlot(index + offset, kLowWord));
      StoreP(r0, liftoff::GetHalfStackSlot(index + offset, kHighWord));
    }
  } else {
    // General case for bigger counts: emit a store loop.
    // Use r4 for start address (inclusive), r5 for end address (exclusive).
    push(r4);
    push(r5);
    // The slot with the highest index has the lowest address, so the fill
    // starts there and walks upwards.
    subi(r4, fp, Operand(liftoff::GetStackSlotOffset(last_stack_slot)));
    // Slot {index} occupies [fp - offset, fp - offset + kStackSlotSize), so
    // the exclusive end is one slot size *above* its start address.
    subi(r5, fp, Operand(liftoff::GetStackSlotOffset(index) - kStackSlotSize));

    Label loop;
    bind(&loop);
    // Store through and advance the cursor r4 (not the zero register r0), so
    // that the comparison against r5 eventually terminates the loop.
    StoreP(r0, MemOperand(r4));
    addi(r4, r4, Operand(kSystemPointerSize));
    cmp(r4, r5);
    bne(&loop);

    // Restore in reverse push order.
    pop(r5);
    pop(r4);
  }

  pop(r0);
}
|
||||
|
||||
#define UNIMPLEMENTED_I32_BINOP(name) \
|
||||
void LiftoffAssembler::emit_##name(Register dst, Register lhs, \
|
||||
Register rhs) { \
|
||||
|
@ -12,6 +12,48 @@ namespace v8 {
|
||||
namespace internal {
|
||||
namespace wasm {
|
||||
|
||||
namespace liftoff {
|
||||
|
||||
// half
|
||||
// slot Frame
|
||||
// -----+--------------------+---------------------------
|
||||
// n+3 | parameter n |
|
||||
// ... | ... |
|
||||
// 4 | parameter 1 | or parameter 2
|
||||
// 3 | parameter 0 | or parameter 1
|
||||
// 2 | (result address) | or parameter 0
|
||||
// -----+--------------------+---------------------------
|
||||
// 1 | return addr (lr) |
|
||||
// 0 | previous frame (fp)|
|
||||
// -----+--------------------+ <-- frame ptr (fp)
|
||||
// -1 | 0xa: WASM_COMPILED |
|
||||
// -2 | instance |
|
||||
// -----+--------------------+---------------------------
|
||||
// -3 | slot 0 (high) | ^
|
||||
// -4 | slot 0 (low) | |
|
||||
// -5 | slot 1 (high) | Frame slots
|
||||
// -6 | slot 1 (low) | |
|
||||
// | | v
|
||||
// -----+--------------------+ <-- stack ptr (sp)
|
||||
//
|
||||
constexpr int32_t kInstanceOffset = 2 * kSystemPointerSize;
|
||||
constexpr int32_t kFirstStackSlotOffset =
|
||||
kInstanceOffset + 2 * kSystemPointerSize;
|
||||
|
||||
inline int GetStackSlotOffset(uint32_t index) {
|
||||
return kFirstStackSlotOffset + index * LiftoffAssembler::kStackSlotSize;
|
||||
}
|
||||
|
||||
inline MemOperand GetHalfStackSlot(uint32_t index, RegPairHalf half) {
|
||||
int32_t half_offset =
|
||||
half == kLowWord ? 0 : LiftoffAssembler::kStackSlotSize / 2;
|
||||
int32_t offset = kFirstStackSlotOffset +
|
||||
index * LiftoffAssembler::kStackSlotSize - half_offset;
|
||||
return MemOperand(fp, -offset);
|
||||
}
|
||||
|
||||
} // namespace liftoff
|
||||
|
||||
int LiftoffAssembler::PrepareStackFrame() {
|
||||
bailout(kUnsupportedArchitecture, "PrepareStackFrame");
|
||||
return 0;
|
||||
@ -108,6 +150,45 @@ void LiftoffAssembler::FillI64Half(Register, uint32_t index, RegPairHalf) {
|
||||
bailout(kUnsupportedArchitecture, "FillI64Half");
|
||||
}
|
||||
|
||||
// Emits code that writes zero to the {count} consecutive stack slots
// [index, index + count). {count} must be greater than zero. All registers
// used as temporaries (r0, r3, r4) are saved and restored around the fill.
void LiftoffAssembler::FillStackSlotsWithZero(uint32_t index, uint32_t count) {
  DCHECK_LT(0, count);
  uint32_t last_stack_slot = index + count - 1;
  RecordUsedSpillSlot(last_stack_slot);

  // We need a zero reg. Always use r0 for that, and push it before to restore
  // its value afterwards.
  push(r0);
  mov(r0, Operand(0));

  if (count <= 5) {
    // Special straight-line code for up to five slots. Generates two
    // stores per slot (low half, then high half).
    // NOTE(review): if StoreP writes kSystemPointerSize bytes and that is
    // larger than kStackSlotSize / 2, the two stores overlap and the second
    // one reaches past the slot -- confirm for 64-bit targets.
    for (uint32_t offset = 0; offset < count; ++offset) {
      StoreP(r0, liftoff::GetHalfStackSlot(index + offset, kLowWord));
      StoreP(r0, liftoff::GetHalfStackSlot(index + offset, kHighWord));
    }
  } else {
    // General case for bigger counts: emit a store loop.
    // Use r3 for start address (inclusive), r4 for end address (exclusive).
    push(r3);
    push(r4);
    // The slot with the highest index has the lowest address, so the fill
    // starts there and walks upwards.
    SubP(r3, fp, Operand(liftoff::GetStackSlotOffset(last_stack_slot)));
    // Slot {index} occupies [fp - offset, fp - offset + kStackSlotSize), so
    // the exclusive end is one slot size *above* its start address.
    SubP(r4, fp, Operand(liftoff::GetStackSlotOffset(index) - kStackSlotSize));

    Label loop;
    bind(&loop);
    // Store through and advance the cursor r3 (not the zero register r0), so
    // that the comparison against r4 eventually terminates the loop.
    StoreP(r0, MemOperand(r3));
    la(r3, MemOperand(r3, kSystemPointerSize));
    CmpLogicalP(r3, r4);
    bne(&loop);

    // Restore in reverse push order.
    pop(r4);
    pop(r3);
  }

  pop(r0);
}
|
||||
|
||||
#define UNIMPLEMENTED_I32_BINOP(name) \
|
||||
void LiftoffAssembler::emit_##name(Register dst, Register lhs, \
|
||||
Register rhs) { \
|
||||
|
Loading…
Reference in New Issue
Block a user