MIPS: Optimize load/store with large offset

Currently, a load/store whose offset does not fit in 16 bits is emitted as the following sequence:

    lui at, 0x1234
    ori at, at, 0x5678
    add at, s0, at
    lw  a0, 0(at)

This sequence can be optimized in the following way:

    lui at, 0x1234
    add at, s0, at
    lw  a0, 0x5678(at)

BUG=

Review-Url: https://codereview.chromium.org/2486283003
Cr-Commit-Position: refs/heads/master@{#40953}
This commit is contained in:
dusan.simicic 2016-11-14 01:57:06 -08:00 committed by Commit bot
parent 787e87a814
commit 6d054f7658
4 changed files with 99 additions and 49 deletions

View File

@ -46,12 +46,33 @@ class MipsOperandGenerator final : public OperandGenerator {
case kMipsSub:
case kMipsXor:
return is_uint16(value);
case kMipsLb:
case kMipsLbu:
case kMipsSb:
case kMipsLh:
case kMipsLhu:
case kMipsSh:
case kMipsLw:
case kMipsSw:
case kMipsLwc1:
case kMipsSwc1:
case kMipsLdc1:
case kMipsSdc1:
case kCheckedLoadInt8:
case kCheckedLoadUint8:
case kCheckedLoadInt16:
case kCheckedLoadUint16:
case kCheckedLoadWord32:
case kCheckedStoreWord8:
case kCheckedStoreWord16:
case kCheckedStoreWord32:
case kCheckedLoadFloat32:
case kCheckedLoadFloat64:
case kCheckedStoreFloat32:
case kCheckedStoreFloat64:
return std::numeric_limits<int16_t>::min() <= (value + kIntSize) &&
std::numeric_limits<int16_t>::max() >= (value + kIntSize);
// true even for 32b values, offsets > 16b
// are handled in assembler-mips.cc
return is_int32(value);
default:
return is_int16(value);
}
@ -1765,6 +1786,7 @@ void InstructionSelector::VisitAtomicLoad(Node* node) {
UNREACHABLE();
return;
}
if (g.CanBeImmediate(index, opcode)) {
Emit(opcode | AddressingModeField::encode(kMode_MRI),
g.DefineAsRegister(node), g.UseRegister(base), g.UseImmediate(index));

View File

@ -1784,13 +1784,44 @@ void Assembler::LoadRegPlusOffsetToAt(const MemOperand& src) {
addu(at, at, src.rm()); // Add base register.
}
// Prepares the AT register for a memory access whose offset is larger than
// int16: AT receives the base register plus the upper part of the offset.
// The return value is the lower 16 bits of the offset, which the caller is
// expected to pass as the immediate of the load/store instruction it emits.
int32_t Assembler::LoadRegPlusUpperOffsetPartToAt(const MemOperand& src) {
  DCHECK(!src.rm().is(at));
  const auto offset = src.offset_;
  const int32_t lower_part = offset & kImm16Mask;
  int32_t upper_part = (offset >> kLuiShift) & kImm16Mask;
  // A lower part with its highest bit set would act as a negative offset in
  // the load/store instruction, so the upper part is increased by one to
  // compensate.
  if ((offset & kNegOffset) != 0) {
    ++upper_part;
  }
  lui(at, upper_part);
  addu(at, at, src.rm());  // Add base register.
  return lower_part;
}
// Variant of the upper-offset helper for the case where one 64-bit access is
// performed as two 32-bit loads/stores. The second access adds kIntSize to the
// returned offset, so the split form is used only when the lower part of the
// offset plus kIntSize still passes is_int16. Otherwise the whole offset is
// added to the base register in AT and 0 is returned.
int32_t Assembler::LoadUpperOffsetForTwoMemoryAccesses(const MemOperand& src) {
  DCHECK(!src.rm().is(at));
  const bool second_access_fits =
      is_int16((src.offset_ & kImm16Mask) + kIntSize);
  if (!second_access_fits) {
    // Lower part + kIntSize does not fit in 16 bits: load base register +
    // whole offset into AT and let the caller use small constant offsets.
    LoadRegPlusOffsetToAt(src);
    return 0;
  }
  return LoadRegPlusUpperOffsetPartToAt(src);
}
// Emits an LB instruction that loads into rd from the memory operand rs.
// An offset that passes is_int16 is encoded directly in the instruction with
// rs.rm() as the base; otherwise the address is first formed in the at
// register and the LB uses at as its base.
void Assembler::lb(Register rd, const MemOperand& rs) {
if (is_int16(rs.offset_)) {
GenInstrImmediate(LB, rs.rm(), rd, rs.offset_);
} else { // Offset > 16 bits, use multiple instructions to load.
// NOTE(review): the first two statements below (whole offset in at, LB with
// offset 0) and the two after them (upper part in at, LB with off16) each
// form a complete load. This listing looks like a diff that shows the
// replaced lines next to their replacements - confirm against the real file.
LoadRegPlusOffsetToAt(rs);
GenInstrImmediate(LB, at, rd, 0); // Equiv to lb(rd, MemOperand(at, 0));
int32_t off16 = LoadRegPlusUpperOffsetPartToAt(rs);
GenInstrImmediate(LB, at, rd, off16);
}
}
@ -1799,8 +1830,8 @@ void Assembler::lbu(Register rd, const MemOperand& rs) {
if (is_int16(rs.offset_)) {
GenInstrImmediate(LBU, rs.rm(), rd, rs.offset_);
} else { // Offset > 16 bits, use multiple instructions to load.
LoadRegPlusOffsetToAt(rs);
GenInstrImmediate(LBU, at, rd, 0); // Equiv to lbu(rd, MemOperand(at, 0));
int32_t off16 = LoadRegPlusUpperOffsetPartToAt(rs);
GenInstrImmediate(LBU, at, rd, off16);
}
}
@ -1809,8 +1840,8 @@ void Assembler::lh(Register rd, const MemOperand& rs) {
if (is_int16(rs.offset_)) {
GenInstrImmediate(LH, rs.rm(), rd, rs.offset_);
} else { // Offset > 16 bits, use multiple instructions to load.
LoadRegPlusOffsetToAt(rs);
GenInstrImmediate(LH, at, rd, 0); // Equiv to lh(rd, MemOperand(at, 0));
int32_t off16 = LoadRegPlusUpperOffsetPartToAt(rs);
GenInstrImmediate(LH, at, rd, off16);
}
}
@ -1819,8 +1850,8 @@ void Assembler::lhu(Register rd, const MemOperand& rs) {
if (is_int16(rs.offset_)) {
GenInstrImmediate(LHU, rs.rm(), rd, rs.offset_);
} else { // Offset > 16 bits, use multiple instructions to load.
LoadRegPlusOffsetToAt(rs);
GenInstrImmediate(LHU, at, rd, 0); // Equiv to lhu(rd, MemOperand(at, 0));
int32_t off16 = LoadRegPlusUpperOffsetPartToAt(rs);
GenInstrImmediate(LHU, at, rd, off16);
}
}
@ -1829,8 +1860,8 @@ void Assembler::lw(Register rd, const MemOperand& rs) {
if (is_int16(rs.offset_)) {
GenInstrImmediate(LW, rs.rm(), rd, rs.offset_);
} else { // Offset > 16 bits, use multiple instructions to load.
LoadRegPlusOffsetToAt(rs);
GenInstrImmediate(LW, at, rd, 0); // Equiv to lw(rd, MemOperand(at, 0));
int32_t off16 = LoadRegPlusUpperOffsetPartToAt(rs);
GenInstrImmediate(LW, at, rd, off16);
}
}
@ -1855,8 +1886,8 @@ void Assembler::sb(Register rd, const MemOperand& rs) {
if (is_int16(rs.offset_)) {
GenInstrImmediate(SB, rs.rm(), rd, rs.offset_);
} else { // Offset > 16 bits, use multiple instructions to store.
LoadRegPlusOffsetToAt(rs);
GenInstrImmediate(SB, at, rd, 0); // Equiv to sb(rd, MemOperand(at, 0));
int32_t off16 = LoadRegPlusUpperOffsetPartToAt(rs);
GenInstrImmediate(SB, at, rd, off16);
}
}
@ -1865,8 +1896,8 @@ void Assembler::sh(Register rd, const MemOperand& rs) {
if (is_int16(rs.offset_)) {
GenInstrImmediate(SH, rs.rm(), rd, rs.offset_);
} else { // Offset > 16 bits, use multiple instructions to store.
LoadRegPlusOffsetToAt(rs);
GenInstrImmediate(SH, at, rd, 0); // Equiv to sh(rd, MemOperand(at, 0));
int32_t off16 = LoadRegPlusUpperOffsetPartToAt(rs);
GenInstrImmediate(SH, at, rd, off16);
}
}
@ -1875,8 +1906,8 @@ void Assembler::sw(Register rd, const MemOperand& rs) {
if (is_int16(rs.offset_)) {
GenInstrImmediate(SW, rs.rm(), rd, rs.offset_);
} else { // Offset > 16 bits, use multiple instructions to store.
LoadRegPlusOffsetToAt(rs);
GenInstrImmediate(SW, at, rd, 0); // Equiv to sw(rd, MemOperand(at, 0));
int32_t off16 = LoadRegPlusUpperOffsetPartToAt(rs);
GenInstrImmediate(SW, at, rd, off16);
}
}
@ -2172,8 +2203,8 @@ void Assembler::lwc1(FPURegister fd, const MemOperand& src) {
if (is_int16(src.offset_)) {
GenInstrImmediate(LWC1, src.rm(), fd, src.offset_);
} else { // Offset > 16 bits, use multiple instructions to load.
LoadRegPlusOffsetToAt(src);
GenInstrImmediate(LWC1, at, fd, 0);
int32_t off16 = LoadRegPlusUpperOffsetPartToAt(src);
GenInstrImmediate(LWC1, at, fd, off16);
}
}
@ -2190,11 +2221,11 @@ void Assembler::ldc1(FPURegister fd, const MemOperand& src) {
GenInstrImmediate(LWC1, src.rm(), nextfpreg,
src.offset_ + Register::kExponentOffset);
} else { // Offset > 16 bits, use multiple instructions to load.
LoadRegPlusOffsetToAt(src);
GenInstrImmediate(LWC1, at, fd, Register::kMantissaOffset);
int32_t off16 = LoadUpperOffsetForTwoMemoryAccesses(src);
GenInstrImmediate(LWC1, at, fd, off16 + Register::kMantissaOffset);
FPURegister nextfpreg;
nextfpreg.setcode(fd.code() + 1);
GenInstrImmediate(LWC1, at, nextfpreg, Register::kExponentOffset);
GenInstrImmediate(LWC1, at, nextfpreg, off16 + Register::kExponentOffset);
}
} else {
DCHECK(IsFp64Mode() || IsFpxxMode());
@ -2207,9 +2238,9 @@ void Assembler::ldc1(FPURegister fd, const MemOperand& src) {
src.offset_ + Register::kExponentOffset);
mthc1(at, fd);
} else { // Offset > 16 bits, use multiple instructions to load.
LoadRegPlusOffsetToAt(src);
GenInstrImmediate(LWC1, at, fd, Register::kMantissaOffset);
GenInstrImmediate(LW, at, at, Register::kExponentOffset);
int32_t off16 = LoadUpperOffsetForTwoMemoryAccesses(src);
GenInstrImmediate(LWC1, at, fd, off16 + Register::kMantissaOffset);
GenInstrImmediate(LW, at, at, off16 + Register::kExponentOffset);
mthc1(at, fd);
}
}
@ -2220,8 +2251,8 @@ void Assembler::swc1(FPURegister fd, const MemOperand& src) {
if (is_int16(src.offset_)) {
GenInstrImmediate(SWC1, src.rm(), fd, src.offset_);
} else { // Offset > 16 bits, use multiple instructions to load.
LoadRegPlusOffsetToAt(src);
GenInstrImmediate(SWC1, at, fd, 0);
int32_t off16 = LoadRegPlusUpperOffsetPartToAt(src);
GenInstrImmediate(SWC1, at, fd, off16);
}
}
@ -2240,11 +2271,11 @@ void Assembler::sdc1(FPURegister fd, const MemOperand& src) {
GenInstrImmediate(SWC1, src.rm(), nextfpreg,
src.offset_ + Register::kExponentOffset);
} else { // Offset > 16 bits, use multiple instructions to load.
LoadRegPlusOffsetToAt(src);
GenInstrImmediate(SWC1, at, fd, Register::kMantissaOffset);
int32_t off16 = LoadUpperOffsetForTwoMemoryAccesses(src);
GenInstrImmediate(SWC1, at, fd, off16 + Register::kMantissaOffset);
FPURegister nextfpreg;
nextfpreg.setcode(fd.code() + 1);
GenInstrImmediate(SWC1, at, nextfpreg, Register::kExponentOffset);
GenInstrImmediate(SWC1, at, nextfpreg, off16 + Register::kExponentOffset);
}
} else {
DCHECK(IsFp64Mode() || IsFpxxMode());
@ -2257,10 +2288,10 @@ void Assembler::sdc1(FPURegister fd, const MemOperand& src) {
GenInstrImmediate(SW, src.rm(), at,
src.offset_ + Register::kExponentOffset);
} else { // Offset > 16 bits, use multiple instructions to load.
LoadRegPlusOffsetToAt(src);
GenInstrImmediate(SWC1, at, fd, Register::kMantissaOffset);
int32_t off16 = LoadUpperOffsetForTwoMemoryAccesses(src);
GenInstrImmediate(SWC1, at, fd, off16 + Register::kMantissaOffset);
mfhc1(t8, fd);
GenInstrImmediate(SW, at, t8, Register::kExponentOffset);
GenInstrImmediate(SW, at, t8, off16 + Register::kExponentOffset);
}
}
}

View File

@ -1177,6 +1177,8 @@ class Assembler : public AssemblerBase {
// Helpers.
void LoadRegPlusOffsetToAt(const MemOperand& src);
int32_t LoadRegPlusUpperOffsetPartToAt(const MemOperand& src);
int32_t LoadUpperOffsetForTwoMemoryAccesses(const MemOperand& src);
// Relocation for a type-recording IC has the AST id added to it. This
// member variable is a way to pass the information from the call site to

View File

@ -916,14 +916,13 @@ const MemoryAccessImm kMemoryAccessesImm[] = {
-87, -86, -82, -44, -23, -3, 0, 7, 10, 39, 52, 69, 71, 91, 92, 107, 109,
115, 124, 286, 655, 1362, 1569, 2587, 3067, 3096, 3462, 3510, 4095}}};
const MemoryAccessImm1 kMemoryAccessImmMoreThan16bit[] = {
{MachineType::Int8(),
kMipsLb,
kMipsSb,
&InstructionSelectorTest::Stream::IsInteger,
{-65000, -55000, 32777, 55000, 65000}},
{MachineType::Int8(),
{MachineType::Uint8(),
kMipsLbu,
kMipsSb,
&InstructionSelectorTest::Stream::IsInteger,
@ -933,7 +932,7 @@ const MemoryAccessImm1 kMemoryAccessImmMoreThan16bit[] = {
kMipsSh,
&InstructionSelectorTest::Stream::IsInteger,
{-65000, -55000, 32777, 55000, 65000}},
{MachineType::Int16(),
{MachineType::Uint16(),
kMipsLhu,
kMipsSh,
&InstructionSelectorTest::Stream::IsInteger,
@ -1065,11 +1064,9 @@ TEST_P(InstructionSelectorMemoryAccessImmMoreThan16bitTest,
StreamBuilder m(this, memacc.type, MachineType::Pointer());
m.Return(m.Load(memacc.type, m.Parameter(0), m.Int32Constant(index)));
Stream s = m.Build();
ASSERT_EQ(2U, s.size());
// kMipsAdd is expected opcode.
// size more than 16 bits wide.
EXPECT_EQ(kMipsAdd, s[0]->arch_opcode());
EXPECT_EQ(kMode_None, s[0]->addressing_mode());
ASSERT_EQ(1U, s.size());
EXPECT_EQ(memacc.load_opcode, s[0]->arch_opcode());
EXPECT_EQ(kMode_MRI, s[0]->addressing_mode());
EXPECT_EQ(2U, s[0]->InputCount());
EXPECT_EQ(1U, s[0]->OutputCount());
}
@ -1086,13 +1083,11 @@ TEST_P(InstructionSelectorMemoryAccessImmMoreThan16bitTest,
m.Int32Constant(index), m.Parameter(1), kNoWriteBarrier);
m.Return(m.Int32Constant(0));
Stream s = m.Build();
ASSERT_EQ(2U, s.size());
// kMipsAdd is expected opcode
// size more than 16 bits wide
EXPECT_EQ(kMipsAdd, s[0]->arch_opcode());
EXPECT_EQ(kMode_None, s[0]->addressing_mode());
EXPECT_EQ(2U, s[0]->InputCount());
EXPECT_EQ(1U, s[0]->OutputCount());
ASSERT_EQ(1U, s.size());
EXPECT_EQ(memacc.store_opcode, s[0]->arch_opcode());
EXPECT_EQ(kMode_MRI, s[0]->addressing_mode());
EXPECT_EQ(3U, s[0]->InputCount());
EXPECT_EQ(0, s[0]->OutputCount());
}
}