MIPS: Use PC relative instructions on r6.

BUG=

Review URL: https://codereview.chromium.org/1628453002

Cr-Commit-Position: refs/heads/master@{#33703}
This commit is contained in:
balazs.kilvady 2016-02-03 05:30:57 -08:00 committed by Commit bot
parent 007e14ce4b
commit ebac85c0a4
9 changed files with 137 additions and 142 deletions

View File

@ -1487,19 +1487,10 @@ void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
MipsOperandConverter i(this, instr);
Register input = i.InputRegister(0);
size_t const case_count = instr->InputCount() - 2;
Label here;
__ Branch(GetLabel(i.InputRpo(1)), hs, input, Operand(case_count));
__ BlockTrampolinePoolFor(case_count + 6);
__ bal(&here);
__ sll(at, input, 2); // Branch delay slot.
__ bind(&here);
__ addu(at, at, ra);
__ lw(at, MemOperand(at, 4 * v8::internal::Assembler::kInstrSize));
__ jr(at);
__ nop(); // Branch delay slot nop.
for (size_t index = 0; index < case_count; ++index) {
__ dd(GetLabel(i.InputRpo(index + 2)));
}
__ GenerateSwitchTable(input, case_count, [&i, this](size_t index) {
return GetLabel(i.InputRpo(index + 2));
});
}

View File

@ -1791,27 +1791,15 @@ void CodeGenerator::AssembleArchLookupSwitch(Instruction* instr) {
AssembleArchJump(i.InputRpo(1));
}
void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
MipsOperandConverter i(this, instr);
Register input = i.InputRegister(0);
size_t const case_count = instr->InputCount() - 2;
Label here;
__ Branch(GetLabel(i.InputRpo(1)), hs, input, Operand(case_count));
__ BlockTrampolinePoolFor(static_cast<int>(case_count) * 2 + 7);
// Ensure that dd-ed labels use 8 byte aligned addresses.
__ Align(8);
__ bal(&here);
__ dsll(at, input, 3); // Branch delay slot.
__ bind(&here);
__ daddu(at, at, ra);
__ ld(at, MemOperand(at, 4 * v8::internal::Assembler::kInstrSize));
__ jr(at);
__ nop(); // Branch delay slot nop.
for (size_t index = 0; index < case_count; ++index) {
__ dd(GetLabel(i.InputRpo(index + 2)));
}
__ GenerateSwitchTable(input, case_count, [&i, this](size_t index) {
return GetLabel(i.InputRpo(index + 2));
});
}

View File

@ -1125,20 +1125,21 @@ void CEntryStub::Generate(MacroAssembler* masm) {
// we can store the address on the stack to be able to find it again and
// we never have to restore it, because it will not change.
{ Assembler::BlockTrampolinePoolScope block_trampoline_pool(masm);
// This branch-and-link sequence is needed to find the current PC on mips,
// saved to the ra register.
// Use masm-> here instead of the double-underscore macro since extra
// coverage code can interfere with the proper calculation of ra.
int kNumInstructionsToJump = 4;
Label find_ra;
__ bal(&find_ra); // bal exposes branch delay slot.
__ nop();
__ bind(&find_ra);
// Adjust the value in ra to point to the correct return location, 2nd
// instruction past the real call into C code (the jalr(t9)), and push it.
// This is the return address of the exit frame.
const int kNumInstructionsToJump = 5;
__ Addu(ra, ra, kNumInstructionsToJump * kPointerSize);
if (kArchVariant >= kMips32r6) {
__ addiupc(ra, kNumInstructionsToJump + 1);
} else {
// This branch-and-link sequence is needed to find the current PC on mips
// before r6, saved to the ra register.
__ bal(&find_ra); // bal exposes branch delay slot.
__ Addu(ra, ra, kNumInstructionsToJump * Instruction::kInstrSize);
}
__ bind(&find_ra);
// This spot was reserved in EnterExitFrame.
__ sw(ra, MemOperand(sp, result_stack_size));
// Stack space reservation moved to the branch delay slot below.

View File

@ -208,6 +208,11 @@ class MacroAssembler: public Assembler {
Heap::RootListIndex index,
BranchDelaySlot bdslot = PROTECT);
// Emits an inline jump table with case_count entries together with the code
// that dispatches through it using the value held in |index|.
// GetLabelFunction must be lambda '[](size_t index) -> Label*' or a
// functor/function with 'Label *func(size_t index)' declaration.
template <typename Func>
void GenerateSwitchTable(Register index, size_t case_count,
Func GetLabelFunction);
#undef COND_ARGS
// Emit code to discard a non-negative number of pointer-sized elements
@ -1746,7 +1751,29 @@ class CodePatcher {
FlushICache flush_cache_; // Whether to flush the I cache after patching.
};
// Emits a jump-table dispatch: loads the |index|-th entry of an inline table
// of label addresses into |at| and jumps to it. The table (one dd() entry per
// case, obtained from GetLabelFunction(i) for i in [0, case_count)) is emitted
// directly after the dispatch sequence.
//
// Clobbers |at|. The pre-r6 path additionally overwrites |ra|, because it
// uses a branch-and-link (bal) to discover the current PC.
//
// No range check on |index| is emitted here.
// NOTE(review): callers are presumably expected to branch away for
// index >= case_count before calling this - confirm at call sites.
template <typename Func>
void MacroAssembler::GenerateSwitchTable(Register index, size_t case_count,
                                         Func GetLabelFunction) {
  if (kArchVariant >= kMips32r6) {
    // 5 dispatch instructions plus one word per table entry must not be
    // interrupted by a trampoline pool.
    BlockTrampolinePoolFor(static_cast<int>(case_count) + 5);
    // The table starts 5 instructions after the addiupc itself
    // (addiupc, lsa, lw, jr, nop).
    addiupc(at, 5);
    lsa(at, at, index, kPointerSizeLog2);
    lw(at, MemOperand(at));
  } else {
    Label here;
    // 6 dispatch instructions plus one word per table entry.
    BlockTrampolinePoolFor(static_cast<int>(case_count) + 6);
    bal(&here);
    sll(at, index, kPointerSizeLog2);  // Branch delay slot.
    bind(&here);
    addu(at, at, ra);
    // The table starts 4 instructions after |here| (addu, lw, jr, nop).
    lw(at, MemOperand(at, 4 * v8::internal::Assembler::kInstrSize));
  }
  jr(at);
  nop();  // Branch delay slot nop.
  // Use a distinct name so the loop counter does not shadow the |index|
  // register parameter.
  for (size_t case_index = 0; case_index < case_count; ++case_index) {
    dd(GetLabelFunction(case_index));
  }
}
#ifdef GENERATED_CODE_COVERAGE
#define CODE_COVERAGE_STRINGIFY(x) #x

View File

@ -1123,20 +1123,21 @@ void CEntryStub::Generate(MacroAssembler* masm) {
// we can store the address on the stack to be able to find it again and
// we never have to restore it, because it will not change.
{ Assembler::BlockTrampolinePoolScope block_trampoline_pool(masm);
// This branch-and-link sequence is needed to find the current PC on mips,
// saved to the ra register.
// Use masm-> here instead of the double-underscore macro since extra
// coverage code can interfere with the proper calculation of ra.
int kNumInstructionsToJump = 4;
Label find_ra;
__ bal(&find_ra); // bal exposes branch delay slot.
__ nop();
__ bind(&find_ra);
// Adjust the value in ra to point to the correct return location, 2nd
// instruction past the real call into C code (the jalr(t9)), and push it.
// This is the return address of the exit frame.
const int kNumInstructionsToJump = 5;
__ Daddu(ra, ra, kNumInstructionsToJump * kInt32Size);
if (kArchVariant >= kMips64r6) {
__ addiupc(ra, kNumInstructionsToJump + 1);
} else {
// This branch-and-link sequence is needed to find the current PC on mips
// before r6, saved to the ra register.
__ bal(&find_ra); // bal exposes branch delay slot.
__ Daddu(ra, ra, kNumInstructionsToJump * Instruction::kInstrSize);
}
__ bind(&find_ra);
// This spot was reserved in EnterExitFrame.
__ sd(ra, MemOperand(sp, result_stack_size));
// Stack space reservation moved to the branch delay slot below.

View File

@ -236,6 +236,11 @@ class MacroAssembler: public Assembler {
Heap::RootListIndex index,
BranchDelaySlot bdslot = PROTECT);
// Emits an inline jump table with case_count entries together with the code
// that dispatches through it using the value held in |index|.
// GetLabelFunction must be lambda '[](size_t index) -> Label*' or a
// functor/function with 'Label *func(size_t index)' declaration.
template <typename Func>
void GenerateSwitchTable(Register index, size_t case_count,
Func GetLabelFunction);
#undef COND_ARGS
// Emit code to discard a non-negative number of pointer-sized elements
@ -1898,7 +1903,36 @@ class CodePatcher {
FlushICache flush_cache_; // Whether to flush the I cache after patching.
};
// Emits a jump-table dispatch: loads the |index|-th entry of an inline table
// of label addresses into |at| and jumps to it. The table (one dd() entry per
// case, obtained from GetLabelFunction(i) for i in [0, case_count)) is emitted
// directly after the dispatch sequence.
//
// Both paths pad the instruction stream so that the dd-ed labels start at an
// 8-byte aligned address, since the entries are read with ld.
//
// Clobbers |at|. The pre-r6 path additionally overwrites |ra|, because it
// uses a branch-and-link (bal) to discover the current PC.
//
// No range check on |index| is emitted here.
// NOTE(review): callers are presumably expected to branch away for
// index >= case_count before calling this - confirm at call sites.
template <typename Func>
void MacroAssembler::GenerateSwitchTable(Register index, size_t case_count,
                                         Func GetLabelFunction) {
  if (kArchVariant >= kMips64r6) {
    // Up to 6 instructions (optional padding nop + 5 dispatch instructions)
    // plus two instruction-sized words per table entry.
    BlockTrampolinePoolFor(static_cast<int>(case_count) * 2 + 6);
    // Opposite of Align(8) as we have odd number of instructions in this case:
    // the sequence must start at an offset that is NOT 8-byte aligned so that
    // the table, 5 instructions later, is.
    if ((pc_offset() & 7) == 0) {
      nop();
    }
    // The table starts 5 instructions after the addiupc itself
    // (addiupc, dlsa, ld, jr, nop).
    addiupc(at, 5);
    dlsa(at, at, index, kPointerSizeLog2);
    ld(at, MemOperand(at));
  } else {
    Label here;
    // Up to 7 instructions (optional alignment nop + 6 dispatch instructions)
    // plus two instruction-sized words per table entry.
    BlockTrampolinePoolFor(static_cast<int>(case_count) * 2 + 7);
    // Even number of instructions follows, so aligning the start aligns the
    // table as well.
    Align(8);
    bal(&here);
    dsll(at, index, kPointerSizeLog2);  // Branch delay slot.
    bind(&here);
    daddu(at, at, ra);
    // The table starts 4 instructions after |here| (daddu, ld, jr, nop).
    ld(at, MemOperand(at, 4 * v8::internal::Assembler::kInstrSize));
  }
  jr(at);
  nop();  // Branch delay slot nop.
  // Use a distinct name so the loop counter does not shadow the |index|
  // register parameter.
  for (size_t case_index = 0; case_index < case_count; ++case_index) {
    dd(GetLabelFunction(case_index));
  }
}
#ifdef GENERATED_CODE_COVERAGE
#define CODE_COVERAGE_STRINGIFY(x) #x

View File

@ -5024,8 +5024,7 @@ TEST(r6_jialc) {
}
}
uint64_t run_addiupc(int32_t imm19) {
static uint32_t run_addiupc(int32_t imm19) {
Isolate* isolate = CcTest::i_isolate();
HandleScope scope(isolate);
@ -5058,13 +5057,13 @@ TEST(r6_addiupc) {
int32_t imm19;
};
struct TestCaseAddiupc tc[] = {
// imm19
{ -262144 }, // 0x40000
{ -1 }, // 0x7FFFF
{ 0 },
{ 1 }, // 0x00001
{ 262143 } // 0x3FFFF
TestCaseAddiupc tc[] = {
// imm19
{-262144}, // 0x40000
{-1}, // 0x7FFFF
{0},
{1}, // 0x00001
{262143} // 0x3FFFF
};
size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseAddiupc);

View File

@ -185,7 +185,7 @@ TEST(jump_tables4) {
CcTest::InitializeVM();
Isolate* isolate = CcTest::i_isolate();
HandleScope scope(isolate);
MacroAssembler assembler(isolate, NULL, 0,
MacroAssembler assembler(isolate, nullptr, 0,
v8::internal::CodeObjectRequired::kYes);
MacroAssembler* masm = &assembler;
@ -193,11 +193,9 @@ TEST(jump_tables4) {
int values[kNumCases];
isolate->random_number_generator()->NextBytes(values, sizeof(values));
Label labels[kNumCases];
Label near_start, end;
__ addiu(sp, sp, -4);
__ sw(ra, MemOperand(sp));
Label near_start, end, done;
__ Push(ra);
__ mov(v0, zero_reg);
__ Branch(&end);
@ -209,35 +207,17 @@ TEST(jump_tables4) {
__ addiu(v0, v0, 1);
}
Label done;
{
__ BlockTrampolinePoolFor(kNumCases + 6);
PredictableCodeSizeScope predictable(
masm, (kNumCases + 6) * Assembler::kInstrSize);
Label here;
__ bal(&here);
__ sll(at, a0, 2); // In delay slot.
__ bind(&here);
__ addu(at, at, ra);
__ lw(at, MemOperand(at, 4 * Assembler::kInstrSize));
__ jr(at);
__ nop(); // Branch delay slot nop.
for (int i = 0; i < kNumCases; ++i) {
__ dd(&labels[i]);
}
}
__ GenerateSwitchTable(a0, kNumCases,
[&labels](size_t i) { return labels + i; });
for (int i = 0; i < kNumCases; ++i) {
__ bind(&labels[i]);
__ lui(v0, (values[i] >> 16) & 0xffff);
__ ori(v0, v0, values[i] & 0xffff);
__ li(v0, values[i]);
__ Branch(&done);
}
__ bind(&done);
__ lw(ra, MemOperand(sp));
__ addiu(sp, sp, 4);
__ Pop(ra);
__ jr(ra);
__ nop();
@ -279,23 +259,21 @@ TEST(jump_tables5) {
Label labels[kNumCases];
Label done;
__ addiu(sp, sp, -4);
__ sw(ra, MemOperand(sp));
__ Push(ra);
{
__ BlockTrampolinePoolFor(kNumCases * 2 + 7 + 1);
__ BlockTrampolinePoolFor(kNumCases + 6 + 1);
PredictableCodeSizeScope predictable(
masm, kNumCases * kPointerSize + ((7 + 1) * Assembler::kInstrSize));
Label here;
masm, kNumCases * kPointerSize + ((6 + 1) * Assembler::kInstrSize));
__ bal(&here);
__ sll(at, a0, 2); // In delay slot.
__ bind(&here);
__ addu(at, at, ra);
__ lw(at, MemOperand(at, 6 * Assembler::kInstrSize));
__ addiupc(at, 6 + 1);
__ lsa(at, at, a0, 2);
__ lw(at, MemOperand(at));
__ jalr(at);
__ nop(); // Branch delay slot nop.
__ bc(&done);
// A nop instruction must be generated by the forbidden slot guard
// (Assembler::dd(Label*)).
for (int i = 0; i < kNumCases; ++i) {
__ dd(&labels[i]);
}
@ -303,15 +281,13 @@ TEST(jump_tables5) {
for (int i = 0; i < kNumCases; ++i) {
__ bind(&labels[i]);
__ lui(v0, (values[i] >> 16) & 0xffff);
__ ori(v0, v0, values[i] & 0xffff);
__ li(v0, values[i]);
__ jr(ra);
__ nop();
}
__ bind(&done);
__ lw(ra, MemOperand(sp));
__ addiu(sp, sp, 4);
__ Pop(ra);
__ jr(ra);
__ nop();

View File

@ -228,7 +228,7 @@ TEST(jump_tables4) {
CcTest::InitializeVM();
Isolate* isolate = CcTest::i_isolate();
HandleScope scope(isolate);
MacroAssembler assembler(isolate, NULL, 0,
MacroAssembler assembler(isolate, nullptr, 0,
v8::internal::CodeObjectRequired::kYes);
MacroAssembler* masm = &assembler;
@ -236,11 +236,9 @@ TEST(jump_tables4) {
int values[kNumCases];
isolate->random_number_generator()->NextBytes(values, sizeof(values));
Label labels[kNumCases];
Label near_start, end;
__ daddiu(sp, sp, -8);
__ sd(ra, MemOperand(sp));
Label near_start, end, done;
__ Push(ra);
__ mov(v0, zero_reg);
__ Branch(&end);
@ -252,36 +250,17 @@ TEST(jump_tables4) {
__ addiu(v0, v0, 1);
}
__ Align(8);
Label done;
{
__ BlockTrampolinePoolFor(kNumCases * 2 + 6);
PredictableCodeSizeScope predictable(
masm, (kNumCases * 2 + 6) * Assembler::kInstrSize);
Label here;
__ bal(&here);
__ dsll(at, a0, 3); // In delay slot.
__ bind(&here);
__ daddu(at, at, ra);
__ ld(at, MemOperand(at, 4 * Assembler::kInstrSize));
__ jr(at);
__ nop(); // Branch delay slot nop.
for (int i = 0; i < kNumCases; ++i) {
__ dd(&labels[i]);
}
}
__ GenerateSwitchTable(a0, kNumCases,
[&labels](size_t i) { return labels + i; });
for (int i = 0; i < kNumCases; ++i) {
__ bind(&labels[i]);
__ lui(v0, (values[i] >> 16) & 0xffff);
__ ori(v0, v0, values[i] & 0xffff);
__ li(v0, values[i]);
__ Branch(&done);
}
__ bind(&done);
__ ld(ra, MemOperand(sp));
__ daddiu(sp, sp, 8);
__ Pop(ra);
__ jr(ra);
__ nop();
@ -323,21 +302,22 @@ TEST(jump_tables5) {
Label labels[kNumCases];
Label done;
__ daddiu(sp, sp, -8);
__ sd(ra, MemOperand(sp));
__ Push(ra);
// Opposite of Align(8) as we have unaligned number of instructions in the
// following block before the first dd().
if ((masm->pc_offset() & 7) == 0) {
__ nop();
}
__ Align(8);
{
__ BlockTrampolinePoolFor(kNumCases * 2 + 7 + 1);
__ BlockTrampolinePoolFor(kNumCases * 2 + 6 + 1);
PredictableCodeSizeScope predictable(
masm, kNumCases * kPointerSize + ((7 + 1) * Assembler::kInstrSize));
Label here;
masm, kNumCases * kPointerSize + ((6 + 1) * Assembler::kInstrSize));
__ bal(&here);
__ dsll(at, a0, 3); // In delay slot.
__ bind(&here);
__ daddu(at, at, ra);
__ ld(at, MemOperand(at, 6 * Assembler::kInstrSize));
__ addiupc(at, 6 + 1);
__ dlsa(at, at, a0, 3);
__ ld(at, MemOperand(at));
__ jalr(at);
__ nop(); // Branch delay slot nop.
__ bc(&done);
@ -351,15 +331,13 @@ TEST(jump_tables5) {
for (int i = 0; i < kNumCases; ++i) {
__ bind(&labels[i]);
__ lui(v0, (values[i] >> 16) & 0xffff);
__ ori(v0, v0, values[i] & 0xffff);
__ li(v0, values[i]);
__ jr(ra);
__ nop();
}
__ bind(&done);
__ ld(ra, MemOperand(sp));
__ daddiu(sp, sp, 8);
__ Pop(ra);
__ jr(ra);
__ nop();