Better peephole optimization for ARM. This is a commit of

http://codereview.chromium.org/2004006 for Subrato De.


git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@4662 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
erik.corry@gmail.com 2010-05-17 10:51:41 +00:00
parent 5e91f6fa2f
commit afe8c296a4
3 changed files with 232 additions and 11 deletions

View File

@ -268,6 +268,20 @@ const Instr kBlxRegMask =
15 * B24 | 15 * B20 | 15 * B16 | 15 * B12 | 15 * B8 | 15 * B4; 15 * B24 | 15 * B20 | 15 * B16 | 15 * B12 | 15 * B8 | 15 * B4;
const Instr kBlxRegPattern = const Instr kBlxRegPattern =
B24 | B21 | 15 * B16 | 15 * B12 | 15 * B8 | 3 * B4; B24 | B21 | 15 * B16 | 15 * B12 | 15 * B8 | 3 * B4;
// A mask for the Rd register for push, pop, ldr, str instructions.
const Instr kRdMask = 0x0000f000;
static const Instr kLdrRegFpOffsetPattern =
al | B26 | L | Offset | fp.code() * B16;
static const Instr kStrRegFpOffsetPattern =
al | B26 | Offset | fp.code() * B16;
static const Instr kLdrRegFpNegOffsetPattern =
al | B26 | L | NegOffset | fp.code() * B16;
static const Instr kStrRegFpNegOffsetPattern =
al | B26 | NegOffset | fp.code() * B16;
static const Instr kLdrStrInstrTypeMask = 0xffff0000;
static const Instr kLdrStrInstrArgumentMask = 0x0000ffff;
static const Instr kLdrStrOffsetMask = 0x00000fff;
static const int kRdShift = 12;
// Spare buffer. // Spare buffer.
static const int kMinimalBufferSize = 4*KB; static const int kMinimalBufferSize = 4*KB;
@ -395,6 +409,43 @@ Instr Assembler::SetLdrRegisterImmediateOffset(Instr instr, int offset) {
} }
Register Assembler::GetRd(Instr instr) {
Register reg;
reg.code_ = ((instr & kRdMask) >> kRdShift);
return reg;
}
bool Assembler::IsPush(Instr instr) {
return ((instr & ~kRdMask) == kPushRegPattern);
}
bool Assembler::IsPop(Instr instr) {
return ((instr & ~kRdMask) == kPopRegPattern);
}
bool Assembler::IsStrRegFpOffset(Instr instr) {
return ((instr & kLdrStrInstrTypeMask) == kStrRegFpOffsetPattern);
}
bool Assembler::IsLdrRegFpOffset(Instr instr) {
return ((instr & kLdrStrInstrTypeMask) == kLdrRegFpOffsetPattern);
}
bool Assembler::IsStrRegFpNegOffset(Instr instr) {
return ((instr & kLdrStrInstrTypeMask) == kStrRegFpNegOffsetPattern);
}
bool Assembler::IsLdrRegFpNegOffset(Instr instr) {
return ((instr & kLdrStrInstrTypeMask) == kLdrRegFpNegOffsetPattern);
}
// Labels refer to positions in the (to be) generated code. // Labels refer to positions in the (to be) generated code.
// There are bound, linked, and unused labels. // There are bound, linked, and unused labels.
// //
@ -1086,20 +1137,179 @@ void Assembler::ldr(Register dst, const MemOperand& src, Condition cond) {
} }
addrmod2(cond | B26 | L, dst, src); addrmod2(cond | B26 | L, dst, src);
// Eliminate pattern: push(r), pop(r) // Eliminate pattern: push(ry), pop(rx)
// str(r, MemOperand(sp, 4, NegPreIndex), al) // str(ry, MemOperand(sp, 4, NegPreIndex), al)
// ldr(r, MemOperand(sp, 4, PostIndex), al) // ldr(rx, MemOperand(sp, 4, PostIndex), al)
// Both instructions can be eliminated. // Both instructions can be eliminated if ry = rx.
// If ry != rx, a register copy from ry to rx is inserted
// after eliminating the push and the pop instructions.
int pattern_size = 2 * kInstrSize; int pattern_size = 2 * kInstrSize;
Instr push_instr = instr_at(pc_ - 2 * kInstrSize);
Instr pop_instr = instr_at(pc_ - 1 * kInstrSize);
if (FLAG_push_pop_elimination && if (FLAG_push_pop_elimination &&
last_bound_pos_ <= (pc_offset() - pattern_size) &&
reloc_info_writer.last_pc() <= (pc_ - pattern_size) &&
IsPush(push_instr) &&
IsPop(pop_instr)) {
if ((pop_instr & kRdMask) != (push_instr & kRdMask)) {
// For consecutive push and pop on different registers,
// we delete both the push & pop and insert a register move.
// push ry, pop rx --> mov rx, ry
Register reg_pushed, reg_popped;
reg_pushed = GetRd(push_instr);
reg_popped = GetRd(pop_instr);
pc_ -= 2 * kInstrSize;
// Insert a mov instruction, which is better than a pair of push & pop
mov(reg_popped, reg_pushed);
if (FLAG_print_push_pop_elimination) {
PrintF("%x push/pop (diff reg) replaced by a reg move\n", pc_offset());
}
} else {
// For consecutive push and pop on the same register,
// both the push and the pop can be deleted.
pc_ -= 2 * kInstrSize;
if (FLAG_print_push_pop_elimination) {
PrintF("%x push/pop (same reg) eliminated\n", pc_offset());
}
}
}
pattern_size = 2 * kInstrSize;
if (FLAG_peephole_optimization &&
last_bound_pos_ <= (pc_offset() - pattern_size) && last_bound_pos_ <= (pc_offset() - pattern_size) &&
reloc_info_writer.last_pc() <= (pc_ - pattern_size) && reloc_info_writer.last_pc() <= (pc_ - pattern_size)) {
// Pattern. Instr str_instr = instr_at(pc_ - 2 * kInstrSize);
instr_at(pc_ - 1 * kInstrSize) == (kPopRegPattern | dst.code() * B12) && Instr ldr_instr = instr_at(pc_ - 1 * kInstrSize);
instr_at(pc_ - 2 * kInstrSize) == (kPushRegPattern | dst.code() * B12)) {
pc_ -= 2 * kInstrSize; if ((IsStrRegFpOffset(str_instr) &&
if (FLAG_print_push_pop_elimination) { IsLdrRegFpOffset(ldr_instr)) ||
PrintF("%x push/pop (same reg) eliminated\n", pc_offset()); (IsStrRegFpNegOffset(str_instr) &&
IsLdrRegFpNegOffset(ldr_instr))) {
if ((ldr_instr & kLdrStrInstrArgumentMask) ==
(str_instr & kLdrStrInstrArgumentMask)) {
// Pattern: Ldr/str same fp+offset, same register.
//
// The following:
// str rx, [fp, #-12]
// ldr rx, [fp, #-12]
//
// Becomes:
// str rx, [fp, #-12]
pc_ -= 1 * kInstrSize;
if (FLAG_print_peephole_optimization) {
PrintF("%x str/ldr (fp + same offset), same reg\n", pc_offset());
}
} else if ((ldr_instr & kLdrStrOffsetMask) ==
(str_instr & kLdrStrOffsetMask)) {
// Pattern: Ldr/str same fp+offset, different register.
//
// The following:
// str rx, [fp, #-12]
// ldr ry, [fp, #-12]
//
// Becomes:
// str rx, [fp, #-12]
// mov ry, rx
Register reg_stored, reg_loaded;
reg_stored = GetRd(str_instr);
reg_loaded = GetRd(ldr_instr);
pc_ -= 1 * kInstrSize;
// Insert a mov instruction, which is better than ldr.
mov(reg_loaded, reg_stored);
if (FLAG_print_peephole_optimization) {
PrintF("%x str/ldr (fp + same offset), diff reg \n", pc_offset());
}
}
}
}
pattern_size = 3 * kInstrSize;
if (FLAG_push_pop_elimination &&
last_bound_pos_ <= (pc_offset() - pattern_size) &&
reloc_info_writer.last_pc() <= (pc_ - pattern_size)) {
Instr mem_write_instr = instr_at(pc_ - 3 * kInstrSize);
Instr ldr_instr = instr_at(pc_ - 2 * kInstrSize);
Instr mem_read_instr = instr_at(pc_ - 1 * kInstrSize);
if (IsPush(mem_write_instr) &&
IsPop(mem_read_instr)) {
if ((IsLdrRegFpOffset(ldr_instr) ||
IsLdrRegFpNegOffset(ldr_instr))) {
if ((mem_write_instr & kRdMask) ==
(mem_read_instr & kRdMask)) {
// Pattern: push & pop from/to same register,
// with a fp+offset ldr in between
//
// The following:
// str rx, [sp, #-4]!
// ldr rz, [fp, #-24]
// ldr rx, [sp], #+4
//
// Becomes:
// if(rx == rz)
// delete all
// else
// ldr rz, [fp, #-24]
if ((mem_write_instr & kRdMask) == (ldr_instr & kRdMask)) {
pc_ -= 3 * kInstrSize;
} else {
pc_ -= 3 * kInstrSize;
// Reinsert back the ldr rz.
emit(ldr_instr);
}
if (FLAG_print_push_pop_elimination) {
PrintF("%x push/pop -dead ldr fp+offset in middle\n", pc_offset());
}
} else {
// Pattern: push & pop from/to different registers
// with a fp+offset ldr in between
//
// The following:
// str rx, [sp, #-4]!
// ldr rz, [fp, #-24]
// ldr ry, [sp], #+4
//
// Becomes:
// if(ry == rz)
// mov ry, rx;
// else if(rx != rz)
// ldr rz, [fp, #-24]
// mov ry, rx
// else if((ry != rz) || (rx == rz)) becomes:
// mov ry, rx
// ldr rz, [fp, #-24]
Register reg_pushed, reg_popped;
if ((mem_read_instr & kRdMask) == (ldr_instr & kRdMask)) {
reg_pushed = GetRd(mem_write_instr);
reg_popped = GetRd(mem_read_instr);
pc_ -= 3 * kInstrSize;
mov(reg_popped, reg_pushed);
} else if ((mem_write_instr & kRdMask)
!= (ldr_instr & kRdMask)) {
reg_pushed = GetRd(mem_write_instr);
reg_popped = GetRd(mem_read_instr);
pc_ -= 3 * kInstrSize;
emit(ldr_instr);
mov(reg_popped, reg_pushed);
} else if (((mem_read_instr & kRdMask)
!= (ldr_instr & kRdMask)) ||
((mem_write_instr & kRdMask)
== (ldr_instr & kRdMask)) ) {
reg_pushed = GetRd(mem_write_instr);
reg_popped = GetRd(mem_read_instr);
pc_ -= 3 * kInstrSize;
mov(reg_popped, reg_pushed);
emit(ldr_instr);
}
if (FLAG_print_push_pop_elimination) {
PrintF("%x push/pop (ldr fp+off in middle)\n", pc_offset());
}
}
}
} }
} }
} }

View File

@ -987,6 +987,13 @@ class Assembler : public Malloced {
static bool IsLdrRegisterImmediate(Instr instr); static bool IsLdrRegisterImmediate(Instr instr);
static int GetLdrRegisterImmediateOffset(Instr instr); static int GetLdrRegisterImmediateOffset(Instr instr);
static Instr SetLdrRegisterImmediateOffset(Instr instr, int offset); static Instr SetLdrRegisterImmediateOffset(Instr instr, int offset);
static Register GetRd(Instr instr);
static bool IsPush(Instr instr);
static bool IsPop(Instr instr);
static bool IsStrRegFpOffset(Instr instr);
static bool IsLdrRegFpOffset(Instr instr);
static bool IsStrRegFpNegOffset(Instr instr);
static bool IsLdrRegFpNegOffset(Instr instr);
protected: protected:

View File

@ -104,6 +104,10 @@ DEFINE_bool(push_pop_elimination, true,
"eliminate redundant push/pops in assembly code") "eliminate redundant push/pops in assembly code")
DEFINE_bool(print_push_pop_elimination, false, DEFINE_bool(print_push_pop_elimination, false,
"print elimination of redundant push/pops in assembly code") "print elimination of redundant push/pops in assembly code")
DEFINE_bool(peephole_optimization, true,
"perform peephole optimizations in assembly code")
DEFINE_bool(print_peephole_optimization, false,
"print peephole optimizations in assembly code")
DEFINE_bool(enable_sse2, true, DEFINE_bool(enable_sse2, true,
"enable use of SSE2 instructions if available") "enable use of SSE2 instructions if available")
DEFINE_bool(enable_sse3, true, DEFINE_bool(enable_sse3, true,