Better peephole optimization for ARM. This is a commit of
http://codereview.chromium.org/2004006 for Subrato De. git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@4662 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
5e91f6fa2f
commit
afe8c296a4
@ -268,6 +268,20 @@ const Instr kBlxRegMask =
|
||||
15 * B24 | 15 * B20 | 15 * B16 | 15 * B12 | 15 * B8 | 15 * B4;
|
||||
const Instr kBlxRegPattern =
|
||||
B24 | B21 | 15 * B16 | 15 * B12 | 15 * B8 | 3 * B4;
|
||||
// A mask for the Rd register for push, pop, ldr, str instructions.
// It selects bits 12..15 of the instruction word.
|
||||
const Instr kRdMask = 0x0000f000;
|
||||
// The four patterns below are the upper-half-word encodings that the
// Is{Ldr,Str}RegFp{,Neg}Offset() predicates compare against: al condition,
// B26, fp.code() placed at B16, the L bit for the ldr variants only, and
// either the Offset or the NegOffset addressing bits.
static const Instr kLdrRegFpOffsetPattern =
|
||||
al | B26 | L | Offset | fp.code() * B16;
|
||||
static const Instr kStrRegFpOffsetPattern =
|
||||
al | B26 | Offset | fp.code() * B16;
|
||||
static const Instr kLdrRegFpNegOffsetPattern =
|
||||
al | B26 | L | NegOffset | fp.code() * B16;
|
||||
static const Instr kStrRegFpNegOffsetPattern =
|
||||
al | B26 | NegOffset | fp.code() * B16;
|
||||
// Keeps bits 16..31: the part of an ldr/str that is matched against the
// patterns above.
static const Instr kLdrStrInstrTypeMask = 0xffff0000;
|
||||
// Keeps bits 0..15 (the kRdMask field plus the kLdrStrOffsetMask field);
// two instructions with equal values here use the same register and offset.
static const Instr kLdrStrInstrArgumentMask = 0x0000ffff;
|
||||
// Keeps bits 0..11, used to compare only the offsets of an ldr/str pair.
static const Instr kLdrStrOffsetMask = 0x00000fff;
|
||||
// Right-shift amount that moves the kRdMask field down to bit 0 (see GetRd).
static const int kRdShift = 12;
|
||||
|
||||
// Spare buffer.
|
||||
static const int kMinimalBufferSize = 4*KB;
|
||||
@ -395,6 +409,43 @@ Instr Assembler::SetLdrRegisterImmediateOffset(Instr instr, int offset) {
|
||||
}
|
||||
|
||||
|
||||
// Decodes the Rd operand of |instr|: the field selected by kRdMask is shifted
// down by kRdShift and becomes the code of the returned Register.
Register Assembler::GetRd(Instr instr) {
  const Instr rd_bits = instr & kRdMask;
  Register rd;
  rd.code_ = rd_bits >> kRdShift;
  return rd;
}
|
||||
|
||||
|
||||
// Returns true when |instr| equals kPushRegPattern once its Rd field
// (kRdMask) has been cleared, i.e. the match is independent of the register.
bool Assembler::IsPush(Instr instr) {
  const Instr without_rd = instr & ~kRdMask;
  return without_rd == kPushRegPattern;
}
|
||||
|
||||
|
||||
// Returns true when |instr| equals kPopRegPattern once its Rd field
// (kRdMask) has been cleared, i.e. the match is independent of the register.
bool Assembler::IsPop(Instr instr) {
  const Instr without_rd = instr & ~kRdMask;
  return without_rd == kPopRegPattern;
}
|
||||
|
||||
|
||||
// Returns true when the bits of |instr| selected by kLdrStrInstrTypeMask
// equal kStrRegFpOffsetPattern; the low 16 bits (Rd and offset) are ignored.
bool Assembler::IsStrRegFpOffset(Instr instr) {
  const Instr type_bits = instr & kLdrStrInstrTypeMask;
  return type_bits == kStrRegFpOffsetPattern;
}
|
||||
|
||||
|
||||
// Returns true when the bits of |instr| selected by kLdrStrInstrTypeMask
// equal kLdrRegFpOffsetPattern; the low 16 bits (Rd and offset) are ignored.
bool Assembler::IsLdrRegFpOffset(Instr instr) {
  const Instr type_bits = instr & kLdrStrInstrTypeMask;
  return type_bits == kLdrRegFpOffsetPattern;
}
|
||||
|
||||
|
||||
// Returns true when the bits of |instr| selected by kLdrStrInstrTypeMask
// equal kStrRegFpNegOffsetPattern; the low 16 bits (Rd and offset) are
// ignored.
bool Assembler::IsStrRegFpNegOffset(Instr instr) {
  const Instr type_bits = instr & kLdrStrInstrTypeMask;
  return type_bits == kStrRegFpNegOffsetPattern;
}
|
||||
|
||||
|
||||
// Returns true when the bits of |instr| selected by kLdrStrInstrTypeMask
// equal kLdrRegFpNegOffsetPattern; the low 16 bits (Rd and offset) are
// ignored.
bool Assembler::IsLdrRegFpNegOffset(Instr instr) {
  const Instr type_bits = instr & kLdrStrInstrTypeMask;
  return type_bits == kLdrRegFpNegOffsetPattern;
}
|
||||
|
||||
|
||||
// Labels refer to positions in the (to be) generated code.
|
||||
// There are bound, linked, and unused labels.
|
||||
//
|
||||
@ -1086,20 +1137,179 @@ void Assembler::ldr(Register dst, const MemOperand& src, Condition cond) {
|
||||
}
|
||||
addrmod2(cond | B26 | L, dst, src);
|
||||
|
||||
// Eliminate pattern: push(r), pop(r)
|
||||
// str(r, MemOperand(sp, 4, NegPreIndex), al)
|
||||
// ldr(r, MemOperand(sp, 4, PostIndex), al)
|
||||
// Both instructions can be eliminated.
|
||||
// Eliminate pattern: push(ry), pop(rx)
|
||||
// str(ry, MemOperand(sp, 4, NegPreIndex), al)
|
||||
// ldr(rx, MemOperand(sp, 4, PostIndex), al)
|
||||
// Both instructions can be eliminated if ry = rx.
|
||||
// If ry != rx, a register copy from ry to rx is inserted
|
||||
// after eliminating the push and the pop instructions.
|
||||
int pattern_size = 2 * kInstrSize;
|
||||
Instr push_instr = instr_at(pc_ - 2 * kInstrSize);
|
||||
Instr pop_instr = instr_at(pc_ - 1 * kInstrSize);
|
||||
|
||||
if (FLAG_push_pop_elimination &&
|
||||
last_bound_pos_ <= (pc_offset() - pattern_size) &&
|
||||
reloc_info_writer.last_pc() <= (pc_ - pattern_size) &&
|
||||
IsPush(push_instr) &&
|
||||
IsPop(pop_instr)) {
|
||||
if ((pop_instr & kRdMask) != (push_instr & kRdMask)) {
|
||||
// For consecutive push and pop on different registers,
|
||||
// we delete both the push & pop and insert a register move.
|
||||
// push ry, pop rx --> mov rx, ry
|
||||
Register reg_pushed, reg_popped;
|
||||
reg_pushed = GetRd(push_instr);
|
||||
reg_popped = GetRd(pop_instr);
|
||||
pc_ -= 2 * kInstrSize;
|
||||
// Insert a mov instruction, which is better than a pair of push & pop
|
||||
mov(reg_popped, reg_pushed);
|
||||
if (FLAG_print_push_pop_elimination) {
|
||||
PrintF("%x push/pop (diff reg) replaced by a reg move\n", pc_offset());
|
||||
}
|
||||
} else {
|
||||
// For consecutive push and pop on the same register,
|
||||
// both the push and the pop can be deleted.
|
||||
pc_ -= 2 * kInstrSize;
|
||||
if (FLAG_print_push_pop_elimination) {
|
||||
PrintF("%x push/pop (same reg) eliminated\n", pc_offset());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pattern_size = 2 * kInstrSize;
|
||||
if (FLAG_peephole_optimization &&
|
||||
last_bound_pos_ <= (pc_offset() - pattern_size) &&
|
||||
reloc_info_writer.last_pc() <= (pc_ - pattern_size) &&
|
||||
// Pattern.
|
||||
instr_at(pc_ - 1 * kInstrSize) == (kPopRegPattern | dst.code() * B12) &&
|
||||
instr_at(pc_ - 2 * kInstrSize) == (kPushRegPattern | dst.code() * B12)) {
|
||||
pc_ -= 2 * kInstrSize;
|
||||
if (FLAG_print_push_pop_elimination) {
|
||||
PrintF("%x push/pop (same reg) eliminated\n", pc_offset());
|
||||
reloc_info_writer.last_pc() <= (pc_ - pattern_size)) {
|
||||
Instr str_instr = instr_at(pc_ - 2 * kInstrSize);
|
||||
Instr ldr_instr = instr_at(pc_ - 1 * kInstrSize);
|
||||
|
||||
if ((IsStrRegFpOffset(str_instr) &&
|
||||
IsLdrRegFpOffset(ldr_instr)) ||
|
||||
(IsStrRegFpNegOffset(str_instr) &&
|
||||
IsLdrRegFpNegOffset(ldr_instr))) {
|
||||
if ((ldr_instr & kLdrStrInstrArgumentMask) ==
|
||||
(str_instr & kLdrStrInstrArgumentMask)) {
|
||||
// Pattern: Ldr/str same fp+offset, same register.
|
||||
//
|
||||
// The following:
|
||||
// str rx, [fp, #-12]
|
||||
// ldr rx, [fp, #-12]
|
||||
//
|
||||
// Becomes:
|
||||
// str rx, [fp, #-12]
|
||||
|
||||
pc_ -= 1 * kInstrSize;
|
||||
if (FLAG_print_peephole_optimization) {
|
||||
PrintF("%x str/ldr (fp + same offset), same reg\n", pc_offset());
|
||||
}
|
||||
} else if ((ldr_instr & kLdrStrOffsetMask) ==
|
||||
(str_instr & kLdrStrOffsetMask)) {
|
||||
// Pattern: Ldr/str same fp+offset, different register.
|
||||
//
|
||||
// The following:
|
||||
// str rx, [fp, #-12]
|
||||
// ldr ry, [fp, #-12]
|
||||
//
|
||||
// Becomes:
|
||||
// str rx, [fp, #-12]
|
||||
// mov ry, rx
|
||||
|
||||
Register reg_stored, reg_loaded;
|
||||
reg_stored = GetRd(str_instr);
|
||||
reg_loaded = GetRd(ldr_instr);
|
||||
pc_ -= 1 * kInstrSize;
|
||||
// Insert a mov instruction, which is better than ldr.
|
||||
mov(reg_loaded, reg_stored);
|
||||
if (FLAG_print_peephole_optimization) {
|
||||
PrintF("%x str/ldr (fp + same offset), diff reg \n", pc_offset());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pattern_size = 3 * kInstrSize;
|
||||
if (FLAG_push_pop_elimination &&
|
||||
last_bound_pos_ <= (pc_offset() - pattern_size) &&
|
||||
reloc_info_writer.last_pc() <= (pc_ - pattern_size)) {
|
||||
Instr mem_write_instr = instr_at(pc_ - 3 * kInstrSize);
|
||||
Instr ldr_instr = instr_at(pc_ - 2 * kInstrSize);
|
||||
Instr mem_read_instr = instr_at(pc_ - 1 * kInstrSize);
|
||||
if (IsPush(mem_write_instr) &&
|
||||
IsPop(mem_read_instr)) {
|
||||
if ((IsLdrRegFpOffset(ldr_instr) ||
|
||||
IsLdrRegFpNegOffset(ldr_instr))) {
|
||||
if ((mem_write_instr & kRdMask) ==
|
||||
(mem_read_instr & kRdMask)) {
|
||||
// Pattern: push & pop from/to same register,
|
||||
// with a fp+offset ldr in between
|
||||
//
|
||||
// The following:
|
||||
// str rx, [sp, #-4]!
|
||||
// ldr rz, [fp, #-24]
|
||||
// ldr rx, [sp], #+4
|
||||
//
|
||||
// Becomes:
|
||||
// if(rx == rz)
|
||||
// delete all
|
||||
// else
|
||||
// ldr rz, [fp, #-24]
|
||||
|
||||
if ((mem_write_instr & kRdMask) == (ldr_instr & kRdMask)) {
|
||||
pc_ -= 3 * kInstrSize;
|
||||
} else {
|
||||
pc_ -= 3 * kInstrSize;
|
||||
// Reinsert back the ldr rz.
|
||||
emit(ldr_instr);
|
||||
}
|
||||
if (FLAG_print_push_pop_elimination) {
|
||||
PrintF("%x push/pop -dead ldr fp+offset in middle\n", pc_offset());
|
||||
}
|
||||
} else {
|
||||
// Pattern: push & pop from/to different registers
|
||||
// with a fp+offset ldr in between
|
||||
//
|
||||
// The following:
|
||||
// str rx, [sp, #-4]!
|
||||
// ldr rz, [fp, #-24]
|
||||
// ldr ry, [sp], #+4
|
||||
//
|
||||
// Becomes:
|
||||
// if(ry == rz)
|
||||
// mov ry, rx;
|
||||
// else if(rx != rz)
|
||||
// ldr rz, [fp, #-24]
|
||||
// mov ry, rx
|
||||
// else if((ry != rz) || (rx == rz)) becomes:
|
||||
// mov ry, rx
|
||||
// ldr rz, [fp, #-24]
|
||||
|
||||
Register reg_pushed, reg_popped;
|
||||
if ((mem_read_instr & kRdMask) == (ldr_instr & kRdMask)) {
|
||||
reg_pushed = GetRd(mem_write_instr);
|
||||
reg_popped = GetRd(mem_read_instr);
|
||||
pc_ -= 3 * kInstrSize;
|
||||
mov(reg_popped, reg_pushed);
|
||||
} else if ((mem_write_instr & kRdMask)
|
||||
!= (ldr_instr & kRdMask)) {
|
||||
reg_pushed = GetRd(mem_write_instr);
|
||||
reg_popped = GetRd(mem_read_instr);
|
||||
pc_ -= 3 * kInstrSize;
|
||||
emit(ldr_instr);
|
||||
mov(reg_popped, reg_pushed);
|
||||
} else if (((mem_read_instr & kRdMask)
|
||||
!= (ldr_instr & kRdMask)) ||
|
||||
((mem_write_instr & kRdMask)
|
||||
== (ldr_instr & kRdMask)) ) {
|
||||
reg_pushed = GetRd(mem_write_instr);
|
||||
reg_popped = GetRd(mem_read_instr);
|
||||
pc_ -= 3 * kInstrSize;
|
||||
mov(reg_popped, reg_pushed);
|
||||
emit(ldr_instr);
|
||||
}
|
||||
if (FLAG_print_push_pop_elimination) {
|
||||
PrintF("%x push/pop (ldr fp+off in middle)\n", pc_offset());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -987,6 +987,13 @@ class Assembler : public Malloced {
|
||||
static bool IsLdrRegisterImmediate(Instr instr);
|
||||
static int GetLdrRegisterImmediateOffset(Instr instr);
|
||||
static Instr SetLdrRegisterImmediateOffset(Instr instr, int offset);
|
||||
// Returns the register encoded in the Rd field of instr.
static Register GetRd(Instr instr);
|
||||
// True if instr matches the push / pop pattern; the Rd field is ignored.
static bool IsPush(Instr instr);
|
||||
static bool IsPop(Instr instr);
|
||||
// True if the upper 16 bits of instr match the corresponding fp-relative
// str / ldr pattern (Offset or NegOffset form); Rd and offset are ignored.
static bool IsStrRegFpOffset(Instr instr);
|
||||
static bool IsLdrRegFpOffset(Instr instr);
|
||||
static bool IsStrRegFpNegOffset(Instr instr);
|
||||
static bool IsLdrRegFpNegOffset(Instr instr);
|
||||
|
||||
|
||||
protected:
|
||||
|
@ -104,6 +104,10 @@ DEFINE_bool(push_pop_elimination, true,
|
||||
"eliminate redundant push/pops in assembly code")
|
||||
DEFINE_bool(print_push_pop_elimination, false,
|
||||
"print elimination of redundant push/pops in assembly code")
|
||||
// Read as FLAG_peephole_optimization by the ARM assembler, where it gates
// the rewrite of a str followed by an ldr from the same fp + offset.
DEFINE_bool(peephole_optimization, true,
|
||||
"perform peephole optimizations in assembly code")
|
||||
// Read as FLAG_print_peephole_optimization; when set, each applied str/ldr
// rewrite is reported through PrintF.
DEFINE_bool(print_peephole_optimization, false,
|
||||
"print peephole optimizations in assembly code")
|
||||
DEFINE_bool(enable_sse2, true,
|
||||
"enable use of SSE2 instructions if available")
|
||||
DEFINE_bool(enable_sse3, true,
|
||||
|
Loading…
Reference in New Issue
Block a user