Better peephole optimization for ARM. This is a commit of
http://codereview.chromium.org/2004006 for Subrato De. git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@4662 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
5e91f6fa2f
commit
afe8c296a4
@ -268,6 +268,20 @@ const Instr kBlxRegMask =
|
|||||||
15 * B24 | 15 * B20 | 15 * B16 | 15 * B12 | 15 * B8 | 15 * B4;
|
15 * B24 | 15 * B20 | 15 * B16 | 15 * B12 | 15 * B8 | 15 * B4;
|
||||||
const Instr kBlxRegPattern =
|
const Instr kBlxRegPattern =
|
||||||
B24 | B21 | 15 * B16 | 15 * B12 | 15 * B8 | 3 * B4;
|
B24 | B21 | 15 * B16 | 15 * B12 | 15 * B8 | 3 * B4;
|
||||||
|
// A mask for the Rd register for push, pop, ldr, str instructions.
|
||||||
|
const Instr kRdMask = 0x0000f000;
|
||||||
|
static const Instr kLdrRegFpOffsetPattern =
|
||||||
|
al | B26 | L | Offset | fp.code() * B16;
|
||||||
|
static const Instr kStrRegFpOffsetPattern =
|
||||||
|
al | B26 | Offset | fp.code() * B16;
|
||||||
|
static const Instr kLdrRegFpNegOffsetPattern =
|
||||||
|
al | B26 | L | NegOffset | fp.code() * B16;
|
||||||
|
static const Instr kStrRegFpNegOffsetPattern =
|
||||||
|
al | B26 | NegOffset | fp.code() * B16;
|
||||||
|
static const Instr kLdrStrInstrTypeMask = 0xffff0000;
|
||||||
|
static const Instr kLdrStrInstrArgumentMask = 0x0000ffff;
|
||||||
|
static const Instr kLdrStrOffsetMask = 0x00000fff;
|
||||||
|
static const int kRdShift = 12;
|
||||||
|
|
||||||
// Spare buffer.
|
// Spare buffer.
|
||||||
static const int kMinimalBufferSize = 4*KB;
|
static const int kMinimalBufferSize = 4*KB;
|
||||||
@ -395,6 +409,43 @@ Instr Assembler::SetLdrRegisterImmediateOffset(Instr instr, int offset) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Returns a Register whose code is the value of the Rd field of |instr|,
// i.e. the bits selected by kRdMask shifted down by kRdShift.
Register Assembler::GetRd(Instr instr) {
  const Instr rd_bits = instr & kRdMask;
  Register rd;
  rd.code_ = rd_bits >> kRdShift;
  return rd;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Returns true if |instr| equals kPushRegPattern once its Rd field has been
// cleared, i.e. the register being pushed is ignored by the comparison.
bool Assembler::IsPush(Instr instr) {
  const Instr without_rd = instr & ~kRdMask;
  return without_rd == kPushRegPattern;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Returns true if |instr| equals kPopRegPattern once its Rd field has been
// cleared, i.e. the register being popped is ignored by the comparison.
bool Assembler::IsPop(Instr instr) {
  const Instr without_rd = instr & ~kRdMask;
  return without_rd == kPopRegPattern;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Returns true if the upper 16 bits of |instr| equal kStrRegFpOffsetPattern.
// The lower 16 bits (Rd field and offset) do not take part in the comparison.
bool Assembler::IsStrRegFpOffset(Instr instr) {
  const Instr type_bits = instr & kLdrStrInstrTypeMask;
  return type_bits == kStrRegFpOffsetPattern;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Returns true if the upper 16 bits of |instr| equal kLdrRegFpOffsetPattern.
// The lower 16 bits (Rd field and offset) do not take part in the comparison.
bool Assembler::IsLdrRegFpOffset(Instr instr) {
  const Instr type_bits = instr & kLdrStrInstrTypeMask;
  return type_bits == kLdrRegFpOffsetPattern;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Returns true if the upper 16 bits of |instr| equal
// kStrRegFpNegOffsetPattern. The lower 16 bits (Rd field and offset) do not
// take part in the comparison.
bool Assembler::IsStrRegFpNegOffset(Instr instr) {
  const Instr type_bits = instr & kLdrStrInstrTypeMask;
  return type_bits == kStrRegFpNegOffsetPattern;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Returns true if the upper 16 bits of |instr| equal
// kLdrRegFpNegOffsetPattern. The lower 16 bits (Rd field and offset) do not
// take part in the comparison.
bool Assembler::IsLdrRegFpNegOffset(Instr instr) {
  const Instr type_bits = instr & kLdrStrInstrTypeMask;
  return type_bits == kLdrRegFpNegOffsetPattern;
}
|
||||||
|
|
||||||
|
|
||||||
// Labels refer to positions in the (to be) generated code.
|
// Labels refer to positions in the (to be) generated code.
|
||||||
// There are bound, linked, and unused labels.
|
// There are bound, linked, and unused labels.
|
||||||
//
|
//
|
||||||
@ -1086,20 +1137,179 @@ void Assembler::ldr(Register dst, const MemOperand& src, Condition cond) {
|
|||||||
}
|
}
|
||||||
addrmod2(cond | B26 | L, dst, src);
|
addrmod2(cond | B26 | L, dst, src);
|
||||||
|
|
||||||
// Eliminate pattern: push(r), pop(r)
|
// Eliminate pattern: push(ry), pop(rx)
|
||||||
// str(r, MemOperand(sp, 4, NegPreIndex), al)
|
// str(ry, MemOperand(sp, 4, NegPreIndex), al)
|
||||||
// ldr(r, MemOperand(sp, 4, PostIndex), al)
|
// ldr(rx, MemOperand(sp, 4, PostIndex), al)
|
||||||
// Both instructions can be eliminated.
|
// Both instructions can be eliminated if ry = rx.
|
||||||
|
// If ry != rx, a register copy from ry to rx is inserted
|
||||||
|
// after eliminating the push and the pop instructions.
|
||||||
int pattern_size = 2 * kInstrSize;
|
int pattern_size = 2 * kInstrSize;
|
||||||
|
Instr push_instr = instr_at(pc_ - 2 * kInstrSize);
|
||||||
|
Instr pop_instr = instr_at(pc_ - 1 * kInstrSize);
|
||||||
|
|
||||||
if (FLAG_push_pop_elimination &&
|
if (FLAG_push_pop_elimination &&
|
||||||
|
last_bound_pos_ <= (pc_offset() - pattern_size) &&
|
||||||
|
reloc_info_writer.last_pc() <= (pc_ - pattern_size) &&
|
||||||
|
IsPush(push_instr) &&
|
||||||
|
IsPop(pop_instr)) {
|
||||||
|
if ((pop_instr & kRdMask) != (push_instr & kRdMask)) {
|
||||||
|
// For consecutive push and pop on different registers,
|
||||||
|
// we delete both the push & pop and insert a register move.
|
||||||
|
// push ry, pop rx --> mov rx, ry
|
||||||
|
Register reg_pushed, reg_popped;
|
||||||
|
reg_pushed = GetRd(push_instr);
|
||||||
|
reg_popped = GetRd(pop_instr);
|
||||||
|
pc_ -= 2 * kInstrSize;
|
||||||
|
// Insert a mov instruction, which is better than a pair of push & pop
|
||||||
|
mov(reg_popped, reg_pushed);
|
||||||
|
if (FLAG_print_push_pop_elimination) {
|
||||||
|
PrintF("%x push/pop (diff reg) replaced by a reg move\n", pc_offset());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// For consecutive push and pop on the same register,
|
||||||
|
// both the push and the pop can be deleted.
|
||||||
|
pc_ -= 2 * kInstrSize;
|
||||||
|
if (FLAG_print_push_pop_elimination) {
|
||||||
|
PrintF("%x push/pop (same reg) eliminated\n", pc_offset());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pattern_size = 2 * kInstrSize;
|
||||||
|
if (FLAG_peephole_optimization &&
|
||||||
last_bound_pos_ <= (pc_offset() - pattern_size) &&
|
last_bound_pos_ <= (pc_offset() - pattern_size) &&
|
||||||
reloc_info_writer.last_pc() <= (pc_ - pattern_size) &&
|
reloc_info_writer.last_pc() <= (pc_ - pattern_size)) {
|
||||||
// Pattern.
|
Instr str_instr = instr_at(pc_ - 2 * kInstrSize);
|
||||||
instr_at(pc_ - 1 * kInstrSize) == (kPopRegPattern | dst.code() * B12) &&
|
Instr ldr_instr = instr_at(pc_ - 1 * kInstrSize);
|
||||||
instr_at(pc_ - 2 * kInstrSize) == (kPushRegPattern | dst.code() * B12)) {
|
|
||||||
pc_ -= 2 * kInstrSize;
|
if ((IsStrRegFpOffset(str_instr) &&
|
||||||
if (FLAG_print_push_pop_elimination) {
|
IsLdrRegFpOffset(ldr_instr)) ||
|
||||||
PrintF("%x push/pop (same reg) eliminated\n", pc_offset());
|
(IsStrRegFpNegOffset(str_instr) &&
|
||||||
|
IsLdrRegFpNegOffset(ldr_instr))) {
|
||||||
|
if ((ldr_instr & kLdrStrInstrArgumentMask) ==
|
||||||
|
(str_instr & kLdrStrInstrArgumentMask)) {
|
||||||
|
// Pattern: Ldr/str same fp+offset, same register.
|
||||||
|
//
|
||||||
|
// The following:
|
||||||
|
// str rx, [fp, #-12]
|
||||||
|
// ldr rx, [fp, #-12]
|
||||||
|
//
|
||||||
|
// Becomes:
|
||||||
|
// str rx, [fp, #-12]
|
||||||
|
|
||||||
|
pc_ -= 1 * kInstrSize;
|
||||||
|
if (FLAG_print_peephole_optimization) {
|
||||||
|
PrintF("%x str/ldr (fp + same offset), same reg\n", pc_offset());
|
||||||
|
}
|
||||||
|
} else if ((ldr_instr & kLdrStrOffsetMask) ==
|
||||||
|
(str_instr & kLdrStrOffsetMask)) {
|
||||||
|
// Pattern: Ldr/str same fp+offset, different register.
|
||||||
|
//
|
||||||
|
// The following:
|
||||||
|
// str rx, [fp, #-12]
|
||||||
|
// ldr ry, [fp, #-12]
|
||||||
|
//
|
||||||
|
// Becomes:
|
||||||
|
// str rx, [fp, #-12]
|
||||||
|
// mov ry, rx
|
||||||
|
|
||||||
|
Register reg_stored, reg_loaded;
|
||||||
|
reg_stored = GetRd(str_instr);
|
||||||
|
reg_loaded = GetRd(ldr_instr);
|
||||||
|
pc_ -= 1 * kInstrSize;
|
||||||
|
// Insert a mov instruction, which is better than ldr.
|
||||||
|
mov(reg_loaded, reg_stored);
|
||||||
|
if (FLAG_print_peephole_optimization) {
|
||||||
|
PrintF("%x str/ldr (fp + same offset), diff reg \n", pc_offset());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pattern_size = 3 * kInstrSize;
|
||||||
|
if (FLAG_push_pop_elimination &&
|
||||||
|
last_bound_pos_ <= (pc_offset() - pattern_size) &&
|
||||||
|
reloc_info_writer.last_pc() <= (pc_ - pattern_size)) {
|
||||||
|
Instr mem_write_instr = instr_at(pc_ - 3 * kInstrSize);
|
||||||
|
Instr ldr_instr = instr_at(pc_ - 2 * kInstrSize);
|
||||||
|
Instr mem_read_instr = instr_at(pc_ - 1 * kInstrSize);
|
||||||
|
if (IsPush(mem_write_instr) &&
|
||||||
|
IsPop(mem_read_instr)) {
|
||||||
|
if ((IsLdrRegFpOffset(ldr_instr) ||
|
||||||
|
IsLdrRegFpNegOffset(ldr_instr))) {
|
||||||
|
if ((mem_write_instr & kRdMask) ==
|
||||||
|
(mem_read_instr & kRdMask)) {
|
||||||
|
// Pattern: push & pop from/to same register,
|
||||||
|
// with a fp+offset ldr in between
|
||||||
|
//
|
||||||
|
// The following:
|
||||||
|
// str rx, [sp, #-4]!
|
||||||
|
// ldr rz, [fp, #-24]
|
||||||
|
// ldr rx, [sp], #+4
|
||||||
|
//
|
||||||
|
// Becomes:
|
||||||
|
// if(rx == rz)
|
||||||
|
// delete all
|
||||||
|
// else
|
||||||
|
// ldr rz, [fp, #-24]
|
||||||
|
|
||||||
|
if ((mem_write_instr & kRdMask) == (ldr_instr & kRdMask)) {
|
||||||
|
pc_ -= 3 * kInstrSize;
|
||||||
|
} else {
|
||||||
|
pc_ -= 3 * kInstrSize;
|
||||||
|
// Reinsert back the ldr rz.
|
||||||
|
emit(ldr_instr);
|
||||||
|
}
|
||||||
|
if (FLAG_print_push_pop_elimination) {
|
||||||
|
PrintF("%x push/pop -dead ldr fp+offset in middle\n", pc_offset());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Pattern: push & pop from/to different registers
|
||||||
|
// with a fp+offset ldr in between
|
||||||
|
//
|
||||||
|
// The following:
|
||||||
|
// str rx, [sp, #-4]!
|
||||||
|
// ldr rz, [fp, #-24]
|
||||||
|
// ldr ry, [sp], #+4
|
||||||
|
//
|
||||||
|
// Becomes:
|
||||||
|
// if(ry == rz)
|
||||||
|
// mov ry, rx;
|
||||||
|
// else if(rx != rz)
|
||||||
|
// ldr rz, [fp, #-24]
|
||||||
|
// mov ry, rx
|
||||||
|
// else if((ry != rz) || (rx == rz)) becomes:
|
||||||
|
// mov ry, rx
|
||||||
|
// ldr rz, [fp, #-24]
|
||||||
|
|
||||||
|
Register reg_pushed, reg_popped;
|
||||||
|
if ((mem_read_instr & kRdMask) == (ldr_instr & kRdMask)) {
|
||||||
|
reg_pushed = GetRd(mem_write_instr);
|
||||||
|
reg_popped = GetRd(mem_read_instr);
|
||||||
|
pc_ -= 3 * kInstrSize;
|
||||||
|
mov(reg_popped, reg_pushed);
|
||||||
|
} else if ((mem_write_instr & kRdMask)
|
||||||
|
!= (ldr_instr & kRdMask)) {
|
||||||
|
reg_pushed = GetRd(mem_write_instr);
|
||||||
|
reg_popped = GetRd(mem_read_instr);
|
||||||
|
pc_ -= 3 * kInstrSize;
|
||||||
|
emit(ldr_instr);
|
||||||
|
mov(reg_popped, reg_pushed);
|
||||||
|
} else if (((mem_read_instr & kRdMask)
|
||||||
|
!= (ldr_instr & kRdMask)) ||
|
||||||
|
((mem_write_instr & kRdMask)
|
||||||
|
== (ldr_instr & kRdMask)) ) {
|
||||||
|
reg_pushed = GetRd(mem_write_instr);
|
||||||
|
reg_popped = GetRd(mem_read_instr);
|
||||||
|
pc_ -= 3 * kInstrSize;
|
||||||
|
mov(reg_popped, reg_pushed);
|
||||||
|
emit(ldr_instr);
|
||||||
|
}
|
||||||
|
if (FLAG_print_push_pop_elimination) {
|
||||||
|
PrintF("%x push/pop (ldr fp+off in middle)\n", pc_offset());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -987,6 +987,13 @@ class Assembler : public Malloced {
|
|||||||
static bool IsLdrRegisterImmediate(Instr instr);
|
static bool IsLdrRegisterImmediate(Instr instr);
|
||||||
static int GetLdrRegisterImmediateOffset(Instr instr);
|
static int GetLdrRegisterImmediateOffset(Instr instr);
|
||||||
static Instr SetLdrRegisterImmediateOffset(Instr instr, int offset);
|
static Instr SetLdrRegisterImmediateOffset(Instr instr, int offset);
|
||||||
|
// Instruction-inspection helpers used by the peephole patterns in
// Assembler::ldr.
// Returns the register encoded in the Rd field of instr.
static Register GetRd(Instr instr);
|
||||||
|
// True if instr, with its Rd field cleared, equals the push pattern.
static bool IsPush(Instr instr);
|
||||||
|
// True if instr, with its Rd field cleared, equals the pop pattern.
static bool IsPop(Instr instr);
|
||||||
|
// The four predicates below compare only the upper 16 bits of instr against
// the corresponding str/ldr fp-based pattern; the Rd field and the offset
// are ignored.
static bool IsStrRegFpOffset(Instr instr);
|
||||||
|
static bool IsLdrRegFpOffset(Instr instr);
|
||||||
|
static bool IsStrRegFpNegOffset(Instr instr);
|
||||||
|
static bool IsLdrRegFpNegOffset(Instr instr);
|
||||||
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
@ -104,6 +104,10 @@ DEFINE_bool(push_pop_elimination, true,
|
|||||||
"eliminate redundant push/pops in assembly code")
|
"eliminate redundant push/pops in assembly code")
|
||||||
DEFINE_bool(print_push_pop_elimination, false,
|
DEFINE_bool(print_push_pop_elimination, false,
|
||||||
"print elimination of redundant push/pops in assembly code")
|
"print elimination of redundant push/pops in assembly code")
|
||||||
|
// peephole_optimization (default true) gates the str/ldr fp+offset rewrite in
// Assembler::ldr; print_peephole_optimization (default false) makes that
// rewrite report each replacement through PrintF.
DEFINE_bool(peephole_optimization, true,
|
||||||
|
"perform peephole optimizations in assembly code")
|
||||||
|
DEFINE_bool(print_peephole_optimization, false,
|
||||||
|
"print peephole optimizations in assembly code")
|
||||||
DEFINE_bool(enable_sse2, true,
|
DEFINE_bool(enable_sse2, true,
|
||||||
"enable use of SSE2 instructions if available")
|
"enable use of SSE2 instructions if available")
|
||||||
DEFINE_bool(enable_sse3, true,
|
DEFINE_bool(enable_sse3, true,
|
||||||
|
Loading…
Reference in New Issue
Block a user