Refine CopyBytes macro instruction in IA32/X64

Use DWORD/QWORD copies for more of the short-length cases instead of the byte-by-byte loop.

BUG=
R=svenpanne@chromium.org

Review URL: https://codereview.chromium.org/66073003

Patch from Weiliang Lin <weiliang.lin@intel.com>.

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@17632 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
svenpanne@chromium.org 2013-11-12 09:08:51 +00:00
parent 5fcc956a98
commit 51d637a073
2 changed files with 60 additions and 20 deletions

View File

@ -2010,30 +2010,48 @@ void MacroAssembler::CopyBytes(Register source,
Register destination,
Register length,
Register scratch) {
Label loop, done, short_string, short_loop;
// Experimentation shows that the short string loop is faster if length < 10.
cmp(length, Immediate(10));
j(less_equal, &short_string);
Label short_loop, len4, len8, len12, done, short_string;
ASSERT(source.is(esi));
ASSERT(destination.is(edi));
ASSERT(length.is(ecx));
cmp(length, Immediate(4));
j(below, &short_string, Label::kNear);
// Because source is 4-byte aligned in our uses of this function,
// we keep source aligned for the rep_movs call by copying the odd bytes
// at the end of the ranges.
mov(scratch, Operand(source, length, times_1, -4));
mov(Operand(destination, length, times_1, -4), scratch);
cmp(length, Immediate(8));
j(below_equal, &len4, Label::kNear);
cmp(length, Immediate(12));
j(below_equal, &len8, Label::kNear);
cmp(length, Immediate(16));
j(below_equal, &len12, Label::kNear);
mov(scratch, ecx);
shr(ecx, 2);
rep_movs();
and_(scratch, Immediate(0x3));
add(destination, scratch);
jmp(&done);
jmp(&done, Label::kNear);
bind(&len12);
mov(scratch, Operand(source, 8));
mov(Operand(destination, 8), scratch);
bind(&len8);
mov(scratch, Operand(source, 4));
mov(Operand(destination, 4), scratch);
bind(&len4);
mov(scratch, Operand(source, 0));
mov(Operand(destination, 0), scratch);
add(destination, length);
jmp(&done, Label::kNear);
bind(&short_string);
test(length, length);
j(zero, &done);
j(zero, &done, Label::kNear);
bind(&short_loop);
mov_b(scratch, Operand(source, 0));

View File

@ -4415,18 +4415,27 @@ void MacroAssembler::CopyBytes(Register destination,
cmpl(length, Immediate(min_length));
Assert(greater_equal, kInvalidMinLength);
}
Label loop, done, short_string, short_loop;
Label short_loop, len8, len16, len24, done, short_string;
const int kLongStringLimit = 20;
const int kLongStringLimit = 4 * kPointerSize;
if (min_length <= kLongStringLimit) {
cmpl(length, Immediate(kLongStringLimit));
j(less_equal, &short_string);
cmpl(length, Immediate(kPointerSize));
j(below, &short_string, Label::kNear);
}
ASSERT(source.is(rsi));
ASSERT(destination.is(rdi));
ASSERT(length.is(rcx));
if (min_length <= kLongStringLimit) {
cmpl(length, Immediate(2 * kPointerSize));
j(below_equal, &len8, Label::kNear);
cmpl(length, Immediate(3 * kPointerSize));
j(below_equal, &len16, Label::kNear);
cmpl(length, Immediate(4 * kPointerSize));
j(below_equal, &len24, Label::kNear);
}
// Because source is 8-byte aligned in our uses of this function,
// we keep source aligned for the rep movs operation by copying the odd bytes
// at the end of the ranges.
@ -4440,25 +4449,38 @@ void MacroAssembler::CopyBytes(Register destination,
addq(destination, scratch);
if (min_length <= kLongStringLimit) {
jmp(&done);
jmp(&done, Label::kNear);
bind(&len24);
movq(scratch, Operand(source, 2 * kPointerSize));
movq(Operand(destination, 2 * kPointerSize), scratch);
bind(&len16);
movq(scratch, Operand(source, kPointerSize));
movq(Operand(destination, kPointerSize), scratch);
bind(&len8);
movq(scratch, Operand(source, 0));
movq(Operand(destination, 0), scratch);
// Copy the final kPointerSize bytes of the range (may overlap words copied above).
movq(scratch, Operand(source, length, times_1, -kPointerSize));
movq(Operand(destination, length, times_1, -kPointerSize), scratch);
addq(destination, length);
jmp(&done, Label::kNear);
bind(&short_string);
if (min_length == 0) {
testl(length, length);
j(zero, &done);
j(zero, &done, Label::kNear);
}
lea(scratch, Operand(destination, length, times_1, 0));
bind(&short_loop);
movb(length, Operand(source, 0));
movb(Operand(destination, 0), length);
movb(scratch, Operand(source, 0));
movb(Operand(destination, 0), scratch);
incq(source);
incq(destination);
cmpq(destination, scratch);
j(not_equal, &short_loop);
bind(&done);
decl(length);
j(not_zero, &short_loop);
}
bind(&done);
}