[arm64] Add CopyDoubleWordsMode option to CopyDoubleWords.

The option lets us use the function in cases where we cannot use the current
version due to restrictions on src and dst. This will be useful for some arm64
builtins when we pad the stack arguments, where we will need to copy the
existing arguments either one slot up or one slot down in memory.

Bug: v8:6644
Change-Id: I75281cdc9fa6812e3b24bf5756057c93305cbb95
Reviewed-on: https://chromium-review.googlesource.com/771711
Reviewed-by: Benedikt Meurer <bmeurer@chromium.org>
Commit-Queue: Georgia Kouveli <georgia.kouveli@arm.com>
Cr-Commit-Position: refs/heads/master@{#49394}
This commit is contained in:
Georgia Kouveli 2017-11-15 12:19:41 +00:00 committed by Commit Bot
parent 11e635bd19
commit 1adce94ab3
3 changed files with 241 additions and 21 deletions

View File

@ -1499,40 +1499,53 @@ void TurboAssembler::CopySlots(Register dst, Register src,
CopyDoubleWords(dst, src, slot_count);
}
void TurboAssembler::CopyDoubleWords(Register dst, Register src,
Register count) {
void TurboAssembler::CopyDoubleWords(Register dst, Register src, Register count,
CopyDoubleWordsMode mode) {
DCHECK(!AreAliased(dst, src, count));
if (emit_debug_code()) {
// Copy requires dst < src || (dst - src) >= count.
Label dst_below_src;
Subs(dst, dst, src);
B(lt, &dst_below_src);
Cmp(dst, count);
Check(ge, kOffsetOutOfRange);
Bind(&dst_below_src);
Add(dst, dst, src);
Register pointer1 = dst;
Register pointer2 = src;
if (mode == kSrcLessThanDst) {
pointer1 = src;
pointer2 = dst;
}
// Copy requires pointer1 < pointer2 || (pointer1 - pointer2) >= count.
Label pointer1_below_pointer2;
Subs(pointer1, pointer1, pointer2);
B(lt, &pointer1_below_pointer2);
Cmp(pointer1, count);
Check(ge, kOffsetOutOfRange);
Bind(&pointer1_below_pointer2);
Add(pointer1, pointer1, pointer2);
}
static_assert(kPointerSize == kDRegSize,
"pointers must be the same size as doubles");
int direction = (mode == kDstLessThanSrc) ? 1 : -1;
UseScratchRegisterScope scope(this);
VRegister temp0 = scope.AcquireD();
VRegister temp1 = scope.AcquireD();
Label pairs, done;
Label pairs, loop, done;
Tbz(count, 0, &pairs);
Ldr(temp0, MemOperand(src, kPointerSize, PostIndex));
Ldr(temp0, MemOperand(src, direction * kPointerSize, PostIndex));
Sub(count, count, 1);
Str(temp0, MemOperand(dst, kPointerSize, PostIndex));
Str(temp0, MemOperand(dst, direction * kPointerSize, PostIndex));
Bind(&pairs);
if (mode == kSrcLessThanDst) {
// Adjust pointers for post-index ldp/stp with negative offset:
Sub(dst, dst, kPointerSize);
Sub(src, src, kPointerSize);
}
Bind(&loop);
Cbz(count, &done);
Ldp(temp0, temp1, MemOperand(src, 2 * kPointerSize, PostIndex));
Ldp(temp0, temp1, MemOperand(src, 2 * direction * kPointerSize, PostIndex));
Sub(count, count, 2);
Stp(temp0, temp1, MemOperand(dst, 2 * kPointerSize, PostIndex));
B(&pairs);
Stp(temp0, temp1, MemOperand(dst, 2 * direction * kPointerSize, PostIndex));
B(&loop);
// TODO(all): large copies may benefit from using temporary Q registers
// to copy four double words per iteration.

View File

@ -669,11 +669,20 @@ class TurboAssembler : public Assembler {
void CopySlots(Register dst, Register src, Register slot_count);
// Copy count double words from the address in register src to the address
// in register dst. Address dst must be less than src, or the gap between
// them must be greater than or equal to count double words, otherwise the
// result is unpredictable. The function may corrupt its register arguments.
// The registers must not alias each other.
void CopyDoubleWords(Register dst, Register src, Register count);
// in register dst. There are two modes for this function:
// 1) Address dst must be less than src, or the gap between them must be
// greater than or equal to count double words, otherwise the result is
// unpredictable. This is the default mode.
// 2) Address src must be less than dst, or the gap between them must be
// greater than or equal to count double words, otherwise the result is
// unpredictable. In this mode, src and dst specify the last (highest)
// address of the regions to copy from and to.
// The case where src == dst is not supported.
// The function may corrupt its register arguments. The registers must not
// alias each other.
enum CopyDoubleWordsMode { kDstLessThanSrc, kSrcLessThanDst };
void CopyDoubleWords(Register dst, Register src, Register count,
CopyDoubleWordsMode mode = kDstLessThanSrc);
// Calculate the address of a double word-sized slot at slot_offset from the
// stack pointer, and write it to dst. Positive slot_offsets are at addresses

View File

@ -13404,6 +13404,204 @@ TEST(copy_slots_up) {
TEARDOWN();
}
// Exercises CopyDoubleWords in kSrcLessThanDst mode with an even count:
// twelve pushed slots are copied one slot towards higher addresses, then the
// twelve copied slots are popped and compared against the pushed pattern.
TEST(copy_double_words_downwards_even) {
INIT_V8();
SETUP();
// Distinct bit patterns so that a misplaced slot is detectable.
const uint64_t ones = 0x1111111111111111UL;
const uint64_t twos = 0x2222222222222222UL;
const uint64_t threes = 0x3333333333333333UL;
const uint64_t fours = 0x4444444444444444UL;
START();
// Switch to jssp as the stack pointer for the generated code.
__ Mov(jssp, __ StackPointer());
__ SetStackPointer(jssp);
// Test copying 12 slots up one slot (src < dst, so dst is one slot above src).
__ Mov(x1, ones);
__ Mov(x2, twos);
__ Mov(x3, threes);
__ Mov(x4, fours);
// One zero slot followed by twelve data slots: thirteen slots in total.
// NOTE(review): the zero slot is presumably the extra slot that the
// destination region (slot 12) extends into -- confirm against Push/SlotAddress.
__ Push(xzr);
__ Push(x1, x2, x3, x4);
__ Push(x1, x2, x1, x2);
__ Push(x3, x4, x3, x4);
// x5 is dst (slot 12) and x6 is src (slot 11); x7 is the double word count.
// kSrcLessThanDst is the mode used when src is below dst.
__ SlotAddress(x5, 12);
__ SlotAddress(x6, 11);
__ Mov(x7, 12);
__ CopyDoubleWords(x5, x6, x7, TurboAssembler::kSrcLessThanDst);
// Drop one slot, then pop the remaining twelve slots into x4..x15.
__ Drop(1);
__ Pop(x4, x5, x6, x7);
__ Pop(x8, x9, x10, x11);
__ Pop(x12, x13, x14, x15);
// Switch back to csp as the stack pointer.
__ Mov(csp, jssp);
__ SetStackPointer(csp);
END();
RUN();
// Expected values follow the push order: x15 holds the first value pushed
// after the zero slot (ones) and x4 the last value pushed (fours).
CHECK_EQUAL_64(ones, x15);
CHECK_EQUAL_64(twos, x14);
CHECK_EQUAL_64(threes, x13);
CHECK_EQUAL_64(fours, x12);
CHECK_EQUAL_64(ones, x11);
CHECK_EQUAL_64(twos, x10);
CHECK_EQUAL_64(ones, x9);
CHECK_EQUAL_64(twos, x8);
CHECK_EQUAL_64(threes, x7);
CHECK_EQUAL_64(fours, x6);
CHECK_EQUAL_64(threes, x5);
CHECK_EQUAL_64(fours, x4);
TEARDOWN();
}
// Exercises CopyDoubleWords in kSrcLessThanDst mode with an odd count:
// thirteen pushed slots are copied one slot towards higher addresses, then the
// thirteen copied slots are popped and compared against the pushed pattern.
TEST(copy_double_words_downwards_odd) {
INIT_V8();
SETUP();
// Distinct bit patterns so that a misplaced slot is detectable.
const uint64_t ones = 0x1111111111111111UL;
const uint64_t twos = 0x2222222222222222UL;
const uint64_t threes = 0x3333333333333333UL;
const uint64_t fours = 0x4444444444444444UL;
const uint64_t fives = 0x5555555555555555UL;
START();
// Switch to jssp as the stack pointer for the generated code.
__ Mov(jssp, __ StackPointer());
__ SetStackPointer(jssp);
// Test copying 13 slots up one slot (src < dst, so dst is one slot above src).
__ Mov(x1, ones);
__ Mov(x2, twos);
__ Mov(x3, threes);
__ Mov(x4, fours);
__ Mov(x5, fives);
// One zero slot followed by thirteen data slots: fourteen slots in total.
// NOTE(review): the zero slot is presumably the extra slot that the
// destination region (slot 13) extends into -- confirm against Push/SlotAddress.
__ Push(xzr, x5);
__ Push(x1, x2, x3, x4);
__ Push(x1, x2, x1, x2);
__ Push(x3, x4, x3, x4);
// x5 is reused as dst (slot 13) now that fives has been pushed; x6 is src
// (slot 12) and x7 is the double word count.
__ SlotAddress(x5, 13);
__ SlotAddress(x6, 12);
__ Mov(x7, 13);
__ CopyDoubleWords(x5, x6, x7, TurboAssembler::kSrcLessThanDst);
// Drop one slot, then pop the remaining thirteen slots into x4..x16.
__ Drop(1);
__ Pop(x4);
__ Pop(x5, x6, x7, x8);
__ Pop(x9, x10, x11, x12);
__ Pop(x13, x14, x15, x16);
// Switch back to csp as the stack pointer.
__ Mov(csp, jssp);
__ SetStackPointer(csp);
END();
RUN();
// Expected values follow the push order: x16 holds the first value pushed
// after the zero slot (fives) and x4 the last value pushed (fours).
CHECK_EQUAL_64(fives, x16);
CHECK_EQUAL_64(ones, x15);
CHECK_EQUAL_64(twos, x14);
CHECK_EQUAL_64(threes, x13);
CHECK_EQUAL_64(fours, x12);
CHECK_EQUAL_64(ones, x11);
CHECK_EQUAL_64(twos, x10);
CHECK_EQUAL_64(ones, x9);
CHECK_EQUAL_64(twos, x8);
CHECK_EQUAL_64(threes, x7);
CHECK_EQUAL_64(fours, x6);
CHECK_EQUAL_64(threes, x5);
CHECK_EQUAL_64(fours, x4);
TEARDOWN();
}
// Checks that CopyDoubleWords with a count of zero is a no-op in both modes:
// sixteen slots are pushed, two zero-length copies are issued, and all sixteen
// slots are popped and compared against the values that were pushed.
TEST(copy_noop) {
INIT_V8();
SETUP();
// Distinct bit patterns so that an unintended write is detectable.
const uint64_t ones = 0x1111111111111111UL;
const uint64_t twos = 0x2222222222222222UL;
const uint64_t threes = 0x3333333333333333UL;
const uint64_t fours = 0x4444444444444444UL;
const uint64_t fives = 0x5555555555555555UL;
START();
// Switch to jssp as the stack pointer for the generated code.
__ Mov(jssp, __ StackPointer());
__ SetStackPointer(jssp);
__ Mov(x1, ones);
__ Mov(x2, twos);
__ Mov(x3, threes);
__ Mov(x4, fours);
__ Mov(x5, fives);
// Sixteen slots in total; none of them should be modified by the copies.
__ Push(xzr, x5, x5, xzr);
__ Push(x3, x4, x3, x4);
__ Push(x1, x2, x1, x2);
__ Push(x1, x2, x3, x4);
// src < dst, count == 0
// x5 is dst (slot 3), x6 is src (slot 2), x7 is the count.
__ SlotAddress(x5, 3);
__ SlotAddress(x6, 2);
__ Mov(x7, 0);
__ CopyDoubleWords(x5, x6, x7, TurboAssembler::kSrcLessThanDst);
// dst < src, count == 0
// x5 is dst (slot 2), x6 is src (slot 3), x7 is the count.
__ SlotAddress(x5, 2);
__ SlotAddress(x6, 3);
__ Mov(x7, 0);
__ CopyDoubleWords(x5, x6, x7, TurboAssembler::kDstLessThanSrc);
// Pop all sixteen slots into x1..x16 for inspection.
__ Pop(x1, x2, x3, x4);
__ Pop(x5, x6, x7, x8);
__ Pop(x9, x10, x11, x12);
__ Pop(x13, x14, x15, x16);
// Switch back to csp as the stack pointer.
__ Mov(csp, jssp);
__ SetStackPointer(csp);
END();
RUN();
// Expected values are the pushed values in reverse push order: x1 holds the
// last value pushed (fours) and x16 the first (zero).
CHECK_EQUAL_64(fours, x1);
CHECK_EQUAL_64(threes, x2);
CHECK_EQUAL_64(twos, x3);
CHECK_EQUAL_64(ones, x4);
CHECK_EQUAL_64(twos, x5);
CHECK_EQUAL_64(ones, x6);
CHECK_EQUAL_64(twos, x7);
CHECK_EQUAL_64(ones, x8);
CHECK_EQUAL_64(fours, x9);
CHECK_EQUAL_64(threes, x10);
CHECK_EQUAL_64(fours, x11);
CHECK_EQUAL_64(threes, x12);
CHECK_EQUAL_64(0, x13);
CHECK_EQUAL_64(fives, x14);
CHECK_EQUAL_64(fives, x15);
CHECK_EQUAL_64(0, x16);
TEARDOWN();
}
TEST(jump_both_smi) {
INIT_V8();
SETUP();