[x64] Implement 256-bit assembly for vmovddup/vmovshdup

Bug: v8:12228
Change-Id: I49b2e1a1c837b96ea2e7cb58f42314109845b7fc
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3263766
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Commit-Queue: Yolanda Chen <yolanda.chen@intel.com>
Cr-Commit-Position: refs/heads/main@{#77746}
This commit is contained in:
Yolanda Chen 2021-11-06 00:55:55 +08:00 committed by V8 LUCI CQ
parent 7b785f3389
commit 0233cb6c82
4 changed files with 46 additions and 26 deletions

View File

@ -3416,30 +3416,33 @@ void Assembler::pmovmskb(Register dst, XMMRegister src) {
}
// AVX instructions
// Emits the register-source form of vmovddup for XMM registers.
// AVX must be enabled on this assembler; this is asserted with DCHECK.
void Assembler::vmovddup(XMMRegister dst, XMMRegister src) {
DCHECK(IsEnabled(AVX));
EnsureSpace ensure_space(this);
// VEX prefix with kL128 / kF2 / k0F / kWIG. xmm0 is passed as the middle
// register argument; presumably a placeholder because this instruction has
// only two operands -- TODO confirm against emit_vex_prefix.
emit_vex_prefix(dst, xmm0, src, kL128, kF2, k0F, kWIG);
// Opcode byte, followed by the encoding of the dst/src operand pair.
emit(0x12);
emit_sse_operand(dst, src);
}
// Emits the memory-source form of vmovddup with an XMM destination.
// AVX must be enabled on this assembler; this is asserted with DCHECK.
void Assembler::vmovddup(XMMRegister dst, Operand src) {
DCHECK(IsEnabled(AVX));
EnsureSpace ensure_space(this);
// VEX prefix with kL128 / kF2 / k0F / kWIG. xmm0 is passed as the middle
// register argument; presumably a placeholder because this instruction has
// only two operands -- TODO confirm against emit_vex_prefix.
emit_vex_prefix(dst, xmm0, src, kL128, kF2, k0F, kWIG);
// Opcode byte, followed by the encoding of the destination register and the
// memory operand.
emit(0x12);
emit_sse_operand(dst, src);
}
// Emits the register-source form of vmovshdup for XMM registers.
// AVX must be enabled on this assembler; this is asserted with DCHECK.
void Assembler::vmovshdup(XMMRegister dst, XMMRegister src) {
DCHECK(IsEnabled(AVX));
EnsureSpace ensure_space(this);
// Same prefix layout as vmovddup, but with kF3 instead of kF2. xmm0 is passed
// as the middle register argument; presumably a placeholder for the unused
// operand slot -- TODO confirm against emit_vex_prefix.
emit_vex_prefix(dst, xmm0, src, kL128, kF3, k0F, kWIG);
// Opcode byte, followed by the encoding of the dst/src operand pair.
emit(0x16);
emit_sse_operand(dst, src);
}
// VMOV_DUP(SIMDRegister, length) defines three Assembler methods for the given
// register type:
//   - vmovddup(SIMDRegister dst, SIMDRegister src)
//   - vmovddup(SIMDRegister dst, Operand src)
//   - vmovshdup(SIMDRegister dst, SIMDRegister src)
// `length` is token-pasted onto `k` to form the vector-length argument given to
// emit_vex_prefix (kL128 / kL256 for the instantiations below).
// Each method asserts AVX is enabled, then emits the VEX prefix, one opcode
// byte (0x12 with kF2 for vmovddup, 0x16 with kF3 for vmovshdup) and the
// operand encoding, in that order.
// xmm0 is passed as the middle register argument of emit_vex_prefix in every
// case; presumably a placeholder for the unused operand slot -- TODO confirm.
#define VMOV_DUP(SIMDRegister, length) \
void Assembler::vmovddup(SIMDRegister dst, SIMDRegister src) { \
DCHECK(IsEnabled(AVX)); \
EnsureSpace ensure_space(this); \
emit_vex_prefix(dst, xmm0, src, k##length, kF2, k0F, kWIG); \
emit(0x12); \
emit_sse_operand(dst, src); \
} \
\
void Assembler::vmovddup(SIMDRegister dst, Operand src) { \
DCHECK(IsEnabled(AVX)); \
EnsureSpace ensure_space(this); \
emit_vex_prefix(dst, xmm0, src, k##length, kF2, k0F, kWIG); \
emit(0x12); \
emit_sse_operand(dst, src); \
} \
\
void Assembler::vmovshdup(SIMDRegister dst, SIMDRegister src) { \
DCHECK(IsEnabled(AVX)); \
EnsureSpace ensure_space(this); \
emit_vex_prefix(dst, xmm0, src, k##length, kF3, k0F, kWIG); \
emit(0x16); \
emit_sse_operand(dst, src); \
}
// Instantiate the three methods for XMM registers (kL128) and for YMM
// registers (kL256).
VMOV_DUP(XMMRegister, L128)
VMOV_DUP(YMMRegister, L256)
#undef VMOV_DUP
#define BROADCASTSS(SIMDRegister, length) \
void Assembler::vbroadcastss(SIMDRegister dst, Operand src) { \

View File

@ -1329,7 +1329,10 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
// AVX instructions.
// vmovddup is declared with a register source and with a memory (Operand)
// source; vmovshdup is declared with a register source only. Both come in XMM
// and YMM overloads.
void vmovddup(XMMRegister dst, XMMRegister src);
void vmovddup(XMMRegister dst, Operand src);
void vmovddup(YMMRegister dst, YMMRegister src);
void vmovddup(YMMRegister dst, Operand src);
void vmovshdup(XMMRegister dst, XMMRegister src);
void vmovshdup(YMMRegister dst, YMMRegister src);
void vbroadcastss(XMMRegister dst, Operand src);
void vbroadcastss(XMMRegister dst, XMMRegister src);
void vbroadcastss(YMMRegister dst, Operand src);

View File

@ -2537,6 +2537,9 @@ TEST(AssemblerX64Regmove256bit) {
__ vmovdqu(ymm9, Operand(rbx, rcx, times_4, 10000));
__ vmovdqu(Operand(rbx, rcx, times_4, 10000), ymm0);
__ vbroadcastss(ymm7, Operand(rbx, rcx, times_4, 10000));
__ vmovddup(ymm3, ymm2);
__ vmovddup(ymm4, Operand(rbx, rcx, times_4, 10000));
__ vmovshdup(ymm1, ymm2);
CodeDesc desc;
masm.GetCode(isolate, &desc);
@ -2562,8 +2565,15 @@ TEST(AssemblerX64Regmove256bit) {
0xC5, 0xFE, 0x7F, 0x84, 0x8B, 0x10, 0x27, 0x00, 0x00,
// vbroadcastss ymm7, DWORD PTR [rbx+rcx*4+0x2710]
0xc4, 0xe2, 0x7d, 0x18, 0xbc, 0x8b, 0x10, 0x27, 0x00,
0x00};
0xc4, 0xe2, 0x7d, 0x18, 0xbc, 0x8b, 0x10, 0x27, 0x00, 0x00,
// vmovddup ymm3, ymm2
0xc5, 0xff, 0x12, 0xda,
// vmovddup ymm4, YMMWORD PTR [rbx+rcx*4+0x2710]
0xc5, 0xff, 0x12, 0xa4, 0x8b, 0x10, 0x27, 0x00, 0x00,
// vmovshdup ymm1, ymm2
0xc5, 0xfe, 0x16, 0xca};
CHECK_EQ(0, memcmp(expected, desc.buffer, sizeof(expected)));
}

View File

@ -1411,6 +1411,10 @@ UNINITIALIZED_TEST(DisasmX64YMMRegister) {
vhaddps(ymm0, ymm1, Operand(rbx, rcx, times_4, 10000)));
COMPARE("c4e27d18bc8b10270000 vbroadcastss ymm7,[rbx+rcx*4+0x2710]",
vbroadcastss(ymm7, Operand(rbx, rcx, times_4, 10000)));
COMPARE("c5ff12da vmovddup ymm3,ymm2", vmovddup(ymm3, ymm2));
COMPARE("c5ff12a48b10270000 vmovddup ymm4,[rbx+rcx*4+0x2710]",
vmovddup(ymm4, Operand(rbx, rcx, times_4, 10000)));
COMPARE("c5fe16ca vmovshdup ymm1,ymm2", vmovshdup(ymm1, ymm2));
}
if (!CpuFeatures::IsSupported(AVX2)) return;