[x64] Implement 256-bit assembler for vshufps

Bug: v8:12228
Change-Id: I233efc9fc4636c25baba6a689f7038331fd1f32b
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3303806
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Jie Pan <jie.pan@intel.com>
Cr-Commit-Position: refs/heads/main@{#78598}
This commit is contained in:
jiepan 2022-01-06 16:32:13 +08:00 committed by V8 LUCI CQ
parent 4b8d04897c
commit a54f38e1fd
4 changed files with 22 additions and 1 deletions

View File

@ -3761,6 +3761,16 @@ void Assembler::vps(byte op, XMMRegister dst, XMMRegister src1,
emit(imm8);
}
// Emits a three-register, YMMRegister-operand instruction that carries a
// trailing immediate byte. The bytes are written in this order: VEX prefix,
// the opcode byte |op|, the register operand byte built from |dst| and
// |src2|, and finally |imm8|.
// AVX must be enabled on this assembler (asserted via DCHECK).
void Assembler::vps(byte op, YMMRegister dst, YMMRegister src1,
YMMRegister src2, byte imm8) {
DCHECK(IsEnabled(AVX));
EnsureSpace ensure_space(this);
// The prefix is built from all three registers with the kL256 length
// selector; the kNoPrefix / k0F / kWIG arguments are passed through as-is.
emit_vex_prefix(dst, src1, src2, kL256, kNoPrefix, k0F, kWIG);
emit(op);
// Only |dst| and |src2| go into the operand byte; |src1| was already
// consumed by the VEX prefix above.
emit_sse_operand(dst, src2);
// The immediate is the last byte of the instruction.
emit(imm8);
}
#define VPD(SIMDRegister, length) \
void Assembler::vpd(byte op, SIMDRegister dst, SIMDRegister src1, \
SIMDRegister src2) { \

View File

@ -1596,6 +1596,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
// vshufps with XMMRegister operands: forwards to vps() with opcode byte 0xC6,
// passing |dst|, |src1|, |src2| and the immediate byte |imm8| unchanged.
void vshufps(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8) {
vps(0xC6, dst, src1, src2, imm8);
}
// vshufps with YMMRegister operands: forwards to the YMMRegister overload of
// vps() with opcode byte 0xC6, passing |dst|, |src1|, |src2| and the
// immediate byte |imm8| unchanged.
void vshufps(YMMRegister dst, YMMRegister src1, YMMRegister src2, byte imm8) {
vps(0xC6, dst, src1, src2, imm8);
}
// vmovaps with XMMRegister operands: forwards to vps() with opcode byte 0x28.
// xmm0 is passed in the src1 position.
// NOTE(review): xmm0 looks like a placeholder for the unused middle operand
// of this two-operand instruction -- confirm against the vps() encoding.
void vmovaps(XMMRegister dst, XMMRegister src) { vps(0x28, dst, xmm0, src); }
// vmovaps with YMMRegister operands: forwards to vps() with opcode byte 0x28.
// ymm0 is passed in the src1 position.
// NOTE(review): ymm0 looks like a placeholder for the unused middle operand
// of this two-operand instruction -- confirm against the vps() encoding.
void vmovaps(YMMRegister dst, YMMRegister src) { vps(0x28, dst, ymm0, src); }
@ -1811,6 +1814,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void vps(byte op, YMMRegister dst, YMMRegister src1, Operand src2);
// Register-register-register forms of vps() that additionally take an
// immediate byte |imm8|, emitted as the last byte of the instruction. The
// vshufps() helpers call these with opcode 0xC6.
void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
byte imm8);
void vps(byte op, YMMRegister dst, YMMRegister src1, YMMRegister src2,
byte imm8);
void vpd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
void vpd(byte op, YMMRegister dst, YMMRegister src1, YMMRegister src2);
void vpd(byte op, XMMRegister dst, XMMRegister src1, Operand src2);

View File

@ -2681,6 +2681,7 @@ TEST(AssemblerX64FloatingPoint256bit) {
__ vhaddps(ymm0, ymm1, Operand(rbx, rcx, times_4, 10000));
__ vblendvps(ymm0, ymm3, ymm5, ymm9);
__ vblendvpd(ymm7, ymm4, ymm3, ymm1);
__ vshufps(ymm3, ymm1, ymm2, 0x75);
CodeDesc desc;
masm.GetCode(isolate, &desc);
@ -2712,7 +2713,9 @@ TEST(AssemblerX64FloatingPoint256bit) {
// vblendvps ymm0, ymm3, ymm5, ymm9
0xC4, 0xE3, 0x65, 0x4A, 0xC5, 0x90,
// vblendvpd ymm7, ymm4, ymm3, ymm1
0xC4, 0xE3, 0x5D, 0x4B, 0xFB, 0x10};
0xC4, 0xE3, 0x5D, 0x4B, 0xFB, 0x10,
// vshufps ymm3, ymm1, ymm2, 0x75
0xC5, 0xF4, 0xC6, 0xDA, 0x75};
CHECK_EQ(0, memcmp(expected, desc.buffer, sizeof(expected)));
}

View File

@ -1415,6 +1415,9 @@ UNINITIALIZED_TEST(DisasmX64YMMRegister) {
COMPARE("c5ff12a48b10270000 vmovddup ymm4,[rbx+rcx*4+0x2710]",
vmovddup(ymm4, Operand(rbx, rcx, times_4, 10000)));
COMPARE("c5fe16ca vmovshdup ymm1,ymm2", vmovshdup(ymm1, ymm2));
COMPARE("c5f4c6da73 vshufps ymm3,ymm1,ymm2,0x73",
vshufps(ymm3, ymm1, ymm2, 115));
}
if (!CpuFeatures::IsSupported(AVX2)) return;