[x64] Implement 256-bit assembly for vroundps/pd, vpblendw, vpalignr
Bug: v8:12228
Change-Id: Ifd813e6bff92e6a08cc41eb8f5b1848abe849cd3
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3178540
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Commit-Queue: Jing Bao <jing.bao@intel.com>
Cr-Commit-Position: refs/heads/main@{#77070}
This commit is contained in:
parent
ce23293e75
commit
dd1e168caf
@ -1500,10 +1500,18 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
|
||||
vinstr(0x08, dst, xmm0, src, k66, k0F3A, kWIG);
|
||||
emit(static_cast<byte>(mode) | 0x8); // Mask precision exception.
|
||||
}
|
||||
// YMM-register overload of vroundps: emits opcode 0x08 through vinstr, then
// one immediate byte derived from |mode|.
void vroundps(YMMRegister dst, YMMRegister src, RoundingMode mode) {
  // OR-ing in 0x8 masks the precision exception.
  const byte rounding_imm =
      static_cast<byte>(static_cast<byte>(mode) | 0x8);
  vinstr(0x08, dst, ymm0, src, k66, k0F3A, kWIG, AVX);
  emit(rounding_imm);
}
|
||||
// XMM-register overload of vroundpd: emits opcode 0x09 through vinstr, then
// one immediate byte derived from |mode|.
void vroundpd(XMMRegister dst, XMMRegister src, RoundingMode mode) {
  // OR-ing in 0x8 masks the precision exception.
  const byte rounding_imm =
      static_cast<byte>(static_cast<byte>(mode) | 0x8);
  vinstr(0x09, dst, xmm0, src, k66, k0F3A, kWIG);
  emit(rounding_imm);
}
|
||||
// YMM-register overload of vroundpd: emits opcode 0x09 through vinstr, then
// one immediate byte derived from |mode|.
void vroundpd(YMMRegister dst, YMMRegister src, RoundingMode mode) {
  // OR-ing in 0x8 masks the precision exception.
  const byte rounding_imm =
      static_cast<byte>(static_cast<byte>(mode) | 0x8);
  vinstr(0x09, dst, ymm0, src, k66, k0F3A, kWIG, AVX);
  emit(rounding_imm);
}
|
||||
|
||||
void vsd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2) {
|
||||
vinstr(op, dst, src1, src2, kF2, k0F, kWIG);
|
||||
@ -1693,20 +1701,38 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
|
||||
vinstr(0x0E, dst, src1, src2, k66, k0F3A, kWIG);
|
||||
emit(mask);
|
||||
}
|
||||
// vpblendw with three YMM register operands: emits opcode 0x0E through
// vinstr and appends |mask| as the trailing immediate byte.
void vpblendw(YMMRegister dst, YMMRegister src1, YMMRegister src2,
              uint8_t mask) {
  constexpr byte kBlendOpcode = 0x0E;
  vinstr(kBlendOpcode, dst, src1, src2, k66, k0F3A, kWIG);
  emit(mask);
}
|
||||
// vpblendw with XMM registers and an Operand as the second source: emits
// opcode 0x0E through vinstr and appends |mask| as the trailing immediate
// byte.
void vpblendw(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t mask) {
  constexpr byte kBlendOpcode = 0x0E;
  vinstr(kBlendOpcode, dst, src1, src2, k66, k0F3A, kWIG);
  emit(mask);
}
|
||||
// vpblendw with YMM registers and an Operand as the second source: emits
// opcode 0x0E through vinstr and appends |mask| as the trailing immediate
// byte.
void vpblendw(YMMRegister dst, YMMRegister src1, Operand src2, uint8_t mask) {
  constexpr byte kBlendOpcode = 0x0E;
  vinstr(kBlendOpcode, dst, src1, src2, k66, k0F3A, kWIG);
  emit(mask);
}
|
||||
|
||||
// vpalignr with three XMM register operands: emits opcode 0x0F through
// vinstr and appends |imm8| as the trailing immediate byte.
void vpalignr(XMMRegister dst, XMMRegister src1, XMMRegister src2,
              uint8_t imm8) {
  constexpr byte kAlignOpcode = 0x0F;
  vinstr(kAlignOpcode, dst, src1, src2, k66, k0F3A, kWIG);
  emit(imm8);
}
|
||||
// vpalignr with three YMM register operands: emits opcode 0x0F through
// vinstr and appends |imm8| as the trailing immediate byte.
void vpalignr(YMMRegister dst, YMMRegister src1, YMMRegister src2,
              uint8_t imm8) {
  constexpr byte kAlignOpcode = 0x0F;
  vinstr(kAlignOpcode, dst, src1, src2, k66, k0F3A, kWIG);
  emit(imm8);
}
|
||||
// vpalignr with XMM registers and an Operand as the second source: emits
// opcode 0x0F through vinstr and appends |imm8| as the trailing immediate
// byte.
void vpalignr(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8) {
  constexpr byte kAlignOpcode = 0x0F;
  vinstr(kAlignOpcode, dst, src1, src2, k66, k0F3A, kWIG);
  emit(imm8);
}
|
||||
// vpalignr with YMM registers and an Operand as the second source: emits
// opcode 0x0F through vinstr and appends |imm8| as the trailing immediate
// byte.
void vpalignr(YMMRegister dst, YMMRegister src1, Operand src2, uint8_t imm8) {
  constexpr byte kAlignOpcode = 0x0F;
  vinstr(kAlignOpcode, dst, src1, src2, k66, k0F3A, kWIG);
  emit(imm8);
}
|
||||
|
||||
// XMM-register overload of vps, declared here only; the definition is not
// visible in this view.
// NOTE(review): presumably emits the instruction selected by |op| for
// dst/src1/src2 -- confirm against the definition.
void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
|
||||
// YMM-register overload of vps, declared here only; the definition is not
// visible in this view.
// NOTE(review): presumably emits the instruction selected by |op| for
// dst/src1/src2 -- confirm against the definition.
void vps(byte op, YMMRegister dst, YMMRegister src1, YMMRegister src2);
|
||||
|
@ -2562,7 +2562,7 @@ TEST(AssemblerX64Regmove256bit) {
|
||||
CHECK_EQ(0, memcmp(expected, desc.buffer, sizeof(expected)));
|
||||
}
|
||||
|
||||
TEST(AssemblerX64Shuffle256bit) {
|
||||
TEST(AssemblerX64LaneOp256bit) {
|
||||
if (!CpuFeatures::IsSupported(AVX2)) return;
|
||||
CcTest::InitializeVM();
|
||||
v8::HandleScope scope(CcTest::isolate());
|
||||
@ -2577,6 +2577,10 @@ TEST(AssemblerX64Shuffle256bit) {
|
||||
__ vpshuflw(ymm9, Operand(rbx, rcx, times_4, 10000), 85);
|
||||
__ vpshufhw(ymm1, ymm2, 85);
|
||||
__ vpshufhw(ymm1, Operand(rbx, rcx, times_4, 10000), 85);
|
||||
__ vpblendw(ymm2, ymm3, ymm4, 23);
|
||||
__ vpblendw(ymm2, ymm3, Operand(rbx, rcx, times_4, 10000), 23);
|
||||
__ vpalignr(ymm10, ymm11, ymm12, 4);
|
||||
__ vpalignr(ymm10, ymm11, Operand(rbx, rcx, times_4, 10000), 4);
|
||||
|
||||
CodeDesc desc;
|
||||
masm.GetCode(isolate, &desc);
|
||||
@ -2587,19 +2591,27 @@ TEST(AssemblerX64Shuffle256bit) {
|
||||
code->Print(os);
|
||||
#endif
|
||||
|
||||
byte expected[] = {// vpshufd ymm1, ymm2, 85
|
||||
0xC5, 0xFD, 0x70, 0xCA, 0x55,
|
||||
// vpshufd ymm1,YMMWORD PTR [rbx+rcx*4+0x2710], 85
|
||||
0xC5, 0xFD, 0x70, 0x8C, 0x8B, 0x10, 0x27, 0x00, 0x00, 0x55,
|
||||
// vpshuflw ymm9, ymm10, 85,
|
||||
0xC4, 0x41, 0x7F, 0x70, 0xCA, 0x55,
|
||||
// vpshuflw ymm9,YMMWORD PTR [rbx+rcx*4+0x2710], 85
|
||||
0xC5, 0x7F, 0x70, 0x8C, 0x8B, 0x10, 0x27, 0x00, 0x00, 0x55,
|
||||
// vpshufhw ymm1, ymm2, 85
|
||||
0xC5, 0xFE, 0x70, 0xCA, 0x55,
|
||||
// vpshufhw ymm1,YMMWORD PTR [rbx+rcx*4+0x2710], 85
|
||||
0xC5, 0xFE, 0x70, 0x8C, 0x8B, 0x10, 0x27, 0x00, 0x00,
|
||||
0x55};
|
||||
byte expected[] = {
|
||||
// vpshufd ymm1, ymm2, 85
|
||||
0xC5, 0xFD, 0x70, 0xCA, 0x55,
|
||||
// vpshufd ymm1,YMMWORD PTR [rbx+rcx*4+0x2710], 85
|
||||
0xC5, 0xFD, 0x70, 0x8C, 0x8B, 0x10, 0x27, 0x00, 0x00, 0x55,
|
||||
// vpshuflw ymm9, ymm10, 85,
|
||||
0xC4, 0x41, 0x7F, 0x70, 0xCA, 0x55,
|
||||
// vpshuflw ymm9,YMMWORD PTR [rbx+rcx*4+0x2710], 85
|
||||
0xC5, 0x7F, 0x70, 0x8C, 0x8B, 0x10, 0x27, 0x00, 0x00, 0x55,
|
||||
// vpshufhw ymm1, ymm2, 85
|
||||
0xC5, 0xFE, 0x70, 0xCA, 0x55,
|
||||
// vpshufhw ymm1,YMMWORD PTR [rbx+rcx*4+0x2710], 85
|
||||
0xC5, 0xFE, 0x70, 0x8C, 0x8B, 0x10, 0x27, 0x00, 0x00, 0x55,
|
||||
// vpblendw ymm2, ymm3, ymm4, 23
|
||||
0xC4, 0xE3, 0x65, 0x0E, 0xD4, 0x17,
|
||||
// vpblendw ymm2, ymm3, YMMWORD PTR [rbx+rcx*4+0x2710], 23
|
||||
0xC4, 0xE3, 0x65, 0x0E, 0x94, 0x8B, 0x10, 0x27, 0x00, 0x00, 0x17,
|
||||
// vpalignr ymm10, ymm11, ymm12, 4
|
||||
0xC4, 0x43, 0x25, 0x0F, 0xD4, 0x04,
|
||||
// vpalignr ymm10, ymm11, YMMWORD PTR [rbx+rcx*4+0x2710], 4
|
||||
0xC4, 0x63, 0x25, 0x0F, 0x94, 0x8B, 0x10, 0x27, 0x00, 0x00, 0x04};
|
||||
CHECK_EQ(0, memcmp(expected, desc.buffer, sizeof(expected)));
|
||||
}
|
||||
|
||||
@ -2615,6 +2627,8 @@ TEST(AssemblerX64FloatingPoint256bit) {
|
||||
__ vsqrtps(ymm0, ymm1);
|
||||
__ vunpcklps(ymm2, ymm3, ymm14);
|
||||
__ vsubps(ymm10, ymm11, ymm12);
|
||||
__ vroundps(ymm9, ymm2, kRoundUp);
|
||||
__ vroundpd(ymm9, ymm2, kRoundToNearest);
|
||||
|
||||
CodeDesc desc;
|
||||
masm.GetCode(isolate, &desc);
|
||||
@ -2630,7 +2644,11 @@ TEST(AssemblerX64FloatingPoint256bit) {
|
||||
// VUNPCKLPS
|
||||
0xC4, 0xC1, 0x64, 0x14, 0xD6,
|
||||
// VSUBPS
|
||||
0xC4, 0x41, 0x24, 0x5C, 0xD4};
|
||||
0xC4, 0x41, 0x24, 0x5C, 0xD4,
|
||||
// vroundps ymm9, ymm2, 0xA
|
||||
0xC4, 0x63, 0x7D, 0x08, 0xCA, 0x0A,
|
||||
// vroundpd ymm9, ymm2, 0x8
|
||||
0xC4, 0x63, 0x7D, 0x09, 0xCA, 0x08};
|
||||
CHECK_EQ(0, memcmp(expected, desc.buffer, sizeof(expected)));
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user