Revert "[wasm-simd] Implement remaining I8x16 SIMD ops on x64"
This reverts commit de88bfb270.
Reason for revert: breaks the MSVC build: https://ci.chromium.org/p/v8/builders/luci.v8.ci/V8%20Win64%20-%20msvc/5765
Original change's description:
> [wasm-simd] Implement remaining I8x16 SIMD ops on x64
>
> - Implementation for I8x16 Shifts, and Mul
> - Fix convert bug
> - Enable all tests except for shuffle tests
>
> Change-Id: Id1a469d2883c30ea782c51d21dc462d211f94420
> Reviewed-on: https://chromium-review.googlesource.com/c/1318609
> Reviewed-by: Bill Budge <bbudge@chromium.org>
> Commit-Queue: Deepti Gandluri <gdeepti@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#57254}
TBR=bbudge@chromium.org,gdeepti@chromium.org
Change-Id: I09efd8002e27f457e89250336e6c3a12d8d9682c
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://chromium-review.googlesource.com/c/1318097
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Deepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/master@{#57257}
parent 64c668a224
commit bdb9e7a517
@@ -2481,6 +2481,14 @@ void InstructionSelector::VisitWord64AtomicCompareExchange(Node* node) {

#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
    !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitI8x16Shl(Node* node) { UNIMPLEMENTED(); }

void InstructionSelector::VisitI8x16ShrS(Node* node) { UNIMPLEMENTED(); }

void InstructionSelector::VisitI8x16ShrU(Node* node) { UNIMPLEMENTED(); }

void InstructionSelector::VisitI8x16Mul(Node* node) { UNIMPLEMENTED(); }

void InstructionSelector::VisitS8x16Shuffle(Node* node) { UNIMPLEMENTED(); }
#endif  // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
        // && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
@@ -2683,37 +2683,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      }
      break;
    }
    case kX64I8x16Shl: {
      XMMRegister dst = i.OutputSimd128Register();
      DCHECK_EQ(dst, i.InputSimd128Register(0));
      int8_t shift = i.InputInt8(1) & 0x7;
      if (shift < 4) {
        // For small shifts, doubling is faster.
        for (int i = 0; i < shift; ++i) {
          __ paddb(dst, dst);
        }
      } else {
        // Mask off the unwanted bits before word-shifting.
        __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
        __ psrlw(kScratchDoubleReg, 8 + shift);
        __ packuswb(kScratchDoubleReg, kScratchDoubleReg);
        __ pand(dst, kScratchDoubleReg);
        __ psllw(dst, shift);
      }
      break;
    }
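SSE2 has no per-byte shift instruction, which is what both branches above work around: small shifts are done by repeated paddb doubling, larger ones by masking each byte and then using the 16-bit psllw. Below is a scalar model of the wide-shift branch, with a hypothetical helper name; it is an illustrative sketch, not V8 code.

#include <cassert>
#include <cstdint>

// Models one byte lane of the pcmpeqw/psrlw/packuswb/pand/psllw sequence.
// The mask drops the bits that a 16-bit shift would otherwise leak into
// the neighboring byte.
uint8_t I8ShlViaWordShift(uint8_t byte, int shift) {
  assert(shift >= 0 && shift <= 7);
  // pcmpeqw + psrlw(8 + shift) + packuswb leaves 0xFF >> shift in each byte.
  uint8_t mask = 0xFF >> shift;
  uint16_t word = static_cast<uint16_t>(byte & mask);  // pand
  word = static_cast<uint16_t>(word << shift);         // psllw
  return static_cast<uint8_t>(word);
}

int main() {
  for (int v = 0; v < 256; ++v) {
    for (int s = 0; s < 8; ++s) {
      assert(I8ShlViaWordShift(static_cast<uint8_t>(v), s) ==
             static_cast<uint8_t>(v << s));
    }
  }
  return 0;
}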
    case kX64I8x16ShrS: {
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      int8_t shift = i.InputInt8(1) & 0x7;
      // Unpack the bytes into words, do arithmetic shifts, and repack.
      __ punpckhbw(kScratchDoubleReg, src);
      __ punpcklbw(dst, src);
      __ psraw(kScratchDoubleReg, 8 + shift);
      __ psraw(dst, 8 + shift);
      __ packsswb(dst, kScratchDoubleReg);
      break;
    }
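There is no byte-wide arithmetic shift either, so this case widens each byte to a word whose high byte is the source byte (interleaving the register with itself), shifts by 8 + shift, and repacks; packsswb saturates, but every shifted value already fits in int8, so nothing is clipped. A one-lane scalar model follows, with a hypothetical name; a sketch rather than the actual codegen.

#include <cassert>
#include <cstdint>

// Models one lane of the punpcklbw/psraw/packsswb path, assuming the usual
// two's-complement narrowing and arithmetic right shift of signed values.
int8_t I8ShrSViaWordShift(int8_t byte, int shift) {
  assert(shift >= 0 && shift <= 7);
  uint8_t b = static_cast<uint8_t>(byte);
  int16_t word = static_cast<int16_t>((b << 8) | b);  // punpcklbw with self
  word = static_cast<int16_t>(word >> (8 + shift));   // psraw(8 + shift)
  // packsswb clamps to [-128, 127]; b >> shift already fits, so it is
  // lossless here.
  return static_cast<int8_t>(word);
}

int main() {
  for (int v = -128; v < 128; ++v) {
    for (int s = 0; s < 8; ++s) {
      assert(I8ShrSViaWordShift(static_cast<int8_t>(v), s) ==
             static_cast<int8_t>(v >> s));
    }
  }
  return 0;
}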
    case kX64I8x16Add: {
      __ paddb(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
@@ -2730,39 +2699,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      __ psubsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I8x16Mul: {
      XMMRegister dst = i.OutputSimd128Register();
      DCHECK_EQ(dst, i.InputSimd128Register(0));
      XMMRegister right = i.InputSimd128Register(1);
      XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
      // I16x8 view of I8x16
      // left = AAaa AAaa ... AAaa AAaa
      // right= BBbb BBbb ... BBbb BBbb
      // t = 00AA 00AA ... 00AA 00AA
      // s = 00BB 00BB ... 00BB 00BB
      __ movaps(tmp, dst);
      __ movaps(kScratchDoubleReg, right);
      __ psrlw(tmp, 8);
      __ psrlw(kScratchDoubleReg, 8);
      // dst = left * 256
      __ psllw(dst, 8);
      // t = I16x8Mul(t, s)
      //   => __PP __PP ... __PP __PP
      __ pmullw(tmp, kScratchDoubleReg);
      // dst = I16x8Mul(left * 256, right)
      //   => pp__ pp__ ... pp__ pp__
      __ pmullw(dst, right);
      // t = I16x8Shl(t, 8)
      //   => PP00 PP00 ... PP00 PP00
      __ psllw(tmp, 8);
      // dst = I16x8Shr(dst, 8)
      //   => 00pp 00pp ... 00pp 00pp
      __ psrlw(dst, 8);
      // dst = I16x8Or(dst, t)
      //   => PPpp PPpp ... PPpp PPpp
      __ por(dst, tmp);
      break;
    }
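Likewise there is no pmullb: the sequence multiplies the high and low bytes of each 16-bit lane separately with pmullw and stitches the truncated products back together, pre-shifting the left operand so the low-byte product cannot pollute the high half. A scalar model of a single 16-bit lane, following the register comments above (hypothetical helper, illustration only):

#include <cassert>
#include <cstdint>

// One lane holds two bytes: left = (A << 8) | a, right = (B << 8) | b.
// Returns (((A * B) & 0xFF) << 8) | ((a * b) & 0xFF), mirroring the
// instruction sequence step by step. All arithmetic is mod 2^16, as with
// pmullw.
uint16_t I8x2MulViaWordMul(uint16_t left, uint16_t right) {
  uint16_t t = left >> 8;                           // movaps + psrlw(tmp, 8)
  uint16_t s = right >> 8;                          // movaps + psrlw(scratch, 8)
  uint16_t dst = static_cast<uint16_t>(left << 8);  // psllw(dst, 8)
  t = static_cast<uint16_t>(t * s);                 // pmullw(tmp, scratch)
  dst = static_cast<uint16_t>(dst * right);         // pmullw(dst, right)
  t = static_cast<uint16_t>(t << 8);                // psllw(tmp, 8)
  dst = dst >> 8;                                   // psrlw(dst, 8)
  return static_cast<uint16_t>(dst | t);            // por(dst, tmp)
}

int main() {
  for (int a = 0; a < 256; ++a) {
    for (int b = 0; b < 256; ++b) {
      uint16_t lane = static_cast<uint16_t>((a << 8) | b);
      uint16_t product = I8x2MulViaWordMul(lane, lane);
      assert((product >> 8) == ((a * a) & 0xFF));
      assert((product & 0xFF) == ((b * b) & 0xFF));
    }
  }
  return 0;
}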
    case kX64I8x16MinS: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pminsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
@@ -2807,18 +2743,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      __ packuswb(dst, kScratchDoubleReg);
      break;
    }
    case kX64I8x16ShrU: {
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      int8_t shift = i.InputInt8(1) & 0x7;
      // Unpack the bytes into words, do logical shifts, and repack.
      __ punpckhbw(kScratchDoubleReg, src);
      __ punpcklbw(dst, src);
      __ psrlw(kScratchDoubleReg, 8 + shift);
      __ psrlw(dst, 8 + shift);
      __ packuswb(dst, kScratchDoubleReg);
      break;
    }
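The unsigned shift mirrors kX64I8x16ShrS with logical shifts; since each shifted word is at most 255, packuswb repacks without saturating. A one-lane scalar model (again a hedged sketch with a made-up name, not the actual code):

#include <cassert>
#include <cstdint>

// Models one lane of the punpcklbw/psrlw/packuswb path.
uint8_t I8ShrUViaWordShift(uint8_t b, int shift) {
  assert(shift >= 0 && shift <= 7);
  uint16_t word = static_cast<uint16_t>((b << 8) | b);  // punpcklbw with self
  word = static_cast<uint16_t>(word >> (8 + shift));    // psrlw(8 + shift)
  return static_cast<uint8_t>(word);                    // packuswb (lossless)
}

int main() {
  for (int v = 0; v < 256; ++v) {
    for (int s = 0; s < 8; ++s) {
      assert(I8ShrUViaWordShift(static_cast<uint8_t>(v), s) ==
             static_cast<uint8_t>(static_cast<uint8_t>(v) >> s));
    }
  }
  return 0;
}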
    case kX64I8x16AddSaturateU: {
      __ paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;

@@ -230,13 +230,10 @@ namespace compiler {
  V(X64I8x16ReplaceLane)   \
  V(X64I8x16SConvertI16x8) \
  V(X64I8x16Neg)           \
  V(X64I8x16Shl)           \
  V(X64I8x16ShrS)          \
  V(X64I8x16Add)           \
  V(X64I8x16AddSaturateS)  \
  V(X64I8x16Sub)           \
  V(X64I8x16SubSaturateS)  \
  V(X64I8x16Mul)           \
  V(X64I8x16MinS)          \
  V(X64I8x16MaxS)          \
  V(X64I8x16Eq)            \
@@ -246,17 +243,16 @@ namespace compiler {
  V(X64I8x16UConvertI16x8) \
  V(X64I8x16AddSaturateU)  \
  V(X64I8x16SubSaturateU)  \
  V(X64I8x16ShrU)          \
  V(X64I8x16MinU)          \
  V(X64I8x16MaxU)          \
  V(X64I8x16GtU)           \
  V(X64I8x16GeU)           \
  V(X64S128Zero)           \
  V(X64S128Not)            \
  V(X64S128And)            \
  V(X64S128Or)             \
  V(X64S128Xor)            \
  V(X64S128Not)            \
  V(X64S128Select)         \
  V(X64S128Zero)           \
  V(X64S1x4AnyTrue)        \
  V(X64S1x4AllTrue)        \
  V(X64S1x8AnyTrue)        \
@@ -207,13 +207,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
    case kX64I8x16ReplaceLane:
    case kX64I8x16SConvertI16x8:
    case kX64I8x16Neg:
    case kX64I8x16Shl:
    case kX64I8x16ShrS:
    case kX64I8x16Add:
    case kX64I8x16AddSaturateS:
    case kX64I8x16Sub:
    case kX64I8x16SubSaturateS:
    case kX64I8x16Mul:
    case kX64I8x16MinS:
    case kX64I8x16MaxS:
    case kX64I8x16Eq:
@@ -223,7 +220,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
    case kX64I8x16UConvertI16x8:
    case kX64I8x16AddSaturateU:
    case kX64I8x16SubSaturateU:
    case kX64I8x16ShrU:
    case kX64I8x16MinU:
    case kX64I8x16MaxU:
    case kX64I8x16GtU:
@@ -2655,10 +2655,7 @@ VISIT_ATOMIC_BINOP(Xor)
  V(I32x4ShrU) \
  V(I16x8Shl)  \
  V(I16x8ShrS) \
  V(I16x8ShrU) \
  V(I8x16Shl)  \
  V(I8x16ShrS) \
  V(I8x16ShrU)
  V(I16x8ShrU)

#define SIMD_ANYTRUE_LIST(V) \
  V(S1x4AnyTrue)             \
@@ -2780,9 +2777,8 @@ void InstructionSelector::VisitI32x4SConvertF32x4(Node* node) {

void InstructionSelector::VisitI32x4UConvertF32x4(Node* node) {
  X64OperandGenerator g(this);
  InstructionOperand temps[] = {g.TempSimd128Register()};
  Emit(kX64I32x4UConvertF32x4, g.DefineSameAsFirst(node),
       g.UseRegister(node->InputAt(0)), arraysize(temps), temps);
       g.UseRegister(node->InputAt(0)));
}

void InstructionSelector::VisitI16x8UConvertI32x4(Node* node) {
@@ -2797,14 +2793,6 @@ void InstructionSelector::VisitI8x16UConvertI16x8(Node* node) {
       g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
}

void InstructionSelector::VisitI8x16Mul(Node* node) {
  X64OperandGenerator g(this);
  InstructionOperand temps[] = {g.TempSimd128Register()};
  Emit(kX64I8x16Mul, g.DefineSameAsFirst(node),
       g.UseUniqueRegister(node->InputAt(0)),
       g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps);
}

void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) {
  UNREACHABLE();
}
@@ -438,6 +438,8 @@ WASM_SIMD_TEST(F32x4ReplaceLane) {
  CHECK_EQ(1, r.Call(3.14159f, -1.5f));
}

#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
    V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
// Tests both signed and unsigned conversion.
WASM_SIMD_TEST(F32x4ConvertI32x4) {
  WasmRunner<int32_t, int32_t, float, float> r(execution_tier, lower_simd);
@@ -461,6 +463,8 @@ WASM_SIMD_TEST(F32x4ConvertI32x4) {
                    static_cast<float>(static_cast<uint32_t>(*i))));
  }
}
#endif  // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
        // V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32

void RunF32x4UnOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
                      WasmOpcode simd_op, FloatUnOp expected_op,
@@ -815,6 +819,9 @@ WASM_SIMD_TEST(I8x16ReplaceLane) {
  CHECK_EQ(1, r.Call(1, 2));
}

#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
    V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32

int32_t ConvertToInt(double val, bool unsigned_integer) {
  if (std::isnan(val)) return 0;
  if (unsigned_integer) {
@@ -893,6 +900,8 @@ WASM_SIMD_TEST(I32x4ConvertI16x8) {
    CHECK_EQ(1, r.Call(*i, unpacked_signed, unpacked_unsigned, 0));
  }
}
#endif  // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
        // V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32

void RunI32x4UnOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
                      WasmOpcode simd_op, Int32UnOp expected_op) {
@@ -1533,9 +1542,13 @@ WASM_SIMD_TEST(I8x16LeU) {
                    UnsignedLessEqual);
}

#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
    V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST(I8x16Mul) {
  RunI8x16BinOpTest(execution_tier, lower_simd, kExprI8x16Mul, Mul);
}
#endif  // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
        // V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32

void RunI8x16ShiftOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
                         WasmOpcode simd_op, Int8ShiftOp expected_op) {
@@ -1553,6 +1566,8 @@ void RunI8x16ShiftOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
  }
}

#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
    V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST(I8x16Shl) {
  RunI8x16ShiftOpTest(execution_tier, lower_simd, kExprI8x16Shl,
                      LogicalShiftLeft);
@@ -1567,6 +1582,8 @@ WASM_SIMD_TEST(I8x16ShrU) {
  RunI8x16ShiftOpTest(execution_tier, lower_simd, kExprI8x16ShrU,
                      LogicalShiftRight);
}
#endif  // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
        // V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
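RunI8x16ShiftOpTest runs each input through the compiled shift and checks every lane against a scalar Int8ShiftOp reference such as the LogicalShiftLeft passed above. The following is only an assumed sketch of the shape of those references (names prefixed to mark them as hypothetical, not the actual cctest helpers, which are defined elsewhere in test-run-wasm-simd.cc):

#include <cstdint>

// Assumed scalar references in the spirit of LogicalShiftLeft,
// ArithmeticShiftRight, and LogicalShiftRight.
int8_t RefLogicalShiftLeft(int8_t a, int shift) {
  return static_cast<int8_t>(static_cast<uint8_t>(a) << shift);
}
int8_t RefArithmeticShiftRight(int8_t a, int shift) {
  return static_cast<int8_t>(a >> shift);  // arithmetic on two's complement
}
int8_t RefLogicalShiftRight(int8_t a, int shift) {
  return static_cast<int8_t>(static_cast<uint8_t>(a) >> shift);
}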
// Test Select by making a mask where the 0th and 3rd lanes are true and the
// rest false, and comparing for non-equality with zero to convert to a boolean