Revert "[wasm-simd][x64] Prototype i32x4.widen_i8x16_{s,u}"
This reverts commit 5a0938e593.
Reason for revert: Broke build https://logs.chromium.org/logs/v8/buildbucket/cr-buildbucket.appspot.com/8857098178780038608/+/steps/Check/0/logs/RunWasm_I32x4WidenI8x16S_liftoff/0
Original change's description:
> [wasm-simd][x64] Prototype i32x4.widen_i8x16_{s,u}
>
> This prototypes i32x4.widen_i8x16_s and i32x4.widen_i8x16_u for x64. It
> uses some masks and pshufb for the widening. These masks (3 for each
> instruction) are stored as external references.
>
> Bug: v8:11297
> Change-Id: I6c8f55426bbb44b16ed552f393762c34c2524b55
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2617389
> Commit-Queue: Zhi An Ng <zhin@chromium.org>
> Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
> Reviewed-by: Georg Neis <neis@chromium.org>
> Reviewed-by: Andreas Haas <ahaas@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#72301}
TBR=neis@chromium.org,gdeepti@chromium.org,neis@google.com,ahaas@chromium.org,zhin@chromium.org
Change-Id: I83aa2e86854e39ac6afd250fdc0dfac7cdd99e6d
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: v8:11297
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2648194
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72302}
This commit is contained in:
parent 5a0938e593
commit 89ea6caf56
@ -105,43 +105,6 @@ constexpr struct alignas(16) {
|
||||
} wasm_uint32_max_as_double = {uint64_t{0x41efffffffe00000},
|
||||
uint64_t{0x41efffffffe00000}};
|
||||
|
||||
// Helper masks used for i32x4.widen_i8x16_{s,u}.
// Each mask is a 16-byte-aligned pshufb control vector stored as two 64-bit
// halves (a = low eight bytes, b = high eight bytes on little-endian x64).
//
// Signed mask for lane index 1: control bytes 3, 7, 11 and 15 select source
// bytes 4, 5, 6 and 7, i.e. each selected byte lands in the most significant
// byte of a 32-bit lane; all other control bytes are 0xFF. The x64 code
// generator follows the shuffle with Psrad by 24 to sign-extend each lane.
|
||||
constexpr struct alignas(16) {
|
||||
uint64_t a;
|
||||
uint64_t b;
|
||||
} i32x4_widen_i8x16_s1_mask = {uint64_t{0x05FFFFFF'04FFFFFF},
|
||||
uint64_t{0x07FFFFFF'06FFFFFF}};
|
||||
|
||||
// Signed widen mask for lane index 2 (16-byte-aligned pshufb control vector):
// control bytes 3, 7, 11 and 15 select source bytes 8, 9, 10 and 11, placing
// each in the most significant byte of a 32-bit lane; all other control bytes
// are 0xFF.
constexpr struct alignas(16) {
|
||||
uint64_t a;
|
||||
uint64_t b;
|
||||
} i32x4_widen_i8x16_s2_mask = {uint64_t{0x09FFFFFF'08FFFFFF},
|
||||
uint64_t{0x0BFFFFFF'0AFFFFFF}};
|
||||
|
||||
// Signed widen mask for lane index 3 (16-byte-aligned pshufb control vector):
// control bytes 3, 7, 11 and 15 select source bytes 12, 13, 14 and 15, placing
// each in the most significant byte of a 32-bit lane; all other control bytes
// are 0xFF.
constexpr struct alignas(16) {
|
||||
uint64_t a;
|
||||
uint64_t b;
|
||||
} i32x4_widen_i8x16_s3_mask = {uint64_t{0x0DFFFFFF'0CFFFFFF},
|
||||
uint64_t{0x0FFFFFFF'0EFFFFFF}};
|
||||
|
||||
// Unsigned widen mask for lane index 1 (16-byte-aligned pshufb control
// vector): control bytes 0, 4, 8 and 12 select source bytes 4, 5, 6 and 7,
// placing each in the least significant byte of a 32-bit lane; all other
// control bytes are 0xFF (pshufb writes zero for a control byte whose top bit
// is set, which zero-extends the lane).
constexpr struct alignas(16) {
|
||||
uint64_t a;
|
||||
uint64_t b;
|
||||
} i32x4_widen_i8x16_u1_mask = {uint64_t{0xFFFFFF05'FFFFFF04},
|
||||
uint64_t{0xFFFFFF07'FFFFFF06}};
|
||||
|
||||
// Unsigned widen mask for lane index 2 (16-byte-aligned pshufb control
// vector): control bytes 0, 4, 8 and 12 select source bytes 8, 9, 10 and 11,
// placing each in the least significant byte of a 32-bit lane; all other
// control bytes are 0xFF.
constexpr struct alignas(16) {
|
||||
uint64_t a;
|
||||
uint64_t b;
|
||||
} i32x4_widen_i8x16_u2_mask = {uint64_t{0xFFFFFF09'FFFFFF08},
|
||||
uint64_t{0xFFFFFF0B'FFFFFF0A}};
|
||||
|
||||
// Unsigned widen mask for lane index 3 (16-byte-aligned pshufb control
// vector): control bytes 0, 4, 8 and 12 select source bytes 12, 13, 14 and 15,
// placing each in the least significant byte of a 32-bit lane; all other
// control bytes are 0xFF.
constexpr struct alignas(16) {
|
||||
uint64_t a;
|
||||
uint64_t b;
|
||||
} i32x4_widen_i8x16_u3_mask = {uint64_t{0xFFFFFF0D'FFFFFF0C},
|
||||
uint64_t{0xFFFFFF0F'FFFFFF0E}};
|
||||
|
||||
// Implementation of ExternalReference
|
||||
|
||||
static ExternalReference::Type BuiltinCallTypeForResultSize(int result_size) {
|
||||
@ -571,36 +534,6 @@ ExternalReference ExternalReference::address_of_wasm_uint32_max_as_double() {
|
||||
reinterpret_cast<Address>(&wasm_uint32_max_as_double));
|
||||
}
|
||||
|
||||
// Returns an ExternalReference wrapping the address of the file-level
// i32x4_widen_i8x16_s1_mask constant.
ExternalReference ExternalReference::address_of_i32x4_widen_i8x16_s1_mask() {
|
||||
return ExternalReference(
|
||||
reinterpret_cast<Address>(&i32x4_widen_i8x16_s1_mask));
|
||||
}
|
||||
|
||||
// Returns an ExternalReference wrapping the address of the file-level
// i32x4_widen_i8x16_s2_mask constant.
ExternalReference ExternalReference::address_of_i32x4_widen_i8x16_s2_mask() {
|
||||
return ExternalReference(
|
||||
reinterpret_cast<Address>(&i32x4_widen_i8x16_s2_mask));
|
||||
}
|
||||
|
||||
// Returns an ExternalReference wrapping the address of the file-level
// i32x4_widen_i8x16_s3_mask constant.
ExternalReference ExternalReference::address_of_i32x4_widen_i8x16_s3_mask() {
|
||||
return ExternalReference(
|
||||
reinterpret_cast<Address>(&i32x4_widen_i8x16_s3_mask));
|
||||
}
|
||||
|
||||
// Returns an ExternalReference wrapping the address of the file-level
// i32x4_widen_i8x16_u1_mask constant.
ExternalReference ExternalReference::address_of_i32x4_widen_i8x16_u1_mask() {
|
||||
return ExternalReference(
|
||||
reinterpret_cast<Address>(&i32x4_widen_i8x16_u1_mask));
|
||||
}
|
||||
|
||||
// Returns an ExternalReference wrapping the address of the file-level
// i32x4_widen_i8x16_u2_mask constant.
ExternalReference ExternalReference::address_of_i32x4_widen_i8x16_u2_mask() {
|
||||
return ExternalReference(
|
||||
reinterpret_cast<Address>(&i32x4_widen_i8x16_u2_mask));
|
||||
}
|
||||
|
||||
// Returns an ExternalReference wrapping the address of the file-level
// i32x4_widen_i8x16_u3_mask constant.
ExternalReference ExternalReference::address_of_i32x4_widen_i8x16_u3_mask() {
|
||||
return ExternalReference(
|
||||
reinterpret_cast<Address>(&i32x4_widen_i8x16_u3_mask));
|
||||
}
|
||||
|
||||
ExternalReference
|
||||
ExternalReference::address_of_enable_experimental_regexp_engine() {
|
||||
return ExternalReference(&FLAG_enable_experimental_regexp_engine);
|
||||
|
@ -239,12 +239,6 @@ class StatsCounter;
|
||||
V(address_of_wasm_double_2_power_52, "wasm_double_2_power_52") \
|
||||
V(address_of_wasm_int32_max_as_double, "wasm_int32_max_as_double") \
|
||||
V(address_of_wasm_uint32_max_as_double, "wasm_uint32_max_as_double") \
|
||||
V(address_of_i32x4_widen_i8x16_u1_mask, "i32x4_widen_i8x16_u1_mask") \
|
||||
V(address_of_i32x4_widen_i8x16_u2_mask, "i32x4_widen_i8x16_u2_mask") \
|
||||
V(address_of_i32x4_widen_i8x16_u3_mask, "i32x4_widen_i8x16_u3_mask") \
|
||||
V(address_of_i32x4_widen_i8x16_s1_mask, "i32x4_widen_i8x16_s1_mask") \
|
||||
V(address_of_i32x4_widen_i8x16_s2_mask, "i32x4_widen_i8x16_s2_mask") \
|
||||
V(address_of_i32x4_widen_i8x16_s3_mask, "i32x4_widen_i8x16_s3_mask") \
|
||||
V(write_barrier_marking_from_code_function, "WriteBarrier::MarkingFromCode") \
|
||||
V(call_enqueue_microtask_function, "MicrotaskQueue::CallEnqueueMicrotask") \
|
||||
V(call_enter_context_function, "call_enter_context_function") \
|
||||
|
@ -2022,19 +2022,6 @@ void TurboAssembler::Pshufb(XMMRegister dst, XMMRegister src,
|
||||
}
|
||||
}
|
||||
|
||||
// Byte shuffle of |src| into |dst| with the shuffle control taken from the
// operand |mask|. Emits vpshufb when AVX is supported; otherwise falls back
// to the two-operand pshufb under an SSSE3 feature scope.
void TurboAssembler::Pshufb(XMMRegister dst, XMMRegister src, Operand mask) {
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
CpuFeatureScope avx_scope(this, AVX);
|
||||
vpshufb(dst, src, mask);
|
||||
} else {
|
||||
// The pshufb call below only takes dst and mask, so src is first copied
// into dst when the two registers differ.
if (dst != src) {
|
||||
movapd(dst, src);
|
||||
}
|
||||
CpuFeatureScope sse_scope(this, SSSE3);
|
||||
pshufb(dst, mask);
|
||||
}
|
||||
}
|
||||
|
||||
void TurboAssembler::Pmulhrsw(XMMRegister dst, XMMRegister src1,
|
||||
XMMRegister src2) {
|
||||
if (CpuFeatures::IsSupported(AVX)) {
|
||||
|
@ -285,11 +285,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
AVX_OP_SSE4_1(Pblendw, pblendw)
|
||||
AVX_OP_SSE4_1(Ptest, ptest)
|
||||
AVX_OP_SSE4_1(Pmovsxbw, pmovsxbw)
|
||||
AVX_OP_SSE4_1(Pmovsxbd, pmovsxbd)
|
||||
AVX_OP_SSE4_1(Pmovsxwd, pmovsxwd)
|
||||
AVX_OP_SSE4_1(Pmovsxdq, pmovsxdq)
|
||||
AVX_OP_SSE4_1(Pmovzxbw, pmovzxbw)
|
||||
AVX_OP_SSE4_1(Pmovzxbd, pmovzxbd)
|
||||
AVX_OP_SSE4_1(Pmovzxwd, pmovzxwd)
|
||||
AVX_OP_SSE4_1(Pmovzxdq, pmovzxdq)
|
||||
AVX_OP_SSE4_1(Pextrb, pextrb)
|
||||
@ -575,7 +573,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
|
||||
// Supports both SSE and AVX. Move src1 to dst if they are not equal on SSE.
|
||||
void Pshufb(XMMRegister dst, XMMRegister src1, XMMRegister src2);
|
||||
void Pshufb(XMMRegister dst, XMMRegister src1, Operand src2);
|
||||
void Pmulhrsw(XMMRegister dst, XMMRegister src1, XMMRegister src2);
|
||||
|
||||
// These Wasm SIMD ops do not have direct lowerings on x64. These
|
||||
|
@ -172,11 +172,9 @@
|
||||
#define SSE4_UNOP_INSTRUCTION_LIST(V) \
|
||||
V(ptest, 66, 0F, 38, 17) \
|
||||
V(pmovsxbw, 66, 0F, 38, 20) \
|
||||
V(pmovsxbd, 66, 0F, 38, 21) \
|
||||
V(pmovsxwd, 66, 0F, 38, 23) \
|
||||
V(pmovsxdq, 66, 0F, 38, 25) \
|
||||
V(pmovzxbw, 66, 0F, 38, 30) \
|
||||
V(pmovzxbd, 66, 0F, 38, 31) \
|
||||
V(pmovzxwd, 66, 0F, 38, 33) \
|
||||
V(pmovzxdq, 66, 0F, 38, 35)
|
||||
|
||||
|
@ -2136,10 +2136,6 @@ void InstructionSelector::VisitNode(Node* node) {
|
||||
return MarkAsSimd128(node), VisitI32x4TruncSatF64x2SZero(node);
|
||||
case IrOpcode::kI32x4TruncSatF64x2UZero:
|
||||
return MarkAsSimd128(node), VisitI32x4TruncSatF64x2UZero(node);
|
||||
case IrOpcode::kI32x4WidenI8x16S:
|
||||
return MarkAsSimd128(node), VisitI32x4WidenI8x16S(node);
|
||||
case IrOpcode::kI32x4WidenI8x16U:
|
||||
return MarkAsSimd128(node), VisitI32x4WidenI8x16U(node);
|
||||
case IrOpcode::kI16x8Splat:
|
||||
return MarkAsSimd128(node), VisitI16x8Splat(node);
|
||||
case IrOpcode::kI16x8ExtractLaneU:
|
||||
@ -2835,13 +2831,6 @@ void InstructionSelector::VisitI32x4TruncSatF64x2UZero(Node* node) {
|
||||
}
|
||||
#endif //! V8_TARGET_ARCH_X64
|
||||
|
||||
#if !V8_TARGET_ARCH_X64
|
||||
// TODO(v8:11297) Prototype i32x4.widen_i8x16_{s,u} on non-x64 targets. Until
// then both visitors below are UNIMPLEMENTED() stubs.
|
||||
void InstructionSelector::VisitI32x4WidenI8x16S(Node* node) { UNIMPLEMENTED(); }
|
||||

||||
void InstructionSelector::VisitI32x4WidenI8x16U(Node* node) { UNIMPLEMENTED(); }
|
||||
#endif // !V8_TARGET_ARCH_X64
|
||||
|
||||
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
|
||||
|
||||
void InstructionSelector::VisitParameter(Node* node) {
|
||||
|
@ -3779,49 +3779,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
/*is_signed=*/false);
|
||||
break;
|
||||
}
|
||||
case kX64I32x4WidenI8x16S: {
|
||||
uint8_t laneidx = static_cast<uint8_t>(MiscField::decode(opcode));
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
XMMRegister src = i.InputSimd128Register(0);
|
||||
if (laneidx == 0) {
|
||||
__ Pmovsxbd(dst, src);
|
||||
break;
|
||||
}
|
||||
|
||||
ExternalReference mask;
|
||||
if (laneidx == 1) {
|
||||
mask = ExternalReference::address_of_i32x4_widen_i8x16_s1_mask();
|
||||
} else if (laneidx == 2) {
|
||||
mask = ExternalReference::address_of_i32x4_widen_i8x16_s2_mask();
|
||||
} else {
|
||||
DCHECK_EQ(3, laneidx);
|
||||
mask = ExternalReference::address_of_i32x4_widen_i8x16_s3_mask();
|
||||
}
|
||||
__ Pshufb(dst, src, __ ExternalReferenceAsOperand(mask));
|
||||
__ Psrad(dst, byte{24});
|
||||
break;
|
||||
}
|
||||
case kX64I32x4WidenI8x16U: {
|
||||
uint8_t laneidx = static_cast<uint8_t>(MiscField::decode(opcode));
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
XMMRegister src = i.InputSimd128Register(0);
|
||||
if (laneidx == 0) {
|
||||
__ Pmovzxbd(dst, src);
|
||||
break;
|
||||
}
|
||||
|
||||
ExternalReference mask;
|
||||
if (laneidx == 1) {
|
||||
mask = ExternalReference::address_of_i32x4_widen_i8x16_u1_mask();
|
||||
} else if (laneidx == 2) {
|
||||
mask = ExternalReference::address_of_i32x4_widen_i8x16_u2_mask();
|
||||
} else {
|
||||
DCHECK_EQ(3, laneidx);
|
||||
mask = ExternalReference::address_of_i32x4_widen_i8x16_u3_mask();
|
||||
}
|
||||
__ Pshufb(dst, src, __ ExternalReferenceAsOperand(mask));
|
||||
break;
|
||||
}
|
||||
case kX64I64x2SignSelect: {
|
||||
__ Blendvpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
|
||||
i.InputSimd128Register(1), i.InputSimd128Register(2));
|
||||
|
@ -262,8 +262,6 @@ namespace compiler {
|
||||
V(X64I32x4ExtAddPairwiseI16x8U) \
|
||||
V(X64I32x4TruncSatF64x2SZero) \
|
||||
V(X64I32x4TruncSatF64x2UZero) \
|
||||
V(X64I32x4WidenI8x16S) \
|
||||
V(X64I32x4WidenI8x16U) \
|
||||
V(X64I16x8Splat) \
|
||||
V(X64I16x8ExtractLaneS) \
|
||||
V(X64I16x8SConvertI8x16Low) \
|
||||
|
@ -238,8 +238,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kX64I32x4ExtAddPairwiseI16x8U:
|
||||
case kX64I32x4TruncSatF64x2SZero:
|
||||
case kX64I32x4TruncSatF64x2UZero:
|
||||
case kX64I32x4WidenI8x16S:
|
||||
case kX64I32x4WidenI8x16U:
|
||||
case kX64I16x8Splat:
|
||||
case kX64I16x8ExtractLaneS:
|
||||
case kX64I16x8SConvertI8x16Low:
|
||||
|
@ -3753,26 +3753,6 @@ void InstructionSelector::VisitI32x4TruncSatF64x2UZero(Node* node) {
|
||||
Emit(kX64I32x4TruncSatF64x2UZero, dst, g.UseRegister(node->InputAt(0)));
|
||||
}
|
||||
|
||||
namespace {
|
||||
// Shared instruction selection for I32x4WidenI8x16{S,U}: reads the lane index
// from the node's operator and emits |opcode| with that index packed into
// MiscField (the x64 code generator decodes it from the same field), taking
// the node's first input in a register.
void VisitWiden(InstructionSelector* selector, Node* node, ArchOpcode opcode) {
|
||||
X64OperandGenerator g(selector);
|
||||
// NOTE(review): the parameter is read as int8_t here, whereas
// MachineOperatorBuilder::I32x4WidenI8x16{S,U} build Operator1<uint8_t> --
// confirm whether OpParameter<uint8_t> was intended.
uint8_t laneidx = OpParameter<int8_t>(node->op());
|
||||
// Without AVX the output is tied to the first input (DefineSameAsFirst);
// presumably because the SSE shuffle overwrites its destination -- confirm.
InstructionOperand dst = CpuFeatures::IsSupported(AVX)
|
||||
? g.DefineAsRegister(node)
|
||||
: g.DefineSameAsFirst(node);
|
||||
selector->Emit(opcode | MiscField::encode(laneidx), dst,
|
||||
g.UseRegister(node->InputAt(0)));
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// x64 selection for the signed widen: forwards to VisitWiden with the
// kX64I32x4WidenI8x16S opcode.
void InstructionSelector::VisitI32x4WidenI8x16S(Node* node) {
|
||||
VisitWiden(this, node, kX64I32x4WidenI8x16S);
|
||||
}
|
||||
|
||||
// x64 selection for the unsigned widen: forwards to VisitWiden with the
// kX64I32x4WidenI8x16U opcode.
void InstructionSelector::VisitI32x4WidenI8x16U(Node* node) {
|
||||
VisitWiden(this, node, kX64I32x4WidenI8x16U);
|
||||
}
|
||||
|
||||
// static
|
||||
MachineOperatorBuilder::Flags
|
||||
InstructionSelector::SupportedMachineOperatorFlags() {
|
||||
|
@ -1707,18 +1707,6 @@ const Operator* MachineOperatorBuilder::Word64PoisonOnSpeculation() {
|
||||
return GetCachedOperator<Word64PoisonOnSpeculationOperator>();
|
||||
}
|
||||
|
||||
// Allocates in zone_ a new pure (Operator::kPure) operator with opcode
// IrOpcode::kI32x4WidenI8x16S, carrying |laneidx| as its uint8_t parameter.
// The numeric arguments are presumably the input/output counts (one value
// input, one value output) -- confirm against Operator's constructor.
const Operator* MachineOperatorBuilder::I32x4WidenI8x16S(uint8_t laneidx) {
|
||||
return zone_->New<Operator1<uint8_t>>(IrOpcode::kI32x4WidenI8x16S,
|
||||
Operator::kPure, "I32x4WidenI8x16S", 1,
|
||||
0, 0, 1, 0, 0, laneidx);
|
||||
}
|
||||
|
||||
// Allocates in zone_ a new pure (Operator::kPure) operator with opcode
// IrOpcode::kI32x4WidenI8x16U, carrying |laneidx| as its uint8_t parameter.
// The numeric arguments are presumably the input/output counts (one value
// input, one value output) -- confirm against Operator's constructor.
const Operator* MachineOperatorBuilder::I32x4WidenI8x16U(uint8_t laneidx) {
|
||||
return zone_->New<Operator1<uint8_t>>(IrOpcode::kI32x4WidenI8x16U,
|
||||
Operator::kPure, "I32x4WidenI8x16U", 1,
|
||||
0, 0, 1, 0, 0, laneidx);
|
||||
}
|
||||
|
||||
#define EXTRACT_LANE_OP(Type, Sign, lane_count) \
|
||||
const Operator* MachineOperatorBuilder::Type##ExtractLane##Sign( \
|
||||
int32_t lane_index) { \
|
||||
|
@ -724,8 +724,6 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
|
||||
const Operator* I32x4ExtAddPairwiseI16x8U();
|
||||
const Operator* I32x4TruncSatF64x2SZero();
|
||||
const Operator* I32x4TruncSatF64x2UZero();
|
||||
const Operator* I32x4WidenI8x16S(uint8_t laneidx);
|
||||
const Operator* I32x4WidenI8x16U(uint8_t laneidx);
|
||||
|
||||
const Operator* I16x8Splat();
|
||||
const Operator* I16x8ExtractLaneU(int32_t);
|
||||
|
@ -883,8 +883,6 @@
|
||||
V(I32x4ExtAddPairwiseI16x8U) \
|
||||
V(I32x4TruncSatF64x2SZero) \
|
||||
V(I32x4TruncSatF64x2UZero) \
|
||||
V(I32x4WidenI8x16S) \
|
||||
V(I32x4WidenI8x16U) \
|
||||
V(I16x8Splat) \
|
||||
V(I16x8ExtractLaneU) \
|
||||
V(I16x8ExtractLaneS) \
|
||||
|
@ -5235,12 +5235,6 @@ Node* WasmGraphBuilder::SimdLaneOp(wasm::WasmOpcode opcode, uint8_t lane,
|
||||
Node* const* inputs) {
|
||||
has_simd_ = true;
|
||||
switch (opcode) {
|
||||
case wasm::kExprI32x4WidenI8x16S:
|
||||
return graph()->NewNode(mcgraph()->machine()->I32x4WidenI8x16S(lane),
|
||||
inputs[0]);
|
||||
case wasm::kExprI32x4WidenI8x16U:
|
||||
return graph()->NewNode(mcgraph()->machine()->I32x4WidenI8x16U(lane),
|
||||
inputs[0]);
|
||||
case wasm::kExprF64x2ExtractLane:
|
||||
return graph()->NewNode(mcgraph()->machine()->F64x2ExtractLane(lane),
|
||||
inputs[0]);
|
||||
|
@ -2225,6 +2225,8 @@ WASM_SIMD_TEST(I32x4ShrU) {
|
||||
#if V8_TARGET_ARCH_X64
|
||||
// TODO(v8:11297) Prototype i32x4.widen_i8x16_{u,s}
|
||||
WASM_SIMD_TEST_NO_LOWERING(I32x4WidenI8x16U) {
|
||||
// TODO(zhin): Add TurboFan support.
|
||||
if (execution_tier != TestExecutionTier::kInterpreter) return;
|
||||
FLAG_SCOPE(wasm_simd_post_mvp);
|
||||
|
||||
WasmRunner<uint32_t, uint32_t> r(execution_tier, lower_simd);
|
||||
@ -2262,6 +2264,8 @@ WASM_SIMD_TEST_NO_LOWERING(I32x4WidenI8x16U) {
|
||||
}
|
||||
|
||||
WASM_SIMD_TEST_NO_LOWERING(I32x4WidenI8x16S) {
|
||||
// TODO(zhin): Add TurboFan support.
|
||||
if (execution_tier != TestExecutionTier::kInterpreter) return;
|
||||
FLAG_SCOPE(wasm_simd_post_mvp);
|
||||
|
||||
WasmRunner<int32_t, int32_t> r(execution_tier, lower_simd);
|
||||
|
Loading…
Reference in New Issue
Block a user