From a1d39bbaed912c10d76f50744fed3b8b494bc686 Mon Sep 17 00:00:00 2001 From: Zhi An Ng Date: Fri, 15 Jan 2021 02:35:41 +0000 Subject: [PATCH] [wasm-simd] Prototype i32x4.widen_i8x16_{s,u} This prototypes i32x4.widen_i8x16_s and i32x4.widen_i8x16_u for the interpreter. This is the first instruction of its kind: a post-mvp unary operation that takes one immediate, which is why there are more changes to the decoder than usual. Bug: v8:11297 Change-Id: Ib5c58965e0cba8d7a395b0dc57673110bc60e87c Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2617385 Reviewed-by: Deepti Gandluri Commit-Queue: Zhi An Ng Cr-Commit-Position: refs/heads/master@{#72170} --- src/wasm/function-body-decoder-impl.h | 7 +++ src/wasm/wasm-opcodes-inl.h | 4 ++ src/wasm/wasm-opcodes.h | 7 ++- test/cctest/wasm/test-run-wasm-simd.cc | 80 ++++++++++++++++++++++++++ test/common/wasm/wasm-interpreter.cc | 20 +++++++ 5 files changed, 117 insertions(+), 1 deletion(-) diff --git a/src/wasm/function-body-decoder-impl.h b/src/wasm/function-body-decoder-impl.h index 2b03664c0d..0e616f3c36 100644 --- a/src/wasm/function-body-decoder-impl.h +++ b/src/wasm/function-body-decoder-impl.h @@ -1453,6 +1453,8 @@ class WasmDecoder : public Decoder { case kExprI32x4ReplaceLane: case kExprS128Load32Lane: case kExprS128Store32Lane: + case kExprI32x4WidenI8x16S: + case kExprI32x4WidenI8x16U: num_lanes = 4; break; case kExprI16x8ExtractLaneS: @@ -2029,6 +2031,7 @@ class WasmDecoder : public Decoder { opcode = this->read_prefixed_opcode(pc); switch (opcode) { FOREACH_SIMD_1_OPERAND_1_PARAM_OPCODE(DECLARE_OPCODE_CASE) + FOREACH_SIMD_POST_MVP_ONE_OPERAND_OPCODE(DECLARE_OPCODE_CASE) return {1, 1}; FOREACH_SIMD_1_OPERAND_2_PARAM_OPCODE(DECLARE_OPCODE_CASE) FOREACH_SIMD_MASK_OPERAND_OPCODE(DECLARE_OPCODE_CASE) @@ -3746,6 +3749,10 @@ class WasmFullDecoder : public WasmDecoder { case kExprPrefetchNT: { return SimdPrefetch(opcode_length, /*temporal=*/false); } + case kExprI32x4WidenI8x16S: + case 
kExprI32x4WidenI8x16U: { + return SimdExtractLane(opcode, kWasmS128, opcode_length); + } default: { const FunctionSig* sig = WasmOpcodes::Signature(opcode); if (!VALIDATE(sig != nullptr)) { diff --git a/src/wasm/wasm-opcodes-inl.h b/src/wasm/wasm-opcodes-inl.h index 0f9723cddf..e90ff2c93c 100644 --- a/src/wasm/wasm-opcodes-inl.h +++ b/src/wasm/wasm-opcodes-inl.h @@ -365,6 +365,9 @@ constexpr const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) { CASE_OP(PrefetchT, "prefetch_t") CASE_OP(PrefetchNT, "prefetch_nt") + CASE_I32x4_OP(WidenI8x16S, "widen_i8x16_s") + CASE_I32x4_OP(WidenI8x16U, "widen_i8x16_u") + // Atomic operations. CASE_OP(AtomicNotify, "atomic.notify") CASE_INT_OP(AtomicWait, "atomic.wait") @@ -534,6 +537,7 @@ constexpr bool WasmOpcodes::IsSimdPostMvpOpcode(WasmOpcode opcode) { #define CHECK_OPCODE(name, opcode, _) case kExpr##name: FOREACH_SIMD_POST_MVP_OPCODE(CHECK_OPCODE) FOREACH_SIMD_POST_MVP_MEM_OPCODE(CHECK_OPCODE) + FOREACH_SIMD_POST_MVP_ONE_OPERAND_OPCODE(CHECK_OPCODE) #undef CHECK_OPCODE return true; default: diff --git a/src/wasm/wasm-opcodes.h b/src/wasm/wasm-opcodes.h index 172138229f..5f053863db 100644 --- a/src/wasm/wasm-opcodes.h +++ b/src/wasm/wasm-opcodes.h @@ -516,6 +516,10 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig, V(F32x4RecipApprox, 0xfdb3, s_s) \ V(F32x4RecipSqrtApprox, 0xfdbc, s_s) +#define FOREACH_SIMD_POST_MVP_ONE_OPERAND_OPCODE(V) \ + V(I32x4WidenI8x16S, 0xfd67, s_s) \ + V(I32x4WidenI8x16U, 0xfd68, s_s) + #define FOREACH_SIMD_1_OPERAND_1_PARAM_OPCODE(V) \ V(I8x16ExtractLaneS, 0xfd15, _) \ V(I8x16ExtractLaneU, 0xfd16, _) \ @@ -540,7 +544,8 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig, #define FOREACH_SIMD_1_OPERAND_OPCODE(V) \ FOREACH_SIMD_1_OPERAND_1_PARAM_OPCODE(V) \ - FOREACH_SIMD_1_OPERAND_2_PARAM_OPCODE(V) + FOREACH_SIMD_1_OPERAND_2_PARAM_OPCODE(V) \ + FOREACH_SIMD_POST_MVP_ONE_OPERAND_OPCODE(V) #define FOREACH_SIMD_OPCODE(V) \ FOREACH_SIMD_0_OPERAND_OPCODE(V) \ 
diff --git a/test/cctest/wasm/test-run-wasm-simd.cc b/test/cctest/wasm/test-run-wasm-simd.cc index 17b0fce65e..c3c00d539a 100644 --- a/test/cctest/wasm/test-run-wasm-simd.cc +++ b/test/cctest/wasm/test-run-wasm-simd.cc @@ -2103,6 +2103,86 @@ WASM_SIMD_TEST(I32x4ShrU) { LogicalShiftRight); } +#if V8_TARGET_ARCH_X64 +// TODO(v8:11297) Prototype i32x4.widen_i8x16_{u,s} +WASM_SIMD_TEST_NO_LOWERING(I32x4WidenI8x16U) { + // TODO(zhin): Add TurboFan support. + if (execution_tier != TestExecutionTier::kInterpreter) return; + FLAG_SCOPE(wasm_simd_post_mvp); + + WasmRunner r(execution_tier, lower_simd); + uint32_t* g0 = r.builder().AddGlobal(kWasmS128); + uint32_t* g1 = r.builder().AddGlobal(kWasmS128); + uint32_t* g2 = r.builder().AddGlobal(kWasmS128); + uint32_t* g3 = r.builder().AddGlobal(kWasmS128); + byte arg = 0; + +#define COPY_PARAM_TO_I32X4_LANE(idx) \ + WASM_SIMD_I32x4_REPLACE_LANE(idx, WASM_GLOBAL_GET(idx), WASM_LOCAL_GET(arg)) +#define WIDEN(idx) WASM_SIMD_OP(kExprI32x4WidenI8x16U), idx, kExprGlobalSet, idx + BUILD(r, + // g0 = widen_u([arg, 0, 0, 0], 0) + COPY_PARAM_TO_I32X4_LANE(0), WIDEN(0), + // g1 = widen_u([0, arg, 0, 0], 1) + COPY_PARAM_TO_I32X4_LANE(1), WIDEN(1), + // g2 = widen_u([0, 0, arg, 0], 2) + COPY_PARAM_TO_I32X4_LANE(2), WIDEN(2), + // g3 = widen_u([0, 0, 0, arg], 3) + COPY_PARAM_TO_I32X4_LANE(3), WIDEN(3), WASM_ONE); +#undef WIDEN +#undef COPY_PARAM_TO_I32X4_LANE + + FOR_UINT8_INPUTS(x) { + r.Call(x << 24 | x << 16 | x << 8 | x); + uint32_t expected = static_cast(x); + for (int i = 0; i < 4; i++) { + CHECK_EQ(expected, ReadLittleEndianValue(&g0[i])); + CHECK_EQ(expected, ReadLittleEndianValue(&g1[i])); + CHECK_EQ(expected, ReadLittleEndianValue(&g2[i])); + CHECK_EQ(expected, ReadLittleEndianValue(&g3[i])); + } + } +} + +WASM_SIMD_TEST_NO_LOWERING(I32x4WidenI8x16S) { + // TODO(zhin): Add TurboFan support. 
+ if (execution_tier != TestExecutionTier::kInterpreter) return; + FLAG_SCOPE(wasm_simd_post_mvp); + + WasmRunner r(execution_tier, lower_simd); + int32_t* g0 = r.builder().AddGlobal(kWasmS128); + int32_t* g1 = r.builder().AddGlobal(kWasmS128); + int32_t* g2 = r.builder().AddGlobal(kWasmS128); + int32_t* g3 = r.builder().AddGlobal(kWasmS128); + byte arg = 0; +#define COPY_PARAM_TO_I32X4_LANE(idx) \ + WASM_SIMD_I32x4_REPLACE_LANE(idx, WASM_GLOBAL_GET(idx), WASM_LOCAL_GET(arg)) +#define WIDEN(idx) WASM_SIMD_OP(kExprI32x4WidenI8x16S), idx, kExprGlobalSet, idx + BUILD(r, + // g0 = widen_s([arg, 0, 0, 0], 0) + COPY_PARAM_TO_I32X4_LANE(0), WIDEN(0), + // g1 = widen_s([0, arg, 0, 0], 1) + COPY_PARAM_TO_I32X4_LANE(1), WIDEN(1), + // g2 = widen_s([0, 0, arg, 0], 2) + COPY_PARAM_TO_I32X4_LANE(2), WIDEN(2), + // g3 = widen_s([0, 0, 0, arg], 3) + COPY_PARAM_TO_I32X4_LANE(3), WIDEN(3), WASM_ONE); +#undef WIDEN +#undef COPY_PARAM_TO_I32X4_LANE + + FOR_UINT8_INPUTS(x) { + r.Call(x << 24 | x << 16 | x << 8 | x); + int32_t expected_signed = static_cast(bit_cast((x))); + for (int i = 0; i < 4; i++) { + CHECK_EQ(expected_signed, ReadLittleEndianValue(&g0[i])); + CHECK_EQ(expected_signed, ReadLittleEndianValue(&g1[i])); + CHECK_EQ(expected_signed, ReadLittleEndianValue(&g2[i])); + CHECK_EQ(expected_signed, ReadLittleEndianValue(&g3[i])); + } + } +} +#endif // V8_TARGET_ARCH_X64 + // Tests both signed and unsigned conversion from I8x16 (unpacking). 
WASM_SIMD_TEST(I16x8ConvertI8x16) { WasmRunner r(execution_tier, lower_simd); diff --git a/test/common/wasm/wasm-interpreter.cc b/test/common/wasm/wasm-interpreter.cc index 3b68145d29..fd78a03293 100644 --- a/test/common/wasm/wasm-interpreter.cc +++ b/test/common/wasm/wasm-interpreter.cc @@ -28,6 +28,7 @@ #include "src/wasm/wasm-module.h" #include "src/wasm/wasm-objects-inl.h" #include "src/wasm/wasm-opcodes-inl.h" +#include "src/wasm/wasm-opcodes.h" #include "src/zone/accounting-allocator.h" #include "src/zone/zone-containers.h" @@ -2098,6 +2099,25 @@ class WasmInterpreterInternals { bool ExecuteSimdOp(WasmOpcode opcode, Decoder* decoder, InterpreterCode* code, pc_t pc, int* const len) { switch (opcode) { +#define WIDEN_CASE(op, expr) \ + case op: { \ + uint8_t lane = \ + decoder->read_u8(code->at(pc + *len), "lane"); \ + *len += 1; \ + int16 s = Pop().to_s128().to_i8x16(); \ + int4 r; \ + for (int i = 0; i < 4; i++) { \ + auto x = s.val[LANE(lane * 4 + i, s)]; \ + r.val[LANE(i, r)] = expr; \ + } \ + Push(WasmValue(Simd128(r))); \ + return true; \ + } + WIDEN_CASE(kExprI32x4WidenI8x16S, static_cast(x)) + WIDEN_CASE(kExprI32x4WidenI8x16U, + static_cast(bit_cast(x))) +#undef WIDEN_CASE + #define SPLAT_CASE(format, sType, valType, num) \ case kExpr##format##Splat: { \ WasmValue val = Pop(); \