From 7c429d96f6d92fbd5fd668ab4e76af6c15070e9c Mon Sep 17 00:00:00 2001
From: Ng Zhi An
Date: Fri, 19 Jun 2020 09:58:12 -0700
Subject: [PATCH] [wasm-simd][liftoff][arm] Implement s8x16shuffle

Bug: v8:9909
Change-Id: Icb4dd53f02bcd3b38511bb028768d276e3bfebaf
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2251041
Commit-Queue: Zhi An Ng
Reviewed-by: Clemens Backes
Cr-Commit-Position: refs/heads/master@{#68467}
---
 src/wasm/baseline/arm/liftoff-assembler-arm.h | 53 ++++++++++++++++++-
 .../cctest/wasm/test-run-wasm-simd-liftoff.cc | 35 +++++++++++++
 2 files changed, 87 insertions(+), 1 deletion(-)

diff --git a/src/wasm/baseline/arm/liftoff-assembler-arm.h b/src/wasm/baseline/arm/liftoff-assembler-arm.h
index 1844fa8ca7..ed3baa452e 100644
--- a/src/wasm/baseline/arm/liftoff-assembler-arm.h
+++ b/src/wasm/baseline/arm/liftoff-assembler-arm.h
@@ -2870,7 +2870,58 @@ void LiftoffAssembler::emit_s8x16_shuffle(LiftoffRegister dst,
                                           LiftoffRegister lhs,
                                           LiftoffRegister rhs,
                                           const uint8_t shuffle[16]) {
-  bailout(kSimd, "s8x16_shuffle");
+  Simd128Register dest = liftoff::GetSimd128Register(dst);
+  Simd128Register src1 = liftoff::GetSimd128Register(lhs);
+  Simd128Register src2 = liftoff::GetSimd128Register(rhs);
+  UseScratchRegisterScope temps(this);
+  Simd128Register scratch = temps.AcquireQ();
+  if ((src1 != src2) && src1.code() + 1 != src2.code()) {
+    // vtbl needs its table registers to be consecutive, so the operands must
+    // either be the same register or src2 must directly follow src1. If they
+    // are the same, a smaller list operand (table_size = 2) is built below.
+    // Otherwise, if they are not consecutive, move src1 and src2 to q14 and
+    // q15, which are consecutive and otherwise unused since they are not
+    // allocatable in Liftoff (see the static_assert).
+    static_assert(!(kLiftoffAssemblerFpCacheRegs &
+                    (d28.bit() | d29.bit() | d30.bit() | d31.bit())),
+                  "This only works if q14-q15 (d28-d31) are not used.");
+    vmov(q14, src1);
+    src1 = q14;
+    vmov(q15, src2);
+    src2 = q15;
+  }
+
+  int table_size = src1 == src2 ? 2 : 4;
+  uint32_t mask = table_size == 2 ? 0x0F0F0F0F : 0x1F1F1F1F;
+
+  // Build the 16 byte indices in scratch, four lanes (one S register) at a
+  // time. Lane j * 4 lands in the least significant byte of each word.
+  int scratch_s_base = scratch.code() * 4;
+  for (int j = 0; j < 4; j++) {
+    uint32_t imm = 0;
+    for (int i = 3; i >= 0; i--) {
+      imm = (imm << 8) | shuffle[j * 4 + i];
+    }
+    uint32_t four_lanes = imm;
+    // Ensure indices are in [0,15] if table_size is 2, or [0,31] if 4.
+    four_lanes &= mask;
+    vmov(SwVfpRegister::from_code(scratch_s_base + j),
+         Float32::FromBits(four_lanes));
+  }
+
+  DwVfpRegister table_base = src1.low();
+  NeonListOperand table(table_base, table_size);
+
+  if (dest != src1 && dest != src2) {
+    vtbl(dest.low(), table, scratch.low());
+    vtbl(dest.high(), table, scratch.high());
+  } else {
+    // dest aliases a table register: shuffle into scratch so the first vtbl
+    // does not clobber table bytes that the second vtbl still has to read.
+    vtbl(scratch.low(), table, scratch.low());
+    vtbl(scratch.high(), table, scratch.high());
+    vmov(dest, scratch);
+  }
 }
 
 void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
diff --git a/test/cctest/wasm/test-run-wasm-simd-liftoff.cc b/test/cctest/wasm/test-run-wasm-simd-liftoff.cc
index 140f793bdf..1e6f0c738e 100644
--- a/test/cctest/wasm/test-run-wasm-simd-liftoff.cc
+++ b/test/cctest/wasm/test-run-wasm-simd-liftoff.cc
@@ -151,6 +151,41 @@ WASM_SIMD_LIFTOFF_TEST(S8x16Shuffle) {
   CHECK_EQ(31, ReadLittleEndianValue<byte>(&output[15]));
 }
 
+// Exercise logic in Liftoff's implementation of shuffle when inputs to the
+// shuffle are the same register.
+WASM_SIMD_LIFTOFF_TEST(S8x16Shuffle_SingleOperand) {
+  WasmRunner<int32_t> r(ExecutionTier::kLiftoff, kNoLowerSimd);
+  byte local0 = r.AllocateLocal(kWasmS128);
+
+  byte* g0 = r.builder().AddGlobal<byte>(kWasmS128);
+  for (int i = 0; i < 16; i++) {
+    WriteLittleEndianValue<byte>(&g0[i], i);
+  }
+
+  byte* output = r.builder().AddGlobal<byte>(kWasmS128);
+
+  // This pattern reverses first operand. 31 should select the last lane of
+  // the second operand, but since the operands are the same, the effect is that
+  // the first operand is reversed.
+  std::array<byte, 16> pattern = {
+      {31, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}};
+
+  // Push local0 twice so that both shuffle operands are the same register.
+  BUILD(r, WASM_SET_LOCAL(local0, WASM_GET_GLOBAL(0)), WASM_GET_LOCAL(local0),
+        WASM_GET_LOCAL(local0),
+        WASM_SET_GLOBAL(1, WASM_SIMD_S8x16_SHUFFLE_OP(
+                               kExprS8x16Shuffle, pattern, WASM_NOP, WASM_NOP)),
+        WASM_ONE);
+
+  r.Call();
+
+  for (int i = 0; i < 16; i++) {
+    // Check that the output is the reverse of input.
+    byte actual = ReadLittleEndianValue<byte>(&output[i]);
+    CHECK_EQ(15 - i, actual);
+  }
+}
+
 #undef WASM_SIMD_LIFTOFF_TEST
 
 }  // namespace test_run_wasm_simd_liftoff