[wasm-simd][liftoff][arm] Implement s8x16shuffle

Bug: v8:9909
Change-Id: Icb4dd53f02bcd3b38511bb028768d276e3bfebaf
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2251041
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#68467}
This commit is contained in:
Ng Zhi An 2020-06-19 09:58:12 -07:00 committed by Commit Bot
parent b47c163bda
commit 7c429d96f6
2 changed files with 83 additions and 1 deletion

View File

@ -2870,7 +2870,54 @@ void LiftoffAssembler::emit_s8x16_shuffle(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs,
const uint8_t shuffle[16]) {
// NOTE(review): this is the deletion side of the diff hunk — before this
// change, Liftoff bailed out to TurboFan for s8x16.shuffle.
bailout(kSimd, "s8x16_shuffle");
// Map the Liftoff registers onto the underlying NEON Q registers.
Simd128Register dest = liftoff::GetSimd128Register(dst);
Simd128Register src1 = liftoff::GetSimd128Register(lhs);
Simd128Register src2 = liftoff::GetSimd128Register(rhs);
UseScratchRegisterScope temps(this);
// Scratch Q register: holds the 16 shuffle indices fed to vtbl.
Simd128Register scratch = temps.AcquireQ();
if ((src1 != src2) && src1.code() + 1 != src2.code()) {
// vtbl requires the operands to be consecutive or the same.
// If they are the same, we build a smaller list operand (table_size = 2).
// If they are not the same, and not consecutive, we move the src1 and src2
// to q14 and q15, which will be unused since they are not allocatable in
// Liftoff. If the operands are the same, then we build a smaller list
// operand below.
static_assert(!(kLiftoffAssemblerFpCacheRegs &
(d28.bit() | d29.bit() | d30.bit() | d31.bit())),
"This only works if q14-q15 (d28-d31) are not used.");
vmov(q14, src1);
src1 = q14;
vmov(q15, src2);
src2 = q15;
}
// Two D registers suffice as a table when both inputs alias; four otherwise.
int table_size = src1 == src2 ? 2 : 4;
// Per-byte mask so every index stays inside the table: [0,15] for a
// 2-register table, [0,31] for a 4-register table. With a single operand
// this folds indices 16-31 back onto the (identical) first operand.
uint32_t mask = table_size == 2 ? 0x0F0F0F0F : 0x1F1F1F1F;
// S-register index of the first single-precision slot inside `scratch`
// (each Q register overlays four S registers).
int scratch_s_base = scratch.code() * 4;
// Pack the 16 shuffle bytes into scratch, four lanes per 32-bit word,
// little-endian within each word.
for (int j = 0; j < 4; j++) {
uint32_t imm = 0;
for (int i = 3; i >= 0; i--) {
imm = (imm << 8) | shuffle[j * 4 + i];
}
uint32_t four_lanes = imm;
// Ensure indices are in [0,15] if table_size is 2, or [0,31] if 4.
four_lanes &= mask;
vmov(SwVfpRegister::from_code(scratch_s_base + j),
Float32::FromBits(four_lanes));
}
// The table starts at src1's low D register; when table_size == 4 it also
// covers src2, whose D registers are consecutive with src1's (either
// naturally or via the q14/q15 move above).
DwVfpRegister table_base = src1.low();
NeonListOperand table(table_base, table_size);
if (dest != src1 && dest != src2) {
vtbl(dest.low(), table, scratch.low());
vtbl(dest.high(), table, scratch.high());
} else {
// dest aliases a table register: shuffle into scratch first so the first
// vtbl does not clobber table contents still needed by the second.
vtbl(scratch.low(), table, scratch.low());
vtbl(scratch.high(), table, scratch.high());
vmov(dest, scratch);
}
}
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,

View File

@ -151,6 +151,41 @@ WASM_SIMD_LIFTOFF_TEST(S8x16Shuffle) {
CHECK_EQ(31, ReadLittleEndianValue<byte>(&output[15]));
}
// Exercise the single-operand path in Liftoff's shuffle lowering: when both
// shuffle inputs are the same register, a smaller lookup table is built.
WASM_SIMD_LIFTOFF_TEST(S8x16Shuffle_SingleOperand) {
WasmRunner<int32_t> r(ExecutionTier::kLiftoff, kNoLowerSimd);
byte shuffle_src = r.AllocateLocal(kWasmS128);
byte* input = r.builder().AddGlobal<byte>(kWasmS128);
for (int lane = 0; lane < 16; ++lane) {
WriteLittleEndianValue<byte>(&input[lane], lane);
}
byte* result = r.builder().AddGlobal<byte>(kWasmS128);
// Index 31 would normally pick the last lane of the second operand, but
// both operands alias the same value here, so the net effect is a full
// lane reversal of the first operand.
std::array<byte, 16> indices = {
{31, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}};
// Materialize the input in a local and push it twice, so both shuffle
// operands come from the same register.
BUILD(r, WASM_SET_LOCAL(shuffle_src, WASM_GET_GLOBAL(0)),
WASM_GET_LOCAL(shuffle_src), WASM_GET_LOCAL(shuffle_src),
WASM_SET_GLOBAL(1, WASM_SIMD_S8x16_SHUFFLE_OP(
kExprS8x16Shuffle, indices, WASM_NOP, WASM_NOP)),
WASM_ONE);
r.Call();
for (int lane = 0; lane < 16; ++lane) {
// The output global must hold the input, lane-reversed.
CHECK_EQ(15 - lane, ReadLittleEndianValue<byte>(&result[lane]));
}
}
#undef WASM_SIMD_LIFTOFF_TEST
} // namespace test_run_wasm_simd_liftoff