[wasm-simd][liftoff][arm] Implement s8x16shuffle
Bug: v8:9909
Change-Id: Icb4dd53f02bcd3b38511bb028768d276e3bfebaf
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2251041
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#68467}
This commit is contained in:
parent
b47c163bda
commit
7c429d96f6
@ -2870,7 +2870,54 @@ void LiftoffAssembler::emit_s8x16_shuffle(LiftoffRegister dst,
|
||||
LiftoffRegister lhs,
|
||||
LiftoffRegister rhs,
|
||||
const uint8_t shuffle[16]) {
|
||||
// Emits a byte shuffle using vtbl: the 16 indices in shuffle are loaded into
// a scratch register and used to look up bytes in a table built from lhs and
// rhs; the result is written to dst.
// NOTE(review): this diff view shows removed and added lines without +/-
// markers; the bailout call below is presumably the removed line — confirm
// against the commit.
bailout(kSimd, "s8x16_shuffle");
|
||||
Simd128Register dest = liftoff::GetSimd128Register(dst);
|
||||
Simd128Register src1 = liftoff::GetSimd128Register(lhs);
|
||||
Simd128Register src2 = liftoff::GetSimd128Register(rhs);
|
||||
UseScratchRegisterScope temps(this);
|
||||
Simd128Register scratch = temps.AcquireQ();
|
||||
if ((src1 != src2) && src1.code() + 1 != src2.code()) {
|
||||
// vtbl requires the operands to be consecutive or the same.
|
||||
// If they are the same, we keep them as they are and build a smaller
|
||||
// list operand (table_size = 2) below.
|
||||
// If they are neither the same nor consecutive, we move src1 and src2 to
|
||||
// q14 and q15, which will be unused since they are not allocatable in
|
||||
// Liftoff (the static_assert below checks this).
|
||||
static_assert(!(kLiftoffAssemblerFpCacheRegs &
|
||||
(d28.bit() | d29.bit() | d30.bit() | d31.bit())),
|
||||
"This only works if q14-q15 (d28-d31) are not used.");
|
||||
vmov(q14, src1);
|
||||
src1 = q14;
|
||||
vmov(q15, src2);
|
||||
src2 = q15;
|
||||
}
|
||||
|
||||
// The table spans 2 D registers when both inputs are the same register,
// otherwise 4; the mask keeps every index byte within that table.
int table_size = src1 == src2 ? 2 : 4;
|
||||
uint32_t mask = table_size == 2 ? 0x0F0F0F0F : 0x1F1F1F1F;
|
||||
|
||||
// The indices are written through four S registers derived from the scratch
// Q register's code (presumably the S registers that alias it — confirm).
int scratch_s_base = scratch.code() * 4;
|
||||
for (int j = 0; j < 4; j++) {
|
||||
uint32_t imm = 0;
|
||||
// Pack four shuffle indices into one 32-bit word; shuffle[j * 4] ends up in
// the least significant byte.
for (int i = 3; i >= 0; i--) {
|
||||
imm = (imm << 8) | shuffle[j * 4 + i];
|
||||
}
|
||||
uint32_t four_lanes = imm;
|
||||
// Ensure indices are in [0,15] if table_size is 2, or [0,31] if 4.
|
||||
four_lanes &= mask;
|
||||
vmov(SwVfpRegister::from_code(scratch_s_base + j),
|
||||
Float32::FromBits(four_lanes));
|
||||
}
|
||||
|
||||
DwVfpRegister table_base = src1.low();
|
||||
NeonListOperand table(table_base, table_size);
|
||||
|
||||
if (dest != src1 && dest != src2) {
|
||||
vtbl(dest.low(), table, scratch.low());
|
||||
vtbl(dest.high(), table, scratch.high());
|
||||
} else {
|
||||
// dest is also one of the table registers, so writing dest.low() directly
// would overwrite table bytes before the second vtbl reads them. Compute
// the result in scratch and move it to dest afterwards.
vtbl(scratch.low(), table, scratch.low());
|
||||
vtbl(scratch.high(), table, scratch.high());
|
||||
vmov(dest, scratch);
|
||||
}
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
|
||||
|
@ -151,6 +151,41 @@ WASM_SIMD_LIFTOFF_TEST(S8x16Shuffle) {
|
||||
CHECK_EQ(31, ReadLittleEndianValue<byte>(&output[15]));
|
||||
}
|
||||
|
||||
// Exercise logic in Liftoff's implementation of shuffle when inputs to the
|
||||
// shuffle are the same register.
|
||||
WASM_SIMD_LIFTOFF_TEST(S8x16Shuffle_SingleOperand) {
|
||||
WasmRunner<int32_t> r(ExecutionTier::kLiftoff, kNoLowerSimd);
|
||||
byte local0 = r.AllocateLocal(kWasmS128);
|
||||
|
||||
// Global 0 holds the shuffle input, initialized to the bytes 0..15.
byte* g0 = r.builder().AddGlobal<byte>(kWasmS128);
|
||||
for (int i = 0; i < 16; i++) {
|
||||
WriteLittleEndianValue<byte>(&g0[i], i);
|
||||
}
|
||||
|
||||
// Global 1 receives the shuffle result.
byte* output = r.builder().AddGlobal<byte>(kWasmS128);
|
||||
|
||||
// This pattern reverses the input. Index 31 would select the last lane of
|
||||
// the second operand, but since both operands are the same value, it picks
|
||||
// lane 15 of that value, so the output is the input reversed.
|
||||
std::array<byte, 16> pattern = {
|
||||
{31, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}};
|
||||
|
||||
// Push the same local twice so both shuffle inputs are the same register.
|
||||
BUILD(r, WASM_SET_LOCAL(local0, WASM_GET_GLOBAL(0)), WASM_GET_LOCAL(local0),
|
||||
WASM_GET_LOCAL(local0),
|
||||
WASM_SET_GLOBAL(1, WASM_SIMD_S8x16_SHUFFLE_OP(
|
||||
kExprS8x16Shuffle, pattern, WASM_NOP, WASM_NOP)),
|
||||
WASM_ONE);
|
||||
|
||||
r.Call();
|
||||
|
||||
for (int i = 0; i < 16; i++) {
|
||||
// Check that the output is the reverse of input.
|
||||
byte actual = ReadLittleEndianValue<byte>(&output[i]);
|
||||
CHECK_EQ(15 - i, actual);
|
||||
}
|
||||
}
|
||||
|
||||
#undef WASM_SIMD_LIFTOFF_TEST
|
||||
|
||||
} // namespace test_run_wasm_simd_liftoff
|
||||
|
Loading…
Reference in New Issue
Block a user