[wasm] Improve unpacking operations in simd lowering
Add an explicit lowered type for the unpacking (widening integer conversion) operations. Distinguish the low and high variants of each operation by passing a start index into the input lanes. Replace the Shl + Shr pair used for unsigned conversion with the equivalent Mask. Enable WASM_SIMD_COMPILED_AND_LOWERED_TEST for the unpacking tests.
Change-Id: I0492318deb7c0ea0f4b30975ab107eedd446314a
Reviewed-on: https://chromium-review.googlesource.com/1079989
Reviewed-by: Aseem Garg <aseemgarg@chromium.org>
Commit-Queue: Jing Bao <jing.bao@intel.com>
Cr-Commit-Position: refs/heads/master@{#53654}
This commit is contained in:
parent
b365b641c3
commit
d4f7d87bdf
@ -81,6 +81,8 @@ void SimdScalarLowering::LowerGraph() {
|
||||
V(I32x4ReplaceLane) \
|
||||
V(I32x4SConvertF32x4) \
|
||||
V(I32x4UConvertF32x4) \
|
||||
V(I32x4SConvertI16x8Low) \
|
||||
V(I32x4SConvertI16x8High) \
|
||||
V(I32x4Neg) \
|
||||
V(I32x4Shl) \
|
||||
V(I32x4ShrS) \
|
||||
@ -99,6 +101,8 @@ void SimdScalarLowering::LowerGraph() {
|
||||
V(I32x4LeS) \
|
||||
V(I32x4GtS) \
|
||||
V(I32x4GeS) \
|
||||
V(I32x4UConvertI16x8Low) \
|
||||
V(I32x4UConvertI16x8High) \
|
||||
V(I32x4LtU) \
|
||||
V(I32x4LeU) \
|
||||
V(I32x4GtU) \
|
||||
@ -143,6 +147,8 @@ void SimdScalarLowering::LowerGraph() {
|
||||
V(I16x8Splat) \
|
||||
V(I16x8ExtractLane) \
|
||||
V(I16x8ReplaceLane) \
|
||||
V(I16x8SConvertI8x16Low) \
|
||||
V(I16x8SConvertI8x16High) \
|
||||
V(I16x8Neg) \
|
||||
V(I16x8Shl) \
|
||||
V(I16x8ShrS) \
|
||||
@ -155,6 +161,8 @@ void SimdScalarLowering::LowerGraph() {
|
||||
V(I16x8Mul) \
|
||||
V(I16x8MinS) \
|
||||
V(I16x8MaxS) \
|
||||
V(I16x8UConvertI8x16Low) \
|
||||
V(I16x8UConvertI8x16High) \
|
||||
V(I16x8ShrU) \
|
||||
V(I16x8UConvertI32x4) \
|
||||
V(I16x8AddSaturateU) \
|
||||
@ -247,9 +255,20 @@ void SimdScalarLowering::SetLoweredType(Node* node, Node* output) {
|
||||
break;
|
||||
}
|
||||
case IrOpcode::kI8x16SConvertI16x8:
|
||||
case IrOpcode::kI8x16UConvertI16x8: {
|
||||
case IrOpcode::kI8x16UConvertI16x8:
|
||||
case IrOpcode::kI32x4SConvertI16x8Low:
|
||||
case IrOpcode::kI32x4SConvertI16x8High:
|
||||
case IrOpcode::kI32x4UConvertI16x8Low:
|
||||
case IrOpcode::kI32x4UConvertI16x8High: {
|
||||
replacements_[node->id()].type = SimdType::kInt16x8;
|
||||
break;
|
||||
}
|
||||
case IrOpcode::kI16x8SConvertI8x16Low:
|
||||
case IrOpcode::kI16x8SConvertI8x16High:
|
||||
case IrOpcode::kI16x8UConvertI8x16Low:
|
||||
case IrOpcode::kI16x8UConvertI8x16High: {
|
||||
replacements_[node->id()].type = SimdType::kInt8x16;
|
||||
break;
|
||||
}
|
||||
FOREACH_FLOAT32X4_TO_INT32X4OPCODE(CASE_STMT)
|
||||
case IrOpcode::kI32x4SConvertF32x4:
|
||||
@ -681,31 +700,25 @@ void SimdScalarLowering::LowerConvertFromFloat(Node* node, bool is_signed) {
|
||||
void SimdScalarLowering::LowerConvertFromInt(Node* node,
|
||||
SimdType input_rep_type,
|
||||
SimdType output_rep_type,
|
||||
bool is_signed) {
|
||||
bool is_signed, int start_index) {
|
||||
DCHECK_EQ(1, node->InputCount());
|
||||
Node** rep = GetReplacementsWithType(node->InputAt(0), input_rep_type);
|
||||
|
||||
int32_t shift_val = 0;
|
||||
int32_t mask = 0;
|
||||
if (input_rep_type == SimdType::kInt16x8) {
|
||||
DCHECK_EQ(output_rep_type, SimdType::kInt32x4);
|
||||
shift_val = kShift16;
|
||||
mask = kMask16;
|
||||
} else {
|
||||
DCHECK_EQ(output_rep_type, SimdType::kInt16x8);
|
||||
DCHECK_EQ(input_rep_type, SimdType::kInt8x16);
|
||||
shift_val = kShift8;
|
||||
mask = kMask8;
|
||||
}
|
||||
|
||||
int num_lanes = NumLanes(output_rep_type);
|
||||
Node** rep_node = zone()->NewArray<Node*>(num_lanes);
|
||||
for (int i = 0; i < num_lanes; ++i) {
|
||||
rep_node[i] = rep[i];
|
||||
if (!is_signed) {
|
||||
rep_node[i] =
|
||||
graph()->NewNode(machine()->Word32Shr(),
|
||||
graph()->NewNode(machine()->Word32Shl(), rep_node[i],
|
||||
mcgraph_->Int32Constant(shift_val)),
|
||||
mcgraph_->Int32Constant(shift_val));
|
||||
}
|
||||
rep_node[i] =
|
||||
is_signed ? rep[i + start_index] : Mask(rep[i + start_index], mask);
|
||||
}
|
||||
|
||||
ReplaceNode(node, rep_node, num_lanes);
|
||||
@ -1101,24 +1114,44 @@ void SimdScalarLowering::LowerNode(Node* node) {
|
||||
LowerConvertFromFloat(node, false);
|
||||
break;
|
||||
}
|
||||
case IrOpcode::kI32x4SConvertI16x8Low:
|
||||
case IrOpcode::kI32x4SConvertI16x8Low: {
|
||||
LowerConvertFromInt(node, SimdType::kInt16x8, SimdType::kInt32x4, true,
|
||||
0);
|
||||
break;
|
||||
}
|
||||
case IrOpcode::kI32x4SConvertI16x8High: {
|
||||
LowerConvertFromInt(node, SimdType::kInt16x8, SimdType::kInt32x4, true);
|
||||
LowerConvertFromInt(node, SimdType::kInt16x8, SimdType::kInt32x4, true,
|
||||
4);
|
||||
break;
|
||||
}
|
||||
case IrOpcode::kI32x4UConvertI16x8Low: {
|
||||
LowerConvertFromInt(node, SimdType::kInt16x8, SimdType::kInt32x4, false,
|
||||
0);
|
||||
break;
|
||||
}
|
||||
case IrOpcode::kI32x4UConvertI16x8Low:
|
||||
case IrOpcode::kI32x4UConvertI16x8High: {
|
||||
LowerConvertFromInt(node, SimdType::kInt16x8, SimdType::kInt32x4, false);
|
||||
LowerConvertFromInt(node, SimdType::kInt16x8, SimdType::kInt32x4, false,
|
||||
4);
|
||||
break;
|
||||
}
|
||||
case IrOpcode::kI16x8SConvertI8x16Low: {
|
||||
LowerConvertFromInt(node, SimdType::kInt8x16, SimdType::kInt16x8, true,
|
||||
0);
|
||||
break;
|
||||
}
|
||||
case IrOpcode::kI16x8SConvertI8x16Low:
|
||||
case IrOpcode::kI16x8SConvertI8x16High: {
|
||||
LowerConvertFromInt(node, SimdType::kInt8x16, SimdType::kInt16x8, true);
|
||||
LowerConvertFromInt(node, SimdType::kInt8x16, SimdType::kInt16x8, true,
|
||||
8);
|
||||
break;
|
||||
}
|
||||
case IrOpcode::kI16x8UConvertI8x16Low: {
|
||||
LowerConvertFromInt(node, SimdType::kInt8x16, SimdType::kInt16x8, false,
|
||||
0);
|
||||
break;
|
||||
}
|
||||
case IrOpcode::kI16x8UConvertI8x16Low:
|
||||
case IrOpcode::kI16x8UConvertI8x16High: {
|
||||
LowerConvertFromInt(node, SimdType::kInt8x16, SimdType::kInt16x8, false);
|
||||
LowerConvertFromInt(node, SimdType::kInt8x16, SimdType::kInt16x8, false,
|
||||
8);
|
||||
break;
|
||||
}
|
||||
case IrOpcode::kI16x8SConvertI32x4: {
|
||||
|
@ -92,7 +92,8 @@ class SimdScalarLowering {
|
||||
SimdType type);
|
||||
void LowerConvertFromFloat(Node* node, bool is_signed);
|
||||
void LowerConvertFromInt(Node* node, SimdType input_rep_type,
|
||||
SimdType output_rep_type, bool is_signed);
|
||||
SimdType output_rep_type, bool is_signed,
|
||||
int start_index);
|
||||
void LowerPack(Node* node, SimdType input_rep_type, SimdType output_rep_type,
|
||||
bool is_signed);
|
||||
void LowerShiftOp(Node* node, SimdType type);
|
||||
|
@ -886,27 +886,43 @@ WASM_SIMD_COMPILED_AND_LOWERED_TEST(I32x4ConvertF32x4) {
|
||||
}
|
||||
|
||||
// Tests both signed and unsigned conversion from I16x8 (unpacking).
|
||||
WASM_SIMD_COMPILED_TEST(I32x4ConvertI16x8) {
|
||||
WasmRunner<int32_t, int32_t, int32_t, int32_t> r(execution_mode, lower_simd);
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(I32x4ConvertI16x8) {
|
||||
WasmRunner<int32_t, int32_t, int32_t, int32_t, int32_t> r(execution_mode,
|
||||
lower_simd);
|
||||
byte a = 0;
|
||||
byte unpacked_signed = 1;
|
||||
byte unpacked_unsigned = 2;
|
||||
byte zero_value = 3;
|
||||
byte simd0 = r.AllocateLocal(kWasmS128);
|
||||
byte simd1 = r.AllocateLocal(kWasmS128);
|
||||
byte simd2 = r.AllocateLocal(kWasmS128);
|
||||
byte simd3 = r.AllocateLocal(kWasmS128);
|
||||
byte simd4 = r.AllocateLocal(kWasmS128);
|
||||
BUILD(r, WASM_SET_LOCAL(simd0, WASM_SIMD_I16x8_SPLAT(WASM_GET_LOCAL(a))),
|
||||
WASM_SET_LOCAL(simd1, WASM_SIMD_UNOP(kExprI32x4SConvertI16x8Low,
|
||||
WASM_SET_LOCAL(
|
||||
simd0, WASM_SIMD_I16x8_REPLACE_LANE(0, WASM_GET_LOCAL(simd0),
|
||||
WASM_GET_LOCAL(zero_value))),
|
||||
WASM_SET_LOCAL(simd1, WASM_SIMD_UNOP(kExprI32x4SConvertI16x8High,
|
||||
WASM_GET_LOCAL(simd0))),
|
||||
WASM_SIMD_CHECK_SPLAT4(I32x4, simd1, I32, unpacked_signed),
|
||||
WASM_SET_LOCAL(simd2, WASM_SIMD_UNOP(kExprI32x4UConvertI16x8High,
|
||||
WASM_GET_LOCAL(simd0))),
|
||||
WASM_SIMD_CHECK_SPLAT4(I32x4, simd2, I32, unpacked_unsigned), WASM_ONE);
|
||||
WASM_SIMD_CHECK_SPLAT4(I32x4, simd2, I32, unpacked_unsigned),
|
||||
WASM_SET_LOCAL(simd3, WASM_SIMD_UNOP(kExprI32x4SConvertI16x8Low,
|
||||
WASM_GET_LOCAL(simd0))),
|
||||
WASM_SIMD_CHECK4(I32x4, simd3, I32, zero_value, unpacked_signed,
|
||||
unpacked_signed, unpacked_signed),
|
||||
WASM_SET_LOCAL(simd4, WASM_SIMD_UNOP(kExprI32x4UConvertI16x8Low,
|
||||
WASM_GET_LOCAL(simd0))),
|
||||
WASM_SIMD_CHECK4(I32x4, simd4, I32, zero_value, unpacked_unsigned,
|
||||
unpacked_unsigned, unpacked_unsigned),
|
||||
WASM_ONE);
|
||||
|
||||
FOR_INT16_INPUTS(i) {
|
||||
int32_t unpacked_signed = static_cast<int32_t>(Widen<int16_t>(*i));
|
||||
int32_t unpacked_unsigned =
|
||||
static_cast<int32_t>(UnsignedWiden<int16_t>(*i));
|
||||
CHECK_EQ(1, r.Call(*i, unpacked_signed, unpacked_unsigned));
|
||||
CHECK_EQ(1, r.Call(*i, unpacked_signed, unpacked_unsigned, 0));
|
||||
}
|
||||
}
|
||||
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
|
||||
@ -1092,26 +1108,45 @@ WASM_SIMD_COMPILED_AND_LOWERED_TEST(I32x4ShrU) {
|
||||
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
|
||||
V8_TARGET_ARCH_MIPS64
|
||||
// Tests both signed and unsigned conversion from I8x16 (unpacking).
|
||||
WASM_SIMD_COMPILED_TEST(I16x8ConvertI8x16) {
|
||||
WasmRunner<int32_t, int32_t, int32_t, int32_t> r(execution_mode, lower_simd);
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(I16x8ConvertI8x16) {
|
||||
WasmRunner<int32_t, int32_t, int32_t, int32_t, int32_t> r(execution_mode,
|
||||
lower_simd);
|
||||
byte a = 0;
|
||||
byte unpacked_signed = 1;
|
||||
byte unpacked_unsigned = 2;
|
||||
byte zero_value = 3;
|
||||
byte simd0 = r.AllocateLocal(kWasmS128);
|
||||
byte simd1 = r.AllocateLocal(kWasmS128);
|
||||
byte simd2 = r.AllocateLocal(kWasmS128);
|
||||
BUILD(r, WASM_SET_LOCAL(simd0, WASM_SIMD_I8x16_SPLAT(WASM_GET_LOCAL(a))),
|
||||
WASM_SET_LOCAL(simd1, WASM_SIMD_UNOP(kExprI16x8SConvertI8x16Low,
|
||||
WASM_GET_LOCAL(simd0))),
|
||||
WASM_SIMD_CHECK_SPLAT8(I16x8, simd1, I32, unpacked_signed),
|
||||
WASM_SET_LOCAL(simd2, WASM_SIMD_UNOP(kExprI16x8UConvertI8x16High,
|
||||
WASM_GET_LOCAL(simd0))),
|
||||
WASM_SIMD_CHECK_SPLAT8(I16x8, simd2, I32, unpacked_unsigned), WASM_ONE);
|
||||
byte simd3 = r.AllocateLocal(kWasmS128);
|
||||
byte simd4 = r.AllocateLocal(kWasmS128);
|
||||
BUILD(
|
||||
r, WASM_SET_LOCAL(simd0, WASM_SIMD_I8x16_SPLAT(WASM_GET_LOCAL(a))),
|
||||
WASM_SET_LOCAL(simd0,
|
||||
WASM_SIMD_I8x16_REPLACE_LANE(0, WASM_GET_LOCAL(simd0),
|
||||
WASM_GET_LOCAL(zero_value))),
|
||||
WASM_SET_LOCAL(simd1, WASM_SIMD_UNOP(kExprI16x8SConvertI8x16High,
|
||||
WASM_GET_LOCAL(simd0))),
|
||||
WASM_SIMD_CHECK_SPLAT8(I16x8, simd1, I32, unpacked_signed),
|
||||
WASM_SET_LOCAL(simd2, WASM_SIMD_UNOP(kExprI16x8UConvertI8x16High,
|
||||
WASM_GET_LOCAL(simd0))),
|
||||
WASM_SIMD_CHECK_SPLAT8(I16x8, simd2, I32, unpacked_unsigned),
|
||||
WASM_SET_LOCAL(simd3, WASM_SIMD_UNOP(kExprI16x8SConvertI8x16Low,
|
||||
WASM_GET_LOCAL(simd0))),
|
||||
WASM_SIMD_CHECK8(I16x8, simd3, I32, zero_value, unpacked_signed,
|
||||
unpacked_signed, unpacked_signed, unpacked_signed,
|
||||
unpacked_signed, unpacked_signed, unpacked_signed),
|
||||
WASM_SET_LOCAL(simd4, WASM_SIMD_UNOP(kExprI16x8UConvertI8x16Low,
|
||||
WASM_GET_LOCAL(simd0))),
|
||||
WASM_SIMD_CHECK8(I16x8, simd4, I32, zero_value, unpacked_unsigned,
|
||||
unpacked_unsigned, unpacked_unsigned, unpacked_unsigned,
|
||||
unpacked_unsigned, unpacked_unsigned, unpacked_unsigned),
|
||||
WASM_ONE);
|
||||
|
||||
FOR_INT8_INPUTS(i) {
|
||||
int32_t unpacked_signed = static_cast<int32_t>(Widen<int8_t>(*i));
|
||||
int32_t unpacked_unsigned = static_cast<int32_t>(UnsignedWiden<int8_t>(*i));
|
||||
CHECK_EQ(1, r.Call(*i, unpacked_signed, unpacked_unsigned));
|
||||
CHECK_EQ(1, r.Call(*i, unpacked_signed, unpacked_unsigned, 0));
|
||||
}
|
||||
}
|
||||
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
|
||||
|
Loading…
Reference in New Issue
Block a user