[wasm] Improve unpacking operations in simd lowering

Add explicit lowered type.
Distinguish the Low and High variants of the unpacking operations (lower from the low or high half of the input lanes via a start_index).
Replace the Shl + Shr zero-extension pair with an equivalent Mask operation.
Enable WASM_SIMD_COMPILED_AND_LOWERED_TEST for unpacking.

Change-Id: I0492318deb7c0ea0f4b30975ab107eedd446314a
Reviewed-on: https://chromium-review.googlesource.com/1079989
Reviewed-by: Aseem Garg <aseemgarg@chromium.org>
Commit-Queue: Jing Bao <jing.bao@intel.com>
Cr-Commit-Position: refs/heads/master@{#53654}
This commit is contained in:
jing.bao 2018-06-07 09:51:58 +08:00 committed by Commit Bot
parent b365b641c3
commit d4f7d87bdf
3 changed files with 106 additions and 37 deletions

View File

@ -81,6 +81,8 @@ void SimdScalarLowering::LowerGraph() {
V(I32x4ReplaceLane) \
V(I32x4SConvertF32x4) \
V(I32x4UConvertF32x4) \
V(I32x4SConvertI16x8Low) \
V(I32x4SConvertI16x8High) \
V(I32x4Neg) \
V(I32x4Shl) \
V(I32x4ShrS) \
@ -99,6 +101,8 @@ void SimdScalarLowering::LowerGraph() {
V(I32x4LeS) \
V(I32x4GtS) \
V(I32x4GeS) \
V(I32x4UConvertI16x8Low) \
V(I32x4UConvertI16x8High) \
V(I32x4LtU) \
V(I32x4LeU) \
V(I32x4GtU) \
@ -143,6 +147,8 @@ void SimdScalarLowering::LowerGraph() {
V(I16x8Splat) \
V(I16x8ExtractLane) \
V(I16x8ReplaceLane) \
V(I16x8SConvertI8x16Low) \
V(I16x8SConvertI8x16High) \
V(I16x8Neg) \
V(I16x8Shl) \
V(I16x8ShrS) \
@ -155,6 +161,8 @@ void SimdScalarLowering::LowerGraph() {
V(I16x8Mul) \
V(I16x8MinS) \
V(I16x8MaxS) \
V(I16x8UConvertI8x16Low) \
V(I16x8UConvertI8x16High) \
V(I16x8ShrU) \
V(I16x8UConvertI32x4) \
V(I16x8AddSaturateU) \
@ -247,9 +255,20 @@ void SimdScalarLowering::SetLoweredType(Node* node, Node* output) {
break;
}
case IrOpcode::kI8x16SConvertI16x8:
case IrOpcode::kI8x16UConvertI16x8: {
case IrOpcode::kI8x16UConvertI16x8:
case IrOpcode::kI32x4SConvertI16x8Low:
case IrOpcode::kI32x4SConvertI16x8High:
case IrOpcode::kI32x4UConvertI16x8Low:
case IrOpcode::kI32x4UConvertI16x8High: {
replacements_[node->id()].type = SimdType::kInt16x8;
break;
}
case IrOpcode::kI16x8SConvertI8x16Low:
case IrOpcode::kI16x8SConvertI8x16High:
case IrOpcode::kI16x8UConvertI8x16Low:
case IrOpcode::kI16x8UConvertI8x16High: {
replacements_[node->id()].type = SimdType::kInt8x16;
break;
}
FOREACH_FLOAT32X4_TO_INT32X4OPCODE(CASE_STMT)
case IrOpcode::kI32x4SConvertF32x4:
@ -681,31 +700,25 @@ void SimdScalarLowering::LowerConvertFromFloat(Node* node, bool is_signed) {
void SimdScalarLowering::LowerConvertFromInt(Node* node,
SimdType input_rep_type,
SimdType output_rep_type,
bool is_signed) {
bool is_signed, int start_index) {
DCHECK_EQ(1, node->InputCount());
Node** rep = GetReplacementsWithType(node->InputAt(0), input_rep_type);
int32_t shift_val = 0;
int32_t mask = 0;
if (input_rep_type == SimdType::kInt16x8) {
DCHECK_EQ(output_rep_type, SimdType::kInt32x4);
shift_val = kShift16;
mask = kMask16;
} else {
DCHECK_EQ(output_rep_type, SimdType::kInt16x8);
DCHECK_EQ(input_rep_type, SimdType::kInt8x16);
shift_val = kShift8;
mask = kMask8;
}
int num_lanes = NumLanes(output_rep_type);
Node** rep_node = zone()->NewArray<Node*>(num_lanes);
for (int i = 0; i < num_lanes; ++i) {
rep_node[i] = rep[i];
if (!is_signed) {
rep_node[i] =
graph()->NewNode(machine()->Word32Shr(),
graph()->NewNode(machine()->Word32Shl(), rep_node[i],
mcgraph_->Int32Constant(shift_val)),
mcgraph_->Int32Constant(shift_val));
}
is_signed ? rep[i + start_index] : Mask(rep[i + start_index], mask);
}
ReplaceNode(node, rep_node, num_lanes);
@ -1101,24 +1114,44 @@ void SimdScalarLowering::LowerNode(Node* node) {
LowerConvertFromFloat(node, false);
break;
}
case IrOpcode::kI32x4SConvertI16x8Low:
case IrOpcode::kI32x4SConvertI16x8Low: {
LowerConvertFromInt(node, SimdType::kInt16x8, SimdType::kInt32x4, true,
0);
break;
}
case IrOpcode::kI32x4SConvertI16x8High: {
LowerConvertFromInt(node, SimdType::kInt16x8, SimdType::kInt32x4, true);
LowerConvertFromInt(node, SimdType::kInt16x8, SimdType::kInt32x4, true,
4);
break;
}
case IrOpcode::kI32x4UConvertI16x8Low: {
LowerConvertFromInt(node, SimdType::kInt16x8, SimdType::kInt32x4, false,
0);
break;
}
case IrOpcode::kI32x4UConvertI16x8Low:
case IrOpcode::kI32x4UConvertI16x8High: {
LowerConvertFromInt(node, SimdType::kInt16x8, SimdType::kInt32x4, false);
LowerConvertFromInt(node, SimdType::kInt16x8, SimdType::kInt32x4, false,
4);
break;
}
case IrOpcode::kI16x8SConvertI8x16Low: {
LowerConvertFromInt(node, SimdType::kInt8x16, SimdType::kInt16x8, true,
0);
break;
}
case IrOpcode::kI16x8SConvertI8x16Low:
case IrOpcode::kI16x8SConvertI8x16High: {
LowerConvertFromInt(node, SimdType::kInt8x16, SimdType::kInt16x8, true);
LowerConvertFromInt(node, SimdType::kInt8x16, SimdType::kInt16x8, true,
8);
break;
}
case IrOpcode::kI16x8UConvertI8x16Low: {
LowerConvertFromInt(node, SimdType::kInt8x16, SimdType::kInt16x8, false,
0);
break;
}
case IrOpcode::kI16x8UConvertI8x16Low:
case IrOpcode::kI16x8UConvertI8x16High: {
LowerConvertFromInt(node, SimdType::kInt8x16, SimdType::kInt16x8, false);
LowerConvertFromInt(node, SimdType::kInt8x16, SimdType::kInt16x8, false,
8);
break;
}
case IrOpcode::kI16x8SConvertI32x4: {

View File

@ -92,7 +92,8 @@ class SimdScalarLowering {
SimdType type);
void LowerConvertFromFloat(Node* node, bool is_signed);
void LowerConvertFromInt(Node* node, SimdType input_rep_type,
SimdType output_rep_type, bool is_signed);
SimdType output_rep_type, bool is_signed,
int start_index);
void LowerPack(Node* node, SimdType input_rep_type, SimdType output_rep_type,
bool is_signed);
void LowerShiftOp(Node* node, SimdType type);

View File

@ -886,27 +886,43 @@ WASM_SIMD_COMPILED_AND_LOWERED_TEST(I32x4ConvertF32x4) {
}
// Tests both signed and unsigned conversion from I16x8 (unpacking).
WASM_SIMD_COMPILED_TEST(I32x4ConvertI16x8) {
WasmRunner<int32_t, int32_t, int32_t, int32_t> r(execution_mode, lower_simd);
WASM_SIMD_COMPILED_AND_LOWERED_TEST(I32x4ConvertI16x8) {
WasmRunner<int32_t, int32_t, int32_t, int32_t, int32_t> r(execution_mode,
lower_simd);
byte a = 0;
byte unpacked_signed = 1;
byte unpacked_unsigned = 2;
byte zero_value = 3;
byte simd0 = r.AllocateLocal(kWasmS128);
byte simd1 = r.AllocateLocal(kWasmS128);
byte simd2 = r.AllocateLocal(kWasmS128);
byte simd3 = r.AllocateLocal(kWasmS128);
byte simd4 = r.AllocateLocal(kWasmS128);
BUILD(r, WASM_SET_LOCAL(simd0, WASM_SIMD_I16x8_SPLAT(WASM_GET_LOCAL(a))),
WASM_SET_LOCAL(simd1, WASM_SIMD_UNOP(kExprI32x4SConvertI16x8Low,
WASM_SET_LOCAL(
simd0, WASM_SIMD_I16x8_REPLACE_LANE(0, WASM_GET_LOCAL(simd0),
WASM_GET_LOCAL(zero_value))),
WASM_SET_LOCAL(simd1, WASM_SIMD_UNOP(kExprI32x4SConvertI16x8High,
WASM_GET_LOCAL(simd0))),
WASM_SIMD_CHECK_SPLAT4(I32x4, simd1, I32, unpacked_signed),
WASM_SET_LOCAL(simd2, WASM_SIMD_UNOP(kExprI32x4UConvertI16x8High,
WASM_GET_LOCAL(simd0))),
WASM_SIMD_CHECK_SPLAT4(I32x4, simd2, I32, unpacked_unsigned), WASM_ONE);
WASM_SIMD_CHECK_SPLAT4(I32x4, simd2, I32, unpacked_unsigned),
WASM_SET_LOCAL(simd3, WASM_SIMD_UNOP(kExprI32x4SConvertI16x8Low,
WASM_GET_LOCAL(simd0))),
WASM_SIMD_CHECK4(I32x4, simd3, I32, zero_value, unpacked_signed,
unpacked_signed, unpacked_signed),
WASM_SET_LOCAL(simd4, WASM_SIMD_UNOP(kExprI32x4UConvertI16x8Low,
WASM_GET_LOCAL(simd0))),
WASM_SIMD_CHECK4(I32x4, simd4, I32, zero_value, unpacked_unsigned,
unpacked_unsigned, unpacked_unsigned),
WASM_ONE);
FOR_INT16_INPUTS(i) {
int32_t unpacked_signed = static_cast<int32_t>(Widen<int16_t>(*i));
int32_t unpacked_unsigned =
static_cast<int32_t>(UnsignedWiden<int16_t>(*i));
CHECK_EQ(1, r.Call(*i, unpacked_signed, unpacked_unsigned));
CHECK_EQ(1, r.Call(*i, unpacked_signed, unpacked_unsigned, 0));
}
}
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
@ -1092,26 +1108,45 @@ WASM_SIMD_COMPILED_AND_LOWERED_TEST(I32x4ShrU) {
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
V8_TARGET_ARCH_MIPS64
// Tests both signed and unsigned conversion from I8x16 (unpacking).
WASM_SIMD_COMPILED_TEST(I16x8ConvertI8x16) {
WasmRunner<int32_t, int32_t, int32_t, int32_t> r(execution_mode, lower_simd);
WASM_SIMD_COMPILED_AND_LOWERED_TEST(I16x8ConvertI8x16) {
WasmRunner<int32_t, int32_t, int32_t, int32_t, int32_t> r(execution_mode,
lower_simd);
byte a = 0;
byte unpacked_signed = 1;
byte unpacked_unsigned = 2;
byte zero_value = 3;
byte simd0 = r.AllocateLocal(kWasmS128);
byte simd1 = r.AllocateLocal(kWasmS128);
byte simd2 = r.AllocateLocal(kWasmS128);
BUILD(r, WASM_SET_LOCAL(simd0, WASM_SIMD_I8x16_SPLAT(WASM_GET_LOCAL(a))),
WASM_SET_LOCAL(simd1, WASM_SIMD_UNOP(kExprI16x8SConvertI8x16Low,
byte simd3 = r.AllocateLocal(kWasmS128);
byte simd4 = r.AllocateLocal(kWasmS128);
BUILD(
r, WASM_SET_LOCAL(simd0, WASM_SIMD_I8x16_SPLAT(WASM_GET_LOCAL(a))),
WASM_SET_LOCAL(simd0,
WASM_SIMD_I8x16_REPLACE_LANE(0, WASM_GET_LOCAL(simd0),
WASM_GET_LOCAL(zero_value))),
WASM_SET_LOCAL(simd1, WASM_SIMD_UNOP(kExprI16x8SConvertI8x16High,
WASM_GET_LOCAL(simd0))),
WASM_SIMD_CHECK_SPLAT8(I16x8, simd1, I32, unpacked_signed),
WASM_SET_LOCAL(simd2, WASM_SIMD_UNOP(kExprI16x8UConvertI8x16High,
WASM_GET_LOCAL(simd0))),
WASM_SIMD_CHECK_SPLAT8(I16x8, simd2, I32, unpacked_unsigned), WASM_ONE);
WASM_SIMD_CHECK_SPLAT8(I16x8, simd2, I32, unpacked_unsigned),
WASM_SET_LOCAL(simd3, WASM_SIMD_UNOP(kExprI16x8SConvertI8x16Low,
WASM_GET_LOCAL(simd0))),
WASM_SIMD_CHECK8(I16x8, simd3, I32, zero_value, unpacked_signed,
unpacked_signed, unpacked_signed, unpacked_signed,
unpacked_signed, unpacked_signed, unpacked_signed),
WASM_SET_LOCAL(simd4, WASM_SIMD_UNOP(kExprI16x8UConvertI8x16Low,
WASM_GET_LOCAL(simd0))),
WASM_SIMD_CHECK8(I16x8, simd4, I32, zero_value, unpacked_unsigned,
unpacked_unsigned, unpacked_unsigned, unpacked_unsigned,
unpacked_unsigned, unpacked_unsigned, unpacked_unsigned),
WASM_ONE);
FOR_INT8_INPUTS(i) {
int32_t unpacked_signed = static_cast<int32_t>(Widen<int8_t>(*i));
int32_t unpacked_unsigned = static_cast<int32_t>(UnsignedWiden<int8_t>(*i));
CHECK_EQ(1, r.Call(*i, unpacked_signed, unpacked_unsigned));
CHECK_EQ(1, r.Call(*i, unpacked_signed, unpacked_unsigned, 0));
}
}
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||