From d4f7d87bdf22d79b6a66e8a11903ab1400d59541 Mon Sep 17 00:00:00 2001 From: "jing.bao" Date: Thu, 7 Jun 2018 09:51:58 +0800 Subject: [PATCH] [wasm] Improve unpacking operations in simd lowering Add explicit lowered type. Distinguish operations for low/high. Shl + Shr is equal to Mask. Enable WASM_SIMD_COMPILED_AND_LOWERED_TEST for unpacking. Change-Id: I0492318deb7c0ea0f4b30975ab107eedd446314a Reviewed-on: https://chromium-review.googlesource.com/1079989 Reviewed-by: Aseem Garg Commit-Queue: Jing Bao Cr-Commit-Position: refs/heads/master@{#53654} --- src/compiler/simd-scalar-lowering.cc | 75 ++++++++++++++++++-------- src/compiler/simd-scalar-lowering.h | 3 +- test/cctest/wasm/test-run-wasm-simd.cc | 65 ++++++++++++++++------ 3 files changed, 106 insertions(+), 37 deletions(-) diff --git a/src/compiler/simd-scalar-lowering.cc b/src/compiler/simd-scalar-lowering.cc index 6192375826..ce6b8b6f42 100644 --- a/src/compiler/simd-scalar-lowering.cc +++ b/src/compiler/simd-scalar-lowering.cc @@ -81,6 +81,8 @@ void SimdScalarLowering::LowerGraph() { V(I32x4ReplaceLane) \ V(I32x4SConvertF32x4) \ V(I32x4UConvertF32x4) \ + V(I32x4SConvertI16x8Low) \ + V(I32x4SConvertI16x8High) \ V(I32x4Neg) \ V(I32x4Shl) \ V(I32x4ShrS) \ @@ -99,6 +101,8 @@ void SimdScalarLowering::LowerGraph() { V(I32x4LeS) \ V(I32x4GtS) \ V(I32x4GeS) \ + V(I32x4UConvertI16x8Low) \ + V(I32x4UConvertI16x8High) \ V(I32x4LtU) \ V(I32x4LeU) \ V(I32x4GtU) \ @@ -143,6 +147,8 @@ void SimdScalarLowering::LowerGraph() { V(I16x8Splat) \ V(I16x8ExtractLane) \ V(I16x8ReplaceLane) \ + V(I16x8SConvertI8x16Low) \ + V(I16x8SConvertI8x16High) \ V(I16x8Neg) \ V(I16x8Shl) \ V(I16x8ShrS) \ @@ -155,6 +161,8 @@ void SimdScalarLowering::LowerGraph() { V(I16x8Mul) \ V(I16x8MinS) \ V(I16x8MaxS) \ + V(I16x8UConvertI8x16Low) \ + V(I16x8UConvertI8x16High) \ V(I16x8ShrU) \ V(I16x8UConvertI32x4) \ V(I16x8AddSaturateU) \ @@ -247,9 +255,20 @@ void SimdScalarLowering::SetLoweredType(Node* node, Node* output) { break; } case IrOpcode::kI8x16SConvertI16x8: - case IrOpcode::kI8x16UConvertI16x8: { + case IrOpcode::kI8x16UConvertI16x8: + case IrOpcode::kI32x4SConvertI16x8Low: + case IrOpcode::kI32x4SConvertI16x8High: + case IrOpcode::kI32x4UConvertI16x8Low: + case IrOpcode::kI32x4UConvertI16x8High: { replacements_[node->id()].type = SimdType::kInt16x8; break; + } + case IrOpcode::kI16x8SConvertI8x16Low: + case IrOpcode::kI16x8SConvertI8x16High: + case IrOpcode::kI16x8UConvertI8x16Low: + case IrOpcode::kI16x8UConvertI8x16High: { + replacements_[node->id()].type = SimdType::kInt8x16; + break; } FOREACH_FLOAT32X4_TO_INT32X4OPCODE(CASE_STMT) case IrOpcode::kI32x4SConvertF32x4: @@ -681,31 +700,25 @@ void SimdScalarLowering::LowerConvertFromFloat(Node* node, bool is_signed) { void SimdScalarLowering::LowerConvertFromInt(Node* node, SimdType input_rep_type, SimdType output_rep_type, - bool is_signed) { + bool is_signed, int start_index) { DCHECK_EQ(1, node->InputCount()); Node** rep = GetReplacementsWithType(node->InputAt(0), input_rep_type); - int32_t shift_val = 0; + int32_t mask = 0; if (input_rep_type == SimdType::kInt16x8) { DCHECK_EQ(output_rep_type, SimdType::kInt32x4); - shift_val = kShift16; + mask = kMask16; } else { DCHECK_EQ(output_rep_type, SimdType::kInt16x8); DCHECK_EQ(input_rep_type, SimdType::kInt8x16); - shift_val = kShift8; + mask = kMask8; } int num_lanes = NumLanes(output_rep_type); Node** rep_node = zone()->NewArray(num_lanes); for (int i = 0; i < num_lanes; ++i) { - rep_node[i] = rep[i]; - if (!is_signed) { - rep_node[i] = - graph()->NewNode(machine()->Word32Shr(), - graph()->NewNode(machine()->Word32Shl(), rep_node[i], - mcgraph_->Int32Constant(shift_val)), - mcgraph_->Int32Constant(shift_val)); - } + rep_node[i] = + is_signed ? rep[i + start_index] : Mask(rep[i + start_index], mask); } ReplaceNode(node, rep_node, num_lanes); @@ -1101,24 +1114,44 @@ void SimdScalarLowering::LowerNode(Node* node) { LowerConvertFromFloat(node, false); break; } - case IrOpcode::kI32x4SConvertI16x8Low: + case IrOpcode::kI32x4SConvertI16x8Low: { + LowerConvertFromInt(node, SimdType::kInt16x8, SimdType::kInt32x4, true, + 0); + break; + } case IrOpcode::kI32x4SConvertI16x8High: { - LowerConvertFromInt(node, SimdType::kInt16x8, SimdType::kInt32x4, true); + LowerConvertFromInt(node, SimdType::kInt16x8, SimdType::kInt32x4, true, + 4); + break; + } + case IrOpcode::kI32x4UConvertI16x8Low: { + LowerConvertFromInt(node, SimdType::kInt16x8, SimdType::kInt32x4, false, + 0); break; } - case IrOpcode::kI32x4UConvertI16x8Low: case IrOpcode::kI32x4UConvertI16x8High: { - LowerConvertFromInt(node, SimdType::kInt16x8, SimdType::kInt32x4, false); + LowerConvertFromInt(node, SimdType::kInt16x8, SimdType::kInt32x4, false, + 4); + break; + } + case IrOpcode::kI16x8SConvertI8x16Low: { + LowerConvertFromInt(node, SimdType::kInt8x16, SimdType::kInt16x8, true, + 0); break; } - case IrOpcode::kI16x8SConvertI8x16Low: case IrOpcode::kI16x8SConvertI8x16High: { - LowerConvertFromInt(node, SimdType::kInt8x16, SimdType::kInt16x8, true); + LowerConvertFromInt(node, SimdType::kInt8x16, SimdType::kInt16x8, true, + 8); + break; + } + case IrOpcode::kI16x8UConvertI8x16Low: { + LowerConvertFromInt(node, SimdType::kInt8x16, SimdType::kInt16x8, false, + 0); break; } - case IrOpcode::kI16x8UConvertI8x16Low: case IrOpcode::kI16x8UConvertI8x16High: { - LowerConvertFromInt(node, SimdType::kInt8x16, SimdType::kInt16x8, false); + LowerConvertFromInt(node, SimdType::kInt8x16, SimdType::kInt16x8, false, + 8); break; } case IrOpcode::kI16x8SConvertI32x4: { diff --git a/src/compiler/simd-scalar-lowering.h b/src/compiler/simd-scalar-lowering.h index 0bbac96906..bbf15137d0 100644 --- a/src/compiler/simd-scalar-lowering.h +++ b/src/compiler/simd-scalar-lowering.h @@ -92,7 +92,8 @@ class SimdScalarLowering { SimdType type); void LowerConvertFromFloat(Node* node, bool is_signed); void LowerConvertFromInt(Node* node, SimdType input_rep_type, - SimdType output_rep_type, bool is_signed); + SimdType output_rep_type, bool is_signed, + int start_index); void LowerPack(Node* node, SimdType input_rep_type, SimdType output_rep_type, bool is_signed); void LowerShiftOp(Node* node, SimdType type); diff --git a/test/cctest/wasm/test-run-wasm-simd.cc b/test/cctest/wasm/test-run-wasm-simd.cc index 4d0af0386e..5cef9b25b0 100644 --- a/test/cctest/wasm/test-run-wasm-simd.cc +++ b/test/cctest/wasm/test-run-wasm-simd.cc @@ -886,27 +886,43 @@ WASM_SIMD_COMPILED_AND_LOWERED_TEST(I32x4ConvertF32x4) { } // Tests both signed and unsigned conversion from I16x8 (unpacking). -WASM_SIMD_COMPILED_TEST(I32x4ConvertI16x8) { - WasmRunner r(execution_mode, lower_simd); +WASM_SIMD_COMPILED_AND_LOWERED_TEST(I32x4ConvertI16x8) { + WasmRunner r(execution_mode, + lower_simd); byte a = 0; byte unpacked_signed = 1; byte unpacked_unsigned = 2; + byte zero_value = 3; byte simd0 = r.AllocateLocal(kWasmS128); byte simd1 = r.AllocateLocal(kWasmS128); byte simd2 = r.AllocateLocal(kWasmS128); + byte simd3 = r.AllocateLocal(kWasmS128); + byte simd4 = r.AllocateLocal(kWasmS128); BUILD(r, WASM_SET_LOCAL(simd0, WASM_SIMD_I16x8_SPLAT(WASM_GET_LOCAL(a))), - WASM_SET_LOCAL(simd1, WASM_SIMD_UNOP(kExprI32x4SConvertI16x8Low, + WASM_SET_LOCAL( + simd0, WASM_SIMD_I16x8_REPLACE_LANE(0, WASM_GET_LOCAL(simd0), + WASM_GET_LOCAL(zero_value))), + WASM_SET_LOCAL(simd1, WASM_SIMD_UNOP(kExprI32x4SConvertI16x8High, WASM_GET_LOCAL(simd0))), WASM_SIMD_CHECK_SPLAT4(I32x4, simd1, I32, unpacked_signed), WASM_SET_LOCAL(simd2, WASM_SIMD_UNOP(kExprI32x4UConvertI16x8High, WASM_GET_LOCAL(simd0))), - WASM_SIMD_CHECK_SPLAT4(I32x4, simd2, I32, unpacked_unsigned), WASM_ONE); + WASM_SIMD_CHECK_SPLAT4(I32x4, simd2, I32, unpacked_unsigned), + WASM_SET_LOCAL(simd3, WASM_SIMD_UNOP(kExprI32x4SConvertI16x8Low, + WASM_GET_LOCAL(simd0))), + WASM_SIMD_CHECK4(I32x4, simd3, I32, zero_value, unpacked_signed, + unpacked_signed, unpacked_signed), + WASM_SET_LOCAL(simd4, WASM_SIMD_UNOP(kExprI32x4UConvertI16x8Low, + WASM_GET_LOCAL(simd0))), + WASM_SIMD_CHECK4(I32x4, simd4, I32, zero_value, unpacked_unsigned, + unpacked_unsigned, unpacked_unsigned), + WASM_ONE); FOR_INT16_INPUTS(i) { int32_t unpacked_signed = static_cast(Widen(*i)); int32_t unpacked_unsigned = static_cast(UnsignedWiden(*i)); - CHECK_EQ(1, r.Call(*i, unpacked_signed, unpacked_unsigned)); + CHECK_EQ(1, r.Call(*i, unpacked_signed, unpacked_unsigned, 0)); } } #endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || @@ -1092,26 +1108,45 @@ WASM_SIMD_COMPILED_AND_LOWERED_TEST(I32x4ShrU) { #if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \ V8_TARGET_ARCH_MIPS64 // Tests both signed and unsigned conversion from I8x16 (unpacking). -WASM_SIMD_COMPILED_TEST(I16x8ConvertI8x16) { - WasmRunner r(execution_mode, lower_simd); +WASM_SIMD_COMPILED_AND_LOWERED_TEST(I16x8ConvertI8x16) { + WasmRunner r(execution_mode, + lower_simd); byte a = 0; byte unpacked_signed = 1; byte unpacked_unsigned = 2; + byte zero_value = 3; byte simd0 = r.AllocateLocal(kWasmS128); byte simd1 = r.AllocateLocal(kWasmS128); byte simd2 = r.AllocateLocal(kWasmS128); - BUILD(r, WASM_SET_LOCAL(simd0, WASM_SIMD_I8x16_SPLAT(WASM_GET_LOCAL(a))), - WASM_SET_LOCAL(simd1, WASM_SIMD_UNOP(kExprI16x8SConvertI8x16Low, - WASM_GET_LOCAL(simd0))), - WASM_SIMD_CHECK_SPLAT8(I16x8, simd1, I32, unpacked_signed), - WASM_SET_LOCAL(simd2, WASM_SIMD_UNOP(kExprI16x8UConvertI8x16High, - WASM_GET_LOCAL(simd0))), - WASM_SIMD_CHECK_SPLAT8(I16x8, simd2, I32, unpacked_unsigned), WASM_ONE); + byte simd3 = r.AllocateLocal(kWasmS128); + byte simd4 = r.AllocateLocal(kWasmS128); + BUILD( + r, WASM_SET_LOCAL(simd0, WASM_SIMD_I8x16_SPLAT(WASM_GET_LOCAL(a))), + WASM_SET_LOCAL(simd0, + WASM_SIMD_I8x16_REPLACE_LANE(0, WASM_GET_LOCAL(simd0), + WASM_GET_LOCAL(zero_value))), + WASM_SET_LOCAL(simd1, WASM_SIMD_UNOP(kExprI16x8SConvertI8x16High, + WASM_GET_LOCAL(simd0))), + WASM_SIMD_CHECK_SPLAT8(I16x8, simd1, I32, unpacked_signed), + WASM_SET_LOCAL(simd2, WASM_SIMD_UNOP(kExprI16x8UConvertI8x16High, + WASM_GET_LOCAL(simd0))), + WASM_SIMD_CHECK_SPLAT8(I16x8, simd2, I32, unpacked_unsigned), + WASM_SET_LOCAL(simd3, WASM_SIMD_UNOP(kExprI16x8SConvertI8x16Low, + WASM_GET_LOCAL(simd0))), + WASM_SIMD_CHECK8(I16x8, simd3, I32, zero_value, unpacked_signed, + unpacked_signed, unpacked_signed, unpacked_signed, + unpacked_signed, unpacked_signed, unpacked_signed), + WASM_SET_LOCAL(simd4, WASM_SIMD_UNOP(kExprI16x8UConvertI8x16Low, + WASM_GET_LOCAL(simd0))), + WASM_SIMD_CHECK8(I16x8, simd4, I32, zero_value, unpacked_unsigned, + unpacked_unsigned, unpacked_unsigned, unpacked_unsigned, + unpacked_unsigned, unpacked_unsigned, unpacked_unsigned), + WASM_ONE); FOR_INT8_INPUTS(i) { int32_t unpacked_signed = static_cast(Widen(*i)); int32_t unpacked_unsigned = static_cast(UnsignedWiden(*i)); - CHECK_EQ(1, r.Call(*i, unpacked_signed, unpacked_unsigned)); + CHECK_EQ(1, r.Call(*i, unpacked_signed, unpacked_unsigned, 0)); } } #endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||