[wasm-simd] Scalar lowering for load extends

Implements lowering for:
- i16x8.load8x8_s
- i16x8.load8x8_u
- i32x4.load16x4_s
- i32x4.load16x4_u

As before, i64x2 is not implemented since 64-bit lowering and scalar
lowering don't work together yet.

Bug: v8:9886
Change-Id: I3728d009e053acf82baacbcf1c6c08ea636ef241
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2044546
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#66380}
This commit is contained in:
Ng Zhi An 2020-02-18 07:56:16 +00:00 committed by Commit Bot
parent 03fc414908
commit 35effde9bc
2 changed files with 60 additions and 19 deletions

View File

@ -4,6 +4,7 @@
#include "src/compiler/simd-scalar-lowering.h" #include "src/compiler/simd-scalar-lowering.h"
#include "src/codegen/machine-type.h"
#include "src/compiler/diamond.h" #include "src/compiler/diamond.h"
#include "src/compiler/linkage.h" #include "src/compiler/linkage.h"
#include "src/compiler/machine-operator.h" #include "src/compiler/machine-operator.h"
@ -283,6 +284,14 @@ void SimdScalarLowering::SetLoweredType(Node* node, Node* output) {
case LoadTransformation::kS32x4LoadSplat: case LoadTransformation::kS32x4LoadSplat:
replacements_[node->id()].type = SimdType::kInt32x4; replacements_[node->id()].type = SimdType::kInt32x4;
break; break;
case LoadTransformation::kI16x8Load8x8S:
case LoadTransformation::kI16x8Load8x8U:
replacements_[node->id()].type = SimdType::kInt16x8;
break;
case LoadTransformation::kI32x4Load16x4S:
case LoadTransformation::kI32x4Load16x4U:
replacements_[node->id()].type = SimdType::kInt32x4;
break;
default: default:
UNIMPLEMENTED(); UNIMPLEMENTED();
} }
@ -448,47 +457,79 @@ void SimdScalarLowering::LowerLoadOp(Node* node, SimdType type) {
void SimdScalarLowering::LowerLoadTransformOp(Node* node, SimdType type) { void SimdScalarLowering::LowerLoadTransformOp(Node* node, SimdType type) {
LoadTransformParameters params = LoadTransformParametersOf(node->op()); LoadTransformParameters params = LoadTransformParametersOf(node->op());
MachineType load_rep = MachineType::None();
SimdType load_type = type;
// Load extends have a different machine type for loading.
switch (params.transformation) { switch (params.transformation) {
// Lowering for s64x2 is not implemented since lowering for 64x2 operations case LoadTransformation::kI16x8Load8x8S:
// doesn't work properly yet. load_rep = MachineType::Int8();
load_type = SimdType::kInt8x16;
break;
case LoadTransformation::kI16x8Load8x8U:
load_rep = MachineType::Uint8();
load_type = SimdType::kInt8x16;
break;
case LoadTransformation::kI32x4Load16x4S:
load_rep = MachineType::Int16();
load_type = SimdType::kInt16x8;
break;
case LoadTransformation::kI32x4Load16x4U:
load_rep = MachineType::Uint16();
load_type = SimdType::kInt16x8;
break;
case LoadTransformation::kS8x16LoadSplat: case LoadTransformation::kS8x16LoadSplat:
case LoadTransformation::kS16x8LoadSplat: case LoadTransformation::kS16x8LoadSplat:
case LoadTransformation::kS32x4LoadSplat: case LoadTransformation::kS32x4LoadSplat:
load_rep = MachineTypeFrom(type);
break; break;
default: default:
// Lowering for s64x2 is not implemented since lowering for 64x2
// operations doesn't work properly yet.
UNIMPLEMENTED(); UNIMPLEMENTED();
} }
DCHECK_NE(load_rep, MachineType::None());
const Operator* load_op; const Operator* load_op;
switch (params.kind) { switch (params.kind) {
case LoadKind::kNormal: case LoadKind::kNormal:
load_op = machine()->Load(MachineTypeFrom(type)); load_op = machine()->Load(load_rep);
break; break;
case LoadKind::kUnaligned: case LoadKind::kUnaligned:
load_op = machine()->UnalignedLoad(MachineTypeFrom(type)); load_op = machine()->UnalignedLoad(load_rep);
break; break;
case LoadKind::kProtected: case LoadKind::kProtected:
load_op = machine()->ProtectedLoad(MachineTypeFrom(type)); load_op = machine()->ProtectedLoad(load_rep);
break; break;
default:
UNREACHABLE();
} }
Node* base = node->InputAt(0); Node* base = node->InputAt(0);
Node* index = node->InputAt(1); Node* index = node->InputAt(1);
int num_lanes = NumLanes(type); int num_lanes = NumLanes(type);
Node** rep_nodes = zone()->NewArray<Node*>(num_lanes); Node** rep_nodes = zone()->NewArray<Node*>(num_lanes);
rep_nodes[0] = node;
NodeProperties::ChangeOp(rep_nodes[0], load_op);
Node* effect_input = node->InputAt(2); Node* effect_input = node->InputAt(2);
Node* control_input = node->InputAt(3); Node* control_input = node->InputAt(3);
for (int i = num_lanes - 1; i > 0; --i) {
rep_nodes[i] = if (type != load_type) {
graph()->NewNode(load_op, base, index, effect_input, control_input); // We load a smaller lane size, then extend to a larger lane size. So use
effect_input = rep_nodes[i]; // the smaller lane size to calculate the index nodes for loads, but only
// actually load half of those lanes.
Node** indices = zone()->NewArray<Node*>(num_lanes * 2);
GetIndexNodes(index, indices, load_type);
for (int i = num_lanes - 1; i >= 0; --i) {
rep_nodes[i] = graph()->NewNode(load_op, base, indices[i], effect_input,
control_input);
effect_input = rep_nodes[i];
}
} else {
// Load splat, load from the same index for every lane.
for (int i = num_lanes - 1; i >= 0; --i) {
rep_nodes[i] =
graph()->NewNode(load_op, base, index, effect_input, control_input);
effect_input = rep_nodes[i];
}
} }
rep_nodes[0]->ReplaceInput(2, rep_nodes[1]);
ReplaceNode(node, rep_nodes, num_lanes); ReplaceNode(node, rep_nodes, num_lanes);
} }

View File

@ -3368,21 +3368,21 @@ void RunLoadExtendTest(ExecutionTier execution_tier, LowerSimd lower_simd,
} }
} }
WASM_SIMD_TEST_NO_LOWERING(I16x8Load8x8U) { WASM_SIMD_TEST(I16x8Load8x8U) {
RunLoadExtendTest<uint8_t, uint16_t>(execution_tier, lower_simd, RunLoadExtendTest<uint8_t, uint16_t>(execution_tier, lower_simd,
kExprI16x8Load8x8U); kExprI16x8Load8x8U);
} }
WASM_SIMD_TEST_NO_LOWERING(I16x8Load8x8S) { WASM_SIMD_TEST(I16x8Load8x8S) {
RunLoadExtendTest<int8_t, int16_t>(execution_tier, lower_simd, RunLoadExtendTest<int8_t, int16_t>(execution_tier, lower_simd,
kExprI16x8Load8x8S); kExprI16x8Load8x8S);
} }
WASM_SIMD_TEST_NO_LOWERING(I32x4Load16x4U) { WASM_SIMD_TEST(I32x4Load16x4U) {
RunLoadExtendTest<uint16_t, uint32_t>(execution_tier, lower_simd, RunLoadExtendTest<uint16_t, uint32_t>(execution_tier, lower_simd,
kExprI32x4Load16x4U); kExprI32x4Load16x4U);
} }
WASM_SIMD_TEST_NO_LOWERING(I32x4Load16x4S) { WASM_SIMD_TEST(I32x4Load16x4S) {
RunLoadExtendTest<int16_t, int32_t>(execution_tier, lower_simd, RunLoadExtendTest<int16_t, int32_t>(execution_tier, lower_simd,
kExprI32x4Load16x4S); kExprI32x4Load16x4S);
} }