[wasm] Implement 128-bit endian swap for simd type

BUG=

Review-Url: https://codereview.chromium.org/2838943002
Cr-Commit-Position: refs/heads/master@{#45208}
This commit is contained in:
jyan 2017-05-09 12:54:19 -07:00 committed by Commit bot
parent 266ff75630
commit 18c33c504a
4 changed files with 80 additions and 22 deletions

View File

@ -276,15 +276,20 @@ int SimdScalarLowering::NumLanes(SimdType type) {
return num_lanes;
}
// Namespace-scope definition of the static constexpr member array declared
// (with its initializer) in the class. GetIndexNodes indexes the array with
// run-time values, which odr-uses it, so before C++17 this definition is
// required for the program to link.
constexpr int SimdScalarLowering::kLaneOffsets[];
// Fills |new_indices| with one index node per scalar lane of a lowered
// 128-bit memory access.
//
// The i-th chunk of the access lives at byte offset i * lane_width from
// |index|. Its node is stored in slot kLaneOffsets[i * lane_width] /
// lane_width, so the slot order follows the kLaneOffsets table instead of
// always being the plain memory order.
//
// index:       node holding the index of the original 128-bit access.
// new_indices: output array; the code writes NumLanes(type) entries.
// type:        SIMD type that determines the lane count.
//
// Each slot is written exactly once. Writing new_indices[i] directly as well
// would clobber slots already filled through the table whenever the table is
// not the identity mapping, and would leave unused Int32Add nodes in the
// graph.
void SimdScalarLowering::GetIndexNodes(Node* index, Node** new_indices,
                                       SimdType type) {
  const int num_lanes = NumLanes(type);
  const int lane_width = kSimd128Size / num_lanes;

  // Byte offset 0 needs no addition: reuse the incoming index node.
  new_indices[kLaneOffsets[0] / lane_width] = index;

  for (int i = 1; i < num_lanes; ++i) {
    const int byte_offset = i * lane_width;
    new_indices[kLaneOffsets[byte_offset] / lane_width] = graph()->NewNode(
        machine()->Int32Add(), index,
        graph()->NewNode(common()->Int32Constant(byte_offset)));
  }
}
@ -298,6 +303,7 @@ void SimdScalarLowering::LowerLoadOp(MachineRepresentation rep, Node* node,
GetIndexNodes(index, indices, type);
Node** rep_nodes = zone()->NewArray<Node*>(num_lanes);
rep_nodes[0] = node;
rep_nodes[0]->ReplaceInput(1, indices[0]);
NodeProperties::ChangeOp(rep_nodes[0], load_op);
if (node->InputCount() > 2) {
DCHECK(node->InputCount() > 3);
@ -336,6 +342,7 @@ void SimdScalarLowering::LowerStoreOp(MachineRepresentation rep, Node* node,
rep_nodes[0] = node;
Node** rep_inputs = GetReplacementsWithType(value, rep_type);
rep_nodes[0]->ReplaceInput(2, rep_inputs[0]);
rep_nodes[0]->ReplaceInput(1, indices[0]);
NodeProperties::ChangeOp(node, store_op);
if (node->InputCount() > 3) {
DCHECK(node->InputCount() > 4);
@ -348,14 +355,12 @@ void SimdScalarLowering::LowerStoreOp(MachineRepresentation rep, Node* node,
effect_input = rep_nodes[i];
}
rep_nodes[0]->ReplaceInput(3, rep_nodes[1]);
} else {
for (int i = 1; i < num_lanes; ++i) {
rep_nodes[i] =
graph()->NewNode(store_op, base, indices[i], rep_inputs[i]);
}
}
ReplaceNode(node, rep_nodes, num_lanes);
} else {
DefaultLowering(node);

View File

@ -36,6 +36,13 @@ class SimdScalarLowering {
kSimd1x8
};
// Byte-offset table for a 128-bit value: entry b is b itself by default and
// 15 - b when V8_TARGET_BIG_ENDIAN is defined.
static constexpr int kLaneOffsets[16] = {
#if defined(V8_TARGET_BIG_ENDIAN)
    15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
#else
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
#endif
};
struct Replacement {
Node** node = nullptr;
SimdType type; // represents output type

View File

@ -1066,6 +1066,7 @@ static bool ReverseBytesSupported(MachineOperatorBuilder* m,
size_t size_in_bytes) {
switch (size_in_bytes) {
case 4:
case 16:
return m->Word32ReverseBytes().IsSupported();
case 8:
return m->Word64ReverseBytes().IsSupported();
@ -1102,6 +1103,9 @@ Node* WasmGraphBuilder::BuildChangeEndianness(Node* node, MachineType memtype,
// No need to change endianness for byte size, return original node
return node;
break;
case MachineRepresentation::kSimd128:
DCHECK(ReverseBytesSupported(m, valueSizeInBytes));
break;
default:
UNREACHABLE();
break;
@ -1124,6 +1128,27 @@ Node* WasmGraphBuilder::BuildChangeEndianness(Node* node, MachineType memtype,
case 8:
result = graph()->NewNode(m->Word64ReverseBytes().op(), value);
break;
case 16: {
Node* byte_reversed_lanes[4];
for (int lane = 0; lane < 4; lane++) {
byte_reversed_lanes[lane] = graph()->NewNode(
m->Word32ReverseBytes().op(),
graph()->NewNode(jsgraph()->machine()->I32x4ExtractLane(lane),
value));
}
// This is making a copy of the value.
result =
graph()->NewNode(jsgraph()->machine()->S128And(), value, value);
for (int lane = 0; lane < 4; lane++) {
result =
graph()->NewNode(jsgraph()->machine()->I32x4ReplaceLane(3 - lane),
result, byte_reversed_lanes[lane]);
}
break;
}
default:
UNREACHABLE();
}

View File

@ -2025,6 +2025,7 @@ WASM_EXEC_COMPILED_TEST(S1x16Xor) { RunS1x16BinOpTest(kExprS1x16Xor, Xor); }
#endif // !V8_TARGET_ARCH_ARM
#if V8_TARGET_ARCH_ARM || SIMD_LOWERING_TARGET
WASM_EXEC_COMPILED_TEST(SimdI32x4ExtractWithF32x4) {
FLAG_wasm_simd_prototype = true;
WasmRunner<int32_t> r(kExecuteCompiled);
@ -2171,14 +2172,37 @@ WASM_EXEC_COMPILED_TEST(SimdF32x4For) {
#endif // V8_TARGET_ARCH_ARM || SIMD_LOWERING_TARGET
#if V8_TARGET_ARCH_ARM || SIMD_LOWERING_TARGET || V8_TARGET_ARCH_X64
// Copies |arr| into the vector storage at |v|, one element per lane.
// Lane i is written to v[i], or to v[numLanes - 1 - i] when
// V8_TARGET_BIG_ENDIAN is defined.
template <typename T, int numLanes = 4>
void SetVectorByLanes(T* v, const std::array<T, numLanes>& arr) {
  for (int lane = 0; lane < numLanes; ++lane) {
#if defined(V8_TARGET_BIG_ENDIAN)
    const int slot = numLanes - 1 - lane;
#else
    const int slot = lane;
#endif
    v[slot] = arr[lane];
  }
}
// Returns a reference to the element of |v| that holds lane |lane|.
// That is v[lane], or v[kElems - 1 - lane] when V8_TARGET_BIG_ENDIAN is
// defined, where kElems = kSimd128Size / sizeof(T).
template <typename T>
const T& GetScalar(T* v, int lane) {
  constexpr int kElems = kSimd128Size / sizeof(T);
  // kElems is otherwise only read inside the #if branch and the DCHECK.
  USE(kElems);
#if defined(V8_TARGET_BIG_ENDIAN)
  const int slot = kElems - 1 - lane;
#else
  const int slot = lane;
#endif
  DCHECK(slot >= 0 && slot < kElems);
  return *(v + slot);
}
WASM_EXEC_COMPILED_TEST(SimdI32x4GetGlobal) {
FLAG_wasm_simd_prototype = true;
WasmRunner<int32_t, int32_t> r(kExecuteCompiled);
int32_t* global = r.module().AddGlobal<int32_t>(kWasmS128);
*(global) = 0;
*(global + 1) = 1;
*(global + 2) = 2;
*(global + 3) = 3;
SetVectorByLanes(global, {{0, 1, 2, 3}});
r.AllocateLocal(kWasmI32);
BUILD(
r, WASM_SET_LOCAL(1, WASM_I32V(1)),
@ -2211,10 +2235,10 @@ WASM_EXEC_COMPILED_TEST(SimdI32x4SetGlobal) {
WASM_I32V(56))),
WASM_I32V(1));
CHECK_EQ(1, r.Call(0));
CHECK_EQ(*global, 23);
CHECK_EQ(*(global + 1), 34);
CHECK_EQ(*(global + 2), 45);
CHECK_EQ(*(global + 3), 56);
CHECK_EQ(GetScalar(global, 0), 23);
CHECK_EQ(GetScalar(global, 1), 34);
CHECK_EQ(GetScalar(global, 2), 45);
CHECK_EQ(GetScalar(global, 3), 56);
}
#endif // V8_TARGET_ARCH_ARM || SIMD_LOWERING_TARGET || V8_TARGET_ARCH_X64
@ -2223,10 +2247,7 @@ WASM_EXEC_COMPILED_TEST(SimdF32x4GetGlobal) {
FLAG_wasm_simd_prototype = true;
WasmRunner<int32_t, int32_t> r(kExecuteCompiled);
float* global = r.module().AddGlobal<float>(kWasmS128);
*(global) = 0.0;
*(global + 1) = 1.5;
*(global + 2) = 2.25;
*(global + 3) = 3.5;
SetVectorByLanes<float>(global, {{0.0, 1.5, 2.25, 3.5}});
r.AllocateLocal(kWasmI32);
BUILD(
r, WASM_SET_LOCAL(1, WASM_I32V(1)),
@ -2259,10 +2280,10 @@ WASM_EXEC_COMPILED_TEST(SimdF32x4SetGlobal) {
WASM_F32(65.0))),
WASM_I32V(1));
CHECK_EQ(1, r.Call(0));
CHECK_EQ(*global, 13.5);
CHECK_EQ(*(global + 1), 45.5);
CHECK_EQ(*(global + 2), 32.25);
CHECK_EQ(*(global + 3), 65.0);
CHECK_EQ(GetScalar(global, 0), 13.5f);
CHECK_EQ(GetScalar(global, 1), 45.5f);
CHECK_EQ(GetScalar(global, 2), 32.25f);
CHECK_EQ(GetScalar(global, 3), 65.0f);
}
#endif // V8_TARGET_ARCH_ARM || SIMD_LOWERING_TARGET