[Turbofan] Add native ARM support for basic SIMD 32x4 operations.

- Adds Float32x4 ExtractLane, ReplaceLane, Splat, Add, Sub,
and conversions to Int32x4 and Uint32x4.
- Adds Int32x4 ExtractLane, ReplaceLane, Splat, Add, Sub and
conversions to Float32x4 (int and unsigned int).
- Adds Int32x4 CompareEqual, CompareNotEqual.
- Adds S32x4 Select.
- Adds tests for all new SIMD operations.

LOG=N
BUG=v8:4124

Review-Url: https://codereview.chromium.org/2584863002
Cr-Commit-Position: refs/heads/master@{#41828}
This commit is contained in:
bbudge 2016-12-19 14:23:03 -08:00 committed by Commit bot
parent cc7e0b0eff
commit 0625a686b5
12 changed files with 615 additions and 51 deletions

View File

@ -48,7 +48,7 @@ namespace internal {
bool CpuFeatures::SupportsCrankshaft() { return true; }
bool CpuFeatures::SupportsSimd128() { return false; }
bool CpuFeatures::SupportsSimd128() { return true; }
int DoubleRegister::NumRegisters() {
return CpuFeatures::IsSupported(VFP32DREGS) ? 32 : 16;

View File

@ -3360,12 +3360,13 @@ void Simulator::DecodeTypeVFP(Instruction* instr) {
int vn = instr->VFPNRegValue(kDoublePrecision);
int rt = instr->RtValue();
int opc1_opc2 = (instr->Bits(22, 21) << 2) | instr->Bits(6, 5);
uint64_t data;
get_d_register(vn, &data);
if ((opc1_opc2 & 0xb) == 0) {
// NeonS32 / NeonU32
double dn_value = get_double_from_d_register(vn);
int32_t data[2];
memcpy(data, &dn_value, 8);
set_register(rt, data[instr->Bit(21)]);
int32_t int_data[2];
memcpy(int_data, &data, sizeof(int_data));
set_register(rt, int_data[instr->Bit(21)]);
} else {
uint64_t data;
get_d_register(vn, &data);

View File

@ -1505,6 +1505,91 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
DCHECK_EQ(LeaveCC, i.OutputSBit());
break;
}
case kArmFloat32x4Splat: {
__ vdup(i.OutputSimd128Register(), i.InputFloatRegister(0));
break;
}
case kArmFloat32x4ExtractLane: {
__ ExtractLane(i.OutputFloatRegister(), i.InputSimd128Register(0),
kScratchReg, i.InputInt8(1));
break;
}
case kArmFloat32x4ReplaceLane: {
__ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputFloatRegister(2), kScratchReg, i.InputInt8(1));
break;
}
case kArmFloat32x4FromInt32x4: {
__ vcvt_f32_s32(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmFloat32x4FromUint32x4: {
__ vcvt_f32_u32(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmFloat32x4Add: {
__ vadd(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmFloat32x4Sub: {
__ vsub(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmInt32x4Splat: {
__ vdup(Neon32, i.OutputSimd128Register(), i.InputRegister(0));
break;
}
case kArmInt32x4ExtractLane: {
__ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS32,
i.InputInt8(1));
break;
}
case kArmInt32x4ReplaceLane: {
__ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputRegister(2), NeonS32, i.InputInt8(1));
break;
}
case kArmInt32x4FromFloat32x4: {
__ vcvt_s32_f32(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmUint32x4FromFloat32x4: {
__ vcvt_u32_f32(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kArmInt32x4Add: {
__ vadd(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmInt32x4Sub: {
__ vsub(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmInt32x4Eq: {
__ vceq(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmInt32x4Ne: {
Simd128Register dst = i.OutputSimd128Register();
__ vceq(Neon32, dst, i.InputSimd128Register(0),
i.InputSimd128Register(1));
__ vmvn(dst, dst);
break;
}
case kArmSimd32x4Select: {
// Select is a ternary op, so we need to move one input into the
// destination. Use vtst to canonicalize the 'boolean' input #0.
__ vtst(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(0));
__ vbsl(i.OutputSimd128Register(), i.InputSimd128Register(1),
i.InputSimd128Register(2));
break;
}
case kCheckedLoadInt8:
ASSEMBLE_CHECKED_LOAD_INTEGER(ldrsb);
break;

View File

@ -119,7 +119,24 @@ namespace compiler {
V(ArmLdr) \
V(ArmStr) \
V(ArmPush) \
V(ArmPoke)
V(ArmPoke) \
V(ArmFloat32x4Splat) \
V(ArmFloat32x4ExtractLane) \
V(ArmFloat32x4ReplaceLane) \
V(ArmFloat32x4FromInt32x4) \
V(ArmFloat32x4FromUint32x4) \
V(ArmFloat32x4Add) \
V(ArmFloat32x4Sub) \
V(ArmInt32x4Splat) \
V(ArmInt32x4ExtractLane) \
V(ArmInt32x4ReplaceLane) \
V(ArmInt32x4FromFloat32x4) \
V(ArmUint32x4FromFloat32x4) \
V(ArmInt32x4Add) \
V(ArmInt32x4Sub) \
V(ArmInt32x4Eq) \
V(ArmInt32x4Ne) \
V(ArmSimd32x4Select)
// Addressing modes represent the "shape" of inputs to an instruction.
// Many instructions support multiple addressing modes. Addressing modes

View File

@ -108,6 +108,23 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmFloat32Min:
case kArmFloat64Min:
case kArmFloat64SilenceNaN:
case kArmFloat32x4Splat:
case kArmFloat32x4ExtractLane:
case kArmFloat32x4ReplaceLane:
case kArmFloat32x4FromInt32x4:
case kArmFloat32x4FromUint32x4:
case kArmFloat32x4Add:
case kArmFloat32x4Sub:
case kArmInt32x4Splat:
case kArmInt32x4ExtractLane:
case kArmInt32x4ReplaceLane:
case kArmInt32x4FromFloat32x4:
case kArmUint32x4FromFloat32x4:
case kArmInt32x4Add:
case kArmInt32x4Sub:
case kArmInt32x4Eq:
case kArmInt32x4Ne:
case kArmSimd32x4Select:
return kNoOpcodeFlags;
case kArmVldrF32:

View File

@ -2286,6 +2286,113 @@ void InstructionSelector::VisitAtomicStore(Node* node) {
Emit(code, 0, nullptr, input_count, inputs);
}
// Selects kArmFloat32x4Splat: broadcast a scalar float into all four lanes
// (codegen emits vdup). The scalar may come from any operand (g.Use).
void InstructionSelector::VisitCreateFloat32x4(Node* node) {
ArmOperandGenerator g(this);
Emit(kArmFloat32x4Splat, g.DefineAsRegister(node), g.Use(node->InputAt(0)));
}
// Selects kArmFloat32x4ExtractLane: read one float lane. The lane index is a
// static operator parameter, passed to codegen as an immediate.
void InstructionSelector::VisitFloat32x4ExtractLane(Node* node) {
ArmOperandGenerator g(this);
int32_t lane = OpParameter<int32_t>(node);
Emit(kArmFloat32x4ExtractLane, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)), g.UseImmediate(lane));
}
// Selects kArmFloat32x4ReplaceLane: produce a vector equal to input 0 with one
// lane replaced. NOTE: the replacement value (node input 1) is emitted as
// instruction input 2, after the lane immediate — codegen reads it via
// i.InputFloatRegister(2) and the lane via i.InputInt8(1).
void InstructionSelector::VisitFloat32x4ReplaceLane(Node* node) {
ArmOperandGenerator g(this);
int32_t lane = OpParameter<int32_t>(node);
Emit(kArmFloat32x4ReplaceLane, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)), g.UseImmediate(lane),
g.Use(node->InputAt(1)));
}
// Selects kArmFloat32x4FromInt32x4: signed int32x4 -> float32x4 conversion
// (codegen emits vcvt_f32_s32).
void InstructionSelector::VisitFloat32x4FromInt32x4(Node* node) {
ArmOperandGenerator g(this);
Emit(kArmFloat32x4FromInt32x4, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)));
}
// Selects kArmFloat32x4FromUint32x4: unsigned int32x4 -> float32x4 conversion
// (codegen emits vcvt_f32_u32).
void InstructionSelector::VisitFloat32x4FromUint32x4(Node* node) {
ArmOperandGenerator g(this);
Emit(kArmFloat32x4FromUint32x4, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)));
}
// Selects kArmFloat32x4Add: lane-wise float addition (codegen emits vadd).
void InstructionSelector::VisitFloat32x4Add(Node* node) {
ArmOperandGenerator g(this);
Emit(kArmFloat32x4Add, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
}
// Selects kArmFloat32x4Sub: lane-wise float subtraction (codegen emits vsub).
void InstructionSelector::VisitFloat32x4Sub(Node* node) {
ArmOperandGenerator g(this);
Emit(kArmFloat32x4Sub, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
}
// Selects kArmInt32x4Splat: broadcast a scalar int32 into all four lanes
// (codegen emits vdup with Neon32 size).
void InstructionSelector::VisitCreateInt32x4(Node* node) {
ArmOperandGenerator g(this);
Emit(kArmInt32x4Splat, g.DefineAsRegister(node), g.Use(node->InputAt(0)));
}
// Selects kArmInt32x4ExtractLane: read one int32 lane into a core register.
// Lane index is a static operator parameter passed as an immediate.
void InstructionSelector::VisitInt32x4ExtractLane(Node* node) {
ArmOperandGenerator g(this);
int32_t lane = OpParameter<int32_t>(node);
Emit(kArmInt32x4ExtractLane, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)), g.UseImmediate(lane));
}
// Selects kArmInt32x4ReplaceLane: vector equal to input 0 with one lane
// replaced. As with the float variant, the replacement value (node input 1)
// is instruction input 2, after the lane immediate.
void InstructionSelector::VisitInt32x4ReplaceLane(Node* node) {
ArmOperandGenerator g(this);
int32_t lane = OpParameter<int32_t>(node);
Emit(kArmInt32x4ReplaceLane, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)), g.UseImmediate(lane),
g.Use(node->InputAt(1)));
}
// Selects kArmInt32x4FromFloat32x4: float32x4 -> signed int32x4 conversion
// (codegen emits vcvt_s32_f32).
void InstructionSelector::VisitInt32x4FromFloat32x4(Node* node) {
ArmOperandGenerator g(this);
Emit(kArmInt32x4FromFloat32x4, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)));
}
// Selects kArmUint32x4FromFloat32x4: float32x4 -> unsigned int32x4 conversion
// (codegen emits vcvt_u32_f32).
void InstructionSelector::VisitUint32x4FromFloat32x4(Node* node) {
ArmOperandGenerator g(this);
Emit(kArmUint32x4FromFloat32x4, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)));
}
// Selects kArmInt32x4Add: lane-wise wrapping int32 addition (vadd, Neon32).
void InstructionSelector::VisitInt32x4Add(Node* node) {
ArmOperandGenerator g(this);
Emit(kArmInt32x4Add, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
}
// Selects kArmInt32x4Sub: lane-wise wrapping int32 subtraction (vsub, Neon32).
void InstructionSelector::VisitInt32x4Sub(Node* node) {
ArmOperandGenerator g(this);
Emit(kArmInt32x4Sub, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
}
// Selects kArmInt32x4Eq: lane-wise equality producing an all-ones/zero mask
// per lane (codegen emits vceq).
void InstructionSelector::VisitInt32x4Equal(Node* node) {
ArmOperandGenerator g(this);
Emit(kArmInt32x4Eq, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)),
g.UseRegister(node->InputAt(1)));
}
// Selects kArmInt32x4Ne: lane-wise inequality mask. NEON has no vcne, so
// codegen synthesizes it as vceq followed by vmvn (bitwise NOT).
void InstructionSelector::VisitInt32x4NotEqual(Node* node) {
ArmOperandGenerator g(this);
Emit(kArmInt32x4Ne, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)),
g.UseRegister(node->InputAt(1)));
}
// Selects kArmSimd32x4Select: ternary select(mask, a, b). Codegen
// canonicalizes the mask with vtst and then uses vbsl, which overwrites the
// mask register, so the output is defined as its own register here.
void InstructionSelector::VisitSimd32x4Select(Node* node) {
ArmOperandGenerator g(this);
Emit(kArmSimd32x4Select, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
g.UseRegister(node->InputAt(2)));
}
// static
MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() {

View File

@ -1427,16 +1427,40 @@ void InstructionSelector::VisitNode(Node* node) {
case IrOpcode::kUnsafePointerAdd:
MarkAsRepresentation(MachineType::PointerRepresentation(), node);
return VisitUnsafePointerAdd(node);
case IrOpcode::kCreateFloat32x4:
return MarkAsSimd128(node), VisitCreateFloat32x4(node);
case IrOpcode::kFloat32x4ExtractLane:
return MarkAsFloat32(node), VisitFloat32x4ExtractLane(node);
case IrOpcode::kFloat32x4ReplaceLane:
return MarkAsSimd128(node), VisitFloat32x4ReplaceLane(node);
case IrOpcode::kFloat32x4FromInt32x4:
return MarkAsSimd128(node), VisitFloat32x4FromInt32x4(node);
case IrOpcode::kFloat32x4FromUint32x4:
return MarkAsSimd128(node), VisitFloat32x4FromUint32x4(node);
case IrOpcode::kFloat32x4Add:
return MarkAsSimd128(node), VisitFloat32x4Add(node);
case IrOpcode::kFloat32x4Sub:
return MarkAsSimd128(node), VisitFloat32x4Sub(node);
case IrOpcode::kCreateInt32x4:
return MarkAsSimd128(node), VisitCreateInt32x4(node);
case IrOpcode::kInt32x4ExtractLane:
return MarkAsWord32(node), VisitInt32x4ExtractLane(node);
case IrOpcode::kInt32x4ReplaceLane:
return MarkAsSimd128(node), VisitInt32x4ReplaceLane(node);
case IrOpcode::kInt32x4FromFloat32x4:
return MarkAsSimd128(node), VisitInt32x4FromFloat32x4(node);
case IrOpcode::kUint32x4FromFloat32x4:
return MarkAsSimd128(node), VisitUint32x4FromFloat32x4(node);
case IrOpcode::kInt32x4Add:
return MarkAsSimd128(node), VisitInt32x4Add(node);
case IrOpcode::kInt32x4Sub:
return MarkAsSimd128(node), VisitInt32x4Sub(node);
case IrOpcode::kInt32x4Equal:
return MarkAsSimd128(node), VisitInt32x4Equal(node);
case IrOpcode::kInt32x4NotEqual:
return MarkAsSimd128(node), VisitInt32x4NotEqual(node);
case IrOpcode::kSimd32x4Select:
return MarkAsSimd128(node), VisitSimd32x4Select(node);
default:
V8_Fatal(__FILE__, __LINE__, "Unexpected operator #%d:%s @ node #%d",
node->opcode(), node->op()->mnemonic(), node->id());
@ -1764,7 +1788,7 @@ void InstructionSelector::VisitWord32PairShr(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitWord32PairSar(Node* node) { UNIMPLEMENTED(); }
#endif // V8_TARGET_ARCH_64_BIT
#if !V8_TARGET_ARCH_X64
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM
void InstructionSelector::VisitCreateInt32x4(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitInt32x4ExtractLane(Node* node) {
@ -1778,7 +1802,46 @@ void InstructionSelector::VisitInt32x4ReplaceLane(Node* node) {
void InstructionSelector::VisitInt32x4Add(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitInt32x4Sub(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM
#if !V8_TARGET_ARCH_ARM
void InstructionSelector::VisitCreateFloat32x4(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitFloat32x4ExtractLane(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitFloat32x4ReplaceLane(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitFloat32x4FromInt32x4(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitFloat32x4FromUint32x4(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitFloat32x4Add(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitFloat32x4Sub(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitInt32x4FromFloat32x4(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitUint32x4FromFloat32x4(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitInt32x4Equal(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitInt32x4NotEqual(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitSimd32x4Select(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }

View File

@ -3235,21 +3235,45 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode,
const NodeVector& inputs) {
has_simd_ = true;
switch (opcode) {
case wasm::kExprF32x4Splat:
return graph()->NewNode(jsgraph()->machine()->CreateFloat32x4(),
inputs[0], inputs[0], inputs[0], inputs[0]);
case wasm::kExprF32x4FromInt32x4:
return graph()->NewNode(jsgraph()->machine()->Float32x4FromInt32x4(),
inputs[0]);
case wasm::kExprF32x4FromUint32x4:
return graph()->NewNode(jsgraph()->machine()->Float32x4FromUint32x4(),
inputs[0]);
case wasm::kExprF32x4Add:
return graph()->NewNode(jsgraph()->machine()->Float32x4Add(), inputs[0],
inputs[1]);
case wasm::kExprF32x4Sub:
return graph()->NewNode(jsgraph()->machine()->Float32x4Sub(), inputs[0],
inputs[1]);
case wasm::kExprI32x4Splat:
return graph()->NewNode(jsgraph()->machine()->CreateInt32x4(), inputs[0],
inputs[0], inputs[0], inputs[0]);
case wasm::kExprI32x4FromFloat32x4:
return graph()->NewNode(jsgraph()->machine()->Int32x4FromFloat32x4(),
inputs[0]);
case wasm::kExprUi32x4FromFloat32x4:
return graph()->NewNode(jsgraph()->machine()->Uint32x4FromFloat32x4(),
inputs[0]);
case wasm::kExprI32x4Add:
return graph()->NewNode(jsgraph()->machine()->Int32x4Add(), inputs[0],
inputs[1]);
case wasm::kExprI32x4Sub:
return graph()->NewNode(jsgraph()->machine()->Int32x4Sub(), inputs[0],
inputs[1]);
case wasm::kExprF32x4Splat:
return graph()->NewNode(jsgraph()->machine()->CreateFloat32x4(),
inputs[0], inputs[0], inputs[0], inputs[0]);
case wasm::kExprF32x4Add:
return graph()->NewNode(jsgraph()->machine()->Float32x4Add(), inputs[0],
case wasm::kExprI32x4Eq:
return graph()->NewNode(jsgraph()->machine()->Int32x4Equal(), inputs[0],
inputs[1]);
case wasm::kExprI32x4Ne:
return graph()->NewNode(jsgraph()->machine()->Int32x4NotEqual(),
inputs[0], inputs[1]);
case wasm::kExprS32x4Select:
return graph()->NewNode(jsgraph()->machine()->Simd32x4Select(), inputs[0],
inputs[1], inputs[2]);
default:
return graph()->NewNode(UnsupportedOpcode(opcode), nullptr);
}

View File

@ -460,6 +460,8 @@ class LocalDeclEncoder {
static_cast<byte>(index)
#define WASM_UNOP(opcode, x) x, static_cast<byte>(opcode)
#define WASM_BINOP(opcode, x, y) x, y, static_cast<byte>(opcode)
#define WASM_SIMD_BINOP(opcode, x, y) \
x, y, kSimdPrefix, static_cast<byte>(opcode)
//------------------------------------------------------------------------------
// Int32 operations
@ -621,19 +623,31 @@ class LocalDeclEncoder {
//------------------------------------------------------------------------------
// Simd Operations.
//------------------------------------------------------------------------------
#define WASM_SIMD_F32x4_SPLAT(x) x, kSimdPrefix, kExprF32x4Splat & 0xff
#define WASM_SIMD_F32x4_EXTRACT_LANE(lane, x) \
x, kSimdPrefix, kExprF32x4ExtractLane & 0xff, static_cast<byte>(lane)
#define WASM_SIMD_F32x4_REPLACE_LANE(lane, x, y) \
x, y, kSimdPrefix, kExprF32x4ReplaceLane & 0xff, static_cast<byte>(lane)
#define WASM_SIMD_F32x4_FROM_I32x4(x) \
x, kSimdPrefix, kExprF32x4FromInt32x4 & 0xff
#define WASM_SIMD_F32x4_FROM_U32x4(x) \
x, kSimdPrefix, kExprF32x4FromUint32x4 & 0xff
#define WASM_SIMD_F32x4_ADD(x, y) x, y, kSimdPrefix, kExprF32x4Add & 0xff
#define WASM_SIMD_F32x4_SUB(x, y) x, y, kSimdPrefix, kExprF32x4Sub & 0xff
#define WASM_SIMD_I32x4_SPLAT(x) x, kSimdPrefix, kExprI32x4Splat & 0xff
#define WASM_SIMD_I32x4_EXTRACT_LANE(lane, x) \
x, kSimdPrefix, kExprI32x4ExtractLane & 0xff, static_cast<byte>(lane)
#define WASM_SIMD_I32x4_REPLACE_LANE(lane, x, y) \
x, y, kSimdPrefix, kExprI32x4ReplaceLane & 0xff, static_cast<byte>(lane)
#define WASM_SIMD_I32x4_FROM_F32x4(x) \
x, kSimdPrefix, kExprI32x4FromFloat32x4 & 0xff
#define WASM_SIMD_U32x4_FROM_F32x4(x) \
x, kSimdPrefix, kExprUi32x4FromFloat32x4 & 0xff
#define WASM_SIMD_S32x4_SELECT(x, y, z) \
x, y, z, kSimdPrefix, kExprS32x4Select & 0xff
#define WASM_SIMD_I32x4_ADD(x, y) x, y, kSimdPrefix, kExprI32x4Add & 0xff
#define WASM_SIMD_I32x4_SUB(x, y) x, y, kSimdPrefix, kExprI32x4Sub & 0xff
#define WASM_SIMD_F32x4_SPLAT(x) x, kSimdPrefix, kExprF32x4Splat & 0xff
#define WASM_SIMD_F32x4_EXTRACT_LANE(lane, x) \
x, kSimdPrefix, kExprF32x4ExtractLane & 0xff, static_cast<byte>(lane)
#define WASM_SIMD_F32x4_ADD(x, y) x, y, kSimdPrefix, kExprF32x4Add & 0xff
#define WASM_SIMD_F32x4_REPLACE_LANE(lane, x, y) \
x, y, kSimdPrefix, kExprF32x4ReplaceLane & 0xff, static_cast<byte>(lane)
#define SIG_ENTRY_v_v kWasmFunctionTypeForm, 0, 0
#define SIZEOF_SIG_ENTRY_v_v 3

View File

@ -397,8 +397,8 @@ const WasmCodePosition kNoCodePosition = -1;
V(I32x4ExtractLane, 0xe51c, _) \
V(I32x4ReplaceLane, 0xe51d, _) \
V(I16x8ExtractLane, 0xe539, _) \
V(I8x16ExtractLane, 0xe558, _) \
V(I16x8ReplaceLane, 0xe53a, _) \
V(I8x16ExtractLane, 0xe558, _) \
V(I8x16ReplaceLane, 0xe559, _)
#define FOREACH_ATOMIC_OPCODE(V) \

View File

@ -206,7 +206,7 @@ v8_executable("cctest") {
"test-disasm-arm.cc",
"test-macro-assembler-arm.cc",
"test-run-wasm-relocation-arm.cc",
"wasm/test-run-wasm-simd-lowering.cc",
"wasm/test-run-wasm-simd.cc",
]
} else if (v8_current_cpu == "arm64") {
sources += [ ### gcmole(arch:arm64) ###

View File

@ -13,10 +13,39 @@ using namespace v8::internal;
using namespace v8::internal::compiler;
using namespace v8::internal::wasm;
namespace {
typedef float (*FloatBinOp)(float, float);
typedef int32_t (*Int32BinOp)(int32_t, int32_t);
// Scalar reference implementation of lane-wise addition; used to compute the
// expected value for SIMD binop tests.
template <typename T>
T Add(T lhs, T rhs) {
  return lhs + rhs;
}
// Scalar reference implementation of lane-wise subtraction; used to compute
// the expected value for SIMD binop tests.
template <typename T>
T Sub(T lhs, T rhs) {
  return lhs - rhs;
}
// Scalar reference for lane-wise equality: mirrors the SIMD compare result,
// which is an all-ones lane (0xFFFFFFFF) on true and zero on false.
template <typename T>
int32_t Equal(T lhs, T rhs) {
  return (lhs == rhs) ? 0xFFFFFFFF : 0;
}
// Scalar reference for lane-wise inequality: mirrors the SIMD compare result,
// which is an all-ones lane (0xFFFFFFFF) on true and zero on false.
template <typename T>
int32_t NotEqual(T lhs, T rhs) {
  return (lhs != rhs) ? 0xFFFFFFFF : 0;
}
} // namespace
// TODO(gdeepti): These are tests using sample values to verify functional
// correctness of opcodes, add more tests for a range of values and macroize
// tests.
// TODO(bbudge) Figure out how to compare floats in Wasm code that can handle
// NaNs. For now, our tests avoid using NaNs.
#define WASM_SIMD_CHECK_LANE(TYPE, value, LANE_TYPE, lane_value, lane_index) \
WASM_IF(WASM_##LANE_TYPE##_NE(WASM_GET_LOCAL(lane_value), \
WASM_SIMD_##TYPE##_EXTRACT_LANE( \
@ -32,6 +61,155 @@ using namespace v8::internal::wasm;
#define WASM_SIMD_CHECK_SPLAT4(TYPE, value, LANE_TYPE, lv) \
WASM_SIMD_CHECK4(TYPE, value, LANE_TYPE, lv, lv, lv, lv)
#define WASM_SIMD_CHECK_F32_LANE(TYPE, value, lane_value, lane_index) \
WASM_IF( \
WASM_I32_NE(WASM_I32_REINTERPRET_F32(WASM_GET_LOCAL(lane_value)), \
WASM_I32_REINTERPRET_F32(WASM_SIMD_##TYPE##_EXTRACT_LANE( \
lane_index, WASM_GET_LOCAL(value)))), \
WASM_RETURN1(WASM_ZERO))
#define WASM_SIMD_CHECK4_F32(TYPE, value, lv0, lv1, lv2, lv3) \
WASM_SIMD_CHECK_F32_LANE(TYPE, value, lv0, 0) \
, WASM_SIMD_CHECK_F32_LANE(TYPE, value, lv1, 1), \
WASM_SIMD_CHECK_F32_LANE(TYPE, value, lv2, 2), \
WASM_SIMD_CHECK_F32_LANE(TYPE, value, lv3, 3)
#define WASM_SIMD_CHECK_SPLAT4_F32(TYPE, value, lv) \
WASM_SIMD_CHECK4_F32(TYPE, value, lv, lv, lv, lv)
#if V8_TARGET_ARCH_ARM
WASM_EXEC_TEST(F32x4Splat) {
FLAG_wasm_simd_prototype = true;
WasmRunner<int32_t, float> r(kExecuteCompiled);
byte lane_val = 0;
byte simd = r.AllocateLocal(kAstS128);
BUILD(r, WASM_BLOCK(WASM_SET_LOCAL(simd, WASM_SIMD_F32x4_SPLAT(
WASM_GET_LOCAL(lane_val))),
WASM_SIMD_CHECK_SPLAT4_F32(F32x4, simd, lane_val),
WASM_RETURN1(WASM_ONE)));
FOR_FLOAT32_INPUTS(i) { CHECK_EQ(1, r.Call(*i)); }
}
WASM_EXEC_TEST(F32x4ReplaceLane) {
FLAG_wasm_simd_prototype = true;
WasmRunner<int32_t, float, float> r(kExecuteCompiled);
byte old_val = 0;
byte new_val = 1;
byte simd = r.AllocateLocal(kAstS128);
BUILD(r, WASM_BLOCK(
WASM_SET_LOCAL(simd,
WASM_SIMD_F32x4_SPLAT(WASM_GET_LOCAL(old_val))),
WASM_SET_LOCAL(
simd, WASM_SIMD_F32x4_REPLACE_LANE(0, WASM_GET_LOCAL(simd),
WASM_GET_LOCAL(new_val))),
WASM_SIMD_CHECK4(F32x4, simd, F32, new_val, old_val, old_val,
old_val),
WASM_SET_LOCAL(
simd, WASM_SIMD_F32x4_REPLACE_LANE(1, WASM_GET_LOCAL(simd),
WASM_GET_LOCAL(new_val))),
WASM_SIMD_CHECK4(F32x4, simd, F32, new_val, new_val, old_val,
old_val),
WASM_SET_LOCAL(
simd, WASM_SIMD_F32x4_REPLACE_LANE(2, WASM_GET_LOCAL(simd),
WASM_GET_LOCAL(new_val))),
WASM_SIMD_CHECK4(F32x4, simd, F32, new_val, new_val, new_val,
old_val),
WASM_SET_LOCAL(
simd, WASM_SIMD_F32x4_REPLACE_LANE(3, WASM_GET_LOCAL(simd),
WASM_GET_LOCAL(new_val))),
WASM_SIMD_CHECK_SPLAT4(F32x4, simd, F32, new_val),
WASM_RETURN1(WASM_ONE)));
CHECK_EQ(1, r.Call(3.14159, -1.5));
}
// Tests both signed and unsigned conversion.
WASM_EXEC_TEST(F32x4FromInt32x4) {
FLAG_wasm_simd_prototype = true;
WasmRunner<int32_t, int32_t, float, float> r(kExecuteCompiled);
byte a = 0;
byte expected_signed = 1;
byte expected_unsigned = 2;
byte simd0 = r.AllocateLocal(kAstS128);
byte simd1 = r.AllocateLocal(kAstS128);
byte simd2 = r.AllocateLocal(kAstS128);
BUILD(r, WASM_BLOCK(
WASM_SET_LOCAL(simd0, WASM_SIMD_I32x4_SPLAT(WASM_GET_LOCAL(a))),
WASM_SET_LOCAL(
simd1, WASM_SIMD_F32x4_FROM_I32x4(WASM_GET_LOCAL(simd0))),
WASM_SIMD_CHECK_SPLAT4_F32(F32x4, simd1, expected_signed),
WASM_SET_LOCAL(
simd2, WASM_SIMD_F32x4_FROM_U32x4(WASM_GET_LOCAL(simd0))),
WASM_SIMD_CHECK_SPLAT4_F32(F32x4, simd2, expected_unsigned),
WASM_RETURN1(WASM_ONE)));
FOR_INT32_INPUTS(i) {
CHECK_EQ(1, r.Call(*i, static_cast<float>(*i),
static_cast<float>(static_cast<uint32_t>(*i))));
}
}
WASM_EXEC_TEST(S32x4Select) {
FLAG_wasm_simd_prototype = true;
WasmRunner<int32_t, int32_t, int32_t> r(kExecuteCompiled);
byte val1 = 0;
byte val2 = 1;
byte mask = r.AllocateLocal(kAstS128);
byte src1 = r.AllocateLocal(kAstS128);
byte src2 = r.AllocateLocal(kAstS128);
BUILD(r,
WASM_BLOCK(
WASM_SET_LOCAL(mask, WASM_SIMD_I32x4_SPLAT(WASM_ZERO)),
WASM_SET_LOCAL(src1, WASM_SIMD_I32x4_SPLAT(WASM_GET_LOCAL(val1))),
WASM_SET_LOCAL(src2, WASM_SIMD_I32x4_SPLAT(WASM_GET_LOCAL(val2))),
WASM_SET_LOCAL(mask, WASM_SIMD_I32x4_REPLACE_LANE(
1, WASM_GET_LOCAL(mask), WASM_I32V(-1))),
WASM_SET_LOCAL(mask, WASM_SIMD_I32x4_REPLACE_LANE(
2, WASM_GET_LOCAL(mask), WASM_I32V(-1))),
WASM_SET_LOCAL(mask, WASM_SIMD_S32x4_SELECT(WASM_GET_LOCAL(mask),
WASM_GET_LOCAL(src1),
WASM_GET_LOCAL(src2))),
WASM_SIMD_CHECK_LANE(I32x4, mask, I32, val2, 0),
WASM_SIMD_CHECK_LANE(I32x4, mask, I32, val1, 1),
WASM_SIMD_CHECK_LANE(I32x4, mask, I32, val1, 2),
WASM_SIMD_CHECK_LANE(I32x4, mask, I32, val2, 3),
WASM_RETURN1(WASM_ONE)));
CHECK_EQ(1, r.Call(0x1234, 0x5678));
}
static void RunF32x4BinopTest(WasmOpcode simd_op, FloatBinOp expected_op) {
FLAG_wasm_simd_prototype = true;
WasmRunner<int32_t, float, float, float> r(kExecuteCompiled);
byte a = 0;
byte b = 1;
byte expected = 2;
byte simd0 = r.AllocateLocal(kAstS128);
byte simd1 = r.AllocateLocal(kAstS128);
BUILD(r, WASM_BLOCK(
WASM_SET_LOCAL(simd0, WASM_SIMD_F32x4_SPLAT(WASM_GET_LOCAL(a))),
WASM_SET_LOCAL(simd1, WASM_SIMD_F32x4_SPLAT(WASM_GET_LOCAL(b))),
WASM_SET_LOCAL(simd1, WASM_SIMD_BINOP(simd_op & 0xffu,
WASM_GET_LOCAL(simd0),
WASM_GET_LOCAL(simd1))),
WASM_SIMD_CHECK_SPLAT4_F32(F32x4, simd1, expected),
WASM_RETURN1(WASM_ONE)));
FOR_FLOAT32_INPUTS(i) {
if (std::isnan(*i)) continue;
FOR_FLOAT32_INPUTS(j) {
if (std::isnan(*j)) continue;
CHECK_EQ(1, r.Call(*i, *j, expected_op(*i, *j)));
}
}
}
WASM_EXEC_TEST(F32x4Add) { RunF32x4BinopTest(kExprF32x4Add, Add); }
WASM_EXEC_TEST(F32x4Sub) { RunF32x4BinopTest(kExprF32x4Sub, Sub); }
#endif // V8_TARGET_ARCH_ARM
WASM_EXEC_TEST(I32x4Splat) {
FLAG_wasm_simd_prototype = true;
@ -89,7 +267,77 @@ WASM_EXEC_TEST(I32x4ReplaceLane) {
CHECK_EQ(1, r.Call(1, 2));
}
WASM_EXEC_TEST(I32x4Add) {
#if V8_TARGET_ARCH_ARM
// Determines if conversion from float to int will be valid.
// Returns true iff truncating |val| toward zero yields a value representable
// as uint32 (when |unsigned_integer|) or int32 (otherwise). NaN is invalid.
// NOTE(review): kMaxInt/kMinInt are project constants (presumably V8's int32
// limits) — defined outside this view.
bool CanRoundToZeroAndConvert(double val, bool unsigned_integer) {
const double max_uint = static_cast<double>(0xffffffffu);
const double max_int = static_cast<double>(kMaxInt);
const double min_int = static_cast<double>(kMinInt);
// Check for NaN.
if (val != val) {
return false;
}
// Round to zero and check for overflow. This code works because 32 bit
// integers can be exactly represented by ieee-754 64bit floating-point
// values.
// Open bounds (e.g. val > -1) are used because any double strictly inside
// them truncates into range.
return unsigned_integer ? (val < (max_uint + 1.0)) && (val > -1)
: (val < (max_int + 1.0)) && (val > (min_int - 1.0));
}
// Result for a |val| that CanRoundToZeroAndConvert rejected: NaN -> 0,
// out-of-range saturates to the target type's min/max. Presumably this
// matches the hardware's saturating vcvt behavior — confirm against codegen.
// NOTE(review): the unsigned max 0xffffffffu is returned through an int,
// relying on the usual two's-complement conversion to -1.
int ConvertInvalidValue(double val, bool unsigned_integer) {
if (val != val) {
return 0;
} else {
if (unsigned_integer) {
return (val < 0) ? 0 : 0xffffffffu;
} else {
return (val < 0) ? kMinInt : kMaxInt;
}
}
}
// Scalar reference conversion double -> int32 bit pattern: plain truncation
// when in range, otherwise the saturated/zeroed value from
// ConvertInvalidValue. |unsigned_integer| selects uint32 semantics (the
// result is still returned as an int32_t bit pattern).
int32_t ConvertToInt(double val, bool unsigned_integer) {
int32_t result =
unsigned_integer ? static_cast<uint32_t>(val) : static_cast<int32_t>(val);
if (!CanRoundToZeroAndConvert(val, unsigned_integer)) {
result = ConvertInvalidValue(val, unsigned_integer);
}
return result;
}
// Tests both signed and unsigned conversion.
WASM_EXEC_TEST(I32x4FromFloat32x4) {
FLAG_wasm_simd_prototype = true;
WasmRunner<int32_t, float, int32_t, int32_t> r(kExecuteCompiled);
byte a = 0;
byte expected_signed = 1;
byte expected_unsigned = 2;
byte simd0 = r.AllocateLocal(kAstS128);
byte simd1 = r.AllocateLocal(kAstS128);
byte simd2 = r.AllocateLocal(kAstS128);
BUILD(r, WASM_BLOCK(
WASM_SET_LOCAL(simd0, WASM_SIMD_F32x4_SPLAT(WASM_GET_LOCAL(a))),
WASM_SET_LOCAL(
simd1, WASM_SIMD_I32x4_FROM_F32x4(WASM_GET_LOCAL(simd0))),
WASM_SIMD_CHECK_SPLAT4(I32x4, simd1, I32, expected_signed),
WASM_SET_LOCAL(
simd2, WASM_SIMD_U32x4_FROM_F32x4(WASM_GET_LOCAL(simd0))),
WASM_SIMD_CHECK_SPLAT4(I32x4, simd2, I32, expected_unsigned),
WASM_RETURN1(WASM_ONE)));
FOR_FLOAT32_INPUTS(i) {
int32_t signed_value = ConvertToInt(*i, false);
int32_t unsigned_value = ConvertToInt(*i, true);
CHECK_EQ(1, r.Call(*i, signed_value, unsigned_value));
}
}
#endif // V8_TARGET_ARCH_ARM
static void RunI32x4BinopTest(WasmOpcode simd_op, Int32BinOp expected_op) {
FLAG_wasm_simd_prototype = true;
WasmRunner<int32_t, int32_t, int32_t, int32_t> r(kExecuteCompiled);
byte a = 0;
@ -97,38 +345,26 @@ WASM_EXEC_TEST(I32x4Add) {
byte expected = 2;
byte simd0 = r.AllocateLocal(kAstS128);
byte simd1 = r.AllocateLocal(kAstS128);
BUILD(r,
WASM_BLOCK(
WASM_SET_LOCAL(simd0, WASM_SIMD_I32x4_SPLAT(WASM_GET_LOCAL(a))),
WASM_SET_LOCAL(simd1, WASM_SIMD_I32x4_SPLAT(WASM_GET_LOCAL(b))),
WASM_SET_LOCAL(simd1, WASM_SIMD_I32x4_ADD(WASM_GET_LOCAL(simd0),
WASM_GET_LOCAL(simd1))),
WASM_SIMD_CHECK_SPLAT4(I32x4, simd1, I32, expected),
WASM_RETURN1(WASM_ONE)));
BUILD(r, WASM_BLOCK(
WASM_SET_LOCAL(simd0, WASM_SIMD_I32x4_SPLAT(WASM_GET_LOCAL(a))),
WASM_SET_LOCAL(simd1, WASM_SIMD_I32x4_SPLAT(WASM_GET_LOCAL(b))),
WASM_SET_LOCAL(simd1, WASM_SIMD_BINOP(simd_op & 0xffu,
WASM_GET_LOCAL(simd0),
WASM_GET_LOCAL(simd1))),
WASM_SIMD_CHECK_SPLAT4(I32x4, simd1, I32, expected),
WASM_RETURN1(WASM_ONE)));
FOR_INT32_INPUTS(i) {
FOR_INT32_INPUTS(j) { CHECK_EQ(1, r.Call(*i, *j, *i + *j)); }
FOR_INT32_INPUTS(j) { CHECK_EQ(1, r.Call(*i, *j, expected_op(*i, *j))); }
}
}
WASM_EXEC_TEST(I32x4Sub) {
FLAG_wasm_simd_prototype = true;
WasmRunner<int32_t, int32_t, int32_t, int32_t> r(kExecuteCompiled);
byte a = 0;
byte b = 1;
byte expected = 2;
byte simd0 = r.AllocateLocal(kAstS128);
byte simd1 = r.AllocateLocal(kAstS128);
BUILD(r,
WASM_BLOCK(
WASM_SET_LOCAL(simd0, WASM_SIMD_I32x4_SPLAT(WASM_GET_LOCAL(a))),
WASM_SET_LOCAL(simd1, WASM_SIMD_I32x4_SPLAT(WASM_GET_LOCAL(b))),
WASM_SET_LOCAL(simd1, WASM_SIMD_I32x4_SUB(WASM_GET_LOCAL(simd0),
WASM_GET_LOCAL(simd1))),
WASM_SIMD_CHECK_SPLAT4(I32x4, simd1, I32, expected),
WASM_RETURN1(WASM_ONE)));
WASM_EXEC_TEST(I32x4Add) { RunI32x4BinopTest(kExprI32x4Add, Add); }
FOR_INT32_INPUTS(i) {
FOR_INT32_INPUTS(j) { CHECK_EQ(1, r.Call(*i, *j, *i - *j)); }
}
}
WASM_EXEC_TEST(I32x4Sub) { RunI32x4BinopTest(kExprI32x4Sub, Sub); }
#if V8_TARGET_ARCH_ARM
WASM_EXEC_TEST(I32x4Equal) { RunI32x4BinopTest(kExprI32x4Eq, Equal); }
WASM_EXEC_TEST(I32x4NotEqual) { RunI32x4BinopTest(kExprI32x4Ne, NotEqual); }
#endif // V8_TARGET_ARCH_ARM