s390: [wasm-simd] Implement F64x2 simd operations

Also adds F32x4 Sqrt, Div, Min and Max, and modifies the existing F32x4 ReplaceLane and Ne sequences as well as the signed I16x8/I8x16 ExtractLane sequences.

Change-Id: Ia57dcd70a3bad2f1ec4ccc64ff2cb02b9c83aa22
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2081832
Reviewed-by: Junliang Yan <jyan@ca.ibm.com>
Commit-Queue: Milad Farazmand <miladfar@ca.ibm.com>
Cr-Commit-Position: refs/heads/master@{#66539}
This commit is contained in:
Milad Farazmand 2020-02-29 18:47:13 +00:00 committed by Commit Bot
parent ae340e19fe
commit 40406eb649
4 changed files with 192 additions and 52 deletions

View File

@ -2900,6 +2900,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
ASSEMBLE_ATOMIC64_COMP_EXCHANGE_WORD64();
break;
// vector replicate element
case kS390_F64x2Splat: {
  // Replicate element 0 of the double input into the result vector.
  // Condition(3) is the element-size field passed by every F64x2 case in
  // this switch (the F32x4 cases pass 2).
  Simd128Register dst = i.OutputSimd128Register();
  __ vrep(dst, i.InputDoubleRegister(0), Operand(0), Condition(3));
  break;
}
case kS390_F32x4Splat: {
#ifdef V8_TARGET_BIG_ENDIAN
__ vrep(i.OutputSimd128Register(), i.InputDoubleRegister(0), Operand(0),
@ -2929,6 +2934,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
// vector extract element
case kS390_F64x2ExtractLane: {
  // The immediate lane number (input 1) is mirrored before being handed to
  // vrep: lane 0 selects element 1 and lane 1 selects element 0.
  const int element = 1 - i.InputInt8(1);
  __ vrep(i.OutputDoubleRegister(), i.InputSimd128Register(0),
          Operand(element), Condition(3));
  break;
}
case kS390_F32x4ExtractLane: {
__ vrep(i.OutputDoubleRegister(), i.InputSimd128Register(0),
Operand(3 - i.InputInt8(1)), Condition(2));
@ -2945,8 +2955,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kS390_I16x8ExtractLaneS: {
__ vlgv(i.OutputRegister(), i.InputSimd128Register(0),
__ vlgv(kScratchReg, i.InputSimd128Register(0),
MemOperand(r0, 7 - i.InputInt8(1)), Condition(1));
__ lghr(i.OutputRegister(), kScratchReg);
break;
}
case kS390_I8x16ExtractLaneU: {
@ -2955,21 +2966,32 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kS390_I8x16ExtractLaneS: {
__ vlgv(i.OutputRegister(), i.InputSimd128Register(0),
__ vlgv(kScratchReg, i.InputSimd128Register(0),
MemOperand(r0, 15 - i.InputInt8(1)), Condition(0));
__ lgbr(i.OutputRegister(), kScratchReg);
break;
}
// vector replace element
case kS390_F64x2ReplaceLane: {
  Simd128Register result = i.OutputSimd128Register();
  Simd128Register vector = i.InputSimd128Register(0);
  // Mirrored lane number, same convention as the F64x2 extract-lane case.
  const int element = 1 - i.InputInt8(1);
  // Build the new vector in kScratchDoubleReg and copy it to the output
  // only at the end.
  // NOTE(review): presumably this keeps the sequence correct when the output
  // register aliases one of the inputs - confirm.
  __ vlr(kScratchDoubleReg, vector, Condition(0), Condition(0), Condition(0));
  // Element 0 of the replacement value (input 2) goes through kScratchReg
  // and is then written into the selected element of the scratch vector.
  __ vlgv(kScratchReg, i.InputDoubleRegister(2), MemOperand(r0, 0),
          Condition(3));
  __ vlvg(kScratchDoubleReg, kScratchReg, MemOperand(r0, element),
          Condition(3));
  __ vlr(result, kScratchDoubleReg, Condition(0), Condition(0), Condition(0));
  break;
}
case kS390_F32x4ReplaceLane: {
Simd128Register src = i.InputSimd128Register(0);
Simd128Register dst = i.OutputSimd128Register();
if (src != dst) {
__ vlr(dst, src, Condition(0), Condition(0), Condition(0));
}
__ lgdr(kScratchReg, i.InputDoubleRegister(2));
__ srlg(kScratchReg, kScratchReg, Operand(32));
__ vlvg(i.OutputSimd128Register(), kScratchReg,
__ vlr(kScratchDoubleReg, src, Condition(0), Condition(0), Condition(0));
__ vlgv(kScratchReg, i.InputDoubleRegister(2), MemOperand(r0, 0),
Condition(2));
__ vlvg(kScratchDoubleReg, kScratchReg,
MemOperand(r0, 3 - i.InputInt8(1)), Condition(2));
__ vlr(dst, kScratchDoubleReg, Condition(0), Condition(0), Condition(0));
break;
}
case kS390_I32x4ReplaceLane: {
@ -3003,6 +3025,42 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
// vector binops
case kS390_F64x2Add: {
  // Lane-wise add of the two vector inputs; the trailing Condition(3) is
  // the element-size field used by the F64x2 cases.
  Simd128Register dst = i.OutputSimd128Register();
  Simd128Register lhs = i.InputSimd128Register(0);
  Simd128Register rhs = i.InputSimd128Register(1);
  __ vfa(dst, lhs, rhs, Condition(0), Condition(0), Condition(3));
  break;
}
case kS390_F64x2Sub: {
  // Lane-wise subtract (input 0 minus input 1 operand order) with the
  // F64x2 element-size field of 3.
  Simd128Register dst = i.OutputSimd128Register();
  Simd128Register lhs = i.InputSimd128Register(0);
  Simd128Register rhs = i.InputSimd128Register(1);
  __ vfs(dst, lhs, rhs, Condition(0), Condition(0), Condition(3));
  break;
}
case kS390_F64x2Mul: {
  // Lane-wise multiply of the two vector inputs with the F64x2
  // element-size field of 3.
  Simd128Register dst = i.OutputSimd128Register();
  Simd128Register lhs = i.InputSimd128Register(0);
  Simd128Register rhs = i.InputSimd128Register(1);
  __ vfm(dst, lhs, rhs, Condition(0), Condition(0), Condition(3));
  break;
}
case kS390_F64x2Div: {
  // Lane-wise divide (input 0 is the first source, input 1 the second) with
  // the F64x2 element-size field of 3.
  Simd128Register dst = i.OutputSimd128Register();
  Simd128Register lhs = i.InputSimd128Register(0);
  Simd128Register rhs = i.InputSimd128Register(1);
  __ vfd(dst, lhs, rhs, Condition(0), Condition(0), Condition(3));
  break;
}
case kS390_F64x2Min: {
  // NOTE(review): the leading Condition(1) differs from the 0 passed by the
  // F64x2 Add/Sub/Mul/Div cases; presumably it selects the min/max function
  // variant - confirm against the ISA reference.
  Simd128Register dst = i.OutputSimd128Register();
  Simd128Register lhs = i.InputSimd128Register(0);
  Simd128Register rhs = i.InputSimd128Register(1);
  __ vfmin(dst, lhs, rhs, Condition(1), Condition(0), Condition(3));
  break;
}
case kS390_F64x2Max: {
  // NOTE(review): the leading Condition(1) differs from the 0 passed by the
  // F64x2 Add/Sub/Mul/Div cases; presumably it selects the min/max function
  // variant - confirm against the ISA reference.
  Simd128Register dst = i.OutputSimd128Register();
  Simd128Register lhs = i.InputSimd128Register(0);
  Simd128Register rhs = i.InputSimd128Register(1);
  __ vfmax(dst, lhs, rhs, Condition(1), Condition(0), Condition(3));
  break;
}
case kS390_F32x4Add: {
__ vfa(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1), Condition(0), Condition(0),
@ -3040,6 +3098,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Condition(2));
break;
}
case kS390_F32x4Div: {
  // Lane-wise divide; the trailing Condition(2) is the element-size field
  // used by the F32x4 cases (the F64x2 cases pass 3).
  Simd128Register dst = i.OutputSimd128Register();
  Simd128Register lhs = i.InputSimd128Register(0);
  Simd128Register rhs = i.InputSimd128Register(1);
  __ vfd(dst, lhs, rhs, Condition(0), Condition(0), Condition(2));
  break;
}
case kS390_F32x4Min: {
  // NOTE(review): the leading Condition(1) differs from the 0 passed by the
  // F32x4Div case; presumably it selects the min/max function variant -
  // confirm against the ISA reference.
  Simd128Register dst = i.OutputSimd128Register();
  Simd128Register lhs = i.InputSimd128Register(0);
  Simd128Register rhs = i.InputSimd128Register(1);
  __ vfmin(dst, lhs, rhs, Condition(1), Condition(0), Condition(2));
  break;
}
case kS390_F32x4Max: {
  // NOTE(review): the leading Condition(1) differs from the 0 passed by the
  // F32x4Div case; presumably it selects the min/max function variant -
  // confirm against the ISA reference.
  Simd128Register dst = i.OutputSimd128Register();
  Simd128Register lhs = i.InputSimd128Register(0);
  Simd128Register rhs = i.InputSimd128Register(1);
  __ vfmax(dst, lhs, rhs, Condition(1), Condition(0), Condition(2));
  break;
}
case kS390_I32x4Add: {
__ va(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1), Condition(0), Condition(0),
@ -3123,6 +3199,32 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
// vector comparisons
case kS390_F64x2Eq: {
  // Lane-wise equality compare of the two inputs, written straight to the
  // output register; element-size field 3 as in the other F64x2 cases.
  Simd128Register dst = i.OutputSimd128Register();
  Simd128Register lhs = i.InputSimd128Register(0);
  Simd128Register rhs = i.InputSimd128Register(1);
  __ vfce(dst, lhs, rhs, Condition(0), Condition(0), Condition(3));
  break;
}
case kS390_F64x2Ne: {
  Simd128Register dst = i.OutputSimd128Register();
  Simd128Register lhs = i.InputSimd128Register(0);
  Simd128Register rhs = i.InputSimd128Register(1);
  // Step 1: equality compare into the scratch vector register.
  __ vfce(kScratchDoubleReg, lhs, rhs, Condition(0), Condition(0),
          Condition(3));
  // Step 2: vno with the scratch register as both sources yields the
  // inverted mask, which becomes the result.
  __ vno(dst, kScratchDoubleReg, kScratchDoubleReg, Condition(0),
         Condition(0), Condition(3));
  break;
}
case kS390_F64x2Le: {
  // The sources are deliberately swapped: input 1 is the first source and
  // input 0 the second, so Le is expressed through the reversed comparison.
  Simd128Register dst = i.OutputSimd128Register();
  Simd128Register first_src = i.InputSimd128Register(1);
  Simd128Register second_src = i.InputSimd128Register(0);
  __ vfche(dst, first_src, second_src, Condition(0), Condition(0),
           Condition(3));
  break;
}
case kS390_F64x2Lt: {
  // The sources are deliberately swapped: input 1 is the first source and
  // input 0 the second, so Lt is expressed through the reversed comparison.
  Simd128Register dst = i.OutputSimd128Register();
  Simd128Register first_src = i.InputSimd128Register(1);
  Simd128Register second_src = i.InputSimd128Register(0);
  __ vfch(dst, first_src, second_src, Condition(0), Condition(0),
          Condition(3));
  break;
}
case kS390_I32x4MinS: {
__ vmn(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1), Condition(0), Condition(0),
@ -3217,12 +3319,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kS390_F32x4Ne: {
__ vfce(i.OutputSimd128Register(), i.InputSimd128Register(0),
__ vfce(kScratchDoubleReg, i.InputSimd128Register(0),
i.InputSimd128Register(1), Condition(0), Condition(0),
Condition(2));
__ vno(i.OutputSimd128Register(), i.OutputSimd128Register(),
i.OutputSimd128Register(), Condition(0), Condition(0),
Condition(2));
__ vno(i.OutputSimd128Register(), kScratchDoubleReg, kScratchDoubleReg,
Condition(0), Condition(0), Condition(2));
break;
}
case kS390_I32x4Ne: {
@ -3392,6 +3493,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
// vector unary ops
case kS390_F64x2Abs: {
  // vfpso with a leading Condition(2); the F64x2Neg case issues the same
  // instruction with a leading 0, so that field picks the sign operation.
  Simd128Register dst = i.OutputSimd128Register();
  Simd128Register src = i.InputSimd128Register(0);
  __ vfpso(dst, src, Condition(2), Condition(0), Condition(3));
  break;
}
case kS390_F64x2Neg: {
  // vfpso with a leading Condition(0); the F64x2Abs case issues the same
  // instruction with a leading 2, so that field picks the sign operation.
  Simd128Register dst = i.OutputSimd128Register();
  Simd128Register src = i.InputSimd128Register(0);
  __ vfpso(dst, src, Condition(0), Condition(0), Condition(3));
  break;
}
case kS390_F64x2Sqrt: {
  // Lane-wise square root of the single vector input; element-size field 3
  // as in the other F64x2 cases.
  Simd128Register dst = i.OutputSimd128Register();
  Simd128Register src = i.InputSimd128Register(0);
  __ vfsq(dst, src, Condition(0), Condition(0), Condition(3));
  break;
}
case kS390_F32x4Abs: {
__ vfpso(i.OutputSimd128Register(), i.InputSimd128Register(0),
Condition(2), Condition(0), Condition(2));
@ -3437,6 +3553,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Condition(0), Condition(0), Condition(2));
break;
}
case kS390_F32x4Sqrt: {
  // Lane-wise square root of the single vector input; element-size field 2
  // as in the other F32x4 cases.
  Simd128Register dst = i.OutputSimd128Register();
  Simd128Register src = i.InputSimd128Register(0);
  __ vfsq(dst, src, Condition(0), Condition(0), Condition(2));
  break;
}
case kS390_S128Not: {
Simd128Register src = i.InputSimd128Register(0);
Simd128Register dst = i.OutputSimd128Register();

View File

@ -197,6 +197,22 @@ namespace compiler {
V(S390_Word64AtomicXorUint16) \
V(S390_Word64AtomicXorUint32) \
V(S390_Word64AtomicXorUint64) \
V(S390_F64x2Splat) \
V(S390_F64x2ReplaceLane) \
V(S390_F64x2Abs) \
V(S390_F64x2Neg) \
V(S390_F64x2Sqrt) \
V(S390_F64x2Add) \
V(S390_F64x2Sub) \
V(S390_F64x2Mul) \
V(S390_F64x2Div) \
V(S390_F64x2Eq) \
V(S390_F64x2Ne) \
V(S390_F64x2Lt) \
V(S390_F64x2Le) \
V(S390_F64x2Min) \
V(S390_F64x2Max) \
V(S390_F64x2ExtractLane) \
V(S390_F32x4Splat) \
V(S390_F32x4ExtractLane) \
V(S390_F32x4ReplaceLane) \
@ -214,6 +230,10 @@ namespace compiler {
V(S390_F32x4RecipSqrtApprox) \
V(S390_F32x4SConvertI32x4) \
V(S390_F32x4UConvertI32x4) \
V(S390_F32x4Sqrt) \
V(S390_F32x4Div) \
V(S390_F32x4Min) \
V(S390_F32x4Max) \
V(S390_I32x4Splat) \
V(S390_I32x4ExtractLane) \
V(S390_I32x4ReplaceLane) \

View File

@ -143,6 +143,22 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kS390_CompressSigned:
case kS390_CompressPointer:
case kS390_CompressAny:
case kS390_F64x2Splat:
case kS390_F64x2ReplaceLane:
case kS390_F64x2Abs:
case kS390_F64x2Neg:
case kS390_F64x2Sqrt:
case kS390_F64x2Add:
case kS390_F64x2Sub:
case kS390_F64x2Mul:
case kS390_F64x2Div:
case kS390_F64x2Eq:
case kS390_F64x2Ne:
case kS390_F64x2Lt:
case kS390_F64x2Le:
case kS390_F64x2Min:
case kS390_F64x2Max:
case kS390_F64x2ExtractLane:
case kS390_F32x4Splat:
case kS390_F32x4ExtractLane:
case kS390_F32x4ReplaceLane:
@ -160,6 +176,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kS390_F32x4RecipSqrtApprox:
case kS390_F32x4SConvertI32x4:
case kS390_F32x4UConvertI32x4:
case kS390_F32x4Sqrt:
case kS390_F32x4Div:
case kS390_F32x4Min:
case kS390_F32x4Max:
case kS390_I32x4Splat:
case kS390_I32x4ExtractLane:
case kS390_I32x4ReplaceLane:

View File

@ -2519,12 +2519,23 @@ void InstructionSelector::VisitWord64AtomicStore(Node* node) {
}
// X-macro list of SIMD type names. It is expanded with a per-type macro
// (e.g. SIMD_TYPES(SIMD_VISIT_SPLAT)) so that V is applied once to each
// listed type.
#define SIMD_TYPES(V) \
V(F64x2) \
V(F32x4) \
V(I32x4) \
V(I16x8) \
V(I8x16)
#define SIMD_BINOP_LIST(V) \
V(F64x2Add) \
V(F64x2Sub) \
V(F64x2Mul) \
V(F64x2Div) \
V(F64x2Eq) \
V(F64x2Ne) \
V(F64x2Lt) \
V(F64x2Le) \
V(F64x2Min) \
V(F64x2Max) \
V(F32x4Add) \
V(F32x4AddHoriz) \
V(F32x4Sub) \
@ -2533,6 +2544,9 @@ void InstructionSelector::VisitWord64AtomicStore(Node* node) {
V(F32x4Ne) \
V(F32x4Lt) \
V(F32x4Le) \
V(F32x4Div) \
V(F32x4Min) \
V(F32x4Max) \
V(I32x4Add) \
V(I32x4AddHoriz) \
V(I32x4Sub) \
@ -2591,10 +2605,14 @@ void InstructionSelector::VisitWord64AtomicStore(Node* node) {
V(S128Xor)
#define SIMD_UNOP_LIST(V) \
V(F64x2Abs) \
V(F64x2Neg) \
V(F64x2Sqrt) \
V(F32x4Abs) \
V(F32x4Neg) \
V(F32x4RecipApprox) \
V(F32x4RecipSqrtApprox) \
V(F32x4Sqrt) \
V(I32x4Neg) \
V(I32x4SConvertI16x8Low) \
V(I32x4SConvertI16x8High) \
@ -2649,6 +2667,7 @@ SIMD_TYPES(SIMD_VISIT_SPLAT)
Emit(kS390_##Type##ExtractLane##Sign, g.DefineAsRegister(node), \
g.UseRegister(node->InputAt(0)), g.UseImmediate(lane)); \
}
// Expand SIMD_VISIT_EXTRACT_LANE once per lane type. The second argument is
// the Sign suffix pasted onto the opcode name
// (kS390_<Type>ExtractLane<Sign>); it is left empty for F64x2, F32x4 and
// I32x4, while I16x8 passes U here.
SIMD_VISIT_EXTRACT_LANE(F64x2, )
SIMD_VISIT_EXTRACT_LANE(F32x4, )
SIMD_VISIT_EXTRACT_LANE(I32x4, )
SIMD_VISIT_EXTRACT_LANE(I16x8, U)
@ -2798,42 +2817,8 @@ void InstructionSelector::EmitPrepareResults(
}
}
// Stub visitors: each definition below has a body consisting solely of
// UNIMPLEMENTED(), i.e. no instruction is selected for the node.
void InstructionSelector::VisitF32x4Sqrt(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Div(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Min(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Max(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS8x16Swizzle(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Splat(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2ReplaceLane(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Abs(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Neg(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Sqrt(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Add(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Sub(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Mul(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Div(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Eq(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Ne(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Lt(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Le(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Neg(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Add(Node* node) { UNIMPLEMENTED(); }
@ -2848,12 +2833,6 @@ void InstructionSelector::VisitI64x2ShrU(Node* node) { UNIMPLEMENTED(); }
// Stub visitors: each definition below has a body consisting solely of
// UNIMPLEMENTED(), i.e. no instruction is selected for the node.
void InstructionSelector::VisitI64x2Mul(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Min(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Max(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2ExtractLane(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitLoadTransform(Node* node) { UNIMPLEMENTED(); }
// static