[mips][wasm-simd] Implement f32x4 f64x2 pmin pmax

This CL also fixes bitmask instructions on mips platform.

Change-Id: I550daca3b6b4ece151928836f316d3960a7af437
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2230090
Commit-Queue: Zhao Jiazhong <zhaojiazhong-hf@loongson.cn>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/master@{#68266}
This commit is contained in:
Zhao Jiazhong 2020-06-07 22:29:17 -04:00 committed by Commit Bot
parent d5ba4e5483
commit 2ef37fb675
10 changed files with 158 additions and 8 deletions

View File

@ -2661,7 +2661,8 @@ void InstructionSelector::VisitI64x2MaxU(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X
#if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_IA32 && \
!V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X
!V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64
// TODO(v8:10308) Bitmask operations are in prototype now, we can remove these
// guards when they go into the proposal.
void InstructionSelector::VisitI8x16BitMask(Node* node) { UNIMPLEMENTED(); }
@ -2673,7 +2674,8 @@ void InstructionSelector::VisitF32x4Pmax(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Pmin(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Pmax(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_IA32
// && !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X
// && !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X &&
// !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_X64
// TODO(v8:10553) Prototyping floating point rounding instructions.

View File

@ -2159,6 +2159,26 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ insert_w(dst, i.InputInt8(1) * 2 + 1, kScratchReg);
break;
}
case kMipsF64x2Pmin: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
Simd128Register dst = i.OutputSimd128Register();
Simd128Register lhs = i.InputSimd128Register(0);
Simd128Register rhs = i.InputSimd128Register(1);
// dst = rhs < lhs ? rhs : lhs
__ fclt_d(dst, rhs, lhs);
__ bsel_v(dst, lhs, rhs);
break;
}
case kMipsF64x2Pmax: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
Simd128Register dst = i.OutputSimd128Register();
Simd128Register lhs = i.InputSimd128Register(0);
Simd128Register rhs = i.InputSimd128Register(1);
// dst = lhs < rhs ? rhs : lhs
__ fclt_d(dst, lhs, rhs);
__ bsel_v(dst, lhs, rhs);
break;
}
case kMipsI64x2Add: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ addv_d(i.OutputSimd128Register(), i.InputSimd128Register(0),
@ -2395,6 +2415,26 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1));
break;
}
case kMipsF32x4Pmin: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
Simd128Register dst = i.OutputSimd128Register();
Simd128Register lhs = i.InputSimd128Register(0);
Simd128Register rhs = i.InputSimd128Register(1);
// dst = rhs < lhs ? rhs : lhs
__ fclt_w(dst, rhs, lhs);
__ bsel_v(dst, lhs, rhs);
break;
}
case kMipsF32x4Pmax: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
Simd128Register dst = i.OutputSimd128Register();
Simd128Register lhs = i.InputSimd128Register(0);
Simd128Register rhs = i.InputSimd128Register(1);
// dst = lhs < rhs ? rhs : lhs
__ fclt_w(dst, lhs, rhs);
__ bsel_v(dst, lhs, rhs);
break;
}
case kMipsI32x4SConvertF32x4: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ ftrunc_s_w(i.OutputSimd128Register(), i.InputSimd128Register(0));

View File

@ -155,6 +155,8 @@ namespace compiler {
V(MipsF64x2Ne) \
V(MipsF64x2Lt) \
V(MipsF64x2Le) \
V(MipsF64x2Pmin) \
V(MipsF64x2Pmax) \
V(MipsI64x2Add) \
V(MipsI64x2Sub) \
V(MipsI64x2Mul) \
@ -196,6 +198,8 @@ namespace compiler {
V(MipsF32x4Ne) \
V(MipsF32x4Lt) \
V(MipsF32x4Le) \
V(MipsF32x4Pmin) \
V(MipsF32x4Pmax) \
V(MipsI32x4SConvertF32x4) \
V(MipsI32x4UConvertF32x4) \
V(MipsI32x4Neg) \

View File

@ -57,6 +57,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMipsF64x2Splat:
case kMipsF64x2ExtractLane:
case kMipsF64x2ReplaceLane:
case kMipsF64x2Pmin:
case kMipsF64x2Pmax:
case kMipsI64x2Add:
case kMipsI64x2Sub:
case kMipsI64x2Mul:
@ -85,6 +87,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMipsF32x4Splat:
case kMipsF32x4Sub:
case kMipsF32x4UConvertI32x4:
case kMipsF32x4Pmin:
case kMipsF32x4Pmax:
case kMipsFloat32Max:
case kMipsFloat32Min:
case kMipsFloat32RoundDown:

View File

@ -113,6 +113,14 @@ static void VisitRRR(InstructionSelector* selector, ArchOpcode opcode,
g.UseRegister(node->InputAt(1)));
}
static void VisitUniqueRRR(InstructionSelector* selector, ArchOpcode opcode,
Node* node) {
MipsOperandGenerator g(selector);
selector->Emit(opcode, g.DefineAsRegister(node),
g.UseUniqueRegister(node->InputAt(0)),
g.UseUniqueRegister(node->InputAt(1)));
}
void VisitRRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
MipsOperandGenerator g(selector);
selector->Emit(
@ -2409,6 +2417,22 @@ void InstructionSelector::VisitSignExtendWord16ToInt32(Node* node) {
Emit(kMipsSeh, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)));
}
void InstructionSelector::VisitF32x4Pmin(Node* node) {
VisitUniqueRRR(this, kMipsF32x4Pmin, node);
}
void InstructionSelector::VisitF32x4Pmax(Node* node) {
VisitUniqueRRR(this, kMipsF32x4Pmax, node);
}
void InstructionSelector::VisitF64x2Pmin(Node* node) {
VisitUniqueRRR(this, kMipsF64x2Pmin, node);
}
void InstructionSelector::VisitF64x2Pmax(Node* node) {
VisitUniqueRRR(this, kMipsF64x2Pmax, node);
}
// static
MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() {

View File

@ -2265,6 +2265,26 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputInt8(1));
break;
}
case kMips64F64x2Pmin: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
Simd128Register dst = i.OutputSimd128Register();
Simd128Register lhs = i.InputSimd128Register(0);
Simd128Register rhs = i.InputSimd128Register(1);
// dst = rhs < lhs ? rhs : lhs
__ fclt_d(dst, rhs, lhs);
__ bsel_v(dst, lhs, rhs);
break;
}
case kMips64F64x2Pmax: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
Simd128Register dst = i.OutputSimd128Register();
Simd128Register lhs = i.InputSimd128Register(0);
Simd128Register rhs = i.InputSimd128Register(1);
// dst = lhs < rhs ? rhs : lhs
__ fclt_d(dst, lhs, rhs);
__ bsel_v(dst, lhs, rhs);
break;
}
case kMips64I64x2ReplaceLane: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
Simd128Register src = i.InputSimd128Register(0);
@ -2581,6 +2601,26 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1));
break;
}
case kMips64F32x4Pmin: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
Simd128Register dst = i.OutputSimd128Register();
Simd128Register lhs = i.InputSimd128Register(0);
Simd128Register rhs = i.InputSimd128Register(1);
// dst = rhs < lhs ? rhs : lhs
__ fclt_w(dst, rhs, lhs);
__ bsel_v(dst, lhs, rhs);
break;
}
case kMips64F32x4Pmax: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
Simd128Register dst = i.OutputSimd128Register();
Simd128Register lhs = i.InputSimd128Register(0);
Simd128Register rhs = i.InputSimd128Register(1);
// dst = lhs < rhs ? rhs : lhs
__ fclt_w(dst, lhs, rhs);
__ bsel_v(dst, lhs, rhs);
break;
}
case kMips64I32x4SConvertF32x4: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ ftrunc_s_w(i.OutputSimd128Register(), i.InputSimd128Register(0));

View File

@ -203,6 +203,8 @@ namespace compiler {
V(Mips64F64x2Splat) \
V(Mips64F64x2ExtractLane) \
V(Mips64F64x2ReplaceLane) \
V(Mips64F64x2Pmin) \
V(Mips64F64x2Pmax) \
V(Mips64I64x2Splat) \
V(Mips64I64x2ExtractLane) \
V(Mips64I64x2ReplaceLane) \
@ -229,6 +231,8 @@ namespace compiler {
V(Mips64F32x4Ne) \
V(Mips64F32x4Lt) \
V(Mips64F32x4Le) \
V(Mips64F32x4Pmin) \
V(Mips64F32x4Pmax) \
V(Mips64I32x4SConvertF32x4) \
V(Mips64I32x4UConvertF32x4) \
V(Mips64I32x4Neg) \

View File

@ -82,6 +82,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMips64F64x2Ne:
case kMips64F64x2Lt:
case kMips64F64x2Le:
case kMips64F64x2Pmin:
case kMips64F64x2Pmax:
case kMips64I64x2Splat:
case kMips64I64x2ExtractLane:
case kMips64I64x2ReplaceLane:
@ -113,6 +115,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMips64F32x4Splat:
case kMips64F32x4Sub:
case kMips64F32x4UConvertI32x4:
case kMips64F32x4Pmin:
case kMips64F32x4Pmax:
case kMips64F64x2Splat:
case kMips64F64x2ExtractLane:
case kMips64F64x2ReplaceLane:

View File

@ -163,6 +163,14 @@ static void VisitRRR(InstructionSelector* selector, ArchOpcode opcode,
g.UseRegister(node->InputAt(1)));
}
static void VisitUniqueRRR(InstructionSelector* selector, ArchOpcode opcode,
Node* node) {
Mips64OperandGenerator g(selector);
selector->Emit(opcode, g.DefineAsRegister(node),
g.UseUniqueRegister(node->InputAt(0)),
g.UseUniqueRegister(node->InputAt(1)));
}
void VisitRRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
Mips64OperandGenerator g(selector);
selector->Emit(
@ -3102,6 +3110,22 @@ void InstructionSelector::VisitSignExtendWord32ToInt64(Node* node) {
g.TempImmediate(0));
}
void InstructionSelector::VisitF32x4Pmin(Node* node) {
VisitUniqueRRR(this, kMips64F32x4Pmin, node);
}
void InstructionSelector::VisitF32x4Pmax(Node* node) {
VisitUniqueRRR(this, kMips64F32x4Pmax, node);
}
void InstructionSelector::VisitF64x2Pmin(Node* node) {
VisitUniqueRRR(this, kMips64F64x2Pmin, node);
}
void InstructionSelector::VisitF64x2Pmax(Node* node) {
VisitUniqueRRR(this, kMips64F64x2Pmax, node);
}
// static
MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() {

View File

@ -783,7 +783,8 @@ WASM_SIMD_TEST(F32x4Max) {
// TODO(v8:10501) Prototyping pmin and pmax instructions.
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64 || \
V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_S390X
V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_S390X || V8_TARGET_ARCH_MIPS || \
V8_TARGET_ARCH_MIPS64
WASM_SIMD_TEST_NO_LOWERING(F32x4Pmin) {
FLAG_SCOPE(wasm_simd_post_mvp);
RunF32x4BinOpTest(execution_tier, lower_simd, kExprF32x4Pmin, Minimum);
@ -794,7 +795,8 @@ WASM_SIMD_TEST_NO_LOWERING(F32x4Pmax) {
RunF32x4BinOpTest(execution_tier, lower_simd, kExprF32x4Pmax, Maximum);
}
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64 ||
// V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_S390X
// V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_S390X || V8_TARGET_ARCH_MIPS ||
// V8_TARGET_ARCH_MIPS64
void RunF32x4CompareOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, FloatCompareOp expected_op) {
@ -1388,7 +1390,8 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Div) {
// TODO(v8:10501) Prototyping pmin and pmax instructions.
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64 || \
V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_S390X
V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_S390X || V8_TARGET_ARCH_MIPS || \
V8_TARGET_ARCH_MIPS64
WASM_SIMD_TEST_NO_LOWERING(F64x2Pmin) {
FLAG_SCOPE(wasm_simd_post_mvp);
RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Pmin, Minimum);
@ -1399,7 +1402,8 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Pmax) {
RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Pmax, Maximum);
}
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64 ||
// V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_S390X
// V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_S390X || V8_TARGET_ARCH_MIPS ||
// V8_TARGET_ARCH_MIPS64
void RunF64x2CompareOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, DoubleCompareOp expected_op) {
@ -1646,7 +1650,7 @@ WASM_SIMD_TEST(I16x8ReplaceLane) {
}
#if V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_IA32 || \
V8_TARGET_ARCH_X64
V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
WASM_SIMD_TEST_NO_LOWERING(I8x16BitMask) {
FLAG_SCOPE(wasm_simd_post_mvp);
WasmRunner<int32_t, int32_t> r(execution_tier, lower_simd);
@ -1707,7 +1711,7 @@ WASM_SIMD_TEST_NO_LOWERING(I32x4BitMask) {
}
}
#endif // V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_IA32 ||
// V8_TARGET_ARCH_X64
// V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
WASM_SIMD_TEST(I8x16Splat) {
WasmRunner<int32_t, int32_t> r(execution_tier, lower_simd);