[mips64] Move LoadSplat into macro-assembler

Also fix the i64x2 widening conversions from i32x4 (I64x2{S,U}ConvertI32x4{Low,High}) on mips32 by moving them out of SIMD_BINOP_LIST.

Change-Id: I85e3f8f4ab16c268a5b17189f67c78ef45762e39
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2711737
Reviewed-by: Zhao Jiazhong <zhaojiazhong-hf@loongson.cn>
Commit-Queue: Zhao Jiazhong <zhaojiazhong-hf@loongson.cn>
Auto-Submit: Liu yu <liuyu@loongson.cn>
Cr-Commit-Position: refs/heads/master@{#72907}
This commit is contained in:
LiuYu 2021-02-22 19:32:37 +08:00 committed by Commit Bot
parent 5b783479eb
commit 13c3093b58
7 changed files with 147 additions and 141 deletions

View File

@ -2755,6 +2755,31 @@ void TurboAssembler::ExtMulHigh(MSADataType type, MSARegister dst,
}
#undef EXT_MUL_BINOP
// Emits a scalar load of one element from |src| into a scratch register,
// followed by the MSA fill instruction of the matching width that writes
// |dst|. |sz| selects the element width (MSA_B/H/W/D, used by the instruction
// selector for the 8/16/32/64-bit splat loads); any other value is
// unreachable.
void TurboAssembler::LoadSplat(MSASize sz, MSARegister dst, MemOperand src) {
  UseScratchRegisterScope scratch_scope(this);
  const Register value = scratch_scope.Acquire();

  if (sz == MSA_B) {
    // 8-bit element.
    Lb(value, src);
    fill_b(dst, value);
  } else if (sz == MSA_H) {
    // 16-bit element.
    Lh(value, src);
    fill_h(dst, value);
  } else if (sz == MSA_W) {
    // 32-bit element.
    Lw(value, src);
    fill_w(dst, value);
  } else if (sz == MSA_D) {
    // 64-bit element.
    Ld(value, src);
    fill_d(dst, value);
  } else {
    UNREACHABLE();
  }
}
void TurboAssembler::MSARoundW(MSARegister dst, MSARegister src,
FPURoundingMode mode) {
BlockTrampolinePoolScope block_trampoline_pool(this);

View File

@ -802,6 +802,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
MSARegister src2);
void ExtMulHigh(MSADataType type, MSARegister dst, MSARegister src1,
MSARegister src2);
// Loads one sz-sized element from src into a scratch register and fills dst
// with it (fill_b/fill_h/fill_w/fill_d, chosen by sz).
void LoadSplat(MSASize sz, MSARegister dst, MemOperand src);
void MSARoundW(MSARegister dst, MSARegister src, FPURoundingMode mode);
void MSARoundD(MSARegister dst, MSARegister src, FPURoundingMode mode);

View File

@ -2128,6 +2128,10 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(F64x2PromoteLowF32x4, kMipsF64x2PromoteLowF32x4) \
V(I64x2Neg, kMipsI64x2Neg) \
V(I64x2BitMask, kMipsI64x2BitMask) \
V(I64x2SConvertI32x4Low, kMipsI64x2SConvertI32x4Low) \
V(I64x2SConvertI32x4High, kMipsI64x2SConvertI32x4High) \
V(I64x2UConvertI32x4Low, kMipsI64x2UConvertI32x4Low) \
V(I64x2UConvertI32x4High, kMipsI64x2UConvertI32x4High) \
V(F32x4SConvertI32x4, kMipsF32x4SConvertI32x4) \
V(F32x4UConvertI32x4, kMipsF32x4UConvertI32x4) \
V(F32x4Abs, kMipsF32x4Abs) \
@ -2180,114 +2184,110 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I8x16ShrS) \
V(I8x16ShrU)
#define SIMD_BINOP_LIST(V) \
V(F64x2Add, kMipsF64x2Add) \
V(F64x2Sub, kMipsF64x2Sub) \
V(F64x2Mul, kMipsF64x2Mul) \
V(F64x2Div, kMipsF64x2Div) \
V(F64x2Min, kMipsF64x2Min) \
V(F64x2Max, kMipsF64x2Max) \
V(F64x2Eq, kMipsF64x2Eq) \
V(F64x2Ne, kMipsF64x2Ne) \
V(F64x2Lt, kMipsF64x2Lt) \
V(F64x2Le, kMipsF64x2Le) \
V(I64x2Eq, kMipsI64x2Eq) \
V(I64x2Ne, kMipsI64x2Ne) \
V(I64x2Add, kMipsI64x2Add) \
V(I64x2Sub, kMipsI64x2Sub) \
V(I64x2Mul, kMipsI64x2Mul) \
V(I64x2GtS, kMipsI64x2GtS) \
V(I64x2GeS, kMipsI64x2GeS) \
V(I64x2SConvertI32x4Low, kMipsI64x2SConvertI32x4Low) \
V(I64x2SConvertI32x4High, kMipsI64x2SConvertI32x4High) \
V(I64x2UConvertI32x4Low, kMipsI64x2UConvertI32x4Low) \
V(I64x2UConvertI32x4High, kMipsI64x2UConvertI32x4High) \
V(I64x2ExtMulLowI32x4S, kMipsI64x2ExtMulLowI32x4S) \
V(I64x2ExtMulHighI32x4S, kMipsI64x2ExtMulHighI32x4S) \
V(I64x2ExtMulLowI32x4U, kMipsI64x2ExtMulLowI32x4U) \
V(I64x2ExtMulHighI32x4U, kMipsI64x2ExtMulHighI32x4U) \
V(F32x4Add, kMipsF32x4Add) \
V(F32x4AddHoriz, kMipsF32x4AddHoriz) \
V(F32x4Sub, kMipsF32x4Sub) \
V(F32x4Mul, kMipsF32x4Mul) \
V(F32x4Div, kMipsF32x4Div) \
V(F32x4Max, kMipsF32x4Max) \
V(F32x4Min, kMipsF32x4Min) \
V(F32x4Eq, kMipsF32x4Eq) \
V(F32x4Ne, kMipsF32x4Ne) \
V(F32x4Lt, kMipsF32x4Lt) \
V(F32x4Le, kMipsF32x4Le) \
V(I32x4Add, kMipsI32x4Add) \
V(I32x4AddHoriz, kMipsI32x4AddHoriz) \
V(I32x4Sub, kMipsI32x4Sub) \
V(I32x4Mul, kMipsI32x4Mul) \
V(I32x4MaxS, kMipsI32x4MaxS) \
V(I32x4MinS, kMipsI32x4MinS) \
V(I32x4MaxU, kMipsI32x4MaxU) \
V(I32x4MinU, kMipsI32x4MinU) \
V(I32x4Eq, kMipsI32x4Eq) \
V(I32x4Ne, kMipsI32x4Ne) \
V(I32x4GtS, kMipsI32x4GtS) \
V(I32x4GeS, kMipsI32x4GeS) \
V(I32x4GtU, kMipsI32x4GtU) \
V(I32x4GeU, kMipsI32x4GeU) \
V(I32x4Abs, kMipsI32x4Abs) \
V(I32x4DotI16x8S, kMipsI32x4DotI16x8S) \
V(I32x4ExtMulLowI16x8S, kMipsI32x4ExtMulLowI16x8S) \
V(I32x4ExtMulHighI16x8S, kMipsI32x4ExtMulHighI16x8S) \
V(I32x4ExtMulLowI16x8U, kMipsI32x4ExtMulLowI16x8U) \
V(I32x4ExtMulHighI16x8U, kMipsI32x4ExtMulHighI16x8U) \
V(I16x8Add, kMipsI16x8Add) \
V(I16x8AddSatS, kMipsI16x8AddSatS) \
V(I16x8AddSatU, kMipsI16x8AddSatU) \
V(I16x8AddHoriz, kMipsI16x8AddHoriz) \
V(I16x8Sub, kMipsI16x8Sub) \
V(I16x8SubSatS, kMipsI16x8SubSatS) \
V(I16x8SubSatU, kMipsI16x8SubSatU) \
V(I16x8Mul, kMipsI16x8Mul) \
V(I16x8MaxS, kMipsI16x8MaxS) \
V(I16x8MinS, kMipsI16x8MinS) \
V(I16x8MaxU, kMipsI16x8MaxU) \
V(I16x8MinU, kMipsI16x8MinU) \
V(I16x8Eq, kMipsI16x8Eq) \
V(I16x8Ne, kMipsI16x8Ne) \
V(I16x8GtS, kMipsI16x8GtS) \
V(I16x8GeS, kMipsI16x8GeS) \
V(I16x8GtU, kMipsI16x8GtU) \
V(I16x8GeU, kMipsI16x8GeU) \
V(I16x8SConvertI32x4, kMipsI16x8SConvertI32x4) \
V(I16x8UConvertI32x4, kMipsI16x8UConvertI32x4) \
V(I16x8Q15MulRSatS, kMipsI16x8Q15MulRSatS) \
V(I16x8ExtMulLowI8x16S, kMipsI16x8ExtMulLowI8x16S) \
V(I16x8ExtMulHighI8x16S, kMipsI16x8ExtMulHighI8x16S) \
V(I16x8ExtMulLowI8x16U, kMipsI16x8ExtMulLowI8x16U) \
V(I16x8ExtMulHighI8x16U, kMipsI16x8ExtMulHighI8x16U) \
V(I16x8RoundingAverageU, kMipsI16x8RoundingAverageU) \
V(I16x8Abs, kMipsI16x8Abs) \
V(I8x16Add, kMipsI8x16Add) \
V(I8x16AddSatS, kMipsI8x16AddSatS) \
V(I8x16AddSatU, kMipsI8x16AddSatU) \
V(I8x16Sub, kMipsI8x16Sub) \
V(I8x16SubSatS, kMipsI8x16SubSatS) \
V(I8x16SubSatU, kMipsI8x16SubSatU) \
V(I8x16Mul, kMipsI8x16Mul) \
V(I8x16MaxS, kMipsI8x16MaxS) \
V(I8x16MinS, kMipsI8x16MinS) \
V(I8x16MaxU, kMipsI8x16MaxU) \
V(I8x16MinU, kMipsI8x16MinU) \
V(I8x16Eq, kMipsI8x16Eq) \
V(I8x16Ne, kMipsI8x16Ne) \
V(I8x16GtS, kMipsI8x16GtS) \
V(I8x16GeS, kMipsI8x16GeS) \
V(I8x16GtU, kMipsI8x16GtU) \
V(I8x16GeU, kMipsI8x16GeU) \
V(I8x16RoundingAverageU, kMipsI8x16RoundingAverageU) \
V(I8x16SConvertI16x8, kMipsI8x16SConvertI16x8) \
V(I8x16UConvertI16x8, kMipsI8x16UConvertI16x8) \
V(I8x16Abs, kMipsI8x16Abs) \
V(S128And, kMipsS128And) \
V(S128Or, kMipsS128Or) \
V(S128Xor, kMipsS128Xor) \
// X-macro table: each entry invokes V with a SIMD operation name and its
// kMips* opcode identifier. The expansion sites are outside this view;
// presumably the binary-operation visitors are generated from it (confirm).
// The I64x2{S,U}ConvertI32x4{Low,High} entries are intentionally absent here.
// NOTE(review): I32x4Abs, I16x8Abs and I8x16Abs look like one-operand
// operations -- confirm that they belong in this list.
#define SIMD_BINOP_LIST(V) \
V(F64x2Add, kMipsF64x2Add) \
V(F64x2Sub, kMipsF64x2Sub) \
V(F64x2Mul, kMipsF64x2Mul) \
V(F64x2Div, kMipsF64x2Div) \
V(F64x2Min, kMipsF64x2Min) \
V(F64x2Max, kMipsF64x2Max) \
V(F64x2Eq, kMipsF64x2Eq) \
V(F64x2Ne, kMipsF64x2Ne) \
V(F64x2Lt, kMipsF64x2Lt) \
V(F64x2Le, kMipsF64x2Le) \
V(I64x2Eq, kMipsI64x2Eq) \
V(I64x2Ne, kMipsI64x2Ne) \
V(I64x2Add, kMipsI64x2Add) \
V(I64x2Sub, kMipsI64x2Sub) \
V(I64x2Mul, kMipsI64x2Mul) \
V(I64x2GtS, kMipsI64x2GtS) \
V(I64x2GeS, kMipsI64x2GeS) \
V(I64x2ExtMulLowI32x4S, kMipsI64x2ExtMulLowI32x4S) \
V(I64x2ExtMulHighI32x4S, kMipsI64x2ExtMulHighI32x4S) \
V(I64x2ExtMulLowI32x4U, kMipsI64x2ExtMulLowI32x4U) \
V(I64x2ExtMulHighI32x4U, kMipsI64x2ExtMulHighI32x4U) \
V(F32x4Add, kMipsF32x4Add) \
V(F32x4AddHoriz, kMipsF32x4AddHoriz) \
V(F32x4Sub, kMipsF32x4Sub) \
V(F32x4Mul, kMipsF32x4Mul) \
V(F32x4Div, kMipsF32x4Div) \
V(F32x4Max, kMipsF32x4Max) \
V(F32x4Min, kMipsF32x4Min) \
V(F32x4Eq, kMipsF32x4Eq) \
V(F32x4Ne, kMipsF32x4Ne) \
V(F32x4Lt, kMipsF32x4Lt) \
V(F32x4Le, kMipsF32x4Le) \
V(I32x4Add, kMipsI32x4Add) \
V(I32x4AddHoriz, kMipsI32x4AddHoriz) \
V(I32x4Sub, kMipsI32x4Sub) \
V(I32x4Mul, kMipsI32x4Mul) \
V(I32x4MaxS, kMipsI32x4MaxS) \
V(I32x4MinS, kMipsI32x4MinS) \
V(I32x4MaxU, kMipsI32x4MaxU) \
V(I32x4MinU, kMipsI32x4MinU) \
V(I32x4Eq, kMipsI32x4Eq) \
V(I32x4Ne, kMipsI32x4Ne) \
V(I32x4GtS, kMipsI32x4GtS) \
V(I32x4GeS, kMipsI32x4GeS) \
V(I32x4GtU, kMipsI32x4GtU) \
V(I32x4GeU, kMipsI32x4GeU) \
V(I32x4Abs, kMipsI32x4Abs) \
V(I32x4DotI16x8S, kMipsI32x4DotI16x8S) \
V(I32x4ExtMulLowI16x8S, kMipsI32x4ExtMulLowI16x8S) \
V(I32x4ExtMulHighI16x8S, kMipsI32x4ExtMulHighI16x8S) \
V(I32x4ExtMulLowI16x8U, kMipsI32x4ExtMulLowI16x8U) \
V(I32x4ExtMulHighI16x8U, kMipsI32x4ExtMulHighI16x8U) \
V(I16x8Add, kMipsI16x8Add) \
V(I16x8AddSatS, kMipsI16x8AddSatS) \
V(I16x8AddSatU, kMipsI16x8AddSatU) \
V(I16x8AddHoriz, kMipsI16x8AddHoriz) \
V(I16x8Sub, kMipsI16x8Sub) \
V(I16x8SubSatS, kMipsI16x8SubSatS) \
V(I16x8SubSatU, kMipsI16x8SubSatU) \
V(I16x8Mul, kMipsI16x8Mul) \
V(I16x8MaxS, kMipsI16x8MaxS) \
V(I16x8MinS, kMipsI16x8MinS) \
V(I16x8MaxU, kMipsI16x8MaxU) \
V(I16x8MinU, kMipsI16x8MinU) \
V(I16x8Eq, kMipsI16x8Eq) \
V(I16x8Ne, kMipsI16x8Ne) \
V(I16x8GtS, kMipsI16x8GtS) \
V(I16x8GeS, kMipsI16x8GeS) \
V(I16x8GtU, kMipsI16x8GtU) \
V(I16x8GeU, kMipsI16x8GeU) \
V(I16x8SConvertI32x4, kMipsI16x8SConvertI32x4) \
V(I16x8UConvertI32x4, kMipsI16x8UConvertI32x4) \
V(I16x8Q15MulRSatS, kMipsI16x8Q15MulRSatS) \
V(I16x8ExtMulLowI8x16S, kMipsI16x8ExtMulLowI8x16S) \
V(I16x8ExtMulHighI8x16S, kMipsI16x8ExtMulHighI8x16S) \
V(I16x8ExtMulLowI8x16U, kMipsI16x8ExtMulLowI8x16U) \
V(I16x8ExtMulHighI8x16U, kMipsI16x8ExtMulHighI8x16U) \
V(I16x8RoundingAverageU, kMipsI16x8RoundingAverageU) \
V(I16x8Abs, kMipsI16x8Abs) \
V(I8x16Add, kMipsI8x16Add) \
V(I8x16AddSatS, kMipsI8x16AddSatS) \
V(I8x16AddSatU, kMipsI8x16AddSatU) \
V(I8x16Sub, kMipsI8x16Sub) \
V(I8x16SubSatS, kMipsI8x16SubSatS) \
V(I8x16SubSatU, kMipsI8x16SubSatU) \
V(I8x16Mul, kMipsI8x16Mul) \
V(I8x16MaxS, kMipsI8x16MaxS) \
V(I8x16MinS, kMipsI8x16MinS) \
V(I8x16MaxU, kMipsI8x16MaxU) \
V(I8x16MinU, kMipsI8x16MinU) \
V(I8x16Eq, kMipsI8x16Eq) \
V(I8x16Ne, kMipsI8x16Ne) \
V(I8x16GtS, kMipsI8x16GtS) \
V(I8x16GeS, kMipsI8x16GeS) \
V(I8x16GtU, kMipsI8x16GtU) \
V(I8x16GeU, kMipsI8x16GeU) \
V(I8x16RoundingAverageU, kMipsI8x16RoundingAverageU) \
V(I8x16SConvertI16x8, kMipsI8x16SConvertI16x8) \
V(I8x16UConvertI16x8, kMipsI8x16UConvertI16x8) \
V(I8x16Abs, kMipsI8x16Abs) \
V(S128And, kMipsS128And) \
V(S128Or, kMipsS128Or) \
V(S128Xor, kMipsS128Xor) \
V(S128AndNot, kMipsS128AndNot)
// No selection is implemented for S128 constants here; visiting one hits
// UNIMPLEMENTED().
void InstructionSelector::VisitS128Const(Node* node) {
  UNIMPLEMENTED();
}

View File

@ -1817,28 +1817,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ ByteSwapSigned(i.OutputRegister(0), i.InputRegister(0), 4);
break;
}
case kMips64S128Load8Splat: {
case kMips64S128LoadSplat: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ Lb(kScratchReg, i.MemoryOperand());
__ fill_b(i.OutputSimd128Register(), kScratchReg);
break;
}
case kMips64S128Load16Splat: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ Lh(kScratchReg, i.MemoryOperand());
__ fill_h(i.OutputSimd128Register(), kScratchReg);
break;
}
case kMips64S128Load32Splat: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ Lw(kScratchReg, i.MemoryOperand());
__ fill_w(i.OutputSimd128Register(), kScratchReg);
break;
}
case kMips64S128Load64Splat: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ Ld(kScratchReg, i.MemoryOperand());
__ fill_d(i.OutputSimd128Register(), kScratchReg);
auto sz = static_cast<MSASize>(MiscField::decode(instr->opcode()));
__ LoadSplat(sz, i.OutputSimd128Register(), i.MemoryOperand());
break;
}
case kMips64S128Load8x8S: {

View File

@ -366,10 +366,7 @@ namespace compiler {
V(Mips64S8x8Reverse) \
V(Mips64S8x4Reverse) \
V(Mips64S8x2Reverse) \
V(Mips64S128Load8Splat) \
V(Mips64S128Load16Splat) \
V(Mips64S128Load32Splat) \
V(Mips64S128Load64Splat) \
V(Mips64S128LoadSplat) \
V(Mips64S128Load8x8S) \
V(Mips64S128Load8x8U) \
V(Mips64S128Load16x4S) \

View File

@ -367,10 +367,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMips64Ulw:
case kMips64Ulwu:
case kMips64Ulwc1:
case kMips64S128Load8Splat:
case kMips64S128Load16Splat:
case kMips64S128Load32Splat:
case kMips64S128Load64Splat:
case kMips64S128LoadSplat:
case kMips64S128Load8x8S:
case kMips64S128Load8x8U:
case kMips64S128Load16x4S:

View File

@ -434,16 +434,20 @@ void InstructionSelector::VisitLoadTransform(Node* node) {
InstructionCode opcode = kArchNop;
switch (params.transformation) {
case LoadTransformation::kS128Load8Splat:
opcode = kMips64S128Load8Splat;
opcode = kMips64S128LoadSplat;
opcode |= MiscField::encode(MSASize::MSA_B);
break;
case LoadTransformation::kS128Load16Splat:
opcode = kMips64S128Load16Splat;
opcode = kMips64S128LoadSplat;
opcode |= MiscField::encode(MSASize::MSA_H);
break;
case LoadTransformation::kS128Load32Splat:
opcode = kMips64S128Load32Splat;
opcode = kMips64S128LoadSplat;
opcode |= MiscField::encode(MSASize::MSA_W);
break;
case LoadTransformation::kS128Load64Splat:
opcode = kMips64S128Load64Splat;
opcode = kMips64S128LoadSplat;
opcode |= MiscField::encode(MSASize::MSA_D);
break;
case LoadTransformation::kS128Load8x8S:
opcode = kMips64S128Load8x8S;