S390 [liftoff]: Implement simd fp32 to i32 conversion

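I32x4UConvertF32x4 is also slightly optimized by removing 2
instructions (the explicit vx/vfmax clamp), since the unsigned
conversion already maps NaN and negative inputs to 0.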

Change-Id: Ie61fbd34628beb2410ae3ef916de7c3119c7ad9c
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3429463
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Farazmand <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/main@{#78907}
This commit is contained in:
Milad Fa 2022-02-01 15:40:45 -05:00 committed by V8 LUCI CQ
parent b345204579
commit 7f47473a39
5 changed files with 50 additions and 43 deletions

View File

@ -5601,6 +5601,42 @@ void TurboAssembler::V128AnyTrue(Register dst, Simd128Register src,
locgr(Condition(8), dst, scratch);
}
// Lane-by-lane fallback for float32 -> int32 conversion. For each of the four
// lane indices it:
//   1. vlgv: copies lane |index| of |src| into the register |scratch2|,
//   2. MovIntToFloat: moves that value into the register |scratch1|,
//   3. |convert|: converts |scratch1| into |scratch2| using kRoundToZero,
//   4. vlvg: writes |scratch2| into lane |index| of |dst|.
// |src| and |dst| may be the same register (one caller passes dst for both);
// each lane is read before it is written.
// NOTE(review): |scratch1| is overwritten on every iteration, so it presumably
// must not alias |src| — confirm at call sites.
#define CONVERT_FLOAT_TO_INT32(convert, dst, src, scratch1, scratch2) \
for (int index = 0; index < 4; index++) { \
vlgv(scratch2, src, MemOperand(r0, index), Condition(2)); \
MovIntToFloat(scratch1, scratch2); \
convert(scratch2, scratch1, kRoundToZero); \
vlvg(dst, scratch2, MemOperand(r0, index), Condition(2)); \
}
// Emits code converting the four float32 lanes of |src| into signed int32
// lanes of |dst|. |scratch1| receives the self-comparison mask (and is reused
// as the per-lane float temporary on the fallback path); |scratch2| is the
// general-purpose temporary used by the fallback path.
void TurboAssembler::I32x4SConvertF32x4(Simd128Register dst,
                                        Simd128Register src,
                                        Simd128Register scratch1,
                                        Register scratch2) {
  // NaN lanes become 0: compare |src| with itself, then AND |src| with the
  // comparison result into |dst|.
  vfce(scratch1, src, src, Condition(0), Condition(0), Condition(2));
  vn(dst, src, scratch1, Condition(0), Condition(0), Condition(0));

  if (!CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)) {
    // Without VECTOR_ENHANCE_FACILITY_2, convert |dst| in place one lane at a
    // time.
    CONVERT_FLOAT_TO_INT32(ConvertFloat32ToInt32, dst, dst, scratch1, scratch2)
    return;
  }

  // Single vector conversion of the NaN-cleared value, in place.
  vcgd(dst, dst, Condition(5), Condition(0), Condition(2));
}
// Emits code converting the four float32 lanes of |src| into unsigned int32
// lanes of |dst|. |scratch1| and |scratch2| are only touched on the
// lane-by-lane fallback path.
void TurboAssembler::I32x4UConvertF32x4(Simd128Register dst,
                                        Simd128Register src,
                                        Simd128Register scratch1,
                                        Register scratch2) {
  // No explicit clamping is emitted: both vclgd and
  // ConvertFloat32ToUnsignedInt32 already turn NaN and negative inputs into 0.
  if (!CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)) {
    // Without VECTOR_ENHANCE_FACILITY_2, convert one lane at a time.
    CONVERT_FLOAT_TO_INT32(ConvertFloat32ToUnsignedInt32, dst, src, scratch1,
                           scratch2)
    return;
  }

  vclgd(dst, src, Condition(5), Condition(0), Condition(2));
}
#undef CONVERT_FLOAT_TO_INT32
// Vector LE Load and Transform instructions.
#ifdef V8_TARGET_BIG_ENDIAN
#define IS_BIG_ENDIAN true

View File

@ -1114,6 +1114,10 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void I8x16BitMask(Register dst, Simd128Register src, Register scratch1,
Register scratch2, Simd128Register scratch3);
void V128AnyTrue(Register dst, Simd128Register src, Register scratch);
// Converts the float32 lanes of |src| to signed int32 lanes in |dst|.
// |scratch1| (SIMD) and |scratch2| (general purpose) are temporaries that the
// emitted code may overwrite.
void I32x4SConvertF32x4(Simd128Register dst, Simd128Register src,
Simd128Register scratch1, Register scratch2);
// Converts the float32 lanes of |src| to unsigned int32 lanes in |dst|.
// |scratch1| (SIMD) and |scratch2| (general purpose) are temporaries that the
// emitted code may overwrite.
void I32x4UConvertF32x4(Simd128Register dst, Simd128Register src,
Simd128Register scratch1, Register scratch2);
#define SIMD_UNOP_LIST(V) \
V(F64x2Abs) \

View File

@ -2880,49 +2880,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
// vector conversions
#define CONVERT_FLOAT_TO_INT32(convert) \
for (int index = 0; index < 4; index++) { \
__ vlgv(kScratchReg, kScratchDoubleReg, MemOperand(r0, index), \
Condition(2)); \
__ MovIntToFloat(tempFPReg1, kScratchReg); \
__ convert(kScratchReg, tempFPReg1, kRoundToZero); \
__ vlvg(dst, kScratchReg, MemOperand(r0, index), Condition(2)); \
}
case kS390_I32x4SConvertF32x4: {
Simd128Register src = i.InputSimd128Register(0);
Simd128Register dst = i.OutputSimd128Register();
Simd128Register tempFPReg1 = i.ToDoubleRegister(instr->TempAt(0));
DCHECK_NE(dst, tempFPReg1);
// NaN to 0
__ vlr(kScratchDoubleReg, src, Condition(0), Condition(0), Condition(0));
__ vfce(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg,
Condition(0), Condition(0), Condition(2));
__ vn(kScratchDoubleReg, src, kScratchDoubleReg, Condition(0),
Condition(0), Condition(0));
if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)) {
__ vcgd(i.OutputSimd128Register(), kScratchDoubleReg, Condition(5),
Condition(0), Condition(2));
} else {
CONVERT_FLOAT_TO_INT32(ConvertFloat32ToInt32)
}
__ I32x4SConvertF32x4(i.OutputSimd128Register(),
i.InputSimd128Register(0), kScratchDoubleReg,
kScratchReg);
break;
}
case kS390_I32x4UConvertF32x4: {
Simd128Register src = i.InputSimd128Register(0);
Simd128Register dst = i.OutputSimd128Register();
Simd128Register tempFPReg1 = i.ToDoubleRegister(instr->TempAt(0));
DCHECK_NE(dst, tempFPReg1);
// NaN to 0, negative to 0
__ vx(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg,
Condition(0), Condition(0), Condition(0));
__ vfmax(kScratchDoubleReg, src, kScratchDoubleReg, Condition(1),
Condition(0), Condition(2));
if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)) {
__ vclgd(i.OutputSimd128Register(), kScratchDoubleReg, Condition(5),
Condition(0), Condition(2));
} else {
CONVERT_FLOAT_TO_INT32(ConvertFloat32ToUnsignedInt32)
}
__ I32x4UConvertF32x4(i.OutputSimd128Register(),
i.InputSimd128Register(0), kScratchDoubleReg,
kScratchReg);
break;
}
#undef CONVERT_FLOAT_TO_INT32

View File

@ -2565,6 +2565,8 @@ void InstructionSelector::VisitWord64AtomicStore(Node* node) {
V(I64x2AllTrue) \
V(I32x4Neg) \
V(I32x4Abs) \
V(I32x4SConvertF32x4) \
V(I32x4UConvertF32x4) \
V(I32x4SConvertI16x8Low) \
V(I32x4SConvertI16x8High) \
V(I32x4UConvertI16x8Low) \
@ -2593,8 +2595,6 @@ void InstructionSelector::VisitWord64AtomicStore(Node* node) {
V(V128AnyTrue)
#define SIMD_UNOP_UNIQUE_REGISTER_LIST(V) \
V(I32x4SConvertF32x4) \
V(I32x4UConvertF32x4) \
V(I32x4ExtAddPairwiseI16x8S) \
V(I32x4ExtAddPairwiseI16x8U) \
V(I16x8ExtAddPairwiseI8x16S) \

View File

@ -2682,12 +2682,12 @@ void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
// Liftoff hook for i32x4_sconvert_f32x4: forwards to I32x4SConvertF32x4 on the
// FP registers of |dst| and |src|, with kScratchDoubleReg and r0 as scratch
// registers.
// NOTE(review): this listing looks like a flattened diff, so the bailout()
// line below is presumably the pre-change line that the helper call
// replaces — confirm against the actual file.
void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "i32x4_sconvert_f32x4");
I32x4SConvertF32x4(dst.fp(), src.fp(), kScratchDoubleReg, r0);
}
// Liftoff hook for i32x4_uconvert_f32x4: forwards to I32x4UConvertF32x4 on the
// FP registers of |dst| and |src|, with kScratchDoubleReg and r0 as scratch
// registers.
// NOTE(review): this listing looks like a flattened diff, so the bailout()
// line below is presumably the pre-change line that the helper call
// replaces — confirm against the actual file.
void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "i32x4_uconvert_f32x4");
I32x4UConvertF32x4(dst.fp(), src.fp(), kScratchDoubleReg, r0);
}
void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst,