S390 [liftoff]: Implement simd fp32 to i32 conversion
I32x4UConvertF32x4 is also slightly optimized by removing 2 instructions.

Change-Id: Ie61fbd34628beb2410ae3ef916de7c3119c7ad9c
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3429463
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Farazmand <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/main@{#78907}
This commit is contained in:
parent
b345204579
commit
7f47473a39
@ -5601,6 +5601,42 @@ void TurboAssembler::V128AnyTrue(Register dst, Simd128Register src,
|
||||
locgr(Condition(8), dst, scratch);
|
||||
}
|
||||
|
||||
// Per-lane fallback used when a single vector convert instruction is not
// available. For each index 0..3 it emits: vlgv to read the lane of `src` at
// that index into `scratch2`, MovIntToFloat to move `scratch2` into
// `scratch1`, `convert` with kRoundToZero to write the result back into
// `scratch2`, and vlvg to store `scratch2` into the same lane of `dst`.
// `convert` is the name of the scalar conversion routine to call.
// NOTE(review): Condition(2) presumably selects 32-bit elements -- confirm.
#define CONVERT_FLOAT_TO_INT32(convert, dst, src, scratch1, scratch2) \
|
||||
for (int index = 0; index < 4; index++) { \
|
||||
vlgv(scratch2, src, MemOperand(r0, index), Condition(2)); \
|
||||
MovIntToFloat(scratch1, scratch2); \
|
||||
convert(scratch2, scratch1, kRoundToZero); \
|
||||
vlvg(dst, scratch2, MemOperand(r0, index), Condition(2)); \
|
||||
}
|
||||
// Emits the signed f32x4 -> i32x4 conversion of `src` into `dst`.
// `scratch1` (vector) and `scratch2` (general purpose) are clobbered:
// `scratch1` always receives the vfce result, and both are used by the
// per-lane fallback.
void TurboAssembler::I32x4SConvertF32x4(Simd128Register dst,
|
||||
Simd128Register src,
|
||||
Simd128Register scratch1,
|
||||
Register scratch2) {
|
||||
// NaN to 0.
// vfce compares src with itself into scratch1 and vn combines src with that
// result into dst; presumably NaN lanes compare unequal and are therefore
// masked to zero -- confirm against the instruction reference.
|
||||
vfce(scratch1, src, src, Condition(0), Condition(0), Condition(2));
|
||||
vn(dst, src, scratch1, Condition(0), Condition(0), Condition(0));
|
||||
// From here on the input is taken from dst (the NaN-cleared copy), not src.
if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)) {
|
||||
// Single-instruction path: convert dst in place.
vcgd(dst, dst, Condition(5), Condition(0), Condition(2));
|
||||
} else {
|
||||
// Fallback: convert the four lanes one at a time with ConvertFloat32ToInt32.
CONVERT_FLOAT_TO_INT32(ConvertFloat32ToInt32, dst, dst, scratch1, scratch2)
|
||||
}
|
||||
}
|
||||
|
||||
// Emits the unsigned f32x4 -> i32x4 conversion of `src` into `dst`.
// Unlike the signed variant, no explicit NaN-clearing sequence is emitted
// before the conversion. `scratch1` and `scratch2` are only used by the
// per-lane fallback path.
void TurboAssembler::I32x4UConvertF32x4(Simd128Register dst,
|
||||
Simd128Register src,
|
||||
Simd128Register scratch1,
|
||||
Register scratch2) {
|
||||
// vclgd or ConvertFloat32ToUnsignedInt32 will convert NaN to 0, negative to 0
|
||||
// automatically.
|
||||
if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)) {
|
||||
// Single-instruction path: convert src directly into dst.
vclgd(dst, src, Condition(5), Condition(0), Condition(2));
|
||||
} else {
|
||||
// Fallback: convert the four lanes one at a time with
// ConvertFloat32ToUnsignedInt32. Each lane of src is read before the same
// lane of dst is written.
CONVERT_FLOAT_TO_INT32(ConvertFloat32ToUnsignedInt32, dst, src, scratch1,
|
||||
scratch2)
|
||||
}
|
||||
}
|
||||
#undef CONVERT_FLOAT_TO_INT32
|
||||
|
||||
// Vector LE Load and Transform instructions.
|
||||
#ifdef V8_TARGET_BIG_ENDIAN
|
||||
#define IS_BIG_ENDIAN true
|
||||
|
@ -1114,6 +1114,10 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
void I8x16BitMask(Register dst, Simd128Register src, Register scratch1,
|
||||
Register scratch2, Simd128Register scratch3);
|
||||
void V128AnyTrue(Register dst, Simd128Register src, Register scratch);
|
||||
// Signed f32x4 -> i32x4 conversion of `src` into `dst`. `scratch1` is a
// vector scratch register and `scratch2` a general-purpose scratch register;
// callers should treat both as clobbered.
void I32x4SConvertF32x4(Simd128Register dst, Simd128Register src,
|
||||
Simd128Register scratch1, Register scratch2);
|
||||
// Unsigned f32x4 -> i32x4 conversion of `src` into `dst`. Takes the same
// scratch registers as the signed variant; callers should treat both as
// clobbered.
void I32x4UConvertF32x4(Simd128Register dst, Simd128Register src,
|
||||
Simd128Register scratch1, Register scratch2);
|
||||
|
||||
#define SIMD_UNOP_LIST(V) \
|
||||
V(F64x2Abs) \
|
||||
|
@ -2880,49 +2880,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
break;
|
||||
}
|
||||
// vector conversions
|
||||
#define CONVERT_FLOAT_TO_INT32(convert) \
|
||||
for (int index = 0; index < 4; index++) { \
|
||||
__ vlgv(kScratchReg, kScratchDoubleReg, MemOperand(r0, index), \
|
||||
Condition(2)); \
|
||||
__ MovIntToFloat(tempFPReg1, kScratchReg); \
|
||||
__ convert(kScratchReg, tempFPReg1, kRoundToZero); \
|
||||
__ vlvg(dst, kScratchReg, MemOperand(r0, index), Condition(2)); \
|
||||
}
|
||||
case kS390_I32x4SConvertF32x4: {
|
||||
Simd128Register src = i.InputSimd128Register(0);
|
||||
Simd128Register dst = i.OutputSimd128Register();
|
||||
Simd128Register tempFPReg1 = i.ToDoubleRegister(instr->TempAt(0));
|
||||
DCHECK_NE(dst, tempFPReg1);
|
||||
// NaN to 0
|
||||
__ vlr(kScratchDoubleReg, src, Condition(0), Condition(0), Condition(0));
|
||||
__ vfce(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg,
|
||||
Condition(0), Condition(0), Condition(2));
|
||||
__ vn(kScratchDoubleReg, src, kScratchDoubleReg, Condition(0),
|
||||
Condition(0), Condition(0));
|
||||
if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)) {
|
||||
__ vcgd(i.OutputSimd128Register(), kScratchDoubleReg, Condition(5),
|
||||
Condition(0), Condition(2));
|
||||
} else {
|
||||
CONVERT_FLOAT_TO_INT32(ConvertFloat32ToInt32)
|
||||
}
|
||||
__ I32x4SConvertF32x4(i.OutputSimd128Register(),
|
||||
i.InputSimd128Register(0), kScratchDoubleReg,
|
||||
kScratchReg);
|
||||
break;
|
||||
}
|
||||
case kS390_I32x4UConvertF32x4: {
|
||||
Simd128Register src = i.InputSimd128Register(0);
|
||||
Simd128Register dst = i.OutputSimd128Register();
|
||||
Simd128Register tempFPReg1 = i.ToDoubleRegister(instr->TempAt(0));
|
||||
DCHECK_NE(dst, tempFPReg1);
|
||||
// NaN to 0, negative to 0
|
||||
__ vx(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg,
|
||||
Condition(0), Condition(0), Condition(0));
|
||||
__ vfmax(kScratchDoubleReg, src, kScratchDoubleReg, Condition(1),
|
||||
Condition(0), Condition(2));
|
||||
if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)) {
|
||||
__ vclgd(i.OutputSimd128Register(), kScratchDoubleReg, Condition(5),
|
||||
Condition(0), Condition(2));
|
||||
} else {
|
||||
CONVERT_FLOAT_TO_INT32(ConvertFloat32ToUnsignedInt32)
|
||||
}
|
||||
__ I32x4UConvertF32x4(i.OutputSimd128Register(),
|
||||
i.InputSimd128Register(0), kScratchDoubleReg,
|
||||
kScratchReg);
|
||||
break;
|
||||
}
|
||||
#undef CONVERT_FLOAT_TO_INT32
|
||||
|
@ -2565,6 +2565,8 @@ void InstructionSelector::VisitWord64AtomicStore(Node* node) {
|
||||
V(I64x2AllTrue) \
|
||||
V(I32x4Neg) \
|
||||
V(I32x4Abs) \
|
||||
V(I32x4SConvertF32x4) \
|
||||
V(I32x4UConvertF32x4) \
|
||||
V(I32x4SConvertI16x8Low) \
|
||||
V(I32x4SConvertI16x8High) \
|
||||
V(I32x4UConvertI16x8Low) \
|
||||
@ -2593,8 +2595,6 @@ void InstructionSelector::VisitWord64AtomicStore(Node* node) {
|
||||
V(V128AnyTrue)
|
||||
|
||||
#define SIMD_UNOP_UNIQUE_REGISTER_LIST(V) \
|
||||
V(I32x4SConvertF32x4) \
|
||||
V(I32x4UConvertF32x4) \
|
||||
V(I32x4ExtAddPairwiseI16x8S) \
|
||||
V(I32x4ExtAddPairwiseI16x8U) \
|
||||
V(I16x8ExtAddPairwiseI8x16S) \
|
||||
|
@ -2682,12 +2682,12 @@ void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
|
||||
|
||||
// Liftoff hook for i32x4_sconvert_f32x4: forwards the FP registers of `dst`
// and `src` to I32x4SConvertF32x4, using kScratchDoubleReg and r0 as the
// scratch registers.
// NOTE(review): this rendered diff carries no +/- markers; the bailout line
// below is presumably the removed placeholder that the helper call replaces
// -- confirm against the real patch.
void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
bailout(kSimd, "i32x4_sconvert_f32x4");
|
||||
I32x4SConvertF32x4(dst.fp(), src.fp(), kScratchDoubleReg, r0);
|
||||
}
|
||||
|
||||
// Liftoff hook for i32x4_uconvert_f32x4: forwards the FP registers of `dst`
// and `src` to I32x4UConvertF32x4, using kScratchDoubleReg and r0 as the
// scratch registers.
// NOTE(review): this rendered diff carries no +/- markers; the bailout line
// below is presumably the removed placeholder that the helper call replaces
// -- confirm against the real patch.
void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst,
|
||||
LiftoffRegister src) {
|
||||
bailout(kSimd, "i32x4_uconvert_f32x4");
|
||||
I32x4UConvertF32x4(dst.fp(), src.fp(), kScratchDoubleReg, r0);
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst,
|
||||
|
Loading…
Reference in New Issue
Block a user