PPC/s390: [wasm-simd] Implement double precision conversion

Port 3b6eb33543

Original Commit Message:

    Prototype these 6 instructions on arm:

    - f64x2.convert_low_i32x4_s
    - f64x2.convert_low_i32x4_u
    - i32x4.trunc_sat_f64x2_s_zero
    - i32x4.trunc_sat_f64x2_u_zero
    - f32x4.demote_f64x2_zero
    - f64x2.promote_low_f32x4

    For all these instructions we rely on having Q registers that map to S
    registers, which means we can only use q0 to q7. We fix the src/dst
    to q0 arbitrarily.

R=zhin@chromium.org, joransiu@ca.ibm.com, junyan@redhat.com, midawson@redhat.com
BUG=
LOG=N

Bug: v8:11265
Change-Id: Ibef0b05fe1066cd79ddcda12787441b09cc8ede2
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2684360
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Fa <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/master@{#72600}
This commit is contained in:
Milad Fa 2021-02-09 10:44:56 -05:00 committed by Commit Bot
parent fd43f1d586
commit 9886c9f64b
6 changed files with 286 additions and 87 deletions

View File

@ -2502,6 +2502,24 @@ void InstructionSelector::VisitI16x8ExtMulHighI8x16U(Node* node) {
UNIMPLEMENTED();
}
// Selector hook for I8x16Popcnt nodes. No lowering is provided here, so
// reaching it triggers UNIMPLEMENTED().
void InstructionSelector::VisitI8x16Popcnt(Node* node) { UNIMPLEMENTED(); }
// Selector hook for F64x2ConvertLowI32x4S nodes (f64x2.convert_low_i32x4_s in
// the commit description). No lowering is provided here, so reaching it
// triggers UNIMPLEMENTED().
void InstructionSelector::VisitF64x2ConvertLowI32x4S(Node* node) {
UNIMPLEMENTED();
}
// Selector hook for F64x2ConvertLowI32x4U nodes (f64x2.convert_low_i32x4_u in
// the commit description). No lowering is provided here, so reaching it
// triggers UNIMPLEMENTED().
void InstructionSelector::VisitF64x2ConvertLowI32x4U(Node* node) {
UNIMPLEMENTED();
}
// Selector hook for F64x2PromoteLowF32x4 nodes (f64x2.promote_low_f32x4 in the
// commit description). No lowering is provided here, so reaching it triggers
// UNIMPLEMENTED().
void InstructionSelector::VisitF64x2PromoteLowF32x4(Node* node) {
UNIMPLEMENTED();
}
// Selector hook for F32x4DemoteF64x2Zero nodes (f32x4.demote_f64x2_zero in the
// commit description). No lowering is provided here, so reaching it triggers
// UNIMPLEMENTED().
void InstructionSelector::VisitF32x4DemoteF64x2Zero(Node* node) {
UNIMPLEMENTED();
}
// Selector hook for I32x4TruncSatF64x2SZero nodes
// (i32x4.trunc_sat_f64x2_s_zero in the commit description). No lowering is
// provided here, so reaching it triggers UNIMPLEMENTED().
void InstructionSelector::VisitI32x4TruncSatF64x2SZero(Node* node) {
UNIMPLEMENTED();
}
// Selector hook for I32x4TruncSatF64x2UZero nodes
// (i32x4.trunc_sat_f64x2_u_zero in the commit description). No lowering is
// provided here, so reaching it triggers UNIMPLEMENTED().
void InstructionSelector::VisitI32x4TruncSatF64x2UZero(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::EmitPrepareResults(
ZoneVector<PushParameter>* results, const CallDescriptor* call_descriptor,

View File

@ -4268,6 +4268,90 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Condition(0), Condition(0), Condition(0));
break;
}
// Emits a two-instruction sequence: unpack elements of the input into the
// scratch vector register, then convert the scratch register to floating point
// into the output register.
case kS390_F64x2ConvertLowI32x4S: {
// vupl is VECTOR UNPACK LOW.
// NOTE(review): the trailing Condition(2) is presumably the word (32-bit)
// source element size, yielding sign-extended 64-bit lanes — confirm.
__ vupl(kScratchDoubleReg, i.InputSimd128Register(0), Condition(0),
Condition(0), Condition(2));
// vcdg is VECTOR FP CONVERT FROM FIXED.
// NOTE(review): the trailing Condition(3) presumably selects the
// int64 -> double form and Condition(5) the rounding mode — confirm.
__ vcdg(i.OutputSimd128Register(), kScratchDoubleReg, Condition(5),
Condition(0), Condition(3));
break;
}
// Unsigned counterpart of the signed convert-low sequence: unpack elements of
// the input into the scratch vector register, then convert the scratch
// register to floating point into the output register.
case kS390_F64x2ConvertLowI32x4U: {
// NOTE(review): vupll is presumably the logical (zero-extending) unpack-low,
// with Condition(2) selecting word-sized source elements — confirm.
__ vupll(kScratchDoubleReg, i.InputSimd128Register(0), Condition(0),
Condition(0), Condition(2));
// vcdlg is VECTOR FP CONVERT FROM LOGICAL.
// NOTE(review): the trailing Condition(3) presumably selects the
// uint64 -> double form and Condition(5) the rounding mode — confirm.
__ vcdlg(i.OutputSimd128Register(), kScratchDoubleReg, Condition(5),
Condition(0), Condition(3));
break;
}
// Widens two elements of the input one at a time through general-purpose
// registers: each element is extracted into r0, moved into the scratch FP
// register, widened with ldebr, and moved back out as a 64-bit integer
// pattern. The first result lands in r1 and the second in ip; both are then
// written to the output register with a single vlvgp.
case kS390_F64x2PromoteLowF32x4: {
// Destination GPR for the current iteration: r1 first, then ip.
Register holder = r1;
for (int index = 0; index < 2; ++index) {
// Big-endian targets read elements 2 and 3, little-endian targets read
// elements 0 and 1.
// NOTE(review): Condition(2) is presumably the word (32-bit) element size
// for vlgv — confirm.
#ifdef V8_TARGET_BIG_ENDIAN
__ vlgv(r0, i.InputSimd128Register(0), MemOperand(r0, index + 2),
Condition(2));
#else
__ vlgv(r0, i.InputSimd128Register(0), MemOperand(r0, index),
Condition(2));
#endif
__ MovIntToFloat(kScratchDoubleReg, r0);
// NOTE(review): ldebr presumably lengthens float to double — confirm.
__ ldebr(kScratchDoubleReg, kScratchDoubleReg);
__ MovDoubleToInt64(holder, kScratchDoubleReg);
// Switch the destination for the second iteration.
holder = ip;
}
// NOTE(review): vlvgp presumably fills the vector register from the GPR
// pair (r1, ip) — confirm which half each register occupies.
__ vlvgp(i.OutputSimd128Register(), r1, ip);
break;
}
// Narrows the two elements of the input one at a time through general-purpose
// registers: each element is extracted into r0, moved into the scratch FP
// register, narrowed with ledbr, and moved back out as a 32-bit pattern. The
// first result lands in r1 and the second in ip. The output register is then
// cleared and the two results are inserted into two of its word elements.
case kS390_F32x4DemoteF64x2Zero: {
Simd128Register dst = i.OutputSimd128Register();
// Destination GPR for the current iteration: r1 first, then ip.
Register holder = r1;
for (int index = 0; index < 2; ++index) {
// NOTE(review): Condition(3) is presumably the doubleword (64-bit) element
// size for vlgv — confirm.
__ vlgv(r0, i.InputSimd128Register(0), MemOperand(r0, index),
Condition(3));
__ MovInt64ToDouble(kScratchDoubleReg, r0);
// NOTE(review): ledbr presumably rounds double to float — confirm.
__ ledbr(kScratchDoubleReg, kScratchDoubleReg);
__ MovFloatToInt(holder, kScratchDoubleReg);
// Switch the destination for the second iteration.
holder = ip;
}
// dst is only written after both input elements have been read above.
// NOTE(review): vx of dst with itself presumably XORs the register to zero,
// providing the "Zero" half of the result — confirm.
__ vx(dst, dst, dst, Condition(0), Condition(0), Condition(2));
// Big-endian targets fill word elements 2 and 3, little-endian targets fill
// word elements 0 and 1.
#ifdef V8_TARGET_BIG_ENDIAN
__ vlvg(dst, r1, MemOperand(r0, 2), Condition(2));
__ vlvg(dst, ip, MemOperand(r0, 3), Condition(2));
#else
__ vlvg(dst, r1, MemOperand(r0, 0), Condition(2));
__ vlvg(dst, ip, MemOperand(r0, 1), Condition(2));
#endif
break;
}
// Converts the input to signed integers in the scratch register after masking
// the input against a self-comparison, then clears the output register and
// packs the scratch register together with the cleared output.
case kS390_I32x4TruncSatF64x2SZero: {
Simd128Register src = i.InputSimd128Register(0);
Simd128Register dst = i.OutputSimd128Register();
// NaN to 0
// The scratch register is compared with itself and the result is ANDed with
// src.
// NOTE(review): presumably vfce yields an all-ones mask only for lanes that
// equal themselves (i.e. are not NaN), so the AND clears NaN lanes — confirm.
__ vlr(kScratchDoubleReg, src, Condition(0), Condition(0), Condition(0));
__ vfce(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg,
Condition(0), Condition(0), Condition(3));
__ vn(kScratchDoubleReg, src, kScratchDoubleReg, Condition(0),
Condition(0), Condition(0));
// vcgd is VECTOR FP CONVERT TO FIXED.
// NOTE(review): the trailing Condition(3) presumably selects the
// double -> int64 form and Condition(5) the rounding mode — confirm.
__ vcgd(kScratchDoubleReg, kScratchDoubleReg, Condition(5), Condition(0),
Condition(3));
// src is not read after this point, so dst may be overwritten.
// NOTE(review): vx of dst with itself presumably zeroes dst — confirm.
__ vx(dst, dst, dst, Condition(0), Condition(0), Condition(2));
// The operand order of the pack is swapped between endiannesses.
// NOTE(review): vpks is presumably a saturating pack of 64-bit elements
// into 32-bit elements — confirm.
#ifdef V8_TARGET_BIG_ENDIAN
__ vpks(dst, dst, kScratchDoubleReg, Condition(0), Condition(3));
#else
__ vpks(dst, kScratchDoubleReg, dst, Condition(0), Condition(3));
#endif
break;
}
// Converts the input to unsigned integers in the scratch register, then clears
// the output register and packs the scratch register together with the
// cleared output.
case kS390_I32x4TruncSatF64x2UZero: {
Simd128Register dst = i.OutputSimd128Register();
// vclgd is VECTOR FP CONVERT TO LOGICAL.
// NOTE(review): the trailing Condition(3) presumably selects the
// double -> uint64 form and Condition(5) the rounding mode — confirm.
__ vclgd(kScratchDoubleReg, i.InputSimd128Register(0), Condition(5),
Condition(0), Condition(3));
// The input has already been consumed, so dst may be overwritten.
// NOTE(review): vx of dst with itself presumably zeroes dst — confirm.
__ vx(dst, dst, dst, Condition(0), Condition(0), Condition(2));
// The operand order of the pack is swapped between endiannesses.
// NOTE(review): vpkls is presumably a logical (unsigned) saturating pack of
// 64-bit elements into 32-bit elements — confirm.
#ifdef V8_TARGET_BIG_ENDIAN
__ vpkls(dst, dst, kScratchDoubleReg, Condition(0), Condition(3));
#else
__ vpkls(dst, kScratchDoubleReg, dst, Condition(0), Condition(3));
#endif
break;
}
case kS390_StoreCompressTagged: {
CHECK(!instr->HasOutput());
size_t index = 0;

View File

@ -214,6 +214,9 @@ namespace compiler {
V(S390_F64x2Floor) \
V(S390_F64x2Trunc) \
V(S390_F64x2NearestInt) \
V(S390_F64x2ConvertLowI32x4S) \
V(S390_F64x2ConvertLowI32x4U) \
V(S390_F64x2PromoteLowF32x4) \
V(S390_F32x4Splat) \
V(S390_F32x4ExtractLane) \
V(S390_F32x4ReplaceLane) \
@ -243,6 +246,7 @@ namespace compiler {
V(S390_F32x4Floor) \
V(S390_F32x4Trunc) \
V(S390_F32x4NearestInt) \
V(S390_F32x4DemoteF64x2Zero) \
V(S390_I64x2Neg) \
V(S390_I64x2Add) \
V(S390_I64x2Sub) \
@ -302,6 +306,8 @@ namespace compiler {
V(S390_I32x4ExtAddPairwiseI16x8S) \
V(S390_I32x4ExtAddPairwiseI16x8U) \
V(S390_I32x4SignSelect) \
V(S390_I32x4TruncSatF64x2SZero) \
V(S390_I32x4TruncSatF64x2UZero) \
V(S390_I16x8Splat) \
V(S390_I16x8ExtractLaneU) \
V(S390_I16x8ExtractLaneS) \

View File

@ -161,6 +161,9 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kS390_F64x2Floor:
case kS390_F64x2Trunc:
case kS390_F64x2NearestInt:
case kS390_F64x2ConvertLowI32x4S:
case kS390_F64x2ConvertLowI32x4U:
case kS390_F64x2PromoteLowF32x4:
case kS390_F32x4Splat:
case kS390_F32x4ExtractLane:
case kS390_F32x4ReplaceLane:
@ -190,6 +193,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kS390_F32x4Floor:
case kS390_F32x4Trunc:
case kS390_F32x4NearestInt:
case kS390_F32x4DemoteF64x2Zero:
case kS390_I64x2Neg:
case kS390_I64x2Add:
case kS390_I64x2Sub:
@ -249,6 +253,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kS390_I32x4ExtAddPairwiseI16x8S:
case kS390_I32x4ExtAddPairwiseI16x8U:
case kS390_I32x4SignSelect:
case kS390_I32x4TruncSatF64x2SZero:
case kS390_I32x4TruncSatF64x2UZero:
case kS390_I16x8Splat:
case kS390_I16x8ExtractLaneU:
case kS390_I16x8ExtractLaneS:

View File

@ -2497,43 +2497,49 @@ void InstructionSelector::VisitWord64AtomicStore(Node* node) {
V(S128Xor) \
V(S128AndNot)
#define SIMD_UNOP_LIST(V) \
V(F64x2Abs) \
V(F64x2Neg) \
V(F64x2Sqrt) \
V(F64x2Ceil) \
V(F64x2Floor) \
V(F64x2Trunc) \
V(F64x2NearestInt) \
V(F32x4Abs) \
V(F32x4Neg) \
V(F32x4RecipApprox) \
V(F32x4RecipSqrtApprox) \
V(F32x4Sqrt) \
V(F32x4Ceil) \
V(F32x4Floor) \
V(F32x4Trunc) \
V(F32x4NearestInt) \
V(I64x2Neg) \
V(I64x2SConvertI32x4Low) \
V(I64x2SConvertI32x4High) \
V(I64x2UConvertI32x4Low) \
V(I64x2UConvertI32x4High) \
V(I32x4Neg) \
V(I32x4Abs) \
V(I32x4SConvertI16x8Low) \
V(I32x4SConvertI16x8High) \
V(I32x4UConvertI16x8Low) \
V(I32x4UConvertI16x8High) \
V(I16x8Neg) \
V(I16x8Abs) \
V(I16x8SConvertI8x16Low) \
V(I16x8SConvertI8x16High) \
V(I16x8UConvertI8x16Low) \
V(I16x8UConvertI8x16High) \
V(I8x16Neg) \
V(I8x16Abs) \
V(I8x16Popcnt) \
// X-macro listing SIMD unary operation names; V is applied to each entry.
#define SIMD_UNOP_LIST(V) \
V(F64x2Abs) \
V(F64x2Neg) \
V(F64x2Sqrt) \
V(F64x2Ceil) \
V(F64x2Floor) \
V(F64x2Trunc) \
V(F64x2NearestInt) \
V(F64x2ConvertLowI32x4S) \
V(F64x2ConvertLowI32x4U) \
V(F64x2PromoteLowF32x4) \
V(F32x4Abs) \
V(F32x4Neg) \
V(F32x4RecipApprox) \
V(F32x4RecipSqrtApprox) \
V(F32x4Sqrt) \
V(F32x4Ceil) \
V(F32x4Floor) \
V(F32x4Trunc) \
V(F32x4NearestInt) \
V(F32x4DemoteF64x2Zero) \
V(I64x2Neg) \
V(I64x2SConvertI32x4Low) \
V(I64x2SConvertI32x4High) \
V(I64x2UConvertI32x4Low) \
V(I64x2UConvertI32x4High) \
V(I32x4Neg) \
V(I32x4Abs) \
V(I32x4SConvertI16x8Low) \
V(I32x4SConvertI16x8High) \
V(I32x4UConvertI16x8Low) \
V(I32x4UConvertI16x8High) \
V(I32x4TruncSatF64x2SZero) \
V(I32x4TruncSatF64x2UZero) \
V(I16x8Neg) \
V(I16x8Abs) \
V(I16x8SConvertI8x16Low) \
V(I16x8SConvertI8x16High) \
V(I16x8UConvertI8x16Low) \
V(I16x8UConvertI8x16High) \
V(I8x16Neg) \
V(I8x16Abs) \
V(I8x16Popcnt) \
V(S128Not)
#define SIMD_SHIFT_LIST(V) \

View File

@ -779,6 +779,10 @@ void Simulator::EvalTableInit() {
V(vupl, VUPL, 0xE7D6) /* type = VRR_A VECTOR UNPACK LOW */ \
V(vuph, VUPH, 0xE7D7) /* type = VRR_A VECTOR UNPACK HIGH */ \
V(vpopct, VPOPCT, 0xE750) /* type = VRR_A VECTOR POPULATION COUNT */ \
V(vcdg, VCDG, 0xE7C3) /* VECTOR FP CONVERT FROM FIXED */ \
V(vcdlg, VCDLG, 0xE7C1) /* VECTOR FP CONVERT FROM LOGICAL */ \
V(vcgd, VCGD, 0xE7C2) /* VECTOR FP CONVERT TO FIXED */ \
V(vclgd, VCLGD, 0xE7C0) /* VECTOR FP CONVERT TO LOGICAL */ \
V(vmnl, VMNL, 0xE7FC) /* type = VRR_C VECTOR MINIMUM LOGICAL */ \
V(vmxl, VMXL, 0xE7FD) /* type = VRR_C VECTOR MAXIMUM LOGICAL */ \
V(vmn, VMN, 0xE7FE) /* type = VRR_C VECTOR MINIMUM */ \
@ -1656,6 +1660,56 @@ T Simulator::get_high_register(int reg) const {
return static_cast<T>(registers_[reg] >> 32);
}
// Produces the signed integer result of a floating-point-to-integer
// conversion with saturation.
//   a: the source floating-point value.
//   n: the value the caller derived from |a| (the vector handlers pass the
//      output of ComputeRounding).
// Returns |n| converted to R when |a| lies within R's range, the matching
// limit of R when |a| lies outside it, and R's minimum when |a| is NaN.
// Combinations of |a| and |n| that fit none of these cases hit
// UNIMPLEMENTED().
template <class T, class R>
static R ComputeSignedRoundingResult(T a, T n) {
  using ResultLimits = std::numeric_limits<R>;
  constexpr T kNegInfinity = -std::numeric_limits<T>::infinity();
  constexpr T kPosInfinity = std::numeric_limits<T>::infinity();
  constexpr long double kMinResult =
      static_cast<long double>(ResultLimits::min());
  constexpr long double kMaxResult =
      static_cast<long double>(ResultLimits::max());

  // NaN fails every ordered comparison below, so settle it first.
  if (std::isnan(a)) return ResultLimits::min();

  if (a < kMinResult) {
    // Below the representable range: saturate when the rounded value is
    // below the minimum, or sits exactly on it for a finite source.
    const bool rounded_below_min = kNegInfinity <= a && n < kMinResult;
    const bool rounded_onto_min = kNegInfinity < a && n == kMinResult;
    if (rounded_below_min || rounded_onto_min) return ResultLimits::min();
  } else if (a < 0.0) {
    return static_cast<R>(n);
  } else if (a == 0.0) {
    return 0;
  } else if (a <= kMaxResult) {
    return static_cast<R>(n);
  } else if (a <= kPosInfinity && n >= kMaxResult) {
    // Above the representable range: saturate to the maximum.
    return ResultLimits::max();
  }
  UNIMPLEMENTED();
  return 0;
}
// Produces the unsigned ("logical") integer result of a floating-point-to-
// integer conversion with saturation.
//   a: the source floating-point value.
//   n: the value the caller derived from |a| (the vector handlers pass the
//      output of ComputeRounding).
// Returns 0 when |a| is NaN, zero or negative, |n| converted to R when |a| is
// positive and no larger than R's maximum, and R's maximum when |a| exceeds
// it.
template <class T, class R>
static R ComputeLogicalRoundingResult(T a, T n) {
  using ResultLimits = std::numeric_limits<R>;
  constexpr T kNegInfinity = -std::numeric_limits<T>::infinity();
  constexpr T kPosInfinity = std::numeric_limits<T>::infinity();
  constexpr long double kMaxResult =
      static_cast<long double>(ResultLimits::max());

  // NaN fails every ordered comparison below, so settle it first.
  if (std::isnan(a)) return 0;
  if (kNegInfinity <= a && a <= 0.0) return 0;
  if (a <= kMaxResult) return static_cast<R>(n);
  if (a <= kPosInfinity) return ResultLimits::max();
  UNIMPLEMENTED();
  return 0;
}
void Simulator::set_low_register(int reg, uint32_t value) {
uint64_t shifted_val = static_cast<uint64_t>(value);
uint64_t orig_val = static_cast<uint64_t>(registers_[reg]);
@ -3489,6 +3543,81 @@ EVALUATE(VPOPCT) {
}
#undef CASE
// Switch arm for selector value |i|: for each lane of type S, reads the lane
// from register r2, converts it to D with static_cast, and stores it into the
// same lane of register r1. Expects r1 and r2 to be in scope at the use site.
#define CASE(i, S, D) \
case i: { \
FOR_EACH_LANE(index, S) { \
set_simd_register_by_lane<D>( \
r1, index, static_cast<D>(get_simd_register_by_lane<S>(r2, index))); \
} \
break; \
}
// Simulates VCDG (VECTOR FP CONVERT FROM FIXED): converts each signed integer
// lane of r2 to floating point in r1. m3 selects the lane types
// (2: int32_t -> float, 3: int64_t -> double); m4 and m5 are decoded but not
// used.
EVALUATE(VCDG) {
DCHECK_OPCODE(VCDG);
DECODE_VRR_A_INSTRUCTION(r1, r2, m5, m4, m3);
USE(m4);
USE(m5);
switch (m3) {
CASE(2, int32_t, float);
CASE(3, int64_t, double);
default:
UNREACHABLE();
}
return length;
}
// Simulates VCDLG (VECTOR FP CONVERT FROM LOGICAL): converts each unsigned
// integer lane of r2 to floating point in r1. m3 selects the lane types
// (2: uint32_t -> float, 3: uint64_t -> double); m4 and m5 are decoded but not
// used.
EVALUATE(VCDLG) {
DCHECK_OPCODE(VCDLG);
DECODE_VRR_A_INSTRUCTION(r1, r2, m5, m4, m3);
USE(m4);
USE(m5);
switch (m3) {
CASE(2, uint32_t, float);
CASE(3, uint64_t, double);
default:
UNREACHABLE();
}
return length;
}
#undef CASE
// Switch arm for selector value |i|: for each lane of floating-point type S in
// register r2, rounds the lane with ComputeRounding using m5, converts it to
// integer type D through Compute<type>RoundingResult (type is Signed or
// Logical), and stores it into the same lane of register r1. Expects r1, r2
// and m5 to be in scope at the use site.
#define CASE(i, S, D, type) \
case i: { \
FOR_EACH_LANE(index, S) { \
S a = get_simd_register_by_lane<S>(r2, index); \
S n = ComputeRounding<S>(a, m5); \
set_simd_register_by_lane<D>( \
r1, index, \
static_cast<D>(Compute##type##RoundingResult<S, D>(a, n))); \
} \
break; \
}
// Simulates VCGD (VECTOR FP CONVERT TO FIXED): rounds each floating-point lane
// of r2 according to m5 and stores the saturated signed integer result in r1.
// m3 selects the lane types (2: float -> int32_t, 3: double -> int64_t); m4 is
// decoded but not used.
EVALUATE(VCGD) {
  // The opcode check must name this handler's own instruction (VCGD), not the
  // similarly spelled convert-from-fixed instruction VCDG.
  DCHECK_OPCODE(VCGD);
  DECODE_VRR_A_INSTRUCTION(r1, r2, m5, m4, m3);
  USE(m4);
  switch (m3) {
    CASE(2, float, int32_t, Signed);
    CASE(3, double, int64_t, Signed);
    default:
      UNREACHABLE();
  }
  return length;
}
// Simulates VCLGD (VECTOR FP CONVERT TO LOGICAL): rounds each floating-point
// lane of r2 according to m5 and stores the saturated unsigned integer result
// in r1. m3 selects the lane types (2: float -> uint32_t,
// 3: double -> uint64_t); m4 is decoded but not used.
EVALUATE(VCLGD) {
DCHECK_OPCODE(VCLGD);
DECODE_VRR_A_INSTRUCTION(r1, r2, m5, m4, m3);
USE(m4);
switch (m3) {
CASE(2, float, uint32_t, Logical);
CASE(3, double, uint64_t, Logical);
default:
UNREACHABLE();
}
return length;
}
#undef CASE
template <class S, class D>
void VectorUnpackLow(Simulator* sim, int dst, int src) {
constexpr size_t kItemCount = kSimd128Size / sizeof(D);
@ -7447,36 +7576,6 @@ static int ComputeSignedRoundingConditionCode(T a, T n) {
return 0;
}
// Produces the signed integer result of a floating-point-to-integer
// conversion with saturation.
//   a: the source floating-point value.
//   n: the value the caller derived from |a| by rounding.
// Returns |n| converted to R when |a| lies within R's range, the matching
// limit of R when |a| lies outside it, and R's minimum when |a| is NaN.
// Combinations of |a| and |n| that fit none of these cases hit
// UNIMPLEMENTED().
template <class T, class R>
static R ComputeSignedRoundingResult(T a, T n) {
  using ResultLimits = std::numeric_limits<R>;
  constexpr T kNegInfinity = -std::numeric_limits<T>::infinity();
  constexpr T kPosInfinity = std::numeric_limits<T>::infinity();
  constexpr long double kMinResult =
      static_cast<long double>(ResultLimits::min());
  constexpr long double kMaxResult =
      static_cast<long double>(ResultLimits::max());

  // NaN fails every ordered comparison below, so settle it first.
  if (std::isnan(a)) return ResultLimits::min();

  if (a < kMinResult) {
    // Below the representable range: saturate when the rounded value is
    // below the minimum, or sits exactly on it for a finite source.
    const bool rounded_below_min = kNegInfinity <= a && n < kMinResult;
    const bool rounded_onto_min = kNegInfinity < a && n == kMinResult;
    if (rounded_below_min || rounded_onto_min) return ResultLimits::min();
  } else if (a < 0.0) {
    return static_cast<R>(n);
  } else if (a == 0.0) {
    return 0;
  } else if (a <= kMaxResult) {
    return static_cast<R>(n);
  } else if (a <= kPosInfinity && n >= kMaxResult) {
    // Above the representable range: saturate to the maximum.
    return ResultLimits::max();
  }
  UNIMPLEMENTED();
  return 0;
}
EVALUATE(CFDBRA) {
DCHECK_OPCODE(CFDBRA);
DECODE_RRF_E_INSTRUCTION(r1, r2, m3, m4);
@ -7567,26 +7666,6 @@ static int ComputeLogicalRoundingConditionCode(T a, T n) {
return 0;
}
// Produces the unsigned ("logical") integer result of a floating-point-to-
// integer conversion with saturation.
//   a: the source floating-point value.
//   n: the value the caller derived from |a| by rounding.
// Returns 0 when |a| is NaN, zero or negative, |n| converted to R when |a| is
// positive and no larger than R's maximum, and R's maximum when |a| exceeds
// it.
template <class T, class R>
static R ComputeLogicalRoundingResult(T a, T n) {
  using ResultLimits = std::numeric_limits<R>;
  constexpr T kNegInfinity = -std::numeric_limits<T>::infinity();
  constexpr T kPosInfinity = std::numeric_limits<T>::infinity();
  constexpr long double kMaxResult =
      static_cast<long double>(ResultLimits::max());

  // NaN fails every ordered comparison below, so settle it first.
  if (std::isnan(a)) return 0;
  if (kNegInfinity <= a && a <= 0.0) return 0;
  if (a <= kMaxResult) return static_cast<R>(n);
  if (a <= kPosInfinity) return ResultLimits::max();
  UNIMPLEMENTED();
  return 0;
}
EVALUATE(CLFEBR) {
DCHECK_OPCODE(CLFEBR);
DECODE_RRF_E_INSTRUCTION(r1, r2, m3, m4);