[ia32][wasm] Add I32x4SConvertF32x4, I32x4UConvertF32x4
Change-Id: Ic2c4f02d5e451c4a3a6612ae91e5cc8231d62448 Reviewed-on: https://chromium-review.googlesource.com/1119773 Commit-Queue: Jing Bao <jing.bao@intel.com> Reviewed-by: Bill Budge <bbudge@chromium.org> Cr-Commit-Position: refs/heads/master@{#54307}
This commit is contained in:
parent
a0d8c293d0
commit
054dd559ac
@ -2010,6 +2010,42 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
i.InputOperand(2), i.InputInt8(1));
|
||||
break;
|
||||
}
|
||||
case kSSEI32x4SConvertF32x4: {
|
||||
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
// NAN->0
|
||||
__ movaps(kScratchDoubleReg, dst);
|
||||
__ cmpeqps(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ pand(dst, kScratchDoubleReg);
|
||||
// Set top bit if >= 0 (but not -0.0!)
|
||||
__ pxor(kScratchDoubleReg, dst);
|
||||
// Convert
|
||||
__ cvttps2dq(dst, dst);
|
||||
// Set top bit if >=0 is now < 0
|
||||
__ pand(kScratchDoubleReg, dst);
|
||||
__ psrad(kScratchDoubleReg, 31);
|
||||
// Set positive overflow lanes to 0x7FFFFFFF
|
||||
__ pxor(dst, kScratchDoubleReg);
|
||||
break;
|
||||
}
|
||||
case kAVXI32x4SConvertF32x4: {
|
||||
CpuFeatureScope avx_scope(tasm(), AVX);
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
XMMRegister src = i.InputSimd128Register(0);
|
||||
// NAN->0
|
||||
__ vcmpeqps(kScratchDoubleReg, src, src);
|
||||
__ vpand(dst, src, kScratchDoubleReg);
|
||||
// Set top bit if >= 0 (but not -0.0!)
|
||||
__ vpxor(kScratchDoubleReg, kScratchDoubleReg, dst);
|
||||
// Convert
|
||||
__ vcvttps2dq(dst, dst);
|
||||
// Set top bit if >=0 is now < 0
|
||||
__ vpand(kScratchDoubleReg, kScratchDoubleReg, dst);
|
||||
__ vpsrad(kScratchDoubleReg, kScratchDoubleReg, 31);
|
||||
// Set positive overflow lanes to 0x7FFFFFFF
|
||||
__ vpxor(dst, dst, kScratchDoubleReg);
|
||||
break;
|
||||
}
|
||||
case kIA32I32x4SConvertI16x8Low: {
|
||||
__ Pmovsxwd(i.OutputSimd128Register(), i.InputOperand(0));
|
||||
break;
|
||||
@ -2179,6 +2215,61 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ vpcmpeqd(i.OutputSimd128Register(), kScratchDoubleReg, src2);
|
||||
break;
|
||||
}
|
||||
case kSSEI32x4UConvertF32x4: {
|
||||
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
CpuFeatureScope sse_scope(tasm(), SSE4_1);
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
|
||||
// NAN->0, negative->0
|
||||
__ pxor(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ maxps(dst, kScratchDoubleReg);
|
||||
// scratch: float representation of max_signed
|
||||
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ psrld(kScratchDoubleReg, 1); // 0x7fffffff
|
||||
__ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // 0x4f000000
|
||||
// tmp: convert (src-max_signed).
|
||||
// Positive overflow lanes -> 0x7FFFFFFF
|
||||
// Negative lanes -> 0
|
||||
__ movaps(tmp, dst);
|
||||
__ subps(tmp, kScratchDoubleReg);
|
||||
__ cmpleps(kScratchDoubleReg, tmp);
|
||||
__ cvttps2dq(tmp, tmp);
|
||||
__ pxor(tmp, kScratchDoubleReg);
|
||||
__ pxor(kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ pmaxsd(tmp, kScratchDoubleReg);
|
||||
// convert. Overflow lanes above max_signed will be 0x80000000
|
||||
__ cvttps2dq(dst, dst);
|
||||
// Add (src-max_signed) for overflow lanes.
|
||||
__ paddd(dst, tmp);
|
||||
break;
|
||||
}
|
||||
case kAVXI32x4UConvertF32x4: {
|
||||
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
CpuFeatureScope avx_scope(tasm(), AVX);
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
|
||||
// NAN->0, negative->0
|
||||
__ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ vmaxps(dst, dst, kScratchDoubleReg);
|
||||
// scratch: float representation of max_signed
|
||||
__ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1); // 0x7fffffff
|
||||
__ vcvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // 0x4f000000
|
||||
// tmp: convert (src-max_signed).
|
||||
// Positive overflow lanes -> 0x7FFFFFFF
|
||||
// Negative lanes -> 0
|
||||
__ vsubps(tmp, dst, kScratchDoubleReg);
|
||||
__ vcmpleps(kScratchDoubleReg, kScratchDoubleReg, tmp);
|
||||
__ vcvttps2dq(tmp, tmp);
|
||||
__ vpxor(tmp, tmp, kScratchDoubleReg);
|
||||
__ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
|
||||
__ vpmaxsd(tmp, tmp, kScratchDoubleReg);
|
||||
// convert. Overflow lanes above max_signed will be 0x80000000
|
||||
__ vcvttps2dq(dst, dst);
|
||||
// Add (src-max_signed) for overflow lanes.
|
||||
__ vpaddd(dst, dst, tmp);
|
||||
break;
|
||||
}
|
||||
case kIA32I32x4UConvertI16x8Low: {
|
||||
__ Pmovzxwd(i.OutputSimd128Register(), i.InputOperand(0));
|
||||
break;
|
||||
|
@ -154,6 +154,8 @@ namespace compiler {
|
||||
V(IA32I32x4ExtractLane) \
|
||||
V(SSEI32x4ReplaceLane) \
|
||||
V(AVXI32x4ReplaceLane) \
|
||||
V(SSEI32x4SConvertF32x4) \
|
||||
V(AVXI32x4SConvertF32x4) \
|
||||
V(IA32I32x4SConvertI16x8Low) \
|
||||
V(IA32I32x4SConvertI16x8High) \
|
||||
V(IA32I32x4Neg) \
|
||||
@ -181,6 +183,8 @@ namespace compiler {
|
||||
V(AVXI32x4GtS) \
|
||||
V(SSEI32x4GeS) \
|
||||
V(AVXI32x4GeS) \
|
||||
V(SSEI32x4UConvertF32x4) \
|
||||
V(AVXI32x4UConvertF32x4) \
|
||||
V(IA32I32x4UConvertI16x8Low) \
|
||||
V(IA32I32x4UConvertI16x8High) \
|
||||
V(SSEI32x4ShrU) \
|
||||
|
@ -136,6 +136,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kIA32I32x4ExtractLane:
|
||||
case kSSEI32x4ReplaceLane:
|
||||
case kAVXI32x4ReplaceLane:
|
||||
case kSSEI32x4SConvertF32x4:
|
||||
case kAVXI32x4SConvertF32x4:
|
||||
case kIA32I32x4SConvertI16x8Low:
|
||||
case kIA32I32x4SConvertI16x8High:
|
||||
case kIA32I32x4Neg:
|
||||
@ -163,6 +165,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kAVXI32x4GtS:
|
||||
case kSSEI32x4GeS:
|
||||
case kAVXI32x4GeS:
|
||||
case kSSEI32x4UConvertF32x4:
|
||||
case kAVXI32x4UConvertF32x4:
|
||||
case kIA32I32x4UConvertI16x8Low:
|
||||
case kIA32I32x4UConvertI16x8High:
|
||||
case kSSEI32x4ShrU:
|
||||
|
@ -1877,6 +1877,19 @@ void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) {
|
||||
VisitRRSimd(this, node, kAVXF32x4UConvertI32x4, kSSEF32x4UConvertI32x4);
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitI32x4SConvertF32x4(Node* node) {
  // Signed f32x4 -> i32x4 conversion needs no temp registers and has a
  // plain reg-in/reg-out shape, so delegate to the shared RR-SIMD helper.
  // NOTE(review): presumably VisitRRSimd selects the AVX opcode when AVX is
  // supported and the SSE opcode otherwise (mirroring the explicit
  // IsSupported(AVX) check in VisitI32x4UConvertF32x4) — confirm in helper.
  VisitRRSimd(this, node, kAVXI32x4SConvertF32x4, kSSEI32x4SConvertF32x4);
}
|
||||
|
||||
void InstructionSelector::VisitI32x4UConvertF32x4(Node* node) {
  // Unsigned f32x4 -> i32x4 conversion. Unlike the signed variant this
  // cannot go through VisitRRSimd: the code generator needs one scratch
  // Simd128 temp, and the destination must alias the first input
  // (DefineSameAsFirst) because the sequence mutates it in place.
  IA32OperandGenerator g(this);
  InstructionOperand scratch[] = {g.TempSimd128Register()};
  // Prefer the AVX encoding when the CPU supports it.
  InstructionCode op = kSSEI32x4UConvertF32x4;
  if (IsSupported(AVX)) op = kAVXI32x4UConvertF32x4;
  Emit(op, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)),
       arraysize(scratch), scratch);
}
|
||||
|
||||
void InstructionSelector::VisitI8x16Mul(Node* node) {
|
||||
IA32OperandGenerator g(this);
|
||||
InstructionOperand operand0 = g.UseUniqueRegister(node->InputAt(0));
|
||||
|
@ -2392,7 +2392,7 @@ void InstructionSelector::VisitWord64AtomicCompareExchange(Node* node) {
|
||||
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64
|
||||
|
||||
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
|
||||
!V8_TARGET_ARCH_MIPS64
|
||||
!V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
|
||||
void InstructionSelector::VisitI32x4SConvertF32x4(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
@ -2400,11 +2400,7 @@ void InstructionSelector::VisitI32x4SConvertF32x4(Node* node) {
|
||||
void InstructionSelector::VisitI32x4UConvertF32x4(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
|
||||
// && !V8_TARGET_ARCH_MIPS64
|
||||
|
||||
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
|
||||
!V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
|
||||
void InstructionSelector::VisitI32x4SConvertI16x8Low(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
@ -2451,11 +2447,7 @@ void InstructionSelector::VisitI8x16SConvertI16x8(Node* node) {
|
||||
void InstructionSelector::VisitI8x16UConvertI16x8(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
|
||||
// && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
|
||||
|
||||
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
|
||||
!V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
|
||||
void InstructionSelector::VisitI8x16Shl(Node* node) { UNIMPLEMENTED(); }
|
||||
|
||||
void InstructionSelector::VisitI8x16ShrS(Node* node) { UNIMPLEMENTED(); }
|
||||
|
@ -821,7 +821,7 @@ WASM_SIMD_TEST(I8x16ReplaceLane) {
|
||||
}
|
||||
|
||||
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
|
||||
V8_TARGET_ARCH_MIPS64
|
||||
V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
|
||||
|
||||
int32_t ConvertToInt(double val, bool unsigned_integer) {
|
||||
if (std::isnan(val)) return 0;
|
||||
@ -860,11 +860,7 @@ WASM_SIMD_TEST(I32x4ConvertF32x4) {
|
||||
CHECK_EQ(1, r.Call(*i, signed_value, unsigned_value));
|
||||
}
|
||||
}
|
||||
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
|
||||
// V8_TARGET_ARCH_MIPS64
|
||||
|
||||
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
|
||||
V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
|
||||
// Tests both signed and unsigned conversion from I16x8 (unpacking).
|
||||
WASM_SIMD_TEST(I32x4ConvertI16x8) {
|
||||
WasmRunner<int32_t, int32_t, int32_t, int32_t, int32_t> r(execution_mode,
|
||||
|
Loading…
Reference in New Issue
Block a user