[ia32][wasm] Add F32x4SConvertI32x4, F32x4UConvertI32x4
Add Cvtdq2ps macro. Add pblendw/vpblendw. Change-Id: I5c8232d17c220fbbb4845cbfad4ce765f0bbbb90 Reviewed-on: https://chromium-review.googlesource.com/961973 Commit-Queue: Jing Bao <jing.bao@intel.com> Reviewed-by: Bill Budge <bbudge@chromium.org> Cr-Commit-Position: refs/heads/master@{#52255}
This commit is contained in:
parent
caf74f9c61
commit
539fee65e8
@ -1768,6 +1768,40 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
i.InputOperand(2), i.InputInt8(1) << 4);
|
||||
break;
|
||||
}
|
||||
// Signed i32x4 -> f32x4 conversion: emitted as a single Cvtdq2ps from input 0
// into the output SIMD register.
// NOTE(review): Cvtdq2ps is the TurboAssembler macro; presumably it picks the
// AVX or SSE encoding at code-generation time - confirm against its definition.
case kIA32F32x4SConvertI32x4: {
|
||||
__ Cvtdq2ps(i.OutputSimd128Register(), i.InputOperand(0));
|
||||
break;
|
||||
}
|
||||
// Unsigned i32x4 -> f32x4 conversion, SSE4.1 form.
// cvtdq2ps only converts *signed* 32-bit lanes, so each lane is split into its
// low and high 16-bit halves, each half is converted separately, and the two
// float results are summed.
case kSSEF32x4UConvertI32x4: {
|
||||
// The sequence below works in place on dst, so the output register must be
// the same register as input 0.
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
// pblendw is an SSE4.1 instruction.
CpuFeatureScope sse_scope(tasm(), SSE4_1);
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
__ pxor(kScratchDoubleReg, kScratchDoubleReg); // scratch = all zeros
|
||||
// Mask 0x55 picks 16-bit words 0, 2, 4 and 6 from dst, i.e. the low word of
// every 32-bit lane; the high words of scratch stay zero.
__ pblendw(kScratchDoubleReg, dst, 0x55); // scratch = lo 16 bits per lane
|
||||
__ psubd(dst, kScratchDoubleReg); // dst = hi 16 bits per lane (lo cleared)
|
||||
__ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
|
||||
// Logical shift right by one clears the top bit so the value is non-negative
// when the following signed conversion interprets it.
__ psrld(dst, 1); // halve hi so it fits in the signed range
|
||||
__ cvtdq2ps(dst, dst); // convert hi/2 exactly
|
||||
__ addps(dst, dst); // double to restore hi, exactly
|
||||
__ addps(dst, kScratchDoubleReg); // add hi and lo; the only step that may round
|
||||
break;
|
||||
}
|
||||
// Unsigned i32x4 -> f32x4 conversion, AVX form.
// Same algorithm as the SSE case: vcvtdq2ps only converts *signed* 32-bit
// lanes, so each lane is split into low and high 16-bit halves that are
// converted separately and then summed. The three-operand AVX encodings let
// dst differ from src, so src itself is never written by these instructions.
case kAVXF32x4UConvertI32x4: {
|
||||
CpuFeatureScope avx_scope(tasm(), AVX);
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
XMMRegister src = i.InputSimd128Register(0);
|
||||
__ vpxor(kScratchDoubleReg, kScratchDoubleReg,
|
||||
kScratchDoubleReg); // scratch = all zeros
|
||||
// Mask 0x55 picks 16-bit words 0, 2, 4 and 6 from src, i.e. the low word of
// every 32-bit lane; the high words of scratch stay zero.
__ vpblendw(kScratchDoubleReg, kScratchDoubleReg, src,
|
||||
0x55); // scratch = lo 16 bits per lane
|
||||
__ vpsubd(dst, src, kScratchDoubleReg); // dst = hi 16 bits per lane (lo cleared)
|
||||
__ vcvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
|
||||
// Logical shift right by one clears the top bit so the value is non-negative
// when the following signed conversion interprets it.
__ vpsrld(dst, dst, 1); // halve hi so it fits in the signed range
|
||||
__ vcvtdq2ps(dst, dst); // convert hi/2 exactly
|
||||
__ vaddps(dst, dst, dst); // double to restore hi, exactly
|
||||
__ vaddps(dst, dst, kScratchDoubleReg); // add hi and lo; the only step that may round
|
||||
break;
|
||||
}
|
||||
case kSSEF32x4Abs: {
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
Operand src = i.InputOperand(0);
|
||||
|
@ -121,6 +121,9 @@ namespace compiler {
|
||||
V(AVXF32x4ExtractLane) \
|
||||
V(SSEF32x4ReplaceLane) \
|
||||
V(AVXF32x4ReplaceLane) \
|
||||
V(IA32F32x4SConvertI32x4) \
|
||||
V(SSEF32x4UConvertI32x4) \
|
||||
V(AVXF32x4UConvertI32x4) \
|
||||
V(SSEF32x4Abs) \
|
||||
V(AVXF32x4Abs) \
|
||||
V(SSEF32x4Neg) \
|
||||
|
@ -103,6 +103,9 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kAVXF32x4ExtractLane:
|
||||
case kSSEF32x4ReplaceLane:
|
||||
case kAVXF32x4ReplaceLane:
|
||||
case kIA32F32x4SConvertI32x4:
|
||||
case kSSEF32x4UConvertI32x4:
|
||||
case kAVXF32x4UConvertI32x4:
|
||||
case kSSEF32x4Abs:
|
||||
case kAVXF32x4Abs:
|
||||
case kSSEF32x4Neg:
|
||||
|
@ -1793,6 +1793,7 @@ VISIT_ATOMIC_BINOP(Xor)
|
||||
V(S128Xor)
|
||||
|
||||
#define SIMD_INT_UNOP_LIST(V) \
|
||||
V(F32x4SConvertI32x4) \
|
||||
V(I32x4Neg) \
|
||||
V(I16x8Neg) \
|
||||
V(I8x16Neg)
|
||||
@ -1832,6 +1833,16 @@ void InstructionSelector::VisitF32x4ExtractLane(Node* node) {
|
||||
}
|
||||
}
|
||||
|
||||
// Selects the ia32 instruction for an unsigned i32x4 -> f32x4 conversion.
// The input is always placed in a register. With AVX the result may live in
// any register; without it the SSE opcode is emitted with the output
// constrained to the same register as the input.
void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) {
  IA32OperandGenerator g(this);
  InstructionOperand input = g.UseRegister(node->InputAt(0));

  if (!IsSupported(AVX)) {
    // SSE path: the code generator converts in place, so tie output to input.
    Emit(kSSEF32x4UConvertI32x4, g.DefineSameAsFirst(node), input);
    return;
  }

  // AVX path: no aliasing constraint between output and input.
  Emit(kAVXF32x4UConvertI32x4, g.DefineAsRegister(node), input);
}
|
||||
|
||||
#define SIMD_I8X16_SHIFT_OPCODES(V) \
|
||||
V(I8x16Shl) \
|
||||
V(I8x16ShrS) \
|
||||
|
@ -2306,7 +2306,7 @@ void InstructionSelector::VisitWord32PairSar(Node* node) { UNIMPLEMENTED(); }
|
||||
#endif // V8_TARGET_ARCH_64_BIT
|
||||
|
||||
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
|
||||
!V8_TARGET_ARCH_MIPS64
|
||||
!V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
|
||||
void InstructionSelector::VisitF32x4SConvertI32x4(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
@ -2315,7 +2315,7 @@ void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
|
||||
// && !V8_TARGET_ARCH_MIPS64
|
||||
// && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
|
||||
|
||||
#if !V8_TARGET_ARCH_X64
|
||||
void InstructionSelector::VisitWord64AtomicLoad(Node* node) { UNIMPLEMENTED(); }
|
||||
|
@ -2725,6 +2725,17 @@ void Assembler::pshufd(XMMRegister dst, Operand src, uint8_t shuffle) {
|
||||
EMIT(shuffle);
|
||||
}
|
||||
|
||||
// Emits the SSE4.1 PBLENDW instruction (byte sequence 66 0F 3A 0E, followed
// by the ModR/M-encoded operands and an 8-bit immediate).
//   dst  - destination XMM register (also the first blend source).
//   src  - second blend source, register or memory operand.
//   mask - immediate selecting, per 16-bit word, which source supplies it.
// Requires SSE4_1 to be enabled on this assembler (checked in debug builds).
void Assembler::pblendw(XMMRegister dst, Operand src, uint8_t mask) {
|
||||
DCHECK(IsEnabled(SSE4_1));
|
||||
EnsureSpace ensure_space(this);
|
||||
EMIT(0x66);  // operand-size prefix
|
||||
EMIT(0x0F);  // two-byte opcode escape
|
||||
EMIT(0x3A);  // three-byte opcode map 0F 3A
|
||||
EMIT(0x0E);  // PBLENDW opcode
|
||||
emit_sse_operand(dst, src);
|
||||
EMIT(mask);  // trailing imm8
|
||||
}
|
||||
|
||||
void Assembler::pextrb(Operand dst, XMMRegister src, int8_t offset) {
|
||||
DCHECK(IsEnabled(SSE4_1));
|
||||
EnsureSpace ensure_space(this);
|
||||
@ -2959,6 +2970,12 @@ void Assembler::vpshufd(XMMRegister dst, Operand src, uint8_t shuffle) {
|
||||
EMIT(shuffle);
|
||||
}
|
||||
|
||||
// Emits the AVX VPBLENDW instruction: opcode 0x0E with the 66 prefix in the
// 0F 3A map, followed by an 8-bit immediate.
//   dst  - destination XMM register.
//   src1 - first blend source register.
//   src2 - second blend source, register or memory operand.
//   mask - immediate selecting, per 16-bit word, which source supplies it.
void Assembler::vpblendw(XMMRegister dst, XMMRegister src1, Operand src2,
|
||||
uint8_t mask) {
|
||||
vinstr(0x0E, dst, src1, src2, k66, k0F3A, kWIG);
|
||||
EMIT(mask);  // trailing imm8, appended after the VEX-encoded instruction
|
||||
}
|
||||
|
||||
void Assembler::vpextrb(Operand dst, XMMRegister src, int8_t offset) {
|
||||
vinstr(0x14, src, xmm0, dst, k66, k0F3A, kWIG);
|
||||
EMIT(offset);
|
||||
|
@ -1131,6 +1131,11 @@ class Assembler : public AssemblerBase {
|
||||
}
|
||||
void pshufd(XMMRegister dst, Operand src, uint8_t shuffle);
|
||||
|
||||
// Register-source convenience overload: wraps src in an Operand and forwards
// to pblendw(XMMRegister, Operand, uint8_t).
void pblendw(XMMRegister dst, XMMRegister src, uint8_t mask) {
|
||||
pblendw(dst, Operand(src), mask);
|
||||
}
|
||||
void pblendw(XMMRegister dst, Operand src, uint8_t mask);
|
||||
|
||||
void pextrb(Register dst, XMMRegister src, int8_t offset) {
|
||||
pextrb(Operand(dst), src, offset);
|
||||
}
|
||||
@ -1439,6 +1444,12 @@ class Assembler : public AssemblerBase {
|
||||
}
|
||||
void vpshufd(XMMRegister dst, Operand src, uint8_t shuffle);
|
||||
|
||||
// Register-source convenience overload: wraps src2 in an Operand and forwards
// to vpblendw(XMMRegister, XMMRegister, Operand, uint8_t).
void vpblendw(XMMRegister dst, XMMRegister src1, XMMRegister src2,
|
||||
uint8_t mask) {
|
||||
vpblendw(dst, src1, Operand(src2), mask);
|
||||
}
|
||||
void vpblendw(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t mask);
|
||||
|
||||
void vpextrb(Register dst, XMMRegister src, int8_t offset) {
|
||||
vpextrb(Operand(dst), src, offset);
|
||||
}
|
||||
|
@ -819,6 +819,13 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
|
||||
int mod, regop, rm, vvvv = vex_vreg();
|
||||
get_modrm(*current, &mod, &regop, &rm);
|
||||
switch (opcode) {
|
||||
case 0x0E:
|
||||
AppendToBuffer("vpblendw %s,%s,", NameOfXMMRegister(regop),
|
||||
NameOfXMMRegister(vvvv));
|
||||
current += PrintRightXMMOperand(current);
|
||||
AppendToBuffer(",%d", *reinterpret_cast<uint8_t*>(current));
|
||||
current++;
|
||||
break;
|
||||
case 0x14:
|
||||
AppendToBuffer("vpextrb ");
|
||||
current += PrintRightOperand(current);
|
||||
@ -1961,6 +1968,14 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
|
||||
NameOfXMMRegister(rm),
|
||||
static_cast<int>(imm8));
|
||||
data += 2;
|
||||
} else if (*data == 0x0E) {
|
||||
data++;
|
||||
int mod, regop, rm;
|
||||
get_modrm(*data, &mod, &regop, &rm);
|
||||
AppendToBuffer("pblendw %s,", NameOfXMMRegister(regop));
|
||||
data += PrintRightXMMOperand(data);
|
||||
AppendToBuffer(",%d", *reinterpret_cast<uint8_t*>(data));
|
||||
data++;
|
||||
} else if (*data == 0x14) {
|
||||
data++;
|
||||
int mod, regop, rm;
|
||||
|
@ -228,6 +228,7 @@ class TurboAssembler : public Assembler {
|
||||
AVX_OP2_WITH_TYPE(Movd, movd, XMMRegister, Operand)
|
||||
AVX_OP2_WITH_TYPE(Movd, movd, Register, XMMRegister)
|
||||
AVX_OP2_WITH_TYPE(Movd, movd, Operand, XMMRegister)
|
||||
AVX_OP2_WITH_TYPE(Cvtdq2ps, cvtdq2ps, XMMRegister, Operand)
|
||||
|
||||
#undef AVX_OP2_WITH_TYPE
|
||||
|
||||
|
@ -573,6 +573,8 @@ TEST(DisasmIa320) {
|
||||
{
|
||||
if (CpuFeatures::IsSupported(SSE4_1)) {
|
||||
CpuFeatureScope scope(&assm, SSE4_1);
|
||||
__ pblendw(xmm5, xmm1, 5);
|
||||
__ pblendw(xmm5, Operand(edx, 4), 5);
|
||||
__ pextrb(eax, xmm0, 1);
|
||||
__ pextrb(Operand(edx, 4), xmm0, 1);
|
||||
__ pextrw(eax, xmm0, 1);
|
||||
@ -689,6 +691,8 @@ TEST(DisasmIa320) {
|
||||
__ vpshuflw(xmm5, Operand(edx, 4), 5);
|
||||
__ vpshufd(xmm5, xmm1, 5);
|
||||
__ vpshufd(xmm5, Operand(edx, 4), 5);
|
||||
__ vpblendw(xmm5, xmm1, xmm0, 5);
|
||||
__ vpblendw(xmm5, xmm1, Operand(edx, 4), 5);
|
||||
__ vpextrb(eax, xmm0, 1);
|
||||
__ vpextrb(Operand(edx, 4), xmm0, 1);
|
||||
__ vpextrw(eax, xmm0, 1);
|
||||
|
@ -446,7 +446,7 @@ WASM_SIMD_TEST(F32x4ReplaceLane) {
|
||||
}
|
||||
|
||||
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
|
||||
V8_TARGET_ARCH_MIPS64
|
||||
V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
|
||||
// Tests both signed and unsigned conversion.
|
||||
WASM_SIMD_TEST(F32x4ConvertI32x4) {
|
||||
WasmRunner<int32_t, int32_t, float, float> r(kExecuteTurbofan, lower_simd);
|
||||
@ -471,7 +471,7 @@ WASM_SIMD_TEST(F32x4ConvertI32x4) {
|
||||
}
|
||||
}
|
||||
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
|
||||
// V8_TARGET_ARCH_MIPS64
|
||||
// V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
|
||||
|
||||
void RunF32x4UnOpTest(LowerSimd lower_simd, WasmOpcode simd_op,
|
||||
FloatUnOp expected_op, float error = 0.0f) {
|
||||
|
Loading…
Reference in New Issue
Block a user