[ia32] F32x4SConvertI32x4 / F32x4UConvertI32x4 support (reconstructed patch)

Summary of the hunks below:
  * compiler/ia32/code-generator-ia32.cc
      - kIA32F32x4SConvertI32x4 emits a single Cvtdq2ps.
      - kSSEF32x4UConvertI32x4 (SSE4_1 scope, output must alias input 0)
        and kAVXF32x4UConvertI32x4 split every 32-bit lane into its low
        16 bits (blend against a zeroed scratch register with mask 0x55)
        and the remainder, convert both parts with the signed conversion,
        halve the high part before converting and double it afterwards,
        then add the two partial results.
  * compiler/ia32/instruction-codes-ia32.h, instruction-scheduler-ia32.cc
      - register the three new opcodes.
  * compiler/ia32/instruction-selector-ia32.cc
      - F32x4SConvertI32x4 joins SIMD_INT_UNOP_LIST; a dedicated visitor
        picks the AVX (DefineAsRegister) or SSE (DefineSameAsFirst) opcode.
  * compiler/instruction-selector.cc
      - IA32 is excluded from the UNIMPLEMENTED() stubs for both visitors.
  * ia32/assembler-ia32.{h,cc}
      - pblendw (bytes 66 0F 3A 0E, operand, imm8 mask) and vpblendw
        (vinstr 0x0E, k66, k0F3A, kWIG, followed by the imm8 mask).
  * ia32/disasm-ia32.cc
      - decode opcode 0x0E in both the AVX and the legacy 66 0F 3A paths.
  * ia32/macro-assembler-ia32.h
      - Cvtdq2ps(XMMRegister, Operand) via AVX_OP2_WITH_TYPE.
  * test/cctest
      - disassembler coverage for pblendw/vpblendw; the F32x4ConvertI32x4
        wasm SIMD test is enabled for V8_TARGET_ARCH_IA32.

Reconstruction notes:
  * The patch text had been collapsed onto a few physical lines. Line
    breaks were rebuilt from the +/- markers and the hunk line counts.
    Indentation and macro-backslash columns are best effort; if a hunk
    is rejected, retry with `git apply --ignore-whitespace` or `patch -l`.
  * `&regop` had been mangled into a registered-trademark sign followed
    by `op` in two get_modrm() calls; both are repaired.
  * NOTE(review): template arguments had been stripped from the text.
    They are restored as reinterpret_cast<uint8_t*> (two places),
    static_cast<int>, v8::internal::Vector<char> and
    WasmRunner<int32_t, int32_t, float, float>. Confirm against the tree.
  * NOTE(review): the second disasm-ia32.cc hunk was labelled
    "-1961,6 +1968,14" but only two leading context lines survived. The
    header is adjusted to "-1962,5 +1969,13" so that the counts match the
    lines actually present.

diff --git a/src/compiler/ia32/code-generator-ia32.cc b/src/compiler/ia32/code-generator-ia32.cc
index 75d81775b7..de76c8cd51 100644
--- a/src/compiler/ia32/code-generator-ia32.cc
+++ b/src/compiler/ia32/code-generator-ia32.cc
@@ -1768,6 +1768,40 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
                    i.InputOperand(2), i.InputInt8(1) << 4);
       break;
     }
+    case kIA32F32x4SConvertI32x4: {
+      __ Cvtdq2ps(i.OutputSimd128Register(), i.InputOperand(0));
+      break;
+    }
+    case kSSEF32x4UConvertI32x4: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      XMMRegister dst = i.OutputSimd128Register();
+      __ pxor(kScratchDoubleReg, kScratchDoubleReg);      // zeros
+      __ pblendw(kScratchDoubleReg, dst, 0x55);           // get lo 16 bits
+      __ psubd(dst, kScratchDoubleReg);                   // get hi 16 bits
+      __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // convert lo exactly
+      __ psrld(dst, 1);                  // divide by 2 to get in unsigned range
+      __ cvtdq2ps(dst, dst);             // convert hi exactly
+      __ addps(dst, dst);                // double hi, exactly
+      __ addps(dst, kScratchDoubleReg);  // add hi and lo, may round.
+      break;
+    }
+    case kAVXF32x4UConvertI32x4: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src = i.InputSimd128Register(0);
+      __ vpxor(kScratchDoubleReg, kScratchDoubleReg,
+               kScratchDoubleReg);  // zeros
+      __ vpblendw(kScratchDoubleReg, kScratchDoubleReg, src,
+                  0x55);                       // get lo 16 bits
+      __ vpsubd(dst, src, kScratchDoubleReg);  // get hi 16 bits
+      __ vcvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // convert lo exactly
+      __ vpsrld(dst, dst, 1);    // divide by 2 to get in unsigned range
+      __ vcvtdq2ps(dst, dst);    // convert hi exactly
+      __ vaddps(dst, dst, dst);  // double hi, exactly
+      __ vaddps(dst, dst, kScratchDoubleReg);  // add hi and lo, may round.
+      break;
+    }
     case kSSEF32x4Abs: {
       XMMRegister dst = i.OutputSimd128Register();
       Operand src = i.InputOperand(0);
diff --git a/src/compiler/ia32/instruction-codes-ia32.h b/src/compiler/ia32/instruction-codes-ia32.h
index 1d75f2e255..0d84bf97df 100644
--- a/src/compiler/ia32/instruction-codes-ia32.h
+++ b/src/compiler/ia32/instruction-codes-ia32.h
@@ -121,6 +121,9 @@ namespace compiler {
   V(AVXF32x4ExtractLane)           \
   V(SSEF32x4ReplaceLane)           \
   V(AVXF32x4ReplaceLane)           \
+  V(IA32F32x4SConvertI32x4)        \
+  V(SSEF32x4UConvertI32x4)         \
+  V(AVXF32x4UConvertI32x4)         \
   V(SSEF32x4Abs)                   \
   V(AVXF32x4Abs)                   \
   V(SSEF32x4Neg)                   \
diff --git a/src/compiler/ia32/instruction-scheduler-ia32.cc b/src/compiler/ia32/instruction-scheduler-ia32.cc
index e586872a8a..291f3c1d3e 100644
--- a/src/compiler/ia32/instruction-scheduler-ia32.cc
+++ b/src/compiler/ia32/instruction-scheduler-ia32.cc
@@ -103,6 +103,9 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kAVXF32x4ExtractLane:
     case kSSEF32x4ReplaceLane:
     case kAVXF32x4ReplaceLane:
+    case kIA32F32x4SConvertI32x4:
+    case kSSEF32x4UConvertI32x4:
+    case kAVXF32x4UConvertI32x4:
     case kSSEF32x4Abs:
     case kAVXF32x4Abs:
     case kSSEF32x4Neg:
diff --git a/src/compiler/ia32/instruction-selector-ia32.cc b/src/compiler/ia32/instruction-selector-ia32.cc
index 5e16325222..103d14f68d 100644
--- a/src/compiler/ia32/instruction-selector-ia32.cc
+++ b/src/compiler/ia32/instruction-selector-ia32.cc
@@ -1793,6 +1793,7 @@ VISIT_ATOMIC_BINOP(Xor)
   V(S128Xor)
 
 #define SIMD_INT_UNOP_LIST(V) \
+  V(F32x4SConvertI32x4)       \
   V(I32x4Neg)                 \
   V(I16x8Neg)                 \
   V(I8x16Neg)
@@ -1832,6 +1833,16 @@ void InstructionSelector::VisitF32x4ExtractLane(Node* node) {
   }
 }
 
+void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) {
+  IA32OperandGenerator g(this);
+  InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
+  if (IsSupported(AVX)) {
+    Emit(kAVXF32x4UConvertI32x4, g.DefineAsRegister(node), operand0);
+  } else {
+    Emit(kSSEF32x4UConvertI32x4, g.DefineSameAsFirst(node), operand0);
+  }
+}
+
 #define SIMD_I8X16_SHIFT_OPCODES(V) \
   V(I8x16Shl)                       \
   V(I8x16ShrS)                      \
diff --git a/src/compiler/instruction-selector.cc b/src/compiler/instruction-selector.cc
index 0617b0ab0a..188118b74b 100644
--- a/src/compiler/instruction-selector.cc
+++ b/src/compiler/instruction-selector.cc
@@ -2306,7 +2306,7 @@ void InstructionSelector::VisitWord32PairSar(Node* node) { UNIMPLEMENTED(); }
 #endif  // V8_TARGET_ARCH_64_BIT
 
 #if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
-    !V8_TARGET_ARCH_MIPS64
+    !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
 void InstructionSelector::VisitF32x4SConvertI32x4(Node* node) {
   UNIMPLEMENTED();
 }
@@ -2315,7 +2315,7 @@ void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) {
   UNIMPLEMENTED();
 }
 #endif  // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
-        // && !V8_TARGET_ARCH_MIPS64
+        // && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
 
 #if !V8_TARGET_ARCH_X64
 void InstructionSelector::VisitWord64AtomicLoad(Node* node) { UNIMPLEMENTED(); }
diff --git a/src/ia32/assembler-ia32.cc b/src/ia32/assembler-ia32.cc
index 2875d36b01..8277b4bd4b 100644
--- a/src/ia32/assembler-ia32.cc
+++ b/src/ia32/assembler-ia32.cc
@@ -2725,6 +2725,17 @@ void Assembler::pshufd(XMMRegister dst, Operand src, uint8_t shuffle) {
   EMIT(shuffle);
 }
 
+void Assembler::pblendw(XMMRegister dst, Operand src, uint8_t mask) {
+  DCHECK(IsEnabled(SSE4_1));
+  EnsureSpace ensure_space(this);
+  EMIT(0x66);
+  EMIT(0x0F);
+  EMIT(0x3A);
+  EMIT(0x0E);
+  emit_sse_operand(dst, src);
+  EMIT(mask);
+}
+
 void Assembler::pextrb(Operand dst, XMMRegister src, int8_t offset) {
   DCHECK(IsEnabled(SSE4_1));
   EnsureSpace ensure_space(this);
@@ -2959,6 +2970,12 @@ void Assembler::vpshufd(XMMRegister dst, Operand src, uint8_t shuffle) {
   EMIT(shuffle);
 }
 
+void Assembler::vpblendw(XMMRegister dst, XMMRegister src1, Operand src2,
+                         uint8_t mask) {
+  vinstr(0x0E, dst, src1, src2, k66, k0F3A, kWIG);
+  EMIT(mask);
+}
+
 void Assembler::vpextrb(Operand dst, XMMRegister src, int8_t offset) {
   vinstr(0x14, src, xmm0, dst, k66, k0F3A, kWIG);
   EMIT(offset);
diff --git a/src/ia32/assembler-ia32.h b/src/ia32/assembler-ia32.h
index 5635710c15..78bcec73a4 100644
--- a/src/ia32/assembler-ia32.h
+++ b/src/ia32/assembler-ia32.h
@@ -1131,6 +1131,11 @@ class Assembler : public AssemblerBase {
   }
   void pshufd(XMMRegister dst, Operand src, uint8_t shuffle);
 
+  void pblendw(XMMRegister dst, XMMRegister src, uint8_t mask) {
+    pblendw(dst, Operand(src), mask);
+  }
+  void pblendw(XMMRegister dst, Operand src, uint8_t mask);
+
   void pextrb(Register dst, XMMRegister src, int8_t offset) {
     pextrb(Operand(dst), src, offset);
   }
@@ -1439,6 +1444,12 @@ class Assembler : public AssemblerBase {
   }
   void vpshufd(XMMRegister dst, Operand src, uint8_t shuffle);
 
+  void vpblendw(XMMRegister dst, XMMRegister src1, XMMRegister src2,
+                uint8_t mask) {
+    vpblendw(dst, src1, Operand(src2), mask);
+  }
+  void vpblendw(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t mask);
+
   void vpextrb(Register dst, XMMRegister src, int8_t offset) {
     vpextrb(Operand(dst), src, offset);
   }
diff --git a/src/ia32/disasm-ia32.cc b/src/ia32/disasm-ia32.cc
index a9bac07690..5f07f0803d 100644
--- a/src/ia32/disasm-ia32.cc
+++ b/src/ia32/disasm-ia32.cc
@@ -819,6 +819,13 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
     int mod, regop, rm, vvvv = vex_vreg();
     get_modrm(*current, &mod, &regop, &rm);
     switch (opcode) {
+      case 0x0E:
+        AppendToBuffer("vpblendw %s,%s,", NameOfXMMRegister(regop),
+                       NameOfXMMRegister(vvvv));
+        current += PrintRightXMMOperand(current);
+        AppendToBuffer(",%d", *reinterpret_cast<uint8_t*>(current));
+        current++;
+        break;
       case 0x14:
         AppendToBuffer("vpextrb ");
         current += PrintRightOperand(current);
@@ -1962,5 +1969,13 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
                            NameOfXMMRegister(rm), static_cast<int>(imm8));
             data += 2;
+          } else if (*data == 0x0E) {
+            data++;
+            int mod, regop, rm;
+            get_modrm(*data, &mod, &regop, &rm);
+            AppendToBuffer("pblendw %s,", NameOfXMMRegister(regop));
+            data += PrintRightXMMOperand(data);
+            AppendToBuffer(",%d", *reinterpret_cast<uint8_t*>(data));
+            data++;
           } else if (*data == 0x14) {
             data++;
             int mod, regop, rm;
diff --git a/src/ia32/macro-assembler-ia32.h b/src/ia32/macro-assembler-ia32.h
index d3ebc3e5d5..2cb55323bf 100644
--- a/src/ia32/macro-assembler-ia32.h
+++ b/src/ia32/macro-assembler-ia32.h
@@ -228,6 +228,7 @@ class TurboAssembler : public Assembler {
   AVX_OP2_WITH_TYPE(Movd, movd, XMMRegister, Operand)
   AVX_OP2_WITH_TYPE(Movd, movd, Register, XMMRegister)
   AVX_OP2_WITH_TYPE(Movd, movd, Operand, XMMRegister)
+  AVX_OP2_WITH_TYPE(Cvtdq2ps, cvtdq2ps, XMMRegister, Operand)
 
 #undef AVX_OP2_WITH_TYPE
 
diff --git a/test/cctest/test-disasm-ia32.cc b/test/cctest/test-disasm-ia32.cc
index ccc106cc32..b5b6130729 100644
--- a/test/cctest/test-disasm-ia32.cc
+++ b/test/cctest/test-disasm-ia32.cc
@@ -573,6 +573,8 @@ TEST(DisasmIa320) {
   {
     if (CpuFeatures::IsSupported(SSE4_1)) {
       CpuFeatureScope scope(&assm, SSE4_1);
+      __ pblendw(xmm5, xmm1, 5);
+      __ pblendw(xmm5, Operand(edx, 4), 5);
       __ pextrb(eax, xmm0, 1);
       __ pextrb(Operand(edx, 4), xmm0, 1);
       __ pextrw(eax, xmm0, 1);
@@ -689,6 +691,8 @@ TEST(DisasmIa320) {
       __ vpshuflw(xmm5, Operand(edx, 4), 5);
       __ vpshufd(xmm5, xmm1, 5);
       __ vpshufd(xmm5, Operand(edx, 4), 5);
+      __ vpblendw(xmm5, xmm1, xmm0, 5);
+      __ vpblendw(xmm5, xmm1, Operand(edx, 4), 5);
       __ vpextrb(eax, xmm0, 1);
       __ vpextrb(Operand(edx, 4), xmm0, 1);
       __ vpextrw(eax, xmm0, 1);
diff --git a/test/cctest/wasm/test-run-wasm-simd.cc b/test/cctest/wasm/test-run-wasm-simd.cc
index ba5785e9d8..aa6ee5e2cc 100644
--- a/test/cctest/wasm/test-run-wasm-simd.cc
+++ b/test/cctest/wasm/test-run-wasm-simd.cc
@@ -446,7 +446,7 @@ WASM_SIMD_TEST(F32x4ReplaceLane) {
 }
 
 #if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
-    V8_TARGET_ARCH_MIPS64
+    V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
 // Tests both signed and unsigned conversion.
 WASM_SIMD_TEST(F32x4ConvertI32x4) {
   WasmRunner<int32_t, int32_t, float, float> r(kExecuteTurbofan, lower_simd);
@@ -471,7 +471,7 @@ WASM_SIMD_TEST(F32x4ConvertI32x4) {
   }
 }
 #endif  // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
-        // V8_TARGET_ARCH_MIPS64
+        // V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
 
 void RunF32x4UnOpTest(LowerSimd lower_simd, WasmOpcode simd_op,
                       FloatUnOp expected_op, float error = 0.0f) {