[ia32][wasm] Add F32x4SConvertI32x4, F32x4UConvertI32x4

Add Cvtdq2ps macro.
Add pblendw/vpblendw.

Change-Id: I5c8232d17c220fbbb4845cbfad4ce765f0bbbb90
Reviewed-on: https://chromium-review.googlesource.com/961973
Commit-Queue: Jing Bao <jing.bao@intel.com>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#52255}
jing.bao 2018-03-27 13:05:44 +08:00 committed by Commit Bot
parent caf74f9c61
commit 539fee65e8
11 changed files with 103 additions and 4 deletions

src/compiler/ia32/code-generator-ia32.cc

@@ -1768,6 +1768,40 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputOperand(2), i.InputInt8(1) << 4);
break;
}
case kIA32F32x4SConvertI32x4: {
__ Cvtdq2ps(i.OutputSimd128Register(), i.InputOperand(0));
break;
}
case kSSEF32x4UConvertI32x4: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register();
__ pxor(kScratchDoubleReg, kScratchDoubleReg); // zeros
__ pblendw(kScratchDoubleReg, dst, 0x55); // get lo 16 bits
__ psubd(dst, kScratchDoubleReg); // get hi 16 bits
__ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
__ psrld(dst, 1); // divide by 2 to get in unsigned range
__ cvtdq2ps(dst, dst); // convert hi exactly
__ addps(dst, dst); // double hi, exactly
__ addps(dst, kScratchDoubleReg); // add hi and lo, may round.
break;
}
case kAVXF32x4UConvertI32x4: {
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
__ vpxor(kScratchDoubleReg, kScratchDoubleReg,
kScratchDoubleReg); // zeros
__ vpblendw(kScratchDoubleReg, kScratchDoubleReg, src,
0x55); // get lo 16 bits
__ vpsubd(dst, src, kScratchDoubleReg); // get hi 16 bits
__ vcvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
__ vpsrld(dst, dst, 1); // divide by 2 to get in unsigned range
__ vcvtdq2ps(dst, dst); // convert hi exactly
__ vaddps(dst, dst, dst); // double hi, exactly
__ vaddps(dst, dst, kScratchDoubleReg); // add hi and lo, may round.
break;
}
case kSSEF32x4Abs: {
XMMRegister dst = i.OutputSimd128Register();
Operand src = i.InputOperand(0);
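The kSSEF32x4UConvertI32x4 sequence above synthesizes an unsigned conversion out of the signed-only cvtdq2ps, and every step except the final addition is exact. A scalar sketch of the same idea (hypothetical helper, for illustration only):

#include <cstdint>

// Scalar model of the SSE lane algorithm above.
float Uint32ToFloat(uint32_t x) {
  uint32_t lo = x & 0xFFFF;  // pblendw 0x55: at most 16 significant bits
  uint32_t hi = x - lo;      // psubd: high half, 16 trailing zero bits
  float f_lo = static_cast<float>(static_cast<int32_t>(lo));       // cvtdq2ps, exact
  float f_hi = static_cast<float>(static_cast<int32_t>(hi >> 1));  // psrld 1 makes the
                                                                   // value non-negative; exact
  f_hi += f_hi;        // addps dst,dst: doubling only bumps the exponent, exact
  return f_hi + f_lo;  // addps: the one step that may round
}

Since only the last addition rounds, the result is the correctly rounded conversion of x. The AVX case is the same algorithm in non-destructive three-operand form, which is why the instruction selector can use DefineAsRegister for AVX but must use DefineSameAsFirst for SSE.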

src/compiler/ia32/instruction-codes-ia32.h

@@ -121,6 +121,9 @@ namespace compiler {
V(AVXF32x4ExtractLane) \
V(SSEF32x4ReplaceLane) \
V(AVXF32x4ReplaceLane) \
V(IA32F32x4SConvertI32x4) \
V(SSEF32x4UConvertI32x4) \
V(AVXF32x4UConvertI32x4) \
V(SSEF32x4Abs) \
V(AVXF32x4Abs) \
V(SSEF32x4Neg) \

src/compiler/ia32/instruction-scheduler-ia32.cc

@@ -103,6 +103,9 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXF32x4ExtractLane:
case kSSEF32x4ReplaceLane:
case kAVXF32x4ReplaceLane:
case kIA32F32x4SConvertI32x4:
case kSSEF32x4UConvertI32x4:
case kAVXF32x4UConvertI32x4:
case kSSEF32x4Abs:
case kAVXF32x4Abs:
case kSSEF32x4Neg:

src/compiler/ia32/instruction-selector-ia32.cc

@@ -1793,6 +1793,7 @@ VISIT_ATOMIC_BINOP(Xor)
V(S128Xor)
#define SIMD_INT_UNOP_LIST(V) \
V(F32x4SConvertI32x4) \
V(I32x4Neg) \
V(I16x8Neg) \
V(I8x16Neg)
@@ -1832,6 +1833,16 @@ void InstructionSelector::VisitF32x4ExtractLane(Node* node) {
}
}
void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) {
IA32OperandGenerator g(this);
InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
if (IsSupported(AVX)) {
Emit(kAVXF32x4UConvertI32x4, g.DefineAsRegister(node), operand0);
} else {
Emit(kSSEF32x4UConvertI32x4, g.DefineSameAsFirst(node), operand0);
}
}
#define SIMD_I8X16_SHIFT_OPCODES(V) \
V(I8x16Shl) \
V(I8x16ShrS) \

src/compiler/instruction-selector.cc

@@ -2306,7 +2306,7 @@ void InstructionSelector::VisitWord32PairSar(Node* node) { UNIMPLEMENTED(); }
#endif // V8_TARGET_ARCH_64_BIT
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
-    !V8_TARGET_ARCH_MIPS64
+    !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitF32x4SConvertI32x4(Node* node) {
UNIMPLEMENTED();
}
@@ -2315,7 +2315,7 @@ void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) {
UNIMPLEMENTED();
}
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
-        // && !V8_TARGET_ARCH_MIPS64
+        // && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
#if !V8_TARGET_ARCH_X64
void InstructionSelector::VisitWord64AtomicLoad(Node* node) { UNIMPLEMENTED(); }

src/ia32/assembler-ia32.cc

@@ -2725,6 +2725,17 @@ void Assembler::pshufd(XMMRegister dst, Operand src, uint8_t shuffle) {
EMIT(shuffle);
}
void Assembler::pblendw(XMMRegister dst, Operand src, uint8_t mask) {
DCHECK(IsEnabled(SSE4_1));
EnsureSpace ensure_space(this);
EMIT(0x66);
EMIT(0x0F);
EMIT(0x3A);
EMIT(0x0E);
emit_sse_operand(dst, src);
EMIT(mask);
}
void Assembler::pextrb(Operand dst, XMMRegister src, int8_t offset) {
DCHECK(IsEnabled(SSE4_1));
EnsureSpace ensure_space(this);
@@ -2959,6 +2970,12 @@ void Assembler::vpshufd(XMMRegister dst, Operand src, uint8_t shuffle) {
EMIT(shuffle);
}
void Assembler::vpblendw(XMMRegister dst, XMMRegister src1, Operand src2,
uint8_t mask) {
vinstr(0x0E, dst, src1, src2, k66, k0F3A, kWIG);
EMIT(mask);
}
void Assembler::vpextrb(Operand dst, XMMRegister src, int8_t offset) {
vinstr(0x14, src, xmm0, dst, k66, k0F3A, kWIG);
EMIT(offset);
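pblendw/vpblendw select 16-bit words by immediate: bit i of the mask picks word i from the source operand, otherwise the destination word is kept. A minimal model (hypothetical helper, for illustration):

#include <cstdint>

// Model of pblendw dst, src, mask over the 8 words of a 128-bit register.
void PblendwModel(uint16_t dst[8], const uint16_t src[8], uint8_t mask) {
  for (int i = 0; i < 8; ++i) {
    if (mask & (1 << i)) dst[i] = src[i];
  }
}

With mask 0x55 (0b01010101) and a zeroed destination, this keeps exactly the low word of each doubleword, which is how the conversion sequence isolates the low 16 bits of every lane.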

src/ia32/assembler-ia32.h

@@ -1131,6 +1131,11 @@ class Assembler : public AssemblerBase {
}
void pshufd(XMMRegister dst, Operand src, uint8_t shuffle);
void pblendw(XMMRegister dst, XMMRegister src, uint8_t mask) {
pblendw(dst, Operand(src), mask);
}
void pblendw(XMMRegister dst, Operand src, uint8_t mask);
void pextrb(Register dst, XMMRegister src, int8_t offset) {
pextrb(Operand(dst), src, offset);
}
@@ -1439,6 +1444,12 @@
}
void vpshufd(XMMRegister dst, Operand src, uint8_t shuffle);
void vpblendw(XMMRegister dst, XMMRegister src1, XMMRegister src2,
uint8_t mask) {
vpblendw(dst, src1, Operand(src2), mask);
}
void vpblendw(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t mask);
void vpextrb(Register dst, XMMRegister src, int8_t offset) {
vpextrb(Operand(dst), src, offset);
}

src/ia32/disasm-ia32.cc

@@ -819,6 +819,13 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
int mod, regop, rm, vvvv = vex_vreg();
get_modrm(*current, &mod, &regop, &rm);
switch (opcode) {
case 0x0E:
AppendToBuffer("vpblendw %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
AppendToBuffer(",%d", *reinterpret_cast<uint8_t*>(current));
current++;
break;
case 0x14:
AppendToBuffer("vpextrb ");
current += PrintRightOperand(current);
@@ -1961,6 +1968,14 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
NameOfXMMRegister(rm),
static_cast<int>(imm8));
data += 2;
} else if (*data == 0x0E) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("pblendw %s,", NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
AppendToBuffer(",%d", *reinterpret_cast<uint8_t*>(data));
data++;
} else if (*data == 0x14) {
data++;
int mod, regop, rm;

src/ia32/macro-assembler-ia32.h

@@ -228,6 +228,7 @@ class TurboAssembler : public Assembler {
AVX_OP2_WITH_TYPE(Movd, movd, XMMRegister, Operand)
AVX_OP2_WITH_TYPE(Movd, movd, Register, XMMRegister)
AVX_OP2_WITH_TYPE(Movd, movd, Operand, XMMRegister)
AVX_OP2_WITH_TYPE(Cvtdq2ps, cvtdq2ps, XMMRegister, Operand)
#undef AVX_OP2_WITH_TYPE
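AVX_OP2_WITH_TYPE generates a macro-assembler wrapper that prefers the VEX encoding when AVX is available. A sketch of the expansion for the new entry, inferred from the surrounding pattern rather than copied from the header:

// Hypothetical expansion of AVX_OP2_WITH_TYPE(Cvtdq2ps, cvtdq2ps, XMMRegister, Operand):
void Cvtdq2ps(XMMRegister dst, Operand src) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vcvtdq2ps(dst, src);  // VEX-encoded form
  } else {
    cvtdq2ps(dst, src);   // legacy SSE2 form
  }
}

This is what lets the kIA32F32x4SConvertI32x4 case in the code generator call Cvtdq2ps without checking CPU features itself.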

test/cctest/test-disasm-ia32.cc

@@ -573,6 +573,8 @@ TEST(DisasmIa320) {
{
if (CpuFeatures::IsSupported(SSE4_1)) {
CpuFeatureScope scope(&assm, SSE4_1);
__ pblendw(xmm5, xmm1, 5);
__ pblendw(xmm5, Operand(edx, 4), 5);
__ pextrb(eax, xmm0, 1);
__ pextrb(Operand(edx, 4), xmm0, 1);
__ pextrw(eax, xmm0, 1);
@@ -689,6 +691,8 @@
__ vpshuflw(xmm5, Operand(edx, 4), 5);
__ vpshufd(xmm5, xmm1, 5);
__ vpshufd(xmm5, Operand(edx, 4), 5);
__ vpblendw(xmm5, xmm1, xmm0, 5);
__ vpblendw(xmm5, xmm1, Operand(edx, 4), 5);
__ vpextrb(eax, xmm0, 1);
__ vpextrb(Operand(edx, 4), xmm0, 1);
__ vpextrw(eax, xmm0, 1);

test/cctest/wasm/test-run-wasm-simd.cc

@@ -446,7 +446,7 @@ WASM_SIMD_TEST(F32x4ReplaceLane) {
}
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
-    V8_TARGET_ARCH_MIPS64
+    V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
// Tests both signed and unsigned conversion.
WASM_SIMD_TEST(F32x4ConvertI32x4) {
WasmRunner<int32_t, int32_t, float, float> r(kExecuteTurbofan, lower_simd);
@@ -471,7 +471,7 @@ WASM_SIMD_TEST(F32x4ConvertI32x4) {
}
}
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
-        // V8_TARGET_ARCH_MIPS64
+        // V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
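As a scalar reference for the semantics under test (illustrative values, not the actual test vectors):

#include <cstdint>
#include <cstdio>

int main() {
  int32_t lane = -1;  // bit pattern 0xFFFFFFFF
  float s = static_cast<float>(lane);                         // F32x4SConvertI32x4
  float u = static_cast<float>(static_cast<uint32_t>(lane));  // F32x4UConvertI32x4
  printf("%f %f\n", s, u);  // -1.000000 4294967296.000000
  return 0;
}

The unsigned conversion rounds 4294967295 to 4294967296.0f, the nearest representable float; the SSE/AVX sequences produce the same correctly rounded result because only their final addps rounds.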
void RunF32x4UnOpTest(LowerSimd lower_simd, WasmOpcode simd_op,
FloatUnOp expected_op, float error = 0.0f) {