[wasm-simd] Add AVX codegen for x64 F32x4 ops (sqrt, reciprocal approximations, add, horizontal add, sub, mul, div)

There is some duplication in the AVX definitions, which will be cleaned
up in a future change.

Bug: v8:9561
Change-Id: I78b134f536ec47d45c0a56f653148e8925f7def6
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1893359
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#64706}
This commit is contained in:
Ng Zhi An 2019-10-31 10:25:04 -07:00 committed by Commit Bot
parent 32b2d32cba
commit f09b1337e8
3 changed files with 21 additions and 8 deletions

View File

@ -1340,10 +1340,16 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
// vsqrtpd is defined by sqrtpd in SSE2_INSTRUCTION_LIST
AVX_S_3(vsqrt, 0x51)
AVX_3(vsqrtps, 0x51, vps)
AVX_3(vrsqrtps, 0x52, vps)
AVX_3(vrcpps, 0x53, vps)
AVX_S_3(vadd, 0x58)
AVX_3(vaddps, 0x58, vps)
AVX_S_3(vsub, 0x5c)
AVX_3(vsubps, 0x5c, vps)
AVX_S_3(vmul, 0x59)
AVX_3(vmulps, 0x59, vps)
AVX_S_3(vdiv, 0x5e)
AVX_3(vdivps, 0x5e, vps)
AVX_S_3(vmin, 0x5d)
AVX_S_3(vmax, 0x5f)
AVX_P_3(vand, 0x54)

View File

@ -188,6 +188,13 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP(Divpd, divpd)
AVX_OP(Shufps, shufps)
AVX_OP(Cvtdq2ps, cvtdq2ps)
AVX_OP(Rcpps, rcpps)
AVX_OP(Rsqrtps, rsqrtps)
AVX_OP(Addps, addps)
AVX_OP(Haddps, haddps)
AVX_OP(Subps, subps)
AVX_OP(Mulps, mulps)
AVX_OP(Divps, divps)
AVX_OP_SSSE3(Pshufb, pshufb)
AVX_OP_SSSE3(Psignd, psignd)
AVX_OP_SSE4_1(Pmulld, pmulld)

View File

@ -2520,41 +2520,41 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64F32x4Sqrt: {
__ sqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ Sqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kX64F32x4RecipApprox: {
__ rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ Rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kX64F32x4RecipSqrtApprox: {
__ rsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ Rsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kX64F32x4Add: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ addps(i.OutputSimd128Register(), i.InputSimd128Register(1));
__ Addps(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64F32x4AddHoriz: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE3);
__ haddps(i.OutputSimd128Register(), i.InputSimd128Register(1));
__ Haddps(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64F32x4Sub: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ subps(i.OutputSimd128Register(), i.InputSimd128Register(1));
__ Subps(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64F32x4Mul: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ mulps(i.OutputSimd128Register(), i.InputSimd128Register(1));
__ Mulps(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64F32x4Div: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ divps(i.OutputSimd128Register(), i.InputSimd128Register(1));
__ Divps(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64F32x4Min: {