[wasm-simd] Unify sse and avx impl for i16x8 shifts
The SSE and AVX implementations are practically identical, so instead of keeping separate SSE and AVX opcodes we delegate to the macro assembler, which decides whether to emit the SSE or the AVX instruction. This unification will simplify optimization of constant shifts later on. Bug: v8:10115 Change-Id: I68e60cb3fd51156438989812be189f71e6e47ba7 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2026470 Reviewed-by: Deepti Gandluri <gdeepti@chromium.org> Commit-Queue: Zhi An Ng <zhin@chromium.org> Cr-Commit-Position: refs/heads/master@{#66121}
This commit is contained in:
parent
1775684e86
commit
148b53910e
@ -335,10 +335,13 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
AVX_PACKED_OP3(Minpd, minpd)
|
||||
AVX_PACKED_OP3(Maxpd, maxpd)
|
||||
AVX_PACKED_OP3(Cmpunordpd, cmpunordpd)
|
||||
AVX_PACKED_OP3(Psllw, psllw)
|
||||
AVX_PACKED_OP3(Pslld, pslld)
|
||||
AVX_PACKED_OP3(Psllq, psllq)
|
||||
AVX_PACKED_OP3(Psrlw, psrlw)
|
||||
AVX_PACKED_OP3(Psrld, psrld)
|
||||
AVX_PACKED_OP3(Psrlq, psrlq)
|
||||
AVX_PACKED_OP3(Psraw, psraw)
|
||||
AVX_PACKED_OP3(Psrad, psrad)
|
||||
AVX_PACKED_OP3(Paddq, paddq)
|
||||
AVX_PACKED_OP3(Psubq, psubq)
|
||||
|
@ -2818,44 +2818,22 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kSSEI16x8Shl: {
|
||||
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
case kIA32I16x8Shl: {
|
||||
XMMRegister tmp = i.TempSimd128Register(0);
|
||||
Register shift = i.InputRegister(1);
|
||||
// Take shift value modulo 16.
|
||||
__ and_(shift, 15);
|
||||
__ movd(tmp, shift);
|
||||
__ psllw(i.OutputSimd128Register(), tmp);
|
||||
__ Movd(tmp, shift);
|
||||
__ Psllw(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
|
||||
break;
|
||||
}
|
||||
case kAVXI16x8Shl: {
|
||||
CpuFeatureScope avx_scope(tasm(), AVX);
|
||||
case kIA32I16x8ShrS: {
|
||||
XMMRegister tmp = i.TempSimd128Register(0);
|
||||
Register shift = i.InputRegister(1);
|
||||
// Take shift value modulo 16.
|
||||
__ and_(shift, 15);
|
||||
__ movd(tmp, shift);
|
||||
__ vpsllw(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
|
||||
break;
|
||||
}
|
||||
case kSSEI16x8ShrS: {
|
||||
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
XMMRegister tmp = i.TempSimd128Register(0);
|
||||
Register shift = i.InputRegister(1);
|
||||
// Take shift value modulo 16.
|
||||
__ and_(shift, 15);
|
||||
__ movd(tmp, shift);
|
||||
__ psraw(i.OutputSimd128Register(), tmp);
|
||||
break;
|
||||
}
|
||||
case kAVXI16x8ShrS: {
|
||||
CpuFeatureScope avx_scope(tasm(), AVX);
|
||||
XMMRegister tmp = i.TempSimd128Register(0);
|
||||
Register shift = i.InputRegister(1);
|
||||
// Take shift value modulo 16.
|
||||
__ and_(shift, 15);
|
||||
__ movd(tmp, shift);
|
||||
__ vpsraw(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
|
||||
__ Movd(tmp, shift);
|
||||
__ Psraw(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
|
||||
break;
|
||||
}
|
||||
case kSSEI16x8SConvertI32x4: {
|
||||
@ -3022,24 +3000,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ Pmovzxbw(dst, dst);
|
||||
break;
|
||||
}
|
||||
case kSSEI16x8ShrU: {
|
||||
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
case kIA32I16x8ShrU: {
|
||||
XMMRegister tmp = i.TempSimd128Register(0);
|
||||
Register shift = i.InputRegister(1);
|
||||
// Take shift value modulo 16.
|
||||
__ and_(shift, 15);
|
||||
__ movd(tmp, shift);
|
||||
__ psrlw(i.OutputSimd128Register(), tmp);
|
||||
break;
|
||||
}
|
||||
case kAVXI16x8ShrU: {
|
||||
CpuFeatureScope avx_scope(tasm(), AVX);
|
||||
XMMRegister tmp = i.TempSimd128Register(0);
|
||||
Register shift = i.InputRegister(1);
|
||||
// Take shift value modulo 16.
|
||||
__ and_(shift, 15);
|
||||
__ movd(tmp, shift);
|
||||
__ vpsrlw(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
|
||||
__ Movd(tmp, shift);
|
||||
__ Psrlw(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
|
||||
break;
|
||||
}
|
||||
case kSSEI16x8UConvertI32x4: {
|
||||
|
@ -233,10 +233,8 @@ namespace compiler {
|
||||
V(IA32I16x8SConvertI8x16Low) \
|
||||
V(IA32I16x8SConvertI8x16High) \
|
||||
V(IA32I16x8Neg) \
|
||||
V(SSEI16x8Shl) \
|
||||
V(AVXI16x8Shl) \
|
||||
V(SSEI16x8ShrS) \
|
||||
V(AVXI16x8ShrS) \
|
||||
V(IA32I16x8Shl) \
|
||||
V(IA32I16x8ShrS) \
|
||||
V(SSEI16x8SConvertI32x4) \
|
||||
V(AVXI16x8SConvertI32x4) \
|
||||
V(SSEI16x8Add) \
|
||||
@ -265,8 +263,7 @@ namespace compiler {
|
||||
V(AVXI16x8GeS) \
|
||||
V(IA32I16x8UConvertI8x16Low) \
|
||||
V(IA32I16x8UConvertI8x16High) \
|
||||
V(SSEI16x8ShrU) \
|
||||
V(AVXI16x8ShrU) \
|
||||
V(IA32I16x8ShrU) \
|
||||
V(SSEI16x8UConvertI32x4) \
|
||||
V(AVXI16x8UConvertI32x4) \
|
||||
V(SSEI16x8AddSaturateU) \
|
||||
|
@ -214,10 +214,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kIA32I16x8SConvertI8x16Low:
|
||||
case kIA32I16x8SConvertI8x16High:
|
||||
case kIA32I16x8Neg:
|
||||
case kSSEI16x8Shl:
|
||||
case kAVXI16x8Shl:
|
||||
case kSSEI16x8ShrS:
|
||||
case kAVXI16x8ShrS:
|
||||
case kIA32I16x8Shl:
|
||||
case kIA32I16x8ShrS:
|
||||
case kSSEI16x8SConvertI32x4:
|
||||
case kAVXI16x8SConvertI32x4:
|
||||
case kSSEI16x8Add:
|
||||
@ -246,8 +244,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kAVXI16x8GeS:
|
||||
case kIA32I16x8UConvertI8x16Low:
|
||||
case kIA32I16x8UConvertI8x16High:
|
||||
case kSSEI16x8ShrU:
|
||||
case kAVXI16x8ShrU:
|
||||
case kIA32I16x8ShrU:
|
||||
case kSSEI16x8UConvertI32x4:
|
||||
case kAVXI16x8UConvertI32x4:
|
||||
case kSSEI16x8AddSaturateU:
|
||||
|
@ -305,18 +305,13 @@ void VisitRRISimd(InstructionSelector* selector, Node* node,
|
||||
}
|
||||
|
||||
void VisitRROSimdShift(InstructionSelector* selector, Node* node,
|
||||
ArchOpcode avx_opcode, ArchOpcode sse_opcode) {
|
||||
ArchOpcode opcode) {
|
||||
IA32OperandGenerator g(selector);
|
||||
InstructionOperand operand0 = g.UseUniqueRegister(node->InputAt(0));
|
||||
InstructionOperand operand1 = g.UseUniqueRegister(node->InputAt(1));
|
||||
InstructionOperand temps[] = {g.TempSimd128Register()};
|
||||
if (selector->IsSupported(AVX)) {
|
||||
selector->Emit(avx_opcode, g.DefineAsRegister(node), operand0, operand1,
|
||||
arraysize(temps), temps);
|
||||
} else {
|
||||
selector->Emit(sse_opcode, g.DefineSameAsFirst(node), operand0, operand1,
|
||||
arraysize(temps), temps);
|
||||
}
|
||||
selector->Emit(opcode, g.DefineSameAsFirst(node), operand0, operand1,
|
||||
arraysize(temps), temps);
|
||||
}
|
||||
|
||||
void VisitRROI8x16SimdRightShift(InstructionSelector* selector, Node* node,
|
||||
@ -2130,17 +2125,15 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
|
||||
V(S1x8AllTrue) \
|
||||
V(S1x16AllTrue)
|
||||
|
||||
#define SIMD_SHIFT_OPCODES(V) \
|
||||
V(I16x8Shl) \
|
||||
V(I16x8ShrS) \
|
||||
V(I16x8ShrU)
|
||||
|
||||
#define SIMD_SHIFT_OPCODES_UNIFED_SSE_AVX(V) \
|
||||
V(I64x2Shl) \
|
||||
V(I64x2ShrU) \
|
||||
V(I32x4Shl) \
|
||||
V(I32x4ShrS) \
|
||||
V(I32x4ShrU)
|
||||
V(I32x4ShrU) \
|
||||
V(I16x8Shl) \
|
||||
V(I16x8ShrS) \
|
||||
V(I16x8ShrU)
|
||||
|
||||
#define SIMD_I8X16_RIGHT_SHIFT_OPCODES(V) \
|
||||
V(I8x16ShrS) \
|
||||
@ -2360,17 +2353,9 @@ VISIT_SIMD_REPLACE_LANE(F32x4)
|
||||
VISIT_SIMD_REPLACE_LANE_USE_REG(F64x2)
|
||||
#undef VISIT_SIMD_REPLACE_LANE_USE_REG
|
||||
|
||||
#define VISIT_SIMD_SHIFT(Opcode) \
|
||||
void InstructionSelector::Visit##Opcode(Node* node) { \
|
||||
VisitRROSimdShift(this, node, kAVX##Opcode, kSSE##Opcode); \
|
||||
}
|
||||
SIMD_SHIFT_OPCODES(VISIT_SIMD_SHIFT)
|
||||
#undef VISIT_SIMD_SHIFT
|
||||
#undef SIMD_SHIFT_OPCODES
|
||||
|
||||
#define VISIT_SIMD_SHIFT_UNIFIED_SSE_AVX(Opcode) \
|
||||
void InstructionSelector::Visit##Opcode(Node* node) { \
|
||||
VisitRROSimdShift(this, node, kIA32##Opcode, kIA32##Opcode); \
|
||||
#define VISIT_SIMD_SHIFT_UNIFIED_SSE_AVX(Opcode) \
|
||||
void InstructionSelector::Visit##Opcode(Node* node) { \
|
||||
VisitRROSimdShift(this, node, kIA32##Opcode); \
|
||||
}
|
||||
SIMD_SHIFT_OPCODES_UNIFED_SSE_AVX(VISIT_SIMD_SHIFT_UNIFIED_SSE_AVX)
|
||||
#undef VISIT_SIMD_SHIFT_UNIFIED_SSE_AVX
|
||||
|
Loading…
Reference in New Issue
Block a user