[wasm-simd] Implement f64x2 splat extract_lane replace_lane for ia32
Bug: v8:9728
Change-Id: I8d993368fc23ab9e8cc08e31f4405678ec4ce824
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1803790
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#63955}
This commit is contained in: parent 9cd54cd4f5, commit 8a5386f240
@@ -2405,6 +2405,16 @@ void Assembler::shufps(XMMRegister dst, XMMRegister src, byte imm8) {
  EMIT(imm8);
}

void Assembler::shufpd(XMMRegister dst, XMMRegister src, byte imm8) {
  DCHECK(is_uint8(imm8));
  EnsureSpace ensure_space(this);
  EMIT(0x66);
  EMIT(0x0F);
  EMIT(0xC6);
  emit_sse_operand(dst, src);
  EMIT(imm8);
}

void Assembler::movdqa(Operand dst, XMMRegister src) {
  EnsureSpace ensure_space(this);
  EMIT(0x66);
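For orientation: shufpd selects one double from each source using the low two bits of imm8, so with identical operands and imm8 = 0 it duplicates the low lane, which is exactly how the splat lowering below uses it. A minimal standalone sketch of these semantics with SSE2 intrinsics (illustrative, not V8 code):

// Standalone illustration of shufpd semantics (SSE2 intrinsics).
#include <emmintrin.h>
#include <cstdio>

int main() {
  __m128d v = _mm_set_pd(2.0, 1.0);  // lanes: [1.0 (low), 2.0 (high)]
  // imm8 = 0: both result lanes come from element 0 -> splat of lane 0.
  __m128d splat = _mm_shuffle_pd(v, v, 0);
  // imm8 = 1: low result lane comes from element 1 -> moves lane 1 down,
  // as the extract-lane lowering does before reading the low double.
  __m128d lane1_low = _mm_shuffle_pd(v, v, 1);
  printf("%f %f\n", _mm_cvtsd_f64(splat), _mm_cvtsd_f64(lane1_low));  // 1.0 2.0
}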
@@ -2818,6 +2828,13 @@ void Assembler::vpd(byte op, XMMRegister dst, XMMRegister src1, Operand src2) {
  vinstr(op, dst, src1, src2, k66, k0F, kWIG);
}

void Assembler::vshufpd(XMMRegister dst, XMMRegister src1, Operand src2,
                        byte imm8) {
  DCHECK(is_uint8(imm8));
  vpd(0xC6, dst, src1, src2);
  EMIT(imm8);
}

void Assembler::vcmpps(XMMRegister dst, XMMRegister src1, Operand src2,
                       uint8_t cmp) {
  vps(0xC2, dst, src1, src2);
@@ -850,6 +850,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
  void movups(XMMRegister dst, Operand src);
  void movups(Operand dst, XMMRegister src);
  void shufps(XMMRegister dst, XMMRegister src, byte imm8);
  void shufpd(XMMRegister dst, XMMRegister src, byte imm8);

  void maxss(XMMRegister dst, XMMRegister src) { maxss(dst, Operand(src)); }
  void maxss(XMMRegister dst, Operand src);
@@ -1319,12 +1320,18 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
  }
  void vmovaps(XMMRegister dst, XMMRegister src) { vmovaps(dst, Operand(src)); }
  void vmovaps(XMMRegister dst, Operand src) { vps(0x28, dst, xmm0, src); }
  void vmovapd(XMMRegister dst, XMMRegister src) { vmovapd(dst, Operand(src)); }
  void vmovapd(XMMRegister dst, Operand src) { vpd(0x28, dst, xmm0, src); }
  void vmovups(XMMRegister dst, XMMRegister src) { vmovups(dst, Operand(src)); }
  void vmovups(XMMRegister dst, Operand src) { vps(0x10, dst, xmm0, src); }
  void vshufps(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8) {
    vshufps(dst, src1, Operand(src2), imm8);
  }
  void vshufps(XMMRegister dst, XMMRegister src1, Operand src2, byte imm8);
  void vshufpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8) {
    vshufpd(dst, src1, Operand(src2), imm8);
  }
  void vshufpd(XMMRegister dst, XMMRegister src1, Operand src2, byte imm8);

  void vpsllw(XMMRegister dst, XMMRegister src, uint8_t imm8);
  void vpslld(XMMRegister dst, XMMRegister src, uint8_t imm8);
@@ -1825,6 +1825,79 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      }
      break;
    }
    case kSSEF64x2Splat: {
      DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      XMMRegister dst = i.OutputSimd128Register();
      __ shufpd(dst, dst, 0x0);
      break;
    }
    case kAVXF64x2Splat: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister src = i.InputDoubleRegister(0);
      __ vshufpd(i.OutputSimd128Register(), src, src, 0x0);
      break;
    }
    case kSSEF64x2ExtractLane: {
      DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      XMMRegister dst = i.OutputDoubleRegister();
      int8_t lane = i.InputInt8(1);
      if (lane != 0) {
        DCHECK_LT(lane, 2);
        __ shufpd(dst, dst, lane);
      }
      break;
    }
    case kAVXF64x2ExtractLane: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister dst = i.OutputDoubleRegister();
      XMMRegister src = i.InputSimd128Register(0);
      int8_t lane = i.InputInt8(1);
      if (lane == 0) {
        if (dst != src) __ vmovapd(dst, src);
      } else {
        DCHECK_LT(lane, 2);
        __ vshufpd(dst, src, src, lane);
      }
      break;
    }
    case kSSEF64x2ReplaceLane: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      int8_t lane = i.InputInt8(1);
      DoubleRegister rep = i.InputDoubleRegister(2);

      // insertps takes a mask which contains (from high to low bits):
      // - 2 bits specifying the source float element to copy
      // - 2 bits specifying the destination float element to write to
      // - 4 bits specifying which elements of the destination to zero
      DCHECK_LT(lane, 2);
      if (lane == 0) {
        __ insertps(dst, rep, 0b00000000);
        __ insertps(dst, rep, 0b01010000);
      } else {
        __ insertps(dst, rep, 0b00100000);
        __ insertps(dst, rep, 0b01110000);
      }
      break;
    }
    case kAVXF64x2ReplaceLane: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      int8_t lane = i.InputInt8(1);
      DoubleRegister rep = i.InputDoubleRegister(2);

      DCHECK_LT(lane, 2);
      if (lane == 0) {
        __ vinsertps(dst, src, rep, 0b00000000);
        __ vinsertps(dst, dst, rep, 0b01010000);
      } else {
        __ vinsertps(dst, src, rep, 0b00100000);
        __ vinsertps(dst, dst, rep, 0b01110000);
      }
      break;
    }
    case kSSEF32x4Splat: {
      DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      XMMRegister dst = i.OutputSimd128Register();
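The two insertps steps per lane can be sanity-checked outside V8 with SSE4.1 intrinsics: a double spans two float slots, so replacing f64 lane k means writing the replacement's float elements 0 and 1 into destination float elements 2k and 2k+1, with the second insert chained on the result of the first. A sketch under those assumptions (illustrative, not V8 code):

// Replace f64 lane 1 via two 32-bit insertps steps (SSE4.1 intrinsics;
// the mask is countS in bits 7:6, countD in bits 5:4, zmask in bits 3:0).
#include <smmintrin.h>
#include <cstdio>

int main() {
  __m128d v = _mm_set_pd(2.0, 1.0);  // lanes [1.0, 2.0]
  __m128d rep = _mm_set_sd(9.0);     // replacement double in lane 0
  __m128 dst = _mm_castpd_ps(v);
  __m128 r = _mm_castpd_ps(rep);
  dst = _mm_insert_ps(dst, r, 0b00100000);  // r float 0 -> dst float 2
  dst = _mm_insert_ps(dst, r, 0b01110000);  // r float 1 -> dst float 3
  double lo, hi;
  _mm_storel_pd(&lo, _mm_castps_pd(dst));
  _mm_storeh_pd(&hi, _mm_castps_pd(dst));
  printf("%f %f\n", lo, hi);  // prints 1.000000 9.000000
}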
@@ -116,6 +116,12 @@ namespace compiler {
  V(IA32PushSimd128)      \
  V(IA32Poke)             \
  V(IA32Peek)             \
  V(SSEF64x2Splat)        \
  V(AVXF64x2Splat)        \
  V(SSEF64x2ExtractLane)  \
  V(AVXF64x2ExtractLane)  \
  V(SSEF64x2ReplaceLane)  \
  V(AVXF64x2ReplaceLane)  \
  V(SSEF32x4Splat)        \
  V(AVXF32x4Splat)        \
  V(SSEF32x4ExtractLane)  \
@@ -97,6 +97,12 @@ int InstructionScheduler::GetTargetInstructionFlags(
    case kAVXFloat32Neg:
    case kIA32BitcastFI:
    case kIA32BitcastIF:
    case kSSEF64x2Splat:
    case kAVXF64x2Splat:
    case kSSEF64x2ExtractLane:
    case kAVXF64x2ExtractLane:
    case kSSEF64x2ReplaceLane:
    case kAVXF64x2ReplaceLane:
    case kSSEF32x4Splat:
    case kAVXF32x4Splat:
    case kSSEF32x4ExtractLane:
@@ -2000,6 +2000,14 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
  V(I8x16ShrS) \
  V(I8x16ShrU)

void InstructionSelector::VisitF64x2Splat(Node* node) {
  VisitRRSimd(this, node, kAVXF64x2Splat, kSSEF64x2Splat);
}

void InstructionSelector::VisitF64x2ExtractLane(Node* node) {
  VisitRRISimd(this, node, kAVXF64x2ExtractLane, kSSEF64x2ExtractLane);
}

void InstructionSelector::VisitF32x4Splat(Node* node) {
  VisitRRSimd(this, node, kAVXF32x4Splat, kSSEF32x4Splat);
}
@@ -2091,6 +2099,28 @@ VISIT_SIMD_REPLACE_LANE(F32x4)
#undef VISIT_SIMD_REPLACE_LANE
#undef SIMD_INT_TYPES

// The difference between this and VISIT_SIMD_REPLACE_LANE is that this forces
// operand2 to be UseRegister, because the codegen relies on insertps using
// registers.
// TODO(v8:9764) Remove this UseRegister requirement
#define VISIT_SIMD_REPLACE_LANE_USE_REG(Type)                            \
  void InstructionSelector::Visit##Type##ReplaceLane(Node* node) {       \
    IA32OperandGenerator g(this);                                        \
    InstructionOperand operand0 = g.UseRegister(node->InputAt(0));       \
    InstructionOperand operand1 =                                        \
        g.UseImmediate(OpParameter<int32_t>(node->op()));                \
    InstructionOperand operand2 = g.UseRegister(node->InputAt(1));       \
    if (IsSupported(AVX)) {                                              \
      Emit(kAVX##Type##ReplaceLane, g.DefineAsRegister(node), operand0,  \
           operand1, operand2);                                          \
    } else {                                                             \
      Emit(kSSE##Type##ReplaceLane, g.DefineSameAsFirst(node), operand0, \
           operand1, operand2);                                          \
    }                                                                    \
  }
VISIT_SIMD_REPLACE_LANE_USE_REG(F64x2)
#undef VISIT_SIMD_REPLACE_LANE_USE_REG

#define VISIT_SIMD_SHIFT(Opcode)                               \
  void InstructionSelector::Visit##Opcode(Node* node) {        \
    VisitRROSimdShift(this, node, kAVX##Opcode, kSSE##Opcode); \
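Expanded for F64x2, VISIT_SIMD_REPLACE_LANE_USE_REG above produces the selector below; the expansion is mechanical, with comments added here to spell out why the two paths constrain the destination differently:

// Hand expansion of VISIT_SIMD_REPLACE_LANE_USE_REG(F64x2), for readability.
void InstructionSelector::VisitF64x2ReplaceLane(Node* node) {
  IA32OperandGenerator g(this);
  InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
  InstructionOperand operand1 =
      g.UseImmediate(OpParameter<int32_t>(node->op()));
  InstructionOperand operand2 = g.UseRegister(node->InputAt(1));
  if (IsSupported(AVX)) {
    // vinsertps is three-operand, so the output may live anywhere.
    Emit(kAVXF64x2ReplaceLane, g.DefineAsRegister(node), operand0, operand1,
         operand2);
  } else {
    // SSE insertps overwrites its first operand in place, so the output
    // must be the same register as input 0.
    Emit(kSSEF64x2ReplaceLane, g.DefineSameAsFirst(node), operand0, operand1,
         operand2);
  }
}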
@@ -2621,9 +2621,11 @@ void InstructionSelector::VisitWord64AtomicCompareExchange(Node* node) {

#if !V8_TARGET_ARCH_X64
#if !V8_TARGET_ARCH_ARM64
#if !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitF64x2Splat(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2ExtractLane(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2ReplaceLane(Node* node) { UNIMPLEMENTED(); }
#endif  // !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitF64x2Abs(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Neg(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Sqrt(Node* node) { UNIMPLEMENTED(); }
@@ -1216,6 +1216,13 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
        AppendToBuffer(",%d", Imm8(current));
        current++;
        break;
      case 0xC6:
        AppendToBuffer("vshufpd %s,%s,", NameOfXMMRegister(regop),
                       NameOfXMMRegister(vvvv));
        current += PrintRightXMMOperand(current);
        AppendToBuffer(",%d", Imm8(current));
        current++;
        break;
#define DECLARE_SSE_AVX_DIS_CASE(instruction, notUsed1, notUsed2, opcode) \
  case 0x##opcode: {                                                      \
    AppendToBuffer("v" #instruction " %s,%s,", NameOfXMMRegister(regop),  \
@@ -2269,6 +2276,15 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
        data += PrintRightOperand(data);
        AppendToBuffer(",%d", Imm8(data));
        data++;
      } else if (*data == 0xC6) {
        // shufpd xmm, xmm/m128, imm8
        data++;
        int mod, regop, rm;
        get_modrm(*data, &mod, &regop, &rm);
        AppendToBuffer("shufpd %s,", NameOfXMMRegister(regop));
        data += PrintRightXMMOperand(data);
        AppendToBuffer(",%d", Imm8(data));
        data++;
      } else if (*data == 0xE7) {
        data++;
        int mod, regop, rm;
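For concreteness, the byte sequences these new cases decode look like the following. The legacy form follows the 66 0F C6 /r ib pattern that Assembler::shufpd emits above; the VEX form is an assumption from the standard two-byte VEX layout, shown here for xmm0,xmm0 with imm8 = 0:

// shufpd xmm0,xmm0,0        -> 66 0F C6 C0 00
// vshufpd xmm0,xmm0,xmm0,0  -> C5 F9 C6 C0 00
//   (two-byte VEX: ~R=1, ~vvvv=1111 for xmm0, L=0 for 128-bit,
//    pp=01 standing in for the 66 prefix)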
@@ -877,6 +877,7 @@ WASM_SIMD_TEST_NO_LOWERING(F32x4Qfms) {
}
#endif  // V8_TARGET_ARCH_X64

#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
WASM_SIMD_TEST_NO_LOWERING(I64x2Splat) {
  WasmRunner<int32_t, int64_t> r(execution_tier, lower_simd);
@@ -1072,6 +1073,7 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2GeU) {
  RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2GeU,
                    UnsignedGreaterEqual);
}
#endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64

WASM_SIMD_TEST_NO_LOWERING(F64x2Splat) {
  WasmRunner<int32_t, double> r(execution_tier, lower_simd);
@@ -1095,6 +1097,7 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Splat) {
  }
}

#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
WASM_SIMD_TEST_NO_LOWERING(F64x2ExtractLaneWithI64x2) {
  WasmRunner<int64_t> r(execution_tier, lower_simd);
  BUILD(r, WASM_IF_ELSE_L(
@@ -1104,6 +1107,7 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2ExtractLaneWithI64x2) {
               WASM_I64V(1), WASM_I64V(0)));
  CHECK_EQ(1, r.Call());
}
#endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64

WASM_SIMD_TEST_NO_LOWERING(F64x2ExtractLane) {
  WasmRunner<double, double> r(execution_tier, lower_simd);
@@ -1127,6 +1131,7 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2ExtractLane) {
  }
}

#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
WASM_SIMD_TEST_NO_LOWERING(I64x2ExtractWithF64x2) {
  WasmRunner<int64_t> r(execution_tier, lower_simd);
  BUILD(r, WASM_IF_ELSE_L(
@@ -1136,6 +1141,7 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2ExtractWithF64x2) {
               WASM_I64V(1), WASM_I64V(0)));
  CHECK_EQ(1, r.Call());
}
#endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64

WASM_SIMD_TEST_NO_LOWERING(F64x2ReplaceLane) {
  WasmRunner<int32_t> r(execution_tier, lower_simd);
@@ -1156,6 +1162,7 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2ReplaceLane) {
  }
}

#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
bool IsExtreme(double x) {
  double abs_x = std::fabs(x);
  const double kSmallFloatThreshold = 1.0e-298;
@@ -1278,11 +1285,9 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Neg) {
  RunF64x2UnOpTest(execution_tier, lower_simd, kExprF64x2Neg, Negate);
}

#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
WASM_SIMD_TEST_NO_LOWERING(F64x2Sqrt) {
  RunF64x2UnOpTest(execution_tier, lower_simd, kExprF64x2Sqrt, Sqrt);
}
#endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64

void RunF64x2BinOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
                       WasmOpcode opcode, DoubleBinOp expected_op) {
@@ -1413,6 +1418,7 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2Mul) {
  RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2Mul,
                    base::MulWithWraparound);
}
#endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64

#if V8_TARGET_ARCH_X64
WASM_SIMD_TEST_NO_LOWERING(I64x2MinS) {
@@ -1481,7 +1487,7 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Qfms) {
  }
}
#endif  // V8_TARGET_ARCH_X64
#endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
#endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32

WASM_SIMD_TEST(I32x4Splat) {
  WasmRunner<int32_t, int32_t> r(execution_tier, lower_simd);
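As a plain-C++ reference model of what the newly enabled tests assert, the three operations have simple scalar semantics (illustrative names, not V8 APIs):

// Scalar reference model of the three f64x2 operations under test.
#include <cassert>

struct F64x2 { double lane[2]; };

F64x2 Splat(double x) { return {{x, x}}; }
double ExtractLane(const F64x2& v, int i) { return v.lane[i]; }
F64x2 ReplaceLane(F64x2 v, int i, double x) { v.lane[i] = x; return v; }

int main() {
  F64x2 v = Splat(1.5);
  assert(ExtractLane(v, 0) == 1.5 && ExtractLane(v, 1) == 1.5);
  v = ReplaceLane(v, 1, -2.0);
  assert(ExtractLane(v, 0) == 1.5 && ExtractLane(v, 1) == -2.0);
}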