From 386e5a1149dcf5d2834773604c6934c0e797ba0c Mon Sep 17 00:00:00 2001 From: bbudge Date: Thu, 2 Mar 2017 11:50:33 -0800 Subject: [PATCH] Implement remaining Boolean SIMD operations on ARM. - Implements Select instructions using a single ARM vbsl instruction. - Renames boolean machine operators to match renamed S1xN machine types. - Implements S1xN vector logical ops, AND, OR, XOR, NOT for ARM. - Implements S1xN AnyTrue, AllTrue ops for ARM. - Eliminates unused SIMD op categories in opcodes.h. LOG=N BUG=v8:6020 Review-Url: https://codereview.chromium.org/2711863002 Cr-Commit-Position: refs/heads/master@{#43556} --- src/arm/assembler-arm.cc | 105 ++++-- src/arm/assembler-arm.h | 7 +- src/arm/disasm-arm.cc | 29 +- src/arm/macro-assembler-arm.cc | 9 + src/arm/macro-assembler-arm.h | 1 + src/arm/simulator-arm.cc | 146 ++++++-- src/arm/simulator-arm.h | 4 + src/compiler/arm/code-generator-arm.cc | 78 ++++- src/compiler/arm/instruction-codes-arm.h | 10 +- src/compiler/arm/instruction-scheduler-arm.cc | 10 +- src/compiler/arm/instruction-selector-arm.cc | 160 +++++---- src/compiler/instruction-selector.cc | 72 ++++ src/compiler/machine-operator.cc | 38 +-- src/compiler/machine-operator.h | 41 ++- src/compiler/opcodes.h | 318 +++++++++--------- src/compiler/wasm-compiler.cc | 69 +++- src/wasm/wasm-opcodes.cc | 25 +- src/wasm/wasm-opcodes.h | 55 ++- test/cctest/test-assembler-arm.cc | 39 +++ test/cctest/test-disasm-arm.cc | 6 + test/cctest/test-macro-assembler-arm.cc | 28 ++ test/cctest/wasm/test-run-wasm-simd.cc | 181 ++++++++++ 22 files changed, 1039 insertions(+), 392 deletions(-) diff --git a/src/arm/assembler-arm.cc b/src/arm/assembler-arm.cc index 63cf74d474..c88f1f4840 100644 --- a/src/arm/assembler-arm.cc +++ b/src/arm/assembler-arm.cc @@ -4123,25 +4123,43 @@ void Assembler::vcvt_u32_f32(const QwNeonRegister dst, emit(EncodeNeonVCVT(U32, dst, F32, src)); } -// op is instr->Bits(11, 7). -static Instr EncodeNeonUnaryOp(int op, bool is_float, NeonSize size, - const QwNeonRegister dst, - const QwNeonRegister src) { - DCHECK_IMPLIES(is_float, size == Neon32); +enum UnaryOp { VABS, VABSF, VNEG, VNEGF }; + +static Instr EncodeNeonUnaryOp(UnaryOp op, NeonSize size, QwNeonRegister dst, + QwNeonRegister src) { + int op_encoding = 0; + switch (op) { + case VABS: + op_encoding = 0x6 * B7; + break; + case VABSF: + DCHECK_EQ(Neon32, size); + op_encoding = 0x6 * B7 | B10; + break; + case VNEG: + op_encoding = 0x7 * B7; + break; + case VNEGF: + DCHECK_EQ(Neon32, size); + op_encoding = 0x7 * B7 | B10; + break; + default: + UNREACHABLE(); + break; + } int vd, d; dst.split_code(&vd, &d); int vm, m; src.split_code(&vm, &m); - int F = is_float ? 1 : 0; - return 0x1E7U * B23 | d * B22 | 0x3 * B20 | size * B18 | B16 | vd * B12 | - F * B10 | B8 | op * B7 | B6 | m * B5 | vm; + return 0x1E7U * B23 | d * B22 | 0x3 * B20 | size * B18 | B16 | vd * B12 | B6 | + m * B5 | vm | op_encoding; } void Assembler::vabs(const QwNeonRegister dst, const QwNeonRegister src) { // Qd = vabs.f(Qn, Qm) SIMD floating point absolute value. // Instruction details available in ARM DDI 0406C.b, A8.8.824. DCHECK(IsEnabled(NEON)); - emit(EncodeNeonUnaryOp(0x6, true, Neon32, dst, src)); + emit(EncodeNeonUnaryOp(VABSF, Neon32, dst, src)); } void Assembler::vabs(NeonSize size, const QwNeonRegister dst, @@ -4149,14 +4167,14 @@ void Assembler::vabs(NeonSize size, const QwNeonRegister dst, // Qd = vabs.s(Qn, Qm) SIMD integer absolute value. // Instruction details available in ARM DDI 0406C.b, A8.8.824. 
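// Usage sketch for the enum-based encoder above (register choices are
// illustrative, not from this patch): the float overload vabs(q0, q1)
// encodes via EncodeNeonUnaryOp(VABSF, Neon32, ...), while the integer
// overload vabs(Neon8, q0, q1) passes VABS with the caller's lane size.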
DCHECK(IsEnabled(NEON)); - emit(EncodeNeonUnaryOp(0x6, false, size, dst, src)); + emit(EncodeNeonUnaryOp(VABS, size, dst, src)); } void Assembler::vneg(const QwNeonRegister dst, const QwNeonRegister src) { // Qd = vabs.f(Qn, Qm) SIMD floating point negate. // Instruction details available in ARM DDI 0406C.b, A8.8.968. DCHECK(IsEnabled(NEON)); - emit(EncodeNeonUnaryOp(0x7, true, Neon32, dst, src)); + emit(EncodeNeonUnaryOp(VNEGF, Neon32, dst, src)); } void Assembler::vneg(NeonSize size, const QwNeonRegister dst, @@ -4164,7 +4182,7 @@ void Assembler::vneg(NeonSize size, const QwNeonRegister dst, // Qd = vabs.s(Qn, Qm) SIMD integer negate. // Instruction details available in ARM DDI 0406C.b, A8.8.968. DCHECK(IsEnabled(NEON)); - emit(EncodeNeonUnaryOp(0x7, false, size, dst, src)); + emit(EncodeNeonUnaryOp(VNEG, size, dst, src)); } void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1, @@ -4184,10 +4202,9 @@ void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1, enum BinaryBitwiseOp { VAND, VBIC, VBIF, VBIT, VBSL, VEOR, VORR, VORN }; -static Instr EncodeNeonBinaryBitwiseOp(BinaryBitwiseOp op, - const QwNeonRegister dst, - const QwNeonRegister src1, - const QwNeonRegister src2) { +static Instr EncodeNeonBinaryBitwiseOp(BinaryBitwiseOp op, QwNeonRegister dst, + QwNeonRegister src1, + QwNeonRegister src2) { int op_encoding = 0; switch (op) { case VBIC: @@ -4336,9 +4353,8 @@ enum IntegerBinOp { }; static Instr EncodeNeonBinOp(IntegerBinOp op, NeonDataType dt, - const QwNeonRegister dst, - const QwNeonRegister src1, - const QwNeonRegister src2) { + QwNeonRegister dst, QwNeonRegister src1, + QwNeonRegister src2) { int op_encoding = 0; switch (op) { case VADD: @@ -4390,10 +4406,8 @@ static Instr EncodeNeonBinOp(IntegerBinOp op, NeonDataType dt, n * B7 | B6 | m * B5 | vm | op_encoding; } -static Instr EncodeNeonBinOp(IntegerBinOp op, NeonSize size, - const QwNeonRegister dst, - const QwNeonRegister src1, - const QwNeonRegister src2) { +static Instr EncodeNeonBinOp(IntegerBinOp op, NeonSize size, QwNeonRegister dst, + QwNeonRegister src1, QwNeonRegister src2) { // Map NeonSize values to the signed values in NeonDataType, so the U bit // will be 0. return EncodeNeonBinOp(op, static_cast(size), dst, src1, src2); @@ -4578,6 +4592,51 @@ void Assembler::vrsqrts(QwNeonRegister dst, QwNeonRegister src1, emit(EncodeNeonBinOp(VRSQRTS, dst, src1, src2)); } +enum PairwiseOp { VPMIN, VPMAX }; + +static Instr EncodeNeonPairwiseOp(PairwiseOp op, NeonDataType dt, + DwVfpRegister dst, DwVfpRegister src1, + DwVfpRegister src2) { + int op_encoding = 0; + switch (op) { + case VPMIN: + op_encoding = 0xA * B8 | B4; + break; + case VPMAX: + op_encoding = 0xA * B8; + break; + default: + UNREACHABLE(); + break; + } + int vd, d; + dst.split_code(&vd, &d); + int vn, n; + src1.split_code(&vn, &n); + int vm, m; + src2.split_code(&vm, &m); + int size = NeonSz(dt); + int u = NeonU(dt); + return 0x1E4U * B23 | u * B24 | d * B22 | size * B20 | vn * B16 | vd * B12 | + n * B7 | m * B5 | vm | op_encoding; +} + +void Assembler::vpmin(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src1, + DwVfpRegister src2) { + DCHECK(IsEnabled(NEON)); + // Dd = vpmin(Dn, Dm) SIMD integer pairwise MIN. + // Instruction details available in ARM DDI 0406C.b, A8-986. + emit(EncodeNeonPairwiseOp(VPMIN, dt, dst, src1, src2)); +} + +void Assembler::vpmax(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src1, + DwVfpRegister src2) { + DCHECK(IsEnabled(NEON)); + // Dd = vpmax(Dn, Dm) SIMD integer pairwise MAX. 
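// Pairwise ops fold adjacent lane pairs of each source into one result,
// e.g. for vpmax.u16 d4, d0, d2 (illustrative registers):
//   d4[0] = max(d0[0], d0[1]), d4[1] = max(d0[2], d0[3]),
//   d4[2] = max(d2[0], d2[1]), d4[3] = max(d2[2], d2[3]).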
+ // Instruction details available in ARM DDI 0406C.b, A8-986. + emit(EncodeNeonPairwiseOp(VPMAX, dt, dst, src1, src2)); +} + void Assembler::vtst(NeonSize size, QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2) { DCHECK(IsEnabled(NEON)); diff --git a/src/arm/assembler-arm.h b/src/arm/assembler-arm.h index e9424f0244..420d195f3d 100644 --- a/src/arm/assembler-arm.h +++ b/src/arm/assembler-arm.h @@ -418,7 +418,8 @@ const QwNeonRegister q15 = { 15 }; // compilation unit that includes this header doesn't use the variables. #define kFirstCalleeSavedDoubleReg d8 #define kLastCalleeSavedDoubleReg d15 -// kDoubleRegZero and kScratchDoubleReg must pair to form kScratchQuadReg. +// kDoubleRegZero and kScratchDoubleReg must pair to form kScratchQuadReg. SIMD +// code depends on kDoubleRegZero before kScratchDoubleReg. #define kDoubleRegZero d14 #define kScratchDoubleReg d15 // After using kScratchQuadReg, kDoubleRegZero must be reset to 0. @@ -1359,6 +1360,10 @@ class Assembler : public AssemblerBase { void vmax(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2); void vmax(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2); + void vpmin(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src1, + DwVfpRegister src2); + void vpmax(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src1, + DwVfpRegister src2); void vshl(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src, int shift); void vshr(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src, int shift); // vrecpe and vrsqrte only support floating point lanes. diff --git a/src/arm/disasm-arm.cc b/src/arm/disasm-arm.cc index 041df55858..761192f59d 100644 --- a/src/arm/disasm-arm.cc +++ b/src/arm/disasm-arm.cc @@ -1867,10 +1867,10 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) { Vm = instr->VFPMRegValue(kSimd128Precision); Vn = instr->VFPNRegValue(kSimd128Precision); } + int size = kBitsPerByte * (1 << instr->Bits(21, 20)); switch (instr->Bits(11, 8)) { case 0x0: { if (instr->Bit(4) == 1) { - int size = kBitsPerByte * (1 << instr->Bits(21, 20)); // vqadd.s Qd, Qm, Qn. out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, @@ -1904,7 +1904,6 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) { } case 0x2: { if (instr->Bit(4) == 1) { - int size = kBitsPerByte * (1 << instr->Bits(21, 20)); // vqsub.s Qd, Qm, Qn. out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, @@ -1915,7 +1914,6 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) { break; } case 0x3: { - int size = kBitsPerByte * (1 << instr->Bits(21, 20)); const char* op = (instr->Bit(4) == 1) ? "vcge" : "vcgt"; // vcge/vcgt.s Qd, Qm, Qn. out_buffer_pos_ += @@ -1924,7 +1922,6 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) { break; } case 0x6: { - int size = kBitsPerByte * (1 << instr->Bits(21, 20)); // vmin/vmax.s Qd, Qm, Qn. const char* op = instr->Bit(4) == 1 ? "vmin" : "vmax"; out_buffer_pos_ += @@ -1934,7 +1931,6 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) { } case 0x8: { const char* op = (instr->Bit(4) == 0) ? "vadd" : "vtst"; - int size = kBitsPerByte * (1 << instr->Bits(21, 20)); // vadd/vtst.i Qd, Qm, Qn. out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%s.i%d q%d, q%d, q%d", @@ -1943,7 +1939,6 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) { } case 0x9: { if (instr->Bit(6) == 1 && instr->Bit(4) == 1) { - int size = kBitsPerByte * (1 << instr->Bits(21, 20)); // vmul.i Qd, Qm, Qn. 
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, @@ -1953,6 +1948,14 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) { } break; } + case 0xa: { + // vpmin/vpmax.s Dd, Dm, Dn. + const char* op = instr->Bit(4) == 1 ? "vpmin" : "vpmax"; + out_buffer_pos_ += + SNPrintF(out_buffer_ + out_buffer_pos_, "%s.s%d d%d, d%d, d%d", + op, size, Vd, Vn, Vm); + break; + } case 0xd: { if (instr->Bit(4) == 0) { const char* op = (instr->Bits(21, 20) == 0) ? "vadd" : "vsub"; @@ -2052,10 +2055,10 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) { Vm = instr->VFPMRegValue(kSimd128Precision); Vn = instr->VFPNRegValue(kSimd128Precision); } + int size = kBitsPerByte * (1 << instr->Bits(21, 20)); switch (instr->Bits(11, 8)) { case 0x0: { if (instr->Bit(4) == 1) { - int size = kBitsPerByte * (1 << instr->Bits(21, 20)); // vqadd.u Qd, Qm, Qn. out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, @@ -2087,7 +2090,6 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) { } case 0x2: { if (instr->Bit(4) == 1) { - int size = kBitsPerByte * (1 << instr->Bits(21, 20)); // vqsub.u Qd, Qm, Qn. out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, @@ -2098,7 +2100,6 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) { break; } case 0x3: { - int size = kBitsPerByte * (1 << instr->Bits(21, 20)); const char* op = (instr->Bit(4) == 1) ? "vcge" : "vcgt"; // vcge/vcgt.u Qd, Qm, Qn. out_buffer_pos_ += @@ -2107,7 +2108,6 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) { break; } case 0x6: { - int size = kBitsPerByte * (1 << instr->Bits(21, 20)); // vmin/vmax.u Qd, Qm, Qn. const char* op = instr->Bit(4) == 1 ? "vmin" : "vmax"; out_buffer_pos_ += @@ -2116,7 +2116,6 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) { break; } case 0x8: { - int size = kBitsPerByte * (1 << instr->Bits(21, 20)); if (instr->Bit(4) == 0) { out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, @@ -2128,6 +2127,14 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) { } break; } + case 0xa: { + // vpmin/vpmax.u Dd, Dm, Dn. + const char* op = instr->Bit(4) == 1 ? 
"vpmin" : "vpmax"; + out_buffer_pos_ += + SNPrintF(out_buffer_ + out_buffer_pos_, "%s.u%d d%d, d%d, d%d", + op, size, Vd, Vn, Vm); + break; + } case 0xd: { if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) { // vmul.f32 Qd, Qn, Qm diff --git a/src/arm/macro-assembler-arm.cc b/src/arm/macro-assembler-arm.cc index 49e534a346..8d87db283f 100644 --- a/src/arm/macro-assembler-arm.cc +++ b/src/arm/macro-assembler-arm.cc @@ -1162,6 +1162,15 @@ void MacroAssembler::ExtractLane(Register dst, QwNeonRegister src, vmov(dt, dst, double_source, double_lane); } +void MacroAssembler::ExtractLane(Register dst, DwVfpRegister src, + NeonDataType dt, int lane) { + int size = NeonSz(dt); // 0, 1, 2 + int byte = lane << size; + int double_byte = byte & (kDoubleSize - 1); + int double_lane = double_byte >> size; + vmov(dt, dst, src, double_lane); +} + void MacroAssembler::ExtractLane(SwVfpRegister dst, QwNeonRegister src, Register scratch, int lane) { int s_code = src.code() * 4 + lane; diff --git a/src/arm/macro-assembler-arm.h b/src/arm/macro-assembler-arm.h index 0f34c09e92..e285bd49fc 100644 --- a/src/arm/macro-assembler-arm.h +++ b/src/arm/macro-assembler-arm.h @@ -563,6 +563,7 @@ class MacroAssembler: public Assembler { void VmovExtended(const MemOperand& dst, int src_code, Register scratch); void ExtractLane(Register dst, QwNeonRegister src, NeonDataType dt, int lane); + void ExtractLane(Register dst, DwVfpRegister src, NeonDataType dt, int lane); void ExtractLane(SwVfpRegister dst, QwNeonRegister src, Register scratch, int lane); void ReplaceLane(QwNeonRegister dst, QwNeonRegister src, Register src_lane, diff --git a/src/arm/simulator-arm.cc b/src/arm/simulator-arm.cc index 4d1c6e5b08..e2bcd952a6 100644 --- a/src/arm/simulator-arm.cc +++ b/src/arm/simulator-arm.cc @@ -900,6 +900,18 @@ void Simulator::set_d_register(int dreg, const uint32_t* value) { memcpy(vfp_registers_ + dreg * 2, value, sizeof(*value) * 2); } +template +void Simulator::get_d_register(int dreg, T* value) { + DCHECK((dreg >= 0) && (dreg < num_d_registers)); + memcpy(value, vfp_registers_ + dreg * 2, kDoubleSize); +} + +template +void Simulator::set_d_register(int dreg, const T* value) { + DCHECK((dreg >= 0) && (dreg < num_d_registers)); + memcpy(vfp_registers_ + dreg * 2, value, kDoubleSize); +} + template void Simulator::get_q_register(int qreg, T* value) { DCHECK((qreg >= 0) && (qreg < num_q_registers)); @@ -912,7 +924,6 @@ void Simulator::set_q_register(int qreg, const T* value) { memcpy(vfp_registers_ + qreg * 4, value, kSimd128Size); } - // Raw access to the PC register. void Simulator::set_pc(int32_t value) { pc_modified_ = true; @@ -4006,6 +4017,11 @@ T Clamp(int64_t value) { return static_cast(clamped); } +template +T MinMax(T a, T b, bool is_min) { + return is_min ? 
std::min(a, b) : std::max(a, b); +} + template void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) { static const int kLanes = 16 / sizeof(T); @@ -4180,10 +4196,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { get_q_register(Vn, src1); get_q_register(Vm, src2); for (int i = 0; i < 16; i++) { - if (min) - src1[i] = std::min(src1[i], src2[i]); - else - src1[i] = std::max(src1[i], src2[i]); + src1[i] = MinMax(src1[i], src2[i], min); } set_q_register(Vd, src1); break; @@ -4193,10 +4206,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { get_q_register(Vn, src1); get_q_register(Vm, src2); for (int i = 0; i < 8; i++) { - if (min) - src1[i] = std::min(src1[i], src2[i]); - else - src1[i] = std::max(src1[i], src2[i]); + src1[i] = MinMax(src1[i], src2[i], min); } set_q_register(Vd, src1); break; @@ -4206,10 +4216,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { get_q_register(Vn, src1); get_q_register(Vm, src2); for (int i = 0; i < 4; i++) { - if (min) - src1[i] = std::min(src1[i], src2[i]); - else - src1[i] = std::max(src1[i], src2[i]); + src1[i] = MinMax(src1[i], src2[i], min); } set_q_register(Vd, src1); break; @@ -4344,6 +4351,48 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { } break; } + case 0xa: { + // vpmin/vpmax.s Dd, Dm, Dn. + NeonSize size = static_cast(instr->Bits(21, 20)); + bool min = instr->Bit(4) != 0; + switch (size) { + case Neon8: { + int8_t dst[8], src1[8], src2[8]; + get_d_register(Vn, src1); + get_d_register(Vm, src2); + for (int i = 0; i < 4; i++) { + dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min); + dst[i + 4] = MinMax(src2[i * 2], src2[i * 2 + 1], min); + } + set_d_register(Vd, dst); + break; + } + case Neon16: { + int16_t dst[4], src1[4], src2[4]; + get_d_register(Vn, src1); + get_d_register(Vm, src2); + for (int i = 0; i < 2; i++) { + dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min); + dst[i + 2] = MinMax(src2[i * 2], src2[i * 2 + 1], min); + } + set_d_register(Vd, dst); + break; + } + case Neon32: { + int32_t dst[2], src1[2], src2[2]; + get_d_register(Vn, src1); + get_d_register(Vm, src2); + dst[0] = MinMax(src1[0], src1[1], min); + dst[1] = MinMax(src2[0], src2[1], min); + set_d_register(Vd, dst); + break; + } + default: + UNREACHABLE(); + break; + } + break; + } case 0xd: { if (instr->Bit(4) == 0) { float src1[4], src2[4]; @@ -4398,16 +4447,10 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { } } } else { - if (instr->Bit(21) == 1) { - // vmin.f32 Qd, Qm, Qn. - for (int i = 0; i < 4; i++) { - src1[i] = std::min(src1[i], src2[i]); - } - } else { - // vmax.f32 Qd, Qm, Qn. - for (int i = 0; i < 4; i++) { - src1[i] = std::max(src1[i], src2[i]); - } + // vmin/vmax.f32 Qd, Qm, Qn. 
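// MinMax(a, b, min), defined above, collapses the previously duplicated
// std::min/std::max branches; a set Bit(21) selects vmin over vmax.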
+ bool min = instr->Bit(21) == 1; + for (int i = 0; i < 4; i++) { + src1[i] = MinMax(src1[i], src2[i], min); } } set_q_register(Vd, src1); @@ -4693,10 +4736,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { get_q_register(Vn, src1); get_q_register(Vm, src2); for (int i = 0; i < 16; i++) { - if (min) - src1[i] = std::min(src1[i], src2[i]); - else - src1[i] = std::max(src1[i], src2[i]); + src1[i] = MinMax(src1[i], src2[i], min); } set_q_register(Vd, src1); break; @@ -4706,10 +4746,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { get_q_register(Vn, src1); get_q_register(Vm, src2); for (int i = 0; i < 8; i++) { - if (min) - src1[i] = std::min(src1[i], src2[i]); - else - src1[i] = std::max(src1[i], src2[i]); + src1[i] = MinMax(src1[i], src2[i], min); } set_q_register(Vd, src1); break; @@ -4719,10 +4756,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { get_q_register(Vn, src1); get_q_register(Vm, src2); for (int i = 0; i < 4; i++) { - if (min) - src1[i] = std::min(src1[i], src2[i]); - else - src1[i] = std::max(src1[i], src2[i]); + src1[i] = MinMax(src1[i], src2[i], min); } set_q_register(Vd, src1); break; @@ -4813,6 +4847,48 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { } break; } + case 0xa: { + // vpmin/vpmax.u Dd, Dm, Dn. + NeonSize size = static_cast(instr->Bits(21, 20)); + bool min = instr->Bit(4) != 0; + switch (size) { + case Neon8: { + uint8_t dst[8], src1[8], src2[8]; + get_d_register(Vn, src1); + get_d_register(Vm, src2); + for (int i = 0; i < 4; i++) { + dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min); + dst[i + 4] = MinMax(src2[i * 2], src2[i * 2 + 1], min); + } + set_d_register(Vd, dst); + break; + } + case Neon16: { + uint16_t dst[4], src1[4], src2[4]; + get_d_register(Vn, src1); + get_d_register(Vm, src2); + for (int i = 0; i < 2; i++) { + dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min); + dst[i + 2] = MinMax(src2[i * 2], src2[i * 2 + 1], min); + } + set_d_register(Vd, dst); + break; + } + case Neon32: { + uint32_t dst[2], src1[2], src2[2]; + get_d_register(Vn, src1); + get_d_register(Vm, src2); + dst[0] = MinMax(src1[0], src1[1], min); + dst[1] = MinMax(src2[0], src2[1], min); + set_d_register(Vd, dst); + break; + } + default: + UNREACHABLE(); + break; + } + break; + } case 0xd: { if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) { // vmul.f32 Qd, Qn, Qm diff --git a/src/arm/simulator-arm.h b/src/arm/simulator-arm.h index 39d9b7f65c..f66450ef13 100644 --- a/src/arm/simulator-arm.h +++ b/src/arm/simulator-arm.h @@ -155,6 +155,10 @@ class Simulator { void set_d_register(int dreg, const uint32_t* value); // Support for NEON. 
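// The templated accessors below read or write a whole D register as an
// array of lanes, mirroring get_q_register/set_q_register. Usage sketch
// (values illustrative): int16_t lanes[4]; get_d_register(vn, lanes);
// the copy is always kDoubleSize bytes regardless of T.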
template + void get_d_register(int dreg, T* value); + template + void set_d_register(int dreg, const T* value); + template void get_q_register(int qreg, T* value); template void set_q_register(int qreg, const T* value); diff --git a/src/compiler/arm/code-generator-arm.cc b/src/compiler/arm/code-generator-arm.cc index a3302a1ac1..e9f83bdd07 100644 --- a/src/compiler/arm/code-generator-arm.cc +++ b/src/compiler/arm/code-generator-arm.cc @@ -1636,8 +1636,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kArmInt32x4GreaterThanOrEqual: { - Simd128Register dst = i.OutputSimd128Register(); - __ vcge(NeonS32, dst, i.InputSimd128Register(0), + __ vcge(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputSimd128Register(1)); break; } @@ -1662,8 +1661,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kArmUint32x4GreaterThanOrEqual: { - Simd128Register dst = i.OutputSimd128Register(); - __ vcge(NeonU32, dst, i.InputSimd128Register(0), + __ vcge(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputSimd128Register(1)); break; } @@ -1748,8 +1746,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kArmInt16x8GreaterThanOrEqual: { - Simd128Register dst = i.OutputSimd128Register(); - __ vcge(NeonS16, dst, i.InputSimd128Register(0), + __ vcge(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputSimd128Register(1)); break; } @@ -1784,8 +1781,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kArmUint16x8GreaterThanOrEqual: { - Simd128Register dst = i.OutputSimd128Register(); - __ vcge(NeonU16, dst, i.InputSimd128Register(0), + __ vcge(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputSimd128Register(1)); break; } @@ -1869,8 +1865,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kArmInt8x16GreaterThanOrEqual: { - Simd128Register dst = i.OutputSimd128Register(); - __ vcge(NeonS8, dst, i.InputSimd128Register(0), + __ vcge(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputSimd128Register(1)); break; } @@ -1905,8 +1900,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kArmUint8x16GreaterThanOrEqual: { - Simd128Register dst = i.OutputSimd128Register(); - __ vcge(NeonU8, dst, i.InputSimd128Register(0), + __ vcge(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputSimd128Register(1)); break; } @@ -1934,15 +1928,69 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ vmvn(i.OutputSimd128Register(), i.InputSimd128Register(0)); break; } - case kArmSimd32x4Select: - case kArmSimd16x8Select: - case kArmSimd8x16Select: { + case kArmSimd128Select: { // vbsl clobbers the mask input so make sure it was DefineSameAsFirst. 
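// vbsl computes dst = (dst & src1) | (~dst & src2): each bit of the mask
// (pre-loaded into dst) picks the corresponding bit of the first input,
// so one instruction implements Select for every lane width.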
DCHECK(i.OutputSimd128Register().is(i.InputSimd128Register(0))); __ vbsl(i.OutputSimd128Register(), i.InputSimd128Register(1), i.InputSimd128Register(2)); break; } + case kArmSimd1x4AnyTrue: { + const QwNeonRegister& src = i.InputSimd128Register(0); + __ vpmax(NeonU32, kScratchDoubleReg, src.low(), src.high()); + __ vpmax(NeonU32, kScratchDoubleReg, kScratchDoubleReg, + kScratchDoubleReg); + __ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS32, 0); + break; + } + case kArmSimd1x4AllTrue: { + const QwNeonRegister& src = i.InputSimd128Register(0); + __ vpmin(NeonU32, kScratchDoubleReg, src.low(), src.high()); + __ vpmin(NeonU32, kScratchDoubleReg, kScratchDoubleReg, + kScratchDoubleReg); + __ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS32, 0); + break; + } + case kArmSimd1x8AnyTrue: { + const QwNeonRegister& src = i.InputSimd128Register(0); + __ vpmax(NeonU16, kScratchDoubleReg, src.low(), src.high()); + __ vpmax(NeonU16, kScratchDoubleReg, kScratchDoubleReg, + kScratchDoubleReg); + __ vpmax(NeonU16, kScratchDoubleReg, kScratchDoubleReg, + kScratchDoubleReg); + __ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS16, 0); + break; + } + case kArmSimd1x8AllTrue: { + const QwNeonRegister& src = i.InputSimd128Register(0); + __ vpmin(NeonU16, kScratchDoubleReg, src.low(), src.high()); + __ vpmin(NeonU16, kScratchDoubleReg, kScratchDoubleReg, + kScratchDoubleReg); + __ vpmin(NeonU16, kScratchDoubleReg, kScratchDoubleReg, + kScratchDoubleReg); + __ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS16, 0); + break; + } + case kArmSimd1x16AnyTrue: { + const QwNeonRegister& src = i.InputSimd128Register(0); + __ vpmax(NeonU8, kScratchDoubleReg, src.low(), src.high()); + __ vpmax(NeonU8, kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); + // vtst to detect any bits in the bottom 32 bits of kScratchDoubleReg. + // This saves an instruction vs. the naive sequence of vpmax. + // kDoubleRegZero is not changed, since it is 0. + __ vtst(Neon32, kScratchQuadReg, kScratchQuadReg, kScratchQuadReg); + __ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS32, 0); + break; + } + case kArmSimd1x16AllTrue: { + const QwNeonRegister& src = i.InputSimd128Register(0); + __ vpmin(NeonU8, kScratchDoubleReg, src.low(), src.high()); + __ vpmin(NeonU8, kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); + __ vpmin(NeonU8, kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); + __ vpmin(NeonU8, kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); + __ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS8, 0); + break; + } case kCheckedLoadInt8: ASSEMBLE_CHECKED_LOAD_INTEGER(ldrsb); break; diff --git a/src/compiler/arm/instruction-codes-arm.h b/src/compiler/arm/instruction-codes-arm.h index 184a9da7b0..dd344292ad 100644 --- a/src/compiler/arm/instruction-codes-arm.h +++ b/src/compiler/arm/instruction-codes-arm.h @@ -206,9 +206,13 @@ namespace compiler { V(ArmSimd128Or) \ V(ArmSimd128Xor) \ V(ArmSimd128Not) \ - V(ArmSimd32x4Select) \ - V(ArmSimd16x8Select) \ - V(ArmSimd8x16Select) + V(ArmSimd128Select) \ + V(ArmSimd1x4AnyTrue) \ + V(ArmSimd1x4AllTrue) \ + V(ArmSimd1x8AnyTrue) \ + V(ArmSimd1x8AllTrue) \ + V(ArmSimd1x16AnyTrue) \ + V(ArmSimd1x16AllTrue) // Addressing modes represent the "shape" of inputs to an instruction. // Many instructions support multiple addressing modes. 
Addressing modes diff --git a/src/compiler/arm/instruction-scheduler-arm.cc b/src/compiler/arm/instruction-scheduler-arm.cc index fb8df60abb..ba755a283d 100644 --- a/src/compiler/arm/instruction-scheduler-arm.cc +++ b/src/compiler/arm/instruction-scheduler-arm.cc @@ -194,9 +194,13 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArmSimd128Or: case kArmSimd128Xor: case kArmSimd128Not: - case kArmSimd32x4Select: - case kArmSimd16x8Select: - case kArmSimd8x16Select: + case kArmSimd128Select: + case kArmSimd1x4AnyTrue: + case kArmSimd1x4AllTrue: + case kArmSimd1x8AnyTrue: + case kArmSimd1x8AllTrue: + case kArmSimd1x16AnyTrue: + case kArmSimd1x16AllTrue: return kNoOpcodeFlags; case kArmVldrF32: diff --git a/src/compiler/arm/instruction-selector-arm.cc b/src/compiler/arm/instruction-selector-arm.cc index a0420eafb9..da99db1049 100644 --- a/src/compiler/arm/instruction-selector-arm.cc +++ b/src/compiler/arm/instruction-selector-arm.cc @@ -2198,73 +2198,91 @@ void InstructionSelector::VisitAtomicStore(Node* node) { V(Simd1x8Zero) \ V(Simd1x16Zero) -#define SIMD_UNOP_LIST(V) \ - V(Float32x4FromInt32x4) \ - V(Float32x4FromUint32x4) \ - V(Float32x4Abs) \ - V(Float32x4Neg) \ - V(Int32x4FromFloat32x4) \ - V(Uint32x4FromFloat32x4) \ - V(Int32x4Neg) \ - V(Int16x8Neg) \ - V(Int8x16Neg) \ - V(Simd128Not) +#define SIMD_UNOP_LIST(V) \ + V(Float32x4FromInt32x4, kArmFloat32x4FromInt32x4) \ + V(Float32x4FromUint32x4, kArmFloat32x4FromUint32x4) \ + V(Float32x4Abs, kArmFloat32x4Abs) \ + V(Float32x4Neg, kArmFloat32x4Neg) \ + V(Int32x4FromFloat32x4, kArmInt32x4FromFloat32x4) \ + V(Uint32x4FromFloat32x4, kArmUint32x4FromFloat32x4) \ + V(Int32x4Neg, kArmInt32x4Neg) \ + V(Int16x8Neg, kArmInt16x8Neg) \ + V(Int8x16Neg, kArmInt8x16Neg) \ + V(Simd128Not, kArmSimd128Not) \ + V(Simd1x4Not, kArmSimd128Not) \ + V(Simd1x4AnyTrue, kArmSimd1x4AnyTrue) \ + V(Simd1x4AllTrue, kArmSimd1x4AllTrue) \ + V(Simd1x8Not, kArmSimd128Not) \ + V(Simd1x8AnyTrue, kArmSimd1x8AnyTrue) \ + V(Simd1x8AllTrue, kArmSimd1x8AllTrue) \ + V(Simd1x16Not, kArmSimd128Not) \ + V(Simd1x16AnyTrue, kArmSimd1x16AnyTrue) \ + V(Simd1x16AllTrue, kArmSimd1x16AllTrue) -#define SIMD_BINOP_LIST(V) \ - V(Float32x4Add) \ - V(Float32x4Sub) \ - V(Float32x4Equal) \ - V(Float32x4NotEqual) \ - V(Int32x4Add) \ - V(Int32x4Sub) \ - V(Int32x4Mul) \ - V(Int32x4Min) \ - V(Int32x4Max) \ - V(Int32x4Equal) \ - V(Int32x4NotEqual) \ - V(Int32x4GreaterThan) \ - V(Int32x4GreaterThanOrEqual) \ - V(Uint32x4Min) \ - V(Uint32x4Max) \ - V(Uint32x4GreaterThan) \ - V(Uint32x4GreaterThanOrEqual) \ - V(Int16x8Add) \ - V(Int16x8AddSaturate) \ - V(Int16x8Sub) \ - V(Int16x8SubSaturate) \ - V(Int16x8Mul) \ - V(Int16x8Min) \ - V(Int16x8Max) \ - V(Int16x8Equal) \ - V(Int16x8NotEqual) \ - V(Int16x8GreaterThan) \ - V(Int16x8GreaterThanOrEqual) \ - V(Uint16x8AddSaturate) \ - V(Uint16x8SubSaturate) \ - V(Uint16x8Min) \ - V(Uint16x8Max) \ - V(Uint16x8GreaterThan) \ - V(Uint16x8GreaterThanOrEqual) \ - V(Int8x16Add) \ - V(Int8x16AddSaturate) \ - V(Int8x16Sub) \ - V(Int8x16SubSaturate) \ - V(Int8x16Mul) \ - V(Int8x16Min) \ - V(Int8x16Max) \ - V(Int8x16Equal) \ - V(Int8x16NotEqual) \ - V(Int8x16GreaterThan) \ - V(Int8x16GreaterThanOrEqual) \ - V(Uint8x16AddSaturate) \ - V(Uint8x16SubSaturate) \ - V(Uint8x16Min) \ - V(Uint8x16Max) \ - V(Uint8x16GreaterThan) \ - V(Uint8x16GreaterThanOrEqual) \ - V(Simd128And) \ - V(Simd128Or) \ - V(Simd128Xor) +#define SIMD_BINOP_LIST(V) \ + V(Float32x4Add, kArmFloat32x4Add) \ + V(Float32x4Sub, kArmFloat32x4Sub) \ + V(Float32x4Equal, kArmFloat32x4Equal) \ + 
V(Float32x4NotEqual, kArmFloat32x4NotEqual) \ + V(Int32x4Add, kArmInt32x4Add) \ + V(Int32x4Sub, kArmInt32x4Sub) \ + V(Int32x4Mul, kArmInt32x4Mul) \ + V(Int32x4Min, kArmInt32x4Min) \ + V(Int32x4Max, kArmInt32x4Max) \ + V(Int32x4Equal, kArmInt32x4Equal) \ + V(Int32x4NotEqual, kArmInt32x4NotEqual) \ + V(Int32x4GreaterThan, kArmInt32x4GreaterThan) \ + V(Int32x4GreaterThanOrEqual, kArmInt32x4GreaterThanOrEqual) \ + V(Uint32x4Min, kArmUint32x4Min) \ + V(Uint32x4Max, kArmUint32x4Max) \ + V(Uint32x4GreaterThan, kArmUint32x4GreaterThan) \ + V(Uint32x4GreaterThanOrEqual, kArmUint32x4GreaterThanOrEqual) \ + V(Int16x8Add, kArmInt16x8Add) \ + V(Int16x8AddSaturate, kArmInt16x8AddSaturate) \ + V(Int16x8Sub, kArmInt16x8Sub) \ + V(Int16x8SubSaturate, kArmInt16x8SubSaturate) \ + V(Int16x8Mul, kArmInt16x8Mul) \ + V(Int16x8Min, kArmInt16x8Min) \ + V(Int16x8Max, kArmInt16x8Max) \ + V(Int16x8Equal, kArmInt16x8Equal) \ + V(Int16x8NotEqual, kArmInt16x8NotEqual) \ + V(Int16x8GreaterThan, kArmInt16x8GreaterThan) \ + V(Int16x8GreaterThanOrEqual, kArmInt16x8GreaterThanOrEqual) \ + V(Uint16x8AddSaturate, kArmUint16x8AddSaturate) \ + V(Uint16x8SubSaturate, kArmUint16x8SubSaturate) \ + V(Uint16x8Min, kArmUint16x8Min) \ + V(Uint16x8Max, kArmUint16x8Max) \ + V(Uint16x8GreaterThan, kArmUint16x8GreaterThan) \ + V(Uint16x8GreaterThanOrEqual, kArmUint16x8GreaterThanOrEqual) \ + V(Int8x16Add, kArmInt8x16Add) \ + V(Int8x16AddSaturate, kArmInt8x16AddSaturate) \ + V(Int8x16Sub, kArmInt8x16Sub) \ + V(Int8x16SubSaturate, kArmInt8x16SubSaturate) \ + V(Int8x16Mul, kArmInt8x16Mul) \ + V(Int8x16Min, kArmInt8x16Min) \ + V(Int8x16Max, kArmInt8x16Max) \ + V(Int8x16Equal, kArmInt8x16Equal) \ + V(Int8x16NotEqual, kArmInt8x16NotEqual) \ + V(Int8x16GreaterThan, kArmInt8x16GreaterThan) \ + V(Int8x16GreaterThanOrEqual, kArmInt8x16GreaterThanOrEqual) \ + V(Uint8x16AddSaturate, kArmUint8x16AddSaturate) \ + V(Uint8x16SubSaturate, kArmUint8x16SubSaturate) \ + V(Uint8x16Min, kArmUint8x16Min) \ + V(Uint8x16Max, kArmUint8x16Max) \ + V(Uint8x16GreaterThan, kArmUint8x16GreaterThan) \ + V(Uint8x16GreaterThanOrEqual, kArmUint8x16GreaterThanOrEqual) \ + V(Simd128And, kArmSimd128And) \ + V(Simd128Or, kArmSimd128Or) \ + V(Simd128Xor, kArmSimd128Xor) \ + V(Simd1x4And, kArmSimd128And) \ + V(Simd1x4Or, kArmSimd128Or) \ + V(Simd1x4Xor, kArmSimd128Xor) \ + V(Simd1x8And, kArmSimd128And) \ + V(Simd1x8Or, kArmSimd128Or) \ + V(Simd1x8Xor, kArmSimd128Xor) \ + V(Simd1x16And, kArmSimd128And) \ + V(Simd1x16Or, kArmSimd128Or) \ + V(Simd1x16Xor, kArmSimd128Xor) #define SIMD_SHIFT_OP_LIST(V) \ V(Int32x4ShiftLeftByScalar) \ @@ -2306,16 +2324,16 @@ SIMD_TYPE_LIST(SIMD_VISIT_REPLACE_LANE) SIMD_ZERO_OP_LIST(SIMD_VISIT_ZERO_OP) #undef SIMD_VISIT_ZERO_OP -#define SIMD_VISIT_UNOP(Name) \ +#define SIMD_VISIT_UNOP(Name, instruction) \ void InstructionSelector::Visit##Name(Node* node) { \ - VisitRR(this, kArm##Name, node); \ + VisitRR(this, instruction, node); \ } SIMD_UNOP_LIST(SIMD_VISIT_UNOP) #undef SIMD_VISIT_UNOP -#define SIMD_VISIT_BINOP(Name) \ +#define SIMD_VISIT_BINOP(Name, instruction) \ void InstructionSelector::Visit##Name(Node* node) { \ - VisitRRR(this, kArm##Name, node); \ + VisitRRR(this, instruction, node); \ } SIMD_BINOP_LIST(SIMD_VISIT_BINOP) #undef SIMD_VISIT_BINOP @@ -2329,7 +2347,7 @@ SIMD_SHIFT_OP_LIST(SIMD_VISIT_SHIFT_OP) #define SIMD_VISIT_SELECT_OP(format) \ void InstructionSelector::VisitSimd##format##Select(Node* node) { \ - VisitRRRR(this, kArmSimd##format##Select, node); \ + VisitRRRR(this, kArmSimd128Select, node); \ } 
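// All three select visitors (32x4, 16x8, 8x16) now emit the single
// kArmSimd128Select opcode: vbsl operates bitwise, so the lane format is
// irrelevant at code-generation time.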
SIMD_FORMAT_LIST(SIMD_VISIT_SELECT_OP) #undef SIMD_VISIT_SELECT_OP diff --git a/src/compiler/instruction-selector.cc b/src/compiler/instruction-selector.cc index 5121983e07..e095bc20b3 100644 --- a/src/compiler/instruction-selector.cc +++ b/src/compiler/instruction-selector.cc @@ -1637,10 +1637,46 @@ void InstructionSelector::VisitNode(Node* node) { return MarkAsSimd128(node), VisitSimd8x16Select(node); case IrOpcode::kSimd1x4Zero: return MarkAsSimd1x4(node), VisitSimd1x4Zero(node); + case IrOpcode::kSimd1x4And: + return MarkAsSimd1x4(node), VisitSimd1x4And(node); + case IrOpcode::kSimd1x4Or: + return MarkAsSimd1x4(node), VisitSimd1x4Or(node); + case IrOpcode::kSimd1x4Xor: + return MarkAsSimd1x4(node), VisitSimd1x4Xor(node); + case IrOpcode::kSimd1x4Not: + return MarkAsSimd1x4(node), VisitSimd1x4Not(node); + case IrOpcode::kSimd1x4AnyTrue: + return MarkAsWord32(node), VisitSimd1x4AnyTrue(node); + case IrOpcode::kSimd1x4AllTrue: + return MarkAsWord32(node), VisitSimd1x4AllTrue(node); case IrOpcode::kSimd1x8Zero: return MarkAsSimd1x8(node), VisitSimd1x8Zero(node); + case IrOpcode::kSimd1x8And: + return MarkAsSimd1x8(node), VisitSimd1x8And(node); + case IrOpcode::kSimd1x8Or: + return MarkAsSimd1x8(node), VisitSimd1x8Or(node); + case IrOpcode::kSimd1x8Xor: + return MarkAsSimd1x8(node), VisitSimd1x8Xor(node); + case IrOpcode::kSimd1x8Not: + return MarkAsSimd1x8(node), VisitSimd1x8Not(node); + case IrOpcode::kSimd1x8AnyTrue: + return MarkAsWord32(node), VisitSimd1x8AnyTrue(node); + case IrOpcode::kSimd1x8AllTrue: + return MarkAsWord32(node), VisitSimd1x8AllTrue(node); case IrOpcode::kSimd1x16Zero: return MarkAsSimd1x16(node), VisitSimd1x16Zero(node); + case IrOpcode::kSimd1x16And: + return MarkAsSimd1x16(node), VisitSimd1x16And(node); + case IrOpcode::kSimd1x16Or: + return MarkAsSimd1x16(node), VisitSimd1x16Or(node); + case IrOpcode::kSimd1x16Xor: + return MarkAsSimd1x16(node), VisitSimd1x16Xor(node); + case IrOpcode::kSimd1x16Not: + return MarkAsSimd1x16(node), VisitSimd1x16Not(node); + case IrOpcode::kSimd1x16AnyTrue: + return MarkAsWord32(node), VisitSimd1x16AnyTrue(node); + case IrOpcode::kSimd1x16AllTrue: + return MarkAsWord32(node), VisitSimd1x16AllTrue(node); default: V8_Fatal(__FILE__, __LINE__, "Unexpected operator #%d:%s @ node #%d", node->opcode(), node->op()->mnemonic(), node->id()); @@ -2254,6 +2290,42 @@ void InstructionSelector::VisitSimd32x4Select(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitSimd16x8Select(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitSimd8x16Select(Node* node) { UNIMPLEMENTED(); } + +void InstructionSelector::VisitSimd1x4And(Node* node) { UNIMPLEMENTED(); } + +void InstructionSelector::VisitSimd1x4Or(Node* node) { UNIMPLEMENTED(); } + +void InstructionSelector::VisitSimd1x4Xor(Node* node) { UNIMPLEMENTED(); } + +void InstructionSelector::VisitSimd1x4Not(Node* node) { UNIMPLEMENTED(); } + +void InstructionSelector::VisitSimd1x4AnyTrue(Node* node) { UNIMPLEMENTED(); } + +void InstructionSelector::VisitSimd1x4AllTrue(Node* node) { UNIMPLEMENTED(); } + +void InstructionSelector::VisitSimd1x8And(Node* node) { UNIMPLEMENTED(); } + +void InstructionSelector::VisitSimd1x8Or(Node* node) { UNIMPLEMENTED(); } + +void InstructionSelector::VisitSimd1x8Xor(Node* node) { UNIMPLEMENTED(); } + +void InstructionSelector::VisitSimd1x8Not(Node* node) { UNIMPLEMENTED(); } + +void InstructionSelector::VisitSimd1x8AnyTrue(Node* node) { UNIMPLEMENTED(); } + +void InstructionSelector::VisitSimd1x8AllTrue(Node* node) { UNIMPLEMENTED(); } + +void 
InstructionSelector::VisitSimd1x16And(Node* node) { UNIMPLEMENTED(); } + +void InstructionSelector::VisitSimd1x16Or(Node* node) { UNIMPLEMENTED(); } + +void InstructionSelector::VisitSimd1x16Xor(Node* node) { UNIMPLEMENTED(); } + +void InstructionSelector::VisitSimd1x16Not(Node* node) { UNIMPLEMENTED(); } + +void InstructionSelector::VisitSimd1x16AnyTrue(Node* node) { UNIMPLEMENTED(); } + +void InstructionSelector::VisitSimd1x16AllTrue(Node* node) { UNIMPLEMENTED(); } #endif // !V8_TARGET_ARCH_ARM void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); } diff --git a/src/compiler/machine-operator.cc b/src/compiler/machine-operator.cc index e248b42203..8123dcef8f 100644 --- a/src/compiler/machine-operator.cc +++ b/src/compiler/machine-operator.cc @@ -263,12 +263,6 @@ MachineRepresentation AtomicStoreRepresentationOf(Operator const* op) { V(Uint32x4GreaterThan, Operator::kNoProperties, 2, 0, 1) \ V(Uint32x4GreaterThanOrEqual, Operator::kNoProperties, 2, 0, 1) \ V(Uint32x4FromFloat32x4, Operator::kNoProperties, 1, 0, 1) \ - V(Bool32x4And, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \ - V(Bool32x4Or, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \ - V(Bool32x4Xor, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \ - V(Bool32x4Not, Operator::kNoProperties, 1, 0, 1) \ - V(Bool32x4AnyTrue, Operator::kNoProperties, 1, 0, 1) \ - V(Bool32x4AllTrue, Operator::kNoProperties, 1, 0, 1) \ V(Int16x8Splat, Operator::kNoProperties, 1, 0, 1) \ V(Int16x8Neg, Operator::kNoProperties, 1, 0, 1) \ V(Int16x8Add, Operator::kCommutative, 2, 0, 1) \ @@ -292,12 +286,6 @@ MachineRepresentation AtomicStoreRepresentationOf(Operator const* op) { V(Uint16x8LessThanOrEqual, Operator::kNoProperties, 2, 0, 1) \ V(Uint16x8GreaterThan, Operator::kNoProperties, 2, 0, 1) \ V(Uint16x8GreaterThanOrEqual, Operator::kNoProperties, 2, 0, 1) \ - V(Bool16x8And, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \ - V(Bool16x8Or, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \ - V(Bool16x8Xor, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \ - V(Bool16x8Not, Operator::kNoProperties, 1, 0, 1) \ - V(Bool16x8AnyTrue, Operator::kNoProperties, 1, 0, 1) \ - V(Bool16x8AllTrue, Operator::kNoProperties, 1, 0, 1) \ V(Int8x16Splat, Operator::kNoProperties, 1, 0, 1) \ V(Int8x16Neg, Operator::kNoProperties, 1, 0, 1) \ V(Int8x16Add, Operator::kCommutative, 2, 0, 1) \ @@ -321,12 +309,6 @@ MachineRepresentation AtomicStoreRepresentationOf(Operator const* op) { V(Uint8x16LessThanOrEqual, Operator::kNoProperties, 2, 0, 1) \ V(Uint8x16GreaterThan, Operator::kNoProperties, 2, 0, 1) \ V(Uint8x16GreaterThanOrEqual, Operator::kNoProperties, 2, 0, 1) \ - V(Bool8x16And, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \ - V(Bool8x16Or, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \ - V(Bool8x16Xor, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \ - V(Bool8x16Not, Operator::kNoProperties, 1, 0, 1) \ - V(Bool8x16AnyTrue, Operator::kNoProperties, 1, 0, 1) \ - V(Bool8x16AllTrue, Operator::kNoProperties, 1, 0, 1) \ V(Simd128Load, Operator::kNoProperties, 2, 0, 1) \ V(Simd128Load1, Operator::kNoProperties, 2, 0, 1) \ V(Simd128Load2, Operator::kNoProperties, 2, 0, 1) \ @@ -344,8 +326,26 @@ MachineRepresentation AtomicStoreRepresentationOf(Operator const* op) { V(Simd16x8Select, Operator::kNoProperties, 3, 0, 1) \ V(Simd8x16Select, Operator::kNoProperties, 3, 0, 1) \ V(Simd1x4Zero, Operator::kNoProperties, 0, 0, 1) \ + V(Simd1x4And, 
Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \ + V(Simd1x4Or, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \ + V(Simd1x4Xor, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \ + V(Simd1x4Not, Operator::kNoProperties, 1, 0, 1) \ + V(Simd1x4AnyTrue, Operator::kNoProperties, 1, 0, 1) \ + V(Simd1x4AllTrue, Operator::kNoProperties, 1, 0, 1) \ V(Simd1x8Zero, Operator::kNoProperties, 0, 0, 1) \ - V(Simd1x16Zero, Operator::kNoProperties, 0, 0, 1) + V(Simd1x8And, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \ + V(Simd1x8Or, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \ + V(Simd1x8Xor, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \ + V(Simd1x8Not, Operator::kNoProperties, 1, 0, 1) \ + V(Simd1x8AnyTrue, Operator::kNoProperties, 1, 0, 1) \ + V(Simd1x8AllTrue, Operator::kNoProperties, 1, 0, 1) \ + V(Simd1x16Zero, Operator::kNoProperties, 0, 0, 1) \ + V(Simd1x16And, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \ + V(Simd1x16Or, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \ + V(Simd1x16Xor, Operator::kAssociative | Operator::kCommutative, 2, 0, 1) \ + V(Simd1x16Not, Operator::kNoProperties, 1, 0, 1) \ + V(Simd1x16AnyTrue, Operator::kNoProperties, 1, 0, 1) \ + V(Simd1x16AllTrue, Operator::kNoProperties, 1, 0, 1) #define PURE_OPTIONAL_OP_LIST(V) \ V(Word32Ctz, Operator::kNoProperties, 1, 0, 1) \ diff --git a/src/compiler/machine-operator.h b/src/compiler/machine-operator.h index 2087ab8454..aee27045fd 100644 --- a/src/compiler/machine-operator.h +++ b/src/compiler/machine-operator.h @@ -478,13 +478,6 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final const Operator* Uint32x4GreaterThanOrEqual(); const Operator* Uint32x4FromFloat32x4(); - const Operator* Bool32x4And(); - const Operator* Bool32x4Or(); - const Operator* Bool32x4Xor(); - const Operator* Bool32x4Not(); - const Operator* Bool32x4AnyTrue(); - const Operator* Bool32x4AllTrue(); - const Operator* Int16x8Splat(); const Operator* Int16x8ExtractLane(int32_t); const Operator* Int16x8ReplaceLane(int32_t); @@ -515,13 +508,6 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final const Operator* Uint16x8GreaterThan(); const Operator* Uint16x8GreaterThanOrEqual(); - const Operator* Bool16x8And(); - const Operator* Bool16x8Or(); - const Operator* Bool16x8Xor(); - const Operator* Bool16x8Not(); - const Operator* Bool16x8AnyTrue(); - const Operator* Bool16x8AllTrue(); - const Operator* Int8x16Splat(); const Operator* Int8x16ExtractLane(int32_t); const Operator* Int8x16ReplaceLane(int32_t); @@ -552,13 +538,6 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final const Operator* Uint8x16GreaterThan(); const Operator* Uint8x16GreaterThanOrEqual(); - const Operator* Bool8x16And(); - const Operator* Bool8x16Or(); - const Operator* Bool8x16Xor(); - const Operator* Bool8x16Not(); - const Operator* Bool8x16AnyTrue(); - const Operator* Bool8x16AllTrue(); - const Operator* Simd128Load(); const Operator* Simd128Load1(); const Operator* Simd128Load2(); @@ -585,8 +564,28 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final const Operator* Simd8x16Shuffle(); const Operator* Simd1x4Zero(); + const Operator* Simd1x4And(); + const Operator* Simd1x4Or(); + const Operator* Simd1x4Xor(); + const Operator* Simd1x4Not(); + const Operator* Simd1x4AnyTrue(); + const Operator* Simd1x4AllTrue(); + const Operator* Simd1x8Zero(); + const Operator* Simd1x8And(); + const Operator* Simd1x8Or(); + const Operator* Simd1x8Xor(); + const Operator* Simd1x8Not(); + const Operator* 
Simd1x8AnyTrue(); + const Operator* Simd1x8AllTrue(); + const Operator* Simd1x16Zero(); + const Operator* Simd1x16And(); + const Operator* Simd1x16Or(); + const Operator* Simd1x16Xor(); + const Operator* Simd1x16Not(); + const Operator* Simd1x16AnyTrue(); + const Operator* Simd1x16AllTrue(); // load [base + index] const Operator* Load(LoadRepresentation rep); diff --git a/src/compiler/opcodes.h b/src/compiler/opcodes.h index 7dab45e017..16855d1627 100644 --- a/src/compiler/opcodes.h +++ b/src/compiler/opcodes.h @@ -552,171 +552,159 @@ V(AtomicStore) \ V(UnsafePointerAdd) -#define MACHINE_SIMD_RETURN_SIMD_OP_LIST(V) \ - V(Float32x4Splat) \ - V(Float32x4ReplaceLane) \ - V(Float32x4Abs) \ - V(Float32x4Neg) \ - V(Float32x4Sqrt) \ - V(Float32x4RecipApprox) \ - V(Float32x4RecipSqrtApprox) \ - V(Float32x4Add) \ - V(Float32x4Sub) \ - V(Float32x4Mul) \ - V(Float32x4Div) \ - V(Float32x4Min) \ - V(Float32x4Max) \ - V(Float32x4MinNum) \ - V(Float32x4MaxNum) \ - V(Float32x4Equal) \ - V(Float32x4NotEqual) \ - V(Float32x4LessThan) \ - V(Float32x4LessThanOrEqual) \ - V(Float32x4GreaterThan) \ - V(Float32x4GreaterThanOrEqual) \ - V(Float32x4FromInt32x4) \ - V(Float32x4FromUint32x4) \ - V(Int32x4Splat) \ - V(Int32x4ReplaceLane) \ - V(Int32x4Neg) \ - V(Int32x4Add) \ - V(Int32x4Sub) \ - V(Int32x4Mul) \ - V(Int32x4Min) \ - V(Int32x4Max) \ - V(Int32x4ShiftLeftByScalar) \ - V(Int32x4ShiftRightByScalar) \ - V(Int32x4Equal) \ - V(Int32x4NotEqual) \ - V(Int32x4LessThan) \ - V(Int32x4LessThanOrEqual) \ - V(Int32x4GreaterThan) \ - V(Int32x4GreaterThanOrEqual) \ - V(Int32x4FromFloat32x4) \ - V(Uint32x4Min) \ - V(Uint32x4Max) \ - V(Uint32x4ShiftLeftByScalar) \ - V(Uint32x4ShiftRightByScalar) \ - V(Uint32x4LessThan) \ - V(Uint32x4LessThanOrEqual) \ - V(Uint32x4GreaterThan) \ - V(Uint32x4GreaterThanOrEqual) \ - V(Uint32x4FromFloat32x4) \ - V(Bool32x4And) \ - V(Bool32x4Or) \ - V(Bool32x4Xor) \ - V(Bool32x4Not) \ - V(Int16x8Splat) \ - V(Int16x8ReplaceLane) \ - V(Int16x8Neg) \ - V(Int16x8Add) \ - V(Int16x8AddSaturate) \ - V(Int16x8Sub) \ - V(Int16x8SubSaturate) \ - V(Int16x8Mul) \ - V(Int16x8Min) \ - V(Int16x8Max) \ - V(Int16x8ShiftLeftByScalar) \ - V(Int16x8ShiftRightByScalar) \ - V(Int16x8Equal) \ - V(Int16x8NotEqual) \ - V(Int16x8LessThan) \ - V(Int16x8LessThanOrEqual) \ - V(Int16x8GreaterThan) \ - V(Int16x8GreaterThanOrEqual) \ - V(Uint16x8AddSaturate) \ - V(Uint16x8SubSaturate) \ - V(Uint16x8Min) \ - V(Uint16x8Max) \ - V(Uint16x8ShiftLeftByScalar) \ - V(Uint16x8ShiftRightByScalar) \ - V(Uint16x8LessThan) \ - V(Uint16x8LessThanOrEqual) \ - V(Uint16x8GreaterThan) \ - V(Uint16x8GreaterThanOrEqual) \ - V(Bool16x8And) \ - V(Bool16x8Or) \ - V(Bool16x8Xor) \ - V(Bool16x8Not) \ - V(Int8x16Splat) \ - V(Int8x16ReplaceLane) \ - V(Int8x16Neg) \ - V(Int8x16Add) \ - V(Int8x16AddSaturate) \ - V(Int8x16Sub) \ - V(Int8x16SubSaturate) \ - V(Int8x16Mul) \ - V(Int8x16Min) \ - V(Int8x16Max) \ - V(Int8x16ShiftLeftByScalar) \ - V(Int8x16ShiftRightByScalar) \ - V(Int8x16Equal) \ - V(Int8x16NotEqual) \ - V(Int8x16LessThan) \ - V(Int8x16LessThanOrEqual) \ - V(Int8x16GreaterThan) \ - V(Int8x16GreaterThanOrEqual) \ - V(Uint8x16AddSaturate) \ - V(Uint8x16SubSaturate) \ - V(Uint8x16Min) \ - V(Uint8x16Max) \ - V(Uint8x16ShiftLeftByScalar) \ - V(Uint8x16ShiftRightByScalar) \ - V(Uint8x16LessThan) \ - V(Uint8x16LessThanOrEqual) \ - V(Uint8x16GreaterThan) \ - V(Uint8x16GreaterThanOrEqual) \ - V(Bool8x16And) \ - V(Bool8x16Or) \ - V(Bool8x16Xor) \ - V(Bool8x16Not) \ - V(Simd128Zero) \ - V(Simd128And) \ - V(Simd128Or) \ - V(Simd128Xor) \ - V(Simd128Not) \ 
- V(Simd32x4Select) \ - V(Simd32x4Swizzle) \ - V(Simd32x4Shuffle) \ - V(Simd16x8Select) \ - V(Simd16x8Swizzle) \ - V(Simd16x8Shuffle) \ - V(Simd8x16Select) \ - V(Simd8x16Swizzle) \ - V(Simd8x16Shuffle) \ - V(Simd1x4Zero) \ - V(Simd1x8Zero) \ - V(Simd1x16Zero) - -#define MACHINE_SIMD_RETURN_NUM_OP_LIST(V) \ - V(Float32x4ExtractLane) \ - V(Int32x4ExtractLane) \ - V(Int16x8ExtractLane) \ - V(Int8x16ExtractLane) - -#define MACHINE_SIMD_RETURN_BOOL_OP_LIST(V) \ - V(Bool32x4AnyTrue) \ - V(Bool32x4AllTrue) \ - V(Bool16x8AnyTrue) \ - V(Bool16x8AllTrue) \ - V(Bool8x16AnyTrue) \ - V(Bool8x16AllTrue) - -#define MACHINE_SIMD_GENERIC_OP_LIST(V) \ - V(Simd128Load) \ - V(Simd128Load1) \ - V(Simd128Load2) \ - V(Simd128Load3) \ - V(Simd128Store) \ - V(Simd128Store1) \ - V(Simd128Store2) \ - V(Simd128Store3) - -#define MACHINE_SIMD_OP_LIST(V) \ - MACHINE_SIMD_RETURN_SIMD_OP_LIST(V) \ - MACHINE_SIMD_RETURN_NUM_OP_LIST(V) \ - MACHINE_SIMD_RETURN_BOOL_OP_LIST(V) \ - MACHINE_SIMD_GENERIC_OP_LIST(V) +#define MACHINE_SIMD_OP_LIST(V) \ + V(Float32x4Splat) \ + V(Float32x4ExtractLane) \ + V(Float32x4ReplaceLane) \ + V(Float32x4Abs) \ + V(Float32x4Neg) \ + V(Float32x4Sqrt) \ + V(Float32x4RecipApprox) \ + V(Float32x4RecipSqrtApprox) \ + V(Float32x4Add) \ + V(Float32x4Sub) \ + V(Float32x4Mul) \ + V(Float32x4Div) \ + V(Float32x4Min) \ + V(Float32x4Max) \ + V(Float32x4MinNum) \ + V(Float32x4MaxNum) \ + V(Float32x4Equal) \ + V(Float32x4NotEqual) \ + V(Float32x4LessThan) \ + V(Float32x4LessThanOrEqual) \ + V(Float32x4GreaterThan) \ + V(Float32x4GreaterThanOrEqual) \ + V(Float32x4FromInt32x4) \ + V(Float32x4FromUint32x4) \ + V(Int32x4Splat) \ + V(Int32x4ExtractLane) \ + V(Int32x4ReplaceLane) \ + V(Int32x4Neg) \ + V(Int32x4Add) \ + V(Int32x4Sub) \ + V(Int32x4Mul) \ + V(Int32x4Min) \ + V(Int32x4Max) \ + V(Int32x4ShiftLeftByScalar) \ + V(Int32x4ShiftRightByScalar) \ + V(Int32x4Equal) \ + V(Int32x4NotEqual) \ + V(Int32x4LessThan) \ + V(Int32x4LessThanOrEqual) \ + V(Int32x4GreaterThan) \ + V(Int32x4GreaterThanOrEqual) \ + V(Int32x4FromFloat32x4) \ + V(Uint32x4Min) \ + V(Uint32x4Max) \ + V(Uint32x4ShiftLeftByScalar) \ + V(Uint32x4ShiftRightByScalar) \ + V(Uint32x4LessThan) \ + V(Uint32x4LessThanOrEqual) \ + V(Uint32x4GreaterThan) \ + V(Uint32x4GreaterThanOrEqual) \ + V(Uint32x4FromFloat32x4) \ + V(Int16x8Splat) \ + V(Int16x8ExtractLane) \ + V(Int16x8ReplaceLane) \ + V(Int16x8Neg) \ + V(Int16x8Add) \ + V(Int16x8AddSaturate) \ + V(Int16x8Sub) \ + V(Int16x8SubSaturate) \ + V(Int16x8Mul) \ + V(Int16x8Min) \ + V(Int16x8Max) \ + V(Int16x8ShiftLeftByScalar) \ + V(Int16x8ShiftRightByScalar) \ + V(Int16x8Equal) \ + V(Int16x8NotEqual) \ + V(Int16x8LessThan) \ + V(Int16x8LessThanOrEqual) \ + V(Int16x8GreaterThan) \ + V(Int16x8GreaterThanOrEqual) \ + V(Uint16x8AddSaturate) \ + V(Uint16x8SubSaturate) \ + V(Uint16x8Min) \ + V(Uint16x8Max) \ + V(Uint16x8ShiftLeftByScalar) \ + V(Uint16x8ShiftRightByScalar) \ + V(Uint16x8LessThan) \ + V(Uint16x8LessThanOrEqual) \ + V(Uint16x8GreaterThan) \ + V(Uint16x8GreaterThanOrEqual) \ + V(Int8x16Splat) \ + V(Int8x16ExtractLane) \ + V(Int8x16ReplaceLane) \ + V(Int8x16Neg) \ + V(Int8x16Add) \ + V(Int8x16AddSaturate) \ + V(Int8x16Sub) \ + V(Int8x16SubSaturate) \ + V(Int8x16Mul) \ + V(Int8x16Min) \ + V(Int8x16Max) \ + V(Int8x16ShiftLeftByScalar) \ + V(Int8x16ShiftRightByScalar) \ + V(Int8x16Equal) \ + V(Int8x16NotEqual) \ + V(Int8x16LessThan) \ + V(Int8x16LessThanOrEqual) \ + V(Int8x16GreaterThan) \ + V(Int8x16GreaterThanOrEqual) \ + V(Uint8x16AddSaturate) \ + V(Uint8x16SubSaturate) \ + V(Uint8x16Min) \ + 
   V(Uint8x16Max)                 \
+  V(Uint8x16ShiftLeftByScalar)   \
+  V(Uint8x16ShiftRightByScalar)  \
+  V(Uint8x16LessThan)            \
+  V(Uint8x16LessThanOrEqual)     \
+  V(Uint8x16GreaterThan)         \
+  V(Uint8x16GreaterThanOrEqual)  \
+  V(Simd128Load)                 \
+  V(Simd128Load1)                \
+  V(Simd128Load2)                \
+  V(Simd128Load3)                \
+  V(Simd128Store)                \
+  V(Simd128Store1)               \
+  V(Simd128Store2)               \
+  V(Simd128Store3)               \
+  V(Simd128Zero)                 \
+  V(Simd128And)                  \
+  V(Simd128Or)                   \
+  V(Simd128Xor)                  \
+  V(Simd128Not)                  \
+  V(Simd32x4Select)              \
+  V(Simd32x4Swizzle)             \
+  V(Simd32x4Shuffle)             \
+  V(Simd16x8Select)              \
+  V(Simd16x8Swizzle)             \
+  V(Simd16x8Shuffle)             \
+  V(Simd8x16Select)              \
+  V(Simd8x16Swizzle)             \
+  V(Simd8x16Shuffle)             \
+  V(Simd1x4Zero)                 \
+  V(Simd1x4And)                  \
+  V(Simd1x4Or)                   \
+  V(Simd1x4Xor)                  \
+  V(Simd1x4Not)                  \
+  V(Simd1x4AnyTrue)              \
+  V(Simd1x4AllTrue)              \
+  V(Simd1x8Zero)                 \
+  V(Simd1x8And)                  \
+  V(Simd1x8Or)                   \
+  V(Simd1x8Xor)                  \
+  V(Simd1x8Not)                  \
+  V(Simd1x8AnyTrue)              \
+  V(Simd1x8AllTrue)              \
+  V(Simd1x16Zero)                \
+  V(Simd1x16And)                 \
+  V(Simd1x16Or)                  \
+  V(Simd1x16Xor)                 \
+  V(Simd1x16Not)                 \
+  V(Simd1x16AnyTrue)             \
+  V(Simd1x16AllTrue)
 
 #define VALUE_OP_LIST(V) \
   COMMON_OP_LIST(V)      \
diff --git a/src/compiler/wasm-compiler.cc b/src/compiler/wasm-compiler.cc
index 3903228737..2d85e30f9c 100644
--- a/src/compiler/wasm-compiler.cc
+++ b/src/compiler/wasm-compiler.cc
@@ -3611,15 +3611,6 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode,
       return graph()->NewNode(
           jsgraph()->machine()->Uint8x16GreaterThanOrEqual(), inputs[0],
           inputs[1]);
-    case wasm::kExprS32x4Select:
-      return graph()->NewNode(jsgraph()->machine()->Simd32x4Select(), inputs[0],
-                              inputs[1], inputs[2]);
-    case wasm::kExprS16x8Select:
-      return graph()->NewNode(jsgraph()->machine()->Simd16x8Select(), inputs[0],
-                              inputs[1], inputs[2]);
-    case wasm::kExprS8x16Select:
-      return graph()->NewNode(jsgraph()->machine()->Simd8x16Select(), inputs[0],
-                              inputs[1], inputs[2]);
     case wasm::kExprS128And:
       return graph()->NewNode(jsgraph()->machine()->Simd128And(), inputs[0],
                               inputs[1]);
@@ -3631,6 +3622,66 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode,
                               inputs[1]);
     case wasm::kExprS128Not:
       return graph()->NewNode(jsgraph()->machine()->Simd128Not(), inputs[0]);
+    case wasm::kExprS32x4Select:
+      return graph()->NewNode(jsgraph()->machine()->Simd32x4Select(), inputs[0],
+                              inputs[1], inputs[2]);
+    case wasm::kExprS16x8Select:
+      return graph()->NewNode(jsgraph()->machine()->Simd16x8Select(), inputs[0],
+                              inputs[1], inputs[2]);
+    case wasm::kExprS8x16Select:
+      return graph()->NewNode(jsgraph()->machine()->Simd8x16Select(), inputs[0],
+                              inputs[1], inputs[2]);
+    case wasm::kExprS1x4And:
+      return graph()->NewNode(jsgraph()->machine()->Simd1x4And(), inputs[0],
+                              inputs[1]);
+    case wasm::kExprS1x4Or:
+      return graph()->NewNode(jsgraph()->machine()->Simd1x4Or(), inputs[0],
+                              inputs[1]);
+    case wasm::kExprS1x4Xor:
+      return graph()->NewNode(jsgraph()->machine()->Simd1x4Xor(), inputs[0],
+                              inputs[1]);
+    case wasm::kExprS1x4Not:
+      return graph()->NewNode(jsgraph()->machine()->Simd1x4Not(), inputs[0]);
+    case wasm::kExprS1x4AnyTrue:
+      return graph()->NewNode(jsgraph()->machine()->Simd1x4AnyTrue(),
+                              inputs[0]);
+    case wasm::kExprS1x4AllTrue:
+      return graph()->NewNode(jsgraph()->machine()->Simd1x4AllTrue(),
+                              inputs[0]);
+    case wasm::kExprS1x8And:
+      return graph()->NewNode(jsgraph()->machine()->Simd1x8And(), inputs[0],
+                              inputs[1]);
+    case wasm::kExprS1x8Or:
+      return graph()->NewNode(jsgraph()->machine()->Simd1x8Or(), inputs[0],
+                              inputs[1]);
+    case wasm::kExprS1x8Xor:
+      return graph()->NewNode(jsgraph()->machine()->Simd1x8Xor(), inputs[0],
+                              inputs[1]);
+    case wasm::kExprS1x8Not:
+      return graph()->NewNode(jsgraph()->machine()->Simd1x8Not(), inputs[0]);
+    case wasm::kExprS1x8AnyTrue:
+      return graph()->NewNode(jsgraph()->machine()->Simd1x8AnyTrue(),
+                              inputs[0]);
+    case wasm::kExprS1x8AllTrue:
+      return graph()->NewNode(jsgraph()->machine()->Simd1x8AllTrue(),
+                              inputs[0]);
+    case wasm::kExprS1x16And:
+      return graph()->NewNode(jsgraph()->machine()->Simd1x16And(), inputs[0],
+                              inputs[1]);
+    case wasm::kExprS1x16Or:
+      return graph()->NewNode(jsgraph()->machine()->Simd1x16Or(), inputs[0],
+                              inputs[1]);
+    case wasm::kExprS1x16Xor:
+      return graph()->NewNode(jsgraph()->machine()->Simd1x16Xor(), inputs[0],
+                              inputs[1]);
+    case wasm::kExprS1x16Not:
+      return graph()->NewNode(jsgraph()->machine()->Simd1x16Not(), inputs[0]);
+    case wasm::kExprS1x16AnyTrue:
+      return graph()->NewNode(jsgraph()->machine()->Simd1x16AnyTrue(),
+                              inputs[0]);
+    case wasm::kExprS1x16AllTrue:
+      return graph()->NewNode(jsgraph()->machine()->Simd1x16AllTrue(),
+                              inputs[0]);
     default:
       return graph()->NewNode(UnsupportedOpcode(opcode), nullptr);
   }
diff --git a/src/wasm/wasm-opcodes.cc b/src/wasm/wasm-opcodes.cc
index ec1cbd59b2..1442519620 100644
--- a/src/wasm/wasm-opcodes.cc
+++ b/src/wasm/wasm-opcodes.cc
@@ -20,14 +20,17 @@ typedef Signature<ValueType> FunctionSig;
 #define CASE_I64_OP(name, str) CASE_OP(I64##name, "i64." str)
 #define CASE_F32_OP(name, str) CASE_OP(F32##name, "f32." str)
 #define CASE_F64_OP(name, str) CASE_OP(F64##name, "f64." str)
-#define CASE_S128_OP(name, str) CASE_OP(S128##name, "s128." str)
 #define CASE_F32x4_OP(name, str) CASE_OP(F32x4##name, "f32x4." str)
 #define CASE_I32x4_OP(name, str) CASE_OP(I32x4##name, "i32x4." str)
 #define CASE_I16x8_OP(name, str) CASE_OP(I16x8##name, "i16x8." str)
 #define CASE_I8x16_OP(name, str) CASE_OP(I8x16##name, "i8x16." str)
+#define CASE_S128_OP(name, str) CASE_OP(S128##name, "s128." str)
 #define CASE_S32x4_OP(name, str) CASE_OP(S32x4##name, "s32x4." str)
 #define CASE_S16x8_OP(name, str) CASE_OP(S16x8##name, "s16x8." str)
 #define CASE_S8x16_OP(name, str) CASE_OP(S8x16##name, "s8x16." str)
+#define CASE_S1x4_OP(name, str) CASE_OP(S1x4##name, "s1x4." str)
+#define CASE_S1x8_OP(name, str) CASE_OP(S1x8##name, "s1x8." str)
+#define CASE_S1x16_OP(name, str) CASE_OP(S1x16##name, "s1x16." str)
 #define CASE_INT_OP(name, str) CASE_I32_OP(name, str) CASE_I64_OP(name, str)
 #define CASE_FLOAT_OP(name, str) CASE_F32_OP(name, str) CASE_F64_OP(name, str)
 #define CASE_ALL_OP(name, str) CASE_FLOAT_OP(name, str) CASE_INT_OP(name, str)
@@ -202,9 +205,9 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
     CASE_SIGN_OP(I8x16, AddSaturate, "add_saturate")
     CASE_SIGN_OP(I16x8, SubSaturate, "sub_saturate")
     CASE_SIGN_OP(I8x16, SubSaturate, "sub_saturate")
+    CASE_S128_OP(And, "and")
     CASE_S128_OP(Or, "or")
     CASE_S128_OP(Xor, "xor")
-    CASE_S128_OP(And, "and")
     CASE_S128_OP(Not, "not")
     CASE_S32x4_OP(Select, "select")
     CASE_S32x4_OP(Swizzle, "swizzle")
@@ -215,6 +218,24 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
     CASE_S8x16_OP(Select, "select")
     CASE_S8x16_OP(Swizzle, "swizzle")
     CASE_S8x16_OP(Shuffle, "shuffle")
+    CASE_S1x4_OP(And, "and")
+    CASE_S1x4_OP(Or, "or")
+    CASE_S1x4_OP(Xor, "xor")
+    CASE_S1x4_OP(Not, "not")
+    CASE_S1x4_OP(AnyTrue, "any_true")
+    CASE_S1x4_OP(AllTrue, "all_true")
+    CASE_S1x8_OP(And, "and")
+    CASE_S1x8_OP(Or, "or")
+    CASE_S1x8_OP(Xor, "xor")
+    CASE_S1x8_OP(Not, "not")
+    CASE_S1x8_OP(AnyTrue, "any_true")
+    CASE_S1x8_OP(AllTrue, "all_true")
+    CASE_S1x16_OP(And, "and")
+    CASE_S1x16_OP(Or, "or")
+    CASE_S1x16_OP(Xor, "xor")
+    CASE_S1x16_OP(Not, "not")
+    CASE_S1x16_OP(AnyTrue, "any_true")
+    CASE_S1x16_OP(AllTrue, "all_true")
 
     // Atomic operations.
     CASE_L32_OP(AtomicAdd, "atomic_add")
diff --git a/src/wasm/wasm-opcodes.h b/src/wasm/wasm-opcodes.h
index 49eefc578f..3021bcdc3f 100644
--- a/src/wasm/wasm-opcodes.h
+++ b/src/wasm/wasm-opcodes.h
@@ -381,7 +381,25 @@ constexpr WasmCodePosition kNoCodePosition = -1;
   V(S16x8Shuffle, 0xe54d, s_ss)         \
   V(S8x16Select, 0xe56a, s_s1x16ss)     \
   V(S8x16Swizzle, 0xe56b, s_s)          \
-  V(S8x16Shuffle, 0xe56c, s_ss)
+  V(S8x16Shuffle, 0xe56c, s_ss)         \
+  V(S1x4And, 0xe580, s1x4_s1x4s1x4)     \
+  V(S1x4Or, 0xe581, s1x4_s1x4s1x4)      \
+  V(S1x4Xor, 0xe582, s1x4_s1x4s1x4)     \
+  V(S1x4Not, 0xe583, s1x4_s1x4)         \
+  V(S1x4AnyTrue, 0xe584, i_s1x4)        \
+  V(S1x4AllTrue, 0xe585, i_s1x4)        \
+  V(S1x8And, 0xe586, s1x8_s1x8s1x8)     \
+  V(S1x8Or, 0xe587, s1x8_s1x8s1x8)      \
+  V(S1x8Xor, 0xe588, s1x8_s1x8s1x8)     \
+  V(S1x8Not, 0xe589, s1x8_s1x8)         \
+  V(S1x8AnyTrue, 0xe58a, i_s1x8)        \
+  V(S1x8AllTrue, 0xe58b, i_s1x8)        \
+  V(S1x16And, 0xe58c, s1x16_s1x16s1x16) \
+  V(S1x16Or, 0xe58d, s1x16_s1x16s1x16)  \
+  V(S1x16Xor, 0xe58e, s1x16_s1x16s1x16) \
+  V(S1x16Not, 0xe58f, s1x16_s1x16)      \
+  V(S1x16AnyTrue, 0xe590, i_s1x16)      \
+  V(S1x16AllTrue, 0xe591, i_s1x16)
 
 #define FOREACH_SIMD_1_OPERAND_OPCODE(V) \
   V(F32x4ExtractLane, 0xe501, _)         \
@@ -483,19 +501,28 @@ constexpr WasmCodePosition kNoCodePosition = -1;
   V(f_if, kWasmF32, kWasmI32, kWasmF32) \
   V(l_il, kWasmI64, kWasmI32, kWasmI64)
 
-#define FOREACH_SIMD_SIGNATURE(V)                           \
-  V(s_s, kWasmS128, kWasmS128)                              \
-  V(s_f, kWasmS128, kWasmF32)                               \
-  V(s_ss, kWasmS128, kWasmS128, kWasmS128)                  \
-  V(s1x4_ss, kWasmS1x4, kWasmS128, kWasmS128)               \
-  V(s1x8_ss, kWasmS1x8, kWasmS128, kWasmS128)               \
-  V(s1x16_ss, kWasmS1x16, kWasmS128, kWasmS128)             \
-  V(s_i, kWasmS128, kWasmI32)                               \
-  V(s_si, kWasmS128, kWasmS128, kWasmI32)                   \
-  V(i_s, kWasmI32, kWasmS128)                               \
-  V(s_s1x4ss, kWasmS128, kWasmS1x4, kWasmS128, kWasmS128)   \
-  V(s_s1x8ss, kWasmS128, kWasmS1x8, kWasmS128, kWasmS128)   \
-  V(s_s1x16ss, kWasmS128, kWasmS1x16, kWasmS128, kWasmS128)
+#define FOREACH_SIMD_SIGNATURE(V)                           \
+  V(s_s, kWasmS128, kWasmS128)                              \
+  V(s_f, kWasmS128, kWasmF32)                               \
+  V(s_ss, kWasmS128, kWasmS128, kWasmS128)                  \
+  V(s1x4_ss, kWasmS1x4, kWasmS128, kWasmS128)               \
+  V(s1x8_ss, kWasmS1x8, kWasmS128, kWasmS128)               \
+  V(s1x16_ss, kWasmS1x16, kWasmS128, kWasmS128)             \
+  V(s_i, kWasmS128, kWasmI32)                               \
+  V(s_si, kWasmS128, kWasmS128, kWasmI32)                   \
+  V(i_s, kWasmI32, kWasmS128)                               \
+  V(i_s1x4, kWasmI32, kWasmS1x4)                            \
+  V(i_s1x8, kWasmI32, kWasmS1x8)                            \
+  V(i_s1x16, kWasmI32, kWasmS1x16)                          \
+  V(s_s1x4ss, kWasmS128, kWasmS1x4, kWasmS128, kWasmS128)   \
+  V(s_s1x8ss, kWasmS128, kWasmS1x8, kWasmS128, kWasmS128)   \
+  V(s_s1x16ss, kWasmS128, kWasmS1x16, kWasmS128, kWasmS128) \
+  V(s1x4_s1x4, kWasmS1x4, kWasmS1x4)                        \
+  V(s1x4_s1x4s1x4, kWasmS1x4, kWasmS1x4, kWasmS1x4)         \
+  V(s1x8_s1x8, kWasmS1x8, kWasmS1x8)                        \
+  V(s1x8_s1x8s1x8, kWasmS1x8, kWasmS1x8, kWasmS1x8)         \
+  V(s1x16_s1x16, kWasmS1x16, kWasmS1x16)                    \
+  V(s1x16_s1x16s1x16, kWasmS1x16, kWasmS1x16, kWasmS1x16)
 
 #define FOREACH_PREFIX(V) \
   V(Simd, 0xe5)           \
diff --git a/test/cctest/test-assembler-arm.cc b/test/cctest/test-assembler-arm.cc
index 7f49bb7be1..4138f77c69 100644
--- a/test/cctest/test-assembler-arm.cc
+++ b/test/cctest/test-assembler-arm.cc
@@ -1227,6 +1227,10 @@ TEST(14) {
   CHECK_EQ(ex, t.field[2]); \
   CHECK_EQ(ex, t.field[3]);
 
+#define CHECK_EQ_32X2(field, ex0, ex1) \
+  CHECK_EQ(ex0, t.field[0]);           \
+  CHECK_EQ(ex1, t.field[1]);
+
 #define CHECK_EQ_32X4(field, ex0, ex1, ex2, ex3) \
   CHECK_EQ(ex0, t.field[0]);                     \
   CHECK_EQ(ex1, t.field[1]);                     \
@@ -1298,6 +1302,8 @@ TEST(15) {
     float vdupf[4], vaddf[4], vsubf[4], vmulf[4];
     uint32_t vmin_s8[4], vmin_u16[4], vmin_s32[4];
     uint32_t vmax_s8[4], vmax_u16[4], vmax_s32[4];
+    uint32_t vpmin_s8[2], vpmin_u16[2], vpmin_s32[2];
+    uint32_t vpmax_s8[2], vpmax_u16[2], vpmax_s32[2];
     uint32_t vadd8[4], vadd16[4], vadd32[4];
     uint32_t vqadd_s8[4], vqadd_u16[4], vqadd_s32[4];
     uint32_t vsub8[4], vsub16[4], vsub32[4];
@@ -1612,6 +1618,30 @@ TEST(15) {
   __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmax_s32))));
   __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4));
 
+  // vpmin/vpmax integer.
+  __ mov(r4, Operand(0x03));
+  __ vdup(Neon16, q0, r4);
+  __ vdup(Neon8, q1, r4);
+  __ vpmin(NeonS8, d4, d0, d2);
+  __ vstr(d4, r0, offsetof(T, vpmin_s8));
+  __ vpmax(NeonS8, d4, d0, d2);
+  __ vstr(d4, r0, offsetof(T, vpmax_s8));
+  __ mov(r4, Operand(0xffff));
+  __ vdup(Neon32, q0, r4);
+  __ vdup(Neon16, q1, r4);
+  __ vpmin(NeonU16, d4, d0, d2);
+  __ vstr(d4, r0, offsetof(T, vpmin_u16));
+  __ vpmax(NeonU16, d4, d0, d2);
+  __ vstr(d4, r0, offsetof(T, vpmax_u16));
+  __ mov(r4, Operand(0xff));
+  __ veor(q0, q0, q0);
+  __ vmov(s0, r4);
+  __ vdup(Neon8, q1, r4);
+  __ vpmin(NeonS32, d4, d0, d2);
+  __ vstr(d4, r0, offsetof(T, vpmin_s32));
+  __ vpmax(NeonS32, d4, d0, d2);
+  __ vstr(d4, r0, offsetof(T, vpmax_s32));
+
   // vadd (integer).
   __ mov(r4, Operand(0x81));
   __ vdup(Neon8, q0, r4);
@@ -1992,6 +2022,15 @@ TEST(15) {
   // [0x000000ff, 0x000000ff, ...] and [0xffffffff, 0xffffffff, ...]
   CHECK_EQ_SPLAT(vmin_s32, 0xffffffffu);
   CHECK_EQ_SPLAT(vmax_s32, 0xffu);
+  // [0, 3, 0, 3, ...] and [3, 3, 3, 3, ...]
+  CHECK_EQ_32X2(vpmin_s8, 0x00000000u, 0x03030303u);
+  CHECK_EQ_32X2(vpmax_s8, 0x03030303u, 0x03030303u);
+  // [0, ffff, 0, ffff] and [ffff, ffff]
+  CHECK_EQ_32X2(vpmin_u16, 0x00000000u, 0xffffffffu);
+  CHECK_EQ_32X2(vpmax_u16, 0xffffffffu, 0xffffffffu);
+  // [0x000000ff, 0x00000000, ...] and [0xffffffff, 0xffffffff, ...]
+  CHECK_EQ_32X2(vpmin_s32, 0x00u, 0xffffffffu);
+  CHECK_EQ_32X2(vpmax_s32, 0xffu, 0xffffffffu);
   CHECK_EQ_SPLAT(vadd8, 0x03030303u);
   CHECK_EQ_SPLAT(vadd16, 0x00030003u);
   CHECK_EQ_SPLAT(vadd32, 0x00000003u);
diff --git a/test/cctest/test-disasm-arm.cc b/test/cctest/test-disasm-arm.cc
index 7d56442e27..faba2b9d8b 100644
--- a/test/cctest/test-disasm-arm.cc
+++ b/test/cctest/test-disasm-arm.cc
@@ -1040,6 +1040,12 @@ TEST(Neon) {
           "f3142670       vmin.u16 q1, q2, q8");
   COMPARE(vmax(NeonS32, q15, q0, q8),
           "f260e660       vmax.s32 q15, q0, q8");
+  COMPARE(vpmax(NeonS8, d0, d1, d2),
+          "f2010a02       vpmax.s8 d0, d1, d2");
+  COMPARE(vpmin(NeonU16, d1, d2, d8),
+          "f3121a18       vpmin.u16 d1, d2, d8");
+  COMPARE(vpmax(NeonS32, d15, d0, d8),
+          "f220fa08       vpmax.s32 d15, d0, d8");
   COMPARE(vadd(q15, q0, q8),
           "f240ed60       vadd.f32 q15, q0, q8");
   COMPARE(vadd(Neon8, q0, q1, q2),
diff --git a/test/cctest/test-macro-assembler-arm.cc b/test/cctest/test-macro-assembler-arm.cc
index dcc5fba0a2..4de4647f98 100644
--- a/test/cctest/test-macro-assembler-arm.cc
+++ b/test/cctest/test-macro-assembler-arm.cc
@@ -156,6 +156,8 @@ TEST(ExtractLane) {
     int32_t i8x16_high[16];
     int32_t f32x4_low[4];
     int32_t f32x4_high[4];
+    int32_t i8x16_low_d[16];
+    int32_t i8x16_high_d[16];
   } T;
   T t;
@@ -185,6 +187,15 @@ TEST(ExtractLane) {
     __ str(r5, MemOperand(r0, offsetof(T, i8x16_low) + 4 * i));
   }
 
+  for (int i = 0; i < 8; i++) {
+    __ mov(r4, Operand(i));
+    __ vdup(Neon8, q1, r4);  // q1 = d2,d3
+    __ ExtractLane(r5, d2, NeonS8, i);
+    __ str(r5, MemOperand(r0, offsetof(T, i8x16_low_d) + 4 * i));
+    __ ExtractLane(r5, d3, NeonS8, i);
+    __ str(r5, MemOperand(r0, offsetof(T, i8x16_low_d) + 4 * (i + 8)));
+  }
+
   if (CpuFeatures::IsSupported(VFP32DREGS)) {
     for (int i = 0; i < 4; i++) {
       __ mov(r4, Operand(-i));
@@ -209,6 +220,15 @@ TEST(ExtractLane) {
       __ ExtractLane(r5, q15, NeonS8, i);
       __ str(r5, MemOperand(r0, offsetof(T, i8x16_high) + 4 * i));
     }
+
+    for (int i = 0; i < 8; i++) {
+      __ mov(r4, Operand(-i));
+      __ vdup(Neon8, q15, r4);  // q15 = d30,d31
+      __ ExtractLane(r5, d30, NeonS8, i);
+      __ str(r5, MemOperand(r0, offsetof(T, i8x16_high_d) + 4 * i));
+      __ ExtractLane(r5, d31, NeonS8, i);
+      __ str(r5, MemOperand(r0, offsetof(T, i8x16_high_d) + 4 * (i + 8)));
+    }
   }
 
   __ ldm(ia_w, sp, r4.bit() | r5.bit() | pc.bit());
@@ -234,6 +254,10 @@ TEST(ExtractLane) {
   for (int i = 0; i < 16; i++) {
     CHECK_EQ(i, t.i8x16_low[i]);
   }
+  for (int i = 0; i < 8; i++) {
+    CHECK_EQ(i, t.i8x16_low_d[i]);
+    CHECK_EQ(i, t.i8x16_low_d[i + 8]);
+  }
   if (CpuFeatures::IsSupported(VFP32DREGS)) {
     for (int i = 0; i < 4; i++) {
       CHECK_EQ(-i, t.i32x4_high[i]);
@@ -245,6 +269,10 @@ TEST(ExtractLane) {
     for (int i = 0; i < 16; i++) {
      CHECK_EQ(-i, t.i8x16_high[i]);
     }
+    for (int i = 0; i < 8; i++) {
+      CHECK_EQ(-i, t.i8x16_high_d[i]);
+      CHECK_EQ(-i, t.i8x16_high_d[i + 8]);
+    }
   }
 }
diff --git a/test/cctest/wasm/test-run-wasm-simd.cc b/test/cctest/wasm/test-run-wasm-simd.cc
index 4fd1d63b18..7d0bc8a8a6 100644
--- a/test/cctest/wasm/test-run-wasm-simd.cc
+++ b/test/cctest/wasm/test-run-wasm-simd.cc
@@ -212,6 +212,11 @@ T Not(T a) {
   return ~a;
 }
 
+template <typename T>
+T LogicalNot(T a) {
+  return a == 0 ? 1 : 0;
+}
+
 template <typename T>
 T Sqrt(T a) {
   return std::sqrt(a);
@@ -1340,6 +1345,182 @@ WASM_EXEC_COMPILED_TEST(I8x16ShrU) {
 WASM_SIMD_SELECT_TEST(32x4)
 WASM_SIMD_SELECT_TEST(16x8)
 WASM_SIMD_SELECT_TEST(8x16)
+
+// Boolean unary operations are 'AllTrue' and 'AnyTrue', which return an integer
+// result. Use relational ops on numeric vectors to create the boolean vector
+// test inputs. Test inputs with all true, all false, one true, and one false.
+#define WASM_SIMD_BOOL_REDUCTION_TEST(format, lanes)                          \
+  WASM_EXEC_TEST(ReductionTest##lanes) {                                      \
+    FLAG_wasm_simd_prototype = true;                                          \
+    WasmRunner<int32_t> r(kExecuteCompiled);                                  \
+    byte zero = r.AllocateLocal(kWasmS128);                                   \
+    byte one_one = r.AllocateLocal(kWasmS128);                                \
+    byte reduced = r.AllocateLocal(kWasmI32);                                 \
+    BUILD(r, WASM_SET_LOCAL(zero, WASM_SIMD_I##format##_SPLAT(WASM_ZERO)),    \
+          WASM_SET_LOCAL(                                                     \
+              reduced, WASM_SIMD_UNOP(kExprS1x##lanes##AnyTrue,               \
+                                      WASM_SIMD_BINOP(kExprI##format##Eq,     \
+                                                      WASM_GET_LOCAL(zero),   \
+                                                      WASM_GET_LOCAL(zero)))), \
+          WASM_IF(WASM_I32_EQ(WASM_GET_LOCAL(reduced), WASM_ZERO),            \
+                  WASM_RETURN1(WASM_ZERO)),                                   \
+          WASM_SET_LOCAL(                                                     \
+              reduced, WASM_SIMD_UNOP(kExprS1x##lanes##AnyTrue,               \
+                                      WASM_SIMD_BINOP(kExprI##format##Ne,     \
+                                                      WASM_GET_LOCAL(zero),   \
+                                                      WASM_GET_LOCAL(zero)))), \
+          WASM_IF(WASM_I32_NE(WASM_GET_LOCAL(reduced), WASM_ZERO),            \
+                  WASM_RETURN1(WASM_ZERO)),                                   \
+          WASM_SET_LOCAL(                                                     \
+              reduced, WASM_SIMD_UNOP(kExprS1x##lanes##AllTrue,               \
+                                      WASM_SIMD_BINOP(kExprI##format##Eq,     \
+                                                      WASM_GET_LOCAL(zero),   \
+                                                      WASM_GET_LOCAL(zero)))), \
+          WASM_IF(WASM_I32_EQ(WASM_GET_LOCAL(reduced), WASM_ZERO),            \
+                  WASM_RETURN1(WASM_ZERO)),                                   \
+          WASM_SET_LOCAL(                                                     \
+              reduced, WASM_SIMD_UNOP(kExprS1x##lanes##AllTrue,               \
+                                      WASM_SIMD_BINOP(kExprI##format##Ne,     \
+                                                      WASM_GET_LOCAL(zero),   \
+                                                      WASM_GET_LOCAL(zero)))), \
+          WASM_IF(WASM_I32_NE(WASM_GET_LOCAL(reduced), WASM_ZERO),            \
+                  WASM_RETURN1(WASM_ZERO)),                                   \
+          WASM_SET_LOCAL(one_one,                                             \
+                         WASM_SIMD_I##format##_REPLACE_LANE(                  \
+                             lanes - 1, WASM_GET_LOCAL(zero), WASM_ONE)),     \
+          WASM_SET_LOCAL(                                                     \
+              reduced, WASM_SIMD_UNOP(kExprS1x##lanes##AnyTrue,               \
+                                      WASM_SIMD_BINOP(kExprI##format##Eq,     \
+                                                      WASM_GET_LOCAL(one_one), \
+                                                      WASM_GET_LOCAL(zero)))), \
+          WASM_IF(WASM_I32_EQ(WASM_GET_LOCAL(reduced), WASM_ZERO),            \
+                  WASM_RETURN1(WASM_ZERO)),                                   \
+          WASM_SET_LOCAL(                                                     \
+              reduced, WASM_SIMD_UNOP(kExprS1x##lanes##AnyTrue,               \
+                                      WASM_SIMD_BINOP(kExprI##format##Ne,     \
+                                                      WASM_GET_LOCAL(one_one), \
+                                                      WASM_GET_LOCAL(zero)))), \
+          WASM_IF(WASM_I32_EQ(WASM_GET_LOCAL(reduced), WASM_ZERO),            \
+                  WASM_RETURN1(WASM_ZERO)),                                   \
+          WASM_SET_LOCAL(                                                     \
+              reduced, WASM_SIMD_UNOP(kExprS1x##lanes##AllTrue,               \
+                                      WASM_SIMD_BINOP(kExprI##format##Eq,     \
+                                                      WASM_GET_LOCAL(one_one), \
+                                                      WASM_GET_LOCAL(zero)))), \
+          WASM_IF(WASM_I32_NE(WASM_GET_LOCAL(reduced), WASM_ZERO),            \
+                  WASM_RETURN1(WASM_ZERO)),                                   \
+          WASM_SET_LOCAL(                                                     \
+              reduced, WASM_SIMD_UNOP(kExprS1x##lanes##AllTrue,               \
+                                      WASM_SIMD_BINOP(kExprI##format##Ne,     \
+                                                      WASM_GET_LOCAL(one_one), \
+                                                      WASM_GET_LOCAL(zero)))), \
+          WASM_IF(WASM_I32_NE(WASM_GET_LOCAL(reduced), WASM_ZERO),            \
+                  WASM_RETURN1(WASM_ZERO)),                                   \
+          WASM_ONE);                                                          \
+    CHECK_EQ(1, r.Call());                                                    \
+  }
+
+WASM_SIMD_BOOL_REDUCTION_TEST(32x4, 4)
+WASM_SIMD_BOOL_REDUCTION_TEST(16x8, 8)
+WASM_SIMD_BOOL_REDUCTION_TEST(8x16, 16)
+
+#define WASM_SIMD_UNOP_HELPER(format, lanes, lane_size)                       \
+  void RunS1x##lanes##UnOpTest(WasmOpcode simd_op,                            \
+                               Int##lane_size##UnOp expected_op) {            \
+    FLAG_wasm_simd_prototype = true;                                          \
+    WasmRunner<int32_t, int32_t, int32_t> r(kExecuteCompiled);                \
+    byte a = 0;                                                               \
+    byte expected = 1;                                                        \
+    byte zero = r.AllocateLocal(kWasmS128);                                   \
+    byte simd = r.AllocateLocal(kWasmS128);                                   \
+    BUILD(                                                                    \
+        r, WASM_SET_LOCAL(zero, WASM_SIMD_I##format##_SPLAT(WASM_ZERO)),      \
+        WASM_SET_LOCAL(simd, WASM_SIMD_I##format##_SPLAT(WASM_GET_LOCAL(a))), \
+        WASM_SET_LOCAL(                                                       \
+            simd,                                                             \
+            WASM_SIMD_MATERIALIZE_BOOLS(                                      \
+                format, WASM_SIMD_UNOP(                                       \
+                            simd_op, WASM_SIMD_BINOP(kExprI##format##Ne,      \
+                                                     WASM_GET_LOCAL(simd),    \
+                                                     WASM_GET_LOCAL(zero))))), \
+        WASM_SIMD_CHECK_SPLAT##lanes(I##format, simd, I32, expected),         \
+        WASM_ONE);                                                            \
+                                                                              \
+    for (int i = 0; i <= 1; i++) {                                            \
+      CHECK_EQ(1, r.Call(i, expected_op(i)));                                 \
+    }                                                                         \
+  }
+WASM_SIMD_UNOP_HELPER(32x4, 4, 32);
+WASM_SIMD_UNOP_HELPER(16x8, 8, 16);
+WASM_SIMD_UNOP_HELPER(8x16, 16, 8);
+#undef WASM_SIMD_UNOP_HELPER
+
+WASM_EXEC_COMPILED_TEST(S1x4Not) { RunS1x4UnOpTest(kExprS1x4Not, LogicalNot); }
+
+WASM_EXEC_COMPILED_TEST(S1x8Not) { RunS1x8UnOpTest(kExprS1x8Not, LogicalNot); }
+
+WASM_EXEC_COMPILED_TEST(S1x16Not) {
+  RunS1x16UnOpTest(kExprS1x16Not, LogicalNot);
+}
+
+#define WASM_SIMD_BINOP_HELPER(format, lanes, lane_size)                      \
+  void RunS1x##lanes##BinOpTest(WasmOpcode simd_op,                           \
+                                Int##lane_size##BinOp expected_op) {          \
+    FLAG_wasm_simd_prototype = true;                                          \
+    WasmRunner<int32_t, int32_t, int32_t, int32_t> r(kExecuteCompiled);       \
+    byte a = 0;                                                               \
+    byte b = 1;                                                               \
+    byte expected = 2;                                                        \
+    byte zero = r.AllocateLocal(kWasmS128);                                   \
+    byte simd0 = r.AllocateLocal(kWasmS128);                                  \
+    byte simd1 = r.AllocateLocal(kWasmS128);                                  \
+    BUILD(                                                                    \
+        r, WASM_SET_LOCAL(zero, WASM_SIMD_I##format##_SPLAT(WASM_ZERO)),      \
+        WASM_SET_LOCAL(simd0, WASM_SIMD_I##format##_SPLAT(WASM_GET_LOCAL(a))), \
+        WASM_SET_LOCAL(simd1, WASM_SIMD_I##format##_SPLAT(WASM_GET_LOCAL(b))), \
+        WASM_SET_LOCAL(                                                       \
+            simd1,                                                            \
+            WASM_SIMD_MATERIALIZE_BOOLS(                                      \
+                format,                                                      \
+                WASM_SIMD_BINOP(                                              \
+                    simd_op,                                                  \
+                    WASM_SIMD_BINOP(kExprI##format##Ne, WASM_GET_LOCAL(simd0), \
+                                    WASM_GET_LOCAL(zero)),                    \
+                    WASM_SIMD_BINOP(kExprI##format##Ne, WASM_GET_LOCAL(simd1), \
+                                    WASM_GET_LOCAL(zero))))),                 \
+        WASM_SIMD_CHECK_SPLAT##lanes(I##format, simd1, I32, expected),        \
+        WASM_ONE);                                                            \
+                                                                              \
+    for (int i = 0; i <= 1; i++) {                                            \
+      for (int j = 0; j <= 1; j++) {                                          \
+        CHECK_EQ(1, r.Call(i, j, expected_op(i, j)));                         \
+      }                                                                       \
+    }                                                                         \
+  }
+
+WASM_SIMD_BINOP_HELPER(32x4, 4, 32);
+WASM_SIMD_BINOP_HELPER(16x8, 8, 16);
+WASM_SIMD_BINOP_HELPER(8x16, 16, 8);
+#undef WASM_SIMD_BINOP_HELPER
+
+WASM_EXEC_COMPILED_TEST(S1x4And) { RunS1x4BinOpTest(kExprS1x4And, And); }
+
+WASM_EXEC_COMPILED_TEST(S1x4Or) { RunS1x4BinOpTest(kExprS1x4Or, Or); }
+
+WASM_EXEC_COMPILED_TEST(S1x4Xor) { RunS1x4BinOpTest(kExprS1x4Xor, Xor); }
+
+WASM_EXEC_COMPILED_TEST(S1x8And) { RunS1x8BinOpTest(kExprS1x8And, And); }
+
+WASM_EXEC_COMPILED_TEST(S1x8Or) { RunS1x8BinOpTest(kExprS1x8Or, Or); }
+
+WASM_EXEC_COMPILED_TEST(S1x8Xor) { RunS1x8BinOpTest(kExprS1x8Xor, Xor); }
+
+WASM_EXEC_COMPILED_TEST(S1x16And) { RunS1x16BinOpTest(kExprS1x16And, And); }
+
+WASM_EXEC_COMPILED_TEST(S1x16Or) { RunS1x16BinOpTest(kExprS1x16Or, Or); }
+
+WASM_EXEC_COMPILED_TEST(S1x16Xor) { RunS1x16BinOpTest(kExprS1x16Xor, Xor); }
+
 #endif  // V8_TARGET_ARCH_ARM
 
 #if SIMD_LOWERING_TARGET
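// ---------------------------------------------------------------------------
// Editor's note, not part of the patch: the vpmin/vpmax expectations in
// TEST(15) above follow from NEON pairwise semantics -- the destination's low
// half holds per-pair reductions of src1 and the high half per-pair
// reductions of src2. A minimal scalar sketch of that behavior; PairwiseMin
// is an illustrative name, not a V8 helper.

#include <algorithm>

// dst, src1, src2 each hold n lanes (n even). dst[0..n/2) takes the minima of
// adjacent pairs of src1; dst[n/2..n) takes the minima of adjacent pairs of
// src2, mirroring "vpmin dst, src1, src2" (swap std::min for std::max to
// model vpmax).
template <typename T>
void PairwiseMin(T* dst, const T* src1, const T* src2, int n) {
  for (int i = 0; i < n / 2; i++) {
    dst[i] = std::min(src1[2 * i], src1[2 * i + 1]);
    dst[i + n / 2] = std::min(src2[2 * i], src2[2 * i + 1]);
  }
}

// With the s8 inputs above, src1 = {3, 0, 3, 0, ...} and src2 = {3, 3, ...},
// this yields dst = {0, 0, 0, 0, 3, 3, 3, 3}, i.e. the expected
// CHECK_EQ_32X2(vpmin_s8, 0x00000000u, 0x03030303u).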
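// ---------------------------------------------------------------------------
// Editor's note, not part of the patch: a scalar model of the S1xN AnyTrue/
// AllTrue reductions that the ReductionTest macros above pin down. Sketch
// only; AnyTrue/AllTrue here are free functions invented for illustration.

#include <cstdint>

// A boolean-vector lane is "true" iff it is nonzero. AnyTrue yields 1 if at
// least one of the n lanes is true; AllTrue yields 1 only if every lane is.
inline int32_t AnyTrue(const uint8_t* lanes, int n) {
  for (int i = 0; i < n; i++) {
    if (lanes[i] != 0) return 1;
  }
  return 0;
}

inline int32_t AllTrue(const uint8_t* lanes, int n) {
  for (int i = 0; i < n; i++) {
    if (lanes[i] == 0) return 0;
  }
  return 1;
}

// E.g. in the one_one case above, Eq(one_one, zero) is true in every lane but
// the last, so AnyTrue returns 1 and AllTrue returns 0 -- exactly the branch
// pattern the macro asserts.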