From 25d4f9374070a70da75c912b98aca1954823698e Mon Sep 17 00:00:00 2001 From: Predrag Rudic Date: Mon, 27 Nov 2017 16:11:08 +0100 Subject: [PATCH] Reland "MIPS[64] Implementation of MSA instructions on builtin simulator" This is reland of 3e0bf580e8424e37ae2ed311616d109e91052292 Original change's description: > This commit is a step toward enabling test-run-wasm-simd tests for MIPS. > 36 of those were failing in V8 builtin simulator because some instructions > were not implemented. Also there are minor fixes to some of the already > implemented instructions. > > This commit has only 32-bit implementation. After review I will add > 64-bit version. > > Bug: > Change-Id: I25b0cac352db3efb56b922ace64ab2aaef82472d > Reviewed-on: https://chromium-review.googlesource.com/744008 > Reviewed-by: Ivica Bogosavljevic > Reviewed-by: Jakob Kummerow > Commit-Queue: Ivica Bogosavljevic > Cr-Commit-Position: refs/heads/master@{#49439} Bug: Change-Id: I3a904caf675d314186c02c1c843d1e6a91a21a14 Reviewed-on: https://chromium-review.googlesource.com/776813 Reviewed-by: Jakob Kummerow Reviewed-by: Ivica Bogosavljevic Reviewed-by: Benedikt Meurer Commit-Queue: Ivica Bogosavljevic Cr-Commit-Position: refs/heads/master@{#49666} --- src/mips/simulator-mips.cc | 386 +++++- src/mips64/simulator-mips64.cc | 385 +++++- src/utils.h | 2 +- test/cctest/test-assembler-mips.cc | 1815 ++++++++++++++++---------- test/cctest/test-assembler-mips64.cc | 1813 +++++++++++++++---------- test/unittests/BUILD.gn | 1 + test/unittests/unittests.gyp | 1 + test/unittests/utils-unittest.cc | 113 ++ 8 files changed, 2972 insertions(+), 1544 deletions(-) create mode 100644 test/unittests/utils-unittest.cc diff --git a/src/mips/simulator-mips.cc b/src/mips/simulator-mips.cc index b420bc59b4..342f27666d 100644 --- a/src/mips/simulator-mips.cc +++ b/src/mips/simulator-mips.cc @@ -4671,9 +4671,12 @@ void Simulator::DecodeTypeMsaELM() { DCHECK_EQ(rd_reg(), kMSACSRRegister); SetResult(sa(), bit_cast(MSACSR_)); break; - case MOVE_V: - UNIMPLEMENTED(); - break; + case MOVE_V: { + msa_reg_t ws; + get_msa_register(ws_reg(), &ws); + set_msa_register(wd_reg(), &ws); + TraceMSARegWr(&ws); + } break; default: opcode &= kMsaELMMask; switch (opcode) { @@ -4742,7 +4745,50 @@ void Simulator::DecodeTypeMsaELM() { UNREACHABLE(); } } break; - case SLDI: + case SLDI: { + uint8_t v[32]; + msa_reg_t ws; + msa_reg_t wd; + get_msa_register(ws_reg(), &ws); + get_msa_register(wd_reg(), &wd); +#define SLDI_DF(s, k) \ + for (unsigned i = 0; i < s; i++) { \ + v[i] = ws.b[s * k + i]; \ + v[i + s] = wd.b[s * k + i]; \ + } \ + for (unsigned i = 0; i < s; i++) { \ + wd.b[s * k + i] = v[i + n]; \ + } + switch (DecodeMsaDataFormat()) { + case MSA_BYTE: + DCHECK(n < kMSALanesByte); + SLDI_DF(kMSARegSize / sizeof(int8_t) / kBitsPerByte, 0) + break; + case MSA_HALF: + DCHECK(n < kMSALanesHalf); + for (int k = 0; k < 2; ++k) { + SLDI_DF(kMSARegSize / sizeof(int16_t) / kBitsPerByte, k) + } + break; + case MSA_WORD: + DCHECK(n < kMSALanesWord); + for (int k = 0; k < 4; ++k) { + SLDI_DF(kMSARegSize / sizeof(int32_t) / kBitsPerByte, k) + } + break; + case MSA_DWORD: + DCHECK(n < kMSALanesDword); + for (int k = 0; k < 8; ++k) { + SLDI_DF(kMSARegSize / sizeof(int64_t) / kBitsPerByte, k) + } + break; + default: + UNREACHABLE(); + } + set_msa_register(wd_reg(), &wd); + TraceMSARegWr(&wd); + } break; +#undef SLDI_DF case SPLATI: case INSVE: UNIMPLEMENTED(); @@ -4879,6 +4925,7 @@ void Simulator::DecodeTypeMsaBIT() { default: UNREACHABLE(); } +#undef MSA_BIT_DF } void 
Simulator::DecodeTypeMsaMI10() { @@ -5161,13 +5208,6 @@ T Simulator::Msa3RInstrHelper(uint32_t opcode, T wd, T ws, T wt) { case DPSUB_U: case SLD: case SPLAT: - case PCKEV: - case PCKOD: - case ILVL: - case ILVR: - case ILVEV: - case ILVOD: - case VSHF: UNIMPLEMENTED(); break; case SRAR: { @@ -5179,51 +5219,210 @@ T Simulator::Msa3RInstrHelper(uint32_t opcode, T wd, T ws, T wt) { int bit = wt_modulo == 0 ? 0 : (wsu >> (wt_modulo - 1)) & 1; res = static_cast((wsu >> wt_modulo) + bit); } break; - case HADD_S: - case HADD_U: - case HSUB_S: - case HSUB_U: - UNIMPLEMENTED(); - break; default: UNREACHABLE(); } return res; } +template +void Msa3RInstrHelper_shuffle(const uint32_t opcode, T_reg ws, T_reg wt, + T_reg wd, const int i, const int num_of_lanes) { + T_int *ws_p, *wt_p, *wd_p; + ws_p = reinterpret_cast(ws); + wt_p = reinterpret_cast(wt); + wd_p = reinterpret_cast(wd); + switch (opcode) { + case PCKEV: + wd_p[i] = wt_p[2 * i]; + wd_p[i + num_of_lanes / 2] = ws_p[2 * i]; + break; + case PCKOD: + wd_p[i] = wt_p[2 * i + 1]; + wd_p[i + num_of_lanes / 2] = ws_p[2 * i + 1]; + break; + case ILVL: + wd_p[2 * i] = wt_p[i + num_of_lanes / 2]; + wd_p[2 * i + 1] = ws_p[i + num_of_lanes / 2]; + break; + case ILVR: + wd_p[2 * i] = wt_p[i]; + wd_p[2 * i + 1] = ws_p[i]; + break; + case ILVEV: + wd_p[2 * i] = wt_p[2 * i]; + wd_p[2 * i + 1] = ws_p[2 * i]; + break; + case ILVOD: + wd_p[2 * i] = wt_p[2 * i + 1]; + wd_p[2 * i + 1] = ws_p[2 * i + 1]; + break; + case VSHF: { + const int mask_not_valid = 0xc0; + const int mask_6_bits = 0x3f; + if ((wd_p[i] & mask_not_valid)) { + wd_p[i] = 0; + } else { + int k = (wd_p[i] & mask_6_bits) % (num_of_lanes * 2); + wd_p[i] = k >= num_of_lanes ? ws_p[k - num_of_lanes] : wt_p[k]; + } + } break; + default: + UNREACHABLE(); + } +} + +template +void Msa3RInstrHelper_horizontal(const uint32_t opcode, T_reg ws, T_reg wt, + T_reg wd, const int i, + const int num_of_lanes) { + typedef typename std::make_unsigned::type T_uint; + typedef typename std::make_unsigned::type T_smaller_uint; + T_int* wd_p; + T_smaller_int *ws_p, *wt_p; + ws_p = reinterpret_cast(ws); + wt_p = reinterpret_cast(wt); + wd_p = reinterpret_cast(wd); + T_uint* wd_pu; + T_smaller_uint *ws_pu, *wt_pu; + ws_pu = reinterpret_cast(ws); + wt_pu = reinterpret_cast(wt); + wd_pu = reinterpret_cast(wd); + switch (opcode) { + case HADD_S: + wd_p[i] = + static_cast(ws_p[2 * i + 1]) + static_cast(wt_p[2 * i]); + break; + case HADD_U: + wd_pu[i] = static_cast(ws_pu[2 * i + 1]) + + static_cast(wt_pu[2 * i]); + break; + case HSUB_S: + wd_p[i] = + static_cast(ws_p[2 * i + 1]) - static_cast(wt_p[2 * i]); + break; + case HSUB_U: + wd_pu[i] = static_cast(ws_pu[2 * i + 1]) - + static_cast(wt_pu[2 * i]); + break; + default: + UNREACHABLE(); + } +} + void Simulator::DecodeTypeMsa3R() { DCHECK(IsMipsArchVariant(kMips32r6)); DCHECK(CpuFeatures::IsSupported(MIPS_SIMD)); uint32_t opcode = instr_.InstructionBits() & kMsa3RMask; msa_reg_t ws, wd, wt; - + get_msa_register(ws_reg(), &ws); + get_msa_register(wt_reg(), &wt); + get_msa_register(wd_reg(), &wd); + switch (opcode) { + case HADD_S: + case HADD_U: + case HSUB_S: + case HSUB_U: +#define HORIZONTAL_ARITHMETIC_DF(num_of_lanes, int_type, lesser_int_type) \ + for (int i = 0; i < num_of_lanes; ++i) { \ + Msa3RInstrHelper_horizontal( \ + opcode, &ws, &wt, &wd, i, num_of_lanes); \ + } + switch (DecodeMsaDataFormat()) { + case MSA_HALF: + HORIZONTAL_ARITHMETIC_DF(kMSALanesHalf, int16_t, int8_t); + break; + case MSA_WORD: + HORIZONTAL_ARITHMETIC_DF(kMSALanesWord, int32_t, int16_t); + 
break; + case MSA_DWORD: + HORIZONTAL_ARITHMETIC_DF(kMSALanesDword, int64_t, int32_t); + break; + default: + UNREACHABLE(); + } + break; +#undef HORIZONTAL_ARITHMETIC_DF + case VSHF: +#define VSHF_DF(num_of_lanes, int_type) \ + for (int i = 0; i < num_of_lanes; ++i) { \ + Msa3RInstrHelper_shuffle(opcode, &ws, &wt, &wd, i, \ + num_of_lanes); \ + } + switch (DecodeMsaDataFormat()) { + case MSA_BYTE: + VSHF_DF(kMSALanesByte, int8_t); + break; + case MSA_HALF: + VSHF_DF(kMSALanesHalf, int16_t); + break; + case MSA_WORD: + VSHF_DF(kMSALanesWord, int32_t); + break; + case MSA_DWORD: + VSHF_DF(kMSALanesDword, int64_t); + break; + default: + UNREACHABLE(); + } +#undef VSHF_DF + break; + case PCKEV: + case PCKOD: + case ILVL: + case ILVR: + case ILVEV: + case ILVOD: +#define INTERLEAVE_PACK_DF(num_of_lanes, int_type) \ + for (int i = 0; i < num_of_lanes / 2; ++i) { \ + Msa3RInstrHelper_shuffle(opcode, &ws, &wt, &wd, i, \ + num_of_lanes); \ + } + switch (DecodeMsaDataFormat()) { + case MSA_BYTE: + INTERLEAVE_PACK_DF(kMSALanesByte, int8_t); + break; + case MSA_HALF: + INTERLEAVE_PACK_DF(kMSALanesHalf, int16_t); + break; + case MSA_WORD: + INTERLEAVE_PACK_DF(kMSALanesWord, int32_t); + break; + case MSA_DWORD: + INTERLEAVE_PACK_DF(kMSALanesDword, int64_t); + break; + default: + UNREACHABLE(); + } + break; +#undef INTERLEAVE_PACK_DF + default: #define MSA_3R_DF(elem, num_of_lanes) \ - get_msa_register(instr_.WdValue(), wd.elem); \ - get_msa_register(instr_.WsValue(), ws.elem); \ - get_msa_register(instr_.WtValue(), wt.elem); \ for (int i = 0; i < num_of_lanes; i++) { \ wd.elem[i] = Msa3RInstrHelper(opcode, wd.elem[i], ws.elem[i], wt.elem[i]); \ - } \ - set_msa_register(instr_.WdValue(), wd.elem); \ - TraceMSARegWr(wd.elem); + } - switch (DecodeMsaDataFormat()) { - case MSA_BYTE: - MSA_3R_DF(b, kMSALanesByte); - break; - case MSA_HALF: - MSA_3R_DF(h, kMSALanesHalf); - break; - case MSA_WORD: - MSA_3R_DF(w, kMSALanesWord); - break; - case MSA_DWORD: - MSA_3R_DF(d, kMSALanesDword); - break; - default: - UNREACHABLE(); - } + switch (DecodeMsaDataFormat()) { + case MSA_BYTE: + MSA_3R_DF(b, kMSALanesByte); + break; + case MSA_HALF: + MSA_3R_DF(h, kMSALanesHalf); + break; + case MSA_WORD: + MSA_3R_DF(w, kMSALanesWord); + break; + case MSA_DWORD: + MSA_3R_DF(d, kMSALanesDword); + break; + default: + UNREACHABLE(); + } #undef MSA_3R_DF + break; + } + set_msa_register(wd_reg(), &wd); + TraceMSARegWr(&wd); } template @@ -5321,7 +5520,7 @@ void Msa3RFInstrHelper(uint32_t opcode, T_reg ws, T_reg wt, T_reg& wd) { break; case FDIV: { if (t_element == 0) { - wd = std::numeric_limits::quiet_NaN(); + wd = bit_cast(std::numeric_limits::quiet_NaN()); } else { wd = bit_cast(s_element / t_element); } @@ -5541,6 +5740,7 @@ void Simulator::DecodeTypeMsa3RF() { UNREACHABLE(); } break; +#undef PACK_FLOAT16 #undef FEXDO_DF case FTQ: #define FTQ_DF(source, dst, fp_type, int_type) \ @@ -5887,8 +6087,8 @@ T_int Msa2RFInstrHelper(uint32_t opcode, T_src src, T_dst& dst, const T_int min_int = std::numeric_limits::min(); if (std::isnan(element)) { dst = 0; - } else if (element > max_int || element < min_int) { - dst = element > max_int ? max_int : min_int; + } else if (element >= max_int || element <= min_int) { + dst = element >= max_int ? 
max_int : min_int; } else { dst = static_cast(std::trunc(element)); } @@ -5899,8 +6099,8 @@ T_int Msa2RFInstrHelper(uint32_t opcode, T_src src, T_dst& dst, const T_uint max_int = std::numeric_limits::max(); if (std::isnan(element)) { dst = 0; - } else if (element > max_int || element < 0) { - dst = element > max_int ? max_int : 0; + } else if (element >= max_int || element <= 0) { + dst = element >= max_int ? max_int : 0; } else { dst = static_cast(std::trunc(element)); } @@ -6009,8 +6209,8 @@ T_int Msa2RFInstrHelper(uint32_t opcode, T_src src, T_dst& dst, return 0; } -template -T_int Msa2RFInstrHelper2(uint32_t opcode, T_reg ws, T_i i) { +template +T_int Msa2RFInstrHelper2(uint32_t opcode, T_reg ws, int i) { switch (opcode) { #define EXTRACT_FLOAT16_SIGN(fp16) (fp16 >> 15) #define EXTRACT_FLOAT16_EXP(fp16) (fp16 >> 10 & 0x1f) @@ -6231,6 +6431,30 @@ void Simulator::DecodeTypeImmediate() { } }; + auto BranchHelper_MSA = [this, &next_pc, imm16, + &execute_branch_delay_instruction](bool do_branch) { + execute_branch_delay_instruction = true; + int32_t current_pc = get_pc(); + const int32_t bitsIn16Int = sizeof(int16_t) * kBitsPerByte; + if (do_branch) { + if (FLAG_debug_code) { + int16_t bits = imm16 & 0xfc; + if (imm16 >= 0) { + CHECK_EQ(bits, 0); + } else { + CHECK_EQ(bits ^ 0xfc, 0); + } + } + // jump range :[pc + kInstrSize - 512 * kInstrSize, + // pc + kInstrSize + 511 * kInstrSize] + int16_t offset = static_cast(imm16 << (bitsIn16Int - 10)) >> + (bitsIn16Int - 12); + next_pc = current_pc + offset + Instruction::kInstrSize; + } else { + next_pc = current_pc + 2 * Instruction::kInstrSize; + } + }; + auto BranchAndLinkCompactHelper = [this, &next_pc](bool do_branch, int bits) { int32_t current_pc = get_pc(); CheckForbiddenSlot(current_pc); @@ -6273,18 +6497,66 @@ void Simulator::DecodeTypeImmediate() { case BC1NEZ: BranchHelper(get_fpu_register(ft_reg) & 0x1); break; - case BZ_V: + case BZ_V: { + msa_reg_t wt; + get_msa_register(wt_reg(), &wt); + BranchHelper_MSA(wt.d[0] == 0 && wt.d[1] == 0); + } break; +#define BZ_DF(witdh, lanes) \ + { \ + msa_reg_t wt; \ + get_msa_register(wt_reg(), &wt); \ + int i; \ + for (i = 0; i < lanes; ++i) { \ + if (wt.witdh[i] == 0) { \ + break; \ + } \ + } \ + BranchHelper_MSA(i != lanes); \ + } case BZ_B: - case BZ_H: - case BZ_W: - case BZ_D: - case BNZ_V: - case BNZ_B: - case BNZ_H: - case BNZ_W: - case BNZ_D: - UNIMPLEMENTED(); + BZ_DF(b, kMSALanesByte) break; + case BZ_H: + BZ_DF(h, kMSALanesHalf) + break; + case BZ_W: + BZ_DF(w, kMSALanesWord) + break; + case BZ_D: + BZ_DF(d, kMSALanesDword) + break; +#undef BZ_DF + case BNZ_V: { + msa_reg_t wt; + get_msa_register(wt_reg(), &wt); + BranchHelper_MSA(wt.d[0] != 0 || wt.d[1] != 0); + } break; +#define BNZ_DF(witdh, lanes) \ + { \ + msa_reg_t wt; \ + get_msa_register(wt_reg(), &wt); \ + int i; \ + for (i = 0; i < lanes; ++i) { \ + if (wt.witdh[i] == 0) { \ + break; \ + } \ + } \ + BranchHelper_MSA(i == lanes); \ + } + case BNZ_B: + BNZ_DF(b, kMSALanesByte) + break; + case BNZ_H: + BNZ_DF(h, kMSALanesHalf) + break; + case BNZ_W: + BNZ_DF(w, kMSALanesWord) + break; + case BNZ_D: + BNZ_DF(d, kMSALanesDword) + break; +#undef BNZ_DF default: UNREACHABLE(); } diff --git a/src/mips64/simulator-mips64.cc b/src/mips64/simulator-mips64.cc index 3c10f89a06..e992efebf5 100644 --- a/src/mips64/simulator-mips64.cc +++ b/src/mips64/simulator-mips64.cc @@ -4883,9 +4883,12 @@ void Simulator::DecodeTypeMsaELM() { DCHECK_EQ(rd_reg(), kMSACSRRegister); SetResult(sa(), static_cast(bit_cast(MSACSR_))); break; - case MOVE_V: - 
UNIMPLEMENTED(); - break; + case MOVE_V: { + msa_reg_t ws; + get_msa_register(ws_reg(), &ws); + set_msa_register(wd_reg(), &ws); + TraceMSARegWr(&ws); + } break; default: opcode &= kMsaELMMask; switch (opcode) { @@ -4967,7 +4970,50 @@ void Simulator::DecodeTypeMsaELM() { UNREACHABLE(); } } break; - case SLDI: + case SLDI: { + uint8_t v[32]; + msa_reg_t ws; + msa_reg_t wd; + get_msa_register(ws_reg(), &ws); + get_msa_register(wd_reg(), &wd); +#define SLDI_DF(s, k) \ + for (unsigned i = 0; i < s; i++) { \ + v[i] = ws.b[s * k + i]; \ + v[i + s] = wd.b[s * k + i]; \ + } \ + for (unsigned i = 0; i < s; i++) { \ + wd.b[s * k + i] = v[i + n]; \ + } + switch (DecodeMsaDataFormat()) { + case MSA_BYTE: + DCHECK(n < kMSALanesByte); + SLDI_DF(kMSARegSize / sizeof(int8_t) / kBitsPerByte, 0) + break; + case MSA_HALF: + DCHECK(n < kMSALanesHalf); + for (int k = 0; k < 2; ++k) { + SLDI_DF(kMSARegSize / sizeof(int16_t) / kBitsPerByte, k) + } + break; + case MSA_WORD: + DCHECK(n < kMSALanesWord); + for (int k = 0; k < 4; ++k) { + SLDI_DF(kMSARegSize / sizeof(int32_t) / kBitsPerByte, k) + } + break; + case MSA_DWORD: + DCHECK(n < kMSALanesDword); + for (int k = 0; k < 8; ++k) { + SLDI_DF(kMSARegSize / sizeof(int64_t) / kBitsPerByte, k) + } + break; + default: + UNREACHABLE(); + } + set_msa_register(wd_reg(), &wd); + TraceMSARegWr(&wd); + } break; +#undef SLDI_DF case SPLATI: case INSVE: UNIMPLEMENTED(); @@ -5104,6 +5150,7 @@ void Simulator::DecodeTypeMsaBIT() { default: UNREACHABLE(); } +#undef MSA_BIT_DF } void Simulator::DecodeTypeMsaMI10() { @@ -5386,13 +5433,6 @@ T Simulator::Msa3RInstrHelper(uint32_t opcode, T wd, T ws, T wt) { case DPSUB_U: case SLD: case SPLAT: - case PCKEV: - case PCKOD: - case ILVL: - case ILVR: - case ILVEV: - case ILVOD: - case VSHF: UNIMPLEMENTED(); break; case SRAR: { @@ -5404,51 +5444,209 @@ T Simulator::Msa3RInstrHelper(uint32_t opcode, T wd, T ws, T wt) { int bit = wt_modulo == 0 ? 0 : (wsu >> (wt_modulo - 1)) & 1; res = static_cast((wsu >> wt_modulo) + bit); } break; - case HADD_S: - case HADD_U: - case HSUB_S: - case HSUB_U: - UNIMPLEMENTED(); - break; default: UNREACHABLE(); } return res; } +template +void Msa3RInstrHelper_shuffle(const uint32_t opcode, T_reg ws, T_reg wt, + T_reg wd, const int i, const int num_of_lanes) { + T_int *ws_p, *wt_p, *wd_p; + ws_p = reinterpret_cast(ws); + wt_p = reinterpret_cast(wt); + wd_p = reinterpret_cast(wd); + switch (opcode) { + case PCKEV: + wd_p[i] = wt_p[2 * i]; + wd_p[i + num_of_lanes / 2] = ws_p[2 * i]; + break; + case PCKOD: + wd_p[i] = wt_p[2 * i + 1]; + wd_p[i + num_of_lanes / 2] = ws_p[2 * i + 1]; + break; + case ILVL: + wd_p[2 * i] = wt_p[i + num_of_lanes / 2]; + wd_p[2 * i + 1] = ws_p[i + num_of_lanes / 2]; + break; + case ILVR: + wd_p[2 * i] = wt_p[i]; + wd_p[2 * i + 1] = ws_p[i]; + break; + case ILVEV: + wd_p[2 * i] = wt_p[2 * i]; + wd_p[2 * i + 1] = ws_p[2 * i]; + break; + case ILVOD: + wd_p[2 * i] = wt_p[2 * i + 1]; + wd_p[2 * i + 1] = ws_p[2 * i + 1]; + break; + case VSHF: { + const int mask_not_valid = 0xc0; + const int mask_6_bits = 0x3f; + if ((wd_p[i] & mask_not_valid)) { + wd_p[i] = 0; + } else { + int k = (wd_p[i] & mask_6_bits) % (num_of_lanes * 2); + wd_p[i] = k >= num_of_lanes ? 
ws_p[k - num_of_lanes] : wt_p[k]; + } + } break; + default: + UNREACHABLE(); + } +} + +template +void Msa3RInstrHelper_horizontal(const uint32_t opcode, T_reg ws, T_reg wt, + T_reg wd, const int i, + const int num_of_lanes) { + typedef typename std::make_unsigned::type T_uint; + typedef typename std::make_unsigned::type T_smaller_uint; + T_int* wd_p; + T_smaller_int *ws_p, *wt_p; + ws_p = reinterpret_cast(ws); + wt_p = reinterpret_cast(wt); + wd_p = reinterpret_cast(wd); + T_uint* wd_pu; + T_smaller_uint *ws_pu, *wt_pu; + ws_pu = reinterpret_cast(ws); + wt_pu = reinterpret_cast(wt); + wd_pu = reinterpret_cast(wd); + switch (opcode) { + case HADD_S: + wd_p[i] = + static_cast(ws_p[2 * i + 1]) + static_cast(wt_p[2 * i]); + break; + case HADD_U: + wd_pu[i] = static_cast(ws_pu[2 * i + 1]) + + static_cast(wt_pu[2 * i]); + break; + case HSUB_S: + wd_p[i] = + static_cast(ws_p[2 * i + 1]) - static_cast(wt_p[2 * i]); + break; + case HSUB_U: + wd_pu[i] = static_cast(ws_pu[2 * i + 1]) - + static_cast(wt_pu[2 * i]); + break; + default: + UNREACHABLE(); + } +} void Simulator::DecodeTypeMsa3R() { DCHECK_EQ(kArchVariant, kMips64r6); DCHECK(CpuFeatures::IsSupported(MIPS_SIMD)); uint32_t opcode = instr_.InstructionBits() & kMsa3RMask; msa_reg_t ws, wd, wt; - + get_msa_register(ws_reg(), &ws); + get_msa_register(wt_reg(), &wt); + get_msa_register(wd_reg(), &wd); + switch (opcode) { + case HADD_S: + case HADD_U: + case HSUB_S: + case HSUB_U: +#define HORIZONTAL_ARITHMETIC_DF(num_of_lanes, int_type, lesser_int_type) \ + for (int i = 0; i < num_of_lanes; ++i) { \ + Msa3RInstrHelper_horizontal( \ + opcode, &ws, &wt, &wd, i, num_of_lanes); \ + } + switch (DecodeMsaDataFormat()) { + case MSA_HALF: + HORIZONTAL_ARITHMETIC_DF(kMSALanesHalf, int16_t, int8_t); + break; + case MSA_WORD: + HORIZONTAL_ARITHMETIC_DF(kMSALanesWord, int32_t, int16_t); + break; + case MSA_DWORD: + HORIZONTAL_ARITHMETIC_DF(kMSALanesDword, int64_t, int32_t); + break; + default: + UNREACHABLE(); + } + break; +#undef HORIZONTAL_ARITHMETIC_DF + case VSHF: +#define VSHF_DF(num_of_lanes, int_type) \ + for (int i = 0; i < num_of_lanes; ++i) { \ + Msa3RInstrHelper_shuffle(opcode, &ws, &wt, &wd, i, \ + num_of_lanes); \ + } + switch (DecodeMsaDataFormat()) { + case MSA_BYTE: + VSHF_DF(kMSALanesByte, int8_t); + break; + case MSA_HALF: + VSHF_DF(kMSALanesHalf, int16_t); + break; + case MSA_WORD: + VSHF_DF(kMSALanesWord, int32_t); + break; + case MSA_DWORD: + VSHF_DF(kMSALanesDword, int64_t); + break; + default: + UNREACHABLE(); + } +#undef VSHF_DF + break; + case PCKEV: + case PCKOD: + case ILVL: + case ILVR: + case ILVEV: + case ILVOD: +#define INTERLEAVE_PACK_DF(num_of_lanes, int_type) \ + for (int i = 0; i < num_of_lanes / 2; ++i) { \ + Msa3RInstrHelper_shuffle(opcode, &ws, &wt, &wd, i, \ + num_of_lanes); \ + } + switch (DecodeMsaDataFormat()) { + case MSA_BYTE: + INTERLEAVE_PACK_DF(kMSALanesByte, int8_t); + break; + case MSA_HALF: + INTERLEAVE_PACK_DF(kMSALanesHalf, int16_t); + break; + case MSA_WORD: + INTERLEAVE_PACK_DF(kMSALanesWord, int32_t); + break; + case MSA_DWORD: + INTERLEAVE_PACK_DF(kMSALanesDword, int64_t); + break; + default: + UNREACHABLE(); + } + break; +#undef INTERLEAVE_PACK_DF + default: #define MSA_3R_DF(elem, num_of_lanes) \ - get_msa_register(instr_.WdValue(), wd.elem); \ - get_msa_register(instr_.WsValue(), ws.elem); \ - get_msa_register(instr_.WtValue(), wt.elem); \ for (int i = 0; i < num_of_lanes; i++) { \ wd.elem[i] = Msa3RInstrHelper(opcode, wd.elem[i], ws.elem[i], wt.elem[i]); \ - } \ - set_msa_register(instr_.WdValue(), 
wd.elem); \ - TraceMSARegWr(wd.elem); - - switch (DecodeMsaDataFormat()) { - case MSA_BYTE: - MSA_3R_DF(b, kMSALanesByte); - break; - case MSA_HALF: - MSA_3R_DF(h, kMSALanesHalf); - break; - case MSA_WORD: - MSA_3R_DF(w, kMSALanesWord); - break; - case MSA_DWORD: - MSA_3R_DF(d, kMSALanesDword); - break; - default: - UNREACHABLE(); } + + switch (DecodeMsaDataFormat()) { + case MSA_BYTE: + MSA_3R_DF(b, kMSALanesByte); + break; + case MSA_HALF: + MSA_3R_DF(h, kMSALanesHalf); + break; + case MSA_WORD: + MSA_3R_DF(w, kMSALanesWord); + break; + case MSA_DWORD: + MSA_3R_DF(d, kMSALanesDword); + break; + default: + UNREACHABLE(); + } #undef MSA_3R_DF + break; + } + set_msa_register(wd_reg(), &wd); + TraceMSARegWr(&wd); } template @@ -5546,7 +5744,7 @@ void Msa3RFInstrHelper(uint32_t opcode, T_reg ws, T_reg wt, T_reg& wd) { break; case FDIV: { if (t_element == 0) { - wd = std::numeric_limits::quiet_NaN(); + wd = bit_cast(std::numeric_limits::quiet_NaN()); } else { wd = bit_cast(s_element / t_element); } @@ -5766,6 +5964,7 @@ void Simulator::DecodeTypeMsa3RF() { UNREACHABLE(); } break; +#undef PACK_FLOAT16 #undef FEXDO_DF case FTQ: #define FTQ_DF(source, dst, fp_type, int_type) \ @@ -6119,8 +6318,8 @@ T_int Msa2RFInstrHelper(uint32_t opcode, T_src src, T_dst& dst, const T_int min_int = std::numeric_limits::min(); if (std::isnan(element)) { dst = 0; - } else if (element > max_int || element < min_int) { - dst = element > max_int ? max_int : min_int; + } else if (element >= max_int || element <= min_int) { + dst = element >= max_int ? max_int : min_int; } else { dst = static_cast(std::trunc(element)); } @@ -6131,8 +6330,8 @@ T_int Msa2RFInstrHelper(uint32_t opcode, T_src src, T_dst& dst, const T_uint max_int = std::numeric_limits::max(); if (std::isnan(element)) { dst = 0; - } else if (element > max_int || element < 0) { - dst = element > max_int ? max_int : 0; + } else if (element >= max_int || element <= 0) { + dst = element >= max_int ? 
max_int : 0; } else { dst = static_cast(std::trunc(element)); } @@ -6241,8 +6440,8 @@ T_int Msa2RFInstrHelper(uint32_t opcode, T_src src, T_dst& dst, return 0; } -template -T_int Msa2RFInstrHelper2(uint32_t opcode, T_reg ws, T_i i) { +template +T_int Msa2RFInstrHelper2(uint32_t opcode, T_reg ws, int i) { switch (opcode) { #define EXTRACT_FLOAT16_SIGN(fp16) (fp16 >> 15) #define EXTRACT_FLOAT16_EXP(fp16) (fp16 >> 10 & 0x1f) @@ -6472,6 +6671,30 @@ void Simulator::DecodeTypeImmediate() { } }; + auto BranchHelper_MSA = [this, &next_pc, imm16, + &execute_branch_delay_instruction](bool do_branch) { + execute_branch_delay_instruction = true; + int64_t current_pc = get_pc(); + const int32_t bitsIn16Int = sizeof(int16_t) * kBitsPerByte; + if (do_branch) { + if (FLAG_debug_code) { + int16_t bits = imm16 & 0xfc; + if (imm16 >= 0) { + CHECK_EQ(bits, 0); + } else { + CHECK_EQ(bits ^ 0xfc, 0); + } + } + // jump range :[pc + kInstrSize - 512 * kInstrSize, + // pc + kInstrSize + 511 * kInstrSize] + int16_t offset = static_cast(imm16 << (bitsIn16Int - 10)) >> + (bitsIn16Int - 12); + next_pc = current_pc + offset + Instruction::kInstrSize; + } else { + next_pc = current_pc + 2 * Instruction::kInstrSize; + } + }; + auto BranchAndLinkCompactHelper = [this, &next_pc](bool do_branch, int bits) { int64_t current_pc = get_pc(); CheckForbiddenSlot(current_pc); @@ -6513,18 +6736,66 @@ void Simulator::DecodeTypeImmediate() { case BC1NEZ: BranchHelper(get_fpu_register(ft_reg) & 0x1); break; - case BZ_V: + case BZ_V: { + msa_reg_t wt; + get_msa_register(wt_reg(), &wt); + BranchHelper_MSA(wt.d[0] == 0 && wt.d[1] == 0); + } break; +#define BZ_DF(witdh, lanes) \ + { \ + msa_reg_t wt; \ + get_msa_register(wt_reg(), &wt); \ + int i; \ + for (i = 0; i < lanes; ++i) { \ + if (wt.witdh[i] == 0) { \ + break; \ + } \ + } \ + BranchHelper_MSA(i != lanes); \ + } case BZ_B: - case BZ_H: - case BZ_W: - case BZ_D: - case BNZ_V: - case BNZ_B: - case BNZ_H: - case BNZ_W: - case BNZ_D: - UNIMPLEMENTED(); + BZ_DF(b, kMSALanesByte) break; + case BZ_H: + BZ_DF(h, kMSALanesHalf) + break; + case BZ_W: + BZ_DF(w, kMSALanesWord) + break; + case BZ_D: + BZ_DF(d, kMSALanesDword) + break; +#undef BZ_DF + case BNZ_V: { + msa_reg_t wt; + get_msa_register(wt_reg(), &wt); + BranchHelper_MSA(wt.d[0] != 0 || wt.d[1] != 0); + } break; +#define BNZ_DF(witdh, lanes) \ + { \ + msa_reg_t wt; \ + get_msa_register(wt_reg(), &wt); \ + int i; \ + for (i = 0; i < lanes; ++i) { \ + if (wt.witdh[i] == 0) { \ + break; \ + } \ + } \ + BranchHelper_MSA(i == lanes); \ + } + case BNZ_B: + BNZ_DF(b, kMSALanesByte) + break; + case BNZ_H: + BNZ_DF(h, kMSALanesHalf) + break; + case BNZ_W: + BNZ_DF(w, kMSALanesWord) + break; + case BNZ_D: + BNZ_DF(d, kMSALanesDword) + break; +#undef BNZ_DF default: UNREACHABLE(); } diff --git a/src/utils.h b/src/utils.h index b585217f7c..e6e98fabba 100644 --- a/src/utils.h +++ b/src/utils.h @@ -288,7 +288,7 @@ T SaturateAdd(T a, T b) { template T SaturateSub(T a, T b) { if (std::is_signed::value) { - if (a > 0 && b < 0) { + if (a >= 0 && b < 0) { if (a > std::numeric_limits::max() + b) { return std::numeric_limits::max(); } diff --git a/test/cctest/test-assembler-mips.cc b/test/cctest/test-assembler-mips.cc index de149a2165..79a80c3a43 100644 --- a/test/cctest/test-assembler-mips.cc +++ b/test/cctest/test-assembler-mips.cc @@ -4968,6 +4968,189 @@ TEST(r6_beqzc) { } } +void load_elements_of_vector(MacroAssembler& assm, const uint64_t elements[], + MSARegister w, Register t0, Register t1) { + __ li(t0, static_cast(elements[0] & 0xffffffff)); 
+ __ li(t1, static_cast((elements[0] >> 32) & 0xffffffff)); + __ insert_w(w, 0, t0); + __ insert_w(w, 1, t1); + __ li(t0, static_cast(elements[1] & 0xffffffff)); + __ li(t1, static_cast((elements[1] >> 32) & 0xffffffff)); + __ insert_w(w, 2, t0); + __ insert_w(w, 3, t1); +} + +inline void store_elements_of_vector(MacroAssembler& assm, MSARegister w, + Register a) { + __ st_d(w, MemOperand(a, 0)); +} + +typedef union { + uint8_t b[16]; + uint16_t h[8]; + uint32_t w[4]; + uint64_t d[2]; +} msa_reg_t; + +struct TestCaseMsaBranch { + uint64_t wt_lo; + uint64_t wt_hi; +}; + +template +void run_bz_bnz(TestCaseMsaBranch* input, Branch GenerateBranch, + bool branched) { + Isolate* isolate = CcTest::i_isolate(); + HandleScope scope(isolate); + + MacroAssembler assm(isolate, NULL, 0, v8::internal::CodeObjectRequired::kYes); + CpuFeatureScope fscope(&assm, MIPS_SIMD); + + typedef struct { + uint64_t ws_lo; + uint64_t ws_hi; + uint64_t wd_lo; + uint64_t wd_hi; + } T; + T t = {0x20b9cc4f1a83e0c5, 0xa27e1b5f2f5bb18a, 0x0000000000000000, + 0x0000000000000000}; + msa_reg_t res; + Label do_not_move_w0_to_w2; + + load_elements_of_vector(assm, &t.ws_lo, w0, t0, t1); + load_elements_of_vector(assm, &t.wd_lo, w2, t0, t1); + load_elements_of_vector(assm, &input->wt_lo, w1, t0, t1); + GenerateBranch(assm, do_not_move_w0_to_w2); + __ nop(); + __ move_v(w2, w0); + + __ bind(&do_not_move_w0_to_w2); + store_elements_of_vector(assm, w2, a0); + __ jr(ra); + __ nop(); + + CodeDesc desc; + assm.GetCode(isolate, &desc); + Handle code = + isolate->factory()->NewCode(desc, Code::STUB, Handle()); +#ifdef OBJECT_PRINT + code->Print(std::cout); +#endif + F3 f = FUNCTION_CAST(code->entry()); + + (CALL_GENERATED_CODE(isolate, f, &res, 0, 0, 0, 0)); + if (branched) { + CHECK_EQ(t.wd_lo, res.d[0]); + CHECK_EQ(t.wd_hi, res.d[1]); + } else { + CHECK_EQ(t.ws_lo, res.d[0]); + CHECK_EQ(t.ws_hi, res.d[1]); + } +} + +TEST(MSA_bz_bnz) { + if (!IsMipsArchVariant(kMips32r6) || !CpuFeatures::IsSupported(MIPS_SIMD)) + return; + + TestCaseMsaBranch tz_v[] = { + {0x0, 0x0}, {0xabc, 0x0}, {0x0, 0xabc}, {0xabc, 0xabc}}; + for (unsigned i = 0; i < arraysize(tz_v); ++i) { + run_bz_bnz( + &tz_v[i], + [](MacroAssembler& assm, Label& br_target) { __ bz_v(w1, &br_target); }, + tz_v[i].wt_lo == 0 && tz_v[i].wt_hi == 0); + } + +#define TEST_BZ_DF(input_array, lanes, instruction, int_type) \ + for (unsigned i = 0; i < arraysize(input_array); ++i) { \ + int j; \ + int_type* element = reinterpret_cast(&input_array[i]); \ + for (j = 0; j < lanes; ++j) { \ + if (element[j] == 0) { \ + break; \ + } \ + } \ + run_bz_bnz(&input_array[i], \ + [](MacroAssembler& assm, Label& br_target) { \ + __ instruction(w1, &br_target); \ + }, \ + j != lanes); \ + } + TestCaseMsaBranch tz_b[] = {{0x0, 0x0}, + {0xbc0000, 0x0}, + {0x0, 0xab000000000000cd}, + {0x123456789abcdef0, 0xaaaaaaaaaaaaaaaa}}; + TEST_BZ_DF(tz_b, kMSALanesByte, bz_b, int8_t) + + TestCaseMsaBranch tz_h[] = {{0x0, 0x0}, + {0xbcde0000, 0x0}, + {0x0, 0xabcd00000000abcd}, + {0x123456789abcdef0, 0xaaaaaaaaaaaaaaaa}}; + TEST_BZ_DF(tz_h, kMSALanesHalf, bz_h, int16_t) + + TestCaseMsaBranch tz_w[] = {{0x0, 0x0}, + {0xbcde123400000000, 0x0}, + {0x0, 0x000000001234abcd}, + {0x123456789abcdef0, 0xaaaaaaaaaaaaaaaa}}; + TEST_BZ_DF(tz_w, kMSALanesWord, bz_w, int32_t) + + TestCaseMsaBranch tz_d[] = {{0x0, 0x0}, + {0xbcde0000, 0x0}, + {0x0, 0xabcd00000000abcd}, + {0x123456789abcdef0, 0xaaaaaaaaaaaaaaaa}}; + TEST_BZ_DF(tz_d, kMSALanesDword, bz_d, int64_t) +#undef TEST_BZ_DF + + TestCaseMsaBranch tnz_v[] = { + {0x0, 0x0}, 
{0xabc, 0x0}, {0x0, 0xabc}, {0xabc, 0xabc}}; + for (unsigned i = 0; i < arraysize(tnz_v); ++i) { + run_bz_bnz(&tnz_v[i], + [](MacroAssembler& assm, Label& br_target) { + __ bnz_v(w1, &br_target); + }, + tnz_v[i].wt_lo != 0 || tnz_v[i].wt_hi != 0); + } + +#define TEST_BNZ_DF(input_array, lanes, instruction, int_type) \ + for (unsigned i = 0; i < arraysize(input_array); ++i) { \ + int j; \ + int_type* element = reinterpret_cast(&input_array[i]); \ + for (j = 0; j < lanes; ++j) { \ + if (element[j] == 0) { \ + break; \ + } \ + } \ + run_bz_bnz(&input_array[i], \ + [](MacroAssembler& assm, Label& br_target) { \ + __ instruction(w1, &br_target); \ + }, \ + j == lanes); \ + } + TestCaseMsaBranch tnz_b[] = {{0x0, 0x0}, + {0xbc0000, 0x0}, + {0x0, 0xab000000000000cd}, + {0x123456789abcdef0, 0xaaaaaaaaaaaaaaaa}}; + TEST_BNZ_DF(tnz_b, 16, bnz_b, int8_t) + + TestCaseMsaBranch tnz_h[] = {{0x0, 0x0}, + {0xbcde0000, 0x0}, + {0x0, 0xabcd00000000abcd}, + {0x123456789abcdef0, 0xaaaaaaaaaaaaaaaa}}; + TEST_BNZ_DF(tnz_h, 8, bnz_h, int16_t) + + TestCaseMsaBranch tnz_w[] = {{0x0, 0x0}, + {0xbcde123400000000, 0x0}, + {0x0, 0x000000001234abcd}, + {0x123456789abcdef0, 0xaaaaaaaaaaaaaaaa}}; + TEST_BNZ_DF(tnz_w, 4, bnz_w, int32_t) + + TestCaseMsaBranch tnz_d[] = {{0x0, 0x0}, + {0xbcde0000, 0x0}, + {0x0, 0xabcd00000000abcd}, + {0x123456789abcdef0, 0xaaaaaaaaaaaaaaaa}}; + TEST_BNZ_DF(tnz_d, 2, bnz_d, int64_t) +#undef TEST_BNZ_DF +} uint32_t run_jialc(int16_t offset) { Isolate* isolate = CcTest::i_isolate(); @@ -5626,70 +5809,6 @@ TEST(Subu) { } } -void load_uint64_elements_of_vector(MacroAssembler& assm, - const uint64_t elements[], MSARegister w, - Register t0, Register t1) { - __ li(t0, static_cast(elements[0] & 0xffffffff)); - __ li(t1, static_cast((elements[0] >> 32) & 0xffffffff)); - __ insert_w(w, 0, t0); - __ insert_w(w, 1, t1); - __ li(t0, static_cast(elements[1] & 0xffffffff)); - __ li(t1, static_cast((elements[1] >> 32) & 0xffffffff)); - __ insert_w(w, 2, t0); - __ insert_w(w, 3, t1); -} - -void load_uint32_elements_of_vector(MacroAssembler& assm, - const uint64_t elements[], MSARegister w, - Register t0, Register t1) { - const uint32_t* const element = reinterpret_cast(elements); - __ li(t0, element[0]); - __ li(t1, element[1]); - __ insert_w(w, 0, t0); - __ insert_w(w, 1, t1); - __ li(t0, element[2]); - __ li(t1, element[3]); - __ insert_w(w, 2, t0); - __ insert_w(w, 3, t1); -} - -void load_uint16_elements_of_vector(MacroAssembler& assm, - const uint64_t elements[], MSARegister w, - Register t0, Register t1) { - const uint16_t* const element = reinterpret_cast(elements); - __ li(t0, element[0]); - __ li(t1, element[1]); - __ insert_h(w, 0, t0); - __ insert_h(w, 1, t1); - __ li(t0, element[2]); - __ li(t1, element[3]); - __ insert_h(w, 2, t0); - __ insert_h(w, 3, t1); - __ li(t0, element[4]); - __ li(t1, element[5]); - __ insert_h(w, 4, t0); - __ insert_h(w, 5, t1); - __ li(t0, element[6]); - __ li(t1, element[7]); - __ insert_h(w, 6, t0); - __ insert_h(w, 7, t1); -} - -inline void store_uint64_elements_of_vector(MacroAssembler& assm, MSARegister w, - Register a) { - __ st_d(w, MemOperand(a, 0)); -} - -inline void store_uint32_elements_of_vector(MacroAssembler& assm, MSARegister w, - Register a) { - __ st_w(w, MemOperand(a, 0)); -} - -void store_uint16_elements_of_vector(MacroAssembler& assm, MSARegister w, - Register a) { - __ st_h(w, MemOperand(a, 0)); -} - TEST(MSA_fill_copy) { CcTest::InitializeVM(); Isolate* isolate = CcTest::i_isolate(); @@ -5881,13 +6000,6 @@ TEST(MSA_fill_copy_3) { 
CHECK_EQ(0x5555555555555555, t[1].d0); } -typedef union { - uint8_t b[16]; - uint16_t h[8]; - uint32_t w[4]; - uint64_t d[2]; -} msa_reg_t; - template void run_msa_insert(int32_t rs_value, int n, msa_reg_t* w) { Isolate* isolate = CcTest::i_isolate(); @@ -5914,7 +6026,7 @@ void run_msa_insert(int32_t rs_value, int n, msa_reg_t* w) { UNREACHABLE(); } - store_uint64_elements_of_vector(assm, w0, a0); + store_elements_of_vector(assm, w0, a0); __ jr(ra); __ nop(); @@ -5987,6 +6099,152 @@ TEST(MSA_insert) { } } +TEST(MSA_move_v) { + if (!IsMipsArchVariant(kMips32r6) || !CpuFeatures::IsSupported(MIPS_SIMD)) + return; + CcTest::InitializeVM(); + Isolate* isolate = CcTest::i_isolate(); + HandleScope scope(isolate); + + typedef struct { + uint64_t ws_lo; + uint64_t ws_hi; + uint64_t wd_lo; + uint64_t wd_hi; + } T; + T t[] = {{0x20b9cc4f1a83e0c5, 0xa27e1b5f2f5bb18a, 0x1e86678b52f8e1ff, + 0x706e51290ac76fb9}, + {0x4414aed7883ffd18, 0x047d183a06b67016, 0x4ef258cf8d822870, + 0x2686b73484c2e843}, + {0xd38ff9d048884ffc, 0x6dc63a57c0943ca7, 0x8520ca2f3e97c426, + 0xa9913868fb819c59}}; + + for (unsigned i = 0; i < arraysize(t); ++i) { + MacroAssembler assm(isolate, nullptr, 0, + v8::internal::CodeObjectRequired::kYes); + CpuFeatureScope fscope(&assm, MIPS_SIMD); + + load_elements_of_vector(assm, &t[i].ws_lo, w0, t0, t1); + load_elements_of_vector(assm, &t[i].wd_lo, w2, t0, t1); + __ move_v(w2, w0); + store_elements_of_vector(assm, w2, a0); + + __ jr(ra); + __ nop(); + + CodeDesc desc; + assm.GetCode(isolate, &desc); + Handle code = + isolate->factory()->NewCode(desc, Code::STUB, Handle()); +#ifdef OBJECT_PRINT + code->Print(std::cout); +#endif + F3 f = FUNCTION_CAST(code->entry()); + (CALL_GENERATED_CODE(isolate, f, &t[i].wd_lo, 0, 0, 0, 0)); + CHECK_EQ(t[i].ws_lo, t[i].wd_lo); + CHECK_EQ(t[i].ws_hi, t[i].wd_hi); + } +} + +template +void run_msa_sldi(OperFunc GenerateOperation, + ExpectFunc GenerateExpectedResult) { + Isolate* isolate = CcTest::i_isolate(); + HandleScope scope(isolate); + + typedef struct { + uint64_t ws_lo; + uint64_t ws_hi; + uint64_t wd_lo; + uint64_t wd_hi; + } T; + T t[] = {{0x20b9cc4f1a83e0c5, 0xa27e1b5f2f5bb18a, 0x1e86678b52f8e1ff, + 0x706e51290ac76fb9}, + {0x4414aed7883ffd18, 0x047d183a06b67016, 0x4ef258cf8d822870, + 0x2686b73484c2e843}, + {0xd38ff9d048884ffc, 0x6dc63a57c0943ca7, 0x8520ca2f3e97c426, + 0xa9913868fb819c59}}; + uint64_t res[2]; + + for (unsigned i = 0; i < arraysize(t); ++i) { + MacroAssembler assm(isolate, nullptr, 0, + v8::internal::CodeObjectRequired::kYes); + CpuFeatureScope fscope(&assm, MIPS_SIMD); + load_elements_of_vector(assm, &t[i].ws_lo, w0, t0, t1); + load_elements_of_vector(assm, &t[i].wd_lo, w2, t0, t1); + GenerateOperation(assm); + store_elements_of_vector(assm, w2, a0); + + __ jr(ra); + __ nop(); + + CodeDesc desc; + assm.GetCode(isolate, &desc); + Handle code = + isolate->factory()->NewCode(desc, Code::STUB, Handle()); +#ifdef OBJECT_PRINT + code->Print(std::cout); +#endif + F3 f = FUNCTION_CAST(code->entry()); + (CALL_GENERATED_CODE(isolate, f, &res[0], 0, 0, 0, 0)); + GenerateExpectedResult(reinterpret_cast(&t[i].ws_lo), + reinterpret_cast(&t[i].wd_lo)); + CHECK_EQ(res[0], t[i].wd_lo); + CHECK_EQ(res[1], t[i].wd_hi); + } +} + +TEST(MSA_sldi) { + if (!IsMipsArchVariant(kMips32r6) || !CpuFeatures::IsSupported(MIPS_SIMD)) + return; + CcTest::InitializeVM(); + +#define SLDI_DF(s, k) \ + uint8_t v[32]; \ + for (unsigned i = 0; i < s; i++) { \ + v[i] = ws[s * k + i]; \ + v[i + s] = wd[s * k + i]; \ + } \ + for (unsigned i = 0; i < s; i++) { \ + wd[s * k + 
i] = v[i + n]; \ + } + + for (int n = 0; n < 16; ++n) { + run_msa_sldi([n](MacroAssembler& assm) { __ sldi_b(w2, w0, n); }, + [n](uint8_t* ws, uint8_t* wd) { + SLDI_DF(kMSARegSize / sizeof(int8_t) / kBitsPerByte, 0) + }); + } + + for (int n = 0; n < 8; ++n) { + run_msa_sldi([n](MacroAssembler& assm) { __ sldi_h(w2, w0, n); }, + [n](uint8_t* ws, uint8_t* wd) { + for (int k = 0; k < 2; ++k) { + SLDI_DF(kMSARegSize / sizeof(int16_t) / kBitsPerByte, k) + } + }); + } + + for (int n = 0; n < 4; ++n) { + run_msa_sldi([n](MacroAssembler& assm) { __ sldi_w(w2, w0, n); }, + [n](uint8_t* ws, uint8_t* wd) { + for (int k = 0; k < 4; ++k) { + SLDI_DF(kMSARegSize / sizeof(int32_t) / kBitsPerByte, k) + } + }); + } + + for (int n = 0; n < 2; ++n) { + run_msa_sldi([n](MacroAssembler& assm) { __ sldi_d(w2, w0, n); }, + [n](uint8_t* ws, uint8_t* wd) { + for (int k = 0; k < 8; ++k) { + SLDI_DF(kMSARegSize / sizeof(int64_t) / kBitsPerByte, k) + } + }); + } +#undef SLDI_DF +} + void run_msa_ctc_cfc(uint32_t value) { Isolate* isolate = CcTest::i_isolate(); HandleScope scope(isolate); @@ -6110,7 +6368,7 @@ void run_msa_i8(SecondaryField opcode, uint64_t ws_lo, uint64_t ws_hi, UNREACHABLE(); } - store_uint64_elements_of_vector(assm, w2, a0); + store_elements_of_vector(assm, w2, a0); __ jr(ra); __ nop(); @@ -6403,11 +6661,11 @@ void run_msa_i5(struct TestCaseMsaI5* input, bool i5_sign_ext, int32_t i5 = i5_sign_ext ? static_cast(input->i5 << 27) >> 27 : input->i5; - load_uint64_elements_of_vector(assm, &(input->ws_lo), w0, t0, t1); + load_elements_of_vector(assm, &(input->ws_lo), w0, t0, t1); GenerateI5InstructionFunc(assm, i5); - store_uint64_elements_of_vector(assm, w2, a0); + store_elements_of_vector(assm, w2, a0); __ jr(ra); __ nop(); @@ -6814,11 +7072,9 @@ struct TestCaseMsa2R { uint64_t exp_res_hi; }; -template +template void run_msa_2r(const struct TestCaseMsa2R* input, - Func Generate2RInstructionFunc, - FuncLoad load_elements_of_vector, - FuncStore store_elements_of_vector) { + Func Generate2RInstructionFunc) { Isolate* isolate = CcTest::i_isolate(); HandleScope scope(isolate); @@ -6846,17 +7102,8 @@ void run_msa_2r(const struct TestCaseMsa2R* input, (CALL_GENERATED_CODE(isolate, f, &res, 0, 0, 0, 0)); - if (store_elements_of_vector == store_uint64_elements_of_vector) { - CHECK_EQ(input->exp_res_lo, res.d[0]); - CHECK_EQ(input->exp_res_hi, res.d[1]); - } else if (store_elements_of_vector == store_uint32_elements_of_vector) { - const uint32_t* exp_res = - reinterpret_cast(&input->exp_res_lo); - CHECK_EQ(exp_res[0], res.w[0]); - CHECK_EQ(exp_res[1], res.w[1]); - CHECK_EQ(exp_res[2], res.w[2]); - CHECK_EQ(exp_res[3], res.w[3]); - } + CHECK_EQ(input->exp_res_lo, res.d[0]); + CHECK_EQ(input->exp_res_hi, res.d[1]); } TEST(MSA_pcnt) { @@ -6907,14 +7154,10 @@ TEST(MSA_pcnt) { {0xf35862e13e38f8b0, 0x4f41ffdef2bfe636, 0x20, 0x2a}}; for (size_t i = 0; i < sizeof(tc_b) / sizeof(TestCaseMsa2R); ++i) { - run_msa_2r(&tc_b[i], [](MacroAssembler& assm) { __ pcnt_b(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); - run_msa_2r(&tc_h[i], [](MacroAssembler& assm) { __ pcnt_h(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); - run_msa_2r(&tc_w[i], [](MacroAssembler& assm) { __ pcnt_w(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); - run_msa_2r(&tc_d[i], [](MacroAssembler& assm) { __ pcnt_d(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); + run_msa_2r(&tc_b[i], [](MacroAssembler& assm) { __ pcnt_b(w2, w0); 
}); + run_msa_2r(&tc_h[i], [](MacroAssembler& assm) { __ pcnt_h(w2, w0); }); + run_msa_2r(&tc_w[i], [](MacroAssembler& assm) { __ pcnt_w(w2, w0); }); + run_msa_2r(&tc_d[i], [](MacroAssembler& assm) { __ pcnt_d(w2, w0); }); } } @@ -6966,14 +7209,10 @@ TEST(MSA_nlzc) { {0x00000000e338f8b0, 0x0754534acab32654, 0x20, 0x5}}; for (size_t i = 0; i < sizeof(tc_b) / sizeof(TestCaseMsa2R); ++i) { - run_msa_2r(&tc_b[i], [](MacroAssembler& assm) { __ nlzc_b(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); - run_msa_2r(&tc_h[i], [](MacroAssembler& assm) { __ nlzc_h(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); - run_msa_2r(&tc_w[i], [](MacroAssembler& assm) { __ nlzc_w(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); - run_msa_2r(&tc_d[i], [](MacroAssembler& assm) { __ nlzc_d(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); + run_msa_2r(&tc_b[i], [](MacroAssembler& assm) { __ nlzc_b(w2, w0); }); + run_msa_2r(&tc_h[i], [](MacroAssembler& assm) { __ nlzc_h(w2, w0); }); + run_msa_2r(&tc_w[i], [](MacroAssembler& assm) { __ nlzc_w(w2, w0); }); + run_msa_2r(&tc_d[i], [](MacroAssembler& assm) { __ nlzc_d(w2, w0); }); } } @@ -7025,14 +7264,10 @@ TEST(MSA_nloc) { {0xFFFFFFFF1CC7074F, 0xF8ABACB5354CD9AB, 0x20, 0x5}}; for (size_t i = 0; i < sizeof(tc_b) / sizeof(TestCaseMsa2R); ++i) { - run_msa_2r(&tc_b[i], [](MacroAssembler& assm) { __ nloc_b(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); - run_msa_2r(&tc_h[i], [](MacroAssembler& assm) { __ nloc_h(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); - run_msa_2r(&tc_w[i], [](MacroAssembler& assm) { __ nloc_w(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); - run_msa_2r(&tc_d[i], [](MacroAssembler& assm) { __ nloc_d(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); + run_msa_2r(&tc_b[i], [](MacroAssembler& assm) { __ nloc_b(w2, w0); }); + run_msa_2r(&tc_h[i], [](MacroAssembler& assm) { __ nloc_h(w2, w0); }); + run_msa_2r(&tc_w[i], [](MacroAssembler& assm) { __ nloc_w(w2, w0); }); + run_msa_2r(&tc_d[i], [](MacroAssembler& assm) { __ nloc_d(w2, w0); }); } } @@ -7093,13 +7328,11 @@ TEST(MSA_fclass) { for (size_t i = 0; i < sizeof(tc_s) / sizeof(TestCaseMsa2RF_F_U); ++i) { run_msa_2r(reinterpret_cast(&tc_s[i]), - [](MacroAssembler& assm) { __ fclass_w(w2, w0); }, - load_uint32_elements_of_vector, store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ fclass_w(w2, w0); }); } for (size_t i = 0; i < sizeof(tc_d) / sizeof(TestCaseMsa2RF_D_U); ++i) { run_msa_2r(reinterpret_cast(&tc_d[i]), - [](MacroAssembler& assm) { __ fclass_d(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); + [](MacroAssembler& assm) { __ fclass_d(w2, w0); }); } #undef BIT @@ -7165,13 +7398,11 @@ TEST(MSA_ftrunc_s) { for (size_t i = 0; i < sizeof(tc_s) / sizeof(TestCaseMsa2RF_F_I); ++i) { run_msa_2r(reinterpret_cast(&tc_s[i]), - [](MacroAssembler& assm) { __ ftrunc_s_w(w2, w0); }, - load_uint32_elements_of_vector, store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ ftrunc_s_w(w2, w0); }); } for (size_t i = 0; i < sizeof(tc_d) / sizeof(TestCaseMsa2RF_D_I); ++i) { run_msa_2r(reinterpret_cast(&tc_d[i]), - [](MacroAssembler& assm) { __ ftrunc_s_d(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); + [](MacroAssembler& assm) { __ ftrunc_s_d(w2, w0); }); } } @@ -7204,13 +7435,11 @@ 
TEST(MSA_ftrunc_u) { for (size_t i = 0; i < sizeof(tc_s) / sizeof(TestCaseMsa2RF_F_U); ++i) { run_msa_2r(reinterpret_cast(&tc_s[i]), - [](MacroAssembler& assm) { __ ftrunc_u_w(w2, w0); }, - load_uint32_elements_of_vector, store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ ftrunc_u_w(w2, w0); }); } for (size_t i = 0; i < sizeof(tc_d) / sizeof(TestCaseMsa2RF_D_U); ++i) { run_msa_2r(reinterpret_cast(&tc_d[i]), - [](MacroAssembler& assm) { __ ftrunc_u_d(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); + [](MacroAssembler& assm) { __ ftrunc_u_d(w2, w0); }); } } @@ -7249,13 +7478,11 @@ TEST(MSA_fsqrt) { for (size_t i = 0; i < sizeof(tc_s) / sizeof(TestCaseMsa2RF_F_F); ++i) { run_msa_2r(reinterpret_cast(&tc_s[i]), - [](MacroAssembler& assm) { __ fsqrt_w(w2, w0); }, - load_uint32_elements_of_vector, store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ fsqrt_w(w2, w0); }); } for (size_t i = 0; i < sizeof(tc_d) / sizeof(TestCaseMsa2RF_D_D); ++i) { run_msa_2r(reinterpret_cast(&tc_d[i]), - [](MacroAssembler& assm) { __ fsqrt_d(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); + [](MacroAssembler& assm) { __ fsqrt_d(w2, w0); }); } } @@ -7279,13 +7506,11 @@ TEST(MSA_frsqrt) { for (size_t i = 0; i < sizeof(tc_s) / sizeof(TestCaseMsa2RF_F_F); ++i) { run_msa_2r(reinterpret_cast(&tc_s[i]), - [](MacroAssembler& assm) { __ frsqrt_w(w2, w0); }, - load_uint32_elements_of_vector, store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ frsqrt_w(w2, w0); }); } for (size_t i = 0; i < sizeof(tc_d) / sizeof(TestCaseMsa2RF_D_D); ++i) { run_msa_2r(reinterpret_cast(&tc_d[i]), - [](MacroAssembler& assm) { __ frsqrt_d(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); + [](MacroAssembler& assm) { __ frsqrt_d(w2, w0); }); } } @@ -7311,13 +7536,11 @@ TEST(MSA_frcp) { for (size_t i = 0; i < sizeof(tc_s) / sizeof(TestCaseMsa2RF_F_F); ++i) { run_msa_2r(reinterpret_cast(&tc_s[i]), - [](MacroAssembler& assm) { __ frcp_w(w2, w0); }, - load_uint32_elements_of_vector, store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ frcp_w(w2, w0); }); } for (size_t i = 0; i < sizeof(tc_d) / sizeof(TestCaseMsa2RF_D_D); ++i) { run_msa_2r(reinterpret_cast(&tc_d[i]), - [](MacroAssembler& assm) { __ frcp_d(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); + [](MacroAssembler& assm) { __ frcp_d(w2, w0); }); } } @@ -7332,8 +7555,7 @@ void test_frint_s(size_t data_size, TestCaseMsa2RF_F_F tc_d[], __ ctcmsa(msareg, t0); __ frint_w(w2, w0); __ ctcmsa(msareg, t1); - }, - load_uint32_elements_of_vector, store_uint32_elements_of_vector); + }); } } @@ -7348,8 +7570,7 @@ void test_frint_d(size_t data_size, TestCaseMsa2RF_D_D tc_d[], __ ctcmsa(msareg, t0); __ frint_d(w2, w0); __ ctcmsa(msareg, t1); - }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); + }); } } @@ -7431,14 +7652,12 @@ TEST(MSA_flog2) { for (size_t i = 0; i < sizeof(tc_s) / sizeof(TestCaseMsa2RF_F_F); ++i) { run_msa_2r(reinterpret_cast(&tc_s[i]), - [](MacroAssembler& assm) { __ flog2_w(w2, w0); }, - load_uint32_elements_of_vector, store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ flog2_w(w2, w0); }); } for (size_t i = 0; i < sizeof(tc_d) / sizeof(TestCaseMsa2RF_D_D); ++i) { run_msa_2r(reinterpret_cast(&tc_d[i]), - [](MacroAssembler& assm) { __ flog2_d(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); + [](MacroAssembler& assm) { __ flog2_d(w2, w0); }); } } @@ 
-7453,8 +7672,7 @@ void test_ftint_s_s(size_t data_size, TestCaseMsa2RF_F_I tc_d[], __ ctcmsa(msareg, t0); __ ftint_s_w(w2, w0); __ ctcmsa(msareg, t1); - }, - load_uint32_elements_of_vector, store_uint32_elements_of_vector); + }); } } @@ -7469,8 +7687,7 @@ void test_ftint_s_d(size_t data_size, TestCaseMsa2RF_D_I tc_d[], __ ctcmsa(msareg, t0); __ ftint_s_d(w2, w0); __ ctcmsa(msareg, t1); - }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); + }); } } @@ -7567,8 +7784,7 @@ void test_ftint_u_s(size_t data_size, TestCaseMsa2RF_F_U tc_d[], __ ctcmsa(msareg, t0); __ ftint_u_w(w2, w0); __ ctcmsa(msareg, t1); - }, - load_uint32_elements_of_vector, store_uint32_elements_of_vector); + }); } } @@ -7583,8 +7799,7 @@ void test_ftint_u_d(size_t data_size, TestCaseMsa2RF_D_U tc_d[], __ ctcmsa(msareg, t0); __ ftint_u_d(w2, w0); __ ctcmsa(msareg, t1); - }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); + }); } } @@ -7700,13 +7915,11 @@ TEST(MSA_ffint_u) { for (size_t i = 0; i < sizeof(tc_s) / sizeof(TestCaseMsa2RF_U_F); ++i) { run_msa_2r(reinterpret_cast(&tc_s[i]), - [](MacroAssembler& assm) { __ ffint_u_w(w2, w0); }, - load_uint32_elements_of_vector, store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ ffint_u_w(w2, w0); }); } for (size_t i = 0; i < sizeof(tc_d) / sizeof(TestCaseMsa2RF_U_D); ++i) { run_msa_2r(reinterpret_cast(&tc_d[i]), - [](MacroAssembler& assm) { __ ffint_u_d(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); + [](MacroAssembler& assm) { __ ffint_u_d(w2, w0); }); } } @@ -7742,13 +7955,11 @@ TEST(MSA_ffint_s) { for (size_t i = 0; i < sizeof(tc_s) / sizeof(TestCaseMsa2RF_I_F); ++i) { run_msa_2r(reinterpret_cast(&tc_s[i]), - [](MacroAssembler& assm) { __ ffint_s_w(w2, w0); }, - load_uint32_elements_of_vector, store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ ffint_s_w(w2, w0); }); } for (size_t i = 0; i < sizeof(tc_d) / sizeof(TestCaseMsa2RF_I_D); ++i) { run_msa_2r(reinterpret_cast(&tc_d[i]), - [](MacroAssembler& assm) { __ ffint_s_d(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); + [](MacroAssembler& assm) { __ ffint_s_d(w2, w0); }); } } @@ -7801,13 +8012,11 @@ TEST(MSA_fexupl) { for (size_t i = 0; i < sizeof(tc_s) / sizeof(TestCaseMsa2RF_U16_F); ++i) { run_msa_2r(reinterpret_cast(&tc_s[i]), - [](MacroAssembler& assm) { __ fexupl_w(w2, w0); }, - load_uint16_elements_of_vector, store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ fexupl_w(w2, w0); }); } for (size_t i = 0; i < sizeof(tc_d) / sizeof(TestCaseMsa2RF_F_D); ++i) { run_msa_2r(reinterpret_cast(&tc_d[i]), - [](MacroAssembler& assm) { __ fexupl_d(w2, w0); }, - load_uint32_elements_of_vector, store_uint64_elements_of_vector); + [](MacroAssembler& assm) { __ fexupl_d(w2, w0); }); } } @@ -7836,13 +8045,11 @@ TEST(MSA_fexupr) { for (size_t i = 0; i < sizeof(tc_s) / sizeof(TestCaseMsa2RF_U16_F); ++i) { run_msa_2r(reinterpret_cast(&tc_s[i]), - [](MacroAssembler& assm) { __ fexupr_w(w2, w0); }, - load_uint16_elements_of_vector, store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ fexupr_w(w2, w0); }); } for (size_t i = 0; i < sizeof(tc_d) / sizeof(TestCaseMsa2RF_F_D); ++i) { run_msa_2r(reinterpret_cast(&tc_d[i]), - [](MacroAssembler& assm) { __ fexupr_d(w2, w0); }, - load_uint32_elements_of_vector, store_uint64_elements_of_vector); + [](MacroAssembler& assm) { __ fexupr_d(w2, w0); }); } } @@ -7871,13 +8078,11 @@ TEST(MSA_ffql) { for (size_t i = 0; i < sizeof(tc_s) / 
sizeof(TestCaseMsa2RF_U16_F); ++i) { run_msa_2r(reinterpret_cast(&tc_s[i]), - [](MacroAssembler& assm) { __ ffql_w(w2, w0); }, - load_uint16_elements_of_vector, store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ ffql_w(w2, w0); }); } for (size_t i = 0; i < sizeof(tc_d) / sizeof(TestCaseMsa2RF_U32_D); ++i) { run_msa_2r(reinterpret_cast(&tc_d[i]), - [](MacroAssembler& assm) { __ ffql_d(w2, w0); }, - load_uint32_elements_of_vector, store_uint64_elements_of_vector); + [](MacroAssembler& assm) { __ ffql_d(w2, w0); }); } } @@ -7897,13 +8102,11 @@ TEST(MSA_ffqr) { for (size_t i = 0; i < sizeof(tc_s) / sizeof(TestCaseMsa2RF_U16_F); ++i) { run_msa_2r(reinterpret_cast(&tc_s[i]), - [](MacroAssembler& assm) { __ ffqr_w(w2, w0); }, - load_uint16_elements_of_vector, store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ ffqr_w(w2, w0); }); } for (size_t i = 0; i < sizeof(tc_d) / sizeof(TestCaseMsa2RF_U32_D); ++i) { run_msa_2r(reinterpret_cast(&tc_d[i]), - [](MacroAssembler& assm) { __ ffqr_d(w2, w0); }, - load_uint32_elements_of_vector, store_uint64_elements_of_vector); + [](MacroAssembler& assm) { __ ffqr_d(w2, w0); }); } } @@ -7928,13 +8131,13 @@ void run_msa_vector(struct TestCaseMsaVector* input, CpuFeatureScope fscope(&assm, MIPS_SIMD); msa_reg_t res; - load_uint64_elements_of_vector(assm, &(input->ws_lo), w0, t0, t1); - load_uint64_elements_of_vector(assm, &(input->wt_lo), w2, t0, t1); - load_uint64_elements_of_vector(assm, &(input->wd_lo), w4, t0, t1); + load_elements_of_vector(assm, &(input->ws_lo), w0, t0, t1); + load_elements_of_vector(assm, &(input->wt_lo), w2, t0, t1); + load_elements_of_vector(assm, &(input->wd_lo), w4, t0, t1); GenerateVectorInstructionFunc(assm); - store_uint64_elements_of_vector(assm, w4, a0); + store_elements_of_vector(assm, w4, a0); __ jr(ra); __ nop(); @@ -8018,12 +8221,12 @@ void run_msa_bit(struct TestCaseMsaBit* input, InstFunc GenerateInstructionFunc, CpuFeatureScope fscope(&assm, MIPS_SIMD); msa_reg_t res; - load_uint64_elements_of_vector(assm, &(input->ws_lo), w0, t0, t1); - load_uint64_elements_of_vector(assm, &(input->wd_lo), w2, t0, t1); + load_elements_of_vector(assm, &(input->ws_lo), w0, t0, t1); + load_elements_of_vector(assm, &(input->wd_lo), w2, t0, t1); GenerateInstructionFunc(assm, input->m); - store_uint64_elements_of_vector(assm, w2, a0); + store_elements_of_vector(assm, w2, a0); __ jr(ra); __ nop(); @@ -8497,7 +8700,7 @@ void run_msa_i10(int32_t input, InstFunc GenerateVectorInstructionFunc, GenerateVectorInstructionFunc(assm, input); - store_uint64_elements_of_vector(assm, w0, a0); + store_elements_of_vector(assm, w0, a0); __ jr(ra); __ nop(); @@ -8626,7 +8829,6 @@ TEST(MSA_load_store_vector) { __ st_d(w0, MemOperand(a1, i)); } }); -#undef LDI_DF } struct TestCaseMsa3R { @@ -8650,15 +8852,14 @@ void run_msa_3r(struct TestCaseMsa3R* input, InstFunc GenerateI5InstructionFunc, v8::internal::CodeObjectRequired::kYes); CpuFeatureScope fscope(&assm, MIPS_SIMD); msa_reg_t res; - uint64_t expected; - load_uint64_elements_of_vector(assm, &(input->wt_lo), w0, t0, t1); - load_uint64_elements_of_vector(assm, &(input->ws_lo), w1, t0, t1); - load_uint64_elements_of_vector(assm, &(input->wd_lo), w2, t0, t1); + load_elements_of_vector(assm, &(input->wt_lo), w0, t0, t1); + load_elements_of_vector(assm, &(input->ws_lo), w1, t0, t1); + load_elements_of_vector(assm, &(input->wd_lo), w2, t0, t1); GenerateI5InstructionFunc(assm); - store_uint64_elements_of_vector(assm, w2, a0); + store_elements_of_vector(assm, w2, a0); __ jr(ra); __ nop(); @@ 
-8674,14 +8875,12 @@ void run_msa_3r(struct TestCaseMsa3R* input, InstFunc GenerateI5InstructionFunc, (CALL_GENERATED_CODE(isolate, f, &res, 0, 0, 0, 0)); - expected = GenerateOperationFunc(input->ws_lo, input->wt_lo, input->wd_lo); - if (expected != Unpredictable) { - CHECK_EQ(expected, res.d[0]); + GenerateOperationFunc(&input->ws_lo, &input->wt_lo, &input->wd_lo); + if (input->wd_lo != Unpredictable) { + CHECK_EQ(input->wd_lo, res.d[0]); } - - expected = GenerateOperationFunc(input->ws_hi, input->wt_hi, input->wd_hi); - if (expected != Unpredictable) { - CHECK_EQ(expected, res.d[1]); + if (input->wd_hi != Unpredictable) { + CHECK_EQ(input->wd_hi, res.d[1]); } } @@ -8719,479 +8918,630 @@ TEST(MSA_3R_instructions) { {0xffff00000000ffff, 0xffff00000000ffff, 0xffff00000000ffff, 0xffff00000000ffff, 0xffff00000000ffff, 0xffff00000000ffff}}; -#define SLL_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T src_op = static_cast((ws >> shift) & mask); \ - T shift_op = static_cast((wt >> shift) & mask) % size_in_bits; \ - res |= (static_cast(src_op << shift_op) & mask) << shift; \ - } \ - return res +#define SLL_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T src_op = static_cast((ws[i] >> shift) & mask); \ + T shift_op = static_cast((wt[i] >> shift) & mask) % size_in_bits; \ + res |= (static_cast(src_op << shift_op) & mask) << shift; \ + } \ + wd[i] = res; \ + } -#define SRA_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T src_op = static_cast((ws >> shift) & mask); \ - T shift_op = ((wt >> shift) & mask) % size_in_bits; \ - res |= \ - (static_cast(ArithmeticShiftRight(src_op, shift_op) & mask)) \ - << shift; \ - } \ - return res - -#define SRL_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T src_op = static_cast((ws >> shift) & mask); \ - T shift_op = static_cast(((wt >> shift) & mask) % size_in_bits); \ - res |= (static_cast(src_op >> shift_op) & mask) << shift; \ - } \ - return res - -#define BCRL_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T src_op = static_cast((ws >> shift) & mask); \ - T shift_op = static_cast(((wt >> shift) & mask) % size_in_bits); \ - T r = (static_cast(~(1ull << shift_op)) & src_op) & mask; \ - res |= static_cast(r) << shift; \ - } \ - return res - -#define BSET_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T src_op = static_cast((ws >> shift) & mask); \ - T shift_op = static_cast(((wt >> shift) & mask) % size_in_bits); \ - T r = (static_cast(1ull << shift_op) | src_op) & mask; \ - res |= static_cast(r) << shift; \ - } \ - return res - -#define BNEG_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T src_op = static_cast((ws >> shift) & mask); \ - T shift_op = static_cast(((wt >> shift) & 
mask) % size_in_bits); \ - T r = (static_cast(1ull << shift_op) ^ src_op) & mask; \ - res |= static_cast(r) << shift; \ - } \ - return res - -#define BINSL_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wd_op = static_cast((wd >> shift) & mask); \ - T shift_op = static_cast(((wt >> shift) & mask) % size_in_bits); \ - int bits = shift_op + 1; \ - T r; \ - if (bits == size_in_bits) { \ - r = static_cast(ws_op); \ - } else { \ - uint64_t mask2 = ((1ull << bits) - 1) << (size_in_bits - bits); \ - r = static_cast((static_cast(mask2) & ws_op) | \ - (static_cast(~mask2) & wd_op)); \ - } \ - res |= static_cast(r) << shift; \ - } \ - return res - -#define BINSR_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wd_op = static_cast((wd >> shift) & mask); \ - T shift_op = static_cast(((wt >> shift) & mask) % size_in_bits); \ - int bits = shift_op + 1; \ - T r; \ - if (bits == size_in_bits) { \ - r = static_cast(ws_op); \ - } else { \ - uint64_t mask2 = (1ull << bits) - 1; \ - r = static_cast((static_cast(mask2) & ws_op) | \ - (static_cast(~mask2) & wd_op)); \ - } \ - res |= static_cast(r) << shift; \ - } \ - return res - -#define ADDV_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= (static_cast(ws_op + wt_op) & mask) << shift; \ - } \ - return res - -#define SUBV_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= (static_cast(ws_op - wt_op) & mask) << shift; \ - } \ - return res - -#define MAX_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= (static_cast(Max(ws_op, wt_op)) & mask) << shift; \ - } \ - return res - -#define MIN_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= (static_cast(Min(ws_op, wt_op)) & mask) << shift; \ - } \ - return res - -#define MAXA_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= (static_cast(Nabs(ws_op) < Nabs(wt_op) ? 
ws_op : wt_op) & \ - mask) \ - << shift; \ - } \ - return res - -#define MINA_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= (static_cast(Nabs(ws_op) > Nabs(wt_op) ? ws_op : wt_op) & \ - mask) \ - << shift; \ - } \ - return res - -#define CEQ_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= \ - (static_cast(!Compare(ws_op, wt_op) ? -1ull : 0ull) & mask) \ - << shift; \ - } \ - return res - -#define CLT_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= \ - (static_cast((Compare(ws_op, wt_op) == -1) ? -1ull : 0ull) & \ - mask) \ - << shift; \ - } \ - return res - -#define CLE_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= \ - (static_cast((Compare(ws_op, wt_op) != 1) ? -1ull : 0ull) & \ - mask) \ - << shift; \ - } \ - return res - -#define ADD_A_DF(T, lanes, mask) \ - uint64_t res = 0; \ +#define SRA_DF(T, lanes, mask) \ int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= (static_cast(Abs(ws_op) + Abs(wt_op)) & mask) << shift; \ - } \ - return res + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T src_op = static_cast((ws[i] >> shift) & mask); \ + T shift_op = ((wt[i] >> shift) & mask) % size_in_bits; \ + res |= (static_cast(ArithmeticShiftRight(src_op, shift_op) & \ + mask)) \ + << shift; \ + } \ + wd[i] = res; \ + } -#define ADDS_A_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = Nabs(static_cast((ws >> shift) & mask)); \ - T wt_op = Nabs(static_cast((wt >> shift) & mask)); \ - T r; \ - if (ws_op < -std::numeric_limits::max() - wt_op) { \ - r = std::numeric_limits::max(); \ - } else { \ - r = -(ws_op + wt_op); \ - } \ - res |= (static_cast(r) & mask) << shift; \ - } \ - return res +#define SRL_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T src_op = static_cast((ws[i] >> shift) & mask); \ + T shift_op = static_cast(((wt[i] >> shift) & mask) % size_in_bits); \ + res |= (static_cast(src_op >> shift_op) & mask) << shift; \ + } \ + wd[i] = res; \ + } -#define ADDS_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws 
>> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= (static_cast(SaturateAdd(ws_op, wt_op)) & mask) << shift; \ - } \ - return res +#define BCRL_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T src_op = static_cast((ws[i] >> shift) & mask); \ + T shift_op = static_cast(((wt[i] >> shift) & mask) % size_in_bits); \ + T r = (static_cast(~(1ull << shift_op)) & src_op) & mask; \ + res |= static_cast(r) << shift; \ + } \ + wd[i] = res; \ + } -#define AVE_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= (static_cast(((wt_op & ws_op) + ((ws_op ^ wt_op) >> 1)) & \ - mask)) \ - << shift; \ - } \ - return res +#define BSET_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T src_op = static_cast((ws[i] >> shift) & mask); \ + T shift_op = static_cast(((wt[i] >> shift) & mask) % size_in_bits); \ + T r = (static_cast(1ull << shift_op) | src_op) & mask; \ + res |= static_cast(r) << shift; \ + } \ + wd[i] = res; \ + } -#define AVER_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= (static_cast(((wt_op | ws_op) - ((ws_op ^ wt_op) >> 1)) & \ - mask)) \ - << shift; \ - } \ - return res +#define BNEG_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T src_op = static_cast((ws[i] >> shift) & mask); \ + T shift_op = static_cast(((wt[i] >> shift) & mask) % size_in_bits); \ + T r = (static_cast(1ull << shift_op) ^ src_op) & mask; \ + res |= static_cast(r) << shift; \ + } \ + wd[i] = res; \ + } -#define SUBS_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= (static_cast(SaturateSub(ws_op, wt_op)) & mask) << shift; \ - } \ - return res +#define BINSL_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wd_op = static_cast((wd[i] >> shift) & mask); \ + T shift_op = static_cast(((wt[i] >> shift) & mask) % size_in_bits); \ + int bits = shift_op + 1; \ + T r; \ + if (bits == size_in_bits) { \ + r = static_cast(ws_op); \ + } else { \ + uint64_t mask2 = ((1ull << bits) - 1) << (size_in_bits - bits); \ + r = static_cast((static_cast(mask2) & ws_op) | \ + (static_cast(~mask2) & wd_op)); \ + } \ + res |= static_cast(r) << shift; \ + } \ + wd[i] = res; \ + } -#define SUBSUS_U_DF(T, lanes, mask) \ - typedef typename std::make_unsigned::type uT; \ - uint64_t res = 0; \ - int 
size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - uT ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - T r; \ - if (wt_op > 0) { \ - uT wtu = static_cast(wt_op); \ - if (wtu > ws_op) { \ - r = 0; \ - } else { \ - r = static_cast(ws_op - wtu); \ - } \ - } else { \ - if (ws_op > std::numeric_limits::max() + wt_op) { \ - r = static_cast(std::numeric_limits::max()); \ - } else { \ - r = static_cast(ws_op - wt_op); \ - } \ - } \ - res |= (static_cast(r) & mask) << shift; \ - } \ - return res +#define BINSR_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wd_op = static_cast((wd[i] >> shift) & mask); \ + T shift_op = static_cast(((wt[i] >> shift) & mask) % size_in_bits); \ + int bits = shift_op + 1; \ + T r; \ + if (bits == size_in_bits) { \ + r = static_cast(ws_op); \ + } else { \ + uint64_t mask2 = (1ull << bits) - 1; \ + r = static_cast((static_cast(mask2) & ws_op) | \ + (static_cast(~mask2) & wd_op)); \ + } \ + res |= static_cast(r) << shift; \ + } \ + wd[i] = res; \ + } -#define SUBSUU_S_DF(T, lanes, mask) \ - typedef typename std::make_unsigned::type uT; \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - uT ws_op = static_cast((ws >> shift) & mask); \ - uT wt_op = static_cast((wt >> shift) & mask); \ - uT wdu; \ - T r; \ - if (ws_op > wt_op) { \ - wdu = ws_op - wt_op; \ - if (wdu > std::numeric_limits::max()) { \ - r = std::numeric_limits::max(); \ - } else { \ - r = static_cast(wdu); \ - } \ - } else { \ - wdu = wt_op - ws_op; \ - CHECK(-std::numeric_limits::max() == \ - std::numeric_limits::min() + 1); \ - if (wdu <= std::numeric_limits::max()) { \ - r = -static_cast(wdu); \ - } else { \ - r = std::numeric_limits::min(); \ - } \ - } \ - res |= (static_cast(r) & mask) << shift; \ - } \ - return res +#define ADDV_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= (static_cast(ws_op + wt_op) & mask) << shift; \ + } \ + wd[i] = res; \ + } -#define ASUB_S_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= (static_cast(Abs(ws_op - wt_op)) & mask) << shift; \ - } \ - return res +#define SUBV_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= (static_cast(ws_op - wt_op) & mask) << shift; \ + } \ + wd[i] = res; \ + } -#define ASUB_U_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & 
mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= (static_cast(ws_op > wt_op ? ws_op - wt_op \ - : wt_op - ws_op) & \ - mask) \ - << shift; \ - } \ - return res +#define MAX_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= (static_cast(Max(ws_op, wt_op)) & mask) << shift; \ + } \ + wd[i] = res; \ + } -#define MULV_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= (static_cast(ws_op * wt_op) & mask) << shift; \ - } \ - return res +#define MIN_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= (static_cast(Min(ws_op, wt_op)) & mask) << shift; \ + } \ + wd[i] = res; \ + } -#define MADDV_DF(T, lanes, mask) \ - uint64_t res = 0; \ +#define MAXA_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= \ + (static_cast(Nabs(ws_op) < Nabs(wt_op) ? ws_op : wt_op) & \ + mask) \ + << shift; \ + } \ + wd[i] = res; \ + } + +#define MINA_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= \ + (static_cast(Nabs(ws_op) > Nabs(wt_op) ? ws_op : wt_op) & \ + mask) \ + << shift; \ + } \ + wd[i] = res; \ + } + +#define CEQ_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= (static_cast(!Compare(ws_op, wt_op) ? -1ull : 0ull) & \ + mask) \ + << shift; \ + } \ + wd[i] = res; \ + } + +#define CLT_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= (static_cast((Compare(ws_op, wt_op) == -1) ? 
-1ull \ + : 0ull) & \ + mask) \ + << shift; \ + } \ + wd[i] = res; \ + } + +#define CLE_DF(T, lanes, mask) \ int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - T wd_op = static_cast((wd >> shift) & mask); \ - res |= (static_cast(wd_op + ws_op * wt_op) & mask) << shift; \ - } \ - return res + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= (static_cast((Compare(ws_op, wt_op) != 1) ? -1ull \ + : 0ull) & \ + mask) \ + << shift; \ + } \ + wd[i] = res; \ + } -#define MSUBV_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - T wd_op = static_cast((wd >> shift) & mask); \ - res |= (static_cast(wd_op - ws_op * wt_op) & mask) << shift; \ - } \ - return res +#define ADD_A_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= (static_cast(Abs(ws_op) + Abs(wt_op)) & mask) << shift; \ + } \ + wd[i] = res; \ + } -#define DIV_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - if (wt_op == 0) { \ - res = Unpredictable; \ - break; \ - } \ - res |= (static_cast(ws_op / wt_op) & mask) << shift; \ - } \ - return res +#define ADDS_A_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = Nabs(static_cast((ws[i] >> shift) & mask)); \ + T wt_op = Nabs(static_cast((wt[i] >> shift) & mask)); \ + T r; \ + if (ws_op < -std::numeric_limits::max() - wt_op) { \ + r = std::numeric_limits::max(); \ + } else { \ + r = -(ws_op + wt_op); \ + } \ + res |= (static_cast(r) & mask) << shift; \ + } \ + wd[i] = res; \ + } -#define MOD_DF(T, lanes, mask) \ - uint64_t res = 0; \ +#define ADDS_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= (static_cast(SaturateAdd(ws_op, wt_op)) & mask) \ + << shift; \ + } \ + wd[i] = res; \ + } + +#define AVE_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= (static_cast( \ + ((wt_op & ws_op) + ((ws_op ^ wt_op) >> 1)) & mask)) \ + << shift; \ + } \ + wd[i] = res; \ + } + 
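// Illustrative sketch only (ave_u8 is a hypothetical helper, not part of this
// test file, and assumes <cstdint> is available): the AVE_DF expected-value
// macro above relies on the identity a + b == 2 * (a & b) + (a ^ b), so
// (a & b) + ((a ^ b) >> 1) gives the truncated average (a + b) >> 1 without
// ever overflowing the lane width.
#include <cstdint>

constexpr uint8_t ave_u8(uint8_t a, uint8_t b) {
  // Overflow-free per-lane average, mirroring the expression used in AVE_DF.
  return static_cast<uint8_t>((a & b) + ((a ^ b) >> 1));
}
static_assert(ave_u8(0xff, 0xff) == 0xff, "no overflow even though a + b exceeds UINT8_MAX");
static_assert(ave_u8(0x10, 0x20) == 0x18, "(16 + 32) / 2 == 24");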
+#define AVER_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= (static_cast( \ + ((wt_op | ws_op) - ((ws_op ^ wt_op) >> 1)) & mask)) \ + << shift; \ + } \ + wd[i] = res; \ + } + +#define SUBS_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= (static_cast(SaturateSub(ws_op, wt_op)) & mask) \ + << shift; \ + } \ + wd[i] = res; \ + } + +#define SUBSUS_U_DF(T, lanes, mask) \ + typedef typename std::make_unsigned::type uT; \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + uT ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + T r; \ + if (wt_op > 0) { \ + uT wtu = static_cast(wt_op); \ + if (wtu > ws_op) { \ + r = 0; \ + } else { \ + r = static_cast(ws_op - wtu); \ + } \ + } else { \ + if (ws_op > std::numeric_limits::max() + wt_op) { \ + r = static_cast(std::numeric_limits::max()); \ + } else { \ + r = static_cast(ws_op - wt_op); \ + } \ + } \ + res |= (static_cast(r) & mask) << shift; \ + } \ + wd[i] = res; \ + } + +#define SUBSUU_S_DF(T, lanes, mask) \ + typedef typename std::make_unsigned::type uT; \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + uT ws_op = static_cast((ws[i] >> shift) & mask); \ + uT wt_op = static_cast((wt[i] >> shift) & mask); \ + uT wdu; \ + T r; \ + if (ws_op > wt_op) { \ + wdu = ws_op - wt_op; \ + if (wdu > std::numeric_limits::max()) { \ + r = std::numeric_limits::max(); \ + } else { \ + r = static_cast(wdu); \ + } \ + } else { \ + wdu = wt_op - ws_op; \ + CHECK(-std::numeric_limits::max() == \ + std::numeric_limits::min() + 1); \ + if (wdu <= std::numeric_limits::max()) { \ + r = -static_cast(wdu); \ + } else { \ + r = std::numeric_limits::min(); \ + } \ + } \ + res |= (static_cast(r) & mask) << shift; \ + } \ + wd[i] = res; \ + } + +#define ASUB_S_DF(T, lanes, mask) \ int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - if (wt_op == 0) { \ - res = Unpredictable; \ - break; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= (static_cast(Abs(ws_op - wt_op)) & mask) << shift; \ } \ - res |= (static_cast(wt_op != 0 ? 
ws_op % wt_op : 0) & mask) \ - << shift; \ - } \ - return res + wd[i] = res; \ + } -#define SRAR_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T src_op = static_cast((ws >> shift) & mask); \ - T shift_op = ((wt >> shift) & mask) % size_in_bits; \ - uint32_t bit = shift_op == 0 ? 0 : src_op >> (shift_op - 1) & 1; \ - res |= \ - (static_cast(ArithmeticShiftRight(src_op, shift_op) + bit) & \ - mask) \ - << shift; \ - } \ - return res +#define ASUB_U_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= (static_cast(ws_op > wt_op ? ws_op - wt_op \ + : wt_op - ws_op) & \ + mask) \ + << shift; \ + } \ + wd[i] = res; \ + } + +#define MULV_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= (static_cast(ws_op * wt_op) & mask) << shift; \ + } \ + wd[i] = res; \ + } + +#define MADDV_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + T wd_op = static_cast((wd[i] >> shift) & mask); \ + res |= (static_cast(wd_op + ws_op * wt_op) & mask) << shift; \ + } \ + wd[i] = res; \ + } + +#define MSUBV_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + T wd_op = static_cast((wd[i] >> shift) & mask); \ + res |= (static_cast(wd_op - ws_op * wt_op) & mask) << shift; \ + } \ + wd[i] = res; \ + } + +#define DIV_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + if (wt_op == 0) { \ + res = Unpredictable; \ + break; \ + } \ + res |= (static_cast(ws_op / wt_op) & mask) << shift; \ + } \ + wd[i] = res; \ + } + +#define MOD_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + if (wt_op == 0) { \ + res = Unpredictable; \ + break; \ + } \ + res |= (static_cast(wt_op != 0 ? 
ws_op % wt_op : 0) & mask) \ + << shift; \ + } \ + wd[i] = res; \ + } + +#define SRAR_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T src_op = static_cast((ws[i] >> shift) & mask); \ + T shift_op = ((wt[i] >> shift) & mask) % size_in_bits; \ + uint32_t bit = shift_op == 0 ? 0 : src_op >> (shift_op - 1) & 1; \ + res |= (static_cast(ArithmeticShiftRight(src_op, shift_op) + \ + bit) & \ + mask) \ + << shift; \ + } \ + wd[i] = res; \ + } + +#define PCKEV_DF(T, lanes, mask) \ + T* ws_p = reinterpret_cast(ws); \ + T* wt_p = reinterpret_cast(wt); \ + T* wd_p = reinterpret_cast(wd); \ + for (int i = 0; i < lanes / 2; ++i) { \ + wd_p[i] = wt_p[2 * i]; \ + wd_p[i + lanes / 2] = ws_p[2 * i]; \ + } + +#define PCKOD_DF(T, lanes, mask) \ + T* ws_p = reinterpret_cast(ws); \ + T* wt_p = reinterpret_cast(wt); \ + T* wd_p = reinterpret_cast(wd); \ + for (int i = 0; i < lanes / 2; ++i) { \ + wd_p[i] = wt_p[2 * i + 1]; \ + wd_p[i + lanes / 2] = ws_p[2 * i + 1]; \ + } + +#define ILVL_DF(T, lanes, mask) \ + T* ws_p = reinterpret_cast(ws); \ + T* wt_p = reinterpret_cast(wt); \ + T* wd_p = reinterpret_cast(wd); \ + for (int i = 0; i < lanes / 2; ++i) { \ + wd_p[2 * i] = wt_p[i + lanes / 2]; \ + wd_p[2 * i + 1] = ws_p[i + lanes / 2]; \ + } + +#define ILVR_DF(T, lanes, mask) \ + T* ws_p = reinterpret_cast(ws); \ + T* wt_p = reinterpret_cast(wt); \ + T* wd_p = reinterpret_cast(wd); \ + for (int i = 0; i < lanes / 2; ++i) { \ + wd_p[2 * i] = wt_p[i]; \ + wd_p[2 * i + 1] = ws_p[i]; \ + } + +#define ILVEV_DF(T, lanes, mask) \ + T* ws_p = reinterpret_cast(ws); \ + T* wt_p = reinterpret_cast(wt); \ + T* wd_p = reinterpret_cast(wd); \ + for (int i = 0; i < lanes / 2; ++i) { \ + wd_p[2 * i] = wt_p[2 * i]; \ + wd_p[2 * i + 1] = ws_p[2 * i]; \ + } + +#define ILVOD_DF(T, lanes, mask) \ + T* ws_p = reinterpret_cast(ws); \ + T* wt_p = reinterpret_cast(wt); \ + T* wd_p = reinterpret_cast(wd); \ + for (int i = 0; i < lanes / 2; ++i) { \ + wd_p[2 * i] = wt_p[2 * i + 1]; \ + wd_p[2 * i + 1] = ws_p[2 * i + 1]; \ + } + +#define VSHF_DF(T, lanes, mask) \ + T* ws_p = reinterpret_cast(ws); \ + T* wt_p = reinterpret_cast(wt); \ + T* wd_p = reinterpret_cast(wd); \ + const int mask_not_valid = 0xc0; \ + const int mask_6bits = 0x3f; \ + for (int i = 0; i < lanes; ++i) { \ + if ((wd_p[i] & mask_not_valid)) { \ + wd_p[i] = 0; \ + } else { \ + int k = (wd_p[i] & mask_6bits) % (lanes * 2); \ + wd_p[i] = k > lanes ? 
ws_p[k - lanes] : wt_p[k]; \ + } \ + } + +#define HADD_DF(T, T_small, lanes) \ + T_small* ws_p = reinterpret_cast(ws); \ + T_small* wt_p = reinterpret_cast(wt); \ + T* wd_p = reinterpret_cast(wd); \ + for (int i = 0; i < lanes; ++i) { \ + wd_p[i] = static_cast(ws_p[2 * i + 1]) + static_cast(wt_p[2 * i]); \ + } + +#define HSUB_DF(T, T_small, lanes) \ + T_small* ws_p = reinterpret_cast(ws); \ + T_small* wt_p = reinterpret_cast(wt); \ + T* wd_p = reinterpret_cast(wd); \ + for (int i = 0; i < lanes; ++i) { \ + wd_p[i] = static_cast(ws_p[2 * i + 1]) - static_cast(wt_p[2 * i]); \ + } #define TEST_CASE(V) \ V(sll_b, SLL_DF, uint8_t, kMSALanesByte, UINT8_MAX) \ V(sll_h, SLL_DF, uint16_t, kMSALanesHalf, UINT16_MAX) \ V(sll_w, SLL_DF, uint32_t, kMSALanesWord, UINT32_MAX) \ V(sll_d, SLL_DF, uint64_t, kMSALanesDword, UINT64_MAX) \ - V(sra_b, SRA_DF, int8_t, kMSALanesByte, UINT8_MAX) \ - V(sra_h, SRA_DF, int16_t, kMSALanesHalf, UINT16_MAX) \ - V(sra_w, SRA_DF, int32_t, kMSALanesWord, UINT32_MAX) \ - V(sra_d, SRA_DF, int64_t, kMSALanesDword, UINT64_MAX) \ V(srl_b, SRL_DF, uint8_t, kMSALanesByte, UINT8_MAX) \ V(srl_h, SRL_DF, uint16_t, kMSALanesHalf, UINT16_MAX) \ V(srl_w, SRL_DF, uint32_t, kMSALanesWord, UINT32_MAX) \ @@ -9352,18 +9702,54 @@ TEST(MSA_3R_instructions) { V(mod_u_h, MOD_DF, uint16_t, kMSALanesHalf, UINT16_MAX) \ V(mod_u_w, MOD_DF, uint32_t, kMSALanesWord, UINT32_MAX) \ V(mod_u_d, MOD_DF, uint64_t, kMSALanesDword, UINT64_MAX) \ - V(srar_b, SRAR_DF, int8_t, kMSALanesByte, UINT8_MAX) \ - V(srar_h, SRAR_DF, int16_t, kMSALanesHalf, UINT16_MAX) \ - V(srar_w, SRAR_DF, int32_t, kMSALanesWord, UINT32_MAX) \ - V(srar_d, SRAR_DF, int64_t, kMSALanesDword, UINT64_MAX) \ V(srlr_b, SRAR_DF, uint8_t, kMSALanesByte, UINT8_MAX) \ V(srlr_h, SRAR_DF, uint16_t, kMSALanesHalf, UINT16_MAX) \ V(srlr_w, SRAR_DF, uint32_t, kMSALanesWord, UINT32_MAX) \ - V(srlr_d, SRAR_DF, uint64_t, kMSALanesDword, UINT64_MAX) + V(srlr_d, SRAR_DF, uint64_t, kMSALanesDword, UINT64_MAX) \ + V(pckev_b, PCKEV_DF, uint8_t, kMSALanesByte, UINT8_MAX) \ + V(pckev_h, PCKEV_DF, uint16_t, kMSALanesHalf, UINT16_MAX) \ + V(pckev_w, PCKEV_DF, uint32_t, kMSALanesWord, UINT32_MAX) \ + V(pckev_d, PCKEV_DF, uint64_t, kMSALanesDword, UINT64_MAX) \ + V(pckod_b, PCKOD_DF, uint8_t, kMSALanesByte, UINT8_MAX) \ + V(pckod_h, PCKOD_DF, uint16_t, kMSALanesHalf, UINT16_MAX) \ + V(pckod_w, PCKOD_DF, uint32_t, kMSALanesWord, UINT32_MAX) \ + V(pckod_d, PCKOD_DF, uint64_t, kMSALanesDword, UINT64_MAX) \ + V(ilvl_b, ILVL_DF, uint8_t, kMSALanesByte, UINT8_MAX) \ + V(ilvl_h, ILVL_DF, uint16_t, kMSALanesHalf, UINT16_MAX) \ + V(ilvl_w, ILVL_DF, uint32_t, kMSALanesWord, UINT32_MAX) \ + V(ilvl_d, ILVL_DF, uint64_t, kMSALanesDword, UINT64_MAX) \ + V(ilvr_b, ILVR_DF, uint8_t, kMSALanesByte, UINT8_MAX) \ + V(ilvr_h, ILVR_DF, uint16_t, kMSALanesHalf, UINT16_MAX) \ + V(ilvr_w, ILVR_DF, uint32_t, kMSALanesWord, UINT32_MAX) \ + V(ilvr_d, ILVR_DF, uint64_t, kMSALanesDword, UINT64_MAX) \ + V(ilvev_b, ILVEV_DF, uint8_t, kMSALanesByte, UINT8_MAX) \ + V(ilvev_h, ILVEV_DF, uint16_t, kMSALanesHalf, UINT16_MAX) \ + V(ilvev_w, ILVEV_DF, uint32_t, kMSALanesWord, UINT32_MAX) \ + V(ilvev_d, ILVEV_DF, uint64_t, kMSALanesDword, UINT64_MAX) \ + V(ilvod_b, ILVOD_DF, uint8_t, kMSALanesByte, UINT8_MAX) \ + V(ilvod_h, ILVOD_DF, uint16_t, kMSALanesHalf, UINT16_MAX) \ + V(ilvod_w, ILVOD_DF, uint32_t, kMSALanesWord, UINT32_MAX) \ + V(ilvod_d, ILVOD_DF, uint64_t, kMSALanesDword, UINT64_MAX) \ + V(vshf_b, VSHF_DF, uint8_t, kMSALanesByte, UINT8_MAX) \ + V(vshf_h, VSHF_DF, uint16_t, kMSALanesHalf, 
UINT16_MAX) \ + V(vshf_w, VSHF_DF, uint32_t, kMSALanesWord, UINT32_MAX) \ + V(vshf_d, VSHF_DF, uint64_t, kMSALanesDword, UINT64_MAX) \ + V(hadd_s_h, HADD_DF, int16_t, int8_t, kMSALanesHalf) \ + V(hadd_s_w, HADD_DF, int32_t, int16_t, kMSALanesWord) \ + V(hadd_s_d, HADD_DF, int64_t, int32_t, kMSALanesDword) \ + V(hadd_u_h, HADD_DF, uint16_t, uint8_t, kMSALanesHalf) \ + V(hadd_u_w, HADD_DF, uint32_t, uint16_t, kMSALanesWord) \ + V(hadd_u_d, HADD_DF, uint64_t, uint32_t, kMSALanesDword) \ + V(hsub_s_h, HSUB_DF, int16_t, int8_t, kMSALanesHalf) \ + V(hsub_s_w, HSUB_DF, int32_t, int16_t, kMSALanesWord) \ + V(hsub_s_d, HSUB_DF, int64_t, int32_t, kMSALanesDword) \ + V(hsub_u_h, HSUB_DF, uint16_t, uint8_t, kMSALanesHalf) \ + V(hsub_u_w, HSUB_DF, uint32_t, uint16_t, kMSALanesWord) \ + V(hsub_u_d, HSUB_DF, uint64_t, uint32_t, kMSALanesDword) #define RUN_TEST(instr, verify, type, lanes, mask) \ run_msa_3r(&tc[i], [](MacroAssembler& assm) { __ instr(w2, w1, w0); }, \ - [](uint64_t ws, uint64_t wt, uint64_t wd) { \ + [](uint64_t* ws, uint64_t* wt, uint64_t* wd) { \ verify(type, lanes, mask); \ }); @@ -9371,9 +9757,41 @@ TEST(MSA_3R_instructions) { TEST_CASE(RUN_TEST) } +#define RUN_TEST2(instr, verify, type, lanes, mask) \ + for (unsigned i = 0; i < arraysize(tc); i++) { \ + for (unsigned j = 0; j < 3; j++) { \ + for (unsigned k = 0; k < lanes; k++) { \ + type* element = reinterpret_cast(&tc[i]); \ + element[k + j * lanes] &= std::numeric_limits::max(); \ + } \ + } \ + } \ + run_msa_3r(&tc[i], [](MacroAssembler& assm) { __ instr(w2, w1, w0); }, \ + [](uint64_t* ws, uint64_t* wt, uint64_t* wd) { \ + verify(type, lanes, mask); \ + }); + +#define TEST_CASE2(V) \ + V(sra_b, SRA_DF, int8_t, kMSALanesByte, UINT8_MAX) \ + V(sra_h, SRA_DF, int16_t, kMSALanesHalf, UINT16_MAX) \ + V(sra_w, SRA_DF, int32_t, kMSALanesWord, UINT32_MAX) \ + V(sra_d, SRA_DF, int64_t, kMSALanesDword, UINT64_MAX) \ + V(srar_b, SRAR_DF, int8_t, kMSALanesByte, UINT8_MAX) \ + V(srar_h, SRAR_DF, int16_t, kMSALanesHalf, UINT16_MAX) \ + V(srar_w, SRAR_DF, int32_t, kMSALanesWord, UINT32_MAX) \ + V(srar_d, SRAR_DF, int64_t, kMSALanesDword, UINT64_MAX) + + for (size_t i = 0; i < arraysize(tc); ++i) { + TEST_CASE2(RUN_TEST2) + } + +#undef TEST_CASE +#undef TEST_CASE2 #undef RUN_TEST +#undef RUN_TEST2 #undef SLL_DF #undef SRL_DF +#undef SRA_DF #undef BCRL_DF #undef BSET_DF #undef BNEG_DF @@ -9404,6 +9822,15 @@ TEST(MSA_3R_instructions) { #undef DIV_DF #undef MOD_DF #undef SRAR_DF +#undef PCKEV_DF +#undef PCKOD_DF +#undef ILVL_DF +#undef ILVR_DF +#undef ILVEV_DF +#undef ILVOD_DF +#undef VSHF_DF +#undef HADD_DF +#undef HSUB_DF } // namespace internal struct TestCaseMsa3RF { @@ -9420,12 +9847,10 @@ struct ExpectedResult_MSA3RF { uint64_t exp_res_hi; }; -template +template void run_msa_3rf(const struct TestCaseMsa3RF* input, const struct ExpectedResult_MSA3RF* output, - Func Generate2RInstructionFunc, - FuncLoad load_elements_of_vector, - FuncStore store_elements_of_vector) { + Func Generate2RInstructionFunc) { Isolate* isolate = CcTest::i_isolate(); HandleScope scope(isolate); @@ -9456,28 +9881,8 @@ void run_msa_3rf(const struct TestCaseMsa3RF* input, (CALL_GENERATED_CODE(isolate, f, &res, 0, 0, 0, 0)); - if (store_elements_of_vector == store_uint64_elements_of_vector) { - CHECK_EQ(output->exp_res_lo, res.d[0]); - CHECK_EQ(output->exp_res_hi, res.d[1]); - } else if (store_elements_of_vector == store_uint32_elements_of_vector) { - const uint32_t* exp_res = - reinterpret_cast(&output->exp_res_lo); - CHECK_EQ(exp_res[0], res.w[0]); - CHECK_EQ(exp_res[1], 
res.w[1]); - CHECK_EQ(exp_res[2], res.w[2]); - CHECK_EQ(exp_res[3], res.w[3]); - } else { - const uint16_t* exp_res = - reinterpret_cast(&output->exp_res_lo); - CHECK_EQ(exp_res[0], res.h[0]); - CHECK_EQ(exp_res[1], res.h[1]); - CHECK_EQ(exp_res[2], res.h[2]); - CHECK_EQ(exp_res[3], res.h[3]); - CHECK_EQ(exp_res[4], res.h[4]); - CHECK_EQ(exp_res[5], res.h[5]); - CHECK_EQ(exp_res[6], res.h[6]); - CHECK_EQ(exp_res[7], res.h[7]); - } + CHECK_EQ(output->exp_res_lo, res.d[0]); + CHECK_EQ(output->exp_res_hi, res.d[1]); } struct TestCaseMsa3RF_F { @@ -9579,15 +9984,11 @@ TEST(MSA_floating_point_quiet_compare) { #define TEST_FP_QUIET_COMPARE_W(instruction, src, exp_res) \ run_msa_3rf(reinterpret_cast(src), \ reinterpret_cast(exp_res), \ - [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }, \ - load_uint32_elements_of_vector, \ - store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }); #define TEST_FP_QUIET_COMPARE_D(instruction, src, exp_res) \ run_msa_3rf(reinterpret_cast(src), \ reinterpret_cast(exp_res), \ - [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }, \ - load_uint64_elements_of_vector, \ - store_uint64_elements_of_vector); + [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }); for (uint64_t i = 0; i < arraysize(tc_w); i++) { TEST_FP_QUIET_COMPARE_W(fcaf_w, &tc_w[i], &exp_res_fcaf) @@ -9702,16 +10103,14 @@ TEST(MSA_floating_point_arithmetic) { reinterpret_cast(src1), \ reinterpret_cast(function( \ src1, src2, src3, reinterpret_cast(&dst_container))), \ - [](MacroAssembler& assm) { __ instr(w2, w0, w1); }, \ - load_uint32_elements_of_vector, store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ instr(w2, w0, w1); }); #define FP_ARITHMETIC_DF_D(instr, function, src1, src2, src3) \ run_msa_3rf( \ reinterpret_cast(src1), \ reinterpret_cast(function( \ src1, src2, src3, reinterpret_cast(&dst_container))), \ - [](MacroAssembler& assm) { __ instr(w2, w0, w1); }, \ - load_uint64_elements_of_vector, store_uint64_elements_of_vector); + [](MacroAssembler& assm) { __ instr(w2, w0, w1); }); for (uint64_t i = 0; i < arraysize(tc_w); i++) { FP_ARITHMETIC_DF_W(fadd_w, fadd_function, &tc_w[i].ws_1, &tc_w[i].wt_1, @@ -9808,16 +10207,12 @@ TEST(MSA_fmin_fmin_a_fmax_fmax_a) { #define TEST_FP_MIN_MAX_W(instruction, src, exp_res) \ run_msa_3rf(reinterpret_cast(src), \ reinterpret_cast(exp_res), \ - [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }, \ - load_uint32_elements_of_vector, \ - store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }); #define TEST_FP_MIN_MAX_D(instruction, src, exp_res) \ run_msa_3rf(reinterpret_cast(src), \ reinterpret_cast(exp_res), \ - [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }, \ - load_uint64_elements_of_vector, \ - store_uint64_elements_of_vector); + [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }); for (uint64_t i = 0; i < arraysize(tc_w); i++) { TEST_FP_MIN_MAX_W(fmax_w, &tc_w[i], &exp_res_fmax_w[i]) @@ -9930,17 +10325,13 @@ TEST(MSA_fixed_point_arithmetic) { {0x800000007fffffff, 0x800000007c33f15e}, {0xb5deb625939d884d, 0xe40dcbfe728756b5}}; -#define TEST_FIXED_POINT_DF_H(instruction, src, exp_res) \ - run_msa_3rf((src), (exp_res), \ - [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }, \ - load_uint16_elements_of_vector, \ - store_uint16_elements_of_vector); +#define TEST_FIXED_POINT_DF_H(instruction, src, exp_res) \ + run_msa_3rf((src), (exp_res), \ + [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }); -#define 
TEST_FIXED_POINT_DF_W(instruction, src, exp_res) \ - run_msa_3rf((src), (exp_res), \ - [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }, \ - load_uint32_elements_of_vector, \ - store_uint32_elements_of_vector); +#define TEST_FIXED_POINT_DF_W(instruction, src, exp_res) \ + run_msa_3rf((src), (exp_res), \ + [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }); for (uint64_t i = 0; i < arraysize(tc_h); i++) { TEST_FIXED_POINT_DF_H(mul_q_h, &tc_h[i], &exp_res_mul_q_h[i]) @@ -10021,16 +10412,12 @@ TEST(MSA_fexdo) { #define TEST_FEXDO_H(instruction, src, exp_res) \ run_msa_3rf(reinterpret_cast(src), \ reinterpret_cast(exp_res), \ - [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }, \ - load_uint32_elements_of_vector, \ - store_uint16_elements_of_vector); + [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }); #define TEST_FEXDO_W(instruction, src, exp_res) \ run_msa_3rf(reinterpret_cast(src), \ reinterpret_cast(exp_res), \ - [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }, \ - load_uint64_elements_of_vector, \ - store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }); for (uint64_t i = 0; i < arraysize(tc_w); i++) { TEST_FEXDO_H(fexdo_h, &tc_w[i], &exp_res_fexdo_w[i]) @@ -10100,16 +10487,12 @@ TEST(MSA_ftq) { #define TEST_FTQ_H(instruction, src, exp_res) \ run_msa_3rf(reinterpret_cast(src), \ reinterpret_cast(exp_res), \ - [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }, \ - load_uint32_elements_of_vector, \ - store_uint16_elements_of_vector); + [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }); #define TEST_FTQ_W(instruction, src, exp_res) \ run_msa_3rf(reinterpret_cast(src), \ reinterpret_cast(exp_res), \ - [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }, \ - load_uint64_elements_of_vector, \ - store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }); for (uint64_t i = 0; i < arraysize(tc_w); i++) { TEST_FTQ_H(ftq_h, &tc_w[i], &exp_res_ftq_w[i]) diff --git a/test/cctest/test-assembler-mips64.cc b/test/cctest/test-assembler-mips64.cc index 394ea882ce..f809ea8f39 100644 --- a/test/cctest/test-assembler-mips64.cc +++ b/test/cctest/test-assembler-mips64.cc @@ -5597,6 +5597,189 @@ TEST(r6_beqzc) { } } +void load_elements_of_vector(MacroAssembler& assm, const uint64_t elements[], + MSARegister w, Register t0, Register t1) { + __ li(t0, static_cast(elements[0] & 0xffffffff)); + __ li(t1, static_cast((elements[0] >> 32) & 0xffffffff)); + __ insert_w(w, 0, t0); + __ insert_w(w, 1, t1); + __ li(t0, static_cast(elements[1] & 0xffffffff)); + __ li(t1, static_cast((elements[1] >> 32) & 0xffffffff)); + __ insert_w(w, 2, t0); + __ insert_w(w, 3, t1); +} + +inline void store_elements_of_vector(MacroAssembler& assm, MSARegister w, + Register a) { + __ st_d(w, MemOperand(a, 0)); +} + +typedef union { + uint8_t b[16]; + uint16_t h[8]; + uint32_t w[4]; + uint64_t d[2]; +} msa_reg_t; + +struct TestCaseMsaBranch { + uint64_t wt_lo; + uint64_t wt_hi; +}; + +template +void run_bz_bnz(TestCaseMsaBranch* input, Branch GenerateBranch, + bool branched) { + Isolate* isolate = CcTest::i_isolate(); + HandleScope scope(isolate); + + MacroAssembler assm(isolate, NULL, 0, v8::internal::CodeObjectRequired::kYes); + CpuFeatureScope fscope(&assm, MIPS_SIMD); + + typedef struct { + uint64_t ws_lo; + uint64_t ws_hi; + uint64_t wd_lo; + uint64_t wd_hi; + } T; + T t = {0x20b9cc4f1a83e0c5, 0xa27e1b5f2f5bb18a, 0x0000000000000000, + 0x0000000000000000}; + msa_reg_t res; + Label do_not_move_w0_to_w2; + + 
load_elements_of_vector(assm, &t.ws_lo, w0, t0, t1); + load_elements_of_vector(assm, &t.wd_lo, w2, t0, t1); + load_elements_of_vector(assm, &input->wt_lo, w1, t0, t1); + GenerateBranch(assm, do_not_move_w0_to_w2); + __ nop(); + __ move_v(w2, w0); + + __ bind(&do_not_move_w0_to_w2); + store_elements_of_vector(assm, w2, a0); + __ jr(ra); + __ nop(); + + CodeDesc desc; + assm.GetCode(isolate, &desc); + Handle code = + isolate->factory()->NewCode(desc, Code::STUB, Handle()); +#ifdef OBJECT_PRINT + code->Print(std::cout); +#endif + F3 f = FUNCTION_CAST(code->entry()); + + (CALL_GENERATED_CODE(isolate, f, &res, 0, 0, 0, 0)); + if (branched) { + CHECK_EQ(t.wd_lo, res.d[0]); + CHECK_EQ(t.wd_hi, res.d[1]); + } else { + CHECK_EQ(t.ws_lo, res.d[0]); + CHECK_EQ(t.ws_hi, res.d[1]); + } +} + +TEST(MSA_bz_bnz) { + if ((kArchVariant != kMips64r6) || !CpuFeatures::IsSupported(MIPS_SIMD)) + return; + + TestCaseMsaBranch tz_v[] = { + {0x0, 0x0}, {0xabc, 0x0}, {0x0, 0xabc}, {0xabc, 0xabc}}; + for (unsigned i = 0; i < arraysize(tz_v); ++i) { + run_bz_bnz( + &tz_v[i], + [](MacroAssembler& assm, Label& br_target) { __ bz_v(w1, &br_target); }, + tz_v[i].wt_lo == 0 && tz_v[i].wt_hi == 0); + } + +#define TEST_BZ_DF(input_array, lanes, instruction, int_type) \ + for (unsigned i = 0; i < arraysize(input_array); ++i) { \ + int j; \ + int_type* element = reinterpret_cast(&input_array[i]); \ + for (j = 0; j < lanes; ++j) { \ + if (element[j] == 0) { \ + break; \ + } \ + } \ + run_bz_bnz(&input_array[i], \ + [](MacroAssembler& assm, Label& br_target) { \ + __ instruction(w1, &br_target); \ + }, \ + j != lanes); \ + } + TestCaseMsaBranch tz_b[] = {{0x0, 0x0}, + {0xbc0000, 0x0}, + {0x0, 0xab000000000000cd}, + {0x123456789abcdef0, 0xaaaaaaaaaaaaaaaa}}; + TEST_BZ_DF(tz_b, kMSALanesByte, bz_b, int8_t) + + TestCaseMsaBranch tz_h[] = {{0x0, 0x0}, + {0xbcde0000, 0x0}, + {0x0, 0xabcd00000000abcd}, + {0x123456789abcdef0, 0xaaaaaaaaaaaaaaaa}}; + TEST_BZ_DF(tz_h, kMSALanesHalf, bz_h, int16_t) + + TestCaseMsaBranch tz_w[] = {{0x0, 0x0}, + {0xbcde123400000000, 0x0}, + {0x0, 0x000000001234abcd}, + {0x123456789abcdef0, 0xaaaaaaaaaaaaaaaa}}; + TEST_BZ_DF(tz_w, kMSALanesWord, bz_w, int32_t) + + TestCaseMsaBranch tz_d[] = {{0x0, 0x0}, + {0xbcde0000, 0x0}, + {0x0, 0xabcd00000000abcd}, + {0x123456789abcdef0, 0xaaaaaaaaaaaaaaaa}}; + TEST_BZ_DF(tz_d, kMSALanesDword, bz_d, int64_t) +#undef TEST_BZ_DF + + TestCaseMsaBranch tnz_v[] = { + {0x0, 0x0}, {0xabc, 0x0}, {0x0, 0xabc}, {0xabc, 0xabc}}; + for (unsigned i = 0; i < arraysize(tnz_v); ++i) { + run_bz_bnz(&tnz_v[i], + [](MacroAssembler& assm, Label& br_target) { + __ bnz_v(w1, &br_target); + }, + tnz_v[i].wt_lo != 0 || tnz_v[i].wt_hi != 0); + } + +#define TEST_BNZ_DF(input_array, lanes, instruction, int_type) \ + for (unsigned i = 0; i < arraysize(input_array); ++i) { \ + int j; \ + int_type* element = reinterpret_cast(&input_array[i]); \ + for (j = 0; j < lanes; ++j) { \ + if (element[j] == 0) { \ + break; \ + } \ + } \ + run_bz_bnz(&input_array[i], \ + [](MacroAssembler& assm, Label& br_target) { \ + __ instruction(w1, &br_target); \ + }, \ + j == lanes); \ + } + TestCaseMsaBranch tnz_b[] = {{0x0, 0x0}, + {0xbc0000, 0x0}, + {0x0, 0xab000000000000cd}, + {0x123456789abcdef0, 0xaaaaaaaaaaaaaaaa}}; + TEST_BNZ_DF(tnz_b, 16, bnz_b, int8_t) + + TestCaseMsaBranch tnz_h[] = {{0x0, 0x0}, + {0xbcde0000, 0x0}, + {0x0, 0xabcd00000000abcd}, + {0x123456789abcdef0, 0xaaaaaaaaaaaaaaaa}}; + TEST_BNZ_DF(tnz_h, 8, bnz_h, int16_t) + + TestCaseMsaBranch tnz_w[] = {{0x0, 0x0}, + {0xbcde123400000000, 0x0}, + {0x0, 
0x000000001234abcd}, + {0x123456789abcdef0, 0xaaaaaaaaaaaaaaaa}}; + TEST_BNZ_DF(tnz_w, 4, bnz_w, int32_t) + + TestCaseMsaBranch tnz_d[] = {{0x0, 0x0}, + {0xbcde0000, 0x0}, + {0x0, 0xabcd00000000abcd}, + {0x123456789abcdef0, 0xaaaaaaaaaaaaaaaa}}; + TEST_BNZ_DF(tnz_d, 2, bnz_d, int64_t) +#undef TEST_BNZ_DF +} uint64_t run_jialc(int16_t offset) { Isolate* isolate = CcTest::i_isolate(); @@ -6612,68 +6795,6 @@ TEST(Ext) { CHECK_EQ(run_Ext(0x0000000040000000, 31, 1), 0x0000000000000000); } -// Load elements in w0 MSA vector register -void load_uint64_elements_of_vector(MacroAssembler& assm, - const uint64_t elements[], MSARegister w, - Register t0, Register t1) { - __ li(t0, elements[0]); - __ li(t1, elements[1]); - __ insert_d(w, 0, t0); - __ insert_d(w, 1, t1); -} - -void load_uint32_elements_of_vector(MacroAssembler& assm, - const uint64_t elements[], MSARegister w, - Register t0, Register t1) { - const uint32_t* const element = reinterpret_cast(elements); - __ li(t0, element[0]); - __ li(t1, element[1]); - __ insert_w(w, 0, t0); - __ insert_w(w, 1, t1); - __ li(t0, element[2]); - __ li(t1, element[3]); - __ insert_w(w, 2, t0); - __ insert_w(w, 3, t1); -} - -void load_uint16_elements_of_vector(MacroAssembler& assm, - const uint64_t elements[], MSARegister w, - Register t0, Register t1) { - const uint16_t* const element = reinterpret_cast(elements); - __ li(t0, element[0]); - __ li(t1, element[1]); - __ insert_h(w, 0, t0); - __ insert_h(w, 1, t1); - __ li(t0, element[2]); - __ li(t1, element[3]); - __ insert_h(w, 2, t0); - __ insert_h(w, 3, t1); - __ li(t0, element[4]); - __ li(t1, element[5]); - __ insert_h(w, 4, t0); - __ insert_h(w, 5, t1); - __ li(t0, element[6]); - __ li(t1, element[7]); - __ insert_h(w, 6, t0); - __ insert_h(w, 7, t1); -} - -// Store vector elements from w2 to the memory pointed by a0 -void store_uint64_elements_of_vector(MacroAssembler& assm, MSARegister w, - Register a) { - __ st_d(w, MemOperand(a, 0)); -} - -void store_uint32_elements_of_vector(MacroAssembler& assm, MSARegister w, - Register a) { - __ st_w(w, MemOperand(a, 0)); -} - -void store_uint16_elements_of_vector(MacroAssembler& assm, MSARegister w, - Register a) { - __ st_h(w, MemOperand(a, 0)); -} - TEST(MSA_fill_copy) { CcTest::InitializeVM(); Isolate* isolate = CcTest::i_isolate(); @@ -6860,12 +6981,6 @@ TEST(MSA_fill_copy_3) { CHECK_EQ(0x5555555555555555, t[1].d0); } -typedef union { - uint8_t b[16]; - uint16_t h[8]; - uint32_t w[4]; - uint64_t d[2]; -} msa_reg_t; template void run_msa_insert(int64_t rs_value, int n, msa_reg_t* w) { @@ -6896,7 +7011,7 @@ void run_msa_insert(int64_t rs_value, int n, msa_reg_t* w) { UNREACHABLE(); } - store_uint64_elements_of_vector(assm, w0, a0); + store_elements_of_vector(assm, w0, a0); __ jr(ra); __ nop(); @@ -7017,6 +7132,152 @@ void run_msa_ctc_cfc(uint64_t value) { res); } +TEST(MSA_move_v) { + if ((kArchVariant != kMips64r6) || !CpuFeatures::IsSupported(MIPS_SIMD)) + return; + CcTest::InitializeVM(); + Isolate* isolate = CcTest::i_isolate(); + HandleScope scope(isolate); + + typedef struct { + uint64_t ws_lo; + uint64_t ws_hi; + uint64_t wd_lo; + uint64_t wd_hi; + } T; + T t[] = {{0x20b9cc4f1a83e0c5, 0xa27e1b5f2f5bb18a, 0x1e86678b52f8e1ff, + 0x706e51290ac76fb9}, + {0x4414aed7883ffd18, 0x047d183a06b67016, 0x4ef258cf8d822870, + 0x2686b73484c2e843}, + {0xd38ff9d048884ffc, 0x6dc63a57c0943ca7, 0x8520ca2f3e97c426, + 0xa9913868fb819c59}}; + + for (unsigned i = 0; i < arraysize(t); ++i) { + MacroAssembler assm(isolate, nullptr, 0, + v8::internal::CodeObjectRequired::kYes); + 
CpuFeatureScope fscope(&assm, MIPS_SIMD); + + load_elements_of_vector(assm, &t[i].ws_lo, w0, t0, t1); + load_elements_of_vector(assm, &t[i].wd_lo, w2, t0, t1); + __ move_v(w2, w0); + store_elements_of_vector(assm, w2, a0); + + __ jr(ra); + __ nop(); + + CodeDesc desc; + assm.GetCode(isolate, &desc); + Handle code = + isolate->factory()->NewCode(desc, Code::STUB, Handle()); +#ifdef OBJECT_PRINT + code->Print(std::cout); +#endif + F3 f = FUNCTION_CAST(code->entry()); + (CALL_GENERATED_CODE(isolate, f, &t[i].wd_lo, 0, 0, 0, 0)); + CHECK_EQ(t[i].ws_lo, t[i].wd_lo); + CHECK_EQ(t[i].ws_hi, t[i].wd_hi); + } +} + +template +void run_msa_sldi(OperFunc GenerateOperation, + ExpectFunc GenerateExpectedResult) { + Isolate* isolate = CcTest::i_isolate(); + HandleScope scope(isolate); + + typedef struct { + uint64_t ws_lo; + uint64_t ws_hi; + uint64_t wd_lo; + uint64_t wd_hi; + } T; + T t[] = {{0x20b9cc4f1a83e0c5, 0xa27e1b5f2f5bb18a, 0x1e86678b52f8e1ff, + 0x706e51290ac76fb9}, + {0x4414aed7883ffd18, 0x047d183a06b67016, 0x4ef258cf8d822870, + 0x2686b73484c2e843}, + {0xd38ff9d048884ffc, 0x6dc63a57c0943ca7, 0x8520ca2f3e97c426, + 0xa9913868fb819c59}}; + uint64_t res[2]; + + for (unsigned i = 0; i < arraysize(t); ++i) { + MacroAssembler assm(isolate, nullptr, 0, + v8::internal::CodeObjectRequired::kYes); + CpuFeatureScope fscope(&assm, MIPS_SIMD); + load_elements_of_vector(assm, &t[i].ws_lo, w0, t0, t1); + load_elements_of_vector(assm, &t[i].wd_lo, w2, t0, t1); + GenerateOperation(assm); + store_elements_of_vector(assm, w2, a0); + + __ jr(ra); + __ nop(); + + CodeDesc desc; + assm.GetCode(isolate, &desc); + Handle code = + isolate->factory()->NewCode(desc, Code::STUB, Handle()); +#ifdef OBJECT_PRINT + code->Print(std::cout); +#endif + F3 f = FUNCTION_CAST(code->entry()); + (CALL_GENERATED_CODE(isolate, f, &res[0], 0, 0, 0, 0)); + GenerateExpectedResult(reinterpret_cast(&t[i].ws_lo), + reinterpret_cast(&t[i].wd_lo)); + CHECK_EQ(res[0], t[i].wd_lo); + CHECK_EQ(res[1], t[i].wd_hi); + } +} + +TEST(MSA_sldi) { + if ((kArchVariant != kMips64r6) || !CpuFeatures::IsSupported(MIPS_SIMD)) + return; + CcTest::InitializeVM(); + +#define SLDI_DF(s, k) \ + uint8_t v[32]; \ + for (unsigned i = 0; i < s; i++) { \ + v[i] = ws[s * k + i]; \ + v[i + s] = wd[s * k + i]; \ + } \ + for (unsigned i = 0; i < s; i++) { \ + wd[s * k + i] = v[i + n]; \ + } + + for (int n = 0; n < 16; ++n) { + run_msa_sldi([n](MacroAssembler& assm) { __ sldi_b(w2, w0, n); }, + [n](uint8_t* ws, uint8_t* wd) { + SLDI_DF(kMSARegSize / sizeof(int8_t) / kBitsPerByte, 0) + }); + } + + for (int n = 0; n < 8; ++n) { + run_msa_sldi([n](MacroAssembler& assm) { __ sldi_h(w2, w0, n); }, + [n](uint8_t* ws, uint8_t* wd) { + for (int k = 0; k < 2; ++k) { + SLDI_DF(kMSARegSize / sizeof(int16_t) / kBitsPerByte, k) + } + }); + } + + for (int n = 0; n < 4; ++n) { + run_msa_sldi([n](MacroAssembler& assm) { __ sldi_w(w2, w0, n); }, + [n](uint8_t* ws, uint8_t* wd) { + for (int k = 0; k < 4; ++k) { + SLDI_DF(kMSARegSize / sizeof(int32_t) / kBitsPerByte, k) + } + }); + } + + for (int n = 0; n < 2; ++n) { + run_msa_sldi([n](MacroAssembler& assm) { __ sldi_d(w2, w0, n); }, + [n](uint8_t* ws, uint8_t* wd) { + for (int k = 0; k < 8; ++k) { + SLDI_DF(kMSARegSize / sizeof(int64_t) / kBitsPerByte, k) + } + }); + } +#undef SLDI_DF +} + TEST(MSA_cfc_ctc) { if ((kArchVariant != kMips64r6) || !CpuFeatures::IsSupported(MIPS_SIMD)) return; @@ -7104,7 +7365,7 @@ void run_msa_i8(SecondaryField opcode, uint64_t ws_lo, uint64_t ws_hi, UNREACHABLE(); } - store_uint64_elements_of_vector(assm, w2, 
a0); + store_elements_of_vector(assm, w2, a0); __ jr(ra); __ nop(); @@ -7297,11 +7558,11 @@ void run_msa_i5(struct TestCaseMsaI5* input, bool i5_sign_ext, int32_t i5 = i5_sign_ext ? static_cast(input->i5 << 27) >> 27 : input->i5; - load_uint64_elements_of_vector(assm, &(input->ws_lo), w0, t0, t1); + load_elements_of_vector(assm, &(input->ws_lo), w0, t0, t1); GenerateI5InstructionFunc(assm, i5); - store_uint64_elements_of_vector(assm, w2, a0); + store_elements_of_vector(assm, w2, a0); __ jr(ra); __ nop(); @@ -7708,11 +7969,9 @@ struct TestCaseMsa2R { uint64_t exp_res_hi; }; -template +template void run_msa_2r(const struct TestCaseMsa2R* input, - Func Generate2RInstructionFunc, - FuncLoad load_elements_of_vector, - FuncStore store_elements_of_vector) { + Func Generate2RInstructionFunc) { Isolate* isolate = CcTest::i_isolate(); HandleScope scope(isolate); @@ -7740,17 +7999,8 @@ void run_msa_2r(const struct TestCaseMsa2R* input, (CALL_GENERATED_CODE(isolate, f, &res, 0, 0, 0, 0)); - if (store_elements_of_vector == store_uint64_elements_of_vector) { - CHECK_EQ(input->exp_res_lo, res.d[0]); - CHECK_EQ(input->exp_res_hi, res.d[1]); - } else if (store_elements_of_vector == store_uint32_elements_of_vector) { - const uint32_t* exp_res = - reinterpret_cast(&input->exp_res_lo); - CHECK_EQ(exp_res[0], res.w[0]); - CHECK_EQ(exp_res[1], res.w[1]); - CHECK_EQ(exp_res[2], res.w[2]); - CHECK_EQ(exp_res[3], res.w[3]); - } + CHECK_EQ(input->exp_res_lo, res.d[0]); + CHECK_EQ(input->exp_res_hi, res.d[1]); } TEST(MSA_pcnt) { @@ -7801,14 +8051,10 @@ TEST(MSA_pcnt) { {0xf35862e13e38f8b0, 0x4f41ffdef2bfe636, 0x20, 0x2a}}; for (size_t i = 0; i < sizeof(tc_b) / sizeof(TestCaseMsa2R); ++i) { - run_msa_2r(&tc_b[i], [](MacroAssembler& assm) { __ pcnt_b(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); - run_msa_2r(&tc_h[i], [](MacroAssembler& assm) { __ pcnt_h(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); - run_msa_2r(&tc_w[i], [](MacroAssembler& assm) { __ pcnt_w(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); - run_msa_2r(&tc_d[i], [](MacroAssembler& assm) { __ pcnt_d(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); + run_msa_2r(&tc_b[i], [](MacroAssembler& assm) { __ pcnt_b(w2, w0); }); + run_msa_2r(&tc_h[i], [](MacroAssembler& assm) { __ pcnt_h(w2, w0); }); + run_msa_2r(&tc_w[i], [](MacroAssembler& assm) { __ pcnt_w(w2, w0); }); + run_msa_2r(&tc_d[i], [](MacroAssembler& assm) { __ pcnt_d(w2, w0); }); } } @@ -7860,14 +8106,10 @@ TEST(MSA_nlzc) { {0x00000000e338f8b0, 0x0754534acab32654, 0x20, 0x5}}; for (size_t i = 0; i < sizeof(tc_b) / sizeof(TestCaseMsa2R); ++i) { - run_msa_2r(&tc_b[i], [](MacroAssembler& assm) { __ nlzc_b(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); - run_msa_2r(&tc_h[i], [](MacroAssembler& assm) { __ nlzc_h(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); - run_msa_2r(&tc_w[i], [](MacroAssembler& assm) { __ nlzc_w(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); - run_msa_2r(&tc_d[i], [](MacroAssembler& assm) { __ nlzc_d(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); + run_msa_2r(&tc_b[i], [](MacroAssembler& assm) { __ nlzc_b(w2, w0); }); + run_msa_2r(&tc_h[i], [](MacroAssembler& assm) { __ nlzc_h(w2, w0); }); + run_msa_2r(&tc_w[i], [](MacroAssembler& assm) { __ nlzc_w(w2, w0); }); + run_msa_2r(&tc_d[i], [](MacroAssembler& assm) { __ nlzc_d(w2, 
w0); }); } } @@ -7919,14 +8161,10 @@ TEST(MSA_nloc) { {0xFFFFFFFF1CC7074F, 0xF8ABACB5354CD9AB, 0x20, 0x5}}; for (size_t i = 0; i < sizeof(tc_b) / sizeof(TestCaseMsa2R); ++i) { - run_msa_2r(&tc_b[i], [](MacroAssembler& assm) { __ nloc_b(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); - run_msa_2r(&tc_h[i], [](MacroAssembler& assm) { __ nloc_h(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); - run_msa_2r(&tc_w[i], [](MacroAssembler& assm) { __ nloc_w(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); - run_msa_2r(&tc_d[i], [](MacroAssembler& assm) { __ nloc_d(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); + run_msa_2r(&tc_b[i], [](MacroAssembler& assm) { __ nloc_b(w2, w0); }); + run_msa_2r(&tc_h[i], [](MacroAssembler& assm) { __ nloc_h(w2, w0); }); + run_msa_2r(&tc_w[i], [](MacroAssembler& assm) { __ nloc_w(w2, w0); }); + run_msa_2r(&tc_d[i], [](MacroAssembler& assm) { __ nloc_d(w2, w0); }); } } @@ -7987,13 +8225,11 @@ TEST(MSA_fclass) { for (size_t i = 0; i < sizeof(tc_s) / sizeof(TestCaseMsa2RF_F_U); ++i) { run_msa_2r(reinterpret_cast(&tc_s[i]), - [](MacroAssembler& assm) { __ fclass_w(w2, w0); }, - load_uint32_elements_of_vector, store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ fclass_w(w2, w0); }); } for (size_t i = 0; i < sizeof(tc_d) / sizeof(TestCaseMsa2RF_D_U); ++i) { run_msa_2r(reinterpret_cast(&tc_d[i]), - [](MacroAssembler& assm) { __ fclass_d(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); + [](MacroAssembler& assm) { __ fclass_d(w2, w0); }); } #undef BIT @@ -8059,13 +8295,11 @@ TEST(MSA_ftrunc_s) { for (size_t i = 0; i < sizeof(tc_s) / sizeof(TestCaseMsa2RF_F_I); ++i) { run_msa_2r(reinterpret_cast(&tc_s[i]), - [](MacroAssembler& assm) { __ ftrunc_s_w(w2, w0); }, - load_uint32_elements_of_vector, store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ ftrunc_s_w(w2, w0); }); } for (size_t i = 0; i < sizeof(tc_d) / sizeof(TestCaseMsa2RF_D_I); ++i) { run_msa_2r(reinterpret_cast(&tc_d[i]), - [](MacroAssembler& assm) { __ ftrunc_s_d(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); + [](MacroAssembler& assm) { __ ftrunc_s_d(w2, w0); }); } } @@ -8098,13 +8332,11 @@ TEST(MSA_ftrunc_u) { for (size_t i = 0; i < sizeof(tc_s) / sizeof(TestCaseMsa2RF_F_U); ++i) { run_msa_2r(reinterpret_cast(&tc_s[i]), - [](MacroAssembler& assm) { __ ftrunc_u_w(w2, w0); }, - load_uint32_elements_of_vector, store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ ftrunc_u_w(w2, w0); }); } for (size_t i = 0; i < sizeof(tc_d) / sizeof(TestCaseMsa2RF_D_U); ++i) { run_msa_2r(reinterpret_cast(&tc_d[i]), - [](MacroAssembler& assm) { __ ftrunc_u_d(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); + [](MacroAssembler& assm) { __ ftrunc_u_d(w2, w0); }); } } @@ -8143,13 +8375,11 @@ TEST(MSA_fsqrt) { for (size_t i = 0; i < sizeof(tc_s) / sizeof(TestCaseMsa2RF_F_F); ++i) { run_msa_2r(reinterpret_cast(&tc_s[i]), - [](MacroAssembler& assm) { __ fsqrt_w(w2, w0); }, - load_uint32_elements_of_vector, store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ fsqrt_w(w2, w0); }); } for (size_t i = 0; i < sizeof(tc_d) / sizeof(TestCaseMsa2RF_D_D); ++i) { run_msa_2r(reinterpret_cast(&tc_d[i]), - [](MacroAssembler& assm) { __ fsqrt_d(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); + [](MacroAssembler& assm) { __ fsqrt_d(w2, w0); }); } } @@ 
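// Aside (not part of the patch): the 2R test vectors above encode per-lane
// bit-count results. A minimal standalone sketch of the pcnt/nlzc lane
// semantics they assume, over one 64-bit half of an MSA register; the
// helper names below are illustrative only, not simulator code.
#include <cassert>
#include <cstdint>
#include <cstdio>

// Applies `op` to every `bits`-wide lane packed into a 64-bit chunk.
uint64_t lanewise(uint64_t src, int bits, uint64_t (*op)(uint64_t, int)) {
  assert(bits == 8 || bits == 16 || bits == 32 || bits == 64);
  uint64_t mask = bits == 64 ? ~0ull : (1ull << bits) - 1;
  uint64_t res = 0;
  for (int shift = 0; shift < 64; shift += bits) {
    uint64_t lane = (src >> shift) & mask;
    res |= (op(lane, bits) & mask) << shift;
  }
  return res;
}

uint64_t pcnt(uint64_t lane, int) {  // population count of one lane
  uint64_t n = 0;
  for (; lane != 0; lane &= lane - 1) ++n;
  return n;
}

uint64_t nlzc(uint64_t lane, int bits) {  // leading-zero count of one lane
  uint64_t n = 0;
  for (int i = bits - 1; i >= 0 && ((lane >> i) & 1) == 0; --i) ++n;
  return n;
}

int main() {  // demo value only, not one of the tc_* vectors above
  std::printf("%016llx\n", static_cast<unsigned long long>(
                               lanewise(0x2b665362c4e812dfull, 8, pcnt)));
  std::printf("%016llx\n", static_cast<unsigned long long>(
                               lanewise(0x2b665362c4e812dfull, 16, nlzc)));
}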
-8173,13 +8403,11 @@ TEST(MSA_frsqrt) { for (size_t i = 0; i < sizeof(tc_s) / sizeof(TestCaseMsa2RF_F_F); ++i) { run_msa_2r(reinterpret_cast(&tc_s[i]), - [](MacroAssembler& assm) { __ frsqrt_w(w2, w0); }, - load_uint32_elements_of_vector, store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ frsqrt_w(w2, w0); }); } for (size_t i = 0; i < sizeof(tc_d) / sizeof(TestCaseMsa2RF_D_D); ++i) { run_msa_2r(reinterpret_cast(&tc_d[i]), - [](MacroAssembler& assm) { __ frsqrt_d(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); + [](MacroAssembler& assm) { __ frsqrt_d(w2, w0); }); } } @@ -8205,13 +8433,11 @@ TEST(MSA_frcp) { for (size_t i = 0; i < sizeof(tc_s) / sizeof(TestCaseMsa2RF_F_F); ++i) { run_msa_2r(reinterpret_cast(&tc_s[i]), - [](MacroAssembler& assm) { __ frcp_w(w2, w0); }, - load_uint32_elements_of_vector, store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ frcp_w(w2, w0); }); } for (size_t i = 0; i < sizeof(tc_d) / sizeof(TestCaseMsa2RF_D_D); ++i) { run_msa_2r(reinterpret_cast(&tc_d[i]), - [](MacroAssembler& assm) { __ frcp_d(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); + [](MacroAssembler& assm) { __ frcp_d(w2, w0); }); } } @@ -8226,8 +8452,7 @@ void test_frint_s(size_t data_size, TestCaseMsa2RF_F_F tc_d[], __ ctcmsa(msareg, t0); __ frint_w(w2, w0); __ ctcmsa(msareg, t1); - }, - load_uint32_elements_of_vector, store_uint32_elements_of_vector); + }); } } @@ -8242,8 +8467,7 @@ void test_frint_d(size_t data_size, TestCaseMsa2RF_D_D tc_d[], __ ctcmsa(msareg, t0); __ frint_d(w2, w0); __ ctcmsa(msareg, t1); - }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); + }); } } @@ -8325,14 +8549,12 @@ TEST(MSA_flog2) { for (size_t i = 0; i < sizeof(tc_s) / sizeof(TestCaseMsa2RF_F_F); ++i) { run_msa_2r(reinterpret_cast(&tc_s[i]), - [](MacroAssembler& assm) { __ flog2_w(w2, w0); }, - load_uint32_elements_of_vector, store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ flog2_w(w2, w0); }); } for (size_t i = 0; i < sizeof(tc_d) / sizeof(TestCaseMsa2RF_D_D); ++i) { run_msa_2r(reinterpret_cast(&tc_d[i]), - [](MacroAssembler& assm) { __ flog2_d(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); + [](MacroAssembler& assm) { __ flog2_d(w2, w0); }); } } @@ -8347,8 +8569,7 @@ void test_ftint_s_s(size_t data_size, TestCaseMsa2RF_F_I tc_d[], __ ctcmsa(msareg, t0); __ ftint_s_w(w2, w0); __ ctcmsa(msareg, t1); - }, - load_uint32_elements_of_vector, store_uint32_elements_of_vector); + }); } } @@ -8363,8 +8584,7 @@ void test_ftint_s_d(size_t data_size, TestCaseMsa2RF_D_I tc_d[], __ ctcmsa(msareg, t0); __ ftint_s_d(w2, w0); __ ctcmsa(msareg, t1); - }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); + }); } } @@ -8461,8 +8681,7 @@ void test_ftint_u_s(size_t data_size, TestCaseMsa2RF_F_U tc_d[], __ ctcmsa(msareg, t0); __ ftint_u_w(w2, w0); __ ctcmsa(msareg, t1); - }, - load_uint32_elements_of_vector, store_uint32_elements_of_vector); + }); } } @@ -8477,8 +8696,7 @@ void test_ftint_u_d(size_t data_size, TestCaseMsa2RF_D_U tc_d[], __ ctcmsa(msareg, t0); __ ftint_u_d(w2, w0); __ ctcmsa(msareg, t1); - }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); + }); } } @@ -8594,13 +8812,11 @@ TEST(MSA_ffint_u) { for (size_t i = 0; i < sizeof(tc_s) / sizeof(TestCaseMsa2RF_U_F); ++i) { run_msa_2r(reinterpret_cast(&tc_s[i]), - [](MacroAssembler& assm) { __ ffint_u_w(w2, w0); }, - load_uint32_elements_of_vector, store_uint32_elements_of_vector); + 
[](MacroAssembler& assm) { __ ffint_u_w(w2, w0); }); } for (size_t i = 0; i < sizeof(tc_d) / sizeof(TestCaseMsa2RF_U_D); ++i) { run_msa_2r(reinterpret_cast(&tc_d[i]), - [](MacroAssembler& assm) { __ ffint_u_d(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); + [](MacroAssembler& assm) { __ ffint_u_d(w2, w0); }); } } @@ -8636,13 +8852,11 @@ TEST(MSA_ffint_s) { for (size_t i = 0; i < sizeof(tc_s) / sizeof(TestCaseMsa2RF_I_F); ++i) { run_msa_2r(reinterpret_cast(&tc_s[i]), - [](MacroAssembler& assm) { __ ffint_s_w(w2, w0); }, - load_uint32_elements_of_vector, store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ ffint_s_w(w2, w0); }); } for (size_t i = 0; i < sizeof(tc_d) / sizeof(TestCaseMsa2RF_I_D); ++i) { run_msa_2r(reinterpret_cast(&tc_d[i]), - [](MacroAssembler& assm) { __ ffint_s_d(w2, w0); }, - load_uint64_elements_of_vector, store_uint64_elements_of_vector); + [](MacroAssembler& assm) { __ ffint_s_d(w2, w0); }); } } @@ -8695,13 +8909,11 @@ TEST(MSA_fexupl) { for (size_t i = 0; i < sizeof(tc_s) / sizeof(TestCaseMsa2RF_U16_F); ++i) { run_msa_2r(reinterpret_cast(&tc_s[i]), - [](MacroAssembler& assm) { __ fexupl_w(w2, w0); }, - load_uint16_elements_of_vector, store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ fexupl_w(w2, w0); }); } for (size_t i = 0; i < sizeof(tc_d) / sizeof(TestCaseMsa2RF_F_D); ++i) { run_msa_2r(reinterpret_cast(&tc_d[i]), - [](MacroAssembler& assm) { __ fexupl_d(w2, w0); }, - load_uint32_elements_of_vector, store_uint64_elements_of_vector); + [](MacroAssembler& assm) { __ fexupl_d(w2, w0); }); } } @@ -8730,13 +8942,11 @@ TEST(MSA_fexupr) { for (size_t i = 0; i < sizeof(tc_s) / sizeof(TestCaseMsa2RF_U16_F); ++i) { run_msa_2r(reinterpret_cast(&tc_s[i]), - [](MacroAssembler& assm) { __ fexupr_w(w2, w0); }, - load_uint16_elements_of_vector, store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ fexupr_w(w2, w0); }); } for (size_t i = 0; i < sizeof(tc_d) / sizeof(TestCaseMsa2RF_F_D); ++i) { run_msa_2r(reinterpret_cast(&tc_d[i]), - [](MacroAssembler& assm) { __ fexupr_d(w2, w0); }, - load_uint32_elements_of_vector, store_uint64_elements_of_vector); + [](MacroAssembler& assm) { __ fexupr_d(w2, w0); }); } } @@ -8765,13 +8975,11 @@ TEST(MSA_ffql) { for (size_t i = 0; i < sizeof(tc_s) / sizeof(TestCaseMsa2RF_U16_F); ++i) { run_msa_2r(reinterpret_cast(&tc_s[i]), - [](MacroAssembler& assm) { __ ffql_w(w2, w0); }, - load_uint16_elements_of_vector, store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ ffql_w(w2, w0); }); } for (size_t i = 0; i < sizeof(tc_d) / sizeof(TestCaseMsa2RF_U32_D); ++i) { run_msa_2r(reinterpret_cast(&tc_d[i]), - [](MacroAssembler& assm) { __ ffql_d(w2, w0); }, - load_uint32_elements_of_vector, store_uint64_elements_of_vector); + [](MacroAssembler& assm) { __ ffql_d(w2, w0); }); } } @@ -8791,13 +8999,11 @@ TEST(MSA_ffqr) { for (size_t i = 0; i < sizeof(tc_s) / sizeof(TestCaseMsa2RF_U16_F); ++i) { run_msa_2r(reinterpret_cast(&tc_s[i]), - [](MacroAssembler& assm) { __ ffqr_w(w2, w0); }, - load_uint16_elements_of_vector, store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ ffqr_w(w2, w0); }); } for (size_t i = 0; i < sizeof(tc_d) / sizeof(TestCaseMsa2RF_U32_D); ++i) { run_msa_2r(reinterpret_cast(&tc_d[i]), - [](MacroAssembler& assm) { __ ffqr_d(w2, w0); }, - load_uint32_elements_of_vector, store_uint64_elements_of_vector); + [](MacroAssembler& assm) { __ ffqr_d(w2, w0); }); } } @@ -8822,13 +9028,13 @@ void run_msa_vector(struct TestCaseMsaVector* input, CpuFeatureScope 
fscope(&assm, MIPS_SIMD); msa_reg_t res; - load_uint64_elements_of_vector(assm, &(input->ws_lo), w0, t0, t1); - load_uint64_elements_of_vector(assm, &(input->wt_lo), w2, t0, t1); - load_uint64_elements_of_vector(assm, &(input->wd_lo), w4, t0, t1); + load_elements_of_vector(assm, &(input->ws_lo), w0, t0, t1); + load_elements_of_vector(assm, &(input->wt_lo), w2, t0, t1); + load_elements_of_vector(assm, &(input->wd_lo), w4, t0, t1); GenerateVectorInstructionFunc(assm); - store_uint64_elements_of_vector(assm, w4, a0); + store_elements_of_vector(assm, w4, a0); __ jr(ra); __ nop(); @@ -8912,12 +9118,12 @@ void run_msa_bit(struct TestCaseMsaBit* input, InstFunc GenerateInstructionFunc, CpuFeatureScope fscope(&assm, MIPS_SIMD); msa_reg_t res; - load_uint64_elements_of_vector(assm, &(input->ws_lo), w0, t0, t1); - load_uint64_elements_of_vector(assm, &(input->wd_lo), w2, t0, t1); + load_elements_of_vector(assm, &(input->ws_lo), w0, t0, t1); + load_elements_of_vector(assm, &(input->wd_lo), w2, t0, t1); GenerateInstructionFunc(assm, input->m); - store_uint64_elements_of_vector(assm, w2, a0); + store_elements_of_vector(assm, w2, a0); __ jr(ra); __ nop(); @@ -9391,7 +9597,7 @@ void run_msa_i10(int32_t input, InstFunc GenerateVectorInstructionFunc, GenerateVectorInstructionFunc(assm, input); - store_uint64_elements_of_vector(assm, w0, a0); + store_elements_of_vector(assm, w0, a0); __ jr(ra); __ nop(); @@ -9520,7 +9726,6 @@ TEST(MSA_load_store_vector) { __ st_d(w0, MemOperand(a1, i)); } }); -#undef LDI_DF } struct TestCaseMsa3R { @@ -9544,15 +9749,14 @@ void run_msa_3r(struct TestCaseMsa3R* input, InstFunc GenerateI5InstructionFunc, v8::internal::CodeObjectRequired::kYes); CpuFeatureScope fscope(&assm, MIPS_SIMD); msa_reg_t res; - uint64_t expected; - load_uint64_elements_of_vector(assm, &(input->wt_lo), w0, t0, t1); - load_uint64_elements_of_vector(assm, &(input->ws_lo), w1, t0, t1); - load_uint64_elements_of_vector(assm, &(input->wd_lo), w2, t0, t1); + load_elements_of_vector(assm, &(input->wt_lo), w0, t0, t1); + load_elements_of_vector(assm, &(input->ws_lo), w1, t0, t1); + load_elements_of_vector(assm, &(input->wd_lo), w2, t0, t1); GenerateI5InstructionFunc(assm); - store_uint64_elements_of_vector(assm, w2, a0); + store_elements_of_vector(assm, w2, a0); __ jr(ra); __ nop(); @@ -9568,14 +9772,12 @@ void run_msa_3r(struct TestCaseMsa3R* input, InstFunc GenerateI5InstructionFunc, (CALL_GENERATED_CODE(isolate, f, &res, 0, 0, 0, 0)); - expected = GenerateOperationFunc(input->ws_lo, input->wt_lo, input->wd_lo); - if (expected != Unpredictable) { - CHECK_EQ(expected, res.d[0]); + GenerateOperationFunc(&input->ws_lo, &input->wt_lo, &input->wd_lo); + if (input->wd_lo != Unpredictable) { + CHECK_EQ(input->wd_lo, res.d[0]); } - - expected = GenerateOperationFunc(input->ws_hi, input->wt_hi, input->wd_hi); - if (expected != Unpredictable) { - CHECK_EQ(expected, res.d[1]); + if (input->wd_hi != Unpredictable) { + CHECK_EQ(input->wd_hi, res.d[1]); } } @@ -9612,479 +9814,630 @@ TEST(MSA_3R_instructions) { {0xffff00000000ffff, 0xffff00000000ffff, 0xffff00000000ffff, 0xffff00000000ffff, 0xffff00000000ffff, 0xffff00000000ffff}}; -#define SLL_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T src_op = static_cast((ws >> shift) & mask); \ - T shift_op = static_cast((wt >> shift) & mask) % size_in_bits; \ - res |= (static_cast(src_op << shift_op) & mask) << shift; \ - } \ - return res +#define SLL_DF(T, 
lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T src_op = static_cast((ws[i] >> shift) & mask); \ + T shift_op = static_cast((wt[i] >> shift) & mask) % size_in_bits; \ + res |= (static_cast(src_op << shift_op) & mask) << shift; \ + } \ + wd[i] = res; \ + } -#define SRA_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T src_op = static_cast((ws >> shift) & mask); \ - int shift_op = ((wt >> shift) & mask) % size_in_bits; \ - res |= \ - (static_cast(ArithmeticShiftRight(src_op, shift_op) & mask)) \ - << shift; \ - } \ - return res - -#define SRL_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T src_op = static_cast((ws >> shift) & mask); \ - T shift_op = static_cast(((wt >> shift) & mask) % size_in_bits); \ - res |= (static_cast(src_op >> shift_op) & mask) << shift; \ - } \ - return res - -#define BCRL_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T src_op = static_cast((ws >> shift) & mask); \ - T shift_op = static_cast(((wt >> shift) & mask) % size_in_bits); \ - T r = (static_cast(~(1ull << shift_op)) & src_op) & mask; \ - res |= static_cast(r) << shift; \ - } \ - return res - -#define BSET_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T src_op = static_cast((ws >> shift) & mask); \ - T shift_op = static_cast(((wt >> shift) & mask) % size_in_bits); \ - T r = (static_cast(1ull << shift_op) | src_op) & mask; \ - res |= static_cast(r) << shift; \ - } \ - return res - -#define BNEG_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T src_op = static_cast((ws >> shift) & mask); \ - T shift_op = static_cast(((wt >> shift) & mask) % size_in_bits); \ - T r = (static_cast(1ull << shift_op) ^ src_op) & mask; \ - res |= static_cast(r) << shift; \ - } \ - return res - -#define BINSL_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wd_op = static_cast((wd >> shift) & mask); \ - int shift_op = static_cast(((wt >> shift) & mask) % size_in_bits); \ - int bits = shift_op + 1; \ - T r; \ - if (bits == size_in_bits) { \ - r = static_cast(ws_op); \ - } else { \ - uint64_t mask2 = ((1ull << bits) - 1) << (size_in_bits - bits); \ - r = static_cast((static_cast(mask2) & ws_op) | \ - (static_cast(~mask2) & wd_op)); \ - } \ - res |= static_cast(r) << shift; \ - } \ - return res - -#define BINSR_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wd_op = static_cast((wd >> shift) & mask); \ - int shift_op = static_cast(((wt >> shift) & mask) % size_in_bits); \ - int bits = shift_op + 1; \ - T r; \ - if (bits == size_in_bits) { \ - r = 
static_cast(ws_op); \ - } else { \ - uint64_t mask2 = (1ull << bits) - 1; \ - r = static_cast((static_cast(mask2) & ws_op) | \ - (static_cast(~mask2) & wd_op)); \ - } \ - res |= static_cast(r) << shift; \ - } \ - return res - -#define ADDV_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= (static_cast(ws_op + wt_op) & mask) << shift; \ - } \ - return res - -#define SUBV_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= (static_cast(ws_op - wt_op) & mask) << shift; \ - } \ - return res - -#define MAX_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= (static_cast(Max(ws_op, wt_op)) & mask) << shift; \ - } \ - return res - -#define MIN_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= (static_cast(Min(ws_op, wt_op)) & mask) << shift; \ - } \ - return res - -#define MAXA_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= (static_cast(Nabs(ws_op) < Nabs(wt_op) ? ws_op : wt_op) & \ - mask) \ - << shift; \ - } \ - return res - -#define MINA_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= (static_cast(Nabs(ws_op) > Nabs(wt_op) ? ws_op : wt_op) & \ - mask) \ - << shift; \ - } \ - return res - -#define CEQ_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= \ - (static_cast(!Compare(ws_op, wt_op) ? -1ull : 0ull) & mask) \ - << shift; \ - } \ - return res - -#define CLT_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= \ - (static_cast((Compare(ws_op, wt_op) == -1) ? 
-1ull : 0ull) & \ - mask) \ - << shift; \ - } \ - return res - -#define CLE_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= \ - (static_cast((Compare(ws_op, wt_op) != 1) ? -1ull : 0ull) & \ - mask) \ - << shift; \ - } \ - return res - -#define ADD_A_DF(T, lanes, mask) \ - uint64_t res = 0; \ +#define SRA_DF(T, lanes, mask) \ int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= (static_cast(Abs(ws_op) + Abs(wt_op)) & mask) << shift; \ - } \ - return res + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T src_op = static_cast((ws[i] >> shift) & mask); \ + int shift_op = ((wt[i] >> shift) & mask) % size_in_bits; \ + res |= (static_cast(ArithmeticShiftRight(src_op, shift_op) & \ + mask)) \ + << shift; \ + } \ + wd[i] = res; \ + } -#define ADDS_A_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = Nabs(static_cast((ws >> shift) & mask)); \ - T wt_op = Nabs(static_cast((wt >> shift) & mask)); \ - T r; \ - if (ws_op < -std::numeric_limits::max() - wt_op) { \ - r = std::numeric_limits::max(); \ - } else { \ - r = -(ws_op + wt_op); \ - } \ - res |= (static_cast(r) & mask) << shift; \ - } \ - return res +#define SRL_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T src_op = static_cast((ws[i] >> shift) & mask); \ + T shift_op = static_cast(((wt[i] >> shift) & mask) % size_in_bits); \ + res |= (static_cast(src_op >> shift_op) & mask) << shift; \ + } \ + wd[i] = res; \ + } -#define ADDS_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= (static_cast(SaturateAdd(ws_op, wt_op)) & mask) << shift; \ - } \ - return res +#define BCRL_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T src_op = static_cast((ws[i] >> shift) & mask); \ + T shift_op = static_cast(((wt[i] >> shift) & mask) % size_in_bits); \ + T r = (static_cast(~(1ull << shift_op)) & src_op) & mask; \ + res |= static_cast(r) << shift; \ + } \ + wd[i] = res; \ + } -#define AVE_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= (static_cast(((wt_op & ws_op) + ((ws_op ^ wt_op) >> 1)) & \ - mask)) \ - << shift; \ - } \ - return res +#define BSET_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; 
++j) { \ + uint64_t shift = size_in_bits * j; \ + T src_op = static_cast((ws[i] >> shift) & mask); \ + T shift_op = static_cast(((wt[i] >> shift) & mask) % size_in_bits); \ + T r = (static_cast(1ull << shift_op) | src_op) & mask; \ + res |= static_cast(r) << shift; \ + } \ + wd[i] = res; \ + } -#define AVER_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= (static_cast(((wt_op | ws_op) - ((ws_op ^ wt_op) >> 1)) & \ - mask)) \ - << shift; \ - } \ - return res +#define BNEG_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T src_op = static_cast((ws[i] >> shift) & mask); \ + T shift_op = static_cast(((wt[i] >> shift) & mask) % size_in_bits); \ + T r = (static_cast(1ull << shift_op) ^ src_op) & mask; \ + res |= static_cast(r) << shift; \ + } \ + wd[i] = res; \ + } -#define SUBS_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= (static_cast(SaturateSub(ws_op, wt_op)) & mask) << shift; \ - } \ - return res +#define BINSL_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wd_op = static_cast((wd[i] >> shift) & mask); \ + T shift_op = static_cast(((wt[i] >> shift) & mask) % size_in_bits); \ + int64_t bits = shift_op + 1; \ + T r; \ + if (bits == size_in_bits) { \ + r = static_cast(ws_op); \ + } else { \ + uint64_t mask2 = ((1ull << bits) - 1) << (size_in_bits - bits); \ + r = static_cast((static_cast(mask2) & ws_op) | \ + (static_cast(~mask2) & wd_op)); \ + } \ + res |= static_cast(r) << shift; \ + } \ + wd[i] = res; \ + } -#define SUBSUS_U_DF(T, lanes, mask) \ - typedef typename std::make_unsigned::type uT; \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - uT ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - T r; \ - if (wt_op > 0) { \ - uT wtu = static_cast(wt_op); \ - if (wtu > ws_op) { \ - r = 0; \ - } else { \ - r = static_cast(ws_op - wtu); \ - } \ - } else { \ - if (ws_op > std::numeric_limits::max() + wt_op) { \ - r = static_cast(std::numeric_limits::max()); \ - } else { \ - r = static_cast(ws_op - wt_op); \ - } \ - } \ - res |= (static_cast(r) & mask) << shift; \ - } \ - return res +#define BINSR_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wd_op = static_cast((wd[i] >> shift) & mask); \ + T shift_op = static_cast(((wt[i] >> shift) & mask) % size_in_bits); \ + int64_t bits = shift_op + 1; \ + T r; \ + if (bits == size_in_bits) { \ + r = static_cast(ws_op); \ + } else { \ + uint64_t mask2 = (1ull << bits) - 1; \ + r = 
static_cast((static_cast(mask2) & ws_op) | \ + (static_cast(~mask2) & wd_op)); \ + } \ + res |= static_cast(r) << shift; \ + } \ + wd[i] = res; \ + } -#define SUBSUU_S_DF(T, lanes, mask) \ - typedef typename std::make_unsigned::type uT; \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - uT ws_op = static_cast((ws >> shift) & mask); \ - uT wt_op = static_cast((wt >> shift) & mask); \ - uT wdu; \ - T r; \ - if (ws_op > wt_op) { \ - wdu = ws_op - wt_op; \ - if (wdu > std::numeric_limits::max()) { \ - r = std::numeric_limits::max(); \ - } else { \ - r = static_cast(wdu); \ - } \ - } else { \ - wdu = wt_op - ws_op; \ - CHECK(-std::numeric_limits::max() == \ - std::numeric_limits::min() + 1); \ - if (wdu <= std::numeric_limits::max()) { \ - r = -static_cast(wdu); \ - } else { \ - r = std::numeric_limits::min(); \ - } \ - } \ - res |= (static_cast(r) & mask) << shift; \ - } \ - return res +#define ADDV_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= (static_cast(ws_op + wt_op) & mask) << shift; \ + } \ + wd[i] = res; \ + } -#define ASUB_S_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= (static_cast(Abs(ws_op - wt_op)) & mask) << shift; \ - } \ - return res +#define SUBV_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= (static_cast(ws_op - wt_op) & mask) << shift; \ + } \ + wd[i] = res; \ + } -#define ASUB_U_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= (static_cast(ws_op > wt_op ? 
ws_op - wt_op \ - : wt_op - ws_op) & \ - mask) \ - << shift; \ - } \ - return res +#define MAX_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= (static_cast(Max(ws_op, wt_op)) & mask) << shift; \ + } \ + wd[i] = res; \ + } -#define MULV_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - res |= (static_cast(ws_op * wt_op) & mask) << shift; \ - } \ - return res +#define MIN_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= (static_cast(Min(ws_op, wt_op)) & mask) << shift; \ + } \ + wd[i] = res; \ + } -#define MADDV_DF(T, lanes, mask) \ - uint64_t res = 0; \ +#define MAXA_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= \ + (static_cast(Nabs(ws_op) < Nabs(wt_op) ? ws_op : wt_op) & \ + mask) \ + << shift; \ + } \ + wd[i] = res; \ + } + +#define MINA_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= \ + (static_cast(Nabs(ws_op) > Nabs(wt_op) ? ws_op : wt_op) & \ + mask) \ + << shift; \ + } \ + wd[i] = res; \ + } + +#define CEQ_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= (static_cast(!Compare(ws_op, wt_op) ? -1ull : 0ull) & \ + mask) \ + << shift; \ + } \ + wd[i] = res; \ + } + +#define CLT_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= (static_cast((Compare(ws_op, wt_op) == -1) ? 
-1ull \ + : 0ull) & \ + mask) \ + << shift; \ + } \ + wd[i] = res; \ + } + +#define CLE_DF(T, lanes, mask) \ int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - T wd_op = static_cast((wd >> shift) & mask); \ - res |= (static_cast(wd_op + ws_op * wt_op) & mask) << shift; \ - } \ - return res + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= (static_cast((Compare(ws_op, wt_op) != 1) ? -1ull \ + : 0ull) & \ + mask) \ + << shift; \ + } \ + wd[i] = res; \ + } -#define MSUBV_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - T wd_op = static_cast((wd >> shift) & mask); \ - res |= (static_cast(wd_op - ws_op * wt_op) & mask) << shift; \ - } \ - return res +#define ADD_A_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= (static_cast(Abs(ws_op) + Abs(wt_op)) & mask) << shift; \ + } \ + wd[i] = res; \ + } -#define DIV_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - if (wt_op == 0) { \ - res = Unpredictable; \ - break; \ - } \ - res |= (static_cast(ws_op / wt_op) & mask) << shift; \ - } \ - return res +#define ADDS_A_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = Nabs(static_cast((ws[i] >> shift) & mask)); \ + T wt_op = Nabs(static_cast((wt[i] >> shift) & mask)); \ + T r; \ + if (ws_op < -std::numeric_limits::max() - wt_op) { \ + r = std::numeric_limits::max(); \ + } else { \ + r = -(ws_op + wt_op); \ + } \ + res |= (static_cast(r) & mask) << shift; \ + } \ + wd[i] = res; \ + } -#define MOD_DF(T, lanes, mask) \ - uint64_t res = 0; \ +#define ADDS_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= (static_cast(SaturateAdd(ws_op, wt_op)) & mask) \ + << shift; \ + } \ + wd[i] = res; \ + } + +#define AVE_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= (static_cast( \ + ((wt_op & ws_op) + ((ws_op ^ wt_op) >> 1)) & mask)) \ + << shift; \ + } \ + wd[i] = res; \ + } + 
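// Aside (not part of the patch): AVE_DF above and the AVER_DF macro that
// follows rely on the usual overflow-free average identities. A minimal
// standalone check for unsigned 8-bit lanes; the helper names here are
// illustrative only.
#include <cassert>
#include <cstdint>

uint8_t ave_u8(uint8_t a, uint8_t b) {   // floor((a + b) / 2) without overflow
  return (a & b) + ((a ^ b) >> 1);
}

uint8_t aver_u8(uint8_t a, uint8_t b) {  // ceil((a + b) / 2) without overflow
  return (a | b) - ((a ^ b) >> 1);
}

int main() {  // exhaustive check over all 8-bit operand pairs
  for (int a = 0; a < 256; ++a) {
    for (int b = 0; b < 256; ++b) {
      assert(ave_u8(a, b) == (a + b) / 2);
      assert(aver_u8(a, b) == (a + b + 1) / 2);
    }
  }
}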
+#define AVER_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= (static_cast( \ + ((wt_op | ws_op) - ((ws_op ^ wt_op) >> 1)) & mask)) \ + << shift; \ + } \ + wd[i] = res; \ + } + +#define SUBS_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= (static_cast(SaturateSub(ws_op, wt_op)) & mask) \ + << shift; \ + } \ + wd[i] = res; \ + } + +#define SUBSUS_U_DF(T, lanes, mask) \ + typedef typename std::make_unsigned::type uT; \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + uT ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + T r; \ + if (wt_op > 0) { \ + uT wtu = static_cast(wt_op); \ + if (wtu > ws_op) { \ + r = 0; \ + } else { \ + r = static_cast(ws_op - wtu); \ + } \ + } else { \ + if (ws_op > std::numeric_limits::max() + wt_op) { \ + r = static_cast(std::numeric_limits::max()); \ + } else { \ + r = static_cast(ws_op - wt_op); \ + } \ + } \ + res |= (static_cast(r) & mask) << shift; \ + } \ + wd[i] = res; \ + } + +#define SUBSUU_S_DF(T, lanes, mask) \ + typedef typename std::make_unsigned::type uT; \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + uT ws_op = static_cast((ws[i] >> shift) & mask); \ + uT wt_op = static_cast((wt[i] >> shift) & mask); \ + uT wdu; \ + T r; \ + if (ws_op > wt_op) { \ + wdu = ws_op - wt_op; \ + if (wdu > std::numeric_limits::max()) { \ + r = std::numeric_limits::max(); \ + } else { \ + r = static_cast(wdu); \ + } \ + } else { \ + wdu = wt_op - ws_op; \ + CHECK(-std::numeric_limits::max() == \ + std::numeric_limits::min() + 1); \ + if (wdu <= std::numeric_limits::max()) { \ + r = -static_cast(wdu); \ + } else { \ + r = std::numeric_limits::min(); \ + } \ + } \ + res |= (static_cast(r) & mask) << shift; \ + } \ + wd[i] = res; \ + } + +#define ASUB_S_DF(T, lanes, mask) \ int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T ws_op = static_cast((ws >> shift) & mask); \ - T wt_op = static_cast((wt >> shift) & mask); \ - if (wt_op == 0) { \ - res = Unpredictable; \ - break; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= (static_cast(Abs(ws_op - wt_op)) & mask) << shift; \ } \ - res |= (static_cast(wt_op != 0 ? 
ws_op % wt_op : 0) & mask) \ - << shift; \ - } \ - return res + wd[i] = res; \ + } -#define SRAR_DF(T, lanes, mask) \ - uint64_t res = 0; \ - int size_in_bits = kMSARegSize / lanes; \ - for (int i = 0; i < lanes / 2; ++i) { \ - uint64_t shift = size_in_bits * i; \ - T src_op = static_cast((ws >> shift) & mask); \ - int shift_op = ((wt >> shift) & mask) % size_in_bits; \ - uint32_t bit = shift_op == 0 ? 0 : src_op >> (shift_op - 1) & 1; \ - res |= \ - (static_cast(ArithmeticShiftRight(src_op, shift_op) + bit) & \ - mask) \ - << shift; \ - } \ - return res +#define ASUB_U_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= (static_cast(ws_op > wt_op ? ws_op - wt_op \ + : wt_op - ws_op) & \ + mask) \ + << shift; \ + } \ + wd[i] = res; \ + } + +#define MULV_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + res |= (static_cast(ws_op * wt_op) & mask) << shift; \ + } \ + wd[i] = res; \ + } + +#define MADDV_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + T wd_op = static_cast((wd[i] >> shift) & mask); \ + res |= (static_cast(wd_op + ws_op * wt_op) & mask) << shift; \ + } \ + wd[i] = res; \ + } + +#define MSUBV_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + T wd_op = static_cast((wd[i] >> shift) & mask); \ + res |= (static_cast(wd_op - ws_op * wt_op) & mask) << shift; \ + } \ + wd[i] = res; \ + } + +#define DIV_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + if (wt_op == 0) { \ + res = Unpredictable; \ + break; \ + } \ + res |= (static_cast(ws_op / wt_op) & mask) << shift; \ + } \ + wd[i] = res; \ + } + +#define MOD_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T ws_op = static_cast((ws[i] >> shift) & mask); \ + T wt_op = static_cast((wt[i] >> shift) & mask); \ + if (wt_op == 0) { \ + res = Unpredictable; \ + break; \ + } \ + res |= (static_cast(wt_op != 0 ? 
ws_op % wt_op : 0) & mask) \ + << shift; \ + } \ + wd[i] = res; \ + } + +#define SRAR_DF(T, lanes, mask) \ + int size_in_bits = kMSARegSize / lanes; \ + for (int i = 0; i < 2; i++) { \ + uint64_t res = 0; \ + for (int j = 0; j < lanes / 2; ++j) { \ + uint64_t shift = size_in_bits * j; \ + T src_op = static_cast((ws[i] >> shift) & mask); \ + int shift_op = ((wt[i] >> shift) & mask) % size_in_bits; \ + uint32_t bit = shift_op == 0 ? 0 : src_op >> (shift_op - 1) & 1; \ + res |= (static_cast(ArithmeticShiftRight(src_op, shift_op) + \ + bit) & \ + mask) \ + << shift; \ + } \ + wd[i] = res; \ + } + +#define PCKEV_DF(T, lanes, mask) \ + T* ws_p = reinterpret_cast(ws); \ + T* wt_p = reinterpret_cast(wt); \ + T* wd_p = reinterpret_cast(wd); \ + for (int i = 0; i < lanes / 2; ++i) { \ + wd_p[i] = wt_p[2 * i]; \ + wd_p[i + lanes / 2] = ws_p[2 * i]; \ + } + +#define PCKOD_DF(T, lanes, mask) \ + T* ws_p = reinterpret_cast(ws); \ + T* wt_p = reinterpret_cast(wt); \ + T* wd_p = reinterpret_cast(wd); \ + for (int i = 0; i < lanes / 2; ++i) { \ + wd_p[i] = wt_p[2 * i + 1]; \ + wd_p[i + lanes / 2] = ws_p[2 * i + 1]; \ + } + +#define ILVL_DF(T, lanes, mask) \ + T* ws_p = reinterpret_cast(ws); \ + T* wt_p = reinterpret_cast(wt); \ + T* wd_p = reinterpret_cast(wd); \ + for (int i = 0; i < lanes / 2; ++i) { \ + wd_p[2 * i] = wt_p[i + lanes / 2]; \ + wd_p[2 * i + 1] = ws_p[i + lanes / 2]; \ + } + +#define ILVR_DF(T, lanes, mask) \ + T* ws_p = reinterpret_cast(ws); \ + T* wt_p = reinterpret_cast(wt); \ + T* wd_p = reinterpret_cast(wd); \ + for (int i = 0; i < lanes / 2; ++i) { \ + wd_p[2 * i] = wt_p[i]; \ + wd_p[2 * i + 1] = ws_p[i]; \ + } + +#define ILVEV_DF(T, lanes, mask) \ + T* ws_p = reinterpret_cast(ws); \ + T* wt_p = reinterpret_cast(wt); \ + T* wd_p = reinterpret_cast(wd); \ + for (int i = 0; i < lanes / 2; ++i) { \ + wd_p[2 * i] = wt_p[2 * i]; \ + wd_p[2 * i + 1] = ws_p[2 * i]; \ + } + +#define ILVOD_DF(T, lanes, mask) \ + T* ws_p = reinterpret_cast(ws); \ + T* wt_p = reinterpret_cast(wt); \ + T* wd_p = reinterpret_cast(wd); \ + for (int i = 0; i < lanes / 2; ++i) { \ + wd_p[2 * i] = wt_p[2 * i + 1]; \ + wd_p[2 * i + 1] = ws_p[2 * i + 1]; \ + } + +#define VSHF_DF(T, lanes, mask) \ + T* ws_p = reinterpret_cast(ws); \ + T* wt_p = reinterpret_cast(wt); \ + T* wd_p = reinterpret_cast(wd); \ + const int mask_not_valid = 0xc0; \ + const int mask_6bits = 0x3f; \ + for (int i = 0; i < lanes; ++i) { \ + if ((wd_p[i] & mask_not_valid)) { \ + wd_p[i] = 0; \ + } else { \ + int k = (wd_p[i] & mask_6bits) % (lanes * 2); \ + wd_p[i] = k > lanes ? 
ws_p[k - lanes] : wt_p[k]; \ + } \ + } + +#define HADD_DF(T, T_small, lanes) \ + T_small* ws_p = reinterpret_cast(ws); \ + T_small* wt_p = reinterpret_cast(wt); \ + T* wd_p = reinterpret_cast(wd); \ + for (int i = 0; i < lanes; ++i) { \ + wd_p[i] = static_cast(ws_p[2 * i + 1]) + static_cast(wt_p[2 * i]); \ + } + +#define HSUB_DF(T, T_small, lanes) \ + T_small* ws_p = reinterpret_cast(ws); \ + T_small* wt_p = reinterpret_cast(wt); \ + T* wd_p = reinterpret_cast(wd); \ + for (int i = 0; i < lanes; ++i) { \ + wd_p[i] = static_cast(ws_p[2 * i + 1]) - static_cast(wt_p[2 * i]); \ + } #define TEST_CASE(V) \ V(sll_b, SLL_DF, uint8_t, kMSALanesByte, UINT8_MAX) \ V(sll_h, SLL_DF, uint16_t, kMSALanesHalf, UINT16_MAX) \ V(sll_w, SLL_DF, uint32_t, kMSALanesWord, UINT32_MAX) \ V(sll_d, SLL_DF, uint64_t, kMSALanesDword, UINT64_MAX) \ - V(sra_b, SRA_DF, int8_t, kMSALanesByte, UINT8_MAX) \ - V(sra_h, SRA_DF, int16_t, kMSALanesHalf, UINT16_MAX) \ - V(sra_w, SRA_DF, int32_t, kMSALanesWord, UINT32_MAX) \ - V(sra_d, SRA_DF, int64_t, kMSALanesDword, UINT64_MAX) \ V(srl_b, SRL_DF, uint8_t, kMSALanesByte, UINT8_MAX) \ V(srl_h, SRL_DF, uint16_t, kMSALanesHalf, UINT16_MAX) \ V(srl_w, SRL_DF, uint32_t, kMSALanesWord, UINT32_MAX) \ @@ -10245,18 +10598,54 @@ TEST(MSA_3R_instructions) { V(mod_u_h, MOD_DF, uint16_t, kMSALanesHalf, UINT16_MAX) \ V(mod_u_w, MOD_DF, uint32_t, kMSALanesWord, UINT32_MAX) \ V(mod_u_d, MOD_DF, uint64_t, kMSALanesDword, UINT64_MAX) \ - V(srar_b, SRAR_DF, int8_t, kMSALanesByte, UINT8_MAX) \ - V(srar_h, SRAR_DF, int16_t, kMSALanesHalf, UINT16_MAX) \ - V(srar_w, SRAR_DF, int32_t, kMSALanesWord, UINT32_MAX) \ - V(srar_d, SRAR_DF, int64_t, kMSALanesDword, UINT64_MAX) \ V(srlr_b, SRAR_DF, uint8_t, kMSALanesByte, UINT8_MAX) \ V(srlr_h, SRAR_DF, uint16_t, kMSALanesHalf, UINT16_MAX) \ V(srlr_w, SRAR_DF, uint32_t, kMSALanesWord, UINT32_MAX) \ - V(srlr_d, SRAR_DF, uint64_t, kMSALanesDword, UINT64_MAX) + V(srlr_d, SRAR_DF, uint64_t, kMSALanesDword, UINT64_MAX) \ + V(pckev_b, PCKEV_DF, uint8_t, kMSALanesByte, UINT8_MAX) \ + V(pckev_h, PCKEV_DF, uint16_t, kMSALanesHalf, UINT16_MAX) \ + V(pckev_w, PCKEV_DF, uint32_t, kMSALanesWord, UINT32_MAX) \ + V(pckev_d, PCKEV_DF, uint64_t, kMSALanesDword, UINT64_MAX) \ + V(pckod_b, PCKOD_DF, uint8_t, kMSALanesByte, UINT8_MAX) \ + V(pckod_h, PCKOD_DF, uint16_t, kMSALanesHalf, UINT16_MAX) \ + V(pckod_w, PCKOD_DF, uint32_t, kMSALanesWord, UINT32_MAX) \ + V(pckod_d, PCKOD_DF, uint64_t, kMSALanesDword, UINT64_MAX) \ + V(ilvl_b, ILVL_DF, uint8_t, kMSALanesByte, UINT8_MAX) \ + V(ilvl_h, ILVL_DF, uint16_t, kMSALanesHalf, UINT16_MAX) \ + V(ilvl_w, ILVL_DF, uint32_t, kMSALanesWord, UINT32_MAX) \ + V(ilvl_d, ILVL_DF, uint64_t, kMSALanesDword, UINT64_MAX) \ + V(ilvr_b, ILVR_DF, uint8_t, kMSALanesByte, UINT8_MAX) \ + V(ilvr_h, ILVR_DF, uint16_t, kMSALanesHalf, UINT16_MAX) \ + V(ilvr_w, ILVR_DF, uint32_t, kMSALanesWord, UINT32_MAX) \ + V(ilvr_d, ILVR_DF, uint64_t, kMSALanesDword, UINT64_MAX) \ + V(ilvev_b, ILVEV_DF, uint8_t, kMSALanesByte, UINT8_MAX) \ + V(ilvev_h, ILVEV_DF, uint16_t, kMSALanesHalf, UINT16_MAX) \ + V(ilvev_w, ILVEV_DF, uint32_t, kMSALanesWord, UINT32_MAX) \ + V(ilvev_d, ILVEV_DF, uint64_t, kMSALanesDword, UINT64_MAX) \ + V(ilvod_b, ILVOD_DF, uint8_t, kMSALanesByte, UINT8_MAX) \ + V(ilvod_h, ILVOD_DF, uint16_t, kMSALanesHalf, UINT16_MAX) \ + V(ilvod_w, ILVOD_DF, uint32_t, kMSALanesWord, UINT32_MAX) \ + V(ilvod_d, ILVOD_DF, uint64_t, kMSALanesDword, UINT64_MAX) \ + V(vshf_b, VSHF_DF, uint8_t, kMSALanesByte, UINT8_MAX) \ + V(vshf_h, VSHF_DF, uint16_t, kMSALanesHalf, 
UINT16_MAX) \ + V(vshf_w, VSHF_DF, uint32_t, kMSALanesWord, UINT32_MAX) \ + V(vshf_d, VSHF_DF, uint64_t, kMSALanesDword, UINT64_MAX) \ + V(hadd_s_h, HADD_DF, int16_t, int8_t, kMSALanesHalf) \ + V(hadd_s_w, HADD_DF, int32_t, int16_t, kMSALanesWord) \ + V(hadd_s_d, HADD_DF, int64_t, int32_t, kMSALanesDword) \ + V(hadd_u_h, HADD_DF, uint16_t, uint8_t, kMSALanesHalf) \ + V(hadd_u_w, HADD_DF, uint32_t, uint16_t, kMSALanesWord) \ + V(hadd_u_d, HADD_DF, uint64_t, uint32_t, kMSALanesDword) \ + V(hsub_s_h, HSUB_DF, int16_t, int8_t, kMSALanesHalf) \ + V(hsub_s_w, HSUB_DF, int32_t, int16_t, kMSALanesWord) \ + V(hsub_s_d, HSUB_DF, int64_t, int32_t, kMSALanesDword) \ + V(hsub_u_h, HSUB_DF, uint16_t, uint8_t, kMSALanesHalf) \ + V(hsub_u_w, HSUB_DF, uint32_t, uint16_t, kMSALanesWord) \ + V(hsub_u_d, HSUB_DF, uint64_t, uint32_t, kMSALanesDword) #define RUN_TEST(instr, verify, type, lanes, mask) \ run_msa_3r(&tc[i], [](MacroAssembler& assm) { __ instr(w2, w1, w0); }, \ - [](uint64_t ws, uint64_t wt, uint64_t wd) { \ + [](uint64_t* ws, uint64_t* wt, uint64_t* wd) { \ verify(type, lanes, mask); \ }); @@ -10264,9 +10653,41 @@ TEST(MSA_3R_instructions) { TEST_CASE(RUN_TEST) } +#define RUN_TEST2(instr, verify, type, lanes, mask) \ + for (unsigned i = 0; i < arraysize(tc); i++) { \ + for (unsigned j = 0; j < 3; j++) { \ + for (unsigned k = 0; k < lanes; k++) { \ + type* element = reinterpret_cast(&tc[i]); \ + element[k + j * lanes] &= std::numeric_limits::max(); \ + } \ + } \ + } \ + run_msa_3r(&tc[i], [](MacroAssembler& assm) { __ instr(w2, w1, w0); }, \ + [](uint64_t* ws, uint64_t* wt, uint64_t* wd) { \ + verify(type, lanes, mask); \ + }); + +#define TEST_CASE2(V) \ + V(sra_b, SRA_DF, int8_t, kMSALanesByte, UINT8_MAX) \ + V(sra_h, SRA_DF, int16_t, kMSALanesHalf, UINT16_MAX) \ + V(sra_w, SRA_DF, int32_t, kMSALanesWord, UINT32_MAX) \ + V(sra_d, SRA_DF, int64_t, kMSALanesDword, UINT64_MAX) \ + V(srar_b, SRAR_DF, int8_t, kMSALanesByte, UINT8_MAX) \ + V(srar_h, SRAR_DF, int16_t, kMSALanesHalf, UINT16_MAX) \ + V(srar_w, SRAR_DF, int32_t, kMSALanesWord, UINT32_MAX) \ + V(srar_d, SRAR_DF, int64_t, kMSALanesDword, UINT64_MAX) + + for (size_t i = 0; i < arraysize(tc); ++i) { + TEST_CASE2(RUN_TEST2) + } + +#undef TEST_CASE +#undef TEST_CASE2 #undef RUN_TEST +#undef RUN_TEST2 #undef SLL_DF #undef SRL_DF +#undef SRA_DF #undef BCRL_DF #undef BSET_DF #undef BNEG_DF @@ -10297,6 +10718,15 @@ TEST(MSA_3R_instructions) { #undef DIV_DF #undef MOD_DF #undef SRAR_DF +#undef PCKEV_DF +#undef PCKOD_DF +#undef ILVL_DF +#undef ILVR_DF +#undef ILVEV_DF +#undef ILVOD_DF +#undef VSHF_DF +#undef HADD_DF +#undef HSUB_DF } struct TestCaseMsa3RF { @@ -10313,12 +10743,10 @@ struct ExpectedResult_MSA3RF { uint64_t exp_res_hi; }; -template +template void run_msa_3rf(const struct TestCaseMsa3RF* input, const struct ExpectedResult_MSA3RF* output, - Func Generate2RInstructionFunc, - FuncLoad load_elements_of_vector, - FuncStore store_elements_of_vector) { + Func Generate2RInstructionFunc) { Isolate* isolate = CcTest::i_isolate(); HandleScope scope(isolate); @@ -10349,28 +10777,8 @@ void run_msa_3rf(const struct TestCaseMsa3RF* input, (CALL_GENERATED_CODE(isolate, f, &res, 0, 0, 0, 0)); - if (store_elements_of_vector == store_uint64_elements_of_vector) { - CHECK_EQ(output->exp_res_lo, res.d[0]); - CHECK_EQ(output->exp_res_hi, res.d[1]); - } else if (store_elements_of_vector == store_uint32_elements_of_vector) { - const uint32_t* exp_res = - reinterpret_cast(&output->exp_res_lo); - CHECK_EQ(exp_res[0], res.w[0]); - CHECK_EQ(exp_res[1], res.w[1]); - 
CHECK_EQ(exp_res[2], res.w[2]); - CHECK_EQ(exp_res[3], res.w[3]); - } else { - const uint16_t* exp_res = - reinterpret_cast(&output->exp_res_lo); - CHECK_EQ(exp_res[0], res.h[0]); - CHECK_EQ(exp_res[1], res.h[1]); - CHECK_EQ(exp_res[2], res.h[2]); - CHECK_EQ(exp_res[3], res.h[3]); - CHECK_EQ(exp_res[4], res.h[4]); - CHECK_EQ(exp_res[5], res.h[5]); - CHECK_EQ(exp_res[6], res.h[6]); - CHECK_EQ(exp_res[7], res.h[7]); - } + CHECK_EQ(output->exp_res_lo, res.d[0]); + CHECK_EQ(output->exp_res_hi, res.d[1]); } struct TestCaseMsa3RF_F { @@ -10472,15 +10880,12 @@ TEST(MSA_floating_point_quiet_compare) { #define TEST_FP_QUIET_COMPARE_W(instruction, src, exp_res) \ run_msa_3rf(reinterpret_cast(src), \ reinterpret_cast(exp_res), \ - [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }, \ - load_uint32_elements_of_vector, \ - store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }); + #define TEST_FP_QUIET_COMPARE_D(instruction, src, exp_res) \ run_msa_3rf(reinterpret_cast(src), \ reinterpret_cast(exp_res), \ - [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }, \ - load_uint64_elements_of_vector, \ - store_uint64_elements_of_vector); + [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }); for (uint64_t i = 0; i < arraysize(tc_w); i++) { TEST_FP_QUIET_COMPARE_W(fcaf_w, &tc_w[i], &exp_res_fcaf) @@ -10595,16 +11000,14 @@ TEST(MSA_floating_point_arithmetic) { reinterpret_cast(src1), \ reinterpret_cast(function( \ src1, src2, src3, reinterpret_cast(&dst_container))), \ - [](MacroAssembler& assm) { __ instr(w2, w0, w1); }, \ - load_uint32_elements_of_vector, store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ instr(w2, w0, w1); }); #define FP_ARITHMETIC_DF_D(instr, function, src1, src2, src3) \ run_msa_3rf( \ reinterpret_cast(src1), \ reinterpret_cast(function( \ src1, src2, src3, reinterpret_cast(&dst_container))), \ - [](MacroAssembler& assm) { __ instr(w2, w0, w1); }, \ - load_uint64_elements_of_vector, store_uint64_elements_of_vector); + [](MacroAssembler& assm) { __ instr(w2, w0, w1); }); for (uint64_t i = 0; i < arraysize(tc_w); i++) { FP_ARITHMETIC_DF_W(fadd_w, fadd_function, &tc_w[i].ws_1, &tc_w[i].wt_1, @@ -10701,16 +11104,12 @@ TEST(MSA_fmin_fmin_a_fmax_fmax_a) { #define TEST_FP_MIN_MAX_W(instruction, src, exp_res) \ run_msa_3rf(reinterpret_cast(src), \ reinterpret_cast(exp_res), \ - [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }, \ - load_uint32_elements_of_vector, \ - store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }); #define TEST_FP_MIN_MAX_D(instruction, src, exp_res) \ run_msa_3rf(reinterpret_cast(src), \ reinterpret_cast(exp_res), \ - [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }, \ - load_uint64_elements_of_vector, \ - store_uint64_elements_of_vector); + [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }); for (uint64_t i = 0; i < arraysize(tc_w); i++) { TEST_FP_MIN_MAX_W(fmax_w, &tc_w[i], &exp_res_fmax_w[i]) @@ -10823,17 +11222,13 @@ TEST(MSA_fixed_point_arithmetic) { {0x800000007fffffff, 0x800000007c33f15e}, {0xb5deb625939d884d, 0xe40dcbfe728756b5}}; -#define TEST_FIXED_POINT_DF_H(instruction, src, exp_res) \ - run_msa_3rf((src), (exp_res), \ - [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }, \ - load_uint16_elements_of_vector, \ - store_uint16_elements_of_vector); +#define TEST_FIXED_POINT_DF_H(instruction, src, exp_res) \ + run_msa_3rf((src), (exp_res), \ + [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }); -#define 
TEST_FIXED_POINT_DF_W(instruction, src, exp_res) \ - run_msa_3rf((src), (exp_res), \ - [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }, \ - load_uint32_elements_of_vector, \ - store_uint32_elements_of_vector); +#define TEST_FIXED_POINT_DF_W(instruction, src, exp_res) \ + run_msa_3rf((src), (exp_res), \ + [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }); for (uint64_t i = 0; i < arraysize(tc_h); i++) { TEST_FIXED_POINT_DF_H(mul_q_h, &tc_h[i], &exp_res_mul_q_h[i]) @@ -10914,16 +11309,12 @@ TEST(MSA_fexdo) { #define TEST_FEXDO_H(instruction, src, exp_res) \ run_msa_3rf(reinterpret_cast(src), \ reinterpret_cast(exp_res), \ - [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }, \ - load_uint32_elements_of_vector, \ - store_uint16_elements_of_vector); + [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }); #define TEST_FEXDO_W(instruction, src, exp_res) \ run_msa_3rf(reinterpret_cast(src), \ reinterpret_cast(exp_res), \ - [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }, \ - load_uint64_elements_of_vector, \ - store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }); for (uint64_t i = 0; i < arraysize(tc_w); i++) { TEST_FEXDO_H(fexdo_h, &tc_w[i], &exp_res_fexdo_w[i]) @@ -10993,16 +11384,12 @@ TEST(MSA_ftq) { #define TEST_FTQ_H(instruction, src, exp_res) \ run_msa_3rf(reinterpret_cast(src), \ reinterpret_cast(exp_res), \ - [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }, \ - load_uint32_elements_of_vector, \ - store_uint16_elements_of_vector); + [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }); #define TEST_FTQ_W(instruction, src, exp_res) \ run_msa_3rf(reinterpret_cast(src), \ reinterpret_cast(exp_res), \ - [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }, \ - load_uint64_elements_of_vector, \ - store_uint32_elements_of_vector); + [](MacroAssembler& assm) { __ instruction(w2, w0, w1); }); for (uint64_t i = 0; i < arraysize(tc_w); i++) { TEST_FTQ_H(ftq_h, &tc_w[i], &exp_res_ftq_w[i]) diff --git a/test/unittests/BUILD.gn b/test/unittests/BUILD.gn index 10e5afc17f..d6d0a1067f 100644 --- a/test/unittests/BUILD.gn +++ b/test/unittests/BUILD.gn @@ -176,6 +176,7 @@ v8_source_set("unittests_sources") { "test-utils.cc", "test-utils.h", "unicode-unittest.cc", + "utils-unittest.cc", "value-serializer-unittest.cc", "wasm/control-transfer-unittest.cc", "wasm/decoder-unittest.cc", diff --git a/test/unittests/unittests.gyp b/test/unittests/unittests.gyp index 1528d9fd4a..575f550871 100644 --- a/test/unittests/unittests.gyp +++ b/test/unittests/unittests.gyp @@ -149,6 +149,7 @@ 'test-utils.h', 'test-utils.cc', 'unicode-unittest.cc', + 'utils-unittest.cc', 'value-serializer-unittest.cc', 'zone/segmentpool-unittest.cc', 'zone/zone-allocator-unittest.cc', diff --git a/test/unittests/utils-unittest.cc b/test/unittests/utils-unittest.cc new file mode 100644 index 0000000000..65088d873b --- /dev/null +++ b/test/unittests/utils-unittest.cc @@ -0,0 +1,113 @@ +// Copyright 2014 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include + +#include "src/utils.h" +#include "testing/gtest-support.h" + +namespace v8 { +namespace internal { + +template +class UtilsTest : public ::testing::Test {}; + +typedef ::testing::Types + IntegerTypes; + +TYPED_TEST_CASE(UtilsTest, IntegerTypes); + +TYPED_TEST(UtilsTest, SaturateSub) { + TypeParam min = std::numeric_limits::min(); + TypeParam max = std::numeric_limits::max(); + EXPECT_EQ(SaturateSub(min, 0), min); + EXPECT_EQ(SaturateSub(max, 0), max); + EXPECT_EQ(SaturateSub(max, min), max); + EXPECT_EQ(SaturateSub(min, max), min); + EXPECT_EQ(SaturateSub(min, max / 3), min); + EXPECT_EQ(SaturateSub(min + 1, 2), min); + if (std::numeric_limits::is_signed) { + EXPECT_EQ(SaturateSub(min, min), static_cast(0)); + EXPECT_EQ(SaturateSub(0, min), max); + EXPECT_EQ(SaturateSub(max / 3, min), max); + EXPECT_EQ(SaturateSub(max / 5, min), max); + EXPECT_EQ(SaturateSub(min / 3, max), min); + EXPECT_EQ(SaturateSub(min / 9, max), min); + EXPECT_EQ(SaturateSub(max, min / 3), max); + EXPECT_EQ(SaturateSub(min, max / 3), min); + EXPECT_EQ(SaturateSub(max / 3 * 2, min / 2), max); + EXPECT_EQ(SaturateSub(min / 3 * 2, max / 2), min); + } else { + EXPECT_EQ(SaturateSub(min, min), min); + EXPECT_EQ(SaturateSub(0, min), min); + EXPECT_EQ(SaturateSub(0, max), min); + EXPECT_EQ(SaturateSub(max / 3, max), min); + EXPECT_EQ(SaturateSub(max - 3, max), min); + } + TypeParam test_cases[] = {static_cast(min / 23), + static_cast(max / 3), + 63, + static_cast(min / 6), + static_cast(max / 55), + static_cast(min / 2), + static_cast(max / 2), + 0, + 1, + 2, + 3, + 4, + 42}; + TRACED_FOREACH(TypeParam, x, test_cases) { + TRACED_FOREACH(TypeParam, y, test_cases) { + if (std::numeric_limits::is_signed) { + EXPECT_EQ(SaturateSub(x, y), x - y); + } else { + EXPECT_EQ(SaturateSub(x, y), y > x ? min : x - y); + } + } + } +} + +TYPED_TEST(UtilsTest, SaturateAdd) { + TypeParam min = std::numeric_limits::min(); + TypeParam max = std::numeric_limits::max(); + EXPECT_EQ(SaturateAdd(min, min), min); + EXPECT_EQ(SaturateAdd(max, max), max); + EXPECT_EQ(SaturateAdd(min, min / 3), min); + EXPECT_EQ(SaturateAdd(max / 8 * 7, max / 3 * 2), max); + EXPECT_EQ(SaturateAdd(min / 3 * 2, min / 8 * 7), min); + EXPECT_EQ(SaturateAdd(max / 20 * 18, max / 25 * 18), max); + EXPECT_EQ(SaturateAdd(min / 3 * 2, min / 3 * 2), min); + EXPECT_EQ(SaturateAdd(max - 1, 2), max); + EXPECT_EQ(SaturateAdd(max - 100, 101), max); + TypeParam test_cases[] = {static_cast(min / 23), + static_cast(max / 3), + 63, + static_cast(min / 6), + static_cast(max / 55), + static_cast(min / 2), + static_cast(max / 2), + 0, + 1, + 2, + 3, + 4, + 42}; + TRACED_FOREACH(TypeParam, x, test_cases) { + TRACED_FOREACH(TypeParam, y, test_cases) { + EXPECT_EQ(SaturateAdd(x, y), x + y); + } + } +} + +} // namespace internal +} // namespace v8
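Aside, not part of the patch: the new utils-unittest.cc above checks that SaturateAdd and SaturateSub (from src/utils.h) clamp to the limits of the operand type instead of wrapping. A minimal standalone sketch of those semantics, assuming ordinary two's-complement integer types; the *Sketch names are illustrative and are not the real helpers.

// Reference sketch of saturating add/sub, clamping to numeric_limits<T>.
#include <cassert>
#include <cstdint>
#include <limits>
#include <type_traits>

template <typename T>
T SaturateAddSketch(T a, T b) {
  static_assert(std::is_integral<T>::value, "integral types only");
  if (std::is_signed<T>::value) {
    if (a > 0 && b > std::numeric_limits<T>::max() - a)
      return std::numeric_limits<T>::max();
    if (a < 0 && b < std::numeric_limits<T>::min() - a)
      return std::numeric_limits<T>::min();
  } else {
    if (b > std::numeric_limits<T>::max() - a)
      return std::numeric_limits<T>::max();
  }
  return static_cast<T>(a + b);
}

template <typename T>
T SaturateSubSketch(T a, T b) {
  static_assert(std::is_integral<T>::value, "integral types only");
  if (std::is_signed<T>::value) {
    if (b < 0 && a > std::numeric_limits<T>::max() + b)
      return std::numeric_limits<T>::max();
    if (b > 0 && a < std::numeric_limits<T>::min() + b)
      return std::numeric_limits<T>::min();
  } else {
    if (a < b) return std::numeric_limits<T>::min();  // 0 for unsigned types
  }
  return static_cast<T>(a - b);
}

int main() {
  assert(SaturateAddSketch<int8_t>(120, 20) == 127);    // clamps at max
  assert(SaturateSubSketch<uint8_t>(3, 7) == 0);        // clamps at 0
  assert(SaturateSubSketch<int8_t>(-120, 20) == -128);  // clamps at min
}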