MIPS[64]: Implement MSA Bit instructions in simulator

Add support for slli, srai, srli, bclri, bseti, bnegi, binsli, binsri,
sat_s, sat_u, srari, srlri MSA instructions in mips32 and mips64
simulators.

Bug: 
Change-Id: I1a351a23c733f0bfbc829f25874df26295327afc
Reviewed-on: https://chromium-review.googlesource.com/568020
Reviewed-by: Ivica Bogosavljevic <ivica.bogosavljevic@imgtec.com>
Commit-Queue: Ivica Bogosavljevic <ivica.bogosavljevic@imgtec.com>
Cr-Commit-Position: refs/heads/master@{#46764}
This commit is contained in:
Dusan Simicic 2017-07-12 13:30:55 +02:00 committed by Commit Bot
parent 176a2b24fb
commit 6d9025ef57
6 changed files with 1236 additions and 28 deletions

View File

@ -4583,25 +4583,127 @@ void Simulator::DecodeTypeMsaELM() {
}
}
// Computes one lane of an MSA BIT-format instruction (immediate shifts, single
// bit clear/set/negate, bit-field insert, saturation and rounding shifts).
//
//   opcode  One of SLLI, SRAI, SRLI, BCLRI, BSETI, BNEGI, BINSLI, BINSRI,
//           SAT_S, SAT_U, SRARI, SRLRI. Anything else hits UNREACHABLE().
//   wd      Current value of the destination lane; only BINSLI and BINSRI
//           read it (bits not taken from ws are preserved from wd).
//   ws      Source lane.
//   m       Immediate bit index / shift amount. DecodeTypeMsaBIT DCHECKs it
//           against the element width before calling, so it is expected to
//           be in [0, 8 * sizeof(T)).
//
// Returns the new value of the destination lane.
template <typename T>
T Simulator::MsaBitInstrHelper(uint32_t opcode, T wd, T ws, int32_t m) {
  typedef typename std::make_unsigned<T>::type uT;
  typedef typename std::make_signed<T>::type sT;
  T res;
  switch (opcode) {
    case SLLI:
      // Shift in the unsigned domain: left-shifting a negative signed value
      // is undefined behaviour before C++20. The truncated bits are the same.
      res = static_cast<T>(static_cast<uT>(ws) << m);
      break;
    case SRAI:
      res = static_cast<T>(ArithmeticShiftRight(ws, m));
      break;
    case SRLI:
      res = static_cast<T>(static_cast<uT>(ws) >> m);
      break;
    case BCLRI:
      res = static_cast<T>(static_cast<T>(~(1ull << m)) & ws);
      break;
    case BSETI:
      res = static_cast<T>(static_cast<T>(1ull << m) | ws);
      break;
    case BNEGI:
      res = static_cast<T>(static_cast<T>(1ull << m) ^ ws);
      break;
    case BINSLI: {
      // Copy the (m + 1) most significant bits of ws into wd.
      int elem_size = 8 * sizeof(T);
      int bits = m + 1;
      if (bits == elem_size) {
        // Whole element is replaced; also avoids a shift by elem_size below.
        res = static_cast<T>(ws);
      } else {
        uint64_t mask = ((1ull << bits) - 1) << (elem_size - bits);
        res = static_cast<T>((static_cast<T>(mask) & ws) |
                             (static_cast<T>(~mask) & wd));
      }
    } break;
    case BINSRI: {
      // Copy the (m + 1) least significant bits of ws into wd.
      int elem_size = 8 * sizeof(T);
      int bits = m + 1;
      if (bits == elem_size) {
        res = static_cast<T>(ws);
      } else {
        uint64_t mask = (1ull << bits) - 1;
        res = static_cast<T>((static_cast<T>(mask) & ws) |
                             (static_cast<T>(~mask) & wd));
      }
    } break;
    case SAT_S: {
      // Saturate to the signed (m + 1)-bit range [-2^m, 2^m - 1]. The bounds
      // are derived from an unsigned shift so that m == 63 (legal for the
      // doubleword format) does not overflow int64_t.
      const int64_t max_val = static_cast<int64_t>((1ull << m) - 1);
      const int64_t min_val = -max_val - 1;
      // Sign-extend the element to 64 bits without shifting a negative value.
      const int64_t ws_i64 = static_cast<int64_t>(static_cast<sT>(ws));
      res = static_cast<T>(ws_i64 < min_val
                               ? min_val
                               : ws_i64 > max_val ? max_val : ws_i64);
    } break;
    case SAT_U: {
      // Saturate to the unsigned (m + 1)-bit range [0, 2^(m + 1) - 1].
      const uint64_t max_val = static_cast<uint64_t>(-1ULL >> (64 - (m + 1)));
      // Zero-extend the element to 64 bits.
      const uint64_t ws_u64 = static_cast<uint64_t>(static_cast<uT>(ws));
      res = static_cast<T>(ws_u64 < max_val ? ws_u64 : max_val);
    } break;
    case SRARI:
      if (!m) {
        // No bits are shifted out, so there is nothing to round.
        res = static_cast<T>(ws);
      } else {
        // Arithmetic shift, then add the last bit shifted out (round half up).
        res = static_cast<T>(ArithmeticShiftRight(ws, m)) +
              static_cast<T>((ws >> (m - 1)) & 0x1);
      }
      break;
    case SRLRI:
      if (!m) {
        res = static_cast<T>(ws);
      } else {
        // Logical shift, then add the last bit shifted out.
        res = static_cast<T>(static_cast<uT>(ws) >> m) +
              static_cast<T>((ws >> (m - 1)) & 0x1);
      }
      break;
    default:
      UNREACHABLE();
  }
  return res;
}
void Simulator::DecodeTypeMsaBIT() {
DCHECK(IsMipsArchVariant(kMips32r6));
DCHECK(CpuFeatures::IsSupported(MIPS_SIMD));
uint32_t opcode = instr_.InstructionBits() & kMsaBITMask;
int32_t m = instr_.MsaBitMValue();
msa_reg_t wd, ws;
switch (opcode) {
case SLLI:
case SRAI:
case SRLI:
case BCLRI:
case BSETI:
case BNEGI:
case BINSLI:
case BINSRI:
case SAT_S:
case SAT_U:
case SRARI:
case SRLRI:
UNIMPLEMENTED();
#define MSA_BIT_DF(elem, num_of_lanes) \
get_msa_register(instr_.WsValue(), ws.elem); \
if (opcode == BINSLI || opcode == BINSRI) { \
get_msa_register(instr_.WdValue(), wd.elem); \
} \
for (int i = 0; i < num_of_lanes; i++) { \
wd.elem[i] = MsaBitInstrHelper(opcode, wd.elem[i], ws.elem[i], m); \
} \
set_msa_register(instr_.WdValue(), wd.elem); \
TraceMSARegWr(wd.elem)
switch (DecodeMsaDataFormat()) {
case MSA_BYTE:
DCHECK(m < kMSARegSize / kMSALanesByte);
MSA_BIT_DF(b, kMSALanesByte);
break;
case MSA_HALF:
DCHECK(m < kMSARegSize / kMSALanesHalf);
MSA_BIT_DF(h, kMSALanesHalf);
break;
case MSA_WORD:
DCHECK(m < kMSARegSize / kMSALanesWord);
MSA_BIT_DF(w, kMSALanesWord);
break;
case MSA_DWORD:
DCHECK(m < kMSARegSize / kMSALanesDword);
MSA_BIT_DF(d, kMSALanesDword);
break;
default:
UNREACHABLE();

View File

@ -424,6 +424,8 @@ class Simulator {
void DecodeTypeMsa2RF();
template <typename T>
T MsaI5InstrHelper(uint32_t opcode, T ws, int32_t i5);
// Computes one lane of an MSA BIT-format instruction selected by |opcode|.
// |wd| is the current destination lane, |ws| the source lane and |m| the
// immediate; the new destination lane value is returned.
template <typename T>
T MsaBitInstrHelper(uint32_t opcode, T wd, T ws, int32_t m);
// Returns the RsValue() field of instr_.
inline int32_t rs_reg() const { return instr_.RsValue(); }
// Returns get_register() of the register number given by rs_reg().
inline int32_t rs() const { return get_register(rs_reg()); }

View File

@ -4808,25 +4808,127 @@ void Simulator::DecodeTypeMsaELM() {
}
}
// Computes one lane of an MSA BIT-format instruction (immediate shifts, single
// bit clear/set/negate, bit-field insert, saturation and rounding shifts).
//
//   opcode  One of SLLI, SRAI, SRLI, BCLRI, BSETI, BNEGI, BINSLI, BINSRI,
//           SAT_S, SAT_U, SRARI, SRLRI. Anything else hits UNREACHABLE().
//   wd      Current value of the destination lane; only BINSLI and BINSRI
//           read it (bits not taken from ws are preserved from wd).
//   ws      Source lane.
//   m       Immediate bit index / shift amount. DecodeTypeMsaBIT DCHECKs it
//           against the element width before calling, so it is expected to
//           be in [0, 8 * sizeof(T)).
//
// Returns the new value of the destination lane.
template <typename T>
T Simulator::MsaBitInstrHelper(uint32_t opcode, T wd, T ws, int32_t m) {
  typedef typename std::make_unsigned<T>::type uT;
  typedef typename std::make_signed<T>::type sT;
  T res;
  switch (opcode) {
    case SLLI:
      // Shift in the unsigned domain: left-shifting a negative signed value
      // is undefined behaviour before C++20. The truncated bits are the same.
      res = static_cast<T>(static_cast<uT>(ws) << m);
      break;
    case SRAI:
      res = static_cast<T>(ArithmeticShiftRight(ws, m));
      break;
    case SRLI:
      res = static_cast<T>(static_cast<uT>(ws) >> m);
      break;
    case BCLRI:
      res = static_cast<T>(static_cast<T>(~(1ull << m)) & ws);
      break;
    case BSETI:
      res = static_cast<T>(static_cast<T>(1ull << m) | ws);
      break;
    case BNEGI:
      res = static_cast<T>(static_cast<T>(1ull << m) ^ ws);
      break;
    case BINSLI: {
      // Copy the (m + 1) most significant bits of ws into wd.
      int elem_size = 8 * sizeof(T);
      int bits = m + 1;
      if (bits == elem_size) {
        // Whole element is replaced; also avoids a shift by elem_size below.
        res = static_cast<T>(ws);
      } else {
        uint64_t mask = ((1ull << bits) - 1) << (elem_size - bits);
        res = static_cast<T>((static_cast<T>(mask) & ws) |
                             (static_cast<T>(~mask) & wd));
      }
    } break;
    case BINSRI: {
      // Copy the (m + 1) least significant bits of ws into wd.
      int elem_size = 8 * sizeof(T);
      int bits = m + 1;
      if (bits == elem_size) {
        res = static_cast<T>(ws);
      } else {
        uint64_t mask = (1ull << bits) - 1;
        res = static_cast<T>((static_cast<T>(mask) & ws) |
                             (static_cast<T>(~mask) & wd));
      }
    } break;
    case SAT_S: {
      // Saturate to the signed (m + 1)-bit range [-2^m, 2^m - 1]. The bounds
      // are derived from an unsigned shift so that m == 63 (legal for the
      // doubleword format) does not overflow int64_t.
      const int64_t max_val = static_cast<int64_t>((1ull << m) - 1);
      const int64_t min_val = -max_val - 1;
      // Sign-extend the element to 64 bits without shifting a negative value.
      const int64_t ws_i64 = static_cast<int64_t>(static_cast<sT>(ws));
      res = static_cast<T>(ws_i64 < min_val
                               ? min_val
                               : ws_i64 > max_val ? max_val : ws_i64);
    } break;
    case SAT_U: {
      // Saturate to the unsigned (m + 1)-bit range [0, 2^(m + 1) - 1].
      const uint64_t max_val = static_cast<uint64_t>(-1ULL >> (64 - (m + 1)));
      // Zero-extend the element to 64 bits.
      const uint64_t ws_u64 = static_cast<uint64_t>(static_cast<uT>(ws));
      res = static_cast<T>(ws_u64 < max_val ? ws_u64 : max_val);
    } break;
    case SRARI:
      if (!m) {
        // No bits are shifted out, so there is nothing to round.
        res = static_cast<T>(ws);
      } else {
        // Arithmetic shift, then add the last bit shifted out (round half up).
        res = static_cast<T>(ArithmeticShiftRight(ws, m)) +
              static_cast<T>((ws >> (m - 1)) & 0x1);
      }
      break;
    case SRLRI:
      if (!m) {
        res = static_cast<T>(ws);
      } else {
        // Logical shift, then add the last bit shifted out.
        res = static_cast<T>(static_cast<uT>(ws) >> m) +
              static_cast<T>((ws >> (m - 1)) & 0x1);
      }
      break;
    default:
      UNREACHABLE();
  }
  return res;
}
void Simulator::DecodeTypeMsaBIT() {
DCHECK(kArchVariant == kMips64r6);
DCHECK(CpuFeatures::IsSupported(MIPS_SIMD));
uint32_t opcode = instr_.InstructionBits() & kMsaBITMask;
int32_t m = instr_.MsaBitMValue();
msa_reg_t wd, ws;
switch (opcode) {
case SLLI:
case SRAI:
case SRLI:
case BCLRI:
case BSETI:
case BNEGI:
case BINSLI:
case BINSRI:
case SAT_S:
case SAT_U:
case SRARI:
case SRLRI:
UNIMPLEMENTED();
#define MSA_BIT_DF(elem, num_of_lanes) \
get_msa_register(instr_.WsValue(), ws.elem); \
if (opcode == BINSLI || opcode == BINSRI) { \
get_msa_register(instr_.WdValue(), wd.elem); \
} \
for (int i = 0; i < num_of_lanes; i++) { \
wd.elem[i] = MsaBitInstrHelper(opcode, wd.elem[i], ws.elem[i], m); \
} \
set_msa_register(instr_.WdValue(), wd.elem); \
TraceMSARegWr(wd.elem)
switch (DecodeMsaDataFormat()) {
case MSA_BYTE:
DCHECK(m < kMSARegSize / kMSALanesByte);
MSA_BIT_DF(b, kMSALanesByte);
break;
case MSA_HALF:
DCHECK(m < kMSARegSize / kMSALanesHalf);
MSA_BIT_DF(h, kMSALanesHalf);
break;
case MSA_WORD:
DCHECK(m < kMSARegSize / kMSALanesWord);
MSA_BIT_DF(w, kMSALanesWord);
break;
case MSA_DWORD:
DCHECK(m < kMSARegSize / kMSALanesDword);
MSA_BIT_DF(d, kMSALanesDword);
break;
default:
UNREACHABLE();

View File

@ -441,6 +441,8 @@ class Simulator {
void DecodeTypeMsa2RF();
template <typename T>
T MsaI5InstrHelper(uint32_t opcode, T ws, int32_t i5);
// Computes one lane of an MSA BIT-format instruction selected by |opcode|.
// |wd| is the current destination lane, |ws| the source lane and |m| the
// immediate; the new destination lane value is returned.
template <typename T>
T MsaBitInstrHelper(uint32_t opcode, T wd, T ws, int32_t m);
// Executing is handled based on the instruction type.
void DecodeTypeRegister();

View File

@ -6997,4 +6997,506 @@ TEST(MSA_vector) {
}
}
// One input vector for the MSA BIT-format instruction tests. Field order
// matters: the tests fill this struct with positional aggregate initializers.
struct TestCaseMsaBit {
// Initial contents of the destination register (loaded into w2 by
// run_msa_bit), as low and high 64-bit halves.
uint64_t wd_lo;
uint64_t wd_hi;
// Contents of the source register (loaded into w0 by run_msa_bit), as low and
// high 64-bit halves.
uint64_t ws_lo;
uint64_t ws_hi;
// Raw immediate. Each test reduces it modulo the element width (m % 8,
// m % 16, ...) before handing it to the assembler.
uint32_t m;
};
// Assembles and runs a snippet for one MSA BIT-format test vector.
//
// The snippet loads input->ws_lo/ws_hi into w0 and input->wd_lo/wd_hi into w2,
// emits the instruction under test through
// GenerateInstructionFunc(assm, input->m), then stores the four 32-bit words
// of w2 at offsets 0, 4, 8 and 12 from a0. The generated code is called with
// &res as its first argument. Each 64-bit half of the result is CHECKed
// against GenerateOperationFunc(wd_half, ws_half, input->m).
template <typename InstFunc, typename OperFunc>
void run_msa_bit(struct TestCaseMsaBit* input, InstFunc GenerateInstructionFunc,
OperFunc GenerateOperationFunc) {
Isolate* isolate = CcTest::i_isolate();
HandleScope scope(isolate);
MacroAssembler assm(isolate, NULL, 0, v8::internal::CodeObjectRequired::kYes);
CpuFeatureScope fscope(&assm, MIPS_SIMD);
msa_reg_t res;
// Fills a 128-bit MSA register from two 64-bit values, one 32-bit word at a
// time (t0/t1 are used as scratch registers).
#define LOAD_W_REG(lo, hi, w_reg) \
__ li(t0, static_cast<uint32_t>(lo & 0xffffffff)); \
__ li(t1, static_cast<uint32_t>((lo >> 32) & 0xffffffff)); \
__ insert_w(w_reg, 0, t0); \
__ insert_w(w_reg, 1, t1); \
__ li(t0, static_cast<uint32_t>(hi & 0xffffffff)); \
__ li(t1, static_cast<uint32_t>((hi >> 32) & 0xffffffff)); \
__ insert_w(w_reg, 2, t0); \
__ insert_w(w_reg, 3, t1)
LOAD_W_REG(input->ws_lo, input->ws_hi, w0);
LOAD_W_REG(input->wd_lo, input->wd_hi, w2);
#undef LOAD_W_REG
// Emit the instruction under test; the callbacks used by the tests operate on
// w2 (destination) and w0 (source).
GenerateInstructionFunc(assm, input->m);
// Copy w2 out word by word to the buffer addressed by a0.
__ copy_u_w(t2, w2, 0);
__ sw(t2, MemOperand(a0, 0));
__ copy_u_w(t2, w2, 1);
__ sw(t2, MemOperand(a0, 4));
__ copy_u_w(t2, w2, 2);
__ sw(t2, MemOperand(a0, 8));
__ copy_u_w(t2, w2, 3);
__ sw(t2, MemOperand(a0, 12));
__ jr(ra);
__ nop();
CodeDesc desc;
assm.GetCode(isolate, &desc);
Handle<Code> code = isolate->factory()->NewCode(
desc, Code::ComputeFlags(Code::STUB), Handle<Code>());
#ifdef OBJECT_PRINT
code->Print(std::cout);
#endif
F3 f = FUNCTION_CAST<F3>(code->entry());
(CALL_GENERATED_CODE(isolate, f, &res, 0, 0, 0, 0));
// The reference lambda works on one 64-bit half of the register at a time.
CHECK_EQ(GenerateOperationFunc(input->wd_lo, input->ws_lo, input->m),
res.d[0]);
CHECK_EQ(GenerateOperationFunc(input->wd_hi, input->ws_hi, input->m),
res.d[1]);
}
// Exercises slli, srli, srlri, srai and srari for the byte, halfword, word and
// doubleword formats. Each vector's m is reduced modulo the element width for
// both the emitted instruction and the reference lambda.
//
// NOTE: the srlri/srari reference expressions shift by (m % elem_size - 1), so
// they are only valid when m % elem_size != 0. None of the vectors below
// reduces to zero for any of the four element widths; keep that in mind when
// adding vectors.
TEST(MSA_slli_srai_srli) {
// Skipped unless running as MIPS32r6 with MSA support.
if (!IsMipsArchVariant(kMips32r6) || !CpuFeatures::IsSupported(MIPS_SIMD))
return;
CcTest::InitializeVM();
struct TestCaseMsaBit tc[] = {
// wd_lo, wd_hi ws_lo, ws_hi, m
{0, 0, 0xf35862e13e38f8b0, 0x4f41ffdef2bfe636, 3},
{0, 0, 0x64be4f6dbe9caa51, 0x6b23de1a687d9cb9, 5},
{0, 0, 0x1169751bb9a7d9c3, 0xf7a594aec8ef8a9c, 9},
{0, 0, 0x2b665362c4e812df, 0x3a0d80d68b3f8bc8, 13},
{0, 0, 0x566be7ba4365b70a, 0x01ebbc1937d76cb4, 21},
{0, 0, 0x380e2deb9d3f8aae, 0x017e0de0bcc6ca42, 30},
{0, 0, 0xa46a3a9bcb43f4e5, 0x1c62c8473bdfcffb, 45},
{0, 0, 0xf6759d85f23b5a2b, 0x5c042ae42c6d12c1, 61}};
// Builds a reference lambda for unsigned element operations. It walks the
// lanes / 2 elements packed into one 64-bit half, exposes each zero-extended
// element as `elem`, evaluates `func` (which may also use `m` and `elem_size`)
// and packs the masked result back into place.
#define SLLI_SRLI_DF(lanes, mask, func) \
[](uint64_t wd, uint64_t ws, uint32_t m) { \
uint64_t res = 0; \
int elem_size = kMSARegSize / lanes; \
for (int i = 0; i < lanes / 2; ++i) { \
int shift = elem_size * i; \
uint64_t elem = (ws >> shift) & mask; \
res |= ((func)&mask) << shift; \
} \
return res; \
}
// Same as SLLI_SRLI_DF, but `elem` is the element sign-extended to int64_t.
#define SRAI_DF(lanes, mask, func) \
[](uint64_t wd, uint64_t ws, uint32_t m) { \
uint64_t res = 0; \
int elem_size = kMSARegSize / lanes; \
for (int i = 0; i < lanes / 2; ++i) { \
int shift = elem_size * i; \
int64_t elem = \
static_cast<int64_t>(((ws >> shift) & mask) << (64 - elem_size)) >> \
(64 - elem_size); \
res |= static_cast<uint64_t>((func)&mask) << shift; \
} \
return res; \
}
for (size_t i = 0; i < sizeof(tc) / sizeof(TestCaseMsaBit); ++i) {
// slli: shift left logical.
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ slli_b(w2, w0, m % 8); },
SLLI_SRLI_DF(kMSALanesByte, UINT8_MAX, (elem << (m % elem_size))));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ slli_h(w2, w0, m % 16); },
SLLI_SRLI_DF(kMSALanesHalf, UINT16_MAX, (elem << (m % elem_size))));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ slli_w(w2, w0, m % 32); },
SLLI_SRLI_DF(kMSALanesWord, UINT32_MAX, (elem << (m % elem_size))));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ slli_d(w2, w0, m % 64); },
SLLI_SRLI_DF(kMSALanesDword, UINT64_MAX, (elem << (m % elem_size))));
// srli: shift right logical.
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ srli_b(w2, w0, m % 8); },
SLLI_SRLI_DF(kMSALanesByte, UINT8_MAX, (elem >> (m % elem_size))));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ srli_h(w2, w0, m % 16); },
SLLI_SRLI_DF(kMSALanesHalf, UINT16_MAX, (elem >> (m % elem_size))));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ srli_w(w2, w0, m % 32); },
SLLI_SRLI_DF(kMSALanesWord, UINT32_MAX, (elem >> (m % elem_size))));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ srli_d(w2, w0, m % 64); },
SLLI_SRLI_DF(kMSALanesDword, UINT64_MAX, (elem >> (m % elem_size))));
// srlri: shift right logical, then add the last bit shifted out.
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ srlri_b(w2, w0, m % 8); },
SLLI_SRLI_DF(
kMSALanesByte, UINT8_MAX,
(elem >> (m % elem_size)) + ((elem >> (m % elem_size - 1)) & 0x1)));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ srlri_h(w2, w0, m % 16); },
SLLI_SRLI_DF(
kMSALanesHalf, UINT16_MAX,
(elem >> (m % elem_size)) + ((elem >> (m % elem_size - 1)) & 0x1)));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ srlri_w(w2, w0, m % 32); },
SLLI_SRLI_DF(
kMSALanesWord, UINT32_MAX,
(elem >> (m % elem_size)) + ((elem >> (m % elem_size - 1)) & 0x1)));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ srlri_d(w2, w0, m % 64); },
SLLI_SRLI_DF(
kMSALanesDword, UINT64_MAX,
(elem >> (m % elem_size)) + ((elem >> (m % elem_size - 1)) & 0x1)));
// srai: shift right arithmetic.
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ srai_b(w2, w0, m % 8); },
SRAI_DF(kMSALanesByte, UINT8_MAX,
ArithmeticShiftRight(elem, m % elem_size)));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ srai_h(w2, w0, m % 16); },
SRAI_DF(kMSALanesHalf, UINT16_MAX,
ArithmeticShiftRight(elem, m % elem_size)));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ srai_w(w2, w0, m % 32); },
SRAI_DF(kMSALanesWord, UINT32_MAX,
ArithmeticShiftRight(elem, m % elem_size)));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ srai_d(w2, w0, m % 64); },
SRAI_DF(kMSALanesDword, UINT64_MAX,
ArithmeticShiftRight(elem, m % elem_size)));
// srari: shift right arithmetic, then add the last bit shifted out.
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ srari_b(w2, w0, m % 8); },
SRAI_DF(kMSALanesByte, UINT8_MAX,
ArithmeticShiftRight(elem, m % elem_size) +
((elem >> (m % elem_size - 1)) & 0x1)));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ srari_h(w2, w0, m % 16); },
SRAI_DF(kMSALanesHalf, UINT16_MAX,
ArithmeticShiftRight(elem, m % elem_size) +
((elem >> (m % elem_size - 1)) & 0x1)));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ srari_w(w2, w0, m % 32); },
SRAI_DF(kMSALanesWord, UINT32_MAX,
ArithmeticShiftRight(elem, m % elem_size) +
((elem >> (m % elem_size - 1)) & 0x1)));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ srari_d(w2, w0, m % 64); },
SRAI_DF(kMSALanesDword, UINT64_MAX,
ArithmeticShiftRight(elem, m % elem_size) +
((elem >> (m % elem_size - 1)) & 0x1)));
}
#undef SLLI_SRLI_DF
#undef SRAI_DF
}
// Exercises bclri (clear bit), bseti (set bit) and bnegi (toggle bit) for the
// byte, halfword, word and doubleword formats. The bit index is m reduced
// modulo the element width, for both the emitted instruction and the
// reference lambda.
TEST(MSA_bclri_bseti_bnegi) {
// Skipped unless running as MIPS32r6 with MSA support.
if (!IsMipsArchVariant(kMips32r6) || !CpuFeatures::IsSupported(MIPS_SIMD))
return;
CcTest::InitializeVM();
struct TestCaseMsaBit tc[] = {
// wd_lo, wd_hi, ws_lo, ws_hi, m
{0, 0, 0xf35862e13e38f8b0, 0x4f41ffdef2bfe636, 3},
{0, 0, 0x64be4f6dbe9caa51, 0x6b23de1a687d9cb9, 5},
{0, 0, 0x1169751bb9a7d9c3, 0xf7a594aec8ef8a9c, 9},
{0, 0, 0x2b665362c4e812df, 0x3a0d80d68b3f8bc8, 13},
{0, 0, 0x566be7ba4365b70a, 0x01ebbc1937d76cb4, 21},
{0, 0, 0x380e2deb9d3f8aae, 0x017e0de0bcc6ca42, 30},
{0, 0, 0xa46a3a9bcb43f4e5, 0x1c62c8473bdfcffb, 45},
{0, 0, 0xf6759d85f23b5a2b, 0x5c042ae42c6d12c1, 61}};
// Builds a reference lambda that walks the lanes / 2 elements packed into one
// 64-bit half, exposes each zero-extended element as `elem`, evaluates `func`
// (which may also use `m` and `elem_size`) and packs the masked result back.
#define BCLRI_BSETI_BNEGI_DF(lanes, mask, func) \
[](uint64_t wd, uint64_t ws, uint32_t m) { \
uint64_t res = 0; \
int elem_size = kMSARegSize / lanes; \
for (int i = 0; i < lanes / 2; ++i) { \
int shift = elem_size * i; \
uint64_t elem = (ws >> shift) & mask; \
res |= ((func)&mask) << shift; \
} \
return res; \
}
for (size_t i = 0; i < sizeof(tc) / sizeof(TestCaseMsaBit); ++i) {
// bclri: clear bit m of every element.
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ bclri_b(w2, w0, m % 8); },
BCLRI_BSETI_BNEGI_DF(kMSALanesByte, UINT8_MAX,
(~(1ull << (m % elem_size)) & elem)));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ bclri_h(w2, w0, m % 16); },
BCLRI_BSETI_BNEGI_DF(kMSALanesHalf, UINT16_MAX,
(~(1ull << (m % elem_size)) & elem)));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ bclri_w(w2, w0, m % 32); },
BCLRI_BSETI_BNEGI_DF(kMSALanesWord, UINT32_MAX,
(~(1ull << (m % elem_size)) & elem)));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ bclri_d(w2, w0, m % 64); },
BCLRI_BSETI_BNEGI_DF(kMSALanesDword, UINT64_MAX,
(~(1ull << (m % elem_size)) & elem)));
// bseti: set bit m of every element.
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ bseti_b(w2, w0, m % 8); },
BCLRI_BSETI_BNEGI_DF(kMSALanesByte, UINT8_MAX,
((1ull << (m % elem_size)) | elem)));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ bseti_h(w2, w0, m % 16); },
BCLRI_BSETI_BNEGI_DF(kMSALanesHalf, UINT16_MAX,
((1ull << (m % elem_size)) | elem)));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ bseti_w(w2, w0, m % 32); },
BCLRI_BSETI_BNEGI_DF(kMSALanesWord, UINT32_MAX,
((1ull << (m % elem_size)) | elem)));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ bseti_d(w2, w0, m % 64); },
BCLRI_BSETI_BNEGI_DF(kMSALanesDword, UINT64_MAX,
((1ull << (m % elem_size)) | elem)));
// bnegi: toggle bit m of every element.
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ bnegi_b(w2, w0, m % 8); },
BCLRI_BSETI_BNEGI_DF(kMSALanesByte, UINT8_MAX,
((1ull << (m % elem_size)) ^ elem)));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ bnegi_h(w2, w0, m % 16); },
BCLRI_BSETI_BNEGI_DF(kMSALanesHalf, UINT16_MAX,
((1ull << (m % elem_size)) ^ elem)));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ bnegi_w(w2, w0, m % 32); },
BCLRI_BSETI_BNEGI_DF(kMSALanesWord, UINT32_MAX,
((1ull << (m % elem_size)) ^ elem)));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ bnegi_d(w2, w0, m % 64); },
BCLRI_BSETI_BNEGI_DF(kMSALanesDword, UINT64_MAX,
((1ull << (m % elem_size)) ^ elem)));
}
#undef BCLRI_BSETI_BNEGI_DF
}
// Exercises binsli (insert the top m + 1 bits of ws into wd) and binsri
// (insert the bottom m + 1 bits) for the byte, halfword, word and doubleword
// formats. Unlike the other BIT tests, the vectors carry non-zero wd values
// because the destination is also an input.
TEST(MSA_binsli_binsri) {
// Skipped unless running as MIPS32r6 with MSA support.
if (!IsMipsArchVariant(kMips32r6) || !CpuFeatures::IsSupported(MIPS_SIMD))
return;
CcTest::InitializeVM();
struct TestCaseMsaBit tc[] = {// wd_lo, wd_hi, ws_lo, ws_hi, m
{0x53f4457553bbd5b4, 0x5fb8250eacc296b2,
0xf35862e13e38f8b0, 0x4f41ffdef2bfe636, 3},
{0xf61bfdb0f312e6fc, 0xc9437568dd1ea925,
0x64be4f6dbe9caa51, 0x6b23de1a687d9cb9, 5},
{0x53f4457553bbd5b4, 0x5fb8250eacc296b2,
0x1169751bb9a7d9c3, 0xf7a594aec8ef8a9c, 9},
{0xf61bfdb0f312e6fc, 0xc9437568dd1ea925,
0x2b665362c4e812df, 0x3a0d80d68b3f8bc8, 13},
{0x53f4457553bbd5b4, 0x5fb8250eacc296b2,
0x566be7ba4365b70a, 0x01ebbc1937d76cb4, 21},
{0xf61bfdb0f312e6fc, 0xc9437568dd1ea925,
0x380e2deb9d3f8aae, 0x017e0de0bcc6ca42, 30},
{0x53f4457553bbd5b4, 0x5fb8250eacc296b2,
0xa46a3a9bcb43f4e5, 0x1c62c8473bdfcffb, 45},
{0xf61bfdb0f312e6fc, 0xc9437568dd1ea925,
0xf6759d85f23b5a2b, 0x5c042ae42c6d12c1, 61}};
// Builds a reference lambda. For each element of a 64-bit half it exposes
// `ws_elem`, `wd_elem`, `r_mask` (low `bits` bits set) and `l_mask` (high
// `bits` bits of the element set) to `func`. When bits == elem_size the whole
// element is taken from ws, which also avoids building a full-width mask.
#define BINSLI_BINSRI_DF(lanes, mask, func) \
[](uint64_t wd, uint64_t ws, uint32_t m) { \
uint64_t res = 0; \
int elem_size = kMSARegSize / lanes; \
int bits = m % elem_size + 1; \
for (int i = 0; i < lanes / 2; ++i) { \
int shift = elem_size * i; \
uint64_t ws_elem = (ws >> shift) & mask; \
if (bits == elem_size) { \
res |= (ws_elem & mask) << shift; \
} else { \
uint64_t r_mask = (1ull << bits) - 1; \
uint64_t l_mask = r_mask << (elem_size - bits); \
USE(l_mask); \
uint64_t wd_elem = (wd >> shift) & mask; \
res |= ((func)&mask) << shift; \
} \
} \
return res; \
}
for (size_t i = 0; i < sizeof(tc) / sizeof(TestCaseMsaBit); ++i) {
// binsli: insert from the left (most significant) end.
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ binsli_b(w2, w0, m % 8); },
BINSLI_BINSRI_DF(kMSALanesByte, UINT8_MAX,
((ws_elem & l_mask) | (wd_elem & ~l_mask))));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ binsli_h(w2, w0, m % 16); },
BINSLI_BINSRI_DF(kMSALanesHalf, UINT16_MAX,
((ws_elem & l_mask) | (wd_elem & ~l_mask))));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ binsli_w(w2, w0, m % 32); },
BINSLI_BINSRI_DF(kMSALanesWord, UINT32_MAX,
((ws_elem & l_mask) | (wd_elem & ~l_mask))));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ binsli_d(w2, w0, m % 64); },
BINSLI_BINSRI_DF(kMSALanesDword, UINT64_MAX,
((ws_elem & l_mask) | (wd_elem & ~l_mask))));
// binsri: insert from the right (least significant) end.
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ binsri_b(w2, w0, m % 8); },
BINSLI_BINSRI_DF(kMSALanesByte, UINT8_MAX,
((ws_elem & r_mask) | (wd_elem & ~r_mask))));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ binsri_h(w2, w0, m % 16); },
BINSLI_BINSRI_DF(kMSALanesHalf, UINT16_MAX,
((ws_elem & r_mask) | (wd_elem & ~r_mask))));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ binsri_w(w2, w0, m % 32); },
BINSLI_BINSRI_DF(kMSALanesWord, UINT32_MAX,
((ws_elem & r_mask) | (wd_elem & ~r_mask))));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ binsri_d(w2, w0, m % 64); },
BINSLI_BINSRI_DF(kMSALanesDword, UINT64_MAX,
((ws_elem & r_mask) | (wd_elem & ~r_mask))));
}
#undef BINSLI_BINSRI_DF
}
// Exercises sat_u (unsigned saturate to m + 1 bits) and sat_s (signed saturate
// to m + 1 bits) for the byte, halfword, word and doubleword formats. m is
// reduced modulo the element width, in the emitted instruction and once at the
// top of the reference lambda.
TEST(MSA_sat_s_sat_u) {
// Skipped unless running as MIPS32r6 with MSA support.
if (!IsMipsArchVariant(kMips32r6) || !CpuFeatures::IsSupported(MIPS_SIMD))
return;
CcTest::InitializeVM();
struct TestCaseMsaBit tc[] = {
// wd_lo, wd_hi, ws_lo, ws_hi, m
{0, 0, 0xf35862e13e3808b0, 0x4f41ffdef2bfe636, 3},
{0, 0, 0x64be4f6dbe9caa51, 0x6b23de1a687d9cb9, 5},
{0, 0, 0x1169751bb9a7d9c3, 0xf7a594aec8ef8a9c, 9},
{0, 0, 0x2b665362c4e812df, 0x3a0d80d68b3f8bc8, 13},
{0, 0, 0x566be7ba4365b70a, 0x01ebbc1937d76cb4, 21},
{0, 0, 0x380e2deb9d3f8aae, 0x017e0de0bcc6ca42, 30},
{0, 0, 0xa46a3a9bcb43f4e5, 0x1c62c8473bdfcffb, 45},
{0, 0, 0xf6759d85f23b5a2b, 0x5c042ae42c6d12c1, 61}};
// Builds a reference lambda. For each element of a 64-bit half it exposes the
// zero-extended value as `elem_u64` and the sign-extended value as `elem_i64`
// to `func`, then packs the masked result back into place.
#define SAT_DF(lanes, mask, func) \
[](uint64_t wd, uint64_t ws, uint32_t m) { \
uint64_t res = 0; \
int elem_size = kMSARegSize / lanes; \
m %= elem_size; \
for (int i = 0; i < lanes / 2; ++i) { \
int shift = elem_size * i; \
uint64_t elem_u64 = (ws >> shift) & mask; \
int64_t elem_i64 = static_cast<int64_t>(elem_u64 << (64 - elem_size)) >> \
(64 - elem_size); \
USE(elem_i64); \
res |= ((func)&mask) << shift; \
} \
return res; \
}
// Largest / smallest signed value and largest unsigned value representable in
// x bits. The largest reduced m in the vectors above is 61, so x is at most 62.
#define M_MAX_INT(x) static_cast<int64_t>((1LL << ((x)-1)) - 1)
#define M_MIN_INT(x) static_cast<int64_t>(-(1LL << ((x)-1)))
#define M_MAX_UINT(x) static_cast<uint64_t>(-1ULL >> (64 - (x)))
for (size_t i = 0; i < sizeof(tc) / sizeof(TestCaseMsaBit); ++i) {
// sat_u: clamp the unsigned element to [0, 2^(m + 1) - 1].
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ sat_u_b(w2, w0, m % 8); },
SAT_DF(kMSALanesByte, UINT8_MAX,
(elem_u64 < M_MAX_UINT(m + 1) ? elem_u64 : M_MAX_UINT(m + 1))));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ sat_u_h(w2, w0, m % 16); },
SAT_DF(kMSALanesHalf, UINT16_MAX,
(elem_u64 < M_MAX_UINT(m + 1) ? elem_u64 : M_MAX_UINT(m + 1))));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ sat_u_w(w2, w0, m % 32); },
SAT_DF(kMSALanesWord, UINT32_MAX,
(elem_u64 < M_MAX_UINT(m + 1) ? elem_u64 : M_MAX_UINT(m + 1))));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ sat_u_d(w2, w0, m % 64); },
SAT_DF(kMSALanesDword, UINT64_MAX,
(elem_u64 < M_MAX_UINT(m + 1) ? elem_u64 : M_MAX_UINT(m + 1))));
// sat_s: clamp the signed element to [-2^m, 2^m - 1].
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ sat_s_b(w2, w0, m % 8); },
SAT_DF(
kMSALanesByte, UINT8_MAX,
(elem_i64 < M_MIN_INT(m + 1)
? M_MIN_INT(m + 1)
: elem_i64 > M_MAX_INT(m + 1) ? M_MAX_INT(m + 1) : elem_i64)));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ sat_s_h(w2, w0, m % 16); },
SAT_DF(
kMSALanesHalf, UINT16_MAX,
(elem_i64 < M_MIN_INT(m + 1)
? M_MIN_INT(m + 1)
: elem_i64 > M_MAX_INT(m + 1) ? M_MAX_INT(m + 1) : elem_i64)));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ sat_s_w(w2, w0, m % 32); },
SAT_DF(
kMSALanesWord, UINT32_MAX,
(elem_i64 < M_MIN_INT(m + 1)
? M_MIN_INT(m + 1)
: elem_i64 > M_MAX_INT(m + 1) ? M_MAX_INT(m + 1) : elem_i64)));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ sat_s_d(w2, w0, m % 64); },
SAT_DF(
kMSALanesDword, UINT64_MAX,
(elem_i64 < M_MIN_INT(m + 1)
? M_MIN_INT(m + 1)
: elem_i64 > M_MAX_INT(m + 1) ? M_MAX_INT(m + 1) : elem_i64)));
}
#undef SAT_DF
#undef M_MAX_INT
#undef M_MIN_INT
#undef M_MAX_UINT
}
#undef __

View File

@ -7862,4 +7862,502 @@ TEST(MSA_vector) {
}
}
// One input vector for the MSA BIT-format instruction tests. Field order
// matters: the tests fill this struct with positional aggregate initializers.
struct TestCaseMsaBit {
// Initial contents of the destination register (loaded into w2 by
// run_msa_bit), as low and high 64-bit halves.
uint64_t wd_lo;
uint64_t wd_hi;
// Contents of the source register (loaded into w0 by run_msa_bit), as low and
// high 64-bit halves.
uint64_t ws_lo;
uint64_t ws_hi;
// Raw immediate. Each test reduces it modulo the element width (m % 8,
// m % 16, ...) before handing it to the assembler.
uint32_t m;
};
// Assembles and runs a snippet for one MSA BIT-format test vector.
//
// The snippet loads input->ws_lo/ws_hi into w0 and input->wd_lo/wd_hi into w2,
// emits the instruction under test through
// GenerateInstructionFunc(assm, input->m), then stores the four 32-bit words
// of w2 at offsets 0, 4, 8 and 12 from a0. The generated code is called with
// &res as its first argument. Each 64-bit half of the result is CHECKed
// against GenerateOperationFunc(wd_half, ws_half, input->m).
template <typename InstFunc, typename OperFunc>
void run_msa_bit(struct TestCaseMsaBit* input, InstFunc GenerateInstructionFunc,
OperFunc GenerateOperationFunc) {
Isolate* isolate = CcTest::i_isolate();
HandleScope scope(isolate);
MacroAssembler assm(isolate, NULL, 0, v8::internal::CodeObjectRequired::kYes);
CpuFeatureScope fscope(&assm, MIPS_SIMD);
msa_reg_t res;
// Fills a 128-bit MSA register from two 64-bit values, one doubleword lane at
// a time (t0/t1 are used as scratch registers).
#define LOAD_W_REG(lo, hi, w_reg) \
__ li(t0, lo); \
__ li(t1, hi); \
__ insert_d(w_reg, 0, t0); \
__ insert_d(w_reg, 1, t1)
LOAD_W_REG(input->ws_lo, input->ws_hi, w0);
LOAD_W_REG(input->wd_lo, input->wd_hi, w2);
#undef LOAD_W_REG
// Emit the instruction under test; the callbacks used by the tests operate on
// w2 (destination) and w0 (source).
GenerateInstructionFunc(assm, input->m);
// Copy w2 out word by word to the buffer addressed by a0.
__ copy_u_w(t2, w2, 0);
__ sw(t2, MemOperand(a0, 0));
__ copy_u_w(t2, w2, 1);
__ sw(t2, MemOperand(a0, 4));
__ copy_u_w(t2, w2, 2);
__ sw(t2, MemOperand(a0, 8));
__ copy_u_w(t2, w2, 3);
__ sw(t2, MemOperand(a0, 12));
__ jr(ra);
__ nop();
CodeDesc desc;
assm.GetCode(isolate, &desc);
Handle<Code> code = isolate->factory()->NewCode(
desc, Code::ComputeFlags(Code::STUB), Handle<Code>());
#ifdef OBJECT_PRINT
code->Print(std::cout);
#endif
F3 f = FUNCTION_CAST<F3>(code->entry());
(CALL_GENERATED_CODE(isolate, f, &res, 0, 0, 0, 0));
// The reference lambda works on one 64-bit half of the register at a time.
CHECK_EQ(GenerateOperationFunc(input->wd_lo, input->ws_lo, input->m),
res.d[0]);
CHECK_EQ(GenerateOperationFunc(input->wd_hi, input->ws_hi, input->m),
res.d[1]);
}
// Exercises slli, srli, srlri, srai and srari for the byte, halfword, word and
// doubleword formats. Each vector's m is reduced modulo the element width for
// both the emitted instruction and the reference lambda.
//
// NOTE: the srlri/srari reference expressions shift by (m % elem_size - 1), so
// they are only valid when m % elem_size != 0. None of the vectors below
// reduces to zero for any of the four element widths; keep that in mind when
// adding vectors.
TEST(MSA_slli_srai_srli) {
// Skipped unless built for MIPS64r6 with MSA support.
if ((kArchVariant != kMips64r6) || !CpuFeatures::IsSupported(MIPS_SIMD))
return;
CcTest::InitializeVM();
struct TestCaseMsaBit tc[] = {
// wd_lo, wd_hi ws_lo, ws_hi, m
{0, 0, 0xf35862e13e38f8b0, 0x4f41ffdef2bfe636, 3},
{0, 0, 0x64be4f6dbe9caa51, 0x6b23de1a687d9cb9, 5},
{0, 0, 0x1169751bb9a7d9c3, 0xf7a594aec8ef8a9c, 9},
{0, 0, 0x2b665362c4e812df, 0x3a0d80d68b3f8bc8, 13},
{0, 0, 0x566be7ba4365b70a, 0x01ebbc1937d76cb4, 21},
{0, 0, 0x380e2deb9d3f8aae, 0x017e0de0bcc6ca42, 30},
{0, 0, 0xa46a3a9bcb43f4e5, 0x1c62c8473bdfcffb, 45},
{0, 0, 0xf6759d85f23b5a2b, 0x5c042ae42c6d12c1, 61}};
// Builds a reference lambda for unsigned element operations. It walks the
// lanes / 2 elements packed into one 64-bit half, exposes each zero-extended
// element as `elem`, evaluates `func` (which may also use `m` and `elem_size`)
// and packs the masked result back into place.
#define SLLI_SRLI_DF(lanes, mask, func) \
[](uint64_t wd, uint64_t ws, uint32_t m) { \
uint64_t res = 0; \
int elem_size = kMSARegSize / lanes; \
for (int i = 0; i < lanes / 2; ++i) { \
int shift = elem_size * i; \
uint64_t elem = (ws >> shift) & mask; \
res |= ((func)&mask) << shift; \
} \
return res; \
}
// Same as SLLI_SRLI_DF, but `elem` is the element sign-extended to int64_t.
#define SRAI_DF(lanes, mask, func) \
[](uint64_t wd, uint64_t ws, uint32_t m) { \
uint64_t res = 0; \
int elem_size = kMSARegSize / lanes; \
for (int i = 0; i < lanes / 2; ++i) { \
int shift = elem_size * i; \
int64_t elem = \
static_cast<int64_t>(((ws >> shift) & mask) << (64 - elem_size)) >> \
(64 - elem_size); \
res |= static_cast<uint64_t>((func)&mask) << shift; \
} \
return res; \
}
for (size_t i = 0; i < sizeof(tc) / sizeof(TestCaseMsaBit); ++i) {
// slli: shift left logical.
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ slli_b(w2, w0, m % 8); },
SLLI_SRLI_DF(kMSALanesByte, UINT8_MAX, (elem << (m % elem_size))));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ slli_h(w2, w0, m % 16); },
SLLI_SRLI_DF(kMSALanesHalf, UINT16_MAX, (elem << (m % elem_size))));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ slli_w(w2, w0, m % 32); },
SLLI_SRLI_DF(kMSALanesWord, UINT32_MAX, (elem << (m % elem_size))));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ slli_d(w2, w0, m % 64); },
SLLI_SRLI_DF(kMSALanesDword, UINT64_MAX, (elem << (m % elem_size))));
// srli: shift right logical.
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ srli_b(w2, w0, m % 8); },
SLLI_SRLI_DF(kMSALanesByte, UINT8_MAX, (elem >> (m % elem_size))));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ srli_h(w2, w0, m % 16); },
SLLI_SRLI_DF(kMSALanesHalf, UINT16_MAX, (elem >> (m % elem_size))));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ srli_w(w2, w0, m % 32); },
SLLI_SRLI_DF(kMSALanesWord, UINT32_MAX, (elem >> (m % elem_size))));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ srli_d(w2, w0, m % 64); },
SLLI_SRLI_DF(kMSALanesDword, UINT64_MAX, (elem >> (m % elem_size))));
// srlri: shift right logical, then add the last bit shifted out.
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ srlri_b(w2, w0, m % 8); },
SLLI_SRLI_DF(
kMSALanesByte, UINT8_MAX,
(elem >> (m % elem_size)) + ((elem >> (m % elem_size - 1)) & 0x1)));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ srlri_h(w2, w0, m % 16); },
SLLI_SRLI_DF(
kMSALanesHalf, UINT16_MAX,
(elem >> (m % elem_size)) + ((elem >> (m % elem_size - 1)) & 0x1)));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ srlri_w(w2, w0, m % 32); },
SLLI_SRLI_DF(
kMSALanesWord, UINT32_MAX,
(elem >> (m % elem_size)) + ((elem >> (m % elem_size - 1)) & 0x1)));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ srlri_d(w2, w0, m % 64); },
SLLI_SRLI_DF(
kMSALanesDword, UINT64_MAX,
(elem >> (m % elem_size)) + ((elem >> (m % elem_size - 1)) & 0x1)));
// srai: shift right arithmetic.
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ srai_b(w2, w0, m % 8); },
SRAI_DF(kMSALanesByte, UINT8_MAX,
ArithmeticShiftRight(elem, m % elem_size)));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ srai_h(w2, w0, m % 16); },
SRAI_DF(kMSALanesHalf, UINT16_MAX,
ArithmeticShiftRight(elem, m % elem_size)));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ srai_w(w2, w0, m % 32); },
SRAI_DF(kMSALanesWord, UINT32_MAX,
ArithmeticShiftRight(elem, m % elem_size)));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ srai_d(w2, w0, m % 64); },
SRAI_DF(kMSALanesDword, UINT64_MAX,
ArithmeticShiftRight(elem, m % elem_size)));
// srari: shift right arithmetic, then add the last bit shifted out.
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ srari_b(w2, w0, m % 8); },
SRAI_DF(kMSALanesByte, UINT8_MAX,
ArithmeticShiftRight(elem, m % elem_size) +
((elem >> (m % elem_size - 1)) & 0x1)));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ srari_h(w2, w0, m % 16); },
SRAI_DF(kMSALanesHalf, UINT16_MAX,
ArithmeticShiftRight(elem, m % elem_size) +
((elem >> (m % elem_size - 1)) & 0x1)));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ srari_w(w2, w0, m % 32); },
SRAI_DF(kMSALanesWord, UINT32_MAX,
ArithmeticShiftRight(elem, m % elem_size) +
((elem >> (m % elem_size - 1)) & 0x1)));
run_msa_bit(
&tc[i],
[](MacroAssembler& assm, uint32_t m) { __ srari_d(w2, w0, m % 64); },
SRAI_DF(kMSALanesDword, UINT64_MAX,
ArithmeticShiftRight(elem, m % elem_size) +
((elem >> (m % elem_size - 1)) & 0x1)));
}
#undef SLLI_SRLI_DF
#undef SRAI_DF
}
TEST(MSA_bclri_bseti_bnegi) {
  // Bail out unless the build targets kMips64r6 and the CPU reports MIPS_SIMD.
  const bool msa_usable =
      (kArchVariant == kMips64r6) && CpuFeatures::IsSupported(MIPS_SIMD);
  if (!msa_usable) return;

  CcTest::InitializeVM();

  // Field order: wd_lo, wd_hi, ws_lo, ws_hi, m.
  // wd is zero in every entry; the models below never read it.
  struct TestCaseMsaBit tc[] = {
      {0, 0, 0xf35862e13e38f8b0, 0x4f41ffdef2bfe636, 3},
      {0, 0, 0x64be4f6dbe9caa51, 0x6b23de1a687d9cb9, 5},
      {0, 0, 0x1169751bb9a7d9c3, 0xf7a594aec8ef8a9c, 9},
      {0, 0, 0x2b665362c4e812df, 0x3a0d80d68b3f8bc8, 13},
      {0, 0, 0x566be7ba4365b70a, 0x01ebbc1937d76cb4, 21},
      {0, 0, 0x380e2deb9d3f8aae, 0x017e0de0bcc6ca42, 30},
      {0, 0, 0xa46a3a9bcb43f4e5, 0x1c62c8473bdfcffb, 45},
      {0, 0, 0xf6759d85f23b5a2b, 0x5c042ae42c6d12c1, 61}};

// Per-lane reference expressions. They are expanded inside the lambda built
// by BCLRI_BSETI_BNEGI_DF, where `m`, `elem_size` and `elem` are in scope.
// Clear / set / flip bit (m % elem_size) of the lane value.
#define BCLRI_MODEL (~(1ull << (m % elem_size)) & elem)
#define BSETI_MODEL ((1ull << (m % elem_size)) | elem)
#define BNEGI_MODEL ((1ull << (m % elem_size)) ^ elem)

// Builds a captureless lambda (wd, ws, m) -> uint64_t. It extracts each of the
// lanes / 2 lanes packed in `ws`, applies `func` to it, masks the result back
// to lane width and reassembles the lanes at their original bit positions.
#define BCLRI_BSETI_BNEGI_DF(lanes, mask, func)      \
  [](uint64_t wd, uint64_t ws, uint32_t m) {         \
    const int elem_size = kMSARegSize / lanes;       \
    uint64_t res = 0;                                \
    for (int idx = 0; idx < lanes / 2; ++idx) {      \
      const int shift = elem_size * idx;             \
      const uint64_t elem = (ws >> shift) & mask;    \
      res |= ((func)&mask) << shift;                 \
    }                                                \
    return res;                                      \
  }

// Calls run_msa_bit with (1) a lambda that invokes `instr(w2, w0, m % width)`
// on the assembler and (2) the reference model for that lane width.
#define RUN_BCLRI_BSETI_BNEGI(tc_ptr, instr, width, lanes, mask, func)       \
  run_msa_bit(                                                               \
      tc_ptr,                                                                \
      [](MacroAssembler& assm, uint32_t m) { __ instr(w2, w0, m % width); }, \
      BCLRI_BSETI_BNEGI_DF(lanes, mask, func))

  for (TestCaseMsaBit& test_case : tc) {
    TestCaseMsaBit* const input = &test_case;

    RUN_BCLRI_BSETI_BNEGI(input, bclri_b, 8, kMSALanesByte, UINT8_MAX,
                          BCLRI_MODEL);
    RUN_BCLRI_BSETI_BNEGI(input, bclri_h, 16, kMSALanesHalf, UINT16_MAX,
                          BCLRI_MODEL);
    RUN_BCLRI_BSETI_BNEGI(input, bclri_w, 32, kMSALanesWord, UINT32_MAX,
                          BCLRI_MODEL);
    RUN_BCLRI_BSETI_BNEGI(input, bclri_d, 64, kMSALanesDword, UINT64_MAX,
                          BCLRI_MODEL);

    RUN_BCLRI_BSETI_BNEGI(input, bseti_b, 8, kMSALanesByte, UINT8_MAX,
                          BSETI_MODEL);
    RUN_BCLRI_BSETI_BNEGI(input, bseti_h, 16, kMSALanesHalf, UINT16_MAX,
                          BSETI_MODEL);
    RUN_BCLRI_BSETI_BNEGI(input, bseti_w, 32, kMSALanesWord, UINT32_MAX,
                          BSETI_MODEL);
    RUN_BCLRI_BSETI_BNEGI(input, bseti_d, 64, kMSALanesDword, UINT64_MAX,
                          BSETI_MODEL);

    RUN_BCLRI_BSETI_BNEGI(input, bnegi_b, 8, kMSALanesByte, UINT8_MAX,
                          BNEGI_MODEL);
    RUN_BCLRI_BSETI_BNEGI(input, bnegi_h, 16, kMSALanesHalf, UINT16_MAX,
                          BNEGI_MODEL);
    RUN_BCLRI_BSETI_BNEGI(input, bnegi_w, 32, kMSALanesWord, UINT32_MAX,
                          BNEGI_MODEL);
    RUN_BCLRI_BSETI_BNEGI(input, bnegi_d, 64, kMSALanesDword, UINT64_MAX,
                          BNEGI_MODEL);
  }

#undef RUN_BCLRI_BSETI_BNEGI
#undef BCLRI_BSETI_BNEGI_DF
#undef BNEGI_MODEL
#undef BSETI_MODEL
#undef BCLRI_MODEL
}
TEST(MSA_binsli_binsri) {
  // Bail out unless the build targets kMips64r6 and the CPU reports MIPS_SIMD.
  const bool msa_usable =
      (kArchVariant == kMips64r6) && CpuFeatures::IsSupported(MIPS_SIMD);
  if (!msa_usable) return;

  CcTest::InitializeVM();

  // Field order: wd_lo, wd_hi, ws_lo, ws_hi, m.
  // Unlike the other bit tests, wd is non-zero: the models below read it.
  struct TestCaseMsaBit tc[] = {
      {0x53f4457553bbd5b4, 0x5fb8250eacc296b2, 0xf35862e13e38f8b0,
       0x4f41ffdef2bfe636, 3},
      {0xf61bfdb0f312e6fc, 0xc9437568dd1ea925, 0x64be4f6dbe9caa51,
       0x6b23de1a687d9cb9, 5},
      {0x53f4457553bbd5b4, 0x5fb8250eacc296b2, 0x1169751bb9a7d9c3,
       0xf7a594aec8ef8a9c, 9},
      {0xf61bfdb0f312e6fc, 0xc9437568dd1ea925, 0x2b665362c4e812df,
       0x3a0d80d68b3f8bc8, 13},
      {0x53f4457553bbd5b4, 0x5fb8250eacc296b2, 0x566be7ba4365b70a,
       0x01ebbc1937d76cb4, 21},
      {0xf61bfdb0f312e6fc, 0xc9437568dd1ea925, 0x380e2deb9d3f8aae,
       0x017e0de0bcc6ca42, 30},
      {0x53f4457553bbd5b4, 0x5fb8250eacc296b2, 0xa46a3a9bcb43f4e5,
       0x1c62c8473bdfcffb, 45},
      {0xf61bfdb0f312e6fc, 0xc9437568dd1ea925, 0xf6759d85f23b5a2b,
       0x5c042ae42c6d12c1, 61}};

// Per-lane reference expressions. They are expanded inside the lambda built
// by BINSLI_BINSRI_DF, where `ws_elem`, `wd_elem`, `l_mask` and `r_mask` are
// in scope. Bits selected by the mask come from ws, the rest from wd.
#define BINSLI_MODEL ((ws_elem & l_mask) | (wd_elem & ~l_mask))
#define BINSRI_MODEL ((ws_elem & r_mask) | (wd_elem & ~r_mask))

// Builds a captureless lambda (wd, ws, m) -> uint64_t that processes the
// lanes / 2 lanes packed in `ws`/`wd`. With bits = m % elem_size + 1:
//  - bits == elem_size: the ws lane is copied unchanged (this also keeps
//    `1ull << bits` from ever being evaluated with bits == 64);
//  - otherwise r_mask covers the low `bits` bits, l_mask the high `bits` bits
//    of the lane, and `func` merges the ws and wd lanes using one of them.
#define BINSLI_BINSRI_DF(lanes, mask, func)                   \
  [](uint64_t wd, uint64_t ws, uint32_t m) {                  \
    const int elem_size = kMSARegSize / lanes;                \
    const int bits = m % elem_size + 1;                       \
    const bool copy_whole_lane = (bits == elem_size);         \
    uint64_t res = 0;                                         \
    for (int idx = 0; idx < lanes / 2; ++idx) {               \
      const int shift = elem_size * idx;                      \
      const uint64_t ws_elem = (ws >> shift) & mask;          \
      if (copy_whole_lane) {                                  \
        res |= (ws_elem & mask) << shift;                     \
        continue;                                             \
      }                                                       \
      const uint64_t r_mask = (1ull << bits) - 1;             \
      const uint64_t l_mask = r_mask << (elem_size - bits);   \
      USE(l_mask);                                            \
      const uint64_t wd_elem = (wd >> shift) & mask;          \
      res |= ((func)&mask) << shift;                          \
    }                                                         \
    return res;                                               \
  }

// Calls run_msa_bit with (1) a lambda that invokes `instr(w2, w0, m % width)`
// on the assembler and (2) the reference model for that lane width.
#define RUN_BINSLI_BINSRI(tc_ptr, instr, width, lanes, mask, func)           \
  run_msa_bit(                                                               \
      tc_ptr,                                                                \
      [](MacroAssembler& assm, uint32_t m) { __ instr(w2, w0, m % width); }, \
      BINSLI_BINSRI_DF(lanes, mask, func))

  for (TestCaseMsaBit& test_case : tc) {
    TestCaseMsaBit* const input = &test_case;

    RUN_BINSLI_BINSRI(input, binsli_b, 8, kMSALanesByte, UINT8_MAX,
                      BINSLI_MODEL);
    RUN_BINSLI_BINSRI(input, binsli_h, 16, kMSALanesHalf, UINT16_MAX,
                      BINSLI_MODEL);
    RUN_BINSLI_BINSRI(input, binsli_w, 32, kMSALanesWord, UINT32_MAX,
                      BINSLI_MODEL);
    RUN_BINSLI_BINSRI(input, binsli_d, 64, kMSALanesDword, UINT64_MAX,
                      BINSLI_MODEL);

    RUN_BINSLI_BINSRI(input, binsri_b, 8, kMSALanesByte, UINT8_MAX,
                      BINSRI_MODEL);
    RUN_BINSLI_BINSRI(input, binsri_h, 16, kMSALanesHalf, UINT16_MAX,
                      BINSRI_MODEL);
    RUN_BINSLI_BINSRI(input, binsri_w, 32, kMSALanesWord, UINT32_MAX,
                      BINSRI_MODEL);
    RUN_BINSLI_BINSRI(input, binsri_d, 64, kMSALanesDword, UINT64_MAX,
                      BINSRI_MODEL);
  }

#undef RUN_BINSLI_BINSRI
#undef BINSLI_BINSRI_DF
#undef BINSRI_MODEL
#undef BINSLI_MODEL
}
TEST(MSA_sat_s_sat_u) {
  // Bail out unless the build targets kMips64r6 and the CPU reports MIPS_SIMD.
  const bool msa_usable =
      (kArchVariant == kMips64r6) && CpuFeatures::IsSupported(MIPS_SIMD);
  if (!msa_usable) return;

  CcTest::InitializeVM();

  // Field order: wd_lo, wd_hi, ws_lo, ws_hi, m.
  // wd is zero in every entry; the models below never read it.
  struct TestCaseMsaBit tc[] = {
      {0, 0, 0xf35862e13e3808b0, 0x4f41ffdef2bfe636, 3},
      {0, 0, 0x64be4f6dbe9caa51, 0x6b23de1a687d9cb9, 5},
      {0, 0, 0x1169751bb9a7d9c3, 0xf7a594aec8ef8a9c, 9},
      {0, 0, 0x2b665362c4e812df, 0x3a0d80d68b3f8bc8, 13},
      {0, 0, 0x566be7ba4365b70a, 0x01ebbc1937d76cb4, 21},
      {0, 0, 0x380e2deb9d3f8aae, 0x017e0de0bcc6ca42, 30},
      {0, 0, 0xa46a3a9bcb43f4e5, 0x1c62c8473bdfcffb, 45},
      {0, 0, 0xf6759d85f23b5a2b, 0x5c042ae42c6d12c1, 61}};

// Largest / smallest signed value and largest unsigned value representable
// in x bits.
#define M_MAX_INT(x) static_cast<int64_t>((1LL << ((x)-1)) - 1)
#define M_MIN_INT(x) static_cast<int64_t>(-(1LL << ((x)-1)))
#define M_MAX_UINT(x) static_cast<uint64_t>(-1ULL >> (64 - (x)))

// Per-lane reference expressions. They are expanded inside the lambda built
// by SAT_DF, where `m` (already reduced modulo the lane width), `elem_u64`
// and `elem_i64` are in scope.
// Unsigned: clamp the lane to M_MAX_UINT(m + 1).
#define SAT_U_MODEL \
  (elem_u64 < M_MAX_UINT(m + 1) ? elem_u64 : M_MAX_UINT(m + 1))
// Signed: clamp the sign-extended lane to [M_MIN_INT(m + 1), M_MAX_INT(m + 1)].
#define SAT_S_MODEL                 \
  (elem_i64 < M_MIN_INT(m + 1)      \
       ? M_MIN_INT(m + 1)           \
       : elem_i64 > M_MAX_INT(m + 1) ? M_MAX_INT(m + 1) : elem_i64)

// Builds a captureless lambda (wd, ws, m) -> uint64_t. For each of the
// lanes / 2 lanes packed in `ws` it exposes the raw lane (elem_u64) and its
// sign-extended value (elem_i64), applies `func`, masks the result back to
// lane width and reassembles the lanes at their original bit positions.
#define SAT_DF(lanes, mask, func)                                        \
  [](uint64_t wd, uint64_t ws, uint32_t m) {                             \
    const int elem_size = kMSARegSize / lanes;                           \
    const int sign_shift = 64 - elem_size;                               \
    uint64_t res = 0;                                                    \
    m = m % elem_size;                                                   \
    for (int idx = 0; idx < lanes / 2; ++idx) {                          \
      const int shift = elem_size * idx;                                 \
      const uint64_t elem_u64 = (ws >> shift) & mask;                    \
      const int64_t elem_i64 =                                           \
          static_cast<int64_t>(elem_u64 << sign_shift) >> sign_shift;    \
      USE(elem_i64);                                                     \
      res |= ((func)&mask) << shift;                                     \
    }                                                                    \
    return res;                                                          \
  }

// Calls run_msa_bit with (1) a lambda that invokes `instr(w2, w0, m % width)`
// on the assembler and (2) the reference model for that lane width.
#define RUN_SAT(tc_ptr, instr, width, lanes, mask, func)                     \
  run_msa_bit(                                                               \
      tc_ptr,                                                                \
      [](MacroAssembler& assm, uint32_t m) { __ instr(w2, w0, m % width); }, \
      SAT_DF(lanes, mask, func))

  for (TestCaseMsaBit& test_case : tc) {
    TestCaseMsaBit* const input = &test_case;

    RUN_SAT(input, sat_u_b, 8, kMSALanesByte, UINT8_MAX, SAT_U_MODEL);
    RUN_SAT(input, sat_u_h, 16, kMSALanesHalf, UINT16_MAX, SAT_U_MODEL);
    RUN_SAT(input, sat_u_w, 32, kMSALanesWord, UINT32_MAX, SAT_U_MODEL);
    RUN_SAT(input, sat_u_d, 64, kMSALanesDword, UINT64_MAX, SAT_U_MODEL);

    RUN_SAT(input, sat_s_b, 8, kMSALanesByte, UINT8_MAX, SAT_S_MODEL);
    RUN_SAT(input, sat_s_h, 16, kMSALanesHalf, UINT16_MAX, SAT_S_MODEL);
    RUN_SAT(input, sat_s_w, 32, kMSALanesWord, UINT32_MAX, SAT_S_MODEL);
    RUN_SAT(input, sat_s_d, 64, kMSALanesDword, UINT64_MAX, SAT_S_MODEL);
  }

#undef RUN_SAT
#undef SAT_DF
#undef SAT_S_MODEL
#undef SAT_U_MODEL
#undef M_MAX_INT
#undef M_MIN_INT
#undef M_MAX_UINT
}
#undef __