[ARM] Add vcge, vcgt instructions to assembler.
- Floating point, signed, and unsigned. - Disassembler, simulator support too. LOG=N BUG=v8:4124 Review-Url: https://codereview.chromium.org/2602293002 Cr-Commit-Position: refs/heads/master@{#42262}
This commit is contained in:
parent
d23e7d2f81
commit
e46893c6c4
@ -4319,7 +4319,7 @@ void Assembler::vtst(NeonSize size, QwNeonRegister dst,
|
||||
void Assembler::vceq(const QwNeonRegister dst, const QwNeonRegister src1,
|
||||
const QwNeonRegister src2) {
|
||||
DCHECK(IsEnabled(NEON));
|
||||
// Qd = vceq(Qn, Qm) SIMD integer compare equal.
|
||||
// Qd = vceq(Qn, Qm) SIMD floating point compare equal.
|
||||
// Instruction details available in ARM DDI 0406C.b, A8-844.
|
||||
int vd, d;
|
||||
dst.split_code(&vd, &d);
|
||||
@ -4334,7 +4334,7 @@ void Assembler::vceq(const QwNeonRegister dst, const QwNeonRegister src1,
|
||||
void Assembler::vceq(NeonSize size, QwNeonRegister dst,
|
||||
const QwNeonRegister src1, const QwNeonRegister src2) {
|
||||
DCHECK(IsEnabled(NEON));
|
||||
// Qd = vceq(Qn, Qm) SIMD bitwise compare equal.
|
||||
// Qd = vceq(Qn, Qm) SIMD integer compare equal.
|
||||
// Instruction details available in ARM DDI 0406C.b, A8-844.
|
||||
int vd, d;
|
||||
dst.split_code(&vd, &d);
|
||||
@ -4347,6 +4347,70 @@ void Assembler::vceq(NeonSize size, QwNeonRegister dst,
|
||||
n * B7 | B6 | m * B5 | B4 | vm);
|
||||
}
|
||||
|
||||
// Assembles the instruction word for the NEON compare instructions that take
// no data type (used by the floating point vcge/vcgt emitters). Only the ge
// and gt conditions are accepted (checked in debug builds).
static Instr EncodeNeonCompareOp(const QwNeonRegister dst,
                                 const QwNeonRegister src1,
                                 const QwNeonRegister src2, Condition cond) {
  DCHECK(cond == ge || cond == gt);

  // split_code() yields two values per register: the first lands in a
  // multi-bit register field, the second in a separate single bit position.
  int dst_field, dst_bit;
  dst.split_code(&dst_field, &dst_bit);
  int src1_field, src1_bit;
  src1.split_code(&src1_field, &src1_bit);
  int src2_field, src2_bit;
  src2.split_code(&src2_field, &src2_bit);

  // Bit 21 is set for gt and left clear for ge.
  int cond_bits = 0;
  if (cond == gt) cond_bits = B21;

  const int dst_bits = dst_bit * B22 | dst_field * B12;
  const int src1_bits = src1_field * B16 | src1_bit * B7;
  const int src2_bits = src2_bit * B5 | src2_field;

  // The leading term stays unsigned so the top opcode bits do not overflow a
  // signed int before the fields are OR-ed in.
  return 0x1E6U * B23 | cond_bits | dst_bits | src1_bits | src2_bits |
         0xe * B8 | B6;
}
|
||||
|
||||
// Assembles the instruction word for the NEON compare instructions that take
// a data type (used by the integer vcge/vcgt emitters). Only the ge and gt
// conditions are accepted (checked in debug builds).
static Instr EncodeNeonCompareOp(NeonDataType dt, const QwNeonRegister dst,
                                 const QwNeonRegister src1,
                                 const QwNeonRegister src2, Condition cond) {
  DCHECK(cond == ge || cond == gt);

  // split_code() yields two values per register: the first lands in a
  // multi-bit register field, the second in a separate single bit position.
  int dst_field, dst_bit;
  dst.split_code(&dst_field, &dst_bit);
  int src1_field, src1_bit;
  src1.split_code(&src1_field, &src1_bit);
  int src2_field, src2_bit;
  src2.split_code(&src2_field, &src2_bit);

  // The size bits of dt are halved and placed starting at bit 20; the bits
  // selected by NeonDataTypeUMask are OR-ed into the word unshifted.
  const int size_bits = ((dt & NeonDataTypeSizeMask) / 2) * B20;
  const int u_bits = dt & NeonDataTypeUMask;

  // Bit 4 is set for ge and left clear for gt.
  int cond_bits = 0;
  if (cond == ge) cond_bits = B4;

  const int dst_bits = dst_bit * B22 | dst_field * B12;
  const int src1_bits = src1_field * B16 | src1_bit * B7;
  const int src2_bits = src2_bit * B5 | src2_field;

  // The leading term stays unsigned so the top opcode bits do not overflow a
  // signed int before the fields are OR-ed in.
  return 0x1E4U * B23 | u_bits | size_bits | dst_bits | src1_bits |
         src2_bits | 0x3 * B8 | B6 | cond_bits;
}
|
||||
|
||||
void Assembler::vcge(const QwNeonRegister dst, const QwNeonRegister src1,
|
||||
const QwNeonRegister src2) {
|
||||
DCHECK(IsEnabled(NEON));
|
||||
// Qd = vcge(Qn, Qm) SIMD floating point compare greater or equal.
|
||||
// Instruction details available in ARM DDI 0406C.b, A8-848.
|
||||
emit(EncodeNeonCompareOp(dst, src1, src2, ge));
|
||||
}
|
||||
|
||||
void Assembler::vcge(NeonDataType dt, QwNeonRegister dst,
                     const QwNeonRegister src1, const QwNeonRegister src2) {
  // Qd = vcge(Qn, Qm) SIMD integer compare greater or equal.
  // Instruction details available in ARM DDI 0406C.b, A8-848.
  DCHECK(IsEnabled(NEON));
  // Build the instruction word for data type dt with the ge condition, then
  // emit it.
  const Instr encoding = EncodeNeonCompareOp(dt, dst, src1, src2, ge);
  emit(encoding);
}
|
||||
|
||||
void Assembler::vcgt(const QwNeonRegister dst, const QwNeonRegister src1,
|
||||
const QwNeonRegister src2) {
|
||||
DCHECK(IsEnabled(NEON));
|
||||
// Qd = vcgt(Qn, Qm) SIMD floating point compare greater than.
|
||||
// Instruction details available in ARM DDI 0406C.b, A8-852.
|
||||
emit(EncodeNeonCompareOp(dst, src1, src2, gt));
|
||||
}
|
||||
|
||||
void Assembler::vcgt(NeonDataType dt, QwNeonRegister dst,
                     const QwNeonRegister src1, const QwNeonRegister src2) {
  // Qd = vcgt(Qn, Qm) SIMD integer compare greater than.
  // Instruction details available in ARM DDI 0406C.b, A8-852.
  DCHECK(IsEnabled(NEON));
  // Build the instruction word for data type dt with the gt condition, then
  // emit it.
  const Instr encoding = EncodeNeonCompareOp(dt, dst, src1, src2, gt);
  emit(encoding);
}
|
||||
|
||||
void Assembler::vbsl(QwNeonRegister dst, const QwNeonRegister src1,
|
||||
const QwNeonRegister src2) {
|
||||
DCHECK(IsEnabled(NEON));
|
||||
|
@ -1393,6 +1393,14 @@ class Assembler : public AssemblerBase {
|
||||
const QwNeonRegister src2);
|
||||
void vceq(NeonSize size, const QwNeonRegister dst, const QwNeonRegister src1,
|
||||
const QwNeonRegister src2);
|
||||
void vcge(const QwNeonRegister dst, const QwNeonRegister src1,
|
||||
const QwNeonRegister src2);
|
||||
void vcge(NeonDataType dt, const QwNeonRegister dst,
|
||||
const QwNeonRegister src1, const QwNeonRegister src2);
|
||||
void vcgt(const QwNeonRegister dst, const QwNeonRegister src1,
|
||||
const QwNeonRegister src2);
|
||||
void vcgt(NeonDataType dt, const QwNeonRegister dst,
|
||||
const QwNeonRegister src1, const QwNeonRegister src2);
|
||||
void vbsl(const QwNeonRegister dst, const QwNeonRegister src1,
|
||||
const QwNeonRegister src2);
|
||||
void vext(const QwNeonRegister dst, const QwNeonRegister src1,
|
||||
|
@ -1899,6 +1899,16 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
|
||||
// vceq.f32 Qd, Qm, Qn.
|
||||
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
|
||||
"vceq.f32 q%d, q%d, q%d", Vd, Vn, Vm);
|
||||
} else if (instr->Bits(11, 8) == 0x3) {
|
||||
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int Vm = instr->VFPMRegValue(kSimd128Precision);
|
||||
int Vn = instr->VFPNRegValue(kSimd128Precision);
|
||||
const char* op = (instr->Bit(4) == 1) ? "vcge" : "vcgt";
|
||||
// vcge/vcgt.s<size> Qd, Qm, Qn.
|
||||
out_buffer_pos_ +=
|
||||
SNPrintF(out_buffer_ + out_buffer_pos_, "%s.s%d q%d, q%d, q%d", op,
|
||||
size, Vd, Vn, Vm);
|
||||
} else if (instr->Bit(20) == 0 && instr->Bits(11, 8) == 0xf &&
|
||||
instr->Bit(6) == 1 && instr->Bit(4) == 1) {
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
@ -1983,6 +1993,25 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
|
||||
int Vm = instr->VFPMRegValue(kSimd128Precision);
|
||||
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
|
||||
"vmul.f32 q%d, q%d, q%d", Vd, Vn, Vm);
|
||||
} else if (instr->Bit(20) == 0 && instr->Bits(11, 8) == 0xe &&
|
||||
instr->Bit(4) == 0) {
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int Vm = instr->VFPMRegValue(kSimd128Precision);
|
||||
int Vn = instr->VFPNRegValue(kSimd128Precision);
|
||||
const char* op = (instr->Bit(21) == 0) ? "vcge" : "vcgt";
|
||||
// vcge/vcgt.f32 Qd, Qm, Qn.
|
||||
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
|
||||
"%s.f32 q%d, q%d, q%d", op, Vd, Vn, Vm);
|
||||
} else if (instr->Bits(11, 8) == 0x3) {
|
||||
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int Vm = instr->VFPMRegValue(kSimd128Precision);
|
||||
int Vn = instr->VFPNRegValue(kSimd128Precision);
|
||||
const char* op = (instr->Bit(4) == 1) ? "vcge" : "vcgt";
|
||||
// vcge/vcgt.u<size> Qd, Qm, Qn.
|
||||
out_buffer_pos_ +=
|
||||
SNPrintF(out_buffer_ + out_buffer_pos_, "%s.u%d q%d, q%d, q%d", op,
|
||||
size, Vd, Vn, Vm);
|
||||
} else {
|
||||
Unknown(instr);
|
||||
}
|
||||
|
@ -3840,7 +3840,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
||||
set_q_register(Vd, data);
|
||||
} else if (instr->Bits(11, 8) == 8) {
|
||||
// vadd/vtst
|
||||
int size = static_cast<NeonSize>(instr->Bits(21, 20));
|
||||
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int Vm = instr->VFPMRegValue(kSimd128Precision);
|
||||
int Vn = instr->VFPNRegValue(kSimd128Precision);
|
||||
@ -3993,6 +3993,57 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
||||
dst[i] = (src1[i] == src2[i]) ? 0xFFFFFFFF : 0;
|
||||
}
|
||||
set_q_register(Vd, dst);
|
||||
} else if (instr->Bits(11, 8) == 0x3) {
|
||||
// vcge/vcgt.s<size> Qd, Qm, Qn.
|
||||
bool ge = instr->Bit(4) == 1;
|
||||
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int Vm = instr->VFPMRegValue(kSimd128Precision);
|
||||
int Vn = instr->VFPNRegValue(kSimd128Precision);
|
||||
switch (size) {
|
||||
case Neon8: {
|
||||
int8_t src1[16], src2[16];
|
||||
get_q_register(Vn, src1);
|
||||
get_q_register(Vm, src2);
|
||||
for (int i = 0; i < 16; i++) {
|
||||
if (ge)
|
||||
src1[i] = src1[i] >= src2[i] ? 0xFF : 0;
|
||||
else
|
||||
src1[i] = src1[i] > src2[i] ? 0xFF : 0;
|
||||
}
|
||||
set_q_register(Vd, src1);
|
||||
break;
|
||||
}
|
||||
case Neon16: {
|
||||
int16_t src1[8], src2[8];
|
||||
get_q_register(Vn, src1);
|
||||
get_q_register(Vm, src2);
|
||||
for (int i = 0; i < 8; i++) {
|
||||
if (ge)
|
||||
src1[i] = src1[i] >= src2[i] ? 0xFFFF : 0;
|
||||
else
|
||||
src1[i] = src1[i] > src2[i] ? 0xFFFF : 0;
|
||||
}
|
||||
set_q_register(Vd, src1);
|
||||
break;
|
||||
}
|
||||
case Neon32: {
|
||||
int32_t src1[4], src2[4];
|
||||
get_q_register(Vn, src1);
|
||||
get_q_register(Vm, src2);
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (ge)
|
||||
src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFF : 0;
|
||||
else
|
||||
src1[i] = src1[i] > src2[i] ? 0xFFFFFFFF : 0;
|
||||
}
|
||||
set_q_register(Vd, src1);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE();
|
||||
break;
|
||||
}
|
||||
} else if (instr->Bit(20) == 0 && instr->Bits(11, 8) == 0xf &&
|
||||
instr->Bit(6) == 1 && instr->Bit(4) == 1) {
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
@ -4062,7 +4113,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
||||
case 6:
|
||||
if (instr->Bits(11, 8) == 8 && instr->Bit(4) == 0) {
|
||||
// vsub.size Qd, Qm, Qn.
|
||||
int size = static_cast<NeonSize>(instr->Bits(21, 20));
|
||||
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int Vm = instr->VFPMRegValue(kSimd128Precision);
|
||||
int Vn = instr->VFPNRegValue(kSimd128Precision);
|
||||
@ -4103,7 +4154,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
||||
}
|
||||
} else if (instr->Bits(11, 8) == 8 && instr->Bit(4) == 1) {
|
||||
// vceq.size Qd, Qm, Qn.
|
||||
int size = static_cast<NeonSize>(instr->Bits(21, 20));
|
||||
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int Vm = instr->VFPMRegValue(kSimd128Precision);
|
||||
int Vn = instr->VFPNRegValue(kSimd128Precision);
|
||||
@ -4193,6 +4244,76 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
||||
src1[i] = src1[i] * src2[i];
|
||||
}
|
||||
set_q_register(Vd, src1);
|
||||
} else if (instr->Bit(20) == 0 && instr->Bits(11, 8) == 0xe &&
|
||||
instr->Bit(4) == 0) {
|
||||
// vcge/vcgt.f32 Qd, Qm, Qn
|
||||
bool ge = instr->Bit(21) == 0;
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int Vm = instr->VFPMRegValue(kSimd128Precision);
|
||||
int Vn = instr->VFPNRegValue(kSimd128Precision);
|
||||
float src1[4], src2[4];
|
||||
get_q_register(Vn, src1);
|
||||
get_q_register(Vm, src2);
|
||||
uint32_t dst[4];
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (ge) {
|
||||
dst[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0;
|
||||
} else {
|
||||
dst[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0;
|
||||
}
|
||||
}
|
||||
set_q_register(Vd, dst);
|
||||
} else if (instr->Bits(11, 8) == 0x3) {
|
||||
// vcge/vcgt.u<size> Qd, Qm, Qn.
|
||||
bool ge = instr->Bit(4) == 1;
|
||||
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int Vm = instr->VFPMRegValue(kSimd128Precision);
|
||||
int Vn = instr->VFPNRegValue(kSimd128Precision);
|
||||
switch (size) {
|
||||
case Neon8: {
|
||||
uint8_t src1[16], src2[16];
|
||||
get_q_register(Vn, src1);
|
||||
get_q_register(Vm, src2);
|
||||
for (int i = 0; i < 16; i++) {
|
||||
if (ge)
|
||||
src1[i] = src1[i] >= src2[i] ? 0xFFu : 0;
|
||||
else
|
||||
src1[i] = src1[i] > src2[i] ? 0xFFu : 0;
|
||||
}
|
||||
set_q_register(Vd, src1);
|
||||
break;
|
||||
}
|
||||
case Neon16: {
|
||||
uint16_t src1[8], src2[8];
|
||||
get_q_register(Vn, src1);
|
||||
get_q_register(Vm, src2);
|
||||
for (int i = 0; i < 8; i++) {
|
||||
if (ge)
|
||||
src1[i] = src1[i] >= src2[i] ? 0xFFFFu : 0;
|
||||
else
|
||||
src1[i] = src1[i] > src2[i] ? 0xFFFFu : 0;
|
||||
}
|
||||
set_q_register(Vd, src1);
|
||||
break;
|
||||
}
|
||||
case Neon32: {
|
||||
uint32_t src1[4], src2[4];
|
||||
get_q_register(Vn, src1);
|
||||
get_q_register(Vm, src2);
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (ge)
|
||||
src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0;
|
||||
else
|
||||
src1[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0;
|
||||
}
|
||||
set_q_register(Vd, src1);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE();
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
@ -4314,7 +4435,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
||||
set_d_register(vd, &result);
|
||||
} else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 6) == 0x7) {
|
||||
// vzip.<size> Qd, Qm.
|
||||
int size = static_cast<NeonSize>(instr->Bits(19, 18));
|
||||
NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int Vm = instr->VFPMRegValue(kSimd128Precision);
|
||||
switch (size) {
|
||||
@ -4368,7 +4489,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
||||
// vrev<op>.size Qd, Qm
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int Vm = instr->VFPMRegValue(kSimd128Precision);
|
||||
int size = static_cast<NeonSize>(instr->Bits(19, 18));
|
||||
NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
|
||||
NeonSize op = static_cast<NeonSize>(static_cast<int>(Neon64) -
|
||||
instr->Bits(8, 7));
|
||||
switch (op) {
|
||||
@ -4452,7 +4573,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
||||
} else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0) {
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int Vm = instr->VFPMRegValue(kSimd128Precision);
|
||||
int size = static_cast<NeonSize>(instr->Bits(19, 18));
|
||||
NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
|
||||
if (instr->Bits(9, 6) == 0xd) {
|
||||
// vabs<type>.<size> Qd, Qm
|
||||
if (instr->Bit(10) != 0) {
|
||||
|
@ -1289,8 +1289,11 @@ TEST(15) {
|
||||
uint32_t vadd8[4], vadd16[4], vadd32[4];
|
||||
uint32_t vsub8[4], vsub16[4], vsub32[4];
|
||||
uint32_t vmul8[4], vmul16[4], vmul32[4];
|
||||
uint32_t vceq[4], vceqf[4], vcgef[4], vcgtf[4];
|
||||
uint32_t vcge_s8[4], vcge_u16[4], vcge_s32[4];
|
||||
uint32_t vcgt_s8[4], vcgt_u16[4], vcgt_s32[4];
|
||||
float vrecpe[4], vrecps[4], vrsqrte[4], vrsqrts[4];
|
||||
uint32_t vtst[4], vceq[4], vceqf[4], vbsl[4];
|
||||
uint32_t vtst[4], vbsl[4];
|
||||
uint32_t vext[4];
|
||||
uint32_t vzip8a[4], vzip8b[4], vzip16a[4], vzip16b[4], vzip32a[4],
|
||||
vzip32b[4];
|
||||
@ -1518,6 +1521,18 @@ TEST(15) {
|
||||
__ vceq(q1, q1, q0);
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vceqf))));
|
||||
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
||||
// vcge (float).
|
||||
__ vmov(s0, 1.0);
|
||||
__ vmov(s1, -1.0);
|
||||
__ vmov(s2, -0.0);
|
||||
__ vmov(s3, 0.0);
|
||||
__ vdup(q1, s3);
|
||||
__ vcge(q2, q1, q0);
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcgef))));
|
||||
__ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4));
|
||||
__ vcgt(q2, q1, q0);
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcgtf))));
|
||||
__ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4));
|
||||
|
||||
// vadd (integer).
|
||||
__ mov(r4, Operand(0x81));
|
||||
@ -1585,12 +1600,40 @@ TEST(15) {
|
||||
// vceq.
|
||||
__ mov(r4, Operand(0x03));
|
||||
__ vdup(Neon8, q0, r4);
|
||||
__ mov(r4, Operand(0x03));
|
||||
__ vdup(Neon16, q1, r4);
|
||||
__ vceq(Neon8, q1, q0, q1);
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vceq))));
|
||||
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
||||
|
||||
// vcge/vcgt.
|
||||
__ mov(r4, Operand(0x03));
|
||||
__ vdup(Neon16, q0, r4);
|
||||
__ vdup(Neon8, q1, r4);
|
||||
__ vcge(NeonS8, q2, q0, q1);
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcge_s8))));
|
||||
__ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4));
|
||||
__ vcgt(NeonS8, q2, q0, q1);
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcgt_s8))));
|
||||
__ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4));
|
||||
__ mov(r4, Operand(0xff));
|
||||
__ vdup(Neon16, q0, r4);
|
||||
__ vdup(Neon8, q1, r4);
|
||||
__ vcge(NeonU16, q2, q0, q1);
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcge_u16))));
|
||||
__ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4));
|
||||
__ vcgt(NeonU16, q2, q0, q1);
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcgt_u16))));
|
||||
__ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4));
|
||||
__ mov(r4, Operand(0xff));
|
||||
__ vdup(Neon32, q0, r4);
|
||||
__ vdup(Neon8, q1, r4);
|
||||
__ vcge(NeonS32, q2, q0, q1);
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcge_s32))));
|
||||
__ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4));
|
||||
__ vcgt(NeonS32, q2, q0, q1);
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcgt_s32))));
|
||||
__ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4));
|
||||
|
||||
// vtst.
|
||||
__ mov(r4, Operand(0x03));
|
||||
__ vdup(Neon8, q0, r4);
|
||||
@ -1784,6 +1827,9 @@ TEST(15) {
|
||||
CHECK_EQ_SPLAT(vrsqrte, 0.5f); // 1 / sqrt(4)
|
||||
CHECK_EQ_SPLAT(vrsqrts, -1.0f); // (3 - (2 * 2.5)) / 2
|
||||
CHECK_EQ_SPLAT(vceqf, 0xffffffffu);
|
||||
// [0] >= [1, -1, -0, 0]
|
||||
CHECK_EQ_32X4(vcgef, 0u, 0xffffffffu, 0xffffffffu, 0xffffffffu);
|
||||
CHECK_EQ_32X4(vcgtf, 0u, 0xffffffffu, 0u, 0u);
|
||||
CHECK_EQ_SPLAT(vadd8, 0x03030303u);
|
||||
CHECK_EQ_SPLAT(vadd16, 0x00030003u);
|
||||
CHECK_EQ_SPLAT(vadd32, 0x00000003u);
|
||||
@ -1794,6 +1840,15 @@ TEST(15) {
|
||||
CHECK_EQ_SPLAT(vmul16, 0x00040004u);
|
||||
CHECK_EQ_SPLAT(vmul32, 0x00000004u);
|
||||
CHECK_EQ_SPLAT(vceq, 0x00ff00ffu);
|
||||
// [0, 3, 0, 3, ...] >= [3, 3, 3, 3, ...]
|
||||
CHECK_EQ_SPLAT(vcge_s8, 0x00ff00ffu);
|
||||
CHECK_EQ_SPLAT(vcgt_s8, 0u);
|
||||
// [0x00ff, 0x00ff, ...] >= [0xffff, 0xffff, ...]
|
||||
CHECK_EQ_SPLAT(vcge_u16, 0u);
|
||||
CHECK_EQ_SPLAT(vcgt_u16, 0u);
|
||||
// [0x000000ff, 0x000000ff, ...] >= [0xffffffff, 0xffffffff, ...]
|
||||
CHECK_EQ_SPLAT(vcge_s32, 0xffffffffu);
|
||||
CHECK_EQ_SPLAT(vcgt_s32, 0xffffffffu);
|
||||
CHECK_EQ_SPLAT(vtst, 0x00ff00ffu);
|
||||
CHECK_EQ_SPLAT(vbsl, 0x02010201u);
|
||||
|
||||
|
@ -1065,12 +1065,28 @@ TEST(Neon) {
|
||||
"f260e870 vtst.i32 q15, q0, q8");
|
||||
COMPARE(vceq(q0, q1, q2),
|
||||
"f2020e44 vceq.f32 q0, q1, q2");
|
||||
COMPARE(vcge(q0, q1, q2),
|
||||
"f3020e44 vcge.f32 q0, q1, q2");
|
||||
COMPARE(vcgt(q0, q1, q2),
|
||||
"f3220e44 vcgt.f32 q0, q1, q2");
|
||||
COMPARE(vceq(Neon8, q0, q1, q2),
|
||||
"f3020854 vceq.i8 q0, q1, q2");
|
||||
COMPARE(vceq(Neon16, q1, q2, q8),
|
||||
"f3142870 vceq.i16 q1, q2, q8");
|
||||
COMPARE(vceq(Neon32, q15, q0, q8),
|
||||
"f360e870 vceq.i32 q15, q0, q8");
|
||||
COMPARE(vcge(NeonS8, q0, q1, q2),
|
||||
"f2020354 vcge.s8 q0, q1, q2");
|
||||
COMPARE(vcge(NeonU16, q1, q2, q8),
|
||||
"f3142370 vcge.u16 q1, q2, q8");
|
||||
COMPARE(vcge(NeonS32, q15, q0, q8),
|
||||
"f260e370 vcge.s32 q15, q0, q8");
|
||||
COMPARE(vcgt(NeonS8, q0, q1, q2),
|
||||
"f2020344 vcgt.s8 q0, q1, q2");
|
||||
COMPARE(vcgt(NeonU16, q1, q2, q8),
|
||||
"f3142360 vcgt.u16 q1, q2, q8");
|
||||
COMPARE(vcgt(NeonS32, q15, q0, q8),
|
||||
"f260e360 vcgt.s32 q15, q0, q8");
|
||||
COMPARE(vbsl(q0, q1, q2),
|
||||
"f3120154 vbsl q0, q1, q2");
|
||||
COMPARE(vbsl(q15, q0, q8),
|
||||
|
Loading…
Reference in New Issue
Block a user