[ARM] Add vcge, vcgt instructions to assembler.

- Floating point, signed, and unsigned.
- Disassembler, simulator support too.
LOG=N
BUG=v8:4124

Review-Url: https://codereview.chromium.org/2602293002
Cr-Commit-Position: refs/heads/master@{#42262}
This commit is contained in:
bbudge 2017-01-12 03:20:08 -08:00 committed by Commit bot
parent d23e7d2f81
commit e46893c6c4
6 changed files with 303 additions and 10 deletions

View File

@ -4319,7 +4319,7 @@ void Assembler::vtst(NeonSize size, QwNeonRegister dst,
void Assembler::vceq(const QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2) {
DCHECK(IsEnabled(NEON));
// Qd = vceq(Qn, Qm) SIMD integer compare equal.
// Qd = vceq(Qn, Qm) SIMD floating point compare equal.
// Instruction details available in ARM DDI 0406C.b, A8-844.
int vd, d;
dst.split_code(&vd, &d);
@ -4334,7 +4334,7 @@ void Assembler::vceq(const QwNeonRegister dst, const QwNeonRegister src1,
void Assembler::vceq(NeonSize size, QwNeonRegister dst,
const QwNeonRegister src1, const QwNeonRegister src2) {
DCHECK(IsEnabled(NEON));
// Qd = vceq(Qn, Qm) SIMD bitwise compare equal.
// Qd = vceq(Qn, Qm) SIMD integer compare equal.
// Instruction details available in ARM DDI 0406C.b, A8-844.
int vd, d;
dst.split_code(&vd, &d);
@ -4347,6 +4347,70 @@ void Assembler::vceq(NeonSize size, QwNeonRegister dst,
n * B7 | B6 | m * B5 | B4 | vm);
}
static Instr EncodeNeonCompareOp(const QwNeonRegister dst,
const QwNeonRegister src1,
const QwNeonRegister src2, Condition cond) {
DCHECK(cond == ge || cond == gt);
int vd, d;
dst.split_code(&vd, &d);
int vn, n;
src1.split_code(&vn, &n);
int vm, m;
src2.split_code(&vm, &m);
int is_gt = (cond == gt) ? 1 : 0;
return 0x1E6U * B23 | d * B22 | is_gt * B21 | vn * B16 | vd * B12 | 0xe * B8 |
n * B7 | B6 | m * B5 | vm;
}
static Instr EncodeNeonCompareOp(NeonDataType dt, const QwNeonRegister dst,
const QwNeonRegister src1,
const QwNeonRegister src2, Condition cond) {
DCHECK(cond == ge || cond == gt);
int vd, d;
dst.split_code(&vd, &d);
int vn, n;
src1.split_code(&vn, &n);
int vm, m;
src2.split_code(&vm, &m);
int size = (dt & NeonDataTypeSizeMask) / 2;
int U = dt & NeonDataTypeUMask;
int is_ge = (cond == ge) ? 1 : 0;
return 0x1E4U * B23 | U | d * B22 | size * B20 | vn * B16 | vd * B12 |
0x3 * B8 | n * B7 | B6 | m * B5 | is_ge * B4 | vm;
}
void Assembler::vcge(const QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2) {
DCHECK(IsEnabled(NEON));
// Qd = vcge(Qn, Qm) SIMD floating point compare greater or equal.
// Instruction details available in ARM DDI 0406C.b, A8-848.
emit(EncodeNeonCompareOp(dst, src1, src2, ge));
}
void Assembler::vcge(NeonDataType dt, QwNeonRegister dst,
const QwNeonRegister src1, const QwNeonRegister src2) {
DCHECK(IsEnabled(NEON));
// Qd = vcge(Qn, Qm) SIMD integer compare greater or equal.
// Instruction details available in ARM DDI 0406C.b, A8-848.
emit(EncodeNeonCompareOp(dt, dst, src1, src2, ge));
}
void Assembler::vcgt(const QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2) {
DCHECK(IsEnabled(NEON));
// Qd = vcgt(Qn, Qm) SIMD floating point compare greater than.
// Instruction details available in ARM DDI 0406C.b, A8-852.
emit(EncodeNeonCompareOp(dst, src1, src2, gt));
}
void Assembler::vcgt(NeonDataType dt, QwNeonRegister dst,
const QwNeonRegister src1, const QwNeonRegister src2) {
DCHECK(IsEnabled(NEON));
// Qd = vcgt(Qn, Qm) SIMD integer compare greater than.
// Instruction details available in ARM DDI 0406C.b, A8-852.
emit(EncodeNeonCompareOp(dt, dst, src1, src2, gt));
}
void Assembler::vbsl(QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2) {
DCHECK(IsEnabled(NEON));

View File

@ -1393,6 +1393,14 @@ class Assembler : public AssemblerBase {
const QwNeonRegister src2);
void vceq(NeonSize size, const QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2);
void vcge(const QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2);
void vcge(NeonDataType dt, const QwNeonRegister dst,
const QwNeonRegister src1, const QwNeonRegister src2);
void vcgt(const QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2);
void vcgt(NeonDataType dt, const QwNeonRegister dst,
const QwNeonRegister src1, const QwNeonRegister src2);
void vbsl(const QwNeonRegister dst, const QwNeonRegister src1,
const QwNeonRegister src2);
void vext(const QwNeonRegister dst, const QwNeonRegister src1,

View File

@ -1899,6 +1899,16 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
// vceq.f32 Qd, Qm, Qn.
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vceq.f32 q%d, q%d, q%d", Vd, Vn, Vm);
} else if (instr->Bits(11, 8) == 0x3) {
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
const char* op = (instr->Bit(4) == 1) ? "vcge" : "vcgt";
// vcge/vcgt.s<size> Qd, Qm, Qn.
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "%s.s%d q%d, q%d, q%d", op,
size, Vd, Vn, Vm);
} else if (instr->Bit(20) == 0 && instr->Bits(11, 8) == 0xf &&
instr->Bit(6) == 1 && instr->Bit(4) == 1) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
@ -1983,6 +1993,25 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
int Vm = instr->VFPMRegValue(kSimd128Precision);
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vmul.f32 q%d, q%d, q%d", Vd, Vn, Vm);
} else if (instr->Bit(20) == 0 && instr->Bits(11, 8) == 0xe &&
instr->Bit(4) == 0) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
const char* op = (instr->Bit(21) == 0) ? "vcge" : "vcgt";
// vcge/vcgt.f32 Qd, Qm, Qn.
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"%s.f32 q%d, q%d, q%d", op, Vd, Vn, Vm);
} else if (instr->Bits(11, 8) == 0x3) {
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
const char* op = (instr->Bit(4) == 1) ? "vcge" : "vcgt";
// vcge/vcgt.u<size> Qd, Qm, Qn.
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "%s.u%d q%d, q%d, q%d", op,
size, Vd, Vn, Vm);
} else {
Unknown(instr);
}

View File

@ -3840,7 +3840,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
set_q_register(Vd, data);
} else if (instr->Bits(11, 8) == 8) {
// vadd/vtst
int size = static_cast<NeonSize>(instr->Bits(21, 20));
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
@ -3993,6 +3993,57 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
dst[i] = (src1[i] == src2[i]) ? 0xFFFFFFFF : 0;
}
set_q_register(Vd, dst);
} else if (instr->Bits(11, 8) == 0x3) {
// vcge/vcgt.s<size> Qd, Qm, Qn.
bool ge = instr->Bit(4) == 1;
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
switch (size) {
case Neon8: {
int8_t src1[16], src2[16];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 16; i++) {
if (ge)
src1[i] = src1[i] >= src2[i] ? 0xFF : 0;
else
src1[i] = src1[i] > src2[i] ? 0xFF : 0;
}
set_q_register(Vd, src1);
break;
}
case Neon16: {
int16_t src1[8], src2[8];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
if (ge)
src1[i] = src1[i] >= src2[i] ? 0xFFFF : 0;
else
src1[i] = src1[i] > src2[i] ? 0xFFFF : 0;
}
set_q_register(Vd, src1);
break;
}
case Neon32: {
int32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
if (ge)
src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFF : 0;
else
src1[i] = src1[i] > src2[i] ? 0xFFFFFFFF : 0;
}
set_q_register(Vd, src1);
break;
}
default:
UNREACHABLE();
break;
}
} else if (instr->Bit(20) == 0 && instr->Bits(11, 8) == 0xf &&
instr->Bit(6) == 1 && instr->Bit(4) == 1) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
@ -4062,7 +4113,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
case 6:
if (instr->Bits(11, 8) == 8 && instr->Bit(4) == 0) {
// vsub.size Qd, Qm, Qn.
int size = static_cast<NeonSize>(instr->Bits(21, 20));
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
@ -4103,7 +4154,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
}
} else if (instr->Bits(11, 8) == 8 && instr->Bit(4) == 1) {
// vceq.size Qd, Qm, Qn.
int size = static_cast<NeonSize>(instr->Bits(21, 20));
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
@ -4193,6 +4244,76 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
src1[i] = src1[i] * src2[i];
}
set_q_register(Vd, src1);
} else if (instr->Bit(20) == 0 && instr->Bits(11, 8) == 0xe &&
instr->Bit(4) == 0) {
// vcge/vcgt.f32 Qd, Qm, Qn
bool ge = instr->Bit(21) == 0;
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
float src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
uint32_t dst[4];
for (int i = 0; i < 4; i++) {
if (ge) {
dst[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0;
} else {
dst[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0;
}
}
set_q_register(Vd, dst);
} else if (instr->Bits(11, 8) == 0x3) {
// vcge/vcgt.u<size> Qd, Qm, Qn.
bool ge = instr->Bit(4) == 1;
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kSimd128Precision);
switch (size) {
case Neon8: {
uint8_t src1[16], src2[16];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 16; i++) {
if (ge)
src1[i] = src1[i] >= src2[i] ? 0xFFu : 0;
else
src1[i] = src1[i] > src2[i] ? 0xFFu : 0;
}
set_q_register(Vd, src1);
break;
}
case Neon16: {
uint16_t src1[8], src2[8];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 8; i++) {
if (ge)
src1[i] = src1[i] >= src2[i] ? 0xFFFFu : 0;
else
src1[i] = src1[i] > src2[i] ? 0xFFFFu : 0;
}
set_q_register(Vd, src1);
break;
}
case Neon32: {
uint32_t src1[4], src2[4];
get_q_register(Vn, src1);
get_q_register(Vm, src2);
for (int i = 0; i < 4; i++) {
if (ge)
src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0;
else
src1[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0;
}
set_q_register(Vd, src1);
break;
}
default:
UNREACHABLE();
break;
}
} else {
UNIMPLEMENTED();
}
@ -4314,7 +4435,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
set_d_register(vd, &result);
} else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 6) == 0x7) {
// vzip.<size> Qd, Qm.
int size = static_cast<NeonSize>(instr->Bits(19, 18));
NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
switch (size) {
@ -4368,7 +4489,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
// vrev<op>.size Qd, Qm
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int size = static_cast<NeonSize>(instr->Bits(19, 18));
NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
NeonSize op = static_cast<NeonSize>(static_cast<int>(Neon64) -
instr->Bits(8, 7));
switch (op) {
@ -4452,7 +4573,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
} else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
int size = static_cast<NeonSize>(instr->Bits(19, 18));
NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
if (instr->Bits(9, 6) == 0xd) {
// vabs<type>.<size> Qd, Qm
if (instr->Bit(10) != 0) {

View File

@ -1289,8 +1289,11 @@ TEST(15) {
uint32_t vadd8[4], vadd16[4], vadd32[4];
uint32_t vsub8[4], vsub16[4], vsub32[4];
uint32_t vmul8[4], vmul16[4], vmul32[4];
uint32_t vceq[4], vceqf[4], vcgef[4], vcgtf[4];
uint32_t vcge_s8[4], vcge_u16[4], vcge_s32[4];
uint32_t vcgt_s8[4], vcgt_u16[4], vcgt_s32[4];
float vrecpe[4], vrecps[4], vrsqrte[4], vrsqrts[4];
uint32_t vtst[4], vceq[4], vceqf[4], vbsl[4];
uint32_t vtst[4], vbsl[4];
uint32_t vext[4];
uint32_t vzip8a[4], vzip8b[4], vzip16a[4], vzip16b[4], vzip32a[4],
vzip32b[4];
@ -1518,6 +1521,18 @@ TEST(15) {
__ vceq(q1, q1, q0);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vceqf))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
// vcge (float).
__ vmov(s0, 1.0);
__ vmov(s1, -1.0);
__ vmov(s2, -0.0);
__ vmov(s3, 0.0);
__ vdup(q1, s3);
__ vcge(q2, q1, q0);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcgef))));
__ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4));
__ vcgt(q2, q1, q0);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcgtf))));
__ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4));
// vadd (integer).
__ mov(r4, Operand(0x81));
@ -1585,12 +1600,40 @@ TEST(15) {
// vceq.
__ mov(r4, Operand(0x03));
__ vdup(Neon8, q0, r4);
__ mov(r4, Operand(0x03));
__ vdup(Neon16, q1, r4);
__ vceq(Neon8, q1, q0, q1);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vceq))));
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
// vcge/vcgt.
__ mov(r4, Operand(0x03));
__ vdup(Neon16, q0, r4);
__ vdup(Neon8, q1, r4);
__ vcge(NeonS8, q2, q0, q1);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcge_s8))));
__ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4));
__ vcgt(NeonS8, q2, q0, q1);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcgt_s8))));
__ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4));
__ mov(r4, Operand(0xff));
__ vdup(Neon16, q0, r4);
__ vdup(Neon8, q1, r4);
__ vcge(NeonU16, q2, q0, q1);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcge_u16))));
__ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4));
__ vcgt(NeonU16, q2, q0, q1);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcgt_u16))));
__ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4));
__ mov(r4, Operand(0xff));
__ vdup(Neon32, q0, r4);
__ vdup(Neon8, q1, r4);
__ vcge(NeonS32, q2, q0, q1);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcge_s32))));
__ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4));
__ vcgt(NeonS32, q2, q0, q1);
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcgt_s32))));
__ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4));
// vtst.
__ mov(r4, Operand(0x03));
__ vdup(Neon8, q0, r4);
@ -1784,6 +1827,9 @@ TEST(15) {
CHECK_EQ_SPLAT(vrsqrte, 0.5f); // 1 / sqrt(4)
CHECK_EQ_SPLAT(vrsqrts, -1.0f); // (3 - (2 * 2.5)) / 2
CHECK_EQ_SPLAT(vceqf, 0xffffffffu);
// [0] >= [-1, 1, -0, 0]
CHECK_EQ_32X4(vcgef, 0u, 0xffffffffu, 0xffffffffu, 0xffffffffu);
CHECK_EQ_32X4(vcgtf, 0u, 0xffffffffu, 0u, 0u);
CHECK_EQ_SPLAT(vadd8, 0x03030303u);
CHECK_EQ_SPLAT(vadd16, 0x00030003u);
CHECK_EQ_SPLAT(vadd32, 0x00000003u);
@ -1794,6 +1840,15 @@ TEST(15) {
CHECK_EQ_SPLAT(vmul16, 0x00040004u);
CHECK_EQ_SPLAT(vmul32, 0x00000004u);
CHECK_EQ_SPLAT(vceq, 0x00ff00ffu);
// [0, 3, 0, 3, ...] >= [3, 3, 3, 3, ...]
CHECK_EQ_SPLAT(vcge_s8, 0x00ff00ffu);
CHECK_EQ_SPLAT(vcgt_s8, 0u);
// [0x00ff, 0x00ff, ...] >= [0xffff, 0xffff, ...]
CHECK_EQ_SPLAT(vcge_u16, 0u);
CHECK_EQ_SPLAT(vcgt_u16, 0u);
// [0x000000ff, 0x000000ff, ...] >= [0xffffffff, 0xffffffff, ...]
CHECK_EQ_SPLAT(vcge_s32, 0xffffffffu);
CHECK_EQ_SPLAT(vcgt_s32, 0xffffffffu);
CHECK_EQ_SPLAT(vtst, 0x00ff00ffu);
CHECK_EQ_SPLAT(vbsl, 0x02010201u);

View File

@ -1065,12 +1065,28 @@ TEST(Neon) {
"f260e870 vtst.i32 q15, q0, q8");
COMPARE(vceq(q0, q1, q2),
"f2020e44 vceq.f32 q0, q1, q2");
COMPARE(vcge(q0, q1, q2),
"f3020e44 vcge.f32 q0, q1, q2");
COMPARE(vcgt(q0, q1, q2),
"f3220e44 vcgt.f32 q0, q1, q2");
COMPARE(vceq(Neon8, q0, q1, q2),
"f3020854 vceq.i8 q0, q1, q2");
COMPARE(vceq(Neon16, q1, q2, q8),
"f3142870 vceq.i16 q1, q2, q8");
COMPARE(vceq(Neon32, q15, q0, q8),
"f360e870 vceq.i32 q15, q0, q8");
COMPARE(vcge(NeonS8, q0, q1, q2),
"f2020354 vcge.s8 q0, q1, q2");
COMPARE(vcge(NeonU16, q1, q2, q8),
"f3142370 vcge.u16 q1, q2, q8");
COMPARE(vcge(NeonS32, q15, q0, q8),
"f260e370 vcge.s32 q15, q0, q8");
COMPARE(vcgt(NeonS8, q0, q1, q2),
"f2020344 vcgt.s8 q0, q1, q2");
COMPARE(vcgt(NeonU16, q1, q2, q8),
"f3142360 vcgt.u16 q1, q2, q8");
COMPARE(vcgt(NeonS32, q15, q0, q8),
"f260e360 vcgt.s32 q15, q0, q8");
COMPARE(vbsl(q0, q1, q2),
"f3120154 vbsl q0, q1, q2");
COMPARE(vbsl(q15, q0, q8),