[Turbofan] Add ARM NEON instructions for implementing SIMD.
- Adds NEON instructions to assembler, disassembler, simulator. - Adds ExtractLane, ReplaceLane functions to macro assembler. LOG=N BUG=v8:4124 Review-Url: https://codereview.chromium.org/2546933002 Cr-Commit-Position: refs/heads/master@{#41737}
This commit is contained in:
parent
250e85f84a
commit
03f33f2e68
@ -483,30 +483,6 @@ void NeonMemOperand::SetAlignment(int align) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
NeonListOperand::NeonListOperand(DoubleRegister base, int registers_count) {
|
||||
base_ = base;
|
||||
switch (registers_count) {
|
||||
case 1:
|
||||
type_ = nlt_1;
|
||||
break;
|
||||
case 2:
|
||||
type_ = nlt_2;
|
||||
break;
|
||||
case 3:
|
||||
type_ = nlt_3;
|
||||
break;
|
||||
case 4:
|
||||
type_ = nlt_4;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
type_ = nlt_1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Specific instructions, constants, and masks.
|
||||
|
||||
@ -2968,7 +2944,6 @@ void Assembler::vmov(const Register dst,
|
||||
emit(cond | 0xE*B24 | B20 | sn*B16 | dst.code()*B12 | 0xA*B8 | n*B7 | B4);
|
||||
}
|
||||
|
||||
|
||||
// Type of data to read from or write to VFP register.
|
||||
// Used as specifier in generic vcvt instruction.
|
||||
enum VFPType { S32, U32, F32, F64 };
|
||||
@ -3902,6 +3877,57 @@ void Assembler::vmovl(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src) {
|
||||
(dt & NeonDataTypeSizeMask)*B19 | vd*B12 | 0xA*B8 | m*B5 | B4 | vm);
|
||||
}
|
||||
|
||||
static int EncodeScalar(NeonDataType dt, int index) {
|
||||
int opc1_opc2 = 0;
|
||||
DCHECK_LE(0, index);
|
||||
switch (dt) {
|
||||
case NeonS8:
|
||||
case NeonU8:
|
||||
DCHECK_GT(8, index);
|
||||
opc1_opc2 = 0x8 | index;
|
||||
break;
|
||||
case NeonS16:
|
||||
case NeonU16:
|
||||
DCHECK_GT(4, index);
|
||||
opc1_opc2 = 0x1 | (index << 1);
|
||||
break;
|
||||
case NeonS32:
|
||||
case NeonU32:
|
||||
DCHECK_GT(2, index);
|
||||
opc1_opc2 = index << 2;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
break;
|
||||
}
|
||||
return (opc1_opc2 >> 2) * B21 | (opc1_opc2 & 0x3) * B5;
|
||||
}
|
||||
|
||||
void Assembler::vmov(NeonDataType dt, DwVfpRegister dst, int index,
|
||||
Register src) {
|
||||
// Instruction details available in ARM DDI 0406C.b, A8.8.940.
|
||||
// vmov ARM core register to scalar.
|
||||
DCHECK(dt == NeonS32 || dt == NeonU32 || IsEnabled(NEON));
|
||||
int vd, d;
|
||||
dst.split_code(&vd, &d);
|
||||
int opc1_opc2 = EncodeScalar(dt, index);
|
||||
emit(0xEEu * B24 | vd * B16 | src.code() * B12 | 0xB * B8 | d * B7 | B4 |
|
||||
opc1_opc2);
|
||||
}
|
||||
|
||||
void Assembler::vmov(NeonDataType dt, Register dst, DwVfpRegister src,
|
||||
int index) {
|
||||
// Instruction details available in ARM DDI 0406C.b, A8.8.942.
|
||||
// vmov Arm scalar to core register.
|
||||
DCHECK(dt == NeonS32 || dt == NeonU32 || IsEnabled(NEON));
|
||||
int vn, n;
|
||||
src.split_code(&vn, &n);
|
||||
int opc1_opc2 = EncodeScalar(dt, index);
|
||||
int u = (dt & NeonDataTypeUMask) != 0 ? 1 : 0;
|
||||
emit(0xEEu * B24 | u * B23 | B20 | vn * B16 | dst.code() * B12 | 0xB * B8 |
|
||||
n * B7 | B4 | opc1_opc2);
|
||||
}
|
||||
|
||||
void Assembler::vmov(const QwNeonRegister dst, const QwNeonRegister src) {
|
||||
DCHECK(IsEnabled(NEON));
|
||||
// Instruction details available in ARM DDI 0406C.b, A8-938.
|
||||
@ -3915,6 +3941,18 @@ void Assembler::vmov(const QwNeonRegister dst, const QwNeonRegister src) {
|
||||
B6 | m * B5 | B4 | vm);
|
||||
}
|
||||
|
||||
void Assembler::vmvn(const QwNeonRegister dst, const QwNeonRegister src) {
|
||||
DCHECK(IsEnabled(NEON));
|
||||
// Instruction details available in ARM DDI 0406C.b, A8-966.
|
||||
DCHECK(VfpRegisterIsAvailable(dst));
|
||||
DCHECK(VfpRegisterIsAvailable(src));
|
||||
int vd, d;
|
||||
dst.split_code(&vd, &d);
|
||||
int vm, m;
|
||||
src.split_code(&vm, &m);
|
||||
emit(0x1E7U * B23 | d * B22 | 3 * B20 | vd * B12 | 0x17 * B6 | m * B5 | vm);
|
||||
}
|
||||
|
||||
void Assembler::vswp(DwVfpRegister dst, DwVfpRegister src) {
|
||||
// Instruction details available in ARM DDI 0406C.b, A8.8.418.
|
||||
// 1111(31-28) | 00111(27-23) | D(22) | 110010(21-16) |
|
||||
@ -3940,8 +3978,105 @@ void Assembler::vswp(QwNeonRegister dst, QwNeonRegister src) {
|
||||
vm);
|
||||
}
|
||||
|
||||
void Assembler::vdup(NeonSize size, const QwNeonRegister dst,
|
||||
const Register src) {
|
||||
DCHECK(IsEnabled(NEON));
|
||||
// Instruction details available in ARM DDI 0406C.b, A8-886.
|
||||
int B = 0, E = 0;
|
||||
switch (size) {
|
||||
case Neon8:
|
||||
B = 1;
|
||||
break;
|
||||
case Neon16:
|
||||
E = 1;
|
||||
break;
|
||||
case Neon32:
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
break;
|
||||
}
|
||||
int vd, d;
|
||||
dst.split_code(&vd, &d);
|
||||
|
||||
emit(al | 0x1D * B23 | B * B22 | B21 | vd * B16 | src.code() * B12 |
|
||||
0xB * B8 | d * B7 | E * B5 | B4);
|
||||
}
|
||||
|
||||
void Assembler::vdup(const QwNeonRegister dst, const SwVfpRegister src) {
|
||||
DCHECK(IsEnabled(NEON));
|
||||
// Instruction details available in ARM DDI 0406C.b, A8-884.
|
||||
int index = src.code() & 1;
|
||||
int d_reg = src.code() / 2;
|
||||
int imm4 = 4 | index << 3; // esize = 32, index in bit 3.
|
||||
int vd, d;
|
||||
dst.split_code(&vd, &d);
|
||||
int vm, m;
|
||||
DwVfpRegister::from_code(d_reg).split_code(&vm, &m);
|
||||
|
||||
emit(0x1E7U * B23 | d * B22 | 0x3 * B20 | imm4 * B16 | vd * B12 | 0x18 * B7 |
|
||||
B6 | m * B5 | vm);
|
||||
}
|
||||
|
||||
// Encode NEON vcvt.src_type.dst_type instruction.
|
||||
static Instr EncodeNeonVCVT(const VFPType dst_type, const QwNeonRegister dst,
|
||||
const VFPType src_type, const QwNeonRegister src) {
|
||||
DCHECK(src_type != dst_type);
|
||||
DCHECK(src_type == F32 || dst_type == F32);
|
||||
// Instruction details available in ARM DDI 0406C.b, A8.8.868.
|
||||
int vd, d;
|
||||
dst.split_code(&vd, &d);
|
||||
int vm, m;
|
||||
src.split_code(&vm, &m);
|
||||
|
||||
int op = 0;
|
||||
if (src_type == F32) {
|
||||
DCHECK(dst_type == S32 || dst_type == U32);
|
||||
op = dst_type == U32 ? 3 : 2;
|
||||
} else {
|
||||
DCHECK(src_type == S32 || src_type == U32);
|
||||
op = src_type == U32 ? 1 : 0;
|
||||
}
|
||||
|
||||
return 0x1E7U * B23 | d * B22 | 0x3B * B16 | vd * B12 | 0x3 * B9 | op * B7 |
|
||||
B6 | m * B5 | vm;
|
||||
}
|
||||
|
||||
void Assembler::vcvt_f32_s32(const QwNeonRegister dst,
|
||||
const QwNeonRegister src) {
|
||||
DCHECK(IsEnabled(NEON));
|
||||
DCHECK(VfpRegisterIsAvailable(dst));
|
||||
DCHECK(VfpRegisterIsAvailable(src));
|
||||
emit(EncodeNeonVCVT(F32, dst, S32, src));
|
||||
}
|
||||
|
||||
void Assembler::vcvt_f32_u32(const QwNeonRegister dst,
|
||||
const QwNeonRegister src) {
|
||||
DCHECK(IsEnabled(NEON));
|
||||
DCHECK(VfpRegisterIsAvailable(dst));
|
||||
DCHECK(VfpRegisterIsAvailable(src));
|
||||
emit(EncodeNeonVCVT(F32, dst, U32, src));
|
||||
}
|
||||
|
||||
void Assembler::vcvt_s32_f32(const QwNeonRegister dst,
|
||||
const QwNeonRegister src) {
|
||||
DCHECK(IsEnabled(NEON));
|
||||
DCHECK(VfpRegisterIsAvailable(dst));
|
||||
DCHECK(VfpRegisterIsAvailable(src));
|
||||
emit(EncodeNeonVCVT(S32, dst, F32, src));
|
||||
}
|
||||
|
||||
void Assembler::vcvt_u32_f32(const QwNeonRegister dst,
|
||||
const QwNeonRegister src) {
|
||||
DCHECK(IsEnabled(NEON));
|
||||
DCHECK(VfpRegisterIsAvailable(dst));
|
||||
DCHECK(VfpRegisterIsAvailable(src));
|
||||
emit(EncodeNeonVCVT(U32, dst, F32, src));
|
||||
}
|
||||
|
||||
void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1,
|
||||
DwVfpRegister src2) {
|
||||
// Dd = veor(Dn, Dm) 64 bit integer exclusive OR.
|
||||
// Instruction details available in ARM DDI 0406C.b, A8.8.888.
|
||||
DCHECK(IsEnabled(NEON));
|
||||
int vd, d;
|
||||
@ -3956,6 +4091,7 @@ void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1,
|
||||
|
||||
void Assembler::veor(QwNeonRegister dst, QwNeonRegister src1,
|
||||
QwNeonRegister src2) {
|
||||
// Qd = veor(Qn, Qm) SIMD integer exclusive OR.
|
||||
// Instruction details available in ARM DDI 0406C.b, A8.8.888.
|
||||
DCHECK(IsEnabled(NEON));
|
||||
int vd, d;
|
||||
@ -3968,6 +4104,146 @@ void Assembler::veor(QwNeonRegister dst, QwNeonRegister src1,
|
||||
m * B5 | B4 | vm);
|
||||
}
|
||||
|
||||
void Assembler::vadd(QwNeonRegister dst, const QwNeonRegister src1,
|
||||
const QwNeonRegister src2) {
|
||||
DCHECK(IsEnabled(NEON));
|
||||
// Qd = vadd(Qn, Qm) SIMD floating point addition.
|
||||
// Instruction details available in ARM DDI 0406C.b, A8-830.
|
||||
int vd, d;
|
||||
dst.split_code(&vd, &d);
|
||||
int vn, n;
|
||||
src1.split_code(&vn, &n);
|
||||
int vm, m;
|
||||
src2.split_code(&vm, &m);
|
||||
emit(0x1E4U * B23 | d * B22 | vn * B16 | vd * B12 | 0xD * B8 | n * B7 | B6 |
|
||||
m * B5 | vm);
|
||||
}
|
||||
|
||||
void Assembler::vadd(NeonSize size, QwNeonRegister dst,
|
||||
const QwNeonRegister src1, const QwNeonRegister src2) {
|
||||
DCHECK(IsEnabled(NEON));
|
||||
// Qd = vadd(Qn, Qm) SIMD integer addition.
|
||||
// Instruction details available in ARM DDI 0406C.b, A8-828.
|
||||
int vd, d;
|
||||
dst.split_code(&vd, &d);
|
||||
int vn, n;
|
||||
src1.split_code(&vn, &n);
|
||||
int vm, m;
|
||||
src2.split_code(&vm, &m);
|
||||
int sz = static_cast<int>(size);
|
||||
emit(0x1E4U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 |
|
||||
n * B7 | B6 | m * B5 | vm);
|
||||
}
|
||||
|
||||
void Assembler::vsub(QwNeonRegister dst, const QwNeonRegister src1,
|
||||
const QwNeonRegister src2) {
|
||||
DCHECK(IsEnabled(NEON));
|
||||
// Qd = vsub(Qn, Qm) SIMD floating point subtraction.
|
||||
// Instruction details available in ARM DDI 0406C.b, A8-1086.
|
||||
int vd, d;
|
||||
dst.split_code(&vd, &d);
|
||||
int vn, n;
|
||||
src1.split_code(&vn, &n);
|
||||
int vm, m;
|
||||
src2.split_code(&vm, &m);
|
||||
emit(0x1E4U * B23 | d * B22 | B21 | vn * B16 | vd * B12 | 0xD * B8 | n * B7 |
|
||||
B6 | m * B5 | vm);
|
||||
}
|
||||
|
||||
void Assembler::vsub(NeonSize size, QwNeonRegister dst,
|
||||
const QwNeonRegister src1, const QwNeonRegister src2) {
|
||||
DCHECK(IsEnabled(NEON));
|
||||
// Qd = vsub(Qn, Qm) SIMD integer subtraction.
|
||||
// Instruction details available in ARM DDI 0406C.b, A8-1084.
|
||||
int vd, d;
|
||||
dst.split_code(&vd, &d);
|
||||
int vn, n;
|
||||
src1.split_code(&vn, &n);
|
||||
int vm, m;
|
||||
src2.split_code(&vm, &m);
|
||||
int sz = static_cast<int>(size);
|
||||
emit(0x1E6U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 |
|
||||
n * B7 | B6 | m * B5 | vm);
|
||||
}
|
||||
|
||||
void Assembler::vtst(NeonSize size, QwNeonRegister dst,
|
||||
const QwNeonRegister src1, const QwNeonRegister src2) {
|
||||
DCHECK(IsEnabled(NEON));
|
||||
// Qd = vtst(Qn, Qm) SIMD test integer operands.
|
||||
// Instruction details available in ARM DDI 0406C.b, A8-1098.
|
||||
int vd, d;
|
||||
dst.split_code(&vd, &d);
|
||||
int vn, n;
|
||||
src1.split_code(&vn, &n);
|
||||
int vm, m;
|
||||
src2.split_code(&vm, &m);
|
||||
int sz = static_cast<int>(size);
|
||||
emit(0x1E4U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 |
|
||||
n * B7 | B6 | m * B5 | B4 | vm);
|
||||
}
|
||||
|
||||
void Assembler::vceq(NeonSize size, QwNeonRegister dst,
|
||||
const QwNeonRegister src1, const QwNeonRegister src2) {
|
||||
DCHECK(IsEnabled(NEON));
|
||||
// Qd = vceq(Qn, Qm) SIMD integer compare equal.
|
||||
// Instruction details available in ARM DDI 0406C.b, A8-844.
|
||||
int vd, d;
|
||||
dst.split_code(&vd, &d);
|
||||
int vn, n;
|
||||
src1.split_code(&vn, &n);
|
||||
int vm, m;
|
||||
src2.split_code(&vm, &m);
|
||||
int sz = static_cast<int>(size);
|
||||
emit(0x1E6U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 |
|
||||
n * B7 | B6 | m * B5 | B4 | vm);
|
||||
}
|
||||
|
||||
void Assembler::vbsl(QwNeonRegister dst, const QwNeonRegister src1,
|
||||
const QwNeonRegister src2) {
|
||||
DCHECK(IsEnabled(NEON));
|
||||
// Qd = vbsl(Qn, Qm) SIMD bitwise select.
|
||||
// Instruction details available in ARM DDI 0406C.b, A8-844.
|
||||
int vd, d;
|
||||
dst.split_code(&vd, &d);
|
||||
int vn, n;
|
||||
src1.split_code(&vn, &n);
|
||||
int vm, m;
|
||||
src2.split_code(&vm, &m);
|
||||
int op = 1; // vbsl
|
||||
emit(0x1E6U * B23 | d * B22 | op * B20 | vn * B16 | vd * B12 | 0x1 * B8 |
|
||||
n * B7 | B6 | m * B5 | B4 | vm);
|
||||
}
|
||||
|
||||
// Encode NEON vtbl / vtbx instruction.
|
||||
static Instr EncodeNeonVTB(const DwVfpRegister dst, const NeonListOperand& list,
|
||||
const DwVfpRegister index, bool vtbx) {
|
||||
// Dd = vtbl(table, Dm) SIMD vector permute, zero at out of range indices.
|
||||
// Instruction details available in ARM DDI 0406C.b, A8-1094.
|
||||
// Dd = vtbx(table, Dm) SIMD vector permute, skip out of range indices.
|
||||
// Instruction details available in ARM DDI 0406C.b, A8-1094.
|
||||
int vd, d;
|
||||
dst.split_code(&vd, &d);
|
||||
int vn, n;
|
||||
list.base().split_code(&vn, &n);
|
||||
int vm, m;
|
||||
index.split_code(&vm, &m);
|
||||
int op = vtbx ? 1 : 0; // vtbl = 0, vtbx = 1.
|
||||
return 0x1E7U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | 0x2 * B10 |
|
||||
list.length() * B8 | n * B7 | op * B6 | m * B5 | vm;
|
||||
}
|
||||
|
||||
void Assembler::vtbl(const DwVfpRegister dst, const NeonListOperand& list,
|
||||
const DwVfpRegister index) {
|
||||
DCHECK(IsEnabled(NEON));
|
||||
emit(EncodeNeonVTB(dst, list, index, false));
|
||||
}
|
||||
|
||||
void Assembler::vtbx(const DwVfpRegister dst, const NeonListOperand& list,
|
||||
const DwVfpRegister index) {
|
||||
DCHECK(IsEnabled(NEON));
|
||||
emit(EncodeNeonVTB(dst, list, index, true));
|
||||
}
|
||||
|
||||
// Pseudo instructions.
|
||||
void Assembler::nop(int type) {
|
||||
// ARMv6{K/T2} and v7 have an actual NOP instruction but it serializes
|
||||
|
@ -640,12 +640,26 @@ class NeonMemOperand BASE_EMBEDDED {
|
||||
// Class NeonListOperand represents a list of NEON registers
|
||||
class NeonListOperand BASE_EMBEDDED {
|
||||
public:
|
||||
explicit NeonListOperand(DoubleRegister base, int registers_count = 1);
|
||||
explicit NeonListOperand(DoubleRegister base, int register_count = 1)
|
||||
: base_(base), register_count_(register_count) {}
|
||||
explicit NeonListOperand(QwNeonRegister q_reg)
|
||||
: base_(q_reg.low()), register_count_(2) {}
|
||||
DoubleRegister base() const { return base_; }
|
||||
NeonListType type() const { return type_; }
|
||||
int register_count() { return register_count_; }
|
||||
int length() const { return register_count_ - 1; }
|
||||
NeonListType type() const {
|
||||
switch (register_count_) {
|
||||
default: UNREACHABLE();
|
||||
// Fall through.
|
||||
case 1: return nlt_1;
|
||||
case 2: return nlt_2;
|
||||
case 3: return nlt_3;
|
||||
case 4: return nlt_4;
|
||||
}
|
||||
}
|
||||
private:
|
||||
DoubleRegister base_;
|
||||
NeonListType type_;
|
||||
int register_count_;
|
||||
};
|
||||
|
||||
|
||||
@ -1149,6 +1163,8 @@ class Assembler : public AssemblerBase {
|
||||
void vmov(const DwVfpRegister dst,
|
||||
const DwVfpRegister src,
|
||||
const Condition cond = al);
|
||||
// TODO(bbudge) Replace uses of these with the more general core register to
|
||||
// scalar register vmov's.
|
||||
void vmov(const DwVfpRegister dst,
|
||||
const VmovIndex index,
|
||||
const Register src,
|
||||
@ -1329,11 +1345,43 @@ class Assembler : public AssemblerBase {
|
||||
const NeonMemOperand& dst);
|
||||
void vmovl(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src);
|
||||
|
||||
// Only unconditional core <-> scalar moves are currently supported.
|
||||
void vmov(NeonDataType dt, DwVfpRegister dst, int index, Register src);
|
||||
void vmov(NeonDataType dt, Register dst, DwVfpRegister src, int index);
|
||||
|
||||
void vmov(const QwNeonRegister dst, const QwNeonRegister src);
|
||||
void vmvn(const QwNeonRegister dst, const QwNeonRegister src);
|
||||
void vswp(DwVfpRegister dst, DwVfpRegister src);
|
||||
void vswp(QwNeonRegister dst, QwNeonRegister src);
|
||||
// vdup conditional execution isn't supported.
|
||||
void vdup(NeonSize size, const QwNeonRegister dst, const Register src);
|
||||
void vdup(const QwNeonRegister dst, const SwVfpRegister src);
|
||||
|
||||
void vcvt_f32_s32(const QwNeonRegister dst, const QwNeonRegister src);
|
||||
void vcvt_f32_u32(const QwNeonRegister dst, const QwNeonRegister src);
|
||||
void vcvt_s32_f32(const QwNeonRegister dst, const QwNeonRegister src);
|
||||
void vcvt_u32_f32(const QwNeonRegister dst, const QwNeonRegister src);
|
||||
|
||||
void veor(DwVfpRegister dst, DwVfpRegister src1, DwVfpRegister src2);
|
||||
void veor(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
|
||||
void vadd(const QwNeonRegister dst, const QwNeonRegister src1,
|
||||
const QwNeonRegister src2);
|
||||
void vadd(NeonSize size, const QwNeonRegister dst, const QwNeonRegister src1,
|
||||
const QwNeonRegister src2);
|
||||
void vsub(const QwNeonRegister dst, const QwNeonRegister src1,
|
||||
const QwNeonRegister src2);
|
||||
void vsub(NeonSize size, const QwNeonRegister dst, const QwNeonRegister src1,
|
||||
const QwNeonRegister src2);
|
||||
void vtst(NeonSize size, const QwNeonRegister dst, const QwNeonRegister src1,
|
||||
const QwNeonRegister src2);
|
||||
void vceq(NeonSize size, const QwNeonRegister dst, const QwNeonRegister src1,
|
||||
const QwNeonRegister src2);
|
||||
void vbsl(const QwNeonRegister dst, const QwNeonRegister src1,
|
||||
const QwNeonRegister src2);
|
||||
void vtbl(const DwVfpRegister dst, const NeonListOperand& list,
|
||||
const DwVfpRegister index);
|
||||
void vtbx(const DwVfpRegister dst, const NeonListOperand& list,
|
||||
const DwVfpRegister index);
|
||||
|
||||
// Pseudo instructions
|
||||
|
||||
|
@ -190,6 +190,7 @@ enum {
|
||||
B7 = 1 << 7,
|
||||
B8 = 1 << 8,
|
||||
B9 = 1 << 9,
|
||||
B10 = 1 << 10,
|
||||
B12 = 1 << 12,
|
||||
B16 = 1 << 16,
|
||||
B17 = 1 << 17,
|
||||
@ -218,7 +219,6 @@ enum {
|
||||
kOff8Mask = (1 << 8) - 1
|
||||
};
|
||||
|
||||
|
||||
enum BarrierOption {
|
||||
OSHLD = 0x1,
|
||||
OSHST = 0x2,
|
||||
@ -667,7 +667,7 @@ class Instruction {
|
||||
|
||||
|
||||
private:
|
||||
// Join split register codes, depending on single or double precision.
|
||||
// Join split register codes, depending on register precision.
|
||||
// four_bit is the position of the least-significant bit of the four
|
||||
// bit specifier. one_bit is the position of the additional single bit
|
||||
// specifier.
|
||||
|
@ -1419,6 +1419,9 @@ int Decoder::DecodeType7(Instruction* instr) {
|
||||
// Sd = vsqrt(Sm)
|
||||
// vmrs
|
||||
// vmsr
|
||||
// Qd = vdup.size(Qd, Rt)
|
||||
// vmov.size: Dd[i] = Rt
|
||||
// vmov.sign.size: Rt = Dn[i]
|
||||
void Decoder::DecodeTypeVFP(Instruction* instr) {
|
||||
VERIFY((instr->TypeValue() == 7) && (instr->Bit(24) == 0x0) );
|
||||
VERIFY(instr->Bits(11, 9) == 0x5);
|
||||
@ -1531,22 +1534,72 @@ void Decoder::DecodeTypeVFP(Instruction* instr) {
|
||||
if ((instr->VCValue() == 0x0) &&
|
||||
(instr->VAValue() == 0x0)) {
|
||||
DecodeVMOVBetweenCoreAndSinglePrecisionRegisters(instr);
|
||||
} else if ((instr->VLValue() == 0x0) &&
|
||||
(instr->VCValue() == 0x1) &&
|
||||
(instr->Bit(23) == 0x0)) {
|
||||
} else if ((instr->VLValue() == 0x0) && (instr->VCValue() == 0x1)) {
|
||||
if (instr->Bit(23) == 0) {
|
||||
int opc1_opc2 = (instr->Bits(22, 21) << 2) | instr->Bits(6, 5);
|
||||
if ((opc1_opc2 & 0xb) == 0) {
|
||||
// NeonS32/NeonU32
|
||||
if (instr->Bit(21) == 0x0) {
|
||||
Format(instr, "vmov'cond.32 'Dd[0], 'rt");
|
||||
} else {
|
||||
Format(instr, "vmov'cond.32 'Dd[1], 'rt");
|
||||
}
|
||||
} else if ((instr->VLValue() == 0x1) &&
|
||||
(instr->VCValue() == 0x1) &&
|
||||
(instr->Bit(23) == 0x0)) {
|
||||
} else {
|
||||
int vd = instr->VFPNRegValue(kDoublePrecision);
|
||||
int rt = instr->RtValue();
|
||||
if ((opc1_opc2 & 0x8) != 0) {
|
||||
// NeonS8 / NeonU8
|
||||
int i = opc1_opc2 & 0x7;
|
||||
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
|
||||
"vmov.8 d%d[%d], r%d", vd, i, rt);
|
||||
} else if ((opc1_opc2 & 0x1) != 0) {
|
||||
// NeonS16 / NeonU16
|
||||
int i = (opc1_opc2 >> 1) & 0x3;
|
||||
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
|
||||
"vmov.16 d%d[%d], r%d", vd, i, rt);
|
||||
} else {
|
||||
Unknown(instr);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
int size = 32;
|
||||
if (instr->Bit(5) != 0)
|
||||
size = 16;
|
||||
else if (instr->Bit(22) != 0)
|
||||
size = 8;
|
||||
int Vd = instr->VFPNRegValue(kSimd128Precision);
|
||||
int Rt = instr->RtValue();
|
||||
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
|
||||
"vdup.%i q%d, r%d", size, Vd, Rt);
|
||||
}
|
||||
} else if ((instr->VLValue() == 0x1) && (instr->VCValue() == 0x1)) {
|
||||
int opc1_opc2 = (instr->Bits(22, 21) << 2) | instr->Bits(6, 5);
|
||||
if ((opc1_opc2 & 0xb) == 0) {
|
||||
// NeonS32 / NeonU32
|
||||
if (instr->Bit(21) == 0x0) {
|
||||
Format(instr, "vmov'cond.32 'rt, 'Dd[0]");
|
||||
} else {
|
||||
Format(instr, "vmov'cond.32 'rt, 'Dd[1]");
|
||||
}
|
||||
} else {
|
||||
const char* sign = instr->Bit(23) != 0 ? "u" : "s";
|
||||
int rt = instr->RtValue();
|
||||
int vn = instr->VFPNRegValue(kDoublePrecision);
|
||||
if ((opc1_opc2 & 0x8) != 0) {
|
||||
// NeonS8 / NeonU8
|
||||
int i = opc1_opc2 & 0x7;
|
||||
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
|
||||
"vmov.%s8 r%d, d%d[%d]", sign, rt, vn, i);
|
||||
} else if ((opc1_opc2 & 0x1) != 0) {
|
||||
// NeonS16 / NeonU16
|
||||
int i = (opc1_opc2 >> 1) & 0x3;
|
||||
out_buffer_pos_ +=
|
||||
SNPrintF(out_buffer_ + out_buffer_pos_, "vmov.%s16 r%d, d%d[%d]",
|
||||
sign, rt, vn, i);
|
||||
} else {
|
||||
Unknown(instr);
|
||||
}
|
||||
}
|
||||
} else if ((instr->VCValue() == 0x0) &&
|
||||
(instr->VAValue() == 0x7) &&
|
||||
(instr->Bits(19, 16) == 0x1)) {
|
||||
@ -1563,6 +1616,8 @@ void Decoder::DecodeTypeVFP(Instruction* instr) {
|
||||
Format(instr, "vmrs'cond 'rt, FPSCR");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Unknown(instr); // Not used by V8.
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1809,6 +1864,25 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
|
||||
int Vm = instr->VFPMRegValue(kSimd128Precision);
|
||||
out_buffer_pos_ +=
|
||||
SNPrintF(out_buffer_ + out_buffer_pos_, "vmov q%d, q%d", Vd, Vm);
|
||||
} else if (instr->Bits(11, 8) == 8) {
|
||||
const char* op = (instr->Bit(4) == 0) ? "vadd" : "vtst";
|
||||
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int Vm = instr->VFPMRegValue(kSimd128Precision);
|
||||
int Vn = instr->VFPNRegValue(kSimd128Precision);
|
||||
// vadd/vtst.i<size> Qd, Qm, Qn.
|
||||
out_buffer_pos_ +=
|
||||
SNPrintF(out_buffer_ + out_buffer_pos_, "%s.i%d q%d, q%d, q%d", op,
|
||||
size, Vd, Vn, Vm);
|
||||
} else if (instr->Bits(11, 8) == 0xd && instr->Bit(4) == 0) {
|
||||
const char* op = (instr->Bits(21, 20) == 0) ? "vadd" : "vsub";
|
||||
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int Vm = instr->VFPMRegValue(kSimd128Precision);
|
||||
int Vn = instr->VFPNRegValue(kSimd128Precision);
|
||||
// vadd/vsub.f32 Qd, Qm, Qn.
|
||||
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
|
||||
"%s.f32 q%d, q%d, q%d", op, Vd, Vn, Vm);
|
||||
} else {
|
||||
Unknown(instr);
|
||||
}
|
||||
@ -1828,7 +1902,28 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
|
||||
}
|
||||
break;
|
||||
case 6:
|
||||
if (instr->Bits(21, 20) == 0 && instr->Bits(11, 8) == 1 &&
|
||||
if (instr->Bits(11, 8) == 8) {
|
||||
int size = kBitsPerByte * (1 << instr->Bits(21, 20));
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int Vm = instr->VFPMRegValue(kSimd128Precision);
|
||||
int Vn = instr->VFPNRegValue(kSimd128Precision);
|
||||
if (instr->Bit(4) == 0) {
|
||||
out_buffer_pos_ +=
|
||||
SNPrintF(out_buffer_ + out_buffer_pos_, "vsub.i%d q%d, q%d, q%d",
|
||||
size, Vd, Vn, Vm);
|
||||
} else {
|
||||
out_buffer_pos_ +=
|
||||
SNPrintF(out_buffer_ + out_buffer_pos_, "vceq.i%d q%d, q%d, q%d",
|
||||
size, Vd, Vn, Vm);
|
||||
}
|
||||
} else if (instr->Bits(21, 20) == 1 && instr->Bits(11, 8) == 1 &&
|
||||
instr->Bit(4) == 1) {
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int Vm = instr->VFPMRegValue(kSimd128Precision);
|
||||
int Vn = instr->VFPNRegValue(kSimd128Precision);
|
||||
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
|
||||
"vbsl q%d, q%d, q%d", Vd, Vn, Vm);
|
||||
} else if (instr->Bits(21, 20) == 0 && instr->Bits(11, 8) == 1 &&
|
||||
instr->Bit(4) == 1) {
|
||||
if (instr->Bit(6) == 0) {
|
||||
// veor Dd, Dn, Dm
|
||||
@ -1860,6 +1955,35 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
|
||||
int imm3 = instr->Bits(21, 19);
|
||||
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
|
||||
"vmovl.u%d q%d, d%d", imm3*8, Vd, Vm);
|
||||
} else if (instr->Opc1Value() == 7 && instr->Bits(19, 16) == 0 &&
|
||||
instr->Bits(11, 6) == 0x17 && instr->Bit(4) == 0) {
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int Vm = instr->VFPMRegValue(kSimd128Precision);
|
||||
out_buffer_pos_ +=
|
||||
SNPrintF(out_buffer_ + out_buffer_pos_, "vmvn q%d, q%d", Vd, Vm);
|
||||
} else if (instr->Opc1Value() == 7 && instr->Bits(19, 16) == 0xB &&
|
||||
instr->Bits(11, 9) == 0x3 && instr->Bit(6) == 1 &&
|
||||
instr->Bit(4) == 0) {
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int Vm = instr->VFPMRegValue(kSimd128Precision);
|
||||
const char* suffix = nullptr;
|
||||
int op = instr->Bits(8, 7);
|
||||
switch (op) {
|
||||
case 0:
|
||||
suffix = "f32.s32";
|
||||
break;
|
||||
case 1:
|
||||
suffix = "f32.u32";
|
||||
break;
|
||||
case 2:
|
||||
suffix = "s32.f32";
|
||||
break;
|
||||
case 3:
|
||||
suffix = "u32.f32";
|
||||
break;
|
||||
}
|
||||
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
|
||||
"vcvt.%s q%d, q%d", suffix, Vd, Vm);
|
||||
} else if ((instr->Bits(21, 16) == 0x32) && (instr->Bits(11, 7) == 0) &&
|
||||
(instr->Bit(4) == 0)) {
|
||||
if (instr->Bit(6) == 0) {
|
||||
@ -1873,6 +1997,26 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
|
||||
out_buffer_pos_ +=
|
||||
SNPrintF(out_buffer_ + out_buffer_pos_, "vswp q%d, q%d", Vd, Vm);
|
||||
}
|
||||
} else if (instr->Opc1Value() == 0x7 && instr->Bits(11, 7) == 0x18 &&
|
||||
instr->Bit(4) == 0x0) {
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int Vm = instr->VFPMRegValue(kDoublePrecision);
|
||||
int index = instr->Bit(19);
|
||||
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
|
||||
"vdup q%d, d%d[%d]", Vd, Vm, index);
|
||||
} else if (instr->Opc1Value() == 0x7 && instr->Bits(11, 10) == 0x2 &&
|
||||
instr->Bit(4) == 0x0) {
|
||||
int Vd = instr->VFPDRegValue(kDoublePrecision);
|
||||
int Vn = instr->VFPNRegValue(kDoublePrecision);
|
||||
int Vm = instr->VFPMRegValue(kDoublePrecision);
|
||||
int len = instr->Bits(9, 8);
|
||||
NeonListOperand list(DwVfpRegister::from_code(Vn), len + 1);
|
||||
out_buffer_pos_ +=
|
||||
SNPrintF(out_buffer_ + out_buffer_pos_, "%s d%d, ",
|
||||
instr->Bit(6) == 0 ? "vtbl.8" : "vtbx.8", Vd);
|
||||
FormatNeonList(Vn, list.type());
|
||||
Print(", ");
|
||||
PrintDRegister(Vm);
|
||||
} else {
|
||||
Unknown(instr);
|
||||
}
|
||||
|
@ -1081,8 +1081,8 @@ void MacroAssembler::VmovLow(DwVfpRegister dst, Register src) {
|
||||
}
|
||||
|
||||
void MacroAssembler::VmovExtended(Register dst, int src_code) {
|
||||
DCHECK_LE(32, src_code);
|
||||
DCHECK_GT(64, src_code);
|
||||
DCHECK_LE(SwVfpRegister::kMaxNumRegisters, src_code);
|
||||
DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, src_code);
|
||||
if (src_code & 0x1) {
|
||||
VmovHigh(dst, DwVfpRegister::from_code(src_code / 2));
|
||||
} else {
|
||||
@ -1091,8 +1091,8 @@ void MacroAssembler::VmovExtended(Register dst, int src_code) {
|
||||
}
|
||||
|
||||
void MacroAssembler::VmovExtended(int dst_code, Register src) {
|
||||
DCHECK_LE(32, dst_code);
|
||||
DCHECK_GT(64, dst_code);
|
||||
DCHECK_LE(SwVfpRegister::kMaxNumRegisters, dst_code);
|
||||
DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, dst_code);
|
||||
if (dst_code & 0x1) {
|
||||
VmovHigh(DwVfpRegister::from_code(dst_code / 2), src);
|
||||
} else {
|
||||
@ -1102,22 +1102,23 @@ void MacroAssembler::VmovExtended(int dst_code, Register src) {
|
||||
|
||||
void MacroAssembler::VmovExtended(int dst_code, int src_code,
|
||||
Register scratch) {
|
||||
if (src_code < 32 && dst_code < 32) {
|
||||
if (src_code < SwVfpRegister::kMaxNumRegisters &&
|
||||
dst_code < SwVfpRegister::kMaxNumRegisters) {
|
||||
// src and dst are both s-registers.
|
||||
vmov(SwVfpRegister::from_code(dst_code),
|
||||
SwVfpRegister::from_code(src_code));
|
||||
} else if (src_code < 32) {
|
||||
} else if (src_code < SwVfpRegister::kMaxNumRegisters) {
|
||||
// src is an s-register.
|
||||
vmov(scratch, SwVfpRegister::from_code(src_code));
|
||||
VmovExtended(dst_code, scratch);
|
||||
} else if (dst_code < 32) {
|
||||
} else if (dst_code < SwVfpRegister::kMaxNumRegisters) {
|
||||
// dst is an s-register.
|
||||
VmovExtended(scratch, src_code);
|
||||
vmov(SwVfpRegister::from_code(dst_code), scratch);
|
||||
} else {
|
||||
// Neither src or dst are s-registers.
|
||||
DCHECK_GT(64, src_code);
|
||||
DCHECK_GT(64, dst_code);
|
||||
DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, src_code);
|
||||
DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, dst_code);
|
||||
VmovExtended(scratch, src_code);
|
||||
VmovExtended(dst_code, scratch);
|
||||
}
|
||||
@ -1125,7 +1126,7 @@ void MacroAssembler::VmovExtended(int dst_code, int src_code,
|
||||
|
||||
void MacroAssembler::VmovExtended(int dst_code, const MemOperand& src,
|
||||
Register scratch) {
|
||||
if (dst_code >= 32) {
|
||||
if (dst_code >= SwVfpRegister::kMaxNumRegisters) {
|
||||
ldr(scratch, src);
|
||||
VmovExtended(dst_code, scratch);
|
||||
} else {
|
||||
@ -1135,7 +1136,7 @@ void MacroAssembler::VmovExtended(int dst_code, const MemOperand& src,
|
||||
|
||||
void MacroAssembler::VmovExtended(const MemOperand& dst, int src_code,
|
||||
Register scratch) {
|
||||
if (src_code >= 32) {
|
||||
if (src_code >= SwVfpRegister::kMaxNumRegisters) {
|
||||
VmovExtended(scratch, src_code);
|
||||
str(scratch, dst);
|
||||
} else {
|
||||
@ -1143,6 +1144,47 @@ void MacroAssembler::VmovExtended(const MemOperand& dst, int src_code,
|
||||
}
|
||||
}
|
||||
|
||||
void MacroAssembler::ExtractLane(Register dst, QwNeonRegister src,
|
||||
NeonDataType dt, int lane) {
|
||||
int bytes_per_lane = dt & NeonDataTypeSizeMask; // 1, 2, 4
|
||||
int log2_bytes_per_lane = bytes_per_lane / 2; // 0, 1, 2
|
||||
int byte = lane << log2_bytes_per_lane;
|
||||
int double_word = byte >> kDoubleSizeLog2;
|
||||
int double_byte = byte & (kDoubleSize - 1);
|
||||
int double_lane = double_byte >> log2_bytes_per_lane;
|
||||
DwVfpRegister double_source =
|
||||
DwVfpRegister::from_code(src.code() * 2 + double_word);
|
||||
vmov(dt, dst, double_source, double_lane);
|
||||
}
|
||||
|
||||
void MacroAssembler::ExtractLane(SwVfpRegister dst, QwNeonRegister src,
|
||||
Register scratch, int lane) {
|
||||
int s_code = src.code() * 4 + lane;
|
||||
VmovExtended(dst.code(), s_code, scratch);
|
||||
}
|
||||
|
||||
void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src,
|
||||
Register src_lane, NeonDataType dt, int lane) {
|
||||
Move(dst, src);
|
||||
int bytes_per_lane = dt & NeonDataTypeSizeMask; // 1, 2, 4
|
||||
int log2_bytes_per_lane = bytes_per_lane / 2; // 0, 1, 2
|
||||
int byte = lane << log2_bytes_per_lane;
|
||||
int double_word = byte >> kDoubleSizeLog2;
|
||||
int double_byte = byte & (kDoubleSize - 1);
|
||||
int double_lane = double_byte >> log2_bytes_per_lane;
|
||||
DwVfpRegister double_dst =
|
||||
DwVfpRegister::from_code(dst.code() * 2 + double_word);
|
||||
vmov(dt, double_dst, double_lane, src_lane);
|
||||
}
|
||||
|
||||
void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src,
|
||||
SwVfpRegister src_lane, Register scratch,
|
||||
int lane) {
|
||||
Move(dst, src);
|
||||
int s_code = dst.code() * 4 + lane;
|
||||
VmovExtended(s_code, src_lane.code(), scratch);
|
||||
}
|
||||
|
||||
void MacroAssembler::LslPair(Register dst_low, Register dst_high,
|
||||
Register src_low, Register src_high,
|
||||
Register scratch, Register shift) {
|
||||
|
@ -561,6 +561,14 @@ class MacroAssembler: public Assembler {
|
||||
void VmovExtended(int dst_code, const MemOperand& src, Register scratch);
|
||||
void VmovExtended(const MemOperand& dst, int src_code, Register scratch);
|
||||
|
||||
void ExtractLane(Register dst, QwNeonRegister src, NeonDataType dt, int lane);
|
||||
void ExtractLane(SwVfpRegister dst, QwNeonRegister src, Register scratch,
|
||||
int lane);
|
||||
void ReplaceLane(QwNeonRegister dst, QwNeonRegister src, Register src_lane,
|
||||
NeonDataType dt, int lane);
|
||||
void ReplaceLane(QwNeonRegister dst, QwNeonRegister src,
|
||||
SwVfpRegister src_lane, Register scratch, int lane);
|
||||
|
||||
void LslPair(Register dst_low, Register dst_high, Register src_low,
|
||||
Register src_high, Register scratch, Register shift);
|
||||
void LslPair(Register dst_low, Register dst_high, Register src_low,
|
||||
|
@ -3067,6 +3067,7 @@ void Simulator::DecodeType7(Instruction* instr) {
|
||||
// Dd = vsqrt(Dm)
|
||||
// Sd = vsqrt(Sm)
|
||||
// vmrs
|
||||
// vdup.size Qd, Rt.
|
||||
void Simulator::DecodeTypeVFP(Instruction* instr) {
|
||||
DCHECK((instr->TypeValue() == 7) && (instr->Bit(24) == 0x0) );
|
||||
DCHECK(instr->Bits(11, 9) == 0x5);
|
||||
@ -3277,24 +3278,116 @@ void Simulator::DecodeTypeVFP(Instruction* instr) {
|
||||
if ((instr->VCValue() == 0x0) &&
|
||||
(instr->VAValue() == 0x0)) {
|
||||
DecodeVMOVBetweenCoreAndSinglePrecisionRegisters(instr);
|
||||
} else if ((instr->VLValue() == 0x0) &&
|
||||
(instr->VCValue() == 0x1) &&
|
||||
(instr->Bit(23) == 0x0)) {
|
||||
} else if ((instr->VLValue() == 0x0) && (instr->VCValue() == 0x1)) {
|
||||
if (instr->Bit(23) == 0) {
|
||||
// vmov (ARM core register to scalar)
|
||||
int vd = instr->Bits(19, 16) | (instr->Bit(7) << 4);
|
||||
int vd = instr->VFPNRegValue(kDoublePrecision);
|
||||
int rt = instr->RtValue();
|
||||
int opc1_opc2 = (instr->Bits(22, 21) << 2) | instr->Bits(6, 5);
|
||||
if ((opc1_opc2 & 0xb) == 0) {
|
||||
// NeonS32/NeonU32
|
||||
uint32_t data[2];
|
||||
get_d_register(vd, data);
|
||||
data[instr->Bit(21)] = get_register(instr->RtValue());
|
||||
data[instr->Bit(21)] = get_register(rt);
|
||||
set_d_register(vd, data);
|
||||
} else if ((instr->VLValue() == 0x1) &&
|
||||
(instr->VCValue() == 0x1) &&
|
||||
(instr->Bit(23) == 0x0)) {
|
||||
} else {
|
||||
uint64_t data;
|
||||
get_d_register(vd, &data);
|
||||
uint64_t rt_value = get_register(rt);
|
||||
if ((opc1_opc2 & 0x8) != 0) {
|
||||
// NeonS8 / NeonU8
|
||||
int i = opc1_opc2 & 0x7;
|
||||
int shift = i * kBitsPerByte;
|
||||
const uint64_t mask = 0xFF;
|
||||
data &= ~(mask << shift);
|
||||
data |= (rt_value & mask) << shift;
|
||||
set_d_register(vd, &data);
|
||||
} else if ((opc1_opc2 & 0x1) != 0) {
|
||||
// NeonS16 / NeonU16
|
||||
int i = (opc1_opc2 >> 1) & 0x3;
|
||||
int shift = i * kBitsPerByte * kShortSize;
|
||||
const uint64_t mask = 0xFFFF;
|
||||
data &= ~(mask << shift);
|
||||
data |= (rt_value & mask) << shift;
|
||||
set_d_register(vd, &data);
|
||||
} else {
|
||||
UNREACHABLE(); // Not used by V8.
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// vdup.size Qd, Rt.
|
||||
NeonSize size = Neon32;
|
||||
if (instr->Bit(5) != 0)
|
||||
size = Neon16;
|
||||
else if (instr->Bit(22) != 0)
|
||||
size = Neon8;
|
||||
int vd = instr->VFPNRegValue(kSimd128Precision);
|
||||
int rt = instr->RtValue();
|
||||
uint32_t rt_value = get_register(rt);
|
||||
uint32_t q_data[4];
|
||||
switch (size) {
|
||||
case Neon8: {
|
||||
rt_value &= 0xFF;
|
||||
uint8_t* dst = reinterpret_cast<uint8_t*>(q_data);
|
||||
for (int i = 0; i < 16; i++) {
|
||||
dst[i] = rt_value;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Neon16: {
|
||||
// Perform pairwise ops instead of casting to uint16_t.
|
||||
rt_value &= 0xFFFFu;
|
||||
uint32_t rt_rt = (rt_value << 16) | (rt_value & 0xFFFFu);
|
||||
for (int i = 0; i < 4; i++) {
|
||||
q_data[i] = rt_rt;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Neon32: {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
q_data[i] = rt_value;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE();
|
||||
break;
|
||||
}
|
||||
set_q_register(vd, q_data);
|
||||
}
|
||||
} else if ((instr->VLValue() == 0x1) && (instr->VCValue() == 0x1)) {
|
||||
// vmov (scalar to ARM core register)
|
||||
int vn = instr->Bits(19, 16) | (instr->Bit(7) << 4);
|
||||
int vn = instr->VFPNRegValue(kDoublePrecision);
|
||||
int rt = instr->RtValue();
|
||||
int opc1_opc2 = (instr->Bits(22, 21) << 2) | instr->Bits(6, 5);
|
||||
if ((opc1_opc2 & 0xb) == 0) {
|
||||
// NeonS32 / NeonU32
|
||||
double dn_value = get_double_from_d_register(vn);
|
||||
int32_t data[2];
|
||||
memcpy(data, &dn_value, 8);
|
||||
set_register(instr->RtValue(), data[instr->Bit(21)]);
|
||||
set_register(rt, data[instr->Bit(21)]);
|
||||
} else {
|
||||
uint64_t data;
|
||||
get_d_register(vn, &data);
|
||||
bool u = instr->Bit(23) != 0;
|
||||
if ((opc1_opc2 & 0x8) != 0) {
|
||||
// NeonS8 / NeonU8
|
||||
int i = opc1_opc2 & 0x7;
|
||||
int shift = i * kBitsPerByte;
|
||||
uint32_t scalar = (data >> shift) & 0xFFu;
|
||||
if (!u && (scalar & 0x80) != 0) scalar |= 0xffffff00;
|
||||
set_register(rt, scalar);
|
||||
} else if ((opc1_opc2 & 0x1) != 0) {
|
||||
// NeonS16 / NeonU16
|
||||
int i = (opc1_opc2 >> 1) & 0x3;
|
||||
int shift = i * kBitsPerByte * kShortSize;
|
||||
uint32_t scalar = (data >> shift) & 0xFFFFu;
|
||||
if (!u && (scalar & 0x8000) != 0) scalar |= 0xffff0000;
|
||||
set_register(rt, scalar);
|
||||
} else {
|
||||
UNREACHABLE(); // Not used by V8.
|
||||
}
|
||||
}
|
||||
} else if ((instr->VLValue() == 0x1) &&
|
||||
(instr->VCValue() == 0x0) &&
|
||||
(instr->VAValue() == 0x7) &&
|
||||
@ -3520,6 +3613,48 @@ int VFPConversionSaturate(double val, bool unsigned_res) {
|
||||
}
|
||||
}
|
||||
|
||||
int32_t Simulator::ConvertDoubleToInt(double val, bool unsigned_integer,
|
||||
VFPRoundingMode mode) {
|
||||
int32_t result =
|
||||
unsigned_integer ? static_cast<uint32_t>(val) : static_cast<int32_t>(val);
|
||||
|
||||
inv_op_vfp_flag_ = get_inv_op_vfp_flag(mode, val, unsigned_integer);
|
||||
|
||||
double abs_diff = unsigned_integer
|
||||
? std::fabs(val - static_cast<uint32_t>(result))
|
||||
: std::fabs(val - result);
|
||||
|
||||
inexact_vfp_flag_ = (abs_diff != 0);
|
||||
|
||||
if (inv_op_vfp_flag_) {
|
||||
result = VFPConversionSaturate(val, unsigned_integer);
|
||||
} else {
|
||||
switch (mode) {
|
||||
case RN: {
|
||||
int val_sign = (val > 0) ? 1 : -1;
|
||||
if (abs_diff > 0.5) {
|
||||
result += val_sign;
|
||||
} else if (abs_diff == 0.5) {
|
||||
// Round to even if exactly halfway.
|
||||
result = ((result % 2) == 0) ? result : result + val_sign;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case RM:
|
||||
result = result > val ? result - 1 : result;
|
||||
break;
|
||||
|
||||
case RZ:
|
||||
// Nothing to do.
|
||||
break;
|
||||
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void Simulator::DecodeVCVTBetweenFloatingPointAndInteger(Instruction* instr) {
|
||||
DCHECK((instr->Bit(4) == 0) && (instr->Opc1Value() == 0x7) &&
|
||||
@ -3556,44 +3691,7 @@ void Simulator::DecodeVCVTBetweenFloatingPointAndInteger(Instruction* instr) {
|
||||
double val = double_precision ? get_double_from_d_register(src)
|
||||
: get_float_from_s_register(src);
|
||||
|
||||
int temp = unsigned_integer ? static_cast<uint32_t>(val)
|
||||
: static_cast<int32_t>(val);
|
||||
|
||||
inv_op_vfp_flag_ = get_inv_op_vfp_flag(mode, val, unsigned_integer);
|
||||
|
||||
double abs_diff =
|
||||
unsigned_integer ? std::fabs(val - static_cast<uint32_t>(temp))
|
||||
: std::fabs(val - temp);
|
||||
|
||||
inexact_vfp_flag_ = (abs_diff != 0);
|
||||
|
||||
if (inv_op_vfp_flag_) {
|
||||
temp = VFPConversionSaturate(val, unsigned_integer);
|
||||
} else {
|
||||
switch (mode) {
|
||||
case RN: {
|
||||
int val_sign = (val > 0) ? 1 : -1;
|
||||
if (abs_diff > 0.5) {
|
||||
temp += val_sign;
|
||||
} else if (abs_diff == 0.5) {
|
||||
// Round to even if exactly halfway.
|
||||
temp = ((temp % 2) == 0) ? temp : temp + val_sign;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case RM:
|
||||
temp = temp > val ? temp - 1 : temp;
|
||||
break;
|
||||
|
||||
case RZ:
|
||||
// Nothing to do.
|
||||
break;
|
||||
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
int32_t temp = ConvertDoubleToInt(val, unsigned_integer, mode);
|
||||
|
||||
// Update the destination register.
|
||||
set_s_register_from_sinteger(dst, temp);
|
||||
@ -3740,6 +3838,16 @@ void Simulator::DecodeType6CoprocessorIns(Instruction* instr) {
|
||||
}
|
||||
}
|
||||
|
||||
#define HIGH_16(x) ((x) >> 16)
|
||||
#define LOW_16(x) ((x)&0xFFFFu)
|
||||
#define COMBINE_32(high, low) ((high) << 16 | (low)&0xFFFFu)
|
||||
#define PAIRWISE_OP(x, y, OP) \
|
||||
COMBINE_32(OP(HIGH_16((x)), HIGH_16((y))), OP(LOW_16((x)), LOW_16((y))))
|
||||
|
||||
#define ADD_16(x, y) ((x) + (y))
|
||||
#define SUB_16(x, y) ((x) - (y))
|
||||
#define CEQ_16(x, y) ((x) == (y) ? 0xFFFFu : 0)
|
||||
#define TST_16(x, y) (((x) & (y)) != 0 ? 0xFFFFu : 0)
|
||||
|
||||
void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
||||
switch (instr->SpecialValue()) {
|
||||
@ -3752,6 +3860,91 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
||||
uint32_t data[4];
|
||||
get_q_register(Vm, data);
|
||||
set_q_register(Vd, data);
|
||||
} else if (instr->Bits(11, 8) == 8) {
|
||||
// vadd/vtst
|
||||
int size = static_cast<NeonSize>(instr->Bits(21, 20));
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int Vm = instr->VFPMRegValue(kSimd128Precision);
|
||||
int Vn = instr->VFPNRegValue(kSimd128Precision);
|
||||
uint32_t src1[4], src2[4];
|
||||
get_q_register(Vn, src1);
|
||||
get_q_register(Vm, src2);
|
||||
if (instr->Bit(4) == 0) {
|
||||
// vadd.i<size> Qd, Qm, Qn.
|
||||
switch (size) {
|
||||
case Neon8: {
|
||||
uint8_t* s1 = reinterpret_cast<uint8_t*>(src1);
|
||||
uint8_t* s2 = reinterpret_cast<uint8_t*>(src2);
|
||||
for (int i = 0; i < 16; i++) {
|
||||
s1[i] += s2[i];
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Neon16: {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
src1[i] = PAIRWISE_OP(src1[i], src2[i], ADD_16);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Neon32: {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
src1[i] += src2[i];
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE();
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// vtst.i<size> Qd, Qm, Qn.
|
||||
switch (size) {
|
||||
case Neon8: {
|
||||
uint8_t* s1 = reinterpret_cast<uint8_t*>(src1);
|
||||
uint8_t* s2 = reinterpret_cast<uint8_t*>(src2);
|
||||
for (int i = 0; i < 16; i++) {
|
||||
s1[i] = (s1[i] & s2[i]) != 0 ? 0xFFu : 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Neon16: {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
src1[i] = PAIRWISE_OP(src1[i], src2[i], TST_16);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Neon32: {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFFFFFu : 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE();
|
||||
break;
|
||||
}
|
||||
}
|
||||
set_q_register(Vd, src1);
|
||||
} else if (instr->Bit(20) == 0 && instr->Bits(11, 8) == 0xd &&
|
||||
instr->Bit(4) == 0) {
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int Vm = instr->VFPMRegValue(kSimd128Precision);
|
||||
int Vn = instr->VFPNRegValue(kSimd128Precision);
|
||||
uint32_t src1[4], src2[4];
|
||||
get_q_register(Vn, src1);
|
||||
get_q_register(Vm, src2);
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (instr->Bit(21) == 0) {
|
||||
// vadd.f32 Qd, Qm, Qn.
|
||||
src1[i] = bit_cast<uint32_t>(bit_cast<float>(src1[i]) +
|
||||
bit_cast<float>(src2[i]));
|
||||
} else {
|
||||
// vsub.f32 Qd, Qm, Qn.
|
||||
src1[i] = bit_cast<uint32_t>(bit_cast<float>(src1[i]) -
|
||||
bit_cast<float>(src2[i]));
|
||||
}
|
||||
}
|
||||
set_q_register(Vd, src1);
|
||||
} else {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
@ -3781,7 +3974,91 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
||||
}
|
||||
break;
|
||||
case 6:
|
||||
if (instr->Bits(21, 20) == 0 && instr->Bits(11, 8) == 1 &&
|
||||
if (instr->Bits(11, 8) == 8 && instr->Bit(4) == 0) {
|
||||
// vsub.size Qd, Qm, Qn.
|
||||
int size = static_cast<NeonSize>(instr->Bits(21, 20));
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int Vm = instr->VFPMRegValue(kSimd128Precision);
|
||||
int Vn = instr->VFPNRegValue(kSimd128Precision);
|
||||
uint32_t src1[4], src2[4];
|
||||
get_q_register(Vn, src1);
|
||||
get_q_register(Vm, src2);
|
||||
switch (size) {
|
||||
case Neon8: {
|
||||
uint8_t* s1 = reinterpret_cast<uint8_t*>(src1);
|
||||
uint8_t* s2 = reinterpret_cast<uint8_t*>(src2);
|
||||
for (int i = 0; i < 16; i++) {
|
||||
s1[i] -= s2[i];
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Neon16: {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
src1[i] = PAIRWISE_OP(src1[i], src2[i], SUB_16);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Neon32: {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
src1[i] -= src2[i];
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE();
|
||||
break;
|
||||
}
|
||||
set_q_register(Vd, src1);
|
||||
} else if (instr->Bits(11, 8) == 8 && instr->Bit(4) == 1) {
|
||||
// vceq.size Qd, Qm, Qn.
|
||||
int size = static_cast<NeonSize>(instr->Bits(21, 20));
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int Vm = instr->VFPMRegValue(kSimd128Precision);
|
||||
int Vn = instr->VFPNRegValue(kSimd128Precision);
|
||||
uint32_t src1[4], src2[4];
|
||||
get_q_register(Vn, src1);
|
||||
get_q_register(Vm, src2);
|
||||
switch (size) {
|
||||
case Neon8: {
|
||||
uint8_t* s1 = reinterpret_cast<uint8_t*>(src1);
|
||||
uint8_t* s2 = reinterpret_cast<uint8_t*>(src2);
|
||||
for (int i = 0; i < 16; i++) {
|
||||
s1[i] = s1[i] == s2[i] ? 0xFF : 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Neon16: {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
src1[i] = PAIRWISE_OP(src1[i], src2[i], CEQ_16);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Neon32: {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
src1[i] = src1[i] == src2[i] ? 0xFFFFFFFF : 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE();
|
||||
break;
|
||||
}
|
||||
set_q_register(Vd, src1);
|
||||
} else if (instr->Bits(21, 20) == 1 && instr->Bits(11, 8) == 1 &&
|
||||
instr->Bit(4) == 1) {
|
||||
// vbsl.size Qd, Qm, Qn.
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int Vm = instr->VFPMRegValue(kSimd128Precision);
|
||||
int Vn = instr->VFPNRegValue(kSimd128Precision);
|
||||
uint32_t dst[4], src1[4], src2[4];
|
||||
get_q_register(Vd, dst);
|
||||
get_q_register(Vn, src1);
|
||||
get_q_register(Vm, src2);
|
||||
for (int i = 0; i < 4; i++) {
|
||||
dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]);
|
||||
}
|
||||
set_q_register(Vd, dst);
|
||||
} else if (instr->Bits(21, 20) == 0 && instr->Bits(11, 8) == 1 &&
|
||||
instr->Bit(4) == 1) {
|
||||
if (instr->Bit(6) == 0) {
|
||||
// veor Dd, Dn, Dm
|
||||
@ -3829,6 +4106,40 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
||||
e++;
|
||||
}
|
||||
set_q_register(Vd, reinterpret_cast<uint64_t*>(to));
|
||||
} else if (instr->Opc1Value() == 7 && instr->Bits(19, 16) == 0xB &&
|
||||
instr->Bits(11, 9) == 0x3 && instr->Bit(6) == 1 &&
|
||||
instr->Bit(4) == 0) {
|
||||
// vcvt.<Td>.<Tm> Qd, Qm.
|
||||
int Vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int Vm = instr->VFPMRegValue(kSimd128Precision);
|
||||
uint32_t q_data[4];
|
||||
get_q_register(Vm, q_data);
|
||||
int op = instr->Bits(8, 7);
|
||||
for (int i = 0; i < 4; i++) {
|
||||
switch (op) {
|
||||
case 0:
|
||||
// f32 <- s32, round towards nearest.
|
||||
q_data[i] = bit_cast<uint32_t>(
|
||||
std::round(static_cast<float>(bit_cast<int32_t>(q_data[i]))));
|
||||
break;
|
||||
case 1:
|
||||
// f32 <- u32, round towards nearest.
|
||||
q_data[i] =
|
||||
bit_cast<uint32_t>(std::round(static_cast<float>(q_data[i])));
|
||||
break;
|
||||
case 2:
|
||||
// s32 <- f32, round to zero.
|
||||
q_data[i] = static_cast<uint32_t>(
|
||||
ConvertDoubleToInt(bit_cast<float>(q_data[i]), false, RZ));
|
||||
break;
|
||||
case 3:
|
||||
// u32 <- f32, round to zero.
|
||||
q_data[i] = static_cast<uint32_t>(
|
||||
ConvertDoubleToInt(bit_cast<float>(q_data[i]), true, RZ));
|
||||
break;
|
||||
}
|
||||
}
|
||||
set_q_register(Vd, q_data);
|
||||
} else if ((instr->Bits(21, 16) == 0x32) && (instr->Bits(11, 7) == 0) &&
|
||||
(instr->Bit(4) == 0)) {
|
||||
if (instr->Bit(6) == 0) {
|
||||
@ -3850,6 +4161,49 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
|
||||
set_q_register(vm, dval);
|
||||
set_q_register(vd, mval);
|
||||
}
|
||||
} else if (instr->Opc1Value() == 0x7 && instr->Bits(11, 7) == 0x18 &&
|
||||
instr->Bit(4) == 0x0) {
|
||||
// vdup.32 Qd, Sm.
|
||||
int vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int vm = instr->VFPMRegValue(kDoublePrecision);
|
||||
int index = instr->Bit(19);
|
||||
uint32_t s_data = get_s_register(vm * 2 + index);
|
||||
uint32_t q_data[4];
|
||||
for (int i = 0; i < 4; i++) q_data[i] = s_data;
|
||||
set_q_register(vd, q_data);
|
||||
} else if (instr->Opc1Value() == 7 && instr->Bits(19, 16) == 0 &&
|
||||
instr->Bits(11, 6) == 0x17 && instr->Bit(4) == 0) {
|
||||
// vmvn Qd, Qm.
|
||||
int vd = instr->VFPDRegValue(kSimd128Precision);
|
||||
int vm = instr->VFPMRegValue(kSimd128Precision);
|
||||
uint32_t q_data[4];
|
||||
get_q_register(vm, q_data);
|
||||
for (int i = 0; i < 4; i++) q_data[i] = ~q_data[i];
|
||||
set_q_register(vd, q_data);
|
||||
} else if (instr->Opc1Value() == 0x7 && instr->Bits(11, 10) == 0x2 &&
|
||||
instr->Bit(4) == 0x0) {
|
||||
// vtb[l,x] Dd, <list>, Dm.
|
||||
int vd = instr->VFPDRegValue(kDoublePrecision);
|
||||
int vn = instr->VFPNRegValue(kDoublePrecision);
|
||||
int vm = instr->VFPMRegValue(kDoublePrecision);
|
||||
int table_len = (instr->Bits(9, 8) + 1) * kDoubleSize;
|
||||
bool vtbx = instr->Bit(6) != 0; // vtbl / vtbx
|
||||
uint64_t destination = 0, indices = 0, result = 0;
|
||||
get_d_register(vd, &destination);
|
||||
get_d_register(vm, &indices);
|
||||
for (int i = 0; i < kDoubleSize; i++) {
|
||||
int shift = i * kBitsPerByte;
|
||||
int index = (indices >> shift) & 0xFF;
|
||||
if (index < table_len) {
|
||||
uint64_t table;
|
||||
get_d_register(vn + index / kDoubleSize, &table);
|
||||
result |= ((table >> ((index % kDoubleSize) * kBitsPerByte)) & 0xFF)
|
||||
<< shift;
|
||||
} else if (vtbx) {
|
||||
result |= destination & (0xFFull << shift);
|
||||
}
|
||||
}
|
||||
set_d_register(vd, &result);
|
||||
} else {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
@ -339,6 +339,8 @@ class Simulator {
|
||||
void DecodeVMOVBetweenCoreAndSinglePrecisionRegisters(Instruction* instr);
|
||||
void DecodeVCMP(Instruction* instr);
|
||||
void DecodeVCVTBetweenDoubleAndSingle(Instruction* instr);
|
||||
int32_t ConvertDoubleToInt(double val, bool unsigned_integer,
|
||||
VFPRoundingMode mode);
|
||||
void DecodeVCVTBetweenFloatingPointAndInteger(Instruction* instr);
|
||||
|
||||
// Executes one instruction.
|
||||
|
@ -1221,6 +1221,10 @@ TEST(14) {
|
||||
CHECK_EQ(kArmNanLower32, bit_cast<int64_t>(t.div_result) & 0xffffffffu);
|
||||
}
|
||||
|
||||
#define INT32_TO_FLOAT(val) \
|
||||
std::round(static_cast<float>(bit_cast<int32_t>(val)))
|
||||
#define UINT32_TO_FLOAT(val) \
|
||||
std::round(static_cast<float>(bit_cast<uint32_t>(val)))
|
||||
|
||||
TEST(15) {
|
||||
// Test the Neon instructions.
|
||||
@ -1255,8 +1259,20 @@ TEST(15) {
|
||||
uint32_t dstA5;
|
||||
uint32_t dstA6;
|
||||
uint32_t dstA7;
|
||||
uint32_t vmov_src[4], vmov_dst[4];
|
||||
uint32_t veor_src[4], veor_dst[4];
|
||||
uint64_t vmov_to_scalar1, vmov_to_scalar2;
|
||||
uint32_t vmov_from_scalar_s8, vmov_from_scalar_u8;
|
||||
uint32_t vmov_from_scalar_s16, vmov_from_scalar_u16;
|
||||
uint32_t vmov_from_scalar_32;
|
||||
uint32_t vmov_src[4], vmov_dst[4], vmvn[4];
|
||||
int32_t vcvt_s32_f32[4];
|
||||
uint32_t vcvt_u32_f32[4];
|
||||
float vcvt_f32_s32[4], vcvt_f32_u32[4];
|
||||
uint32_t vdup1[4], vdup2[4], vdup3[4], vdup4[4];
|
||||
uint32_t veor[4];
|
||||
uint32_t vadd8[4], vadd16[4], vadd32[4];
|
||||
uint32_t vsub8[4], vsub16[4], vsub32[4];
|
||||
uint32_t vtst[4], vceq[4], vbsl[4], vtbl[2], vtbx[2];
|
||||
float vaddf[4], vsubf[4];
|
||||
} T;
|
||||
T t;
|
||||
|
||||
@ -1268,7 +1284,7 @@ TEST(15) {
|
||||
if (CpuFeatures::IsSupported(NEON)) {
|
||||
CpuFeatureScope scope(&assm, NEON);
|
||||
|
||||
__ stm(db_w, sp, r4.bit() | lr.bit());
|
||||
__ stm(db_w, sp, r4.bit() | r5.bit() | lr.bit());
|
||||
// Move 32 bytes with neon.
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, src0))));
|
||||
__ vld1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(r4));
|
||||
@ -1289,23 +1305,200 @@ TEST(15) {
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, dstA4))));
|
||||
__ vst1(Neon8, NeonListOperand(d2, 2), NeonMemOperand(r4));
|
||||
|
||||
// Test vmov for q-registers.
|
||||
// ARM core register to scalar.
|
||||
__ mov(r4, Operand(0xFFFFFFF8));
|
||||
__ vmov(d0, 0);
|
||||
__ vmov(NeonS8, d0, 1, r4);
|
||||
__ vmov(NeonS16, d0, 1, r4);
|
||||
__ vmov(NeonS32, d0, 1, r4);
|
||||
__ vstr(d0, r0, offsetof(T, vmov_to_scalar1));
|
||||
__ vmov(d0, 0);
|
||||
__ vmov(NeonS8, d0, 3, r4);
|
||||
__ vmov(NeonS16, d0, 3, r4);
|
||||
__ vstr(d0, r0, offsetof(T, vmov_to_scalar2));
|
||||
|
||||
// Scalar to ARM core register.
|
||||
__ mov(r4, Operand(0xFFFFFF00));
|
||||
__ mov(r5, Operand(0xFFFFFFFF));
|
||||
__ vmov(d0, r4, r5);
|
||||
__ vmov(NeonS8, r4, d0, 1);
|
||||
__ str(r4, MemOperand(r0, offsetof(T, vmov_from_scalar_s8)));
|
||||
__ vmov(NeonU8, r4, d0, 1);
|
||||
__ str(r4, MemOperand(r0, offsetof(T, vmov_from_scalar_u8)));
|
||||
__ vmov(NeonS16, r4, d0, 1);
|
||||
__ str(r4, MemOperand(r0, offsetof(T, vmov_from_scalar_s16)));
|
||||
__ vmov(NeonU16, r4, d0, 1);
|
||||
__ str(r4, MemOperand(r0, offsetof(T, vmov_from_scalar_u16)));
|
||||
__ vmov(NeonS32, r4, d0, 1);
|
||||
__ str(r4, MemOperand(r0, offsetof(T, vmov_from_scalar_32)));
|
||||
|
||||
// vmov for q-registers.
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmov_src))));
|
||||
__ vld1(Neon8, NeonListOperand(d0, 2), NeonMemOperand(r4));
|
||||
__ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
|
||||
__ vmov(q1, q0);
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmov_dst))));
|
||||
__ vst1(Neon8, NeonListOperand(d2, 2), NeonMemOperand(r4));
|
||||
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
||||
|
||||
// Test veor for q-registers.
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, veor_src))));
|
||||
__ vld1(Neon8, NeonListOperand(d0, 2), NeonMemOperand(r4));
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, veor_dst))));
|
||||
__ vld1(Neon8, NeonListOperand(d2, 2), NeonMemOperand(r4));
|
||||
// vmvn.
|
||||
__ mov(r4, Operand(0xFF));
|
||||
__ vdup(Neon16, q0, r4);
|
||||
__ vmvn(q1, q0);
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmvn))));
|
||||
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
||||
|
||||
// vcvt for q-registers.
|
||||
__ vmov(s0, -1.5);
|
||||
__ vmov(s1, -1);
|
||||
__ vmov(s2, 1);
|
||||
__ vmov(s3, 1.5);
|
||||
__ vcvt_s32_f32(q1, q0);
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcvt_s32_f32))));
|
||||
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
||||
__ vcvt_u32_f32(q1, q0);
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcvt_u32_f32))));
|
||||
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
||||
__ mov(r4, Operand(kMinInt));
|
||||
__ mov(r5, Operand(kMaxInt));
|
||||
__ vmov(d0, r4, r5);
|
||||
__ mov(r4, Operand(kMaxUInt32));
|
||||
__ mov(r5, Operand(kMinInt + 1));
|
||||
__ vmov(d1, r4, r5); // q0 = [kMinInt, kMaxInt, kMaxUInt32, kMinInt + 1]
|
||||
__ vcvt_f32_s32(q1, q0);
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcvt_f32_s32))));
|
||||
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
||||
__ vcvt_f32_u32(q1, q0);
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcvt_f32_u32))));
|
||||
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
||||
|
||||
// int vdup.
|
||||
__ mov(r4, Operand(0xa));
|
||||
__ vdup(Neon8, q0, r4);
|
||||
__ vdup(Neon16, q1, r4);
|
||||
__ vdup(Neon32, q2, r4);
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vdup1))));
|
||||
__ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vdup2))));
|
||||
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vdup3))));
|
||||
__ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4));
|
||||
// float vdup.
|
||||
__ vmov(s0, -1.0);
|
||||
__ vdup(q0, s0);
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vdup4))));
|
||||
__ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
|
||||
|
||||
// veor.
|
||||
__ mov(r4, Operand(0x00aa));
|
||||
__ vdup(Neon16, q0, r4);
|
||||
__ mov(r4, Operand(0x0055));
|
||||
__ vdup(Neon16, q1, r4);
|
||||
__ veor(q1, q1, q0);
|
||||
__ vst1(Neon8, NeonListOperand(d2, 2), NeonMemOperand(r4));
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, veor))));
|
||||
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
||||
|
||||
// vadd(integer).
|
||||
__ mov(r4, Operand(0x81));
|
||||
__ vdup(Neon8, q0, r4);
|
||||
__ mov(r4, Operand(0x82));
|
||||
__ vdup(Neon8, q1, r4);
|
||||
__ vadd(Neon8, q1, q1, q0);
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vadd8))));
|
||||
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
||||
__ mov(r4, Operand(0x8001));
|
||||
__ vdup(Neon16, q0, r4);
|
||||
__ mov(r4, Operand(0x8002));
|
||||
__ vdup(Neon16, q1, r4);
|
||||
__ vadd(Neon16, q1, q1, q0);
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vadd16))));
|
||||
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
||||
__ mov(r4, Operand(0x80000001));
|
||||
__ vdup(Neon32, q0, r4);
|
||||
__ mov(r4, Operand(0x80000002));
|
||||
__ vdup(Neon32, q1, r4);
|
||||
__ vadd(Neon32, q1, q1, q0);
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vadd32))));
|
||||
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
||||
|
||||
// vadd(float).
|
||||
__ vmov(s4, 1.0);
|
||||
__ vdup(q0, s4);
|
||||
__ vdup(q1, s4);
|
||||
__ vadd(q1, q1, q0);
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vaddf))));
|
||||
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
||||
|
||||
// vsub(integer).
|
||||
__ mov(r4, Operand(0x01));
|
||||
__ vdup(Neon8, q0, r4);
|
||||
__ mov(r4, Operand(0x02));
|
||||
__ vdup(Neon8, q1, r4);
|
||||
__ vsub(Neon8, q1, q0, q1);
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vsub8))));
|
||||
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
||||
__ mov(r4, Operand(0x0001));
|
||||
__ vdup(Neon16, q0, r4);
|
||||
__ mov(r4, Operand(0x0002));
|
||||
__ vdup(Neon16, q1, r4);
|
||||
__ vsub(Neon16, q1, q0, q1);
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vsub16))));
|
||||
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
||||
__ mov(r4, Operand(0x00000001));
|
||||
__ vdup(Neon32, q0, r4);
|
||||
__ mov(r4, Operand(0x00000002));
|
||||
__ vdup(Neon32, q1, r4);
|
||||
__ vsub(Neon32, q1, q0, q1);
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vsub32))));
|
||||
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
||||
|
||||
// vsub(float).
|
||||
__ vmov(s4, 2.0);
|
||||
__ vdup(q0, s4);
|
||||
__ vmov(s4, 1.0);
|
||||
__ vdup(q1, s4);
|
||||
__ vsub(q1, q1, q0);
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vsubf))));
|
||||
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
||||
|
||||
// vceq.
|
||||
__ mov(r4, Operand(0x03));
|
||||
__ vdup(Neon8, q0, r4);
|
||||
__ mov(r4, Operand(0x03));
|
||||
__ vdup(Neon16, q1, r4);
|
||||
__ vceq(Neon8, q1, q0, q1);
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vceq))));
|
||||
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
||||
|
||||
// vtst.
|
||||
__ mov(r4, Operand(0x03));
|
||||
__ vdup(Neon8, q0, r4);
|
||||
__ mov(r4, Operand(0x02));
|
||||
__ vdup(Neon16, q1, r4);
|
||||
__ vtst(Neon8, q1, q0, q1);
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vtst))));
|
||||
__ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
|
||||
|
||||
// vbsl.
|
||||
__ mov(r4, Operand(0x00ff));
|
||||
__ vdup(Neon16, q0, r4);
|
||||
__ mov(r4, Operand(0x01));
|
||||
__ vdup(Neon8, q1, r4);
|
||||
__ mov(r4, Operand(0x02));
|
||||
__ vdup(Neon8, q2, r4);
|
||||
__ vbsl(q0, q1, q2);
|
||||
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vbsl))));
|
||||
__ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
|
||||
|
||||
// vtb[l/x].
|
||||
__ mov(r4, Operand(0x06040200));
|
||||
__ mov(r5, Operand(0xff050301));
|
||||
__ vmov(d2, r4, r5); // d2 = ff05030106040200
|
||||
__ vtbl(d0, NeonListOperand(d2, 1), d2);
|
||||
__ vstr(d0, r0, offsetof(T, vtbl));
|
||||
__ vtbx(d2, NeonListOperand(d2, 1), d2);
|
||||
__ vstr(d2, r0, offsetof(T, vtbx));
|
||||
|
||||
// Restore and return.
|
||||
__ ldm(ia_w, sp, r4.bit() | pc.bit());
|
||||
__ ldm(ia_w, sp, r4.bit() | r5.bit() | pc.bit());
|
||||
|
||||
CodeDesc desc;
|
||||
assm.GetCode(&desc);
|
||||
@ -1344,10 +1537,9 @@ TEST(15) {
|
||||
t.dstA7 = 0;
|
||||
t.vmov_src[0] = t.vmov_src[1] = t.vmov_src[2] = t.vmov_src[3] = 1;
|
||||
t.vmov_dst[0] = t.vmov_dst[1] = t.vmov_dst[2] = t.vmov_dst[3] = 0;
|
||||
t.veor_src[0] = t.veor_src[1] = t.veor_src[2] = t.veor_src[3] = 0xAA;
|
||||
t.veor_dst[0] = t.veor_dst[1] = t.veor_dst[2] = t.veor_dst[3] = 0x55;
|
||||
Object* dummy = CALL_GENERATED_CODE(isolate, f, &t, 0, 0, 0, 0);
|
||||
USE(dummy);
|
||||
|
||||
CHECK_EQ(0x01020304u, t.dst0);
|
||||
CHECK_EQ(0x11121314u, t.dst1);
|
||||
CHECK_EQ(0x21222324u, t.dst2);
|
||||
@ -1364,14 +1556,57 @@ TEST(15) {
|
||||
CHECK_EQ(0x00410042u, t.dstA5);
|
||||
CHECK_EQ(0x00830084u, t.dstA6);
|
||||
CHECK_EQ(0x00810082u, t.dstA7);
|
||||
CHECK_EQ(0xfffffff8fff8f800u, t.vmov_to_scalar1);
|
||||
CHECK_EQ(0xfff80000f8000000u, t.vmov_to_scalar2);
|
||||
CHECK_EQ(0xFFFFFFFFu, t.vmov_from_scalar_s8);
|
||||
CHECK_EQ(0xFFu, t.vmov_from_scalar_u8);
|
||||
CHECK_EQ(0xFFFFFFFFu, t.vmov_from_scalar_s16);
|
||||
CHECK_EQ(0xFFFFu, t.vmov_from_scalar_u16);
|
||||
CHECK_EQ(0xFFFFFFFFu, t.vmov_from_scalar_32);
|
||||
CHECK_EQ(1u, t.vmov_dst[0]);
|
||||
CHECK_EQ(1u, t.vmov_dst[1]);
|
||||
CHECK_EQ(1u, t.vmov_dst[2]);
|
||||
CHECK_EQ(1u, t.vmov_dst[3]);
|
||||
CHECK_EQ(0xFFu, t.veor_dst[0]);
|
||||
CHECK_EQ(0xFFu, t.veor_dst[1]);
|
||||
CHECK_EQ(0xFFu, t.veor_dst[2]);
|
||||
CHECK_EQ(0xFFu, t.veor_dst[3]);
|
||||
CHECK_EQ(-1, t.vcvt_s32_f32[0]);
|
||||
CHECK_EQ(-1, t.vcvt_s32_f32[1]);
|
||||
CHECK_EQ(1, t.vcvt_s32_f32[2]);
|
||||
CHECK_EQ(1, t.vcvt_s32_f32[3]);
|
||||
CHECK_EQ(0u, t.vcvt_u32_f32[0]);
|
||||
CHECK_EQ(0u, t.vcvt_u32_f32[1]);
|
||||
CHECK_EQ(1u, t.vcvt_u32_f32[2]);
|
||||
CHECK_EQ(1u, t.vcvt_u32_f32[3]);
|
||||
|
||||
// src: [kMinInt, kMaxInt, kMaxUInt32, kMinInt + 1]
|
||||
CHECK_EQ(INT32_TO_FLOAT(kMinInt), t.vcvt_f32_s32[0]);
|
||||
CHECK_EQ(INT32_TO_FLOAT(kMaxInt), t.vcvt_f32_s32[1]);
|
||||
CHECK_EQ(INT32_TO_FLOAT(kMaxUInt32), t.vcvt_f32_s32[2]);
|
||||
CHECK_EQ(INT32_TO_FLOAT(kMinInt + 1), t.vcvt_f32_s32[3]);
|
||||
CHECK_EQ(UINT32_TO_FLOAT(kMinInt), t.vcvt_f32_u32[0]);
|
||||
CHECK_EQ(UINT32_TO_FLOAT(kMaxInt), t.vcvt_f32_u32[1]);
|
||||
CHECK_EQ(UINT32_TO_FLOAT(kMaxUInt32), t.vcvt_f32_u32[2]);
|
||||
CHECK_EQ(UINT32_TO_FLOAT(kMinInt + 1), t.vcvt_f32_u32[3]);
|
||||
|
||||
for (int i = 0; i < 4; i++) CHECK_EQ(0xFF00FF00, t.vmvn[i]);
|
||||
for (int i = 0; i < 4; i++) CHECK_EQ(0x0a0a0a0au, t.vdup1[i]);
|
||||
for (int i = 0; i < 4; i++) CHECK_EQ(0x000a000au, t.vdup2[i]);
|
||||
for (int i = 0; i < 4; i++) CHECK_EQ(0x0000000au, t.vdup3[i]);
|
||||
for (int i = 0; i < 4; i++) CHECK_EQ(0xbf800000u, t.vdup4[i]); // -1.0f
|
||||
for (int i = 0; i < 4; i++) CHECK_EQ(0x00ff00ffu, t.veor[i]);
|
||||
for (int i = 0; i < 4; i++) CHECK_EQ(2.0, t.vaddf[i]);
|
||||
for (int i = 0; i < 4; i++) CHECK_EQ(0x03030303u, t.vadd8[i]);
|
||||
for (int i = 0; i < 4; i++) CHECK_EQ(0x00030003u, t.vadd16[i]);
|
||||
for (int i = 0; i < 4; i++) CHECK_EQ(0x00000003u, t.vadd32[i]);
|
||||
for (int i = 0; i < 4; i++) CHECK_EQ(-1.0, t.vsubf[i]);
|
||||
for (int i = 0; i < 4; i++) CHECK_EQ(0xffffffffu, t.vsub8[i]);
|
||||
for (int i = 0; i < 4; i++) CHECK_EQ(0xffffffffu, t.vsub16[i]);
|
||||
for (int i = 0; i < 4; i++) CHECK_EQ(0xffffffffu, t.vsub32[i]);
|
||||
for (int i = 0; i < 4; i++) CHECK_EQ(0x00ff00ffu, t.vceq[i]);
|
||||
for (int i = 0; i < 4; i++) CHECK_EQ(0x00ff00ffu, t.vtst[i]);
|
||||
for (int i = 0; i < 4; i++) CHECK_EQ(0x02010201u, t.vbsl[i]);
|
||||
CHECK_EQ(0x05010400u, t.vtbl[0]);
|
||||
CHECK_EQ(0x00030602u, t.vtbl[1]);
|
||||
CHECK_EQ(0x05010400u, t.vtbx[0]);
|
||||
CHECK_EQ(0xff030602u, t.vtbx[1]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2963,9 +3198,9 @@ TEST(vswp) {
|
||||
__ vmov(d11, r5, r5); // q5 = [-1.0, -1.0]
|
||||
__ vswp(q4, q5);
|
||||
__ add(r6, r0, Operand(static_cast<int32_t>(offsetof(T, vswp_q4))));
|
||||
__ vst1(Neon8, NeonListOperand(d8, 2), NeonMemOperand(r6));
|
||||
__ vst1(Neon8, NeonListOperand(q4), NeonMemOperand(r6));
|
||||
__ add(r6, r0, Operand(static_cast<int32_t>(offsetof(T, vswp_q5))));
|
||||
__ vst1(Neon8, NeonListOperand(d10, 2), NeonMemOperand(r6));
|
||||
__ vst1(Neon8, NeonListOperand(q5), NeonMemOperand(r6));
|
||||
|
||||
__ ldm(ia_w, sp, r4.bit() | r5.bit() | r6.bit() | r7.bit() | pc.bit());
|
||||
__ bx(lr);
|
||||
|
@ -936,10 +936,45 @@ TEST(Neon) {
|
||||
"f3886a11 vmovl.u8 q3, d1");
|
||||
COMPARE(vmovl(NeonU8, q4, d2),
|
||||
"f3888a12 vmovl.u8 q4, d2");
|
||||
|
||||
COMPARE(vmov(NeonS8, d0, 0, r0), "ee400b10 vmov.8 d0[0], r0");
|
||||
COMPARE(vmov(NeonU8, d1, 1, r1), "ee411b30 vmov.8 d1[1], r1");
|
||||
COMPARE(vmov(NeonS8, d2, 2, r2), "ee422b50 vmov.8 d2[2], r2");
|
||||
COMPARE(vmov(NeonU8, d3, 3, r8), "ee438b70 vmov.8 d3[3], r8");
|
||||
COMPARE(vmov(NeonS8, d4, 4, r0), "ee640b10 vmov.8 d4[4], r0");
|
||||
COMPARE(vmov(NeonU8, d5, 5, r1), "ee651b30 vmov.8 d5[5], r1");
|
||||
COMPARE(vmov(NeonS8, d6, 6, r2), "ee662b50 vmov.8 d6[6], r2");
|
||||
COMPARE(vmov(NeonU8, d7, 7, r8), "ee678b70 vmov.8 d7[7], r8");
|
||||
COMPARE(vmov(NeonS16, d0, 0, r0), "ee000b30 vmov.16 d0[0], r0");
|
||||
COMPARE(vmov(NeonS16, d1, 1, r1), "ee011b70 vmov.16 d1[1], r1");
|
||||
COMPARE(vmov(NeonS16, d2, 2, r2), "ee222b30 vmov.16 d2[2], r2");
|
||||
COMPARE(vmov(NeonS16, d3, 3, r7), "ee237b70 vmov.16 d3[3], r7");
|
||||
COMPARE(vmov(NeonS32, d0, 0, r0), "ee000b10 vmov.32 d0[0], r0");
|
||||
COMPARE(vmov(NeonU32, d0, 1, r0), "ee200b10 vmov.32 d0[1], r0");
|
||||
|
||||
COMPARE(vmov(NeonS8, r0, d0, 0), "ee500b10 vmov.s8 r0, d0[0]");
|
||||
COMPARE(vmov(NeonU8, r1, d1, 1), "eed11b30 vmov.u8 r1, d1[1]");
|
||||
COMPARE(vmov(NeonS8, r2, d2, 2), "ee522b50 vmov.s8 r2, d2[2]");
|
||||
COMPARE(vmov(NeonU8, r8, d3, 3), "eed38b70 vmov.u8 r8, d3[3]");
|
||||
COMPARE(vmov(NeonS8, r0, d4, 4), "ee740b10 vmov.s8 r0, d4[4]");
|
||||
COMPARE(vmov(NeonU8, r1, d5, 5), "eef51b30 vmov.u8 r1, d5[5]");
|
||||
COMPARE(vmov(NeonS8, r2, d6, 6), "ee762b50 vmov.s8 r2, d6[6]");
|
||||
COMPARE(vmov(NeonU8, r8, d7, 7), "eef78b70 vmov.u8 r8, d7[7]");
|
||||
COMPARE(vmov(NeonS16, r0, d0, 0), "ee100b30 vmov.s16 r0, d0[0]");
|
||||
COMPARE(vmov(NeonU16, r1, d1, 1), "ee911b70 vmov.u16 r1, d1[1]");
|
||||
COMPARE(vmov(NeonS16, r2, d2, 2), "ee322b30 vmov.s16 r2, d2[2]");
|
||||
COMPARE(vmov(NeonU16, r7, d3, 3), "eeb37b70 vmov.u16 r7, d3[3]");
|
||||
COMPARE(vmov(NeonS32, r2, d15, 0), "ee1f2b10 vmov.32 r2, d15[0]");
|
||||
COMPARE(vmov(NeonS32, r3, d14, 1), "ee3e3b10 vmov.32 r3, d14[1]");
|
||||
|
||||
COMPARE(vmov(q0, q15),
|
||||
"f22e01fe vmov q0, q15");
|
||||
COMPARE(vmov(q8, q9),
|
||||
"f26201f2 vmov q8, q9");
|
||||
COMPARE(vmvn(q0, q15),
|
||||
"f3b005ee vmvn q0, q15");
|
||||
COMPARE(vmvn(q8, q9),
|
||||
"f3f005e2 vmvn q8, q9");
|
||||
COMPARE(vswp(d0, d31),
|
||||
"f3b2002f vswp d0, d31");
|
||||
COMPARE(vswp(d16, d14),
|
||||
@ -948,6 +983,24 @@ TEST(Neon) {
|
||||
"f3b2006e vswp q0, q15");
|
||||
COMPARE(vswp(q8, q9),
|
||||
"f3f20062 vswp q8, q9");
|
||||
COMPARE(vdup(Neon8, q0, r0),
|
||||
"eee00b10 vdup.8 q0, r0");
|
||||
COMPARE(vdup(Neon16, q1, r4),
|
||||
"eea24b30 vdup.16 q1, r4");
|
||||
COMPARE(vdup(Neon32, q15, r1),
|
||||
"eeae1b90 vdup.32 q15, r1");
|
||||
COMPARE(vdup(q0, s3),
|
||||
"f3bc0c41 vdup q0, d1[1]");
|
||||
COMPARE(vdup(q15, s2),
|
||||
"f3f4ec41 vdup q15, d1[0]");
|
||||
COMPARE(vcvt_f32_s32(q15, q1),
|
||||
"f3fbe642 vcvt.f32.s32 q15, q1");
|
||||
COMPARE(vcvt_f32_u32(q8, q9),
|
||||
"f3fb06e2 vcvt.f32.u32 q8, q9");
|
||||
COMPARE(vcvt_s32_f32(q15, q1),
|
||||
"f3fbe742 vcvt.s32.f32 q15, q1");
|
||||
COMPARE(vcvt_u32_f32(q8, q9),
|
||||
"f3fb07e2 vcvt.u32.f32 q8, q9");
|
||||
COMPARE(veor(d0, d1, d2),
|
||||
"f3010112 veor d0, d1, d2");
|
||||
COMPARE(veor(d0, d30, d31),
|
||||
@ -956,6 +1009,54 @@ TEST(Neon) {
|
||||
"f3020154 veor q0, q1, q2");
|
||||
COMPARE(veor(q15, q0, q8),
|
||||
"f340e170 veor q15, q0, q8");
|
||||
COMPARE(vadd(q15, q0, q8),
|
||||
"f240ed60 vadd.f32 q15, q0, q8");
|
||||
COMPARE(vadd(Neon8, q0, q1, q2),
|
||||
"f2020844 vadd.i8 q0, q1, q2");
|
||||
COMPARE(vadd(Neon16, q1, q2, q8),
|
||||
"f2142860 vadd.i16 q1, q2, q8");
|
||||
COMPARE(vadd(Neon32, q15, q0, q8),
|
||||
"f260e860 vadd.i32 q15, q0, q8");
|
||||
COMPARE(vsub(q15, q0, q8),
|
||||
"f260ed60 vsub.f32 q15, q0, q8");
|
||||
COMPARE(vsub(Neon8, q0, q1, q2),
|
||||
"f3020844 vsub.i8 q0, q1, q2");
|
||||
COMPARE(vsub(Neon16, q1, q2, q8),
|
||||
"f3142860 vsub.i16 q1, q2, q8");
|
||||
COMPARE(vsub(Neon32, q15, q0, q8),
|
||||
"f360e860 vsub.i32 q15, q0, q8");
|
||||
COMPARE(vtst(Neon8, q0, q1, q2),
|
||||
"f2020854 vtst.i8 q0, q1, q2");
|
||||
COMPARE(vtst(Neon16, q1, q2, q8),
|
||||
"f2142870 vtst.i16 q1, q2, q8");
|
||||
COMPARE(vtst(Neon32, q15, q0, q8),
|
||||
"f260e870 vtst.i32 q15, q0, q8");
|
||||
COMPARE(vceq(Neon8, q0, q1, q2),
|
||||
"f3020854 vceq.i8 q0, q1, q2");
|
||||
COMPARE(vceq(Neon16, q1, q2, q8),
|
||||
"f3142870 vceq.i16 q1, q2, q8");
|
||||
COMPARE(vceq(Neon32, q15, q0, q8),
|
||||
"f360e870 vceq.i32 q15, q0, q8");
|
||||
COMPARE(vbsl(q0, q1, q2),
|
||||
"f3120154 vbsl q0, q1, q2");
|
||||
COMPARE(vbsl(q15, q0, q8),
|
||||
"f350e170 vbsl q15, q0, q8");
|
||||
COMPARE(vtbl(d0, NeonListOperand(d1, 1), d2),
|
||||
"f3b10802 vtbl.8 d0, {d1}, d2");
|
||||
COMPARE(vtbl(d31, NeonListOperand(d0, 2), d4),
|
||||
"f3f0f904 vtbl.8 d31, {d0, d1}, d4");
|
||||
COMPARE(vtbl(d15, NeonListOperand(d1, 3), d5),
|
||||
"f3b1fa05 vtbl.8 d15, {d1, d2, d3}, d5");
|
||||
COMPARE(vtbl(d15, NeonListOperand(d1, 4), d5),
|
||||
"f3b1fb05 vtbl.8 d15, {d1, d2, d3, d4}, d5");
|
||||
COMPARE(vtbx(d0, NeonListOperand(d1, 1), d2),
|
||||
"f3b10842 vtbx.8 d0, {d1}, d2");
|
||||
COMPARE(vtbx(d31, NeonListOperand(d0, 2), d4),
|
||||
"f3f0f944 vtbx.8 d31, {d0, d1}, d4");
|
||||
COMPARE(vtbx(d15, NeonListOperand(d1, 3), d5),
|
||||
"f3b1fa45 vtbx.8 d15, {d1, d2, d3}, d5");
|
||||
COMPARE(vtbx(d15, NeonListOperand(d1, 4), d5),
|
||||
"f3b1fb45 vtbx.8 d15, {d1, d2, d3, d4}, d5");
|
||||
}
|
||||
|
||||
VERIFY_RUN();
|
||||
|
@ -42,6 +42,7 @@ typedef void* (*F)(int x, int y, int p2, int p3, int p4);
|
||||
|
||||
#define __ masm->
|
||||
|
||||
typedef Object* (*F3)(void* p0, int p1, int p2, int p3, int p4);
|
||||
typedef int (*F5)(void*, void*, void*, void*, void*);
|
||||
|
||||
|
||||
@ -134,4 +135,248 @@ TEST(LoadAndStoreWithRepresentation) {
|
||||
CHECK(!CALL_GENERATED_CODE(isolate, f, 0, 0, 0, 0, 0));
|
||||
}
|
||||
|
||||
TEST(ExtractLane) {
  if (!CpuFeatures::IsSupported(NEON)) return;

  // Set up an executable buffer and a macro assembler targeting it.
  size_t actual_size;
  byte* buffer = static_cast<byte*>(v8::base::OS::Allocate(
      Assembler::kMinimalBufferSize, &actual_size, true));
  CHECK(buffer);
  Isolate* isolate = CcTest::i_isolate();
  HandleScope handles(isolate);
  MacroAssembler assembler(isolate, buffer, static_cast<int>(actual_size),
                           v8::internal::CodeObjectRequired::kYes);
  MacroAssembler* masm = &assembler;  // Create a pointer for the __ macro.

  // Results written back by the generated code, one slot per extracted lane.
  // The f32x4 arrays hold raw lane bits, so integer comparison is valid.
  typedef struct {
    int32_t i32x4_low[4];
    int32_t i32x4_high[4];
    int32_t i16x8_low[8];
    int32_t i16x8_high[8];
    int32_t i8x16_low[16];
    int32_t i8x16_high[16];
    int32_t f32x4_low[4];
    int32_t f32x4_high[4];
  } T;
  T t;

  __ stm(db_w, sp, r4.bit() | r5.bit() | lr.bit());

  // Low Q registers: splat the lane index into every lane, then extract one
  // lane back out and store it for checking on the C++ side.
  for (int lane = 0; lane < 4; lane++) {
    __ mov(r4, Operand(lane));
    __ vdup(Neon32, q1, r4);
    __ ExtractLane(r5, q1, NeonS32, lane);
    __ str(r5, MemOperand(r0, offsetof(T, i32x4_low) + 4 * lane));
    // Also exercise the float-lane extraction path into an S register.
    SwVfpRegister flane = SwVfpRegister::from_code(lane);
    __ ExtractLane(flane, q1, r4, lane);
    __ vstr(flane, r0, offsetof(T, f32x4_low) + 4 * lane);
  }

  for (int lane = 0; lane < 8; lane++) {
    __ mov(r4, Operand(lane));
    __ vdup(Neon16, q1, r4);
    __ ExtractLane(r5, q1, NeonS16, lane);
    __ str(r5, MemOperand(r0, offsetof(T, i16x8_low) + 4 * lane));
  }

  for (int lane = 0; lane < 16; lane++) {
    __ mov(r4, Operand(lane));
    __ vdup(Neon8, q1, lane == 0 ? r4 : r4);  // splat lane index
    __ ExtractLane(r5, q1, NeonS8, lane);
    __ str(r5, MemOperand(r0, offsetof(T, i8x16_low) + 4 * lane));
  }

  // High Q registers (q15 maps to d30/d31) need the VFP32DREGS feature.
  // Use negated lane indices so low/high mixups are caught.
  if (CpuFeatures::IsSupported(VFP32DREGS)) {
    for (int lane = 0; lane < 4; lane++) {
      __ mov(r4, Operand(-lane));
      __ vdup(Neon32, q15, r4);
      __ ExtractLane(r5, q15, NeonS32, lane);
      __ str(r5, MemOperand(r0, offsetof(T, i32x4_high) + 4 * lane));
      SwVfpRegister flane = SwVfpRegister::from_code(lane);
      __ ExtractLane(flane, q15, r4, lane);
      __ vstr(flane, r0, offsetof(T, f32x4_high) + 4 * lane);
    }

    for (int lane = 0; lane < 8; lane++) {
      __ mov(r4, Operand(-lane));
      __ vdup(Neon16, q15, r4);
      __ ExtractLane(r5, q15, NeonS16, lane);
      __ str(r5, MemOperand(r0, offsetof(T, i16x8_high) + 4 * lane));
    }

    for (int lane = 0; lane < 16; lane++) {
      __ mov(r4, Operand(-lane));
      __ vdup(Neon8, q15, r4);
      __ ExtractLane(r5, q15, NeonS8, lane);
      __ str(r5, MemOperand(r0, offsetof(T, i8x16_high) + 4 * lane));
    }
  }

  __ ldm(ia_w, sp, r4.bit() | r5.bit() | pc.bit());

  // Assemble, wrap in a Code object, and run the generated function.
  CodeDesc desc;
  masm->GetCode(&desc);
  Handle<Code> code = isolate->factory()->NewCode(
      desc, Code::ComputeFlags(Code::STUB), Handle<Code>());
#ifdef DEBUG
  OFStream os(stdout);
  code->Print(os);
#endif
  F3 f = FUNCTION_CAST<F3>(code->entry());
  Object* dummy = CALL_GENERATED_CODE(isolate, f, &t, 0, 0, 0, 0);
  USE(dummy);

  // Each extracted lane must equal the value that was splatted into it.
  for (int lane = 0; lane < 4; lane++) {
    CHECK_EQ(lane, t.i32x4_low[lane]);
    CHECK_EQ(lane, t.f32x4_low[lane]);
  }
  for (int lane = 0; lane < 8; lane++) {
    CHECK_EQ(lane, t.i16x8_low[lane]);
  }
  for (int lane = 0; lane < 16; lane++) {
    CHECK_EQ(lane, t.i8x16_low[lane]);
  }
  if (CpuFeatures::IsSupported(VFP32DREGS)) {
    for (int lane = 0; lane < 4; lane++) {
      CHECK_EQ(-lane, t.i32x4_high[lane]);
      CHECK_EQ(-lane, t.f32x4_high[lane]);
    }
    for (int lane = 0; lane < 8; lane++) {
      CHECK_EQ(-lane, t.i16x8_high[lane]);
    }
    for (int lane = 0; lane < 16; lane++) {
      CHECK_EQ(-lane, t.i8x16_high[lane]);
    }
  }
}
|
||||
|
||||
TEST(ReplaceLane) {
  if (!CpuFeatures::IsSupported(NEON)) return;

  // Set up an executable buffer and a macro assembler targeting it.
  size_t actual_size;
  byte* buffer = static_cast<byte*>(v8::base::OS::Allocate(
      Assembler::kMinimalBufferSize, &actual_size, true));
  CHECK(buffer);
  Isolate* isolate = CcTest::i_isolate();
  HandleScope handles(isolate);
  MacroAssembler assembler(isolate, buffer, static_cast<int>(actual_size),
                           v8::internal::CodeObjectRequired::kYes);
  MacroAssembler* masm = &assembler;  // Create a pointer for the __ macro.

  // Whole vectors written back by the generated code via vst1; the element
  // types match the lane widths being replaced. The f32x4 arrays hold raw
  // lane bits, so integer comparison is valid.
  typedef struct {
    int32_t i32x4_low[4];
    int32_t i32x4_high[4];
    int16_t i16x8_low[8];
    int16_t i16x8_high[8];
    int8_t i8x16_low[16];
    int8_t i8x16_high[16];
    int32_t f32x4_low[4];
    int32_t f32x4_high[4];
  } T;
  T t;

  __ stm(db_w, sp, r4.bit() | r5.bit() | r6.bit() | r7.bit() | lr.bit());

  const Register kScratch = r5;

  // Build q0 (integer lanes) and q1 (float lanes) one ReplaceLane at a time,
  // then store the full vectors for checking on the C++ side.
  __ veor(q0, q0, q0);  // Zero
  __ veor(q1, q1, q1);  // Zero
  for (int lane = 0; lane < 4; lane++) {
    __ mov(r4, Operand(lane));
    __ ReplaceLane(q0, q0, r4, NeonS32, lane);
    SwVfpRegister flane = SwVfpRegister::from_code(lane);
    __ vmov(flane, r4);
    __ ReplaceLane(q1, q1, flane, kScratch, lane);
  }
  __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, i32x4_low))));
  __ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
  __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, f32x4_low))));
  __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));

  __ veor(q0, q0, q0);  // Zero
  for (int lane = 0; lane < 8; lane++) {
    __ mov(r4, Operand(lane));
    __ ReplaceLane(q0, q0, r4, NeonS16, lane);
  }
  __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, i16x8_low))));
  __ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));

  __ veor(q0, q0, q0);  // Zero
  for (int lane = 0; lane < 16; lane++) {
    __ mov(r4, Operand(lane));
    __ ReplaceLane(q0, q0, r4, NeonS8, lane);
  }
  __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, i8x16_low))));
  __ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));

  // High Q registers (q14/q15 map to d28..d31) need the VFP32DREGS feature.
  // Use negated lane indices so low/high mixups are caught.
  if (CpuFeatures::IsSupported(VFP32DREGS)) {
    __ veor(q14, q14, q14);  // Zero
    __ veor(q15, q15, q15);  // Zero
    for (int lane = 0; lane < 4; lane++) {
      __ mov(r4, Operand(-lane));
      __ ReplaceLane(q14, q14, r4, NeonS32, lane);
      SwVfpRegister flane = SwVfpRegister::from_code(lane);
      __ vmov(flane, r4);
      __ ReplaceLane(q15, q15, flane, kScratch, lane);
    }
    __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, i32x4_high))));
    __ vst1(Neon8, NeonListOperand(q14), NeonMemOperand(r4));
    __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, f32x4_high))));
    __ vst1(Neon8, NeonListOperand(q15), NeonMemOperand(r4));

    __ veor(q14, q14, q14);  // Zero
    for (int lane = 0; lane < 8; lane++) {
      __ mov(r4, Operand(-lane));
      __ ReplaceLane(q14, q14, r4, NeonS16, lane);
    }
    __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, i16x8_high))));
    __ vst1(Neon8, NeonListOperand(q14), NeonMemOperand(r4));

    __ veor(q14, q14, q14);  // Zero
    for (int lane = 0; lane < 16; lane++) {
      __ mov(r4, Operand(-lane));
      __ ReplaceLane(q14, q14, r4, NeonS8, lane);
    }
    __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, i8x16_high))));
    __ vst1(Neon8, NeonListOperand(q14), NeonMemOperand(r4));
  }

  __ ldm(ia_w, sp, r4.bit() | r5.bit() | r6.bit() | r7.bit() | pc.bit());

  // Assemble, wrap in a Code object, and run the generated function.
  CodeDesc desc;
  masm->GetCode(&desc);
  Handle<Code> code = isolate->factory()->NewCode(
      desc, Code::ComputeFlags(Code::STUB), Handle<Code>());
#ifdef DEBUG
  OFStream os(stdout);
  code->Print(os);
#endif
  F3 f = FUNCTION_CAST<F3>(code->entry());
  Object* dummy = CALL_GENERATED_CODE(isolate, f, &t, 0, 0, 0, 0);
  USE(dummy);

  // Each lane must now hold the value ReplaceLane wrote into it.
  for (int lane = 0; lane < 4; lane++) {
    CHECK_EQ(lane, t.i32x4_low[lane]);
    CHECK_EQ(lane, t.f32x4_low[lane]);
  }
  for (int lane = 0; lane < 8; lane++) {
    CHECK_EQ(lane, t.i16x8_low[lane]);
  }
  for (int lane = 0; lane < 16; lane++) {
    CHECK_EQ(lane, t.i8x16_low[lane]);
  }
  if (CpuFeatures::IsSupported(VFP32DREGS)) {
    for (int lane = 0; lane < 4; lane++) {
      CHECK_EQ(-lane, t.i32x4_high[lane]);
      CHECK_EQ(-lane, t.f32x4_high[lane]);
    }
    for (int lane = 0; lane < 8; lane++) {
      CHECK_EQ(-lane, t.i16x8_high[lane]);
    }
    for (int lane = 0; lane < 16; lane++) {
      CHECK_EQ(-lane, t.i8x16_high[lane]);
    }
  }
}
|
||||
|
||||
#undef __
|
||||
|
Loading…
Reference in New Issue
Block a user