ARM: Implement memcpy using NEON.

Add support for a few NEON and ARM SIMD instructions and use them for various
memcpy operations.

BUG=none
TEST=none

Review URL: https://chromiumcodereview.appspot.com/17858002

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@15602 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
rodolph.perfetta@gmail.com 2013-07-10 15:32:39 +00:00
parent c04a72e76b
commit 762157d697
17 changed files with 1648 additions and 63 deletions

View File

@ -49,6 +49,7 @@ bool CpuFeatures::initialized_ = false;
#endif
unsigned CpuFeatures::supported_ = 0;
unsigned CpuFeatures::found_by_runtime_probing_only_ = 0;
unsigned CpuFeatures::cache_line_size_ = 64;
ExternalReference ExternalReference::cpu_features() {
@ -125,6 +126,9 @@ void CpuFeatures::Probe() {
static_cast<uint64_t>(1) << VFP3 |
static_cast<uint64_t>(1) << ARMv7;
}
if (FLAG_enable_neon) {
supported_ |= 1u << NEON;
}
// For the simulator=arm build, use ARMv7 when FLAG_enable_armv7 is enabled
if (FLAG_enable_armv7) {
supported_ |= static_cast<uint64_t>(1) << ARMv7;
@ -157,6 +161,10 @@ void CpuFeatures::Probe() {
static_cast<uint64_t>(1) << ARMv7;
}
if (!IsSupported(NEON) && FLAG_enable_neon && OS::ArmCpuHasFeature(NEON)) {
found_by_runtime_probing_only_ |= 1u << NEON;
}
if (!IsSupported(ARMv7) && FLAG_enable_armv7 && OS::ArmCpuHasFeature(ARMv7)) {
found_by_runtime_probing_only_ |= static_cast<uint64_t>(1) << ARMv7;
}
@ -171,12 +179,18 @@ void CpuFeatures::Probe() {
static_cast<uint64_t>(1) << UNALIGNED_ACCESSES;
}
if (OS::GetCpuImplementer() == QUALCOMM_IMPLEMENTER &&
CpuImplementer implementer = OS::GetCpuImplementer();
if (implementer == QUALCOMM_IMPLEMENTER &&
FLAG_enable_movw_movt && OS::ArmCpuHasFeature(ARMv7)) {
found_by_runtime_probing_only_ |=
static_cast<uint64_t>(1) << MOVW_MOVT_IMMEDIATE_LOADS;
}
CpuPart part = OS::GetCpuPart(implementer);
if ((part == CORTEX_A9) || (part == CORTEX_A5)) {
cache_line_size_ = 32;
}
if (!IsSupported(VFP32DREGS) && FLAG_enable_32dregs
&& OS::ArmCpuHasFeature(VFP32DREGS)) {
found_by_runtime_probing_only_ |= static_cast<uint64_t>(1) << VFP32DREGS;
@ -247,11 +261,12 @@ void CpuFeatures::PrintTarget() {
void CpuFeatures::PrintFeatures() {
printf(
"ARMv7=%d VFP3=%d VFP32DREGS=%d SUDIV=%d UNALIGNED_ACCESSES=%d "
"ARMv7=%d VFP3=%d VFP32DREGS=%d NEON=%d SUDIV=%d UNALIGNED_ACCESSES=%d "
"MOVW_MOVT_IMMEDIATE_LOADS=%d",
CpuFeatures::IsSupported(ARMv7),
CpuFeatures::IsSupported(VFP3),
CpuFeatures::IsSupported(VFP32DREGS),
CpuFeatures::IsSupported(NEON),
CpuFeatures::IsSupported(SUDIV),
CpuFeatures::IsSupported(UNALIGNED_ACCESSES),
CpuFeatures::IsSupported(MOVW_MOVT_IMMEDIATE_LOADS));
@ -378,6 +393,66 @@ MemOperand::MemOperand(Register rn, Register rm,
}
// Builds a NEON memory operand that has no register increment.
// The Rm slot doubles as the addressing-mode selector: pc is stored for the
// plain [rn] form and sp for the write-back [rn]! form.
NeonMemOperand::NeonMemOperand(Register rn, AddrMode am, int align) {
  ASSERT((am == Offset) || (am == PostIndex));
  rn_ = rn;
  if (am == Offset) {
    rm_ = pc;
  } else {
    rm_ = sp;
  }
  SetAlignment(align);
}
// Builds a NEON memory operand of the form [rn {:align}], rm, where rm is the
// register increment applied to the base.
NeonMemOperand::NeonMemOperand(Register rn, Register rm, int align) {
  rm_ = rm;
  rn_ = rn;
  SetAlignment(align);
}
// Translates an alignment given in bits (0 meaning "no alignment hint", 64,
// 128 or 256) into the value stored in align_, which the assembler places in
// the align field of the instruction. Any other value is a programming error.
void NeonMemOperand::SetAlignment(int align) {
  if (align == 0) {
    align_ = 0;
  } else if (align == 64) {
    align_ = 1;
  } else if (align == 128) {
    align_ = 2;
  } else if (align == 256) {
    align_ = 3;
  } else {
    UNREACHABLE();
    align_ = 0;
  }
}
// Describes a list of registers_count (1 to 4) consecutive D registers that
// starts at base. The count is stored as the matching NeonListType value; any
// other count is a programming error.
NeonListOperand::NeonListOperand(DoubleRegister base, int registers_count) {
  base_ = base;
  if (registers_count == 1) {
    type_ = nlt_1;
  } else if (registers_count == 2) {
    type_ = nlt_2;
  } else if (registers_count == 3) {
    type_ = nlt_3;
  } else if (registers_count == 4) {
    type_ = nlt_4;
  } else {
    UNREACHABLE();
    type_ = nlt_1;
  }
}
// -----------------------------------------------------------------------------
// Specific instructions, constants, and masks.
@ -1546,6 +1621,107 @@ void Assembler::bfi(Register dst,
}
void Assembler::pkhbt(Register dst,
                      Register src1,
                      const Operand& src2,
                      Condition cond ) {
  // PKHBT, see ARM DDI 0406C.b, A8.8.125.
  // Encoding:
  //   cond(31-28) | 01101000(27-20) | Rn(19-16) |
  //   Rd(15-12) | imm5(11-7) | 0(6) | 01(5-4) | Rm(3-0)
  // src2 must be a register shifted left by an immediate in [0, 31].
  ASSERT(!dst.is(pc));
  ASSERT(!src1.is(pc));
  ASSERT(!src2.rm().is(pc));
  ASSERT(!src2.rm().is(no_reg));
  ASSERT(src2.rs().is(no_reg));
  ASSERT((src2.shift_imm_ >= 0) && (src2.shift_imm_ <= 31));
  ASSERT(src2.shift_op() == LSL);
  const int opcode_bits = 0x68*B20 | B4;
  const int register_bits =
      src1.code()*B16 | dst.code()*B12 | src2.rm().code();
  const int shift_bits = src2.shift_imm_*B7;
  emit(cond | opcode_bits | register_bits | shift_bits);
}
void Assembler::pkhtb(Register dst,
                      Register src1,
                      const Operand& src2,
                      Condition cond) {
  // PKHTB, see ARM DDI 0406C.b, A8.8.125.
  // Encoding:
  //   cond(31-28) | 01101000(27-20) | Rn(19-16) |
  //   Rd(15-12) | imm5(11-7) | 1(6) | 01(5-4) | Rm(3-0)
  // src2 must be a register shifted right (ASR) by an immediate in [1, 32].
  ASSERT(!dst.is(pc));
  ASSERT(!src1.is(pc));
  ASSERT(!src2.rm().is(pc));
  ASSERT(!src2.rm().is(no_reg));
  ASSERT(src2.rs().is(no_reg));
  ASSERT((src2.shift_imm_ >= 1) && (src2.shift_imm_ <= 32));
  ASSERT(src2.shift_op() == ASR);
  // A shift amount of 32 is stored as 0 in the 5-bit imm5 field.
  int imm5 = src2.shift_imm_;
  if (imm5 == 32) {
    imm5 = 0;
  }
  const int opcode_bits = 0x68*B20 | B6 | B4;
  const int register_bits =
      src1.code()*B16 | dst.code()*B12 | src2.rm().code();
  emit(cond | opcode_bits | register_bits | imm5*B7);
}
void Assembler::uxtb(Register dst,
                     const Operand& src,
                     Condition cond) {
  // UXTB, see ARM DDI 0406C.b, A8.8.274.
  // Encoding:
  //   cond(31-28) | 01101110(27-20) | 1111(19-16) |
  //   Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
  // src must be a register rotated right by 0, 8, 16 or 24 bits.
  ASSERT(!dst.is(pc));
  ASSERT(!src.rm().is(pc));
  ASSERT(!src.rm().is(no_reg));
  ASSERT(src.rs().is(no_reg));
  ASSERT((src.shift_imm_ == 0) ||
         (src.shift_imm_ == 8) ||
         (src.shift_imm_ == 16) ||
         (src.shift_imm_ == 24));
  ASSERT(src.shift_op() == ROR);
  // Halving the rotation and masking with 0xC leaves rotation/8 positioned
  // two bits up, so scaling by B8 lands it in bits 11-10.
  const int rotate_bits = ((src.shift_imm_ >> 1)&0xC)*B8;
  const int opcode_bits = 0x6E*B20 | 0xF*B16 | 7*B4;
  emit(cond | opcode_bits | dst.code()*B12 | rotate_bits | src.rm().code());
}
void Assembler::uxtab(Register dst,
                      Register src1,
                      const Operand& src2,
                      Condition cond) {
  // UXTAB, see ARM DDI 0406C.b, A8.8.271.
  // Encoding:
  //   cond(31-28) | 01101110(27-20) | Rn(19-16) |
  //   Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
  // src2 must be a register rotated right by 0, 8, 16 or 24 bits.
  ASSERT(!dst.is(pc));
  ASSERT(!src1.is(pc));
  ASSERT(!src2.rm().is(pc));
  ASSERT(!src2.rm().is(no_reg));
  ASSERT(src2.rs().is(no_reg));
  ASSERT((src2.shift_imm_ == 0) ||
         (src2.shift_imm_ == 8) ||
         (src2.shift_imm_ == 16) ||
         (src2.shift_imm_ == 24));
  ASSERT(src2.shift_op() == ROR);
  // Halving the rotation and masking with 0xC leaves rotation/8 positioned
  // two bits up, so scaling by B8 lands it in bits 11-10.
  const int rotate_bits = ((src2.shift_imm_ >> 1) &0xC)*B8;
  const int opcode_bits = 0x6E*B20 | 7*B4;
  const int register_bits =
      src1.code()*B16 | dst.code()*B12 | src2.rm().code();
  emit(cond | opcode_bits | register_bits | rotate_bits);
}
void Assembler::uxtb16(Register dst,
                       const Operand& src,
                       Condition cond) {
  // UXTB16, see ARM DDI 0406C.b, A8.8.275.
  // Encoding:
  //   cond(31-28) | 01101100(27-20) | 1111(19-16) |
  //   Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
  // src must be a register rotated right by 0, 8, 16 or 24 bits.
  ASSERT(!dst.is(pc));
  ASSERT(!src.rm().is(pc));
  ASSERT(!src.rm().is(no_reg));
  ASSERT(src.rs().is(no_reg));
  ASSERT((src.shift_imm_ == 0) ||
         (src.shift_imm_ == 8) ||
         (src.shift_imm_ == 16) ||
         (src.shift_imm_ == 24));
  ASSERT(src.shift_op() == ROR);
  // Halving the rotation and masking with 0xC leaves rotation/8 positioned
  // two bits up, so scaling by B8 lands it in bits 11-10.
  const int rotate_bits = ((src.shift_imm_ >> 1)&0xC)*B8;
  const int opcode_bits = 0x6C*B20 | 0xF*B16 | 7*B4;
  emit(cond | opcode_bits | dst.code()*B12 | rotate_bits | src.rm().code());
}
// Status register access instructions.
void Assembler::mrs(Register dst, SRegister s, Condition cond) {
ASSERT(!dst.is(pc));
@ -1644,6 +1820,25 @@ void Assembler::strd(Register src1, Register src2,
}
// Preload instructions.
void Assembler::pld(const MemOperand& address) {
  // PLD with an immediate offset, see ARM DDI 0406C.b (PLD, PLDW).
  // Bits emitted below:
  //   1111(31-28) | 0101(27-24) | U(23) | 1(22) | 01(21-20) | Rn(19-16) |
  //   1111(15-12) | imm12(11-0)
  // Only base-plus-immediate operands are accepted; the sign of the offset is
  // carried by the U bit and its magnitude by imm12.
  ASSERT(address.rm().is(no_reg));
  ASSERT(address.am() == Offset);
  int offset = address.offset();
  const bool subtract = offset < 0;
  if (subtract) {
    offset = -offset;
  }
  int U = 0;
  if (!subtract) {
    U = B23;
  }
  ASSERT(offset < 4096);
  emit(kSpecialCondition | B26 | B24 | U | B22 | B20 | address.rn().code()*B16 |
       0xf*B12 | offset);
}
// Load/Store multiple instructions.
void Assembler::ldm(BlockAddrMode am,
Register base,
@ -2707,6 +2902,50 @@ void Assembler::vsqrt(const DwVfpRegister dst,
}
// Support for NEON.
void Assembler::vld1(NeonSize size,
                     const NeonListOperand& dst,
                     const NeonMemOperand& src) {
  // VLD1 (multiple single elements), see ARM DDI 0406C.b, A8.8.320.
  // Encoding:
  //   1111(31-28) | 01000(27-23) | D(22) | 10(21-20) | Rn(19-16) |
  //   Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0)
  ASSERT(CpuFeatures::IsSupported(NEON));
  int d_bit;
  int vd_bits;
  dst.base().split_code(&vd_bits, &d_bit);
  const unsigned opcode_bits = 0xFU*B28 | 4*B24 | 2*B20;
  const int list_bits = d_bit*B22 | vd_bits*B12 | dst.type()*B8;
  const int memory_bits =
      src.rn().code()*B16 | src.align()*B4 | src.rm().code();
  emit(opcode_bits | list_bits | size*B6 | memory_bits);
}
void Assembler::vst1(NeonSize size,
                     const NeonListOperand& src,
                     const NeonMemOperand& dst) {
  // VST1 (multiple single elements), see ARM DDI 0406C.b, A8.8.404.
  // Encoding:
  //   1111(31-28) | 01000(27-23) | D(22) | 00(21-20) | Rn(19-16) |
  //   Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0)
  ASSERT(CpuFeatures::IsSupported(NEON));
  int d_bit;
  int vd_bits;
  src.base().split_code(&vd_bits, &d_bit);
  const unsigned opcode_bits = 0xFU*B28 | 4*B24;
  const int list_bits = d_bit*B22 | vd_bits*B12 | src.type()*B8;
  const int memory_bits =
      dst.rn().code()*B16 | dst.align()*B4 | dst.rm().code();
  emit(opcode_bits | list_bits | size*B6 | memory_bits);
}
void Assembler::vmovl(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src) {
  // VMOVL, see ARM DDI 0406C.b, A8.8.346.
  // Encoding:
  //   1111(31-28) | 001(27-25) | U(24) | 1(23) | D(22) | imm3(21-19) |
  //   000(18-16) | Vd(15-12) | 101000(11-6) | M(5) | 1(4) | Vm(3-0)
  // dt supplies both the U bit (already at bit 24) and the imm3 element size.
  ASSERT(CpuFeatures::IsSupported(NEON));
  int d_bit;
  int vd_bits;
  dst.split_code(&vd_bits, &d_bit);
  int m_bit;
  int vm_bits;
  src.split_code(&vm_bits, &m_bit);
  const unsigned opcode_bits = 0xFU*B28 | B25 | B23 | 0xA*B8 | B4;
  const int type_bits =
      (dt & NeonDataTypeUMask) | (dt & NeonDataTypeSizeMask)*B19;
  const int register_bits = d_bit*B22 | vd_bits*B12 | m_bit*B5 | vm_bits;
  emit(opcode_bits | type_bits | register_bits);
}
// Pseudo instructions.
void Assembler::nop(int type) {
// ARMv6{K/T2} and v7 have an actual NOP instruction but it serializes

View File

@ -78,12 +78,15 @@ class CpuFeatures : public AllStatic {
(!Serializer::enabled() || !IsFoundByRuntimeProbingOnly(f)));
}
static unsigned cache_line_size() { return cache_line_size_; }
private:
#ifdef DEBUG
static bool initialized_;
#endif
static unsigned supported_;
static unsigned found_by_runtime_probing_only_;
static unsigned cache_line_size_;
friend class ExternalReference;
DISALLOW_COPY_AND_ASSIGN(CpuFeatures);
@ -301,6 +304,36 @@ struct DwVfpRegister {
typedef DwVfpRegister DoubleRegister;
// Quad word NEON register.
// A quad register is identified by its index q0..q15 (code_). It is kept as a
// plain aggregate so that it can be brace-initialized.
struct QwNeonRegister {
  static const int kMaxNumRegisters = 16;

  // Wraps a raw register index; validity is checked on use, not here.
  static QwNeonRegister from_code(int code) {
    QwNeonRegister r = { code };
    return r;
  }

  bool is_valid() const {
    return (0 <= code_) && (code_ < kMaxNumRegisters);
  }
  bool is(QwNeonRegister reg) const { return code_ == reg.code_; }
  int code() const {
    ASSERT(is_valid());
    return code_;
  }
  // Splits the register into the 4-bit field (vm) and the extra high bit (m)
  // used by instruction encodings. Qn occupies the same storage as the D
  // register pair D(2n), D(2n+1) and is encoded with the 5-bit number 2n, so
  // the index has to be doubled before it is split. Without the doubling the
  // high bit could never be set and q8..q15 would be unencodable.
  void split_code(int* vm, int* m) const {
    ASSERT(is_valid());
    int encoded_code = code_ << 1;
    *m = (encoded_code & 0x10) >> 4;
    *vm = encoded_code & 0x0F;
  }

  int code_;
};
typedef QwNeonRegister QuadRegister;
// Support for the VFP registers s0 to s31 (d0 to d15).
// Note that "s(N):s(N+1)" is the same as "d(N/2)".
const SwVfpRegister s0 = { 0 };
@ -370,6 +403,23 @@ const DwVfpRegister d29 = { 29 };
const DwVfpRegister d30 = { 30 };
const DwVfpRegister d31 = { 31 };
const QwNeonRegister q0 = { 0 };
const QwNeonRegister q1 = { 1 };
const QwNeonRegister q2 = { 2 };
const QwNeonRegister q3 = { 3 };
const QwNeonRegister q4 = { 4 };
const QwNeonRegister q5 = { 5 };
const QwNeonRegister q6 = { 6 };
const QwNeonRegister q7 = { 7 };
const QwNeonRegister q8 = { 8 };
const QwNeonRegister q9 = { 9 };
const QwNeonRegister q10 = { 10 };
const QwNeonRegister q11 = { 11 };
const QwNeonRegister q12 = { 12 };
const QwNeonRegister q13 = { 13 };
const QwNeonRegister q14 = { 14 };
const QwNeonRegister q15 = { 15 };
// Aliases for double registers. Defined using #define instead of
// "static const DwVfpRegister&" because Clang complains otherwise when a
// compilation unit that includes this header doesn't use the variables.
@ -562,6 +612,42 @@ class MemOperand BASE_EMBEDDED {
friend class Assembler;
};
// Class NeonMemOperand represents a memory operand in load and
// store NEON instructions
// Holds the base register, the optional register increment and the encoded
// alignment of a NEON load/store address.
class NeonMemOperand BASE_EMBEDDED {
public:
// Forms without a register increment:
// [rn {:align}] Offset
// [rn {:align}]! PostIndex
// align is given in bits; 0 (the default) means no alignment is specified.
explicit NeonMemOperand(Register rn, AddrMode am = Offset, int align = 0);
// Form with a register increment:
// [rn {:align}], rm PostIndex
explicit NeonMemOperand(Register rn, Register rm, int align = 0);
// Accessors used by the assembler when encoding the instruction.
Register rn() const { return rn_; }
Register rm() const { return rm_; }
// Returns the encoded alignment value (not the alignment in bits).
int align() const { return align_; }
private:
// Converts an alignment in bits into the encoded value stored in align_.
void SetAlignment(int align);
Register rn_; // base
Register rm_; // register increment
int align_; // encoded alignment
};
// Class NeonListOperand represents a list of NEON registers
// A list is described by its first D register and by a NeonListType that
// encodes how many registers it contains.
class NeonListOperand BASE_EMBEDDED {
public:
// registers_count is the number of D registers in the list, starting at base.
explicit NeonListOperand(DoubleRegister base, int registers_count = 1);
// First register of the list.
DoubleRegister base() const { return base_; }
// Encoded list length, as placed in the instruction's type field.
NeonListType type() const { return type_; }
private:
DoubleRegister base_;
NeonListType type_;
};
extern const Instr kMovLrPc;
extern const Instr kLdrPCMask;
extern const Instr kLdrPCPattern;
@ -866,6 +952,19 @@ class Assembler : public AssemblerBase {
void bfi(Register dst, Register src, int lsb, int width,
Condition cond = al);
void pkhbt(Register dst, Register src1, const Operand& src2,
Condition cond = al);
void pkhtb(Register dst, Register src1, const Operand& src2,
Condition cond = al);
void uxtb(Register dst, const Operand& src, Condition cond = al);
void uxtab(Register dst, Register src1, const Operand& src2,
Condition cond = al);
void uxtb16(Register dst, const Operand& src, Condition cond = al);
// Status register access instructions
void mrs(Register dst, SRegister s, Condition cond = al);
@ -887,6 +986,9 @@ class Assembler : public AssemblerBase {
Register src2,
const MemOperand& dst, Condition cond = al);
// Preload instructions
void pld(const MemOperand& address);
// Load/Store multiple instructions
void ldm(BlockAddrMode am, Register base, RegList dst, Condition cond = al);
void stm(BlockAddrMode am, Register base, RegList src, Condition cond = al);
@ -1097,6 +1199,17 @@ class Assembler : public AssemblerBase {
const DwVfpRegister src,
const Condition cond = al);
// Support for NEON.
// All these APIs support D0 to D31 and Q0 to Q15.
void vld1(NeonSize size,
const NeonListOperand& dst,
const NeonMemOperand& src);
void vst1(NeonSize size,
const NeonListOperand& src,
const NeonMemOperand& dst);
void vmovl(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src);
// Pseudo instructions
// Different nop operations are used by the code generator to detect certain

View File

@ -112,6 +112,252 @@ UnaryMathFunction CreateExpFunction() {
#endif
}
#if defined(V8_HOST_ARCH_ARM)
// Generates a byte-copy routine at runtime and returns a pointer to it.
// The supplied stub is returned unchanged when no code is generated: in
// simulator builds, when the serializer is enabled, when unaligned accesses
// are not supported, or when the code buffer cannot be allocated.
// The generated code takes the destination in r0, the source in r1 and the
// number of bytes to copy in r2.
OS::MemCopyUint8Function CreateMemCopyUint8Function(
OS::MemCopyUint8Function stub) {
#if defined(USE_SIMULATOR)
return stub;
#else
if (Serializer::enabled() || !CpuFeatures::IsSupported(UNALIGNED_ACCESSES)) {
return stub;
}
size_t actual_size;
// NOTE(review): the third argument is presumably the "executable" flag of
// OS::Allocate - confirm against its declaration.
byte* buffer = static_cast<byte*>(OS::Allocate(1 * KB, &actual_size, true));
if (buffer == NULL) return stub;
MacroAssembler masm(NULL, buffer, static_cast<int>(actual_size));
Register dest = r0;
Register src = r1;
Register chars = r2;
Register temp1 = r3;
Label less_4;
if (CpuFeatures::IsSupported(NEON)) {
Label loop, less_256, less_128, less_64, less_32, _16_or_less, _8_or_less;
Label size_less_than_8;
// Dispatch on the size, preloading the source as the size thresholds are
// passed. An additional pld is issued per 64 bytes when the cache line size
// is 32 bytes.
__ pld(MemOperand(src, 0));
__ cmp(chars, Operand(8));
__ b(lt, &size_less_than_8);
__ cmp(chars, Operand(32));
__ b(lt, &less_32);
if (CpuFeatures::cache_line_size() == 32) {
__ pld(MemOperand(src, 32));
}
__ cmp(chars, Operand(64));
__ b(lt, &less_64);
__ pld(MemOperand(src, 64));
if (CpuFeatures::cache_line_size() == 32) {
__ pld(MemOperand(src, 96));
}
__ cmp(chars, Operand(128));
__ b(lt, &less_128);
__ pld(MemOperand(src, 128));
if (CpuFeatures::cache_line_size() == 32) {
__ pld(MemOperand(src, 160));
}
__ pld(MemOperand(src, 192));
if (CpuFeatures::cache_line_size() == 32) {
__ pld(MemOperand(src, 224));
}
__ cmp(chars, Operand(256));
__ b(lt, &less_256);
// Main loop for 256 bytes or more: each iteration copies 64 bytes through
// d0-d7. chars is biased by -256 so that the loop exits with between 192 and
// 255 bytes left, which the straight-line code below finishes.
__ sub(chars, chars, Operand(256));
__ bind(&loop);
__ pld(MemOperand(src, 256));
__ vld1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(src, PostIndex));
if (CpuFeatures::cache_line_size() == 32) {
__ pld(MemOperand(src, 256));
}
__ vld1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(src, PostIndex));
__ sub(chars, chars, Operand(64), SetCC);
__ vst1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(dest, PostIndex));
__ vst1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(dest, PostIndex));
__ b(ge, &loop);
__ add(chars, chars, Operand(256));
// 128 to 255 bytes left: copy 128 bytes in two 64-byte steps.
__ bind(&less_256);
__ vld1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(src, PostIndex));
__ vld1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(src, PostIndex));
__ sub(chars, chars, Operand(128));
__ vst1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(dest, PostIndex));
__ vst1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(dest, PostIndex));
__ vld1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(src, PostIndex));
__ vld1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(src, PostIndex));
__ vst1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(dest, PostIndex));
__ vst1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(dest, PostIndex));
__ cmp(chars, Operand(64));
__ b(lt, &less_64);
// 64 to 127 bytes left: copy 64 bytes.
__ bind(&less_128);
__ vld1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(src, PostIndex));
__ vld1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(src, PostIndex));
__ sub(chars, chars, Operand(64));
__ vst1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(dest, PostIndex));
__ vst1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(dest, PostIndex));
// Fewer than 64 bytes left: copy 32 bytes if at least that many remain.
__ bind(&less_64);
__ cmp(chars, Operand(32));
__ b(lt, &less_32);
__ vld1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(src, PostIndex));
__ vst1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(dest, PostIndex));
__ sub(chars, chars, Operand(32));
// Fewer than 32 bytes left: copy 16 bytes if more than 16 remain.
__ bind(&less_32);
__ cmp(chars, Operand(16));
__ b(le, &_16_or_less);
__ vld1(Neon8, NeonListOperand(d0, 2), NeonMemOperand(src, PostIndex));
__ vst1(Neon8, NeonListOperand(d0, 2), NeonMemOperand(dest, PostIndex));
__ sub(chars, chars, Operand(16));
// 16 bytes or fewer left: copy 8 bytes if more than 8 remain.
__ bind(&_16_or_less);
__ cmp(chars, Operand(8));
__ b(le, &_8_or_less);
__ vld1(Neon8, NeonListOperand(d0), NeonMemOperand(src, PostIndex));
__ vst1(Neon8, NeonListOperand(d0), NeonMemOperand(dest, PostIndex));
__ sub(chars, chars, Operand(8));
// Do a last copy which may overlap with the previous copy (up to 8 bytes).
// Both pointers are moved back by (8 - chars) so that the final 8-byte copy
// ends exactly at the end of the buffers.
__ bind(&_8_or_less);
__ rsb(chars, chars, Operand(8));
__ sub(src, src, Operand(chars));
__ sub(dest, dest, Operand(chars));
__ vld1(Neon8, NeonListOperand(d0), NeonMemOperand(src));
__ vst1(Neon8, NeonListOperand(d0), NeonMemOperand(dest));
__ Ret();
// Fewer than 8 bytes in total: copy one word if at least 4 bytes are
// requested, then fall through to the shared tail at less_4.
__ bind(&size_less_than_8);
__ bic(temp1, chars, Operand(0x3), SetCC);
__ b(&less_4, eq);
__ ldr(temp1, MemOperand(src, 4, PostIndex));
__ str(temp1, MemOperand(dest, 4, PostIndex));
} else {
// Without NEON: copy one word at a time until dest reaches temp2, the
// destination address at which only the last (chars & 3) bytes remain.
Register temp2 = ip;
Label loop;
__ bic(temp2, chars, Operand(0x3), SetCC);
__ b(&less_4, eq);
__ add(temp2, dest, temp2);
__ bind(&loop);
__ ldr(temp1, MemOperand(src, 4, PostIndex));
__ str(temp1, MemOperand(dest, 4, PostIndex));
__ cmp(dest, temp2);
__ b(&loop, ne);
}
// Shared tail: copy the remaining 0 to 3 bytes. Shifting chars left by 31
// moves its two low bits into the flags, which then predicate a halfword copy
// and a byte copy.
__ bind(&less_4);
__ mov(chars, Operand(chars, LSL, 31), SetCC);
// bit0 => Z (ne), bit1 => C (cs)
__ ldrh(temp1, MemOperand(src, 2, PostIndex), cs);
__ strh(temp1, MemOperand(dest, 2, PostIndex), cs);
__ ldrb(temp1, MemOperand(src), ne);
__ strb(temp1, MemOperand(dest), ne);
__ Ret();
CodeDesc desc;
masm.GetCode(&desc);
// The code is called directly from the buffer, so it must not need
// relocation.
ASSERT(!RelocInfo::RequiresRelocation(desc));
CPU::FlushICache(buffer, actual_size);
OS::ProtectCode(buffer, actual_size);
return FUNCTION_CAST<OS::MemCopyUint8Function>(buffer);
#endif
}
// Convert 8 to 16. The number of characters to copy must be at least 8.
// Generates a routine at runtime that widens each source byte to a 16-bit
// value while copying, and returns a pointer to it. The supplied stub is
// returned unchanged in simulator builds, when the serializer is enabled, when
// unaligned accesses are not supported, or when the code buffer cannot be
// allocated.
// The generated code takes the destination in r0, the source in r1 and the
// number of characters in r2.
OS::MemCopyUint16Uint8Function CreateMemCopyUint16Uint8Function(
OS::MemCopyUint16Uint8Function stub) {
#if defined(USE_SIMULATOR)
return stub;
#else
if (Serializer::enabled() || !CpuFeatures::IsSupported(UNALIGNED_ACCESSES)) {
return stub;
}
size_t actual_size;
// NOTE(review): the third argument is presumably the "executable" flag of
// OS::Allocate - confirm against its declaration.
byte* buffer = static_cast<byte*>(OS::Allocate(1 * KB, &actual_size, true));
if (buffer == NULL) return stub;
MacroAssembler masm(NULL, buffer, static_cast<int>(actual_size));
Register dest = r0;
Register src = r1;
Register chars = r2;
if (CpuFeatures::IsSupported(NEON)) {
Register temp = r3;
Label loop;
// temp = destination address reached after converting the largest multiple of
// 8 characters; chars is reduced to the remainder (chars & 7).
__ bic(temp, chars, Operand(0x7));
__ sub(chars, chars, Operand(temp));
__ add(temp, dest, Operand(temp, LSL, 1));
// Each iteration loads 8 bytes, widens them to 8 halfwords in q0 (d0-d1) and
// stores the resulting 16 bytes. The loop body runs before the first test,
// hence the requirement of at least 8 characters.
__ bind(&loop);
__ vld1(Neon8, NeonListOperand(d0), NeonMemOperand(src, PostIndex));
__ vmovl(NeonU8, q0, d0);
__ vst1(Neon16, NeonListOperand(d0, 2), NeonMemOperand(dest, PostIndex));
__ cmp(dest, temp);
__ b(&loop, ne);
// Do a last copy which will overlap with the previous copy (1 to 8 bytes).
// Both pointers are moved back so that this final 8-character conversion
// ends exactly at the end of the buffers.
__ rsb(chars, chars, Operand(8));
__ sub(src, src, Operand(chars));
__ sub(dest, dest, Operand(chars, LSL, 1));
__ vld1(Neon8, NeonListOperand(d0), NeonMemOperand(src));
__ vmovl(NeonU8, q0, d0);
__ vst1(Neon16, NeonListOperand(d0, 2), NeonMemOperand(dest));
__ Ret();
} else {
Register temp1 = r3;
Register temp2 = ip;
Register temp3 = lr;
Register temp4 = r4;
Label loop;
Label not_two;
// lr and r4 are used as temporaries, so both are saved; the routine returns
// by popping the saved lr into pc.
__ Push(lr, r4);
// temp2 = destination address reached after converting the largest multiple
// of 4 characters.
__ bic(temp2, chars, Operand(0x3));
__ add(temp2, dest, Operand(temp2, LSL, 1));
// Each iteration converts 4 characters: uxtb16 separates the even and the odd
// bytes of the loaded word, and pkhbt/pkhtb recombine them into the two output
// words holding characters 0-1 and 2-3.
__ bind(&loop);
__ ldr(temp1, MemOperand(src, 4, PostIndex));
__ uxtb16(temp3, Operand(temp1, ROR, 0));
__ uxtb16(temp4, Operand(temp1, ROR, 8));
__ pkhbt(temp1, temp3, Operand(temp4, LSL, 16));
__ str(temp1, MemOperand(dest));
__ pkhtb(temp1, temp4, Operand(temp3, ASR, 16));
__ str(temp1, MemOperand(dest, 4));
__ add(dest, dest, Operand(8));
__ cmp(dest, temp2);
__ b(&loop, ne);
// Handle the remaining 0 to 3 characters using the two low bits of chars,
// which the shift moves into the flags.
__ mov(chars, Operand(chars, LSL, 31), SetCC); // bit0 => ne, bit1 => cs
__ b(&not_two, cc);
// Two characters: load a halfword and spread its two bytes over one word.
__ ldrh(temp1, MemOperand(src, 2, PostIndex));
__ uxtb(temp3, Operand(temp1, ROR, 8));
__ mov(temp3, Operand(temp3, LSL, 16));
__ uxtab(temp3, temp3, Operand(temp1, ROR, 0));
__ str(temp3, MemOperand(dest, 4, PostIndex));
// One last character if bit 0 was set; no instruction after the flag-setting
// mov changes the flags, so the ne condition is still valid here.
__ bind(&not_two);
__ ldrb(temp1, MemOperand(src), ne);
__ strh(temp1, MemOperand(dest), ne);
__ Pop(pc, r4);
}
CodeDesc desc;
masm.GetCode(&desc);
CPU::FlushICache(buffer, actual_size);
OS::ProtectCode(buffer, actual_size);
return FUNCTION_CAST<OS::MemCopyUint16Uint8Function>(buffer);
#endif
}
#endif
#undef __

View File

@ -33,22 +33,6 @@
#error ARM EABI support is required.
#endif
#if defined(__ARM_ARCH_7A__) || \
defined(__ARM_ARCH_7R__) || \
defined(__ARM_ARCH_7__)
# define CAN_USE_ARMV7_INSTRUCTIONS 1
#ifndef CAN_USE_VFP3_INSTRUCTIONS
# define CAN_USE_VFP3_INSTRUCTIONS
#endif
#endif
// Simulator should support unaligned access by default.
#if !defined(__arm__)
# ifndef CAN_USE_UNALIGNED_ACCESSES
# define CAN_USE_UNALIGNED_ACCESSES 1
# endif
#endif
namespace v8 {
namespace internal {
@ -331,6 +315,32 @@ enum LFlag {
};
// NEON data type
// Each value carries the U bit at its final position (bit 24) and the imm3
// element size in its three low bits; the two masks extract those parts.
enum NeonDataType {
  NeonDataTypeSizeMask = 0x7,
  NeonDataTypeUMask = 1 << 24,
  NeonS8 = 0x1,                          // U = 0, imm3 = 0b001
  NeonS16 = 0x2,                         // U = 0, imm3 = 0b010
  NeonS32 = 0x4,                         // U = 0, imm3 = 0b100
  NeonU8 = NeonDataTypeUMask | NeonS8,   // U = 1, imm3 = 0b001
  NeonU16 = NeonDataTypeUMask | NeonS16, // U = 1, imm3 = 0b010
  NeonU32 = NeonDataTypeUMask | NeonS32  // U = 1, imm3 = 0b100
};
// Values of the 4-bit type field for register lists of 1 to 4 consecutive
// D registers.
enum NeonListType {
nlt_1 = 0x7,  // one register
nlt_2 = 0xA,  // two registers
nlt_3 = 0x6,  // three registers
nlt_4 = 0x2   // four registers
};
// Element size as stored in the 2-bit size field of NEON load/store
// instructions. The encoded value is log2 of the element size in bytes, so
// every enumerator must fit in two bits; a larger value would spill into the
// neighbouring field when the assembler shifts it into place.
enum NeonSize {
  Neon8 = 0x0,
  Neon16 = 0x1,
  Neon32 = 0x2,
  Neon64 = 0x3
};
// -----------------------------------------------------------------------------
// Supervisor Call (svc) specific support.
@ -573,6 +583,7 @@ class Instruction {
DECLARE_STATIC_TYPED_ACCESSOR(Condition, ConditionField);
inline int TypeValue() const { return Bits(27, 25); }
inline int SpecialValue() const { return Bits(27, 23); }
inline int RnValue() const { return Bits(19, 16); }
DECLARE_STATIC_ACCESSOR(RnValue);

View File

@ -113,6 +113,8 @@ class Decoder {
// Handle formatting of instructions and their options.
int FormatRegister(Instruction* instr, const char* option);
void FormatNeonList(int Vd, int type);
void FormatNeonMemory(int Rn, int align, int Rm);
int FormatOption(Instruction* instr, const char* option);
void Format(Instruction* instr, const char* format);
void Unknown(Instruction* instr);
@ -133,6 +135,8 @@ class Decoder {
void DecodeTypeVFP(Instruction* instr);
void DecodeType6CoprocessorIns(Instruction* instr);
void DecodeSpecialCondition(Instruction* instr);
void DecodeVMOVBetweenCoreAndSinglePrecisionRegisters(Instruction* instr);
void DecodeVCMP(Instruction* instr);
void DecodeVCVTBetweenDoubleAndSingle(Instruction* instr);
@ -419,6 +423,41 @@ int Decoder::FormatVFPinstruction(Instruction* instr, const char* format) {
}
// Appends a NEON register list such as "{d0, d1}" to the output buffer.
// Vd is the number of the first D register and type is a NeonListType value
// selecting how many consecutive registers are listed. Nothing is printed for
// an unrecognised type.
void Decoder::FormatNeonList(int Vd, int type) {
  switch (type) {
    case nlt_1:
      out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_,
                                      "{d%d}", Vd);
      break;
    case nlt_2:
      out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_,
                                      "{d%d, d%d}", Vd, Vd + 1);
      break;
    case nlt_3:
      out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_,
                                      "{d%d, d%d, d%d}", Vd, Vd + 1, Vd + 2);
      break;
    case nlt_4:
      out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_,
                                      "{d%d, d%d, d%d, d%d}",
                                      Vd, Vd + 1, Vd + 2, Vd + 3);
      break;
    default:
      break;
  }
}
// Appends a NEON memory operand to the output buffer.
// Rn is the base register number, align the 2-bit alignment field and Rm the
// register-increment field. Rm == 15 denotes the plain [rn] form, Rm == 13 the
// write-back [rn]! form, and any other value a post-increment by that
// register.
void Decoder::FormatNeonMemory(int Rn, int align, int Rm) {
  out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_,
                                  "[r%d", Rn);
  if (align != 0) {
    // The field values 1, 2 and 3 stand for 64-, 128- and 256-bit alignment,
    // i.e. 32 << align, matching what the assembler encodes.
    out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_,
                                    ":%d", (1 << align) << 5);
  }
  if (Rm == 15) {
    Print("]");
  } else if (Rm == 13) {
    Print("]!");
  } else {
    out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_,
                                    "], r%d", Rm);
  }
}
// Print the movw or movt instruction.
void Decoder::PrintMovwMovt(Instruction* instr) {
int imm = instr->ImmedMovwMovtValue();
@ -982,15 +1021,107 @@ void Decoder::DecodeType3(Instruction* instr) {
break;
}
case ia_x: {
if (instr->HasW()) {
VERIFY(instr->Bits(5, 4) == 0x1);
if (instr->Bit(22) == 0x1) {
Format(instr, "usat 'rd, #'imm05@16, 'rm'shift_sat");
} else {
UNREACHABLE(); // SSAT.
}
} else {
if (instr->Bit(4) == 0) {
Format(instr, "'memop'cond'b 'rd, ['rn], +'shift_rm");
} else {
if (instr->Bit(5) == 0) {
switch (instr->Bits(22, 21)) {
case 0:
if (instr->Bit(20) == 0) {
if (instr->Bit(6) == 0) {
Format(instr, "pkhbt'cond 'rd, 'rn, 'rm, lsl #'imm05@07");
} else {
if (instr->Bits(11, 7) == 0) {
Format(instr, "pkhtb'cond 'rd, 'rn, 'rm, asr #32");
} else {
Format(instr, "pkhtb'cond 'rd, 'rn, 'rm, asr #'imm05@07");
}
}
} else {
UNREACHABLE();
}
break;
case 1:
UNREACHABLE();
break;
case 2:
UNREACHABLE();
break;
case 3:
Format(instr, "usat 'rd, #'imm05@16, 'rm'shift_sat");
break;
}
} else {
switch (instr->Bits(22, 21)) {
case 0:
UNREACHABLE();
break;
case 1:
UNREACHABLE();
break;
case 2:
if ((instr->Bit(20) == 0) && (instr->Bits(9, 6) == 1)) {
if (instr->Bits(19, 16) == 0xF) {
switch (instr->Bits(11, 10)) {
case 0:
Format(instr, "uxtb16'cond 'rd, 'rm, ror #0");
break;
case 1:
Format(instr, "uxtb16'cond 'rd, 'rm, ror #8");
break;
case 2:
Format(instr, "uxtb16'cond 'rd, 'rm, ror #16");
break;
case 3:
Format(instr, "uxtb16'cond 'rd, 'rm, ror #24");
break;
}
} else {
UNREACHABLE();
}
} else {
UNREACHABLE();
}
break;
case 3:
if ((instr->Bit(20) == 0) && (instr->Bits(9, 6) == 1)) {
if (instr->Bits(19, 16) == 0xF) {
switch (instr->Bits(11, 10)) {
case 0:
Format(instr, "uxtb'cond 'rd, 'rm, ror #0");
break;
case 1:
Format(instr, "uxtb'cond 'rd, 'rm, ror #8");
break;
case 2:
Format(instr, "uxtb'cond 'rd, 'rm, ror #16");
break;
case 3:
Format(instr, "uxtb'cond 'rd, 'rm, ror #24");
break;
}
} else {
switch (instr->Bits(11, 10)) {
case 0:
Format(instr, "uxtab'cond 'rd, 'rn, 'rm, ror #0");
break;
case 1:
Format(instr, "uxtab'cond 'rd, 'rn, 'rm, ror #8");
break;
case 2:
Format(instr, "uxtab'cond 'rd, 'rn, 'rm, ror #16");
break;
case 3:
Format(instr, "uxtab'cond 'rd, 'rn, 'rm, ror #24");
break;
}
}
} else {
UNREACHABLE();
}
break;
}
}
}
break;
}
@ -1423,6 +1554,91 @@ void Decoder::DecodeType6CoprocessorIns(Instruction* instr) {
}
}
// Disassembles instructions whose condition field is kSpecialCondition.
// Dispatches on bits 27-23 and recognises vmovl (signed and unsigned), vst1,
// vld1 and pld with an immediate offset; everything else is reported through
// Unknown().
void Decoder::DecodeSpecialCondition(Instruction* instr) {
  switch (instr->SpecialValue()) {
    case 5:
      if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
          (instr->Bit(4) == 1)) {
        // vmovl signed
        // The destination is a Q register: D:Vd holds twice its number, so
        // the register to print is (D:Vd) >> 1.
        int Vd = (instr->Bit(22) << 3) | (instr->VdValue() >> 1);
        int Vm = (instr->Bit(5) << 4) | instr->VmValue();
        int imm3 = instr->Bits(21, 19);
        out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_,
                                        "vmovl.s%d q%d, d%d", imm3*8, Vd, Vm);
      } else {
        Unknown(instr);
      }
      break;
    case 7:
      if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
          (instr->Bit(4) == 1)) {
        // vmovl unsigned
        // The destination is a Q register: D:Vd holds twice its number, so
        // the register to print is (D:Vd) >> 1.
        int Vd = (instr->Bit(22) << 3) | (instr->VdValue() >> 1);
        int Vm = (instr->Bit(5) << 4) | instr->VmValue();
        int imm3 = instr->Bits(21, 19);
        out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_,
                                        "vmovl.u%d q%d, d%d", imm3*8, Vd, Vm);
      } else {
        Unknown(instr);
      }
      break;
    case 8:
      if (instr->Bits(21, 20) == 0) {
        // vst1
        int Vd = (instr->Bit(22) << 4) | instr->VdValue();
        int Rn = instr->VnValue();
        int type = instr->Bits(11, 8);
        int size = instr->Bits(7, 6);
        int align = instr->Bits(5, 4);
        int Rm = instr->VmValue();
        // The size field is log2 of the element size in bytes.
        out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_,
                                        "vst1.%d ", (1 << size) << 3);
        FormatNeonList(Vd, type);
        Print(", ");
        FormatNeonMemory(Rn, align, Rm);
      } else if (instr->Bits(21, 20) == 2) {
        // vld1
        int Vd = (instr->Bit(22) << 4) | instr->VdValue();
        int Rn = instr->VnValue();
        int type = instr->Bits(11, 8);
        int size = instr->Bits(7, 6);
        int align = instr->Bits(5, 4);
        int Rm = instr->VmValue();
        // The size field is log2 of the element size in bytes.
        out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_,
                                        "vld1.%d ", (1 << size) << 3);
        FormatNeonList(Vd, type);
        Print(", ");
        FormatNeonMemory(Rn, align, Rm);
      } else {
        Unknown(instr);
      }
      break;
    case 0xA:
    case 0xB:
      if ((instr->Bits(22, 20) == 5) && (instr->Bits(15, 12) == 0xf)) {
        // pld with a 12-bit immediate; bit 23 selects add (1) or subtract (0).
        int Rn = instr->Bits(19, 16);
        int offset = instr->Bits(11, 0);
        if (offset == 0) {
          out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_,
                                          "pld [r%d]", Rn);
        } else if (instr->Bit(23) == 0) {
          out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_,
                                          "pld [r%d, #-%d]", Rn, offset);
        } else {
          out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_,
                                          "pld [r%d, #+%d]", Rn, offset);
        }
      } else {
        Unknown(instr);
      }
      break;
    default:
      Unknown(instr);
      break;
  }
}
#undef VERIFIY
bool Decoder::IsConstantPoolAt(byte* instr_ptr) {
@ -1449,7 +1665,7 @@ int Decoder::InstructionDecode(byte* instr_ptr) {
"%08x ",
instr->InstructionBits());
if (instr->ConditionField() == kSpecialCondition) {
Unknown(instr);
DecodeSpecialCondition(instr);
return Instruction::kInstrSize;
}
int instruction_bits = *(reinterpret_cast<int*>(instr_ptr));

View File

@ -919,6 +919,54 @@ void Simulator::set_dw_register(int dreg, const int* dbl) {
}
// Copies the 64-bit contents of D register dreg into *value. Each D register
// occupies two consecutive entries of vfp_registers_.
void Simulator::get_d_register(int dreg, uint64_t* value) {
  ASSERT((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters()));
  memcpy(value, &vfp_registers_[dreg * 2], sizeof(uint64_t));
}
// Overwrites D register dreg with the 64-bit value at *value.
void Simulator::set_d_register(int dreg, const uint64_t* value) {
  ASSERT((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters()));
  memcpy(&vfp_registers_[dreg * 2], value, sizeof(uint64_t));
}
// Copies D register dreg into value, which must have room for two 32-bit
// words.
void Simulator::get_d_register(int dreg, uint32_t* value) {
  ASSERT((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters()));
  memcpy(value, &vfp_registers_[dreg * 2], 2 * sizeof(uint32_t));
}
// Overwrites D register dreg with the two 32-bit words starting at value.
void Simulator::set_d_register(int dreg, const uint32_t* value) {
  ASSERT((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters()));
  memcpy(&vfp_registers_[dreg * 2], value, 2 * sizeof(uint32_t));
}
// Copies Q register qreg into value, which must have room for two 64-bit
// words. Each Q register occupies four consecutive entries of vfp_registers_.
void Simulator::get_q_register(int qreg, uint64_t* value) {
  ASSERT((qreg >= 0) && (qreg < num_q_registers));
  memcpy(value, &vfp_registers_[qreg * 4], 2 * sizeof(uint64_t));
}
// Overwrites Q register qreg with the two 64-bit words starting at value.
void Simulator::set_q_register(int qreg, const uint64_t* value) {
  ASSERT((qreg >= 0) && (qreg < num_q_registers));
  memcpy(&vfp_registers_[qreg * 4], value, 2 * sizeof(uint64_t));
}
// Copies Q register qreg into value, which must have room for four 32-bit
// words.
void Simulator::get_q_register(int qreg, uint32_t* value) {
  ASSERT((qreg >= 0) && (qreg < num_q_registers));
  memcpy(value, &vfp_registers_[qreg * 4], 4 * sizeof(uint32_t));
}
// Overwrites Q register qreg with the four 32-bit words starting at value.
void Simulator::set_q_register(int qreg, const uint32_t* value) {
  ASSERT((qreg >= 0) && (qreg < num_q_registers));
  memcpy(&vfp_registers_[qreg * 4], value, 4 * sizeof(uint32_t));
}
// Raw access to the PC register.
void Simulator::set_pc(int32_t value) {
pc_modified_ = true;
@ -2599,36 +2647,148 @@ void Simulator::DecodeType3(Instruction* instr) {
break;
}
case ia_x: {
if (instr->HasW()) {
ASSERT(instr->Bits(5, 4) == 0x1);
if (instr->Bit(22) == 0x1) { // USAT.
int32_t sat_pos = instr->Bits(20, 16);
int32_t sat_val = (1 << sat_pos) - 1;
int32_t shift = instr->Bits(11, 7);
int32_t shift_type = instr->Bit(6);
int32_t rm_val = get_register(instr->RmValue());
if (shift_type == 0) { // LSL
rm_val <<= shift;
} else { // ASR
rm_val >>= shift;
if (instr->Bit(4) == 0) {
// Memop.
} else {
if (instr->Bit(5) == 0) {
switch (instr->Bits(22, 21)) {
case 0:
if (instr->Bit(20) == 0) {
if (instr->Bit(6) == 0) {
// Pkhbt.
uint32_t rn_val = get_register(rn);
uint32_t rm_val = get_register(instr->RmValue());
int32_t shift = instr->Bits(11, 7);
rm_val <<= shift;
set_register(rd, (rn_val & 0xFFFF) | (rm_val & 0xFFFF0000U));
} else {
// Pkhtb.
uint32_t rn_val = get_register(rn);
int32_t rm_val = get_register(instr->RmValue());
int32_t shift = instr->Bits(11, 7);
if (shift == 0) {
shift = 32;
}
rm_val >>= shift;
set_register(rd, (rn_val & 0xFFFF0000U) | (rm_val & 0xFFFF));
}
} else {
UNIMPLEMENTED();
}
break;
case 1:
UNIMPLEMENTED();
break;
case 2:
UNIMPLEMENTED();
break;
case 3: {
// Usat.
int32_t sat_pos = instr->Bits(20, 16);
int32_t sat_val = (1 << sat_pos) - 1;
int32_t shift = instr->Bits(11, 7);
int32_t shift_type = instr->Bit(6);
int32_t rm_val = get_register(instr->RmValue());
if (shift_type == 0) { // LSL
rm_val <<= shift;
} else { // ASR
rm_val >>= shift;
}
// If saturation occurs, the Q flag should be set in the CPSR.
// There is no Q flag yet, and no instruction (MRS) to read the
// CPSR directly.
if (rm_val > sat_val) {
rm_val = sat_val;
} else if (rm_val < 0) {
rm_val = 0;
}
set_register(rd, rm_val);
break;
}
}
// If saturation occurs, the Q flag should be set in the CPSR.
// There is no Q flag yet, and no instruction (MRS) to read the
// CPSR directly.
if (rm_val > sat_val) {
rm_val = sat_val;
} else if (rm_val < 0) {
rm_val = 0;
} else {
switch (instr->Bits(22, 21)) {
case 0:
UNIMPLEMENTED();
break;
case 1:
UNIMPLEMENTED();
break;
case 2:
if ((instr->Bit(20) == 0) && (instr->Bits(9, 6) == 1)) {
if (instr->Bits(19, 16) == 0xF) {
// Uxtb16.
uint32_t rm_val = get_register(instr->RmValue());
int32_t rotate = instr->Bits(11, 10);
switch (rotate) {
case 0:
break;
case 1:
rm_val = (rm_val >> 8) | (rm_val << 24);
break;
case 2:
rm_val = (rm_val >> 16) | (rm_val << 16);
break;
case 3:
rm_val = (rm_val >> 24) | (rm_val << 8);
break;
}
set_register(rd,
(rm_val & 0xFF) | (rm_val & 0xFF0000));
} else {
UNIMPLEMENTED();
}
} else {
UNIMPLEMENTED();
}
break;
case 3:
if ((instr->Bit(20) == 0) && (instr->Bits(9, 6) == 1)) {
if (instr->Bits(19, 16) == 0xF) {
// Uxtb.
uint32_t rm_val = get_register(instr->RmValue());
int32_t rotate = instr->Bits(11, 10);
switch (rotate) {
case 0:
break;
case 1:
rm_val = (rm_val >> 8) | (rm_val << 24);
break;
case 2:
rm_val = (rm_val >> 16) | (rm_val << 16);
break;
case 3:
rm_val = (rm_val >> 24) | (rm_val << 8);
break;
}
set_register(rd, (rm_val & 0xFF));
} else {
// Uxtab.
uint32_t rn_val = get_register(rn);
uint32_t rm_val = get_register(instr->RmValue());
int32_t rotate = instr->Bits(11, 10);
switch (rotate) {
case 0:
break;
case 1:
rm_val = (rm_val >> 8) | (rm_val << 24);
break;
case 2:
rm_val = (rm_val >> 16) | (rm_val << 16);
break;
case 3:
rm_val = (rm_val >> 24) | (rm_val << 8);
break;
}
set_register(rd, rn_val + (rm_val & 0xFF));
}
} else {
UNIMPLEMENTED();
}
break;
}
set_register(rd, rm_val);
} else { // SSAT.
UNIMPLEMENTED();
}
return;
} else {
Format(instr, "'memop'cond'b 'rd, ['rn], +'shift_rm");
UNIMPLEMENTED();
}
break;
}
@ -3352,6 +3512,156 @@ void Simulator::DecodeType6CoprocessorIns(Instruction* instr) {
}
// Widens every lane of the 64-bit D register image at |src| into a lane of
// twice the width and stores the resulting 128-bit image at |dst| (two
// uint64_t). The conversion sign-extends when Narrow/Wide are signed types and
// zero-extends when they are unsigned. Wide must be twice the size of Narrow.
template <typename Narrow, typename Wide>
static void WidenLanes(const uint64_t* src, uint64_t* dst) {
  static const int kLanes = 8 / sizeof(Narrow);
  Narrow from[kLanes];
  Wide to[kLanes];
  memcpy(from, src, sizeof(from));
  for (int lane = 0; lane < kLanes; lane++) {
    to[lane] = from[lane];
  }
  memcpy(dst, to, sizeof(to));
}


// Performs the data movement of vmovl: |imm3| is the source lane size in bytes
// (1, 2 or 4). Any other value leaves |dst| untouched.
static void VmovlWiden(int imm3,
                       bool is_unsigned,
                       const uint64_t* src,
                       uint64_t* dst) {
  switch (imm3) {
    case 1:
      if (is_unsigned) {
        WidenLanes<uint8_t, uint16_t>(src, dst);
      } else {
        WidenLanes<int8_t, int16_t>(src, dst);
      }
      break;
    case 2:
      if (is_unsigned) {
        WidenLanes<uint16_t, uint32_t>(src, dst);
      } else {
        WidenLanes<int16_t, int32_t>(src, dst);
      }
      break;
    case 4:
      if (is_unsigned) {
        WidenLanes<uint32_t, uint64_t>(src, dst);
      } else {
        WidenLanes<int32_t, int64_t>(src, dst);
      }
      break;
    default:
      break;
  }
}


// Simulates the instructions that use the special condition field:
// vmovl (signed and unsigned), vld1/vst1 of one to four D registers, and pld
// (which is ignored). Everything else is reported as UNIMPLEMENTED.
void Simulator::DecodeSpecialCondition(Instruction* instr) {
  switch (instr->SpecialValue()) {
    case 5:
      if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
          (instr->Bit(4) == 1)) {
        // vmovl signed
        // NOTE(review): Vd is used directly as the Q register index below,
        // which matches the encoding the disassembler test expects for
        // "vmovl.u8 q4, d2" (Vd field = 4). The architectural encoding appears
        // to be D:Vd = 2 * q - TODO confirm and fix together with the
        // assembler.
        int Vd = (instr->Bit(22) << 4) | instr->VdValue();
        int Vm = (instr->Bit(5) << 4) | instr->VmValue();
        int imm3 = instr->Bits(21, 19);
        if ((imm3 != 1) && (imm3 != 2) && (imm3 != 4)) UNIMPLEMENTED();
        uint64_t from;
        get_d_register(Vm, &from);
        // Zero-initialised so that nothing indeterminate reaches the register
        // file if imm3 was not one of the supported lane sizes.
        uint64_t to[2] = { 0, 0 };
        VmovlWiden(imm3, false, &from, to);
        set_q_register(Vd, to);
      } else {
        UNIMPLEMENTED();
      }
      break;
    case 7:
      if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
          (instr->Bit(4) == 1)) {
        // vmovl unsigned
        // NOTE(review): same Q register indexing caveat as the signed case.
        int Vd = (instr->Bit(22) << 4) | instr->VdValue();
        int Vm = (instr->Bit(5) << 4) | instr->VmValue();
        int imm3 = instr->Bits(21, 19);
        if ((imm3 != 1) && (imm3 != 2) && (imm3 != 4)) UNIMPLEMENTED();
        uint64_t from;
        get_d_register(Vm, &from);
        uint64_t to[2] = { 0, 0 };
        VmovlWiden(imm3, true, &from, to);
        set_q_register(Vd, to);
      } else {
        UNIMPLEMENTED();
      }
      break;
    case 8:
      if (instr->Bits(21, 20) == 0) {
        // vst1
        int Vd = (instr->Bit(22) << 4) | instr->VdValue();
        int Rn = instr->VnValue();
        int type = instr->Bits(11, 8);
        int Rm = instr->VmValue();
        int32_t address = get_register(Rn);
        // Number of consecutive D registers to store, starting at Vd.
        int regs = 0;
        switch (type) {
          case nlt_1:
            regs = 1;
            break;
          case nlt_2:
            regs = 2;
            break;
          case nlt_3:
            regs = 3;
            break;
          case nlt_4:
            regs = 4;
            break;
          default:
            UNIMPLEMENTED();
            break;
        }
        // Each D register is written as two 32-bit words.
        int r = 0;
        while (r < regs) {
          uint32_t data[2];
          get_d_register(Vd + r, data);
          WriteW(address, data[0], instr);
          WriteW(address + 4, data[1], instr);
          address += 8;
          r++;
        }
        // Base register update: an Rm field of 15 leaves Rn alone, 13 advances
        // Rn by the number of bytes stored, anything else adds register Rm.
        if (Rm != 15) {
          if (Rm == 13) {
            set_register(Rn, address);
          } else {
            set_register(Rn, get_register(Rn) + get_register(Rm));
          }
        }
      } else if (instr->Bits(21, 20) == 2) {
        // vld1
        int Vd = (instr->Bit(22) << 4) | instr->VdValue();
        int Rn = instr->VnValue();
        int type = instr->Bits(11, 8);
        int Rm = instr->VmValue();
        int32_t address = get_register(Rn);
        // Number of consecutive D registers to load, starting at Vd.
        int regs = 0;
        switch (type) {
          case nlt_1:
            regs = 1;
            break;
          case nlt_2:
            regs = 2;
            break;
          case nlt_3:
            regs = 3;
            break;
          case nlt_4:
            regs = 4;
            break;
          default:
            UNIMPLEMENTED();
            break;
        }
        // Each D register is read as two 32-bit words.
        int r = 0;
        while (r < regs) {
          uint32_t data[2];
          data[0] = ReadW(address, instr);
          data[1] = ReadW(address + 4, instr);
          set_d_register(Vd + r, data);
          address += 8;
          r++;
        }
        // Base register update: an Rm field of 15 leaves Rn alone, 13 advances
        // Rn by the number of bytes loaded, anything else adds register Rm.
        if (Rm != 15) {
          if (Rm == 13) {
            set_register(Rn, address);
          } else {
            set_register(Rn, get_register(Rn) + get_register(Rm));
          }
        }
      } else {
        UNIMPLEMENTED();
      }
      break;
    case 0xA:
    case 0xB:
      if ((instr->Bits(22, 20) == 5) && (instr->Bits(15, 12) == 0xf)) {
        // pld: ignore instruction.
      } else {
        UNIMPLEMENTED();
      }
      break;
    default:
      UNIMPLEMENTED();
      break;
  }
}
// Executes the current instruction.
void Simulator::InstructionDecode(Instruction* instr) {
if (v8::internal::FLAG_check_icache) {
@ -3368,7 +3678,7 @@ void Simulator::InstructionDecode(Instruction* instr) {
PrintF(" 0x%08x %s\n", reinterpret_cast<intptr_t>(instr), buffer.start());
}
if (instr->ConditionField() == kSpecialCondition) {
UNIMPLEMENTED();
DecodeSpecialCondition(instr);
} else if (ConditionallyExecute(instr)) {
switch (instr->TypeValue()) {
case 0:

View File

@ -144,7 +144,10 @@ class Simulator {
d8, d9, d10, d11, d12, d13, d14, d15,
d16, d17, d18, d19, d20, d21, d22, d23,
d24, d25, d26, d27, d28, d29, d30, d31,
num_d_registers = 32
num_d_registers = 32,
q0 = 0, q1, q2, q3, q4, q5, q6, q7,
q8, q9, q10, q11, q12, q13, q14, q15,
num_q_registers = 16
};
explicit Simulator(Isolate* isolate);
@ -163,6 +166,15 @@ class Simulator {
void set_dw_register(int dreg, const int* dbl);
// Support for VFP.
void get_d_register(int dreg, uint64_t* value);
void set_d_register(int dreg, const uint64_t* value);
void get_d_register(int dreg, uint32_t* value);
void set_d_register(int dreg, const uint32_t* value);
void get_q_register(int qreg, uint64_t* value);
void set_q_register(int qreg, const uint64_t* value);
void get_q_register(int qreg, uint32_t* value);
void set_q_register(int qreg, const uint32_t* value);
void set_s_register(int reg, unsigned int value);
unsigned int get_s_register(int reg) const;
@ -328,6 +340,7 @@ class Simulator {
// Support for VFP.
void DecodeTypeVFP(Instruction* instr);
void DecodeType6CoprocessorIns(Instruction* instr);
void DecodeSpecialCondition(Instruction* instr);
void DecodeVMOVBetweenCoreAndSinglePrecisionRegisters(Instruction* instr);
void DecodeVCMP(Instruction* instr);

View File

@ -348,6 +348,8 @@ DEFINE_bool(enable_vfp3, ENABLE_VFP3_DEFAULT,
"enable use of VFP3 instructions if available")
DEFINE_bool(enable_armv7, ENABLE_ARMV7_DEFAULT,
"enable use of ARMv7 instructions if available (ARM only)")
DEFINE_bool(enable_neon, true,
"enable use of NEON instructions if available (ARM only)")
DEFINE_bool(enable_sudiv, true,
"enable use of SDIV and UDIV instructions if available (ARM only)")
DEFINE_bool(enable_movw_movt, false,

View File

@ -89,12 +89,6 @@ namespace internal {
#elif defined(__ARMEL__)
#define V8_HOST_ARCH_ARM 1
#define V8_HOST_ARCH_32_BIT 1
// Some CPU-OS combinations allow unaligned access on ARM. We assume
// that unaligned accesses are not allowed unless the build system
// defines the CAN_USE_UNALIGNED_ACCESSES macro to be non-zero.
#if CAN_USE_UNALIGNED_ACCESSES
#define V8_HOST_CAN_READ_UNALIGNED 1
#endif
#elif defined(__MIPSEL__)
#define V8_HOST_ARCH_MIPS 1
#define V8_HOST_ARCH_32_BIT 1
@ -102,6 +96,16 @@ namespace internal {
#error Host architecture was not detected as supported by v8
#endif
#if defined(__ARM_ARCH_7A__) || \
defined(__ARM_ARCH_7R__) || \
defined(__ARM_ARCH_7__)
# define CAN_USE_ARMV7_INSTRUCTIONS 1
# ifndef CAN_USE_VFP3_INSTRUCTIONS
# define CAN_USE_VFP3_INSTRUCTIONS
# endif
#endif
// Target architecture detection. This may be set externally. If not, detect
// in the same way as the host architecture, that is, target the native
// environment as presented by the compiler.

View File

@ -146,6 +146,9 @@ bool OS::ArmCpuHasFeature(CpuFeature feature) {
case VFP3:
search_string = "vfpv3";
break;
case NEON:
search_string = "neon";
break;
case ARMv7:
search_string = "ARMv7";
break;
@ -200,6 +203,36 @@ CpuImplementer OS::GetCpuImplementer() {
}
// Determines which ARM core the process is running on by looking for a known
// "CPU part" entry via CPUInfoContainsString(). Only ARM_IMPLEMENTER parts are
// recognised; everything else maps to CPU_UNKNOWN. The first result is cached
// in function-local statics and returned by every later call, regardless of
// the |implementer| passed to those calls.
CpuPart OS::GetCpuPart(CpuImplementer implementer) {
  static bool has_cached_part = false;
  static CpuPart cached_part = CPU_UNKNOWN;

  if (!has_cached_part) {
    struct KnownPart {
      const char* cpuinfo_entry;
      CpuPart part;
    };
    // Probed in this order; the first matching entry wins.
    static const KnownPart kKnownParts[] = {
      { "CPU part\t: 0xc0f", CORTEX_A15 },
      { "CPU part\t: 0xc0c", CORTEX_A12 },
      { "CPU part\t: 0xc09", CORTEX_A9 },
      { "CPU part\t: 0xc08", CORTEX_A8 },
      { "CPU part\t: 0xc07", CORTEX_A7 },
      { "CPU part\t: 0xc05", CORTEX_A5 }
    };
    const size_t kKnownPartCount = sizeof(kKnownParts) / sizeof(kKnownParts[0]);

    CpuPart detected = CPU_UNKNOWN;
    if (implementer == ARM_IMPLEMENTER) {
      for (size_t i = 0; i < kKnownPartCount; ++i) {
        if (CPUInfoContainsString(kKnownParts[i].cpuinfo_entry)) {
          detected = kKnownParts[i].part;
          break;
        }
      }
    }
    cached_part = detected;
    has_cached_part = true;
  }
  return cached_part;
}
bool OS::ArmUsingHardFloat() {
// GCC versions 4.6 and above define __ARM_PCS or __ARM_PCS_VFP to specify
// the Floating Point ABI used (PCS stands for Procedure Call Standard).

View File

@ -220,6 +220,11 @@ CpuImplementer OS::GetCpuImplementer() {
}
// Stub: CPU part detection is not implemented for this platform.
CpuPart OS::GetCpuPart(CpuImplementer implementer) {
  UNIMPLEMENTED();
  // Keeps the function well-formed (no flowing off the end of a non-void
  // function) should UNIMPLEMENTED() ever return.
  return CPU_UNKNOWN;
}
// Stub: runtime CPU feature detection is not implemented for this platform.
bool OS::ArmCpuHasFeature(CpuFeature feature) {
  UNIMPLEMENTED();
  // Keeps the function well-formed (no flowing off the end of a non-void
  // function) should UNIMPLEMENTED() ever return.
  return false;
}

View File

@ -341,7 +341,26 @@ void OS::MemMove(void* dest, const void* src, size_t size) {
(*memmove_function)(dest, src, size);
}
#endif // V8_TARGET_ARCH_IA32
#elif defined(V8_HOST_ARCH_ARM)
// Portable C++ fallback that copies |chars| 8-bit values from |src| into
// |dest|, zero-extending each one to 16 bits.
void OS::MemCopyUint16Uint8Wrapper(uint16_t* dest,
                                   const uint8_t* src,
                                   size_t chars) {
  for (size_t i = 0; i < chars; ++i) {
    dest[i] = static_cast<uint16_t>(src[i]);
  }
}
OS::MemCopyUint8Function OS::memcopy_uint8_function = &OS::MemCopyUint8Wrapper;
OS::MemCopyUint16Uint8Function OS::memcopy_uint16_uint8_function =
&OS::MemCopyUint16Uint8Wrapper;
// Defined in codegen-arm.cc.
OS::MemCopyUint8Function CreateMemCopyUint8Function(
OS::MemCopyUint8Function stub);
OS::MemCopyUint16Uint8Function CreateMemCopyUint16Uint8Function(
OS::MemCopyUint16Uint8Function stub);
#endif
void POSIXPostSetUp() {
@ -350,6 +369,11 @@ void POSIXPostSetUp() {
if (generated_memmove != NULL) {
memmove_function = generated_memmove;
}
#elif defined(V8_HOST_ARCH_ARM)
OS::memcopy_uint8_function =
CreateMemCopyUint8Function(&OS::MemCopyUint8Wrapper);
OS::memcopy_uint16_uint8_function =
CreateMemCopyUint16Uint8Function(&OS::MemCopyUint16Uint8Wrapper);
#endif
init_fast_sin_function();
init_fast_cos_function();

View File

@ -315,6 +315,9 @@ class OS {
// Support runtime detection of Cpu implementer
static CpuImplementer GetCpuImplementer();
// Support runtime detection of Cpu part
static CpuPart GetCpuPart(CpuImplementer implementer);
// Support runtime detection of VFP3 on ARM CPUs.
static bool ArmCpuHasFeature(CpuFeature feature);
@ -343,7 +346,42 @@ class OS {
static void MemCopy(void* dest, const void* src, size_t size) {
MemMove(dest, src, size);
}
#else // V8_TARGET_ARCH_IA32
#elif defined(V8_HOST_ARCH_ARM)
typedef void (*MemCopyUint8Function)(uint8_t* dest,
const uint8_t* src,
size_t size);
static MemCopyUint8Function memcopy_uint8_function;
// Plain memcpy-based copy of |chars| bytes from |src| to |dest|, with the
// MemCopyUint8Function signature.
static void MemCopyUint8Wrapper(uint8_t* dest,
                                const uint8_t* src,
                                size_t chars) {
  // Elements are single bytes, so the element count is also the byte count.
  const size_t byte_count = chars * sizeof(*src);
  memcpy(dest, src, byte_count);
}
// For values < 16, the assembler function is slower than the inlined C code.
static const int kMinComplexMemCopy = 16;
// Copies |size| bytes from |src| to |dest| through the function currently
// installed in memcopy_uint8_function.
static void MemCopy(void* dest, const void* src, size_t size) {
  uint8_t* const to = static_cast<uint8_t*>(dest);
  const uint8_t* const from = static_cast<const uint8_t*>(src);
  (*memcopy_uint8_function)(to, from, size);
}
static void MemMove(void* dest, const void* src, size_t size) {
memmove(dest, src, size);
}
typedef void (*MemCopyUint16Uint8Function)(uint16_t* dest,
const uint8_t* src,
size_t size);
static MemCopyUint16Uint8Function memcopy_uint16_uint8_function;
static void MemCopyUint16Uint8Wrapper(uint16_t* dest,
const uint8_t* src,
size_t chars);
// For values < 12, the assembler function is slower than the inlined C code.
static const int kMinComplexConvertMemCopy = 12;
static void MemCopyUint16Uint8(uint16_t* dest,
const uint8_t* src,
size_t size) {
(*memcopy_uint16_uint8_function)(dest, src, size);
}
#else
// Copy memory area to disjoint memory area.
static void MemCopy(void* dest, const void* src, size_t size) {
memcpy(dest, src, size);

View File

@ -418,6 +418,17 @@ enum CpuImplementer {
};
// CPU core models that OS::GetCpuPart() can report. CPU_UNKNOWN is used when
// the core could not be identified.
enum CpuPart {
  CPU_UNKNOWN = 0,
  CORTEX_A15 = 1,
  CORTEX_A12 = 2,
  CORTEX_A9 = 3,
  CORTEX_A8 = 4,
  CORTEX_A7 = 5,
  CORTEX_A5 = 6
};
// Feature flags bit positions. They are mostly based on the CPUID spec.
// (We assign CPUID itself to one of the currently reserved bits --
// feel free to change this if needed.)
@ -434,6 +445,7 @@ enum CpuFeature { SSE4_1 = 32 + 19, // x86
UNALIGNED_ACCESSES = 4, // ARM
MOVW_MOVT_IMMEDIATE_LOADS = 5, // ARM
VFP32DREGS = 6, // ARM
NEON = 7, // ARM
SAHF = 0, // x86
FPU = 1}; // MIPS

View File

@ -317,6 +317,11 @@ template <typename sourcechar, typename sinkchar>
INLINE(static void CopyCharsUnsigned(sinkchar* dest,
const sourcechar* src,
int chars));
#if defined(V8_HOST_ARCH_ARM)
INLINE(void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, int chars));
INLINE(void CopyCharsUnsigned(uint16_t* dest, const uint8_t* src, int chars));
INLINE(void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src, int chars));
#endif
// Copy from ASCII/16bit chars to ASCII/16bit chars.
template <typename sourcechar, typename sinkchar>
@ -375,6 +380,105 @@ void CopyCharsUnsigned(sinkchar* dest, const sourcechar* src, int chars) {
}
#if defined(V8_HOST_ARCH_ARM)
// Copies |chars| one-byte characters from |src| to |dest|.
// Counts 0..15 are handled by a dedicated case with a compile-time constant
// size (presumably so the compiler can expand each memcpy inline); counts of
// 16 and above are forwarded to OS::MemCopy. The cut-over lines up with
// OS::kMinComplexMemCopy (16), below which the assembler copy is documented
// as slower than inlined C code.
// NOTE(review): |chars| is cast to unsigned for the switch, so a negative
// count is not rejected; it falls into the default case.
void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, int chars) {
switch (static_cast<unsigned>(chars)) {
case 0:
break;
case 1:
*dest = *src;
break;
case 2:
memcpy(dest, src, 2);
break;
case 3:
memcpy(dest, src, 3);
break;
case 4:
memcpy(dest, src, 4);
break;
case 5:
memcpy(dest, src, 5);
break;
case 6:
memcpy(dest, src, 6);
break;
case 7:
memcpy(dest, src, 7);
break;
case 8:
memcpy(dest, src, 8);
break;
case 9:
memcpy(dest, src, 9);
break;
case 10:
memcpy(dest, src, 10);
break;
case 11:
memcpy(dest, src, 11);
break;
case 12:
memcpy(dest, src, 12);
break;
case 13:
memcpy(dest, src, 13);
break;
case 14:
memcpy(dest, src, 14);
break;
case 15:
memcpy(dest, src, 15);
break;
default:
// 16 bytes or more: use the installed OS::MemCopy implementation.
OS::MemCopy(dest, src, chars);
break;
}
}
// Copies |chars| one-byte characters from |src| into two-byte characters at
// |dest|. Short inputs (fewer than OS::kMinComplexConvertMemCopy characters)
// use the C++ loop directly; longer ones go through the installed
// OS::MemCopyUint16Uint8 function.
void CopyCharsUnsigned(uint16_t* dest, const uint8_t* src, int chars) {
  if (chars < OS::kMinComplexConvertMemCopy) {
    OS::MemCopyUint16Uint8Wrapper(dest, src, chars);
    return;
  }
  OS::MemCopyUint16Uint8(dest, src, chars);
}
// Copies |chars| two-byte characters from |src| to |dest|.
// Counts 0..7 are handled by a dedicated case whose memcpy size is the
// constant byte count (2 * chars); counts of 8 and above (16 bytes or more)
// are forwarded to OS::MemCopy with the size converted to bytes.
// NOTE(review): |chars| is cast to unsigned for the switch, so a negative
// count is not rejected; it falls into the default case.
void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src, int chars) {
switch (static_cast<unsigned>(chars)) {
case 0:
break;
case 1:
*dest = *src;
break;
case 2:
memcpy(dest, src, 4);
break;
case 3:
memcpy(dest, src, 6);
break;
case 4:
memcpy(dest, src, 8);
break;
case 5:
memcpy(dest, src, 10);
break;
case 6:
memcpy(dest, src, 12);
break;
case 7:
memcpy(dest, src, 14);
break;
default:
// OS::MemCopy takes a byte count, hence the multiplication.
OS::MemCopy(dest, src, chars * sizeof(*dest));
break;
}
}
#endif
class StringBuilder : public SimpleStringBuilder {
public:
explicit StringBuilder(int size) : SimpleStringBuilder(size) { }

View File

@ -1227,4 +1227,186 @@ TEST(14) {
CHECK_EQ(kArmNanLower32, BitCast<int64_t>(t.div_result) & 0xffffffffu);
}
TEST(15) {
// Test the Neon instructions.
CcTest::InitializeVM();
Isolate* isolate = Isolate::Current();
HandleScope scope(isolate);
// src0..src7 / dst0..dst7: source and destination of the 32-byte copy.
// srcA0..srcA1 / dstA0..dstA3: 8 source bytes and the 16 bytes they are
// widened into.
typedef struct {
uint32_t src0;
uint32_t src1;
uint32_t src2;
uint32_t src3;
uint32_t src4;
uint32_t src5;
uint32_t src6;
uint32_t src7;
uint32_t dst0;
uint32_t dst1;
uint32_t dst2;
uint32_t dst3;
uint32_t dst4;
uint32_t dst5;
uint32_t dst6;
uint32_t dst7;
uint32_t srcA0;
uint32_t srcA1;
uint32_t dstA0;
uint32_t dstA1;
uint32_t dstA2;
uint32_t dstA3;
} T;
T t;
// Create a function that accepts &t, copies 32 bytes with vld1/vst1, and
// widens 8 bytes into 8 halfwords with vmovl.
Assembler assm(isolate, NULL, 0);
// The whole test body is skipped when NEON is not supported.
if (CpuFeatures::IsSupported(NEON)) {
CpuFeatureScope scope(&assm, NEON);
__ stm(db_w, sp, r4.bit() | lr.bit());
// Move 32 bytes with neon.
__ add(r4, r0, Operand(OFFSET_OF(T, src0)));
__ vld1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(r4));
__ add(r4, r0, Operand(OFFSET_OF(T, dst0)));
__ vst1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(r4));
// Expand 8 bytes into 8 halfwords (16 bits each).
__ add(r4, r0, Operand(OFFSET_OF(T, srcA0)));
__ vld1(Neon8, NeonListOperand(d0), NeonMemOperand(r4));
__ vmovl(NeonU8, q0, d0);
__ add(r4, r0, Operand(OFFSET_OF(T, dstA0)));
__ vst1(Neon8, NeonListOperand(d0, 2), NeonMemOperand(r4));
__ ldm(ia_w, sp, r4.bit() | pc.bit());
CodeDesc desc;
assm.GetCode(&desc);
Object* code = isolate->heap()->CreateCode(
desc,
Code::ComputeFlags(Code::STUB),
Handle<Code>())->ToObjectChecked();
CHECK(code->IsCode());
#ifdef DEBUG
Code::cast(code)->Print();
#endif
F3 f = FUNCTION_CAST<F3>(Code::cast(code)->entry());
t.src0 = 0x01020304;
t.src1 = 0x11121314;
t.src2 = 0x21222324;
t.src3 = 0x31323334;
t.src4 = 0x41424344;
t.src5 = 0x51525354;
t.src6 = 0x61626364;
t.src7 = 0x71727374;
t.dst0 = 0;
t.dst1 = 0;
t.dst2 = 0;
t.dst3 = 0;
t.dst4 = 0;
t.dst5 = 0;
t.dst6 = 0;
t.dst7 = 0;
t.srcA0 = 0x41424344;
t.srcA1 = 0x81828384;
t.dstA0 = 0;
t.dstA1 = 0;
t.dstA2 = 0;
t.dstA3 = 0;
Object* dummy = CALL_GENERATED_CODE(f, &t, 0, 0, 0, 0);
USE(dummy);
// The 32-byte copy must reproduce src0..src7 unchanged.
CHECK_EQ(0x01020304, t.dst0);
CHECK_EQ(0x11121314, t.dst1);
CHECK_EQ(0x21222324, t.dst2);
CHECK_EQ(0x31323334, t.dst3);
CHECK_EQ(0x41424344, t.dst4);
CHECK_EQ(0x51525354, t.dst5);
CHECK_EQ(0x61626364, t.dst6);
CHECK_EQ(0x71727374, t.dst7);
// Each source byte becomes a zero-extended halfword; the expected words
// below assume a little-endian memory layout, so every destination word
// holds two consecutive source bytes (e.g. 0x44 and 0x43 -> 0x00430044).
CHECK_EQ(0x00430044, t.dstA0);
CHECK_EQ(0x00410042, t.dstA1);
CHECK_EQ(0x00830084, t.dstA2);
CHECK_EQ(0x00810082, t.dstA3);
}
}
TEST(16) {
// Test the pkh, uxtb, uxtab and uxtb16 instructions.
CcTest::InitializeVM();
Isolate* isolate = Isolate::Current();
HandleScope scope(isolate);
// src0..src2 are the instruction inputs; dst0..dst4 receive one result per
// instruction under test.
typedef struct {
uint32_t src0;
uint32_t src1;
uint32_t src2;
uint32_t dst0;
uint32_t dst1;
uint32_t dst2;
uint32_t dst3;
uint32_t dst4;
} T;
T t;
// Create a function that accepts &t, loads the source words, applies
// pkhbt, pkhtb, uxtb16, uxtb and uxtab, and stores each result.
Assembler assm(isolate, NULL, 0);
__ stm(db_w, sp, r4.bit() | lr.bit());
// Keep &t in r4 so that r0..r2 are free for operands and results.
__ mov(r4, Operand(r0));
__ ldr(r0, MemOperand(r4, OFFSET_OF(T, src0)));
__ ldr(r1, MemOperand(r4, OFFSET_OF(T, src1)));
__ pkhbt(r2, r0, Operand(r1, LSL, 8));
__ str(r2, MemOperand(r4, OFFSET_OF(T, dst0)));
__ pkhtb(r2, r0, Operand(r1, ASR, 8));
__ str(r2, MemOperand(r4, OFFSET_OF(T, dst1)));
__ uxtb16(r2, Operand(r0, ROR, 8));
__ str(r2, MemOperand(r4, OFFSET_OF(T, dst2)));
__ uxtb(r2, Operand(r0, ROR, 8));
__ str(r2, MemOperand(r4, OFFSET_OF(T, dst3)));
// uxtab uses src2 as the value being added to.
__ ldr(r0, MemOperand(r4, OFFSET_OF(T, src2)));
__ uxtab(r2, r0, Operand(r1, ROR, 8));
__ str(r2, MemOperand(r4, OFFSET_OF(T, dst4)));
__ ldm(ia_w, sp, r4.bit() | pc.bit());
CodeDesc desc;
assm.GetCode(&desc);
Object* code = isolate->heap()->CreateCode(
desc,
Code::ComputeFlags(Code::STUB),
Handle<Code>())->ToObjectChecked();
CHECK(code->IsCode());
#ifdef DEBUG
Code::cast(code)->Print();
#endif
F3 f = FUNCTION_CAST<F3>(Code::cast(code)->entry());
t.src0 = 0x01020304;
t.src1 = 0x11121314;
t.src2 = 0x11121300;
t.dst0 = 0;
t.dst1 = 0;
t.dst2 = 0;
t.dst3 = 0;
t.dst4 = 0;
Object* dummy = CALL_GENERATED_CODE(f, &t, 0, 0, 0, 0);
USE(dummy);
// pkhbt: low half of src0 (0x0304), high half of src1 << 8 (0x1213).
CHECK_EQ(0x12130304, t.dst0);
// pkhtb: high half of src0 (0x0102), low half of src1 >> 8 (0x1213).
CHECK_EQ(0x01021213, t.dst1);
// uxtb16: bytes 0 and 2 of src0 rotated right by 8 (0x04010203).
CHECK_EQ(0x00010003, t.dst2);
// uxtb: byte 0 of src0 rotated right by 8.
CHECK_EQ(0x00000003, t.dst3);
// uxtab: src2 plus byte 0 of src1 rotated right by 8 (0x13).
CHECK_EQ(0x11121313, t.dst4);
}
#undef __

View File

@ -405,6 +405,17 @@ TEST(Type3) {
"e6ff3f94 usat r3, #31, r4, lsl #31");
COMPARE(usat(r8, 0, Operand(r5, ASR, 17)),
"e6e088d5 usat r8, #0, r5, asr #17");
COMPARE(pkhbt(r3, r4, Operand(r5, LSL, 17)),
"e6843895 pkhbt r3, r4, r5, lsl #17");
COMPARE(pkhtb(r3, r4, Operand(r5, ASR, 17)),
"e68438d5 pkhtb r3, r4, r5, asr #17");
COMPARE(uxtb(r3, Operand(r4, ROR, 8)),
"e6ef3474 uxtb r3, r4, ror #8");
COMPARE(uxtab(r3, r4, Operand(r5, ROR, 8)),
"e6e43475 uxtab r3, r4, r5, ror #8");
COMPARE(uxtb16(r3, Operand(r4, ROR, 8)),
"e6cf3474 uxtb16 r3, r4, ror #8");
}
VERIFY_RUN();
@ -662,6 +673,23 @@ TEST(Vfp) {
}
// Checks the assembler encoding and the disassembler text for vld1, vst1 and
// vmovl. The comparisons are only made when NEON is supported.
TEST(Neon) {
SET_UP();
if (CpuFeatures::IsSupported(NEON)) {
CpuFeatureScope scope(&assm, NEON);
COMPARE(vld1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(r1)),
"f421420f vld1.8 {d4, d5, d6, d7}, [r1]");
COMPARE(vst1(Neon16, NeonListOperand(d17, 4), NeonMemOperand(r9)),
"f449124f vst1.16 {d17, d18, d19, d20}, [r9]");
// NOTE(review): the expected encoding below carries 4 in the Vd field
// (bits 15-12) for q4. The ARM encoding of a Q destination appears to be
// D:Vd = 2 * q, which would give 8 - TODO confirm against the ARM ARM.
COMPARE(vmovl(NeonU8, q4, d2),
"f3884a12 vmovl.u8 q4, d2");
}
VERIFY_RUN();
}
TEST(LoadStore) {
SET_UP();
@ -858,6 +886,11 @@ TEST(LoadStore) {
"e1eba7ff strd r10, [fp, #+127]!");
COMPARE(strd(ip, sp, MemOperand(sp, -127, PreIndex)),
"e16dc7ff strd ip, [sp, #-127]!");
COMPARE(pld(MemOperand(r1, 0)),
"f5d1f000 pld [r1]");
COMPARE(pld(MemOperand(r2, 128)),
"f5d2f080 pld [r2, #+128]");
}
VERIFY_RUN();