[arm] Introduce UseScratchRegisterScope for VFP registers
Replace hard-coded uses of `kScratchDoubleReg`, `kScratchDoubleReg2` and `kScratchQuadReg` with the safer `UseScratchRegisterScope`. The reason for doing this is to be able to safely use these scratch registers inside the assembler without having to worry about the code generator using them too. For instance, using this scope showed us that `TryInlineTruncateDoubleToI` is using a FP scratch register while the caller, the `DoubleToI` stub, is using it too. We are safe only because the stub passes the scratch register to `TryInlineTruncateDoubleToI` as an input. Using the scope forces us to explicitly use the input register instead of acquiring a new scratch. Bug: v8:6553 Change-Id: I84c53cd851d31ea33b0e3ef398d7a858b7e3e3c4 Reviewed-on: https://chromium-review.googlesource.com/895460 Reviewed-by: Benedikt Meurer <bmeurer@chromium.org> Commit-Queue: Pierre Langlois <pierre.langlois@arm.com> Cr-Commit-Position: refs/heads/master@{#51061}
This commit is contained in:
parent
43ace493bb
commit
610a361013
@ -384,6 +384,35 @@ void Assembler::set_target_address_at(Isolate* isolate, Address pc,
|
|||||||
|
|
||||||
EnsureSpace::EnsureSpace(Assembler* assembler) { assembler->CheckBuffer(); }
|
EnsureSpace::EnsureSpace(Assembler* assembler) { assembler->CheckBuffer(); }
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
bool UseScratchRegisterScope::CanAcquireVfp() const {
|
||||||
|
VfpRegList* available = assembler_->GetScratchVfpRegisterList();
|
||||||
|
DCHECK_NOT_NULL(available);
|
||||||
|
for (int index = 0; index < T::kNumRegisters; index++) {
|
||||||
|
T reg = T::from_code(index);
|
||||||
|
uint64_t mask = reg.ToVfpRegList();
|
||||||
|
if ((*available & mask) == mask) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
T UseScratchRegisterScope::AcquireVfp() {
|
||||||
|
VfpRegList* available = assembler_->GetScratchVfpRegisterList();
|
||||||
|
DCHECK_NOT_NULL(available);
|
||||||
|
for (int index = 0; index < T::kNumRegisters; index++) {
|
||||||
|
T reg = T::from_code(index);
|
||||||
|
uint64_t mask = reg.ToVfpRegList();
|
||||||
|
if ((*available & mask) == mask) {
|
||||||
|
*available &= ~mask;
|
||||||
|
return reg;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace internal
|
} // namespace internal
|
||||||
} // namespace v8
|
} // namespace v8
|
||||||
|
|
||||||
|
@ -566,10 +566,16 @@ Assembler::Assembler(IsolateData isolate_data, void* buffer, int buffer_size)
|
|||||||
// it's awkward to use CpuFeatures::VFP32DREGS with CpuFeatureScope. To make
|
// it's awkward to use CpuFeatures::VFP32DREGS with CpuFeatureScope. To make
|
||||||
// its use consistent with other features, we always enable it if we can.
|
// its use consistent with other features, we always enable it if we can.
|
||||||
EnableCpuFeature(VFP32DREGS);
|
EnableCpuFeature(VFP32DREGS);
|
||||||
|
// Make sure we pick two D registers which alias a Q register. This way, we
|
||||||
|
// can use a Q as a scratch if NEON is supported.
|
||||||
|
scratch_vfp_register_list_ = d14.ToVfpRegList() | d15.ToVfpRegList();
|
||||||
|
} else {
|
||||||
|
// When VFP32DREGS is not supported, d15 becomes allocatable. Therefore we
|
||||||
|
// cannot use it as a scratch.
|
||||||
|
scratch_vfp_register_list_ = d14.ToVfpRegList();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
Assembler::~Assembler() {
|
Assembler::~Assembler() {
|
||||||
DCHECK_EQ(const_pool_blocked_nesting_, 0);
|
DCHECK_EQ(const_pool_blocked_nesting_, 0);
|
||||||
DCHECK_EQ(code_target_sharing_blocked_nesting_, 0);
|
DCHECK_EQ(code_target_sharing_blocked_nesting_, 0);
|
||||||
@ -1214,6 +1220,7 @@ void Assembler::AddrMode1(Instr instr, Register rd, Register rn,
|
|||||||
DCHECK(x.IsImmediate());
|
DCHECK(x.IsImmediate());
|
||||||
// Upon failure to encode, the opcode should not have changed.
|
// Upon failure to encode, the opcode should not have changed.
|
||||||
DCHECK(opcode == (instr & kOpCodeMask));
|
DCHECK(opcode == (instr & kOpCodeMask));
|
||||||
|
UseScratchRegisterScope temps(this);
|
||||||
Condition cond = Instruction::ConditionField(instr);
|
Condition cond = Instruction::ConditionField(instr);
|
||||||
if ((opcode == MOV) && !set_flags) {
|
if ((opcode == MOV) && !set_flags) {
|
||||||
// Generate a sequence of mov instructions or a load from the constant
|
// Generate a sequence of mov instructions or a load from the constant
|
||||||
@ -1221,7 +1228,7 @@ void Assembler::AddrMode1(Instr instr, Register rd, Register rn,
|
|||||||
DCHECK(!rn.is_valid());
|
DCHECK(!rn.is_valid());
|
||||||
Move32BitImmediate(rd, x, cond);
|
Move32BitImmediate(rd, x, cond);
|
||||||
} else if ((opcode == ADD) && !set_flags && (rd == rn) &&
|
} else if ((opcode == ADD) && !set_flags && (rd == rn) &&
|
||||||
(scratch_register_list_ == 0)) {
|
!temps.CanAcquire()) {
|
||||||
// Split the operation into a sequence of additions if we cannot use a
|
// Split the operation into a sequence of additions if we cannot use a
|
||||||
// scratch register. In this case, we cannot re-use rn and the assembler
|
// scratch register. In this case, we cannot re-use rn and the assembler
|
||||||
// does not have any scratch registers to spare.
|
// does not have any scratch registers to spare.
|
||||||
@ -1244,7 +1251,6 @@ void Assembler::AddrMode1(Instr instr, Register rd, Register rn,
|
|||||||
// The immediate operand cannot be encoded as a shifter operand, so load
|
// The immediate operand cannot be encoded as a shifter operand, so load
|
||||||
// it first to a scratch register and change the original instruction to
|
// it first to a scratch register and change the original instruction to
|
||||||
// use it.
|
// use it.
|
||||||
UseScratchRegisterScope temps(this);
|
|
||||||
// Re-use the destination register if possible.
|
// Re-use the destination register if possible.
|
||||||
Register scratch =
|
Register scratch =
|
||||||
(rd.is_valid() && rd != rn && rd != pc) ? rd : temps.Acquire();
|
(rd.is_valid() && rd != rn && rd != pc) ? rd : temps.Acquire();
|
||||||
@ -5478,19 +5484,23 @@ void PatchingAssembler::FlushICache(Isolate* isolate) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
UseScratchRegisterScope::UseScratchRegisterScope(Assembler* assembler)
|
UseScratchRegisterScope::UseScratchRegisterScope(Assembler* assembler)
|
||||||
: available_(assembler->GetScratchRegisterList()),
|
: assembler_(assembler),
|
||||||
old_available_(*available_) {}
|
old_available_(*assembler->GetScratchRegisterList()),
|
||||||
|
old_available_vfp_(*assembler->GetScratchVfpRegisterList()) {}
|
||||||
|
|
||||||
UseScratchRegisterScope::~UseScratchRegisterScope() {
|
UseScratchRegisterScope::~UseScratchRegisterScope() {
|
||||||
*available_ = old_available_;
|
*assembler_->GetScratchRegisterList() = old_available_;
|
||||||
|
*assembler_->GetScratchVfpRegisterList() = old_available_vfp_;
|
||||||
}
|
}
|
||||||
|
|
||||||
Register UseScratchRegisterScope::Acquire() {
|
Register UseScratchRegisterScope::Acquire() {
|
||||||
DCHECK_NOT_NULL(available_);
|
RegList* available = assembler_->GetScratchRegisterList();
|
||||||
DCHECK_NE(*available_, 0);
|
DCHECK_NOT_NULL(available);
|
||||||
int index = static_cast<int>(base::bits::CountTrailingZeros32(*available_));
|
DCHECK_NE(*available, 0);
|
||||||
*available_ &= ~(1UL << index);
|
int index = static_cast<int>(base::bits::CountTrailingZeros32(*available));
|
||||||
return Register::from_code(index);
|
Register reg = Register::from_code(index);
|
||||||
|
*available &= ~reg.bit();
|
||||||
|
return reg;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace internal
|
} // namespace internal
|
||||||
|
@ -184,6 +184,17 @@ enum SwVfpRegisterCode {
|
|||||||
kSwVfpAfterLast
|
kSwVfpAfterLast
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Representation of a list of non-overlapping VFP registers. This list
|
||||||
|
// represents the data layout of VFP registers as a bitfield:
|
||||||
|
// S registers cover 1 bit
|
||||||
|
// D registers cover 2 bits
|
||||||
|
// Q registers cover 4 bits
|
||||||
|
//
|
||||||
|
// This way, we make sure no registers in the list ever overlap. However, a list
|
||||||
|
// may represent multiple different sets of registers,
|
||||||
|
// e.g. [d0 s2 s3] <=> [s0 s1 d1].
|
||||||
|
typedef uint64_t VfpRegList;
|
||||||
|
|
||||||
// Single word VFP register.
|
// Single word VFP register.
|
||||||
class SwVfpRegister : public RegisterBase<SwVfpRegister, kSwVfpAfterLast> {
|
class SwVfpRegister : public RegisterBase<SwVfpRegister, kSwVfpAfterLast> {
|
||||||
public:
|
public:
|
||||||
@ -195,6 +206,11 @@ class SwVfpRegister : public RegisterBase<SwVfpRegister, kSwVfpAfterLast> {
|
|||||||
*vm = reg_code >> 1;
|
*vm = reg_code >> 1;
|
||||||
}
|
}
|
||||||
void split_code(int* vm, int* m) const { split_code(code(), vm, m); }
|
void split_code(int* vm, int* m) const { split_code(code(), vm, m); }
|
||||||
|
VfpRegList ToVfpRegList() const {
|
||||||
|
DCHECK(is_valid());
|
||||||
|
// Each bit in the list corresponds to a S register.
|
||||||
|
return uint64_t{0x1} << code();
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
friend class RegisterBase;
|
friend class RegisterBase;
|
||||||
@ -217,10 +233,6 @@ enum DoubleRegisterCode {
|
|||||||
// Double word VFP register.
|
// Double word VFP register.
|
||||||
class DwVfpRegister : public RegisterBase<DwVfpRegister, kDoubleAfterLast> {
|
class DwVfpRegister : public RegisterBase<DwVfpRegister, kDoubleAfterLast> {
|
||||||
public:
|
public:
|
||||||
// A few double registers are reserved: one as a scratch register and one to
|
|
||||||
// hold 0.0, that does not fit in the immediate field of vmov instructions.
|
|
||||||
// d14: 0.0
|
|
||||||
// d15: scratch register.
|
|
||||||
static constexpr int kSizeInBytes = 8;
|
static constexpr int kSizeInBytes = 8;
|
||||||
|
|
||||||
inline static int NumRegisters();
|
inline static int NumRegisters();
|
||||||
@ -231,6 +243,11 @@ class DwVfpRegister : public RegisterBase<DwVfpRegister, kDoubleAfterLast> {
|
|||||||
*vm = reg_code & 0x0F;
|
*vm = reg_code & 0x0F;
|
||||||
}
|
}
|
||||||
void split_code(int* vm, int* m) const { split_code(code(), vm, m); }
|
void split_code(int* vm, int* m) const { split_code(code(), vm, m); }
|
||||||
|
VfpRegList ToVfpRegList() const {
|
||||||
|
DCHECK(is_valid());
|
||||||
|
// A D register overlaps two S registers.
|
||||||
|
return uint64_t{0x3} << (code() * 2);
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
friend class RegisterBase;
|
friend class RegisterBase;
|
||||||
@ -255,6 +272,11 @@ class LowDwVfpRegister
|
|||||||
SwVfpRegister high() const {
|
SwVfpRegister high() const {
|
||||||
return SwVfpRegister::from_code(code() * 2 + 1);
|
return SwVfpRegister::from_code(code() * 2 + 1);
|
||||||
}
|
}
|
||||||
|
VfpRegList ToVfpRegList() const {
|
||||||
|
DCHECK(is_valid());
|
||||||
|
// A D register overlaps two S registers.
|
||||||
|
return uint64_t{0x3} << (code() * 2);
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
friend class RegisterBase;
|
friend class RegisterBase;
|
||||||
@ -282,6 +304,11 @@ class QwNeonRegister : public RegisterBase<QwNeonRegister, kSimd128AfterLast> {
|
|||||||
DwVfpRegister high() const {
|
DwVfpRegister high() const {
|
||||||
return DwVfpRegister::from_code(code() * 2 + 1);
|
return DwVfpRegister::from_code(code() * 2 + 1);
|
||||||
}
|
}
|
||||||
|
VfpRegList ToVfpRegList() const {
|
||||||
|
DCHECK(is_valid());
|
||||||
|
// A Q register overlaps four S registers.
|
||||||
|
return uint64_t{0xf} << (code() * 4);
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
friend class RegisterBase;
|
friend class RegisterBase;
|
||||||
@ -334,12 +361,6 @@ SIMD128_REGISTERS(DECLARE_SIMD128_REGISTER)
|
|||||||
constexpr LowDwVfpRegister kFirstCalleeSavedDoubleReg = d8;
|
constexpr LowDwVfpRegister kFirstCalleeSavedDoubleReg = d8;
|
||||||
constexpr LowDwVfpRegister kLastCalleeSavedDoubleReg = d15;
|
constexpr LowDwVfpRegister kLastCalleeSavedDoubleReg = d15;
|
||||||
constexpr LowDwVfpRegister kDoubleRegZero = d13;
|
constexpr LowDwVfpRegister kDoubleRegZero = d13;
|
||||||
constexpr LowDwVfpRegister kScratchDoubleReg = d14;
|
|
||||||
// This scratch q-register aliases d14 (kScratchDoubleReg) and d15, but is only
|
|
||||||
// used if NEON is supported, which implies VFP32DREGS. When there are only 16
|
|
||||||
// d-registers, d15 is still allocatable.
|
|
||||||
constexpr QwNeonRegister kScratchQuadReg = q7;
|
|
||||||
constexpr LowDwVfpRegister kScratchDoubleReg2 = d15;
|
|
||||||
|
|
||||||
constexpr CRegister no_creg = CRegister::no_reg();
|
constexpr CRegister no_creg = CRegister::no_reg();
|
||||||
|
|
||||||
@ -685,6 +706,9 @@ class Assembler : public AssemblerBase {
|
|||||||
// register.
|
// register.
|
||||||
static constexpr int kPcLoadDelta = 8;
|
static constexpr int kPcLoadDelta = 8;
|
||||||
RegList* GetScratchRegisterList() { return &scratch_register_list_; }
|
RegList* GetScratchRegisterList() { return &scratch_register_list_; }
|
||||||
|
VfpRegList* GetScratchVfpRegisterList() {
|
||||||
|
return &scratch_vfp_register_list_;
|
||||||
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Code generation
|
// Code generation
|
||||||
@ -1655,6 +1679,7 @@ class Assembler : public AssemblerBase {
|
|||||||
|
|
||||||
// Scratch registers available for use by the Assembler.
|
// Scratch registers available for use by the Assembler.
|
||||||
RegList scratch_register_list_;
|
RegList scratch_register_list_;
|
||||||
|
VfpRegList scratch_vfp_register_list_;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Avoid overflows for displacements etc.
|
// Avoid overflows for displacements etc.
|
||||||
@ -1732,6 +1757,7 @@ class Assembler : public AssemblerBase {
|
|||||||
friend class BlockConstPoolScope;
|
friend class BlockConstPoolScope;
|
||||||
friend class BlockCodeTargetSharingScope;
|
friend class BlockCodeTargetSharingScope;
|
||||||
friend class EnsureSpace;
|
friend class EnsureSpace;
|
||||||
|
friend class UseScratchRegisterScope;
|
||||||
|
|
||||||
// The following functions help with avoiding allocations of embedded heap
|
// The following functions help with avoiding allocations of embedded heap
|
||||||
// objects during the code assembly phase. {RequestHeapObject} records the
|
// objects during the code assembly phase. {RequestHeapObject} records the
|
||||||
@ -1779,12 +1805,38 @@ class UseScratchRegisterScope {
|
|||||||
|
|
||||||
// Take a register from the list and return it.
|
// Take a register from the list and return it.
|
||||||
Register Acquire();
|
Register Acquire();
|
||||||
|
SwVfpRegister AcquireS() { return AcquireVfp<SwVfpRegister>(); }
|
||||||
|
LowDwVfpRegister AcquireLowD() { return AcquireVfp<LowDwVfpRegister>(); }
|
||||||
|
DwVfpRegister AcquireD() {
|
||||||
|
DwVfpRegister reg = AcquireVfp<DwVfpRegister>();
|
||||||
|
DCHECK(assembler_->VfpRegisterIsAvailable(reg));
|
||||||
|
return reg;
|
||||||
|
}
|
||||||
|
QwNeonRegister AcquireQ() {
|
||||||
|
QwNeonRegister reg = AcquireVfp<QwNeonRegister>();
|
||||||
|
DCHECK(assembler_->VfpRegisterIsAvailable(reg));
|
||||||
|
return reg;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Currently available scratch registers.
|
friend class Assembler;
|
||||||
RegList* available_;
|
friend class TurboAssembler;
|
||||||
|
|
||||||
|
// Check if we have registers available to acquire.
|
||||||
|
// These methods are kept private intentionally to restrict their usage to the
|
||||||
|
// assemblers. Choosing to emit a different instruction sequence depending on
|
||||||
|
// the availability of scratch registers is generally their job.
|
||||||
|
bool CanAcquire() const { return *assembler_->GetScratchRegisterList() != 0; }
|
||||||
|
template <typename T>
|
||||||
|
bool CanAcquireVfp() const;
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
T AcquireVfp();
|
||||||
|
|
||||||
|
Assembler* assembler_;
|
||||||
// Available scratch registers at the start of this scope.
|
// Available scratch registers at the start of this scope.
|
||||||
RegList old_available_;
|
RegList old_available_;
|
||||||
|
VfpRegList old_available_vfp_;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace internal
|
} // namespace internal
|
||||||
|
@ -46,7 +46,7 @@ void DoubleToIStub::Generate(MacroAssembler* masm) {
|
|||||||
UseScratchRegisterScope temps(masm);
|
UseScratchRegisterScope temps(masm);
|
||||||
Register double_low = GetRegisterThatIsNotOneOf(result_reg);
|
Register double_low = GetRegisterThatIsNotOneOf(result_reg);
|
||||||
Register double_high = GetRegisterThatIsNotOneOf(result_reg, double_low);
|
Register double_high = GetRegisterThatIsNotOneOf(result_reg, double_low);
|
||||||
LowDwVfpRegister double_scratch = kScratchDoubleReg;
|
LowDwVfpRegister double_scratch = temps.AcquireLowD();
|
||||||
|
|
||||||
// Save the old values from these temporary registers on the stack.
|
// Save the old values from these temporary registers on the stack.
|
||||||
__ Push(double_high, double_low);
|
__ Push(double_high, double_low);
|
||||||
|
@ -30,9 +30,6 @@ void Deoptimizer::TableEntryGenerator::Generate() {
|
|||||||
const int kFloatRegsSize = kFloatSize * SwVfpRegister::kNumRegisters;
|
const int kFloatRegsSize = kFloatSize * SwVfpRegister::kNumRegisters;
|
||||||
|
|
||||||
// Save all allocatable VFP registers before messing with them.
|
// Save all allocatable VFP registers before messing with them.
|
||||||
DCHECK_EQ(kDoubleRegZero.code(), 13);
|
|
||||||
DCHECK_EQ(kScratchDoubleReg.code(), 14);
|
|
||||||
|
|
||||||
{
|
{
|
||||||
// We use a run-time check for VFP32DREGS.
|
// We use a run-time check for VFP32DREGS.
|
||||||
CpuFeatureScope scope(masm(), VFP32DREGS,
|
CpuFeatureScope scope(masm(), VFP32DREGS,
|
||||||
|
@ -307,11 +307,11 @@ void TurboAssembler::Swap(DwVfpRegister srcdst0, DwVfpRegister srcdst1) {
|
|||||||
if (CpuFeatures::IsSupported(NEON)) {
|
if (CpuFeatures::IsSupported(NEON)) {
|
||||||
vswp(srcdst0, srcdst1);
|
vswp(srcdst0, srcdst1);
|
||||||
} else {
|
} else {
|
||||||
DCHECK_NE(srcdst0, kScratchDoubleReg);
|
UseScratchRegisterScope temps(this);
|
||||||
DCHECK_NE(srcdst1, kScratchDoubleReg);
|
DwVfpRegister scratch = temps.AcquireD();
|
||||||
vmov(kScratchDoubleReg, srcdst0);
|
vmov(scratch, srcdst0);
|
||||||
vmov(srcdst0, srcdst1);
|
vmov(srcdst0, srcdst1);
|
||||||
vmov(srcdst1, kScratchDoubleReg);
|
vmov(srcdst1, scratch);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -809,11 +809,14 @@ void TurboAssembler::VmovExtended(int dst_code, int src_code) {
|
|||||||
int dst_offset = dst_code & 1;
|
int dst_offset = dst_code & 1;
|
||||||
int src_offset = src_code & 1;
|
int src_offset = src_code & 1;
|
||||||
if (CpuFeatures::IsSupported(NEON)) {
|
if (CpuFeatures::IsSupported(NEON)) {
|
||||||
|
UseScratchRegisterScope temps(this);
|
||||||
|
DwVfpRegister scratch = temps.AcquireD();
|
||||||
// On Neon we can shift and insert from d-registers.
|
// On Neon we can shift and insert from d-registers.
|
||||||
if (src_offset == dst_offset) {
|
if (src_offset == dst_offset) {
|
||||||
// Offsets are the same, use vdup to copy the source to the opposite lane.
|
// Offsets are the same, use vdup to copy the source to the opposite lane.
|
||||||
vdup(Neon32, kScratchDoubleReg, src_d_reg, src_offset);
|
vdup(Neon32, scratch, src_d_reg, src_offset);
|
||||||
src_d_reg = kScratchDoubleReg;
|
// Here we are extending the lifetime of scratch.
|
||||||
|
src_d_reg = scratch;
|
||||||
src_offset = dst_offset ^ 1;
|
src_offset = dst_offset ^ 1;
|
||||||
}
|
}
|
||||||
if (dst_offset) {
|
if (dst_offset) {
|
||||||
@ -834,27 +837,30 @@ void TurboAssembler::VmovExtended(int dst_code, int src_code) {
|
|||||||
|
|
||||||
// Without Neon, use the scratch registers to move src and/or dst into
|
// Without Neon, use the scratch registers to move src and/or dst into
|
||||||
// s-registers.
|
// s-registers.
|
||||||
int scratchSCode = kScratchDoubleReg.low().code();
|
UseScratchRegisterScope temps(this);
|
||||||
int scratchSCode2 = kScratchDoubleReg2.low().code();
|
LowDwVfpRegister d_scratch = temps.AcquireLowD();
|
||||||
|
LowDwVfpRegister d_scratch2 = temps.AcquireLowD();
|
||||||
|
int s_scratch_code = d_scratch.low().code();
|
||||||
|
int s_scratch_code2 = d_scratch2.low().code();
|
||||||
if (src_code < SwVfpRegister::kNumRegisters) {
|
if (src_code < SwVfpRegister::kNumRegisters) {
|
||||||
// src is an s-register, dst is not.
|
// src is an s-register, dst is not.
|
||||||
vmov(kScratchDoubleReg, dst_d_reg);
|
vmov(d_scratch, dst_d_reg);
|
||||||
vmov(SwVfpRegister::from_code(scratchSCode + dst_offset),
|
vmov(SwVfpRegister::from_code(s_scratch_code + dst_offset),
|
||||||
SwVfpRegister::from_code(src_code));
|
SwVfpRegister::from_code(src_code));
|
||||||
vmov(dst_d_reg, kScratchDoubleReg);
|
vmov(dst_d_reg, d_scratch);
|
||||||
} else if (dst_code < SwVfpRegister::kNumRegisters) {
|
} else if (dst_code < SwVfpRegister::kNumRegisters) {
|
||||||
// dst is an s-register, src is not.
|
// dst is an s-register, src is not.
|
||||||
vmov(kScratchDoubleReg, src_d_reg);
|
vmov(d_scratch, src_d_reg);
|
||||||
vmov(SwVfpRegister::from_code(dst_code),
|
vmov(SwVfpRegister::from_code(dst_code),
|
||||||
SwVfpRegister::from_code(scratchSCode + src_offset));
|
SwVfpRegister::from_code(s_scratch_code + src_offset));
|
||||||
} else {
|
} else {
|
||||||
// Neither src or dst are s-registers. Both scratch double registers are
|
// Neither src or dst are s-registers. Both scratch double registers are
|
||||||
// available when there are 32 VFP registers.
|
// available when there are 32 VFP registers.
|
||||||
vmov(kScratchDoubleReg, src_d_reg);
|
vmov(d_scratch, src_d_reg);
|
||||||
vmov(kScratchDoubleReg2, dst_d_reg);
|
vmov(d_scratch2, dst_d_reg);
|
||||||
vmov(SwVfpRegister::from_code(scratchSCode + dst_offset),
|
vmov(SwVfpRegister::from_code(s_scratch_code + dst_offset),
|
||||||
SwVfpRegister::from_code(scratchSCode2 + src_offset));
|
SwVfpRegister::from_code(s_scratch_code2 + src_offset));
|
||||||
vmov(dst_d_reg, kScratchQuadReg.high());
|
vmov(dst_d_reg, d_scratch2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -862,11 +868,13 @@ void TurboAssembler::VmovExtended(int dst_code, const MemOperand& src) {
|
|||||||
if (dst_code < SwVfpRegister::kNumRegisters) {
|
if (dst_code < SwVfpRegister::kNumRegisters) {
|
||||||
vldr(SwVfpRegister::from_code(dst_code), src);
|
vldr(SwVfpRegister::from_code(dst_code), src);
|
||||||
} else {
|
} else {
|
||||||
|
UseScratchRegisterScope temps(this);
|
||||||
|
LowDwVfpRegister scratch = temps.AcquireLowD();
|
||||||
// TODO(bbudge) If Neon supported, use load single lane form of vld1.
|
// TODO(bbudge) If Neon supported, use load single lane form of vld1.
|
||||||
int dst_s_code = kScratchDoubleReg.low().code() + (dst_code & 1);
|
int dst_s_code = scratch.low().code() + (dst_code & 1);
|
||||||
vmov(kScratchDoubleReg, DwVfpRegister::from_code(dst_code / 2));
|
vmov(scratch, DwVfpRegister::from_code(dst_code / 2));
|
||||||
vldr(SwVfpRegister::from_code(dst_s_code), src);
|
vldr(SwVfpRegister::from_code(dst_s_code), src);
|
||||||
vmov(DwVfpRegister::from_code(dst_code / 2), kScratchDoubleReg);
|
vmov(DwVfpRegister::from_code(dst_code / 2), scratch);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -875,8 +883,10 @@ void TurboAssembler::VmovExtended(const MemOperand& dst, int src_code) {
|
|||||||
vstr(SwVfpRegister::from_code(src_code), dst);
|
vstr(SwVfpRegister::from_code(src_code), dst);
|
||||||
} else {
|
} else {
|
||||||
// TODO(bbudge) If Neon supported, use store single lane form of vst1.
|
// TODO(bbudge) If Neon supported, use store single lane form of vst1.
|
||||||
int src_s_code = kScratchDoubleReg.low().code() + (src_code & 1);
|
UseScratchRegisterScope temps(this);
|
||||||
vmov(kScratchDoubleReg, DwVfpRegister::from_code(src_code / 2));
|
LowDwVfpRegister scratch = temps.AcquireLowD();
|
||||||
|
int src_s_code = scratch.low().code() + (src_code & 1);
|
||||||
|
vmov(scratch, DwVfpRegister::from_code(src_code / 2));
|
||||||
vstr(SwVfpRegister::from_code(src_s_code), dst);
|
vstr(SwVfpRegister::from_code(src_s_code), dst);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1599,13 +1609,22 @@ void MacroAssembler::TryDoubleToInt32Exact(Register result,
|
|||||||
void TurboAssembler::TryInlineTruncateDoubleToI(Register result,
|
void TurboAssembler::TryInlineTruncateDoubleToI(Register result,
|
||||||
DwVfpRegister double_input,
|
DwVfpRegister double_input,
|
||||||
Label* done) {
|
Label* done) {
|
||||||
LowDwVfpRegister double_scratch = kScratchDoubleReg;
|
|
||||||
vcvt_s32_f64(double_scratch.low(), double_input);
|
|
||||||
vmov(result, double_scratch.low());
|
|
||||||
|
|
||||||
UseScratchRegisterScope temps(this);
|
UseScratchRegisterScope temps(this);
|
||||||
Register scratch = temps.Acquire();
|
SwVfpRegister single_scratch = SwVfpRegister::no_reg();
|
||||||
|
if (temps.CanAcquireVfp<SwVfpRegister>()) {
|
||||||
|
single_scratch = temps.AcquireS();
|
||||||
|
} else {
|
||||||
|
// Re-use the input as a scratch register. However, we can only do this if
|
||||||
|
// the input register is d0-d15 as there are no s32+ registers.
|
||||||
|
DCHECK_LT(double_input.code(), LowDwVfpRegister::kNumRegisters);
|
||||||
|
LowDwVfpRegister double_scratch =
|
||||||
|
LowDwVfpRegister::from_code(double_input.code());
|
||||||
|
single_scratch = double_scratch.low();
|
||||||
|
}
|
||||||
|
vcvt_s32_f64(single_scratch, double_input);
|
||||||
|
vmov(result, single_scratch);
|
||||||
|
|
||||||
|
Register scratch = temps.Acquire();
|
||||||
// If result is not saturated (0x7FFFFFFF or 0x80000000), we are done.
|
// If result is not saturated (0x7FFFFFFF or 0x80000000), we are done.
|
||||||
sub(scratch, result, Operand(1));
|
sub(scratch, result, Operand(1));
|
||||||
cmp(scratch, Operand(0x7FFFFFFE));
|
cmp(scratch, Operand(0x7FFFFFFE));
|
||||||
|
@ -1364,35 +1364,40 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kArmVcvtF32S32: {
|
case kArmVcvtF32S32: {
|
||||||
SwVfpRegister scratch = kScratchDoubleReg.low();
|
UseScratchRegisterScope temps(tasm());
|
||||||
|
SwVfpRegister scratch = temps.AcquireS();
|
||||||
__ vmov(scratch, i.InputRegister(0));
|
__ vmov(scratch, i.InputRegister(0));
|
||||||
__ vcvt_f32_s32(i.OutputFloatRegister(), scratch);
|
__ vcvt_f32_s32(i.OutputFloatRegister(), scratch);
|
||||||
DCHECK_EQ(LeaveCC, i.OutputSBit());
|
DCHECK_EQ(LeaveCC, i.OutputSBit());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kArmVcvtF32U32: {
|
case kArmVcvtF32U32: {
|
||||||
SwVfpRegister scratch = kScratchDoubleReg.low();
|
UseScratchRegisterScope temps(tasm());
|
||||||
|
SwVfpRegister scratch = temps.AcquireS();
|
||||||
__ vmov(scratch, i.InputRegister(0));
|
__ vmov(scratch, i.InputRegister(0));
|
||||||
__ vcvt_f32_u32(i.OutputFloatRegister(), scratch);
|
__ vcvt_f32_u32(i.OutputFloatRegister(), scratch);
|
||||||
DCHECK_EQ(LeaveCC, i.OutputSBit());
|
DCHECK_EQ(LeaveCC, i.OutputSBit());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kArmVcvtF64S32: {
|
case kArmVcvtF64S32: {
|
||||||
SwVfpRegister scratch = kScratchDoubleReg.low();
|
UseScratchRegisterScope temps(tasm());
|
||||||
|
SwVfpRegister scratch = temps.AcquireS();
|
||||||
__ vmov(scratch, i.InputRegister(0));
|
__ vmov(scratch, i.InputRegister(0));
|
||||||
__ vcvt_f64_s32(i.OutputDoubleRegister(), scratch);
|
__ vcvt_f64_s32(i.OutputDoubleRegister(), scratch);
|
||||||
DCHECK_EQ(LeaveCC, i.OutputSBit());
|
DCHECK_EQ(LeaveCC, i.OutputSBit());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kArmVcvtF64U32: {
|
case kArmVcvtF64U32: {
|
||||||
SwVfpRegister scratch = kScratchDoubleReg.low();
|
UseScratchRegisterScope temps(tasm());
|
||||||
|
SwVfpRegister scratch = temps.AcquireS();
|
||||||
__ vmov(scratch, i.InputRegister(0));
|
__ vmov(scratch, i.InputRegister(0));
|
||||||
__ vcvt_f64_u32(i.OutputDoubleRegister(), scratch);
|
__ vcvt_f64_u32(i.OutputDoubleRegister(), scratch);
|
||||||
DCHECK_EQ(LeaveCC, i.OutputSBit());
|
DCHECK_EQ(LeaveCC, i.OutputSBit());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kArmVcvtS32F32: {
|
case kArmVcvtS32F32: {
|
||||||
SwVfpRegister scratch = kScratchDoubleReg.low();
|
UseScratchRegisterScope temps(tasm());
|
||||||
|
SwVfpRegister scratch = temps.AcquireS();
|
||||||
__ vcvt_s32_f32(scratch, i.InputFloatRegister(0));
|
__ vcvt_s32_f32(scratch, i.InputFloatRegister(0));
|
||||||
__ vmov(i.OutputRegister(), scratch);
|
__ vmov(i.OutputRegister(), scratch);
|
||||||
// Avoid INT32_MAX as an overflow indicator and use INT32_MIN instead,
|
// Avoid INT32_MAX as an overflow indicator and use INT32_MIN instead,
|
||||||
@ -1403,7 +1408,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kArmVcvtU32F32: {
|
case kArmVcvtU32F32: {
|
||||||
SwVfpRegister scratch = kScratchDoubleReg.low();
|
UseScratchRegisterScope temps(tasm());
|
||||||
|
SwVfpRegister scratch = temps.AcquireS();
|
||||||
__ vcvt_u32_f32(scratch, i.InputFloatRegister(0));
|
__ vcvt_u32_f32(scratch, i.InputFloatRegister(0));
|
||||||
__ vmov(i.OutputRegister(), scratch);
|
__ vmov(i.OutputRegister(), scratch);
|
||||||
// Avoid UINT32_MAX as an overflow indicator and use 0 instead,
|
// Avoid UINT32_MAX as an overflow indicator and use 0 instead,
|
||||||
@ -1414,14 +1420,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kArmVcvtS32F64: {
|
case kArmVcvtS32F64: {
|
||||||
SwVfpRegister scratch = kScratchDoubleReg.low();
|
UseScratchRegisterScope temps(tasm());
|
||||||
|
SwVfpRegister scratch = temps.AcquireS();
|
||||||
__ vcvt_s32_f64(scratch, i.InputDoubleRegister(0));
|
__ vcvt_s32_f64(scratch, i.InputDoubleRegister(0));
|
||||||
__ vmov(i.OutputRegister(), scratch);
|
__ vmov(i.OutputRegister(), scratch);
|
||||||
DCHECK_EQ(LeaveCC, i.OutputSBit());
|
DCHECK_EQ(LeaveCC, i.OutputSBit());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kArmVcvtU32F64: {
|
case kArmVcvtU32F64: {
|
||||||
SwVfpRegister scratch = kScratchDoubleReg.low();
|
UseScratchRegisterScope temps(tasm());
|
||||||
|
SwVfpRegister scratch = temps.AcquireS();
|
||||||
__ vcvt_u32_f64(scratch, i.InputDoubleRegister(0));
|
__ vcvt_u32_f64(scratch, i.InputDoubleRegister(0));
|
||||||
__ vmov(i.OutputRegister(), scratch);
|
__ vmov(i.OutputRegister(), scratch);
|
||||||
DCHECK_EQ(LeaveCC, i.OutputSBit());
|
DCHECK_EQ(LeaveCC, i.OutputSBit());
|
||||||
@ -2217,41 +2225,50 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
|||||||
Simd128Register dst = i.OutputSimd128Register(),
|
Simd128Register dst = i.OutputSimd128Register(),
|
||||||
src1 = i.InputSimd128Register(1);
|
src1 = i.InputSimd128Register(1);
|
||||||
DCHECK(dst == i.InputSimd128Register(0));
|
DCHECK(dst == i.InputSimd128Register(0));
|
||||||
|
UseScratchRegisterScope temps(tasm());
|
||||||
|
Simd128Register scratch = temps.AcquireQ();
|
||||||
// src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
|
// src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
|
||||||
__ vmov(kScratchQuadReg, src1);
|
__ vmov(scratch, src1);
|
||||||
__ vuzp(Neon32, dst, kScratchQuadReg); // dst = [0, 2, 4, 6]
|
__ vuzp(Neon32, dst, scratch); // dst = [0, 2, 4, 6]
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kArmS32x4UnzipRight: {
|
case kArmS32x4UnzipRight: {
|
||||||
Simd128Register dst = i.OutputSimd128Register(),
|
Simd128Register dst = i.OutputSimd128Register(),
|
||||||
src1 = i.InputSimd128Register(1);
|
src1 = i.InputSimd128Register(1);
|
||||||
DCHECK(dst == i.InputSimd128Register(0));
|
DCHECK(dst == i.InputSimd128Register(0));
|
||||||
|
UseScratchRegisterScope temps(tasm());
|
||||||
|
Simd128Register scratch = temps.AcquireQ();
|
||||||
// src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from UnzipLeft).
|
// src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from UnzipLeft).
|
||||||
__ vmov(kScratchQuadReg, src1);
|
__ vmov(scratch, src1);
|
||||||
__ vuzp(Neon32, kScratchQuadReg, dst); // dst = [1, 3, 5, 7]
|
__ vuzp(Neon32, scratch, dst); // dst = [1, 3, 5, 7]
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kArmS32x4TransposeLeft: {
|
case kArmS32x4TransposeLeft: {
|
||||||
Simd128Register dst = i.OutputSimd128Register(),
|
Simd128Register dst = i.OutputSimd128Register(),
|
||||||
src1 = i.InputSimd128Register(1);
|
src1 = i.InputSimd128Register(1);
|
||||||
DCHECK(dst == i.InputSimd128Register(0));
|
DCHECK(dst == i.InputSimd128Register(0));
|
||||||
|
UseScratchRegisterScope temps(tasm());
|
||||||
|
Simd128Register scratch = temps.AcquireQ();
|
||||||
// src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
|
// src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
|
||||||
__ vmov(kScratchQuadReg, src1);
|
__ vmov(scratch, src1);
|
||||||
__ vtrn(Neon32, dst, kScratchQuadReg); // dst = [0, 4, 2, 6]
|
__ vtrn(Neon32, dst, scratch); // dst = [0, 4, 2, 6]
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kArmS32x4Shuffle: {
|
case kArmS32x4Shuffle: {
|
||||||
Simd128Register dst = i.OutputSimd128Register(),
|
Simd128Register dst = i.OutputSimd128Register(),
|
||||||
src0 = i.InputSimd128Register(0),
|
src0 = i.InputSimd128Register(0),
|
||||||
src1 = i.InputSimd128Register(1);
|
src1 = i.InputSimd128Register(1);
|
||||||
|
UseScratchRegisterScope temps(tasm());
|
||||||
// Check for in-place shuffles.
|
// Check for in-place shuffles.
|
||||||
// If dst == src0 == src1, then the shuffle is unary and we only use src0.
|
// If dst == src0 == src1, then the shuffle is unary and we only use src0.
|
||||||
if (dst == src0) {
|
if (dst == src0) {
|
||||||
__ vmov(kScratchQuadReg, src0);
|
Simd128Register scratch = temps.AcquireQ();
|
||||||
src0 = kScratchQuadReg;
|
__ vmov(scratch, src0);
|
||||||
|
src0 = scratch;
|
||||||
} else if (dst == src1) {
|
} else if (dst == src1) {
|
||||||
__ vmov(kScratchQuadReg, src1);
|
Simd128Register scratch = temps.AcquireQ();
|
||||||
src1 = kScratchQuadReg;
|
__ vmov(scratch, src1);
|
||||||
|
src1 = scratch;
|
||||||
}
|
}
|
||||||
// Perform shuffle as a vmov per lane.
|
// Perform shuffle as a vmov per lane.
|
||||||
int dst_code = dst.code() * 4;
|
int dst_code = dst.code() * 4;
|
||||||
@ -2273,10 +2290,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
|||||||
case kArmS32x4TransposeRight: {
|
case kArmS32x4TransposeRight: {
|
||||||
Simd128Register dst = i.OutputSimd128Register(),
|
Simd128Register dst = i.OutputSimd128Register(),
|
||||||
src1 = i.InputSimd128Register(1);
|
src1 = i.InputSimd128Register(1);
|
||||||
|
UseScratchRegisterScope temps(tasm());
|
||||||
|
Simd128Register scratch = temps.AcquireQ();
|
||||||
DCHECK(dst == i.InputSimd128Register(0));
|
DCHECK(dst == i.InputSimd128Register(0));
|
||||||
// src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from TransposeLeft).
|
// src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from TransposeLeft).
|
||||||
__ vmov(kScratchQuadReg, src1);
|
__ vmov(scratch, src1);
|
||||||
__ vtrn(Neon32, kScratchQuadReg, dst); // dst = [1, 5, 3, 7]
|
__ vtrn(Neon32, scratch, dst); // dst = [1, 5, 3, 7]
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kArmS16x8ZipLeft: {
|
case kArmS16x8ZipLeft: {
|
||||||
@ -2300,37 +2319,45 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
|||||||
case kArmS16x8UnzipLeft: {
|
case kArmS16x8UnzipLeft: {
|
||||||
Simd128Register dst = i.OutputSimd128Register(),
|
Simd128Register dst = i.OutputSimd128Register(),
|
||||||
src1 = i.InputSimd128Register(1);
|
src1 = i.InputSimd128Register(1);
|
||||||
|
UseScratchRegisterScope temps(tasm());
|
||||||
|
Simd128Register scratch = temps.AcquireQ();
|
||||||
DCHECK(dst == i.InputSimd128Register(0));
|
DCHECK(dst == i.InputSimd128Register(0));
|
||||||
// src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
|
// src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
|
||||||
__ vmov(kScratchQuadReg, src1);
|
__ vmov(scratch, src1);
|
||||||
__ vuzp(Neon16, dst, kScratchQuadReg); // dst = [0, 2, 4, 6, ... 14]
|
__ vuzp(Neon16, dst, scratch); // dst = [0, 2, 4, 6, ... 14]
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kArmS16x8UnzipRight: {
|
case kArmS16x8UnzipRight: {
|
||||||
Simd128Register dst = i.OutputSimd128Register(),
|
Simd128Register dst = i.OutputSimd128Register(),
|
||||||
src1 = i.InputSimd128Register(1);
|
src1 = i.InputSimd128Register(1);
|
||||||
|
UseScratchRegisterScope temps(tasm());
|
||||||
|
Simd128Register scratch = temps.AcquireQ();
|
||||||
DCHECK(dst == i.InputSimd128Register(0));
|
DCHECK(dst == i.InputSimd128Register(0));
|
||||||
// src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
|
// src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
|
||||||
__ vmov(kScratchQuadReg, src1);
|
__ vmov(scratch, src1);
|
||||||
__ vuzp(Neon16, kScratchQuadReg, dst); // dst = [1, 3, 5, 7, ... 15]
|
__ vuzp(Neon16, scratch, dst); // dst = [1, 3, 5, 7, ... 15]
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kArmS16x8TransposeLeft: {
|
case kArmS16x8TransposeLeft: {
|
||||||
Simd128Register dst = i.OutputSimd128Register(),
|
Simd128Register dst = i.OutputSimd128Register(),
|
||||||
src1 = i.InputSimd128Register(1);
|
src1 = i.InputSimd128Register(1);
|
||||||
|
UseScratchRegisterScope temps(tasm());
|
||||||
|
Simd128Register scratch = temps.AcquireQ();
|
||||||
DCHECK(dst == i.InputSimd128Register(0));
|
DCHECK(dst == i.InputSimd128Register(0));
|
||||||
// src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
|
// src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
|
||||||
__ vmov(kScratchQuadReg, src1);
|
__ vmov(scratch, src1);
|
||||||
__ vtrn(Neon16, dst, kScratchQuadReg); // dst = [0, 8, 2, 10, ... 14]
|
__ vtrn(Neon16, dst, scratch); // dst = [0, 8, 2, 10, ... 14]
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kArmS16x8TransposeRight: {
|
case kArmS16x8TransposeRight: {
|
||||||
Simd128Register dst = i.OutputSimd128Register(),
|
Simd128Register dst = i.OutputSimd128Register(),
|
||||||
src1 = i.InputSimd128Register(1);
|
src1 = i.InputSimd128Register(1);
|
||||||
|
UseScratchRegisterScope temps(tasm());
|
||||||
|
Simd128Register scratch = temps.AcquireQ();
|
||||||
DCHECK(dst == i.InputSimd128Register(0));
|
DCHECK(dst == i.InputSimd128Register(0));
|
||||||
// src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
|
// src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
|
||||||
__ vmov(kScratchQuadReg, src1);
|
__ vmov(scratch, src1);
|
||||||
__ vtrn(Neon16, kScratchQuadReg, dst); // dst = [1, 9, 3, 11, ... 15]
|
__ vtrn(Neon16, scratch, dst); // dst = [1, 9, 3, 11, ... 15]
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kArmS8x16ZipLeft: {
|
case kArmS8x16ZipLeft: {
|
||||||
@ -2354,37 +2381,45 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
|||||||
case kArmS8x16UnzipLeft: {
|
case kArmS8x16UnzipLeft: {
|
||||||
Simd128Register dst = i.OutputSimd128Register(),
|
Simd128Register dst = i.OutputSimd128Register(),
|
||||||
src1 = i.InputSimd128Register(1);
|
src1 = i.InputSimd128Register(1);
|
||||||
|
UseScratchRegisterScope temps(tasm());
|
||||||
|
Simd128Register scratch = temps.AcquireQ();
|
||||||
DCHECK(dst == i.InputSimd128Register(0));
|
DCHECK(dst == i.InputSimd128Register(0));
|
||||||
// src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
|
// src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
|
||||||
__ vmov(kScratchQuadReg, src1);
|
__ vmov(scratch, src1);
|
||||||
__ vuzp(Neon8, dst, kScratchQuadReg); // dst = [0, 2, 4, 6, ... 30]
|
__ vuzp(Neon8, dst, scratch); // dst = [0, 2, 4, 6, ... 30]
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kArmS8x16UnzipRight: {
|
case kArmS8x16UnzipRight: {
|
||||||
Simd128Register dst = i.OutputSimd128Register(),
|
Simd128Register dst = i.OutputSimd128Register(),
|
||||||
src1 = i.InputSimd128Register(1);
|
src1 = i.InputSimd128Register(1);
|
||||||
|
UseScratchRegisterScope temps(tasm());
|
||||||
|
Simd128Register scratch = temps.AcquireQ();
|
||||||
DCHECK(dst == i.InputSimd128Register(0));
|
DCHECK(dst == i.InputSimd128Register(0));
|
||||||
// src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
|
// src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
|
||||||
__ vmov(kScratchQuadReg, src1);
|
__ vmov(scratch, src1);
|
||||||
__ vuzp(Neon8, kScratchQuadReg, dst); // dst = [1, 3, 5, 7, ... 31]
|
__ vuzp(Neon8, scratch, dst); // dst = [1, 3, 5, 7, ... 31]
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kArmS8x16TransposeLeft: {
|
case kArmS8x16TransposeLeft: {
|
||||||
Simd128Register dst = i.OutputSimd128Register(),
|
Simd128Register dst = i.OutputSimd128Register(),
|
||||||
src1 = i.InputSimd128Register(1);
|
src1 = i.InputSimd128Register(1);
|
||||||
|
UseScratchRegisterScope temps(tasm());
|
||||||
|
Simd128Register scratch = temps.AcquireQ();
|
||||||
DCHECK(dst == i.InputSimd128Register(0));
|
DCHECK(dst == i.InputSimd128Register(0));
|
||||||
// src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
|
// src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
|
||||||
__ vmov(kScratchQuadReg, src1);
|
__ vmov(scratch, src1);
|
||||||
__ vtrn(Neon8, dst, kScratchQuadReg); // dst = [0, 16, 2, 18, ... 30]
|
__ vtrn(Neon8, dst, scratch); // dst = [0, 16, 2, 18, ... 30]
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kArmS8x16TransposeRight: {
|
case kArmS8x16TransposeRight: {
|
||||||
Simd128Register dst = i.OutputSimd128Register(),
|
Simd128Register dst = i.OutputSimd128Register(),
|
||||||
src1 = i.InputSimd128Register(1);
|
src1 = i.InputSimd128Register(1);
|
||||||
|
UseScratchRegisterScope temps(tasm());
|
||||||
|
Simd128Register scratch = temps.AcquireQ();
|
||||||
DCHECK(dst == i.InputSimd128Register(0));
|
DCHECK(dst == i.InputSimd128Register(0));
|
||||||
// src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
|
// src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
|
||||||
__ vmov(kScratchQuadReg, src1);
|
__ vmov(scratch, src1);
|
||||||
__ vtrn(Neon8, kScratchQuadReg, dst); // dst = [1, 17, 3, 19, ... 31]
|
__ vtrn(Neon8, scratch, dst); // dst = [1, 17, 3, 19, ... 31]
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kArmS8x16Concat: {
|
case kArmS8x16Concat: {
|
||||||
@ -2397,12 +2432,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
|||||||
src0 = i.InputSimd128Register(0),
|
src0 = i.InputSimd128Register(0),
|
||||||
src1 = i.InputSimd128Register(1);
|
src1 = i.InputSimd128Register(1);
|
||||||
DwVfpRegister table_base = src0.low();
|
DwVfpRegister table_base = src0.low();
|
||||||
|
UseScratchRegisterScope temps(tasm());
|
||||||
|
Simd128Register scratch = temps.AcquireQ();
|
||||||
// If unary shuffle, table is src0 (2 d-registers), otherwise src0 and
|
// If unary shuffle, table is src0 (2 d-registers), otherwise src0 and
|
||||||
// src1. They must be consecutive.
|
// src1. They must be consecutive.
|
||||||
int table_size = src0 == src1 ? 2 : 4;
|
int table_size = src0 == src1 ? 2 : 4;
|
||||||
DCHECK_IMPLIES(src0 != src1, src0.code() + 1 == src1.code());
|
DCHECK_IMPLIES(src0 != src1, src0.code() + 1 == src1.code());
|
||||||
// The shuffle lane mask is a byte mask, materialize in kScratchQuadReg.
|
// The shuffle lane mask is a byte mask, materialize in scratch.
|
||||||
int scratch_s_base = kScratchQuadReg.code() * 4;
|
int scratch_s_base = scratch.code() * 4;
|
||||||
for (int j = 0; j < 4; j++) {
|
for (int j = 0; j < 4; j++) {
|
||||||
uint32_t four_lanes = i.InputUint32(2 + j);
|
uint32_t four_lanes = i.InputUint32(2 + j);
|
||||||
// Ensure byte indices are in [0, 31] so masks are never NaNs.
|
// Ensure byte indices are in [0, 31] so masks are never NaNs.
|
||||||
@ -2412,12 +2449,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
|||||||
}
|
}
|
||||||
NeonListOperand table(table_base, table_size);
|
NeonListOperand table(table_base, table_size);
|
||||||
if (dst != src0 && dst != src1) {
|
if (dst != src0 && dst != src1) {
|
||||||
__ vtbl(dst.low(), table, kScratchQuadReg.low());
|
__ vtbl(dst.low(), table, scratch.low());
|
||||||
__ vtbl(dst.high(), table, kScratchQuadReg.high());
|
__ vtbl(dst.high(), table, scratch.high());
|
||||||
} else {
|
} else {
|
||||||
__ vtbl(kScratchQuadReg.low(), table, kScratchQuadReg.low());
|
__ vtbl(scratch.low(), table, scratch.low());
|
||||||
__ vtbl(kScratchQuadReg.high(), table, kScratchQuadReg.high());
|
__ vtbl(scratch.high(), table, scratch.high());
|
||||||
__ vmov(dst, kScratchQuadReg);
|
__ vmov(dst, scratch);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -2447,58 +2484,65 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
|||||||
}
|
}
|
||||||
case kArmS1x4AnyTrue: {
|
case kArmS1x4AnyTrue: {
|
||||||
const QwNeonRegister& src = i.InputSimd128Register(0);
|
const QwNeonRegister& src = i.InputSimd128Register(0);
|
||||||
__ vpmax(NeonU32, kScratchDoubleReg, src.low(), src.high());
|
UseScratchRegisterScope temps(tasm());
|
||||||
__ vpmax(NeonU32, kScratchDoubleReg, kScratchDoubleReg,
|
DwVfpRegister scratch = temps.AcquireD();
|
||||||
kScratchDoubleReg);
|
__ vpmax(NeonU32, scratch, src.low(), src.high());
|
||||||
__ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS32, 0);
|
__ vpmax(NeonU32, scratch, scratch, scratch);
|
||||||
|
__ ExtractLane(i.OutputRegister(), scratch, NeonS32, 0);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kArmS1x4AllTrue: {
|
case kArmS1x4AllTrue: {
|
||||||
const QwNeonRegister& src = i.InputSimd128Register(0);
|
const QwNeonRegister& src = i.InputSimd128Register(0);
|
||||||
__ vpmin(NeonU32, kScratchDoubleReg, src.low(), src.high());
|
UseScratchRegisterScope temps(tasm());
|
||||||
__ vpmin(NeonU32, kScratchDoubleReg, kScratchDoubleReg,
|
DwVfpRegister scratch = temps.AcquireD();
|
||||||
kScratchDoubleReg);
|
__ vpmin(NeonU32, scratch, src.low(), src.high());
|
||||||
__ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS32, 0);
|
__ vpmin(NeonU32, scratch, scratch, scratch);
|
||||||
|
__ ExtractLane(i.OutputRegister(), scratch, NeonS32, 0);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kArmS1x8AnyTrue: {
|
case kArmS1x8AnyTrue: {
|
||||||
const QwNeonRegister& src = i.InputSimd128Register(0);
|
const QwNeonRegister& src = i.InputSimd128Register(0);
|
||||||
__ vpmax(NeonU16, kScratchDoubleReg, src.low(), src.high());
|
UseScratchRegisterScope temps(tasm());
|
||||||
__ vpmax(NeonU16, kScratchDoubleReg, kScratchDoubleReg,
|
DwVfpRegister scratch = temps.AcquireD();
|
||||||
kScratchDoubleReg);
|
__ vpmax(NeonU16, scratch, src.low(), src.high());
|
||||||
__ vpmax(NeonU16, kScratchDoubleReg, kScratchDoubleReg,
|
__ vpmax(NeonU16, scratch, scratch, scratch);
|
||||||
kScratchDoubleReg);
|
__ vpmax(NeonU16, scratch, scratch, scratch);
|
||||||
__ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS16, 0);
|
__ ExtractLane(i.OutputRegister(), scratch, NeonS16, 0);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kArmS1x8AllTrue: {
|
case kArmS1x8AllTrue: {
|
||||||
const QwNeonRegister& src = i.InputSimd128Register(0);
|
const QwNeonRegister& src = i.InputSimd128Register(0);
|
||||||
__ vpmin(NeonU16, kScratchDoubleReg, src.low(), src.high());
|
UseScratchRegisterScope temps(tasm());
|
||||||
__ vpmin(NeonU16, kScratchDoubleReg, kScratchDoubleReg,
|
DwVfpRegister scratch = temps.AcquireD();
|
||||||
kScratchDoubleReg);
|
__ vpmin(NeonU16, scratch, src.low(), src.high());
|
||||||
__ vpmin(NeonU16, kScratchDoubleReg, kScratchDoubleReg,
|
__ vpmin(NeonU16, scratch, scratch, scratch);
|
||||||
kScratchDoubleReg);
|
__ vpmin(NeonU16, scratch, scratch, scratch);
|
||||||
__ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS16, 0);
|
__ ExtractLane(i.OutputRegister(), scratch, NeonS16, 0);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kArmS1x16AnyTrue: {
|
case kArmS1x16AnyTrue: {
|
||||||
const QwNeonRegister& src = i.InputSimd128Register(0);
|
const QwNeonRegister& src = i.InputSimd128Register(0);
|
||||||
__ vpmax(NeonU8, kScratchDoubleReg, src.low(), src.high());
|
UseScratchRegisterScope temps(tasm());
|
||||||
__ vpmax(NeonU8, kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
|
QwNeonRegister q_scratch = temps.AcquireQ();
|
||||||
// vtst to detect any bits in the bottom 32 bits of kScratchDoubleReg.
|
DwVfpRegister d_scratch = q_scratch.low();
|
||||||
|
__ vpmax(NeonU8, d_scratch, src.low(), src.high());
|
||||||
|
__ vpmax(NeonU8, d_scratch, d_scratch, d_scratch);
|
||||||
|
// vtst to detect any bits in the bottom 32 bits of d_scratch.
|
||||||
// This saves an instruction vs. the naive sequence of vpmax.
|
// This saves an instruction vs. the naive sequence of vpmax.
|
||||||
// kDoubleRegZero is not changed, since it is 0.
|
// kDoubleRegZero is not changed, since it is 0.
|
||||||
__ vtst(Neon32, kScratchQuadReg, kScratchQuadReg, kScratchQuadReg);
|
__ vtst(Neon32, q_scratch, q_scratch, q_scratch);
|
||||||
__ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS32, 0);
|
__ ExtractLane(i.OutputRegister(), d_scratch, NeonS32, 0);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kArmS1x16AllTrue: {
|
case kArmS1x16AllTrue: {
|
||||||
const QwNeonRegister& src = i.InputSimd128Register(0);
|
const QwNeonRegister& src = i.InputSimd128Register(0);
|
||||||
__ vpmin(NeonU8, kScratchDoubleReg, src.low(), src.high());
|
UseScratchRegisterScope temps(tasm());
|
||||||
__ vpmin(NeonU8, kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
|
DwVfpRegister scratch = temps.AcquireD();
|
||||||
__ vpmin(NeonU8, kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
|
__ vpmin(NeonU8, scratch, src.low(), src.high());
|
||||||
__ vpmin(NeonU8, kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
|
__ vpmin(NeonU8, scratch, scratch, scratch);
|
||||||
__ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS8, 0);
|
__ vpmin(NeonU8, scratch, scratch, scratch);
|
||||||
|
__ vpmin(NeonU8, scratch, scratch, scratch);
|
||||||
|
__ ExtractLane(i.OutputRegister(), scratch, NeonS8, 0);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kAtomicLoadInt8:
|
case kAtomicLoadInt8:
|
||||||
@ -3014,15 +3058,19 @@ void CodeGenerator::AssembleMove(InstructionOperand* source,
|
|||||||
__ ldr(kScratchReg, src);
|
__ ldr(kScratchReg, src);
|
||||||
__ str(kScratchReg, dst);
|
__ str(kScratchReg, dst);
|
||||||
} else if (source->IsDoubleStackSlot()) {
|
} else if (source->IsDoubleStackSlot()) {
|
||||||
__ vldr(kScratchDoubleReg, src);
|
UseScratchRegisterScope temps(tasm());
|
||||||
__ vstr(kScratchDoubleReg, dst);
|
DwVfpRegister temp = temps.AcquireD();
|
||||||
|
__ vldr(temp, src);
|
||||||
|
__ vstr(temp, dst);
|
||||||
} else {
|
} else {
|
||||||
DCHECK(source->IsSimd128StackSlot());
|
DCHECK(source->IsSimd128StackSlot());
|
||||||
|
UseScratchRegisterScope temps(tasm());
|
||||||
|
QwNeonRegister temp_q = temps.AcquireQ();
|
||||||
__ add(kScratchReg, src.rn(), Operand(src.offset()));
|
__ add(kScratchReg, src.rn(), Operand(src.offset()));
|
||||||
__ vld1(Neon8, NeonListOperand(kScratchQuadReg.low(), 2),
|
__ vld1(Neon8, NeonListOperand(temp_q.low(), 2),
|
||||||
NeonMemOperand(kScratchReg));
|
NeonMemOperand(kScratchReg));
|
||||||
__ add(kScratchReg, dst.rn(), Operand(dst.offset()));
|
__ add(kScratchReg, dst.rn(), Operand(dst.offset()));
|
||||||
__ vst1(Neon8, NeonListOperand(kScratchQuadReg.low(), 2),
|
__ vst1(Neon8, NeonListOperand(temp_q.low(), 2),
|
||||||
NeonMemOperand(kScratchReg));
|
NeonMemOperand(kScratchReg));
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
@ -3050,8 +3098,10 @@ void CodeGenerator::AssembleMove(InstructionOperand* source,
|
|||||||
__ str(kScratchReg, dst);
|
__ str(kScratchReg, dst);
|
||||||
} else {
|
} else {
|
||||||
DCHECK(destination->IsDoubleStackSlot());
|
DCHECK(destination->IsDoubleStackSlot());
|
||||||
__ vmov(kScratchDoubleReg, src.ToFloat64(), kScratchReg);
|
UseScratchRegisterScope temps(tasm());
|
||||||
__ vstr(kScratchDoubleReg, g.ToMemOperand(destination));
|
DwVfpRegister temp = temps.AcquireD();
|
||||||
|
__ vmov(temp, src.ToFloat64(), kScratchReg);
|
||||||
|
__ vstr(temp, g.ToMemOperand(destination));
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -3070,7 +3120,8 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
|
|||||||
DCHECK(destination->IsFloatRegister());
|
DCHECK(destination->IsFloatRegister());
|
||||||
// GapResolver may give us reg codes that don't map to actual
|
// GapResolver may give us reg codes that don't map to actual
|
||||||
// s-registers. Generate code to work around those cases.
|
// s-registers. Generate code to work around those cases.
|
||||||
LowDwVfpRegister temp = kScratchDoubleReg;
|
UseScratchRegisterScope temps(tasm());
|
||||||
|
LowDwVfpRegister temp = temps.AcquireLowD();
|
||||||
int src_code = LocationOperand::cast(source)->register_code();
|
int src_code = LocationOperand::cast(source)->register_code();
|
||||||
int dst_code = LocationOperand::cast(destination)->register_code();
|
int dst_code = LocationOperand::cast(destination)->register_code();
|
||||||
__ VmovExtended(temp.low().code(), src_code);
|
__ VmovExtended(temp.low().code(), src_code);
|
||||||
@ -3091,22 +3142,27 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
|
|||||||
__ str(kScratchReg, dst);
|
__ str(kScratchReg, dst);
|
||||||
} else if (source->IsFloatRegister()) {
|
} else if (source->IsFloatRegister()) {
|
||||||
int src_code = LocationOperand::cast(source)->register_code();
|
int src_code = LocationOperand::cast(source)->register_code();
|
||||||
LowDwVfpRegister temp = kScratchDoubleReg;
|
UseScratchRegisterScope temps(tasm());
|
||||||
|
LowDwVfpRegister temp = temps.AcquireLowD();
|
||||||
__ VmovExtended(temp.low().code(), src_code);
|
__ VmovExtended(temp.low().code(), src_code);
|
||||||
__ VmovExtended(src_code, dst);
|
__ VmovExtended(src_code, dst);
|
||||||
__ vstr(temp.low(), dst);
|
__ vstr(temp.low(), dst);
|
||||||
} else if (source->IsDoubleRegister()) {
|
} else if (source->IsDoubleRegister()) {
|
||||||
|
UseScratchRegisterScope temps(tasm());
|
||||||
|
DwVfpRegister temp = temps.AcquireD();
|
||||||
DwVfpRegister src = g.ToDoubleRegister(source);
|
DwVfpRegister src = g.ToDoubleRegister(source);
|
||||||
__ Move(kScratchDoubleReg, src);
|
__ Move(temp, src);
|
||||||
__ vldr(src, dst);
|
__ vldr(src, dst);
|
||||||
__ vstr(kScratchDoubleReg, dst);
|
__ vstr(temp, dst);
|
||||||
} else {
|
} else {
|
||||||
QwNeonRegister src = g.ToSimd128Register(source);
|
QwNeonRegister src = g.ToSimd128Register(source);
|
||||||
__ Move(kScratchQuadReg, src);
|
UseScratchRegisterScope temps(tasm());
|
||||||
|
QwNeonRegister temp_q = temps.AcquireQ();
|
||||||
|
__ Move(temp_q, src);
|
||||||
__ add(kScratchReg, dst.rn(), Operand(dst.offset()));
|
__ add(kScratchReg, dst.rn(), Operand(dst.offset()));
|
||||||
__ vld1(Neon8, NeonListOperand(src.low(), 2),
|
__ vld1(Neon8, NeonListOperand(src.low(), 2),
|
||||||
NeonMemOperand(kScratchReg));
|
NeonMemOperand(kScratchReg));
|
||||||
__ vst1(Neon8, NeonListOperand(kScratchQuadReg.low(), 2),
|
__ vst1(Neon8, NeonListOperand(temp_q.low(), 2),
|
||||||
NeonMemOperand(kScratchReg));
|
NeonMemOperand(kScratchReg));
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
@ -3116,14 +3172,16 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
|
|||||||
MemOperand dst = g.ToMemOperand(destination);
|
MemOperand dst = g.ToMemOperand(destination);
|
||||||
if (source->IsStackSlot() || source->IsFloatStackSlot()) {
|
if (source->IsStackSlot() || source->IsFloatStackSlot()) {
|
||||||
Register temp_0 = kScratchReg;
|
Register temp_0 = kScratchReg;
|
||||||
SwVfpRegister temp_1 = kScratchDoubleReg.low();
|
UseScratchRegisterScope temps(tasm());
|
||||||
|
SwVfpRegister temp_1 = temps.AcquireS();
|
||||||
__ ldr(temp_0, src);
|
__ ldr(temp_0, src);
|
||||||
__ vldr(temp_1, dst);
|
__ vldr(temp_1, dst);
|
||||||
__ str(temp_0, dst);
|
__ str(temp_0, dst);
|
||||||
__ vstr(temp_1, src);
|
__ vstr(temp_1, src);
|
||||||
} else if (source->IsDoubleStackSlot()) {
|
} else if (source->IsDoubleStackSlot()) {
|
||||||
|
UseScratchRegisterScope temps(tasm());
|
||||||
Register temp_0 = kScratchReg;
|
Register temp_0 = kScratchReg;
|
||||||
LowDwVfpRegister temp_1 = kScratchDoubleReg;
|
DwVfpRegister temp_1 = temps.AcquireD();
|
||||||
// Save destination in temp_1.
|
// Save destination in temp_1.
|
||||||
__ vldr(temp_1, dst);
|
__ vldr(temp_1, dst);
|
||||||
// Then use temp_0 to copy source to destination.
|
// Then use temp_0 to copy source to destination.
|
||||||
@ -3138,14 +3196,17 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
|
|||||||
MemOperand dst0 = dst;
|
MemOperand dst0 = dst;
|
||||||
MemOperand src1(src.rn(), src.offset() + kDoubleSize);
|
MemOperand src1(src.rn(), src.offset() + kDoubleSize);
|
||||||
MemOperand dst1(dst.rn(), dst.offset() + kDoubleSize);
|
MemOperand dst1(dst.rn(), dst.offset() + kDoubleSize);
|
||||||
__ vldr(kScratchQuadReg.low(), dst0);
|
UseScratchRegisterScope temps(tasm());
|
||||||
__ vldr(kScratchQuadReg.high(), src0);
|
DwVfpRegister temp_0 = temps.AcquireD();
|
||||||
__ vstr(kScratchQuadReg.low(), src0);
|
DwVfpRegister temp_1 = temps.AcquireD();
|
||||||
__ vstr(kScratchQuadReg.high(), dst0);
|
__ vldr(temp_0, dst0);
|
||||||
__ vldr(kScratchQuadReg.low(), dst1);
|
__ vldr(temp_1, src0);
|
||||||
__ vldr(kScratchQuadReg.high(), src1);
|
__ vstr(temp_0, src0);
|
||||||
__ vstr(kScratchQuadReg.low(), src1);
|
__ vstr(temp_1, dst0);
|
||||||
__ vstr(kScratchQuadReg.high(), dst1);
|
__ vldr(temp_0, dst1);
|
||||||
|
__ vldr(temp_1, src1);
|
||||||
|
__ vstr(temp_0, src1);
|
||||||
|
__ vstr(temp_1, dst1);
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -3923,6 +3923,101 @@ TEST(use_scratch_register_scope) {
|
|||||||
CHECK_EQ(*assm.GetScratchRegisterList(), ip.bit());
|
CHECK_EQ(*assm.GetScratchRegisterList(), ip.bit());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(use_scratch_vfp_register_scope) {
|
||||||
|
CcTest::InitializeVM();
|
||||||
|
Isolate* isolate = CcTest::i_isolate();
|
||||||
|
HandleScope scope(isolate);
|
||||||
|
|
||||||
|
Assembler assm(isolate, nullptr, 0);
|
||||||
|
|
||||||
|
VfpRegList orig_scratches = *assm.GetScratchVfpRegisterList();
|
||||||
|
|
||||||
|
if (CpuFeatures::IsSupported(VFP32DREGS)) {
|
||||||
|
CHECK_EQ(orig_scratches, d14.ToVfpRegList() | d15.ToVfpRegList());
|
||||||
|
} else {
|
||||||
|
CHECK_EQ(orig_scratches, d14.ToVfpRegList());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test each configuration of scratch registers we can have at the same time.
|
||||||
|
|
||||||
|
{
|
||||||
|
UseScratchRegisterScope temps(&assm);
|
||||||
|
|
||||||
|
SwVfpRegister s1_scratch = temps.AcquireS();
|
||||||
|
CHECK_EQ(s1_scratch, s28);
|
||||||
|
|
||||||
|
SwVfpRegister s2_scratch = temps.AcquireS();
|
||||||
|
CHECK_EQ(s2_scratch, s29);
|
||||||
|
|
||||||
|
if (CpuFeatures::IsSupported(VFP32DREGS)) {
|
||||||
|
SwVfpRegister s3_scratch = temps.AcquireS();
|
||||||
|
CHECK_EQ(s3_scratch, s30);
|
||||||
|
|
||||||
|
SwVfpRegister s4_scratch = temps.AcquireS();
|
||||||
|
CHECK_EQ(s4_scratch, s31);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
CHECK_EQ(*assm.GetScratchVfpRegisterList(), orig_scratches);
|
||||||
|
|
||||||
|
{
|
||||||
|
UseScratchRegisterScope temps(&assm);
|
||||||
|
|
||||||
|
SwVfpRegister s1_scratch = temps.AcquireS();
|
||||||
|
CHECK_EQ(s1_scratch, s28);
|
||||||
|
|
||||||
|
SwVfpRegister s2_scratch = temps.AcquireS();
|
||||||
|
CHECK_EQ(s2_scratch, s29);
|
||||||
|
|
||||||
|
if (CpuFeatures::IsSupported(VFP32DREGS)) {
|
||||||
|
DwVfpRegister d_scratch = temps.AcquireD();
|
||||||
|
CHECK_EQ(d_scratch, d15);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
CHECK_EQ(*assm.GetScratchVfpRegisterList(), orig_scratches);
|
||||||
|
|
||||||
|
{
|
||||||
|
UseScratchRegisterScope temps(&assm);
|
||||||
|
|
||||||
|
DwVfpRegister d_scratch = temps.AcquireD();
|
||||||
|
CHECK_EQ(d_scratch, d14);
|
||||||
|
|
||||||
|
if (CpuFeatures::IsSupported(VFP32DREGS)) {
|
||||||
|
SwVfpRegister s1_scratch = temps.AcquireS();
|
||||||
|
CHECK_EQ(s1_scratch, s30);
|
||||||
|
|
||||||
|
SwVfpRegister s2_scratch = temps.AcquireS();
|
||||||
|
CHECK_EQ(s2_scratch, s31);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
CHECK_EQ(*assm.GetScratchVfpRegisterList(), orig_scratches);
|
||||||
|
|
||||||
|
{
|
||||||
|
UseScratchRegisterScope temps(&assm);
|
||||||
|
|
||||||
|
DwVfpRegister d1_scratch = temps.AcquireD();
|
||||||
|
CHECK_EQ(d1_scratch, d14);
|
||||||
|
|
||||||
|
if (CpuFeatures::IsSupported(VFP32DREGS)) {
|
||||||
|
DwVfpRegister d2_scratch = temps.AcquireD();
|
||||||
|
CHECK_EQ(d2_scratch, d15);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
CHECK_EQ(*assm.GetScratchVfpRegisterList(), orig_scratches);
|
||||||
|
|
||||||
|
if (CpuFeatures::IsSupported(NEON)) {
|
||||||
|
UseScratchRegisterScope temps(&assm);
|
||||||
|
|
||||||
|
QwNeonRegister q_scratch = temps.AcquireQ();
|
||||||
|
CHECK_EQ(q_scratch, q7);
|
||||||
|
}
|
||||||
|
|
||||||
|
CHECK_EQ(*assm.GetScratchVfpRegisterList(), orig_scratches);
|
||||||
|
}
|
||||||
|
|
||||||
TEST(split_add_immediate) {
|
TEST(split_add_immediate) {
|
||||||
CcTest::InitializeVM();
|
CcTest::InitializeVM();
|
||||||
Isolate* isolate = CcTest::i_isolate();
|
Isolate* isolate = CcTest::i_isolate();
|
||||||
|
Loading…
Reference in New Issue
Block a user