[riscv64] Improve unaligned memory accesses

This commit allows using unaligned loads and stores, which are more
efficient for 2-, 4-, and 8-byte memory accesses. Use
RISCV_HAS_NO_UNALIGNED to control whether the fast path is enabled.

Change-Id: I1d321e6e5fa5bc31541c8dbfe582881d80743483
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3329803
Reviewed-by: ji qiu <qiuji@iscas.ac.cn>
Commit-Queue: Yahan Lu <yahan@iscas.ac.cn>
Cr-Commit-Position: refs/heads/main@{#78427}
Author: Lu Yahan, 2021-12-16 17:19:14 +08:00, committed by V8 LUCI CQ
parent 64a23393f7
commit 0dbcfe1fde
6 changed files with 82 additions and 54 deletions
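For context, the sketch below (an illustration only, not V8 code) shows why hardware unaligned support pays off: a misaligned 4-byte read can be a single load instruction, while a core that traps on misaligned accesses must assemble the value from aligned byte loads. RISC-V is little-endian, which fixes the byte order used here.

    #include <cstdint>
    #include <cstring>

    // With fast unaligned accesses this can compile to a single lw.
    uint32_t ReadU32Fast(const uint8_t* p) {
      uint32_t v;
      std::memcpy(&v, p, sizeof(v));
      return v;
    }

    // Byte-wise fallback for cores that trap on misaligned loads:
    // four lbu plus shifts and ors (little-endian byte order).
    uint32_t ReadU32ByteWise(const uint8_t* p) {
      return static_cast<uint32_t>(p[0]) |
             (static_cast<uint32_t>(p[1]) << 8) |
             (static_cast<uint32_t>(p[2]) << 16) |
             (static_cast<uint32_t>(p[3]) << 24);
    }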


@@ -3306,8 +3306,13 @@ InstructionSelector::SupportedMachineOperatorFlags() {
 // static
 MachineOperatorBuilder::AlignmentRequirements
 InstructionSelector::AlignmentRequirements() {
+#ifdef RISCV_HAS_NO_UNALIGNED
   return MachineOperatorBuilder::AlignmentRequirements::
       NoUnalignedAccessSupport();
+#else
+  return MachineOperatorBuilder::AlignmentRequirements::
+      FullUnalignedAccessSupport();
+#endif
 }
 #undef SIMD_BINOP_LIST
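How this value is consumed upstream: the MachineOperatorBuilder exposes per-representation queries (UnalignedLoadSupported / UnalignedStoreSupported), and with FullUnalignedAccessSupport() they answer true for every representation, so the compiler keeps single unaligned Load/Store operations instead of splitting them into byte accesses. A minimal consumer-side sketch, assuming those accessors (this is not the actual TurboFan lowering code):

    void LowerLoad(MachineOperatorBuilder* machine, MachineRepresentation rep) {
      if (machine->UnalignedLoadSupported(rep)) {
        // Keep the plain load; the backend emits one Lw/Ld/LoadFloat/...
      } else {
        // Split into aligned partial loads combined with shifts.
      }
    }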


@@ -2413,14 +2413,14 @@ T Simulator::ReadMem(int64_t addr, Instruction* instr) {
         addr, reinterpret_cast<intptr_t>(instr));
     DieOrDebug();
   }
-#ifndef V8_COMPRESS_POINTERS  // TODO(RISCV): v8:11812
-  // // check for natural alignment
-  // if (!FLAG_riscv_c_extension && ((addr & (sizeof(T) - 1)) != 0)) {
-  //   PrintF("Unaligned read at 0x%08" PRIx64 " , pc=0x%08" V8PRIxPTR "\n",
-  //          addr,
-  //          reinterpret_cast<intptr_t>(instr));
-  //   DieOrDebug();
-  // }
+#if !defined(V8_COMPRESS_POINTERS) && defined(RISCV_HAS_NO_UNALIGNED)
+  // check for natural alignment
+  if (!FLAG_riscv_c_extension && ((addr & (sizeof(T) - 1)) != 0)) {
+    PrintF("Unaligned read at 0x%08" PRIx64 " , pc=0x%08" V8PRIxPTR "\n",
+           addr,
+           reinterpret_cast<intptr_t>(instr));
+    DieOrDebug();
+  }
 #endif
   T* ptr = reinterpret_cast<T*>(addr);
   T value = *ptr;
@@ -2436,7 +2436,7 @@ void Simulator::WriteMem(int64_t addr, T value, Instruction* instr) {
         addr, reinterpret_cast<intptr_t>(instr));
     DieOrDebug();
   }
-#ifndef V8_COMPRESS_POINTERS  // TODO(RISCV): v8:11812
+#if !defined(V8_COMPRESS_POINTERS) && defined(RISCV_HAS_NO_UNALIGNED)
   // check for natural alignment
   if (!FLAG_riscv_c_extension && ((addr & (sizeof(T) - 1)) != 0)) {
     PrintF("Unaligned write at 0x%08" PRIx64 " , pc=0x%08" V8PRIxPTR "\n", addr,

@@ -1144,9 +1144,9 @@ void RegExpMacroAssemblerRISCV::ClearRegisters(int reg_from, int reg_to) {
     __ Sd(a0, register_location(reg));
   }
 }
+#ifdef RISCV_HAS_NO_UNALIGNED
 bool RegExpMacroAssemblerRISCV::CanReadUnaligned() const { return false; }
+#endif
 // Private methods:
 void RegExpMacroAssemblerRISCV::CallCheckStackGuardState(Register scratch) {
@@ -1328,20 +1328,40 @@ void RegExpMacroAssemblerRISCV::CheckStackLimit() {
 void RegExpMacroAssemblerRISCV::LoadCurrentCharacterUnchecked(int cp_offset,
                                                               int characters) {
   Register offset = current_input_offset();
-  if (cp_offset != 0) {
-    // s3 is not being used to store the capture start index at this point.
-    __ Add64(s3, current_input_offset(), Operand(cp_offset * char_size()));
-    offset = s3;
+  // If unaligned load/stores are not supported then this function must only
+  // be used to load a single character at a time.
+  if (!CanReadUnaligned()) {
+    DCHECK_EQ(1, characters);
   }
-  // We assume that we cannot do unaligned loads on RISC-V, so this function
-  // must only be used to load a single character at a time.
-  DCHECK_EQ(1, characters);
-  __ Add64(t1, end_of_input_address(), Operand(offset));
+  if (cp_offset != 0) {
+    // t3 is not being used to store the capture start index at this point.
+    __ Add64(t3, current_input_offset(), Operand(cp_offset * char_size()));
+    offset = t3;
+  }
   if (mode_ == LATIN1) {
-    __ Lbu(current_character(), MemOperand(t1, 0));
+    if (characters == 4) {
+      __ Add64(kScratchReg, end_of_input_address(), offset);
+      __ Lwu(current_character(), MemOperand(kScratchReg));
+    } else if (characters == 2) {
+      __ Add64(kScratchReg, end_of_input_address(), offset);
+      __ Lhu(current_character(), MemOperand(kScratchReg));
+    } else {
+      DCHECK_EQ(1, characters);
+      __ Add64(kScratchReg, end_of_input_address(), offset);
+      __ Lbu(current_character(), MemOperand(kScratchReg));
+    }
   } else {
     DCHECK(mode_ == UC16);
-    __ Lhu(current_character(), MemOperand(t1, 0));
+    if (characters == 2) {
+      __ Add64(kScratchReg, end_of_input_address(), offset);
+      __ Lwu(current_character(), MemOperand(kScratchReg));
+    } else {
+      DCHECK_EQ(1, characters);
+      __ Add64(kScratchReg, end_of_input_address(), offset);
+      __ Lhu(current_character(), MemOperand(kScratchReg));
+    }
   }
 }
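The branching above selects the zero-extending load whose width equals characters * char_size(): in LATIN1 mode (one byte per character) four characters fit in one Lwu and two in one Lhu, while in UC16 mode (two bytes per character) two characters already need the 4-byte Lwu. A sketch of that mapping (illustrative helper, not V8 code):

    // Returns the access width in bytes: 1 -> Lbu, 2 -> Lhu, 4 -> Lwu.
    int LoadWidthInBytes(bool latin1, int characters) {
      const int char_size = latin1 ? 1 : 2;
      return characters * char_size;
    }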


@@ -83,8 +83,9 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerRISCV
   void WriteCurrentPositionToRegister(int reg, int cp_offset) override;
   void ClearRegisters(int reg_from, int reg_to) override;
   void WriteStackPointerToRegister(int reg) override;
+#ifdef RISCV_HAS_NO_UNALIGNED
   bool CanReadUnaligned() const override;
+#endif
   // Called from RegExp if the stack-guard is triggered.
   // If the code object is relocated, the return address is fixed before
   // returning.
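The override is declared only when RISCV_HAS_NO_UNALIGNED is defined; otherwise the class inherits the base implementation, which can allow unaligned reads. A minimal illustration of the pattern (hypothetical class names, not the V8 hierarchy):

    struct AssemblerBase {
      virtual ~AssemblerBase() = default;
      virtual bool CanReadUnaligned() const { return true; }
    };

    struct AssemblerRiscv : AssemblerBase {
    #ifdef RISCV_HAS_NO_UNALIGNED
      bool CanReadUnaligned() const override { return false; }
    #endif
    };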


@@ -123,19 +123,19 @@ inline void Store(LiftoffAssembler* assm, Register base, int32_t offset,
   MemOperand dst(base, offset);
   switch (kind) {
     case kI32:
-      assm->Usw(src.gp(), dst);
+      assm->Sw(src.gp(), dst);
       break;
     case kI64:
     case kOptRef:
     case kRef:
     case kRtt:
-      assm->Usd(src.gp(), dst);
+      assm->Sd(src.gp(), dst);
       break;
     case kF32:
-      assm->UStoreFloat(src.fp(), dst, kScratchReg);
+      assm->StoreFloat(src.fp(), dst);
       break;
     case kF64:
-      assm->UStoreDouble(src.fp(), dst, kScratchReg);
+      assm->StoreDouble(src.fp(), dst);
       break;
     default:
       UNREACHABLE();
@@ -539,27 +539,27 @@ void LiftoffAssembler::Load(LiftoffRegister dst, Register src_addr,
       break;
     case LoadType::kI32Load16U:
     case LoadType::kI64Load16U:
-      TurboAssembler::Ulhu(dst.gp(), src_op);
+      TurboAssembler::Lhu(dst.gp(), src_op);
       break;
     case LoadType::kI32Load16S:
     case LoadType::kI64Load16S:
-      TurboAssembler::Ulh(dst.gp(), src_op);
+      TurboAssembler::Lh(dst.gp(), src_op);
       break;
     case LoadType::kI64Load32U:
-      TurboAssembler::Ulwu(dst.gp(), src_op);
+      TurboAssembler::Lwu(dst.gp(), src_op);
       break;
     case LoadType::kI32Load:
    case LoadType::kI64Load32S:
-      TurboAssembler::Ulw(dst.gp(), src_op);
+      TurboAssembler::Lw(dst.gp(), src_op);
       break;
     case LoadType::kI64Load:
-      TurboAssembler::Uld(dst.gp(), src_op);
+      TurboAssembler::Ld(dst.gp(), src_op);
       break;
     case LoadType::kF32Load:
-      TurboAssembler::ULoadFloat(dst.fp(), src_op, kScratchReg);
+      TurboAssembler::LoadFloat(dst.fp(), src_op);
       break;
     case LoadType::kF64Load:
-      TurboAssembler::ULoadDouble(dst.fp(), src_op, kScratchReg);
+      TurboAssembler::LoadDouble(dst.fp(), src_op);
       break;
     case LoadType::kS128Load: {
       VU.set(kScratchReg, E8, m1);
@@ -610,20 +610,20 @@ void LiftoffAssembler::Store(Register dst_addr, Register offset_reg,
       break;
     case StoreType::kI32Store16:
     case StoreType::kI64Store16:
-      TurboAssembler::Ush(src.gp(), dst_op);
+      TurboAssembler::Sh(src.gp(), dst_op);
       break;
     case StoreType::kI32Store:
     case StoreType::kI64Store32:
-      TurboAssembler::Usw(src.gp(), dst_op);
+      TurboAssembler::Sw(src.gp(), dst_op);
       break;
     case StoreType::kI64Store:
-      TurboAssembler::Usd(src.gp(), dst_op);
+      TurboAssembler::Sd(src.gp(), dst_op);
       break;
     case StoreType::kF32Store:
-      TurboAssembler::UStoreFloat(src.fp(), dst_op, kScratchReg);
+      TurboAssembler::StoreFloat(src.fp(), dst_op);
       break;
     case StoreType::kF64Store:
-      TurboAssembler::UStoreDouble(src.fp(), dst_op, kScratchReg);
+      TurboAssembler::StoreDouble(src.fp(), dst_op);
       break;
     case StoreType::kS128Store: {
       VU.set(kScratchReg, E8, m1);
@@ -3575,7 +3575,7 @@ void LiftoffAssembler::emit_s128_set_if_nan(Register dst, LiftoffRegister src,
 }
 void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
-  TurboAssembler::Uld(limit_address, MemOperand(limit_address));
+  TurboAssembler::Ld(limit_address, MemOperand(limit_address));
   TurboAssembler::Branch(ool_code, ule, sp, Operand(limit_address));
 }
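The U-prefixed TurboAssembler helpers replaced above (Ulw, Usd, ULoadFloat, ...) are macro-instructions that stay correct on cores without unaligned support, at the cost of several instructions plus a scratch register per access, which is why the unaligned variants took a kScratchReg argument; the plain forms map to a single load or store. Roughly, for a 16-bit unsigned load (an assumed expansion for illustration only; the real helper may differ):

    # byte-assembled, unaligned-safe (sketch)     # plain form
    lbu  t0, 0(a0)                                lhu a1, 0(a0)
    lbu  t1, 1(a0)
    slli t1, t1, 8
    or   a1, t0, t1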


@@ -1056,19 +1056,6 @@ std::ostream& operator<<(std::ostream& os, const MemoryAccessImm1& acc) {
   return os << acc.type;
 }
-struct MemoryAccessImm2 {
-  MachineType type;
-  ArchOpcode store_opcode;
-  ArchOpcode store_opcode_unaligned;
-  bool (InstructionSelectorTest::Stream::*val_predicate)(
-      const InstructionOperand*) const;
-  const int32_t immediates[40];
-};
-std::ostream& operator<<(std::ostream& os, const MemoryAccessImm2& acc) {
-  return os << acc.type;
-}
 // ----------------------------------------------------------------------------
 // Loads and stores immediate values
 // ----------------------------------------------------------------------------
@@ -1181,6 +1168,20 @@ const MemoryAccessImm1 kMemoryAccessImmMoreThan16bit[] = {
      &InstructionSelectorTest::Stream::IsInteger,
      {-65000, -55000, 32777, 55000, 65000}}};
+#ifdef RISCV_HAS_NO_UNALIGNED
+struct MemoryAccessImm2 {
+  MachineType type;
+  ArchOpcode store_opcode;
+  ArchOpcode store_opcode_unaligned;
+  bool (InstructionSelectorTest::Stream::*val_predicate)(
+      const InstructionOperand*) const;
+  const int32_t immediates[40];
+};
+std::ostream& operator<<(std::ostream& os, const MemoryAccessImm2& acc) {
+  return os << acc.type;
+}
 const MemoryAccessImm2 kMemoryAccessesImmUnaligned[] = {
     {MachineType::Int16(),
      kRiscvUsh,
@@ -1222,7 +1223,7 @@ const MemoryAccessImm2 kMemoryAccessesImmUnaligned[] = {
      -89, -87, -86, -82, -44, -23, -3, 0, 7, 10,
      39, 52, 69, 71, 91, 92, 107, 109, 115, 124,
      286, 655, 1362, 1569, 2587, 3067, 3096, 3462, 3510, 4095}}};
+#endif
 }  // namespace
 using InstructionSelectorMemoryAccessTest =
@@ -1327,6 +1328,7 @@ INSTANTIATE_TEST_SUITE_P(InstructionSelectorTest,
                          InstructionSelectorMemoryAccessImmTest,
                          ::testing::ValuesIn(kMemoryAccessesImm));
+#ifdef RISCV_HAS_NO_UNALIGNED
 using InstructionSelectorMemoryAccessUnalignedImmTest =
     InstructionSelectorTestWithParam<MemoryAccessImm2>;
@@ -1358,7 +1360,7 @@ TEST_P(InstructionSelectorMemoryAccessUnalignedImmTest, StoreZero) {
 INSTANTIATE_TEST_SUITE_P(InstructionSelectorTest,
                          InstructionSelectorMemoryAccessUnalignedImmTest,
                          ::testing::ValuesIn(kMemoryAccessesImmUnaligned));
+#endif
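Each MemoryAccessImm2 entry pairs an aligned store opcode with its unaligned counterpart plus the immediates to exercise. A sketch of how a test body can pick the expected opcode from an entry (hypothetical helper built on the struct above):

    ArchOpcode ExpectedStoreOpcode(const MemoryAccessImm2& memacc, bool aligned) {
      return aligned ? memacc.store_opcode : memacc.store_opcode_unaligned;
    }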
// ----------------------------------------------------------------------------
// Load/store offsets more than 16 bits.
// ----------------------------------------------------------------------------