From 0dbcfe1fdeb0a93054a3e825cfbc186a65b53282 Mon Sep 17 00:00:00 2001 From: Lu Yahan Date: Thu, 16 Dec 2021 17:19:14 +0800 Subject: [PATCH] [riscv64] Improve unaligned memory accesses This commit allows using unaligned load/store, which is more efficient for 2-byte, 4-byte and 8-byte memory accesses. Use RISCV_HAS_NO_UNALIGNED to control whether to enable the fast path or not. Change-Id: I1d321e6e5fa5bc31541c8dbfe582881d80743483 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3329803 Reviewed-by: ji qiu Commit-Queue: Yahan Lu Cr-Commit-Position: refs/heads/main@{#78427} --- .../riscv64/instruction-selector-riscv64.cc | 5 +++ src/execution/riscv64/simulator-riscv64.cc | 18 ++++---- .../riscv64/regexp-macro-assembler-riscv64.cc | 44 ++++++++++++++----- .../riscv64/regexp-macro-assembler-riscv64.h | 3 +- .../riscv64/liftoff-assembler-riscv64.h | 34 +++++++------- .../instruction-selector-riscv64-unittest.cc | 32 +++++++------- 6 files changed, 82 insertions(+), 54 deletions(-) diff --git a/src/compiler/backend/riscv64/instruction-selector-riscv64.cc b/src/compiler/backend/riscv64/instruction-selector-riscv64.cc index 79f6b9288c..bc221d8872 100644 --- a/src/compiler/backend/riscv64/instruction-selector-riscv64.cc +++ b/src/compiler/backend/riscv64/instruction-selector-riscv64.cc @@ -3306,8 +3306,13 @@ InstructionSelector::SupportedMachineOperatorFlags() { // static MachineOperatorBuilder::AlignmentRequirements InstructionSelector::AlignmentRequirements() { +#ifdef RISCV_HAS_NO_UNALIGNED return MachineOperatorBuilder::AlignmentRequirements:: NoUnalignedAccessSupport(); +#else + return MachineOperatorBuilder::AlignmentRequirements:: + FullUnalignedAccessSupport(); +#endif } #undef SIMD_BINOP_LIST diff --git a/src/execution/riscv64/simulator-riscv64.cc b/src/execution/riscv64/simulator-riscv64.cc index 479c4b6a2f..6f323016bc 100644 --- a/src/execution/riscv64/simulator-riscv64.cc +++ b/src/execution/riscv64/simulator-riscv64.cc @@ -2413,14 +2413,14 @@ T 
Simulator::ReadMem(int64_t addr, Instruction* instr) { addr, reinterpret_cast(instr)); DieOrDebug(); } -#ifndef V8_COMPRESS_POINTERS // TODO(RISCV): v8:11812 - // // check for natural alignment - // if (!FLAG_riscv_c_extension && ((addr & (sizeof(T) - 1)) != 0)) { - // PrintF("Unaligned read at 0x%08" PRIx64 " , pc=0x%08" V8PRIxPTR "\n", - // addr, - // reinterpret_cast(instr)); - // DieOrDebug(); - // } +#if !defined(V8_COMPRESS_POINTERS) && defined(RISCV_HAS_NO_UNALIGNED) + // check for natural alignment + if (!FLAG_riscv_c_extension && ((addr & (sizeof(T) - 1)) != 0)) { + PrintF("Unaligned read at 0x%08" PRIx64 " , pc=0x%08" V8PRIxPTR "\n", + addr, + reinterpret_cast(instr)); + DieOrDebug(); + } #endif T* ptr = reinterpret_cast(addr); T value = *ptr; @@ -2436,7 +2436,7 @@ void Simulator::WriteMem(int64_t addr, T value, Instruction* instr) { addr, reinterpret_cast(instr)); DieOrDebug(); } -#ifndef V8_COMPRESS_POINTERS // TODO(RISCV): v8:11812 +#if !defined(V8_COMPRESS_POINTERS) && defined(RISCV_HAS_NO_UNALIGNED) // check for natural alignment if (!FLAG_riscv_c_extension && ((addr & (sizeof(T) - 1)) != 0)) { PrintF("Unaligned write at 0x%08" PRIx64 " , pc=0x%08" V8PRIxPTR "\n", addr, diff --git a/src/regexp/riscv64/regexp-macro-assembler-riscv64.cc b/src/regexp/riscv64/regexp-macro-assembler-riscv64.cc index dda2104feb..50fa5f119e 100644 --- a/src/regexp/riscv64/regexp-macro-assembler-riscv64.cc +++ b/src/regexp/riscv64/regexp-macro-assembler-riscv64.cc @@ -1144,9 +1144,9 @@ void RegExpMacroAssemblerRISCV::ClearRegisters(int reg_from, int reg_to) { __ Sd(a0, register_location(reg)); } } - +#ifdef RISCV_HAS_NO_UNALIGNED bool RegExpMacroAssemblerRISCV::CanReadUnaligned() const { return false; } - +#endif // Private methods: void RegExpMacroAssemblerRISCV::CallCheckStackGuardState(Register scratch) { @@ -1328,20 +1328,40 @@ void RegExpMacroAssemblerRISCV::CheckStackLimit() { void RegExpMacroAssemblerRISCV::LoadCurrentCharacterUnchecked(int cp_offset, int characters) 
{ Register offset = current_input_offset(); - if (cp_offset != 0) { - // s3 is not being used to store the capture start index at this point. - __ Add64(s3, current_input_offset(), Operand(cp_offset * char_size())); - offset = s3; + + // If unaligned load/stores are not supported then this function must only + // be used to load a single character at a time. + if (!CanReadUnaligned()) { + DCHECK_EQ(1, characters); } - // We assume that we cannot do unaligned loads on RISC-V, so this function - // must only be used to load a single character at a time. - DCHECK_EQ(1, characters); - __ Add64(t1, end_of_input_address(), Operand(offset)); + if (cp_offset != 0) { + // t3 is not being used to store the capture start index at this point. + __ Add64(t3, current_input_offset(), Operand(cp_offset * char_size())); + offset = t3; + } + if (mode_ == LATIN1) { - __ Lbu(current_character(), MemOperand(t1, 0)); + if (characters == 4) { + __ Add64(kScratchReg, end_of_input_address(), offset); + __ Lwu(current_character(), MemOperand(kScratchReg)); + } else if (characters == 2) { + __ Add64(kScratchReg, end_of_input_address(), offset); + __ Lhu(current_character(), MemOperand(kScratchReg)); + } else { + DCHECK_EQ(1, characters); + __ Add64(kScratchReg, end_of_input_address(), offset); + __ Lbu(current_character(), MemOperand(kScratchReg)); + } } else { DCHECK(mode_ == UC16); - __ Lhu(current_character(), MemOperand(t1, 0)); + if (characters == 2) { + __ Add64(kScratchReg, end_of_input_address(), offset); + __ Lwu(current_character(), MemOperand(kScratchReg)); + } else { + DCHECK_EQ(1, characters); + __ Add64(kScratchReg, end_of_input_address(), offset); + __ Lhu(current_character(), MemOperand(kScratchReg)); + } } } diff --git a/src/regexp/riscv64/regexp-macro-assembler-riscv64.h b/src/regexp/riscv64/regexp-macro-assembler-riscv64.h index 121569849a..7613b47b3e 100644 --- a/src/regexp/riscv64/regexp-macro-assembler-riscv64.h +++ b/src/regexp/riscv64/regexp-macro-assembler-riscv64.h 
@@ -83,8 +83,9 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerRISCV void WriteCurrentPositionToRegister(int reg, int cp_offset) override; void ClearRegisters(int reg_from, int reg_to) override; void WriteStackPointerToRegister(int reg) override; +#ifdef RISCV_HAS_NO_UNALIGNED bool CanReadUnaligned() const override; - +#endif // Called from RegExp if the stack-guard is triggered. // If the code object is relocated, the return address is fixed before // returning. diff --git a/src/wasm/baseline/riscv64/liftoff-assembler-riscv64.h b/src/wasm/baseline/riscv64/liftoff-assembler-riscv64.h index e53797ff74..83aea99218 100644 --- a/src/wasm/baseline/riscv64/liftoff-assembler-riscv64.h +++ b/src/wasm/baseline/riscv64/liftoff-assembler-riscv64.h @@ -123,19 +123,19 @@ inline void Store(LiftoffAssembler* assm, Register base, int32_t offset, MemOperand dst(base, offset); switch (kind) { case kI32: - assm->Usw(src.gp(), dst); + assm->Sw(src.gp(), dst); break; case kI64: case kOptRef: case kRef: case kRtt: - assm->Usd(src.gp(), dst); + assm->Sd(src.gp(), dst); break; case kF32: - assm->UStoreFloat(src.fp(), dst, kScratchReg); + assm->StoreFloat(src.fp(), dst); break; case kF64: - assm->UStoreDouble(src.fp(), dst, kScratchReg); + assm->StoreDouble(src.fp(), dst); break; default: UNREACHABLE(); @@ -539,27 +539,27 @@ void LiftoffAssembler::Load(LiftoffRegister dst, Register src_addr, break; case LoadType::kI32Load16U: case LoadType::kI64Load16U: - TurboAssembler::Ulhu(dst.gp(), src_op); + TurboAssembler::Lhu(dst.gp(), src_op); break; case LoadType::kI32Load16S: case LoadType::kI64Load16S: - TurboAssembler::Ulh(dst.gp(), src_op); + TurboAssembler::Lh(dst.gp(), src_op); break; case LoadType::kI64Load32U: - TurboAssembler::Ulwu(dst.gp(), src_op); + TurboAssembler::Lwu(dst.gp(), src_op); break; case LoadType::kI32Load: case LoadType::kI64Load32S: - TurboAssembler::Ulw(dst.gp(), src_op); + TurboAssembler::Lw(dst.gp(), src_op); break; case LoadType::kI64Load: - 
TurboAssembler::Uld(dst.gp(), src_op); + TurboAssembler::Ld(dst.gp(), src_op); break; case LoadType::kF32Load: - TurboAssembler::ULoadFloat(dst.fp(), src_op, kScratchReg); + TurboAssembler::LoadFloat(dst.fp(), src_op); break; case LoadType::kF64Load: - TurboAssembler::ULoadDouble(dst.fp(), src_op, kScratchReg); + TurboAssembler::LoadDouble(dst.fp(), src_op); break; case LoadType::kS128Load: { VU.set(kScratchReg, E8, m1); @@ -610,20 +610,20 @@ void LiftoffAssembler::Store(Register dst_addr, Register offset_reg, break; case StoreType::kI32Store16: case StoreType::kI64Store16: - TurboAssembler::Ush(src.gp(), dst_op); + TurboAssembler::Sh(src.gp(), dst_op); break; case StoreType::kI32Store: case StoreType::kI64Store32: - TurboAssembler::Usw(src.gp(), dst_op); + TurboAssembler::Sw(src.gp(), dst_op); break; case StoreType::kI64Store: - TurboAssembler::Usd(src.gp(), dst_op); + TurboAssembler::Sd(src.gp(), dst_op); break; case StoreType::kF32Store: - TurboAssembler::UStoreFloat(src.fp(), dst_op, kScratchReg); + TurboAssembler::StoreFloat(src.fp(), dst_op); break; case StoreType::kF64Store: - TurboAssembler::UStoreDouble(src.fp(), dst_op, kScratchReg); + TurboAssembler::StoreDouble(src.fp(), dst_op); break; case StoreType::kS128Store: { VU.set(kScratchReg, E8, m1); @@ -3575,7 +3575,7 @@ void LiftoffAssembler::emit_s128_set_if_nan(Register dst, LiftoffRegister src, } void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) { - TurboAssembler::Uld(limit_address, MemOperand(limit_address)); + TurboAssembler::Ld(limit_address, MemOperand(limit_address)); TurboAssembler::Branch(ool_code, ule, sp, Operand(limit_address)); } diff --git a/test/unittests/compiler/riscv64/instruction-selector-riscv64-unittest.cc b/test/unittests/compiler/riscv64/instruction-selector-riscv64-unittest.cc index ed657d9c4c..d578111829 100644 --- a/test/unittests/compiler/riscv64/instruction-selector-riscv64-unittest.cc +++ 
b/test/unittests/compiler/riscv64/instruction-selector-riscv64-unittest.cc @@ -1056,19 +1056,6 @@ std::ostream& operator<<(std::ostream& os, const MemoryAccessImm1& acc) { return os << acc.type; } -struct MemoryAccessImm2 { - MachineType type; - ArchOpcode store_opcode; - ArchOpcode store_opcode_unaligned; - bool (InstructionSelectorTest::Stream::*val_predicate)( - const InstructionOperand*) const; - const int32_t immediates[40]; -}; - -std::ostream& operator<<(std::ostream& os, const MemoryAccessImm2& acc) { - return os << acc.type; -} - // ---------------------------------------------------------------------------- // Loads and stores immediate values // ---------------------------------------------------------------------------- @@ -1181,6 +1168,20 @@ const MemoryAccessImm1 kMemoryAccessImmMoreThan16bit[] = { &InstructionSelectorTest::Stream::IsInteger, {-65000, -55000, 32777, 55000, 65000}}}; +#ifdef RISCV_HAS_NO_UNALIGNED +struct MemoryAccessImm2 { + MachineType type; + ArchOpcode store_opcode; + ArchOpcode store_opcode_unaligned; + bool (InstructionSelectorTest::Stream::*val_predicate)( + const InstructionOperand*) const; + const int32_t immediates[40]; +}; + +std::ostream& operator<<(std::ostream& os, const MemoryAccessImm2& acc) { + return os << acc.type; +} + const MemoryAccessImm2 kMemoryAccessesImmUnaligned[] = { {MachineType::Int16(), kRiscvUsh, @@ -1222,7 +1223,7 @@ const MemoryAccessImm2 kMemoryAccessesImmUnaligned[] = { -89, -87, -86, -82, -44, -23, -3, 0, 7, 10, 39, 52, 69, 71, 91, 92, 107, 109, 115, 124, 286, 655, 1362, 1569, 2587, 3067, 3096, 3462, 3510, 4095}}}; - +#endif } // namespace using InstructionSelectorMemoryAccessTest = @@ -1327,6 +1328,7 @@ INSTANTIATE_TEST_SUITE_P(InstructionSelectorTest, InstructionSelectorMemoryAccessImmTest, ::testing::ValuesIn(kMemoryAccessesImm)); +#ifdef RISCV_HAS_NO_UNALIGNED using InstructionSelectorMemoryAccessUnalignedImmTest = InstructionSelectorTestWithParam; @@ -1358,7 +1360,7 @@ 
TEST_P(InstructionSelectorMemoryAccessUnalignedImmTest, StoreZero) { INSTANTIATE_TEST_SUITE_P(InstructionSelectorTest, InstructionSelectorMemoryAccessUnalignedImmTest, ::testing::ValuesIn(kMemoryAccessesImmUnaligned)); - +#endif // ---------------------------------------------------------------------------- // Load/store offsets more than 16 bits. // ----------------------------------------------------------------------------