[runtime-profiler] Use the OSR cache earlier when entering the loop range of an OSR cache entry

If a function's SFI has an OSR cache entry, then as soon as execution enters
the loop range covered by that entry, set the OSR loop nesting level so that
the OSR condition (loop_depth < osr_level) is satisfied. OSR is then triggered
shortly afterwards, when the JumpLoop bytecode that is the entry point of the
cached OSR code executes, and the OSR cache is hit.
This CL improves the JetStream2 gaussian-blur case by ~3%, because the OSR
code cache is used 18 profiler ticks earlier.

Change-Id: Ibf404d74a4a32bc34974f129828c594c9d551355
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3379240
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Commit-Queue: Tao Pan <tao.pan@intel.com>
Cr-Commit-Position: refs/heads/main@{#79064}
This commit is contained in:
Pan, Tao 2022-02-14 10:11:19 +08:00 committed by V8 LUCI CQ
parent d89579b3ce
commit ba94a6c149
7 changed files with 106 additions and 11 deletions

View File

@ -150,8 +150,8 @@ void RuntimeProfiler::MaybeOptimizeFrame(JSFunction function,
}
}
OptimizationReason reason =
ShouldOptimize(function, function.shared().GetBytecodeArray(isolate_));
OptimizationReason reason = ShouldOptimize(
function, function.shared().GetBytecodeArray(isolate_), frame);
if (reason != OptimizationReason::kDoNotOptimize) {
Optimize(function, reason, code_kind);
@ -183,10 +183,36 @@ bool ShouldOptimizeAsSmallFunction(int bytecode_size, bool any_ic_changed) {
} // namespace
OptimizationReason RuntimeProfiler::ShouldOptimize(JSFunction function,
BytecodeArray bytecode) {
BytecodeArray bytecode,
JavaScriptFrame* frame) {
if (function.ActiveTierIsTurbofan()) {
return OptimizationReason::kDoNotOptimize;
}
// If function's SFI has OSR cache, once enter loop range of OSR cache, set
// OSR loop nesting level for matching condition of OSR (loop_depth <
// osr_level), soon later OSR will be triggered when executing bytecode
// JumpLoop which is entry of the OSR cache, then hit the OSR cache.
if (V8_UNLIKELY(function.shared().osr_code_cache_state() > kNotCached) &&
frame->is_unoptimized()) {
int current_offset =
static_cast<UnoptimizedFrame*>(frame)->GetBytecodeOffset();
OSROptimizedCodeCache cache =
function.context().native_context().GetOSROptimizedCodeCache();
std::vector<int> bytecode_offsets =
cache.GetBytecodeOffsetsFromSFI(function.shared());
interpreter::BytecodeArrayIterator iterator(
Handle<BytecodeArray>(bytecode, isolate_));
for (int jump_offset : bytecode_offsets) {
iterator.SetOffset(jump_offset);
int jump_target_offset = iterator.GetJumpTargetOffset();
if (jump_offset >= current_offset &&
current_offset >= jump_target_offset) {
bytecode.set_osr_loop_nesting_level(iterator.GetImmediateOperand(1) +
1);
return OptimizationReason::kHotAndStable;
}
}
}
const int ticks = function.feedback_vector().profiler_ticks();
const int ticks_for_optimization =
FLAG_ticks_before_optimization +

View File

@ -47,7 +47,8 @@ class RuntimeProfiler {
// optimization attempts should be made.
bool MaybeOSR(JSFunction function, UnoptimizedFrame* frame);
OptimizationReason ShouldOptimize(JSFunction function,
BytecodeArray bytecode_array);
BytecodeArray bytecode_array,
JavaScriptFrame* frame);
void Optimize(JSFunction function, OptimizationReason reason,
CodeKind code_kind);
void Baseline(JSFunction function, OptimizationReason reason);

View File

@ -122,6 +122,18 @@ void OSROptimizedCodeCache::EvictMarkedCode(Isolate* isolate) {
}
}
// Walks every entry of the cache and gathers the bytecode offset of each entry
// whose SharedFunctionInfo equals |shared|. Returns an empty vector when no
// entry matches.
std::vector<int> OSROptimizedCodeCache::GetBytecodeOffsetsFromSFI(
    SharedFunctionInfo shared) {
  std::vector<int> matching_offsets;
  DisallowGarbageCollection no_gc;
  int entry = 0;
  while (entry < length()) {
    if (GetSFIFromEntry(entry) == shared) {
      matching_offsets.push_back(GetBytecodeOffsetFromEntry(entry).ToInt());
    }
    // Entries are laid out back to back, kEntryLength slots each.
    entry += kEntryLength;
  }
  return matching_offsets;
}
int OSROptimizedCodeCache::GrowOSRCache(
Handle<NativeContext> native_context,
Handle<OSROptimizedCodeCache>* osr_cache) {
@ -179,12 +191,26 @@ int OSROptimizedCodeCache::FindEntry(Handle<SharedFunctionInfo> shared,
}
// Clears the cache entry starting at |index| and updates the
// osr_code_cache_state of the SharedFunctionInfo stored in that entry:
//   kCachedOnce     -> kNotCached
//   kCachedMultiple -> kCachedOnce, but only when exactly two entries in the
//                      cache reference the SFI; otherwise the state is kept.
// NOTE(review): this span reads like a diff view with removed and added lines
// interleaved; the three Set(...) calls directly below repeat the three at the
// end of the function and presumably are the pre-change lines - confirm
// against the actual file.
void OSROptimizedCodeCache::ClearEntry(int index, Isolate* isolate) {
Set(index + OSRCodeCacheConstants::kSharedOffset,
HeapObjectReference::ClearedValue(isolate));
Set(index + OSRCodeCacheConstants::kCachedCodeOffset,
HeapObjectReference::ClearedValue(isolate));
Set(index + OSRCodeCacheConstants::kOsrIdOffset,
HeapObjectReference::ClearedValue(isolate));
// Look up the SFI of the entry so its cache state can be adjusted.
SharedFunctionInfo shared = GetSFIFromEntry(index);
DCHECK_GT(shared.osr_code_cache_state(), kNotCached);
if (V8_LIKELY(shared.osr_code_cache_state() == kCachedOnce)) {
shared.set_osr_code_cache_state(kNotCached);
} else if (shared.osr_code_cache_state() == kCachedMultiple) {
// Count the entries that reference |shared| by scanning the whole cache.
// Note: the loop variable below shadows the |index| parameter.
int osr_code_cache_count = 0;
for (int index = 0; index < length(); index += kEntryLength) {
if (GetSFIFromEntry(index) == shared) {
osr_code_cache_count++;
}
}
// Presumably the entry being cleared is still counted here, so a count of
// two means a single entry remains afterwards - TODO confirm.
if (osr_code_cache_count == 2) {
shared.set_osr_code_cache_state(kCachedOnce);
}
}
// Overwrite all three slots of the entry with the cleared weak reference.
HeapObjectReference cleared_value =
HeapObjectReference::ClearedValue(isolate);
Set(index + OSRCodeCacheConstants::kSharedOffset, cleared_value);
Set(index + OSRCodeCacheConstants::kCachedCodeOffset, cleared_value);
Set(index + OSRCodeCacheConstants::kOsrIdOffset, cleared_value);
}
void OSROptimizedCodeCache::InitializeEntry(int entry,
@ -197,6 +223,11 @@ void OSROptimizedCodeCache::InitializeEntry(int entry,
Set(entry + OSRCodeCacheConstants::kCachedCodeOffset, weak_code_entry);
Set(entry + OSRCodeCacheConstants::kOsrIdOffset,
MaybeObject::FromSmi(Smi::FromInt(osr_offset.ToInt())));
if (V8_LIKELY(shared.osr_code_cache_state() == kNotCached)) {
shared.set_osr_code_cache_state(kCachedOnce);
} else if (shared.osr_code_cache_state() == kCachedOnce) {
shared.set_osr_code_cache_state(kCachedMultiple);
}
}
void OSROptimizedCodeCache::MoveEntry(int src, int dst, Isolate* isolate) {
@ -205,7 +236,11 @@ void OSROptimizedCodeCache::MoveEntry(int src, int dst, Isolate* isolate) {
Set(dst + OSRCodeCacheConstants::kCachedCodeOffset,
Get(src + OSRCodeCacheConstants::kCachedCodeOffset));
Set(dst + OSRCodeCacheConstants::kOsrIdOffset, Get(src + kOsrIdOffset));
ClearEntry(src, isolate);
HeapObjectReference cleared_value =
HeapObjectReference::ClearedValue(isolate);
Set(src + OSRCodeCacheConstants::kSharedOffset, cleared_value);
Set(src + OSRCodeCacheConstants::kCachedCodeOffset, cleared_value);
Set(src + OSRCodeCacheConstants::kOsrIdOffset, cleared_value);
}
int OSROptimizedCodeCache::CapacityForLength(int curr_length) {

View File

@ -12,6 +12,17 @@
namespace v8 {
namespace internal {
// Records how many OSR code cache entries belong to a SharedFunctionInfo
// (SFI). Without this state, all OSR code cache entries would have to be
// checked to find out whether a JSFunction's SFI has any OSR code cached. The
// value kCachedMultiple exists so that the time-consuming loop over the cache
// is only needed for the very unlikely state change
// kCachedMultiple -> { kCachedOnce | kCachedMultiple }.
enum OSRCodeCacheStateOfSFI : uint8_t {
kNotCached, // Likely state, no OSR code cache
kCachedOnce, // Unlikely state, one OSR code cache
kCachedMultiple, // Very unlikely state, multiple OSR code caches
};
class V8_EXPORT OSROptimizedCodeCache : public WeakFixedArray {
public:
DECL_CAST(OSROptimizedCodeCache)
@ -48,6 +59,10 @@ class V8_EXPORT OSROptimizedCodeCache : public WeakFixedArray {
// Remove all code objects marked for deoptimization from OSR code cache.
void EvictMarkedCode(Isolate* isolate);
// Returns vector of bytecode offsets corresponding to the shared function
// |shared|
std::vector<int> GetBytecodeOffsetsFromSFI(SharedFunctionInfo shared);
private:
// Functions that implement heuristics on when to grow / shrink the cache.
static int CapacityForLength(int curr_capacity);

View File

@ -302,6 +302,17 @@ BailoutReason SharedFunctionInfo::disabled_optimization_reason() const {
return DisabledOptimizationReasonBits::decode(flags(kRelaxedLoad));
}
// Returns the OSR code cache state decoded from the flags field, which is
// read with a relaxed load.
OSRCodeCacheStateOfSFI SharedFunctionInfo::osr_code_cache_state() const {
  const auto current_flags = flags(kRelaxedLoad);
  return OsrCodeCacheStateBits::decode(current_flags);
}
// Stores |state| into the OsrCodeCacheStateBits portion of the flags field.
// The flags are read with a relaxed load and written back with a relaxed
// store.
void SharedFunctionInfo::set_osr_code_cache_state(
    OSRCodeCacheStateOfSFI state) {
  const int previous_flags = flags(kRelaxedLoad);
  const int updated_flags = OsrCodeCacheStateBits::update(previous_flags, state);
  set_flags(updated_flags, kRelaxedStore);
}
LanguageMode SharedFunctionInfo::language_mode() const {
STATIC_ASSERT(LanguageModeSize == 2);
return construct_language_mode(IsStrictBit::decode(flags(kRelaxedLoad)));

View File

@ -15,6 +15,7 @@
#include "src/objects/function-kind.h"
#include "src/objects/function-syntax-kind.h"
#include "src/objects/objects.h"
#include "src/objects/osr-optimized-code-cache.h"
#include "src/objects/script.h"
#include "src/objects/slots.h"
#include "src/objects/smi.h"
@ -520,6 +521,10 @@ class SharedFunctionInfo
// shared function info.
void DisableOptimization(BailoutReason reason);
inline OSRCodeCacheStateOfSFI osr_code_cache_state() const;
inline void set_osr_code_cache_state(OSRCodeCacheStateOfSFI state);
// This class constructor needs to call out to an instance fields
// initializer. This flag is set when creating the
// SharedFunctionInfo as a reminder to emit the initializer call

View File

@ -17,6 +17,7 @@ extern class InterpreterData extends Struct {
type FunctionKind extends uint8 constexpr 'FunctionKind';
type FunctionSyntaxKind extends uint8 constexpr 'FunctionSyntaxKind';
type BailoutReason extends uint8 constexpr 'BailoutReason';
type OSRCodeCacheStateOfSFI extends uint8 constexpr 'OSRCodeCacheStateOfSFI';
bitfield struct SharedFunctionInfoFlags extends uint32 {
// Have FunctionKind first to make it cheaper to access.
@ -37,6 +38,7 @@ bitfield struct SharedFunctionInfoFlags extends uint32 {
is_top_level: bool: 1 bit;
properties_are_final: bool: 1 bit;
private_name_lookup_skips_outer_class: bool: 1 bit;
osr_code_cache_state: OSRCodeCacheStateOfSFI: 2 bit;
}
bitfield struct SharedFunctionInfoFlags2 extends uint8 {