[runtime-profiler] Use the OSR cache earlier when execution enters the loop range of an OSR cache entry
If a function's SFI has an OSR cache entry, then once execution enters the loop range covered by that entry, set the OSR loop nesting level so that the OSR condition (loop_depth < osr_level) is satisfied. OSR is then triggered shortly afterwards, when the JumpLoop bytecode that is the entry point of the cached OSR code is executed, and the OSR cache is hit. This CL improves the JetStream2 gaussian-blur case by ~3%, which comes from using the OSR code cache 18 profiler ticks earlier. Change-Id: Ibf404d74a4a32bc34974f129828c594c9d551355 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3379240 Reviewed-by: Tobias Tebbi <tebbi@chromium.org> Reviewed-by: Jakob Gruber <jgruber@chromium.org> Commit-Queue: Tao Pan <tao.pan@intel.com> Cr-Commit-Position: refs/heads/main@{#79064}
This commit is contained in:
parent
d89579b3ce
commit
ba94a6c149
@ -150,8 +150,8 @@ void RuntimeProfiler::MaybeOptimizeFrame(JSFunction function,
|
||||
}
|
||||
}
|
||||
|
||||
OptimizationReason reason =
|
||||
ShouldOptimize(function, function.shared().GetBytecodeArray(isolate_));
|
||||
OptimizationReason reason = ShouldOptimize(
|
||||
function, function.shared().GetBytecodeArray(isolate_), frame);
|
||||
|
||||
if (reason != OptimizationReason::kDoNotOptimize) {
|
||||
Optimize(function, reason, code_kind);
|
||||
@ -183,10 +183,36 @@ bool ShouldOptimizeAsSmallFunction(int bytecode_size, bool any_ic_changed) {
|
||||
} // namespace
|
||||
|
||||
OptimizationReason RuntimeProfiler::ShouldOptimize(JSFunction function,
|
||||
BytecodeArray bytecode) {
|
||||
BytecodeArray bytecode,
|
||||
JavaScriptFrame* frame) {
|
||||
if (function.ActiveTierIsTurbofan()) {
|
||||
return OptimizationReason::kDoNotOptimize;
|
||||
}
|
||||
// If function's SFI has OSR cache, once enter loop range of OSR cache, set
|
||||
// OSR loop nesting level for matching condition of OSR (loop_depth <
|
||||
// osr_level), soon later OSR will be triggered when executing bytecode
|
||||
// JumpLoop which is entry of the OSR cache, then hit the OSR cache.
|
||||
if (V8_UNLIKELY(function.shared().osr_code_cache_state() > kNotCached) &&
|
||||
frame->is_unoptimized()) {
|
||||
int current_offset =
|
||||
static_cast<UnoptimizedFrame*>(frame)->GetBytecodeOffset();
|
||||
OSROptimizedCodeCache cache =
|
||||
function.context().native_context().GetOSROptimizedCodeCache();
|
||||
std::vector<int> bytecode_offsets =
|
||||
cache.GetBytecodeOffsetsFromSFI(function.shared());
|
||||
interpreter::BytecodeArrayIterator iterator(
|
||||
Handle<BytecodeArray>(bytecode, isolate_));
|
||||
for (int jump_offset : bytecode_offsets) {
|
||||
iterator.SetOffset(jump_offset);
|
||||
int jump_target_offset = iterator.GetJumpTargetOffset();
|
||||
if (jump_offset >= current_offset &&
|
||||
current_offset >= jump_target_offset) {
|
||||
bytecode.set_osr_loop_nesting_level(iterator.GetImmediateOperand(1) +
|
||||
1);
|
||||
return OptimizationReason::kHotAndStable;
|
||||
}
|
||||
}
|
||||
}
|
||||
const int ticks = function.feedback_vector().profiler_ticks();
|
||||
const int ticks_for_optimization =
|
||||
FLAG_ticks_before_optimization +
|
||||
|
@ -47,7 +47,8 @@ class RuntimeProfiler {
|
||||
// optimization attempts should be made.
|
||||
bool MaybeOSR(JSFunction function, UnoptimizedFrame* frame);
|
||||
OptimizationReason ShouldOptimize(JSFunction function,
|
||||
BytecodeArray bytecode_array);
|
||||
BytecodeArray bytecode_array,
|
||||
JavaScriptFrame* frame);
|
||||
void Optimize(JSFunction function, OptimizationReason reason,
|
||||
CodeKind code_kind);
|
||||
void Baseline(JSFunction function, OptimizationReason reason);
|
||||
|
@ -122,6 +122,18 @@ void OSROptimizedCodeCache::EvictMarkedCode(Isolate* isolate) {
|
||||
}
|
||||
}
|
||||
|
||||
// Collects the bytecode offset of every entry in this cache whose
// SharedFunctionInfo equals |shared|.
//
// Returns the offsets as plain ints, in the order the matching entries appear
// in the cache; the vector is empty when no entry matches.
std::vector<int> OSROptimizedCodeCache::GetBytecodeOffsetsFromSFI(
|
||||
SharedFunctionInfo shared) {
|
||||
std::vector<int> bytecode_offsets;
|
||||
// Garbage collection is disallowed from here until the function returns.
DisallowGarbageCollection gc;
|
||||
// Each entry occupies kEntryLength consecutive slots, so step by that stride.
for (int index = 0; index < length(); index += kEntryLength) {
|
||||
if (GetSFIFromEntry(index) == shared) {
|
||||
bytecode_offsets.push_back(GetBytecodeOffsetFromEntry(index).ToInt());
|
||||
}
|
||||
}
|
||||
return bytecode_offsets;
|
||||
}
|
||||
|
||||
int OSROptimizedCodeCache::GrowOSRCache(
|
||||
Handle<NativeContext> native_context,
|
||||
Handle<OSROptimizedCodeCache>* osr_cache) {
|
||||
@ -179,12 +191,26 @@ int OSROptimizedCodeCache::FindEntry(Handle<SharedFunctionInfo> shared,
|
||||
}
|
||||
|
||||
void OSROptimizedCodeCache::ClearEntry(int index, Isolate* isolate) {
|
||||
Set(index + OSRCodeCacheConstants::kSharedOffset,
|
||||
HeapObjectReference::ClearedValue(isolate));
|
||||
Set(index + OSRCodeCacheConstants::kCachedCodeOffset,
|
||||
HeapObjectReference::ClearedValue(isolate));
|
||||
Set(index + OSRCodeCacheConstants::kOsrIdOffset,
|
||||
HeapObjectReference::ClearedValue(isolate));
|
||||
SharedFunctionInfo shared = GetSFIFromEntry(index);
|
||||
DCHECK_GT(shared.osr_code_cache_state(), kNotCached);
|
||||
if (V8_LIKELY(shared.osr_code_cache_state() == kCachedOnce)) {
|
||||
shared.set_osr_code_cache_state(kNotCached);
|
||||
} else if (shared.osr_code_cache_state() == kCachedMultiple) {
|
||||
int osr_code_cache_count = 0;
|
||||
for (int index = 0; index < length(); index += kEntryLength) {
|
||||
if (GetSFIFromEntry(index) == shared) {
|
||||
osr_code_cache_count++;
|
||||
}
|
||||
}
|
||||
if (osr_code_cache_count == 2) {
|
||||
shared.set_osr_code_cache_state(kCachedOnce);
|
||||
}
|
||||
}
|
||||
HeapObjectReference cleared_value =
|
||||
HeapObjectReference::ClearedValue(isolate);
|
||||
Set(index + OSRCodeCacheConstants::kSharedOffset, cleared_value);
|
||||
Set(index + OSRCodeCacheConstants::kCachedCodeOffset, cleared_value);
|
||||
Set(index + OSRCodeCacheConstants::kOsrIdOffset, cleared_value);
|
||||
}
|
||||
|
||||
void OSROptimizedCodeCache::InitializeEntry(int entry,
|
||||
@ -197,6 +223,11 @@ void OSROptimizedCodeCache::InitializeEntry(int entry,
|
||||
Set(entry + OSRCodeCacheConstants::kCachedCodeOffset, weak_code_entry);
|
||||
Set(entry + OSRCodeCacheConstants::kOsrIdOffset,
|
||||
MaybeObject::FromSmi(Smi::FromInt(osr_offset.ToInt())));
|
||||
if (V8_LIKELY(shared.osr_code_cache_state() == kNotCached)) {
|
||||
shared.set_osr_code_cache_state(kCachedOnce);
|
||||
} else if (shared.osr_code_cache_state() == kCachedOnce) {
|
||||
shared.set_osr_code_cache_state(kCachedMultiple);
|
||||
}
|
||||
}
|
||||
|
||||
void OSROptimizedCodeCache::MoveEntry(int src, int dst, Isolate* isolate) {
|
||||
@ -205,7 +236,11 @@ void OSROptimizedCodeCache::MoveEntry(int src, int dst, Isolate* isolate) {
|
||||
Set(dst + OSRCodeCacheConstants::kCachedCodeOffset,
|
||||
Get(src + OSRCodeCacheConstants::kCachedCodeOffset));
|
||||
Set(dst + OSRCodeCacheConstants::kOsrIdOffset, Get(src + kOsrIdOffset));
|
||||
ClearEntry(src, isolate);
|
||||
HeapObjectReference cleared_value =
|
||||
HeapObjectReference::ClearedValue(isolate);
|
||||
Set(src + OSRCodeCacheConstants::kSharedOffset, cleared_value);
|
||||
Set(src + OSRCodeCacheConstants::kCachedCodeOffset, cleared_value);
|
||||
Set(src + OSRCodeCacheConstants::kOsrIdOffset, cleared_value);
|
||||
}
|
||||
|
||||
int OSROptimizedCodeCache::CapacityForLength(int curr_length) {
|
||||
|
@ -12,6 +12,17 @@
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
// Records how many OSR code cache entries belong to an SFI. Without this
|
||||
// state, all OSR code cache entries would have to be checked to know whether a
|
||||
// JSFunction's SFI has any OSR code cached. The value kCachedMultiple exists
|
||||
// so that the time-consuming scan over all entries is only needed for the very
|
||||
// unlikely state change kCachedMultiple -> { kCachedOnce | kCachedMultiple }.
|
||||
enum OSRCodeCacheStateOfSFI : uint8_t {
|
||||
kNotCached, // Likely state, no OSR code cache
|
||||
kCachedOnce, // Unlikely state, one OSR code cache
|
||||
kCachedMultiple, // Very unlikely state, multiple OSR code caches
|
||||
};
|
||||
|
||||
class V8_EXPORT OSROptimizedCodeCache : public WeakFixedArray {
|
||||
public:
|
||||
DECL_CAST(OSROptimizedCodeCache)
|
||||
@ -48,6 +59,10 @@ class V8_EXPORT OSROptimizedCodeCache : public WeakFixedArray {
|
||||
// Remove all code objects marked for deoptimization from OSR code cache.
|
||||
void EvictMarkedCode(Isolate* isolate);
|
||||
|
||||
// Returns vector of bytecode offsets corresponding to the shared function
|
||||
// |shared|
|
||||
std::vector<int> GetBytecodeOffsetsFromSFI(SharedFunctionInfo shared);
|
||||
|
||||
private:
|
||||
// Functions that implement heuristics on when to grow / shrink the cache.
|
||||
static int CapacityForLength(int curr_capacity);
|
||||
|
@ -302,6 +302,17 @@ BailoutReason SharedFunctionInfo::disabled_optimization_reason() const {
|
||||
return DisabledOptimizationReasonBits::decode(flags(kRelaxedLoad));
|
||||
}
|
||||
|
||||
// Returns the OSR code cache state of this SharedFunctionInfo, decoded from
// the OsrCodeCacheStateBits field of the flags word (read with a relaxed
// load).
OSRCodeCacheStateOfSFI SharedFunctionInfo::osr_code_cache_state() const {
|
||||
return OsrCodeCacheStateBits::decode(flags(kRelaxedLoad));
|
||||
}
|
||||
|
||||
// Stores |state| into the OsrCodeCacheStateBits field of the flags word.
// The flags are read with a relaxed load, the field is updated in the local
// copy, and the result is written back with a relaxed store.
// NOTE(review): the load/update/store sequence is three separate steps, not
// one atomic operation; presumably callers do not race on the flags word -
// confirm against the call sites.
void SharedFunctionInfo::set_osr_code_cache_state(
|
||||
OSRCodeCacheStateOfSFI state) {
|
||||
int hints = flags(kRelaxedLoad);
|
||||
hints = OsrCodeCacheStateBits::update(hints, state);
|
||||
set_flags(hints, kRelaxedStore);
|
||||
}
|
||||
|
||||
LanguageMode SharedFunctionInfo::language_mode() const {
|
||||
STATIC_ASSERT(LanguageModeSize == 2);
|
||||
return construct_language_mode(IsStrictBit::decode(flags(kRelaxedLoad)));
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include "src/objects/function-kind.h"
|
||||
#include "src/objects/function-syntax-kind.h"
|
||||
#include "src/objects/objects.h"
|
||||
#include "src/objects/osr-optimized-code-cache.h"
|
||||
#include "src/objects/script.h"
|
||||
#include "src/objects/slots.h"
|
||||
#include "src/objects/smi.h"
|
||||
@ -520,6 +521,10 @@ class SharedFunctionInfo
|
||||
// shared function info.
|
||||
void DisableOptimization(BailoutReason reason);
|
||||
|
||||
inline OSRCodeCacheStateOfSFI osr_code_cache_state() const;
|
||||
|
||||
inline void set_osr_code_cache_state(OSRCodeCacheStateOfSFI state);
|
||||
|
||||
// This class constructor needs to call out to an instance fields
|
||||
// initializer. This flag is set when creating the
|
||||
// SharedFunctionInfo as a reminder to emit the initializer call
|
||||
|
@ -17,6 +17,7 @@ extern class InterpreterData extends Struct {
|
||||
type FunctionKind extends uint8 constexpr 'FunctionKind';
|
||||
type FunctionSyntaxKind extends uint8 constexpr 'FunctionSyntaxKind';
|
||||
type BailoutReason extends uint8 constexpr 'BailoutReason';
|
||||
type OSRCodeCacheStateOfSFI extends uint8 constexpr 'OSRCodeCacheStateOfSFI';
|
||||
|
||||
bitfield struct SharedFunctionInfoFlags extends uint32 {
|
||||
// Have FunctionKind first to make it cheaper to access.
|
||||
@ -37,6 +38,7 @@ bitfield struct SharedFunctionInfoFlags extends uint32 {
|
||||
is_top_level: bool: 1 bit;
|
||||
properties_are_final: bool: 1 bit;
|
||||
private_name_lookup_skips_outer_class: bool: 1 bit;
|
||||
osr_code_cache_state: OSRCodeCacheStateOfSFI: 2 bit;
|
||||
}
|
||||
|
||||
bitfield struct SharedFunctionInfoFlags2 extends uint8 {
|
||||
|
Loading…
Reference in New Issue
Block a user