[compiler] Do not nested inline optimized functions when inlining budget
is not enough This CL records the inlined bytecode size in code objects and take it into consideration when calculating inline candidate's size. It can improve Speedometer2 by ~1% and JetStream2 by ~3% on 9900K platform. Contributted by tao.pan@intel.com Change-Id: Icf31ca52ed5013d62a9c8d5dd550944ef3a4fbda Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2089021 Reviewed-by: Georg Neis <neis@chromium.org> Reviewed-by: Ulan Degenbaev <ulan@chromium.org> Reviewed-by: Mythri Alle <mythria@chromium.org> Commit-Queue: Georg Neis <neis@chromium.org> Cr-Commit-Position: refs/heads/master@{#67237}
This commit is contained in:
parent
cae3bc13d3
commit
8c68536601
@ -248,6 +248,12 @@ class V8_EXPORT_PRIVATE OptimizedCompilationInfo final {
|
||||
return optimization_id_;
|
||||
}
|
||||
|
||||
unsigned inlined_bytecode_size() const { return inlined_bytecode_size_; }
|
||||
|
||||
void set_inlined_bytecode_size(unsigned size) {
|
||||
inlined_bytecode_size_ = size;
|
||||
}
|
||||
|
||||
struct InlinedFunctionHolder {
|
||||
Handle<SharedFunctionInfo> shared_info;
|
||||
Handle<BytecodeArray> bytecode_array; // Explicit to prevent flushing.
|
||||
@ -329,6 +335,7 @@ class V8_EXPORT_PRIVATE OptimizedCompilationInfo final {
|
||||
InlinedFunctionList inlined_functions_;
|
||||
|
||||
int optimization_id_ = -1;
|
||||
unsigned inlined_bytecode_size_ = 0;
|
||||
|
||||
// The current OSR frame for specialization or {nullptr}.
|
||||
JavaScriptFrame* osr_frame_ = nullptr;
|
||||
|
@ -499,6 +499,7 @@ MaybeHandle<Code> CodeGenerator::FinalizeCode() {
|
||||
MaybeHandle<Code> maybe_code =
|
||||
Factory::CodeBuilder(isolate(), desc, info()->code_kind())
|
||||
.set_builtin_index(info()->builtin_index())
|
||||
.set_inlined_bytecode_size(info()->inlined_bytecode_size())
|
||||
.set_source_position_table(source_positions)
|
||||
.set_deoptimization_data(deopt_data)
|
||||
.set_is_turbofanned()
|
||||
|
@ -319,6 +319,7 @@ class V8_EXPORT_PRIVATE JSFunctionRef : public JSObjectRef {
|
||||
bool has_feedback_vector() const;
|
||||
bool has_initial_map() const;
|
||||
bool has_prototype() const;
|
||||
bool IsOptimized() const;
|
||||
bool PrototypeRequiresRuntimeLookup() const;
|
||||
|
||||
void Serialize();
|
||||
@ -331,6 +332,7 @@ class V8_EXPORT_PRIVATE JSFunctionRef : public JSObjectRef {
|
||||
NativeContextRef native_context() const;
|
||||
SharedFunctionInfoRef shared() const;
|
||||
FeedbackVectorRef feedback_vector() const;
|
||||
CodeRef code() const;
|
||||
int InitialMapInstanceSizeWithMinSlack() const;
|
||||
};
|
||||
|
||||
@ -920,6 +922,8 @@ class CodeRef : public HeapObjectRef {
|
||||
DEFINE_REF_CONSTRUCTOR(Code, HeapObjectRef)
|
||||
|
||||
Handle<Code> object() const;
|
||||
|
||||
unsigned inlined_bytecode_size() const;
|
||||
};
|
||||
|
||||
class InternalizedStringRef : public StringRef {
|
||||
|
@ -610,6 +610,7 @@ class JSFunctionData : public JSObjectData {
|
||||
bool has_feedback_vector() const { return has_feedback_vector_; }
|
||||
bool has_initial_map() const { return has_initial_map_; }
|
||||
bool has_prototype() const { return has_prototype_; }
|
||||
bool IsOptimized() const { return is_optimized_; }
|
||||
bool PrototypeRequiresRuntimeLookup() const {
|
||||
return PrototypeRequiresRuntimeLookup_;
|
||||
}
|
||||
@ -623,6 +624,7 @@ class JSFunctionData : public JSObjectData {
|
||||
ObjectData* prototype() const { return prototype_; }
|
||||
SharedFunctionInfoData* shared() const { return shared_; }
|
||||
FeedbackVectorData* feedback_vector() const { return feedback_vector_; }
|
||||
CodeData* code() const { return code_; }
|
||||
int initial_map_instance_size_with_min_slack() const {
|
||||
CHECK(serialized_);
|
||||
return initial_map_instance_size_with_min_slack_;
|
||||
@ -632,6 +634,7 @@ class JSFunctionData : public JSObjectData {
|
||||
bool has_feedback_vector_;
|
||||
bool has_initial_map_;
|
||||
bool has_prototype_;
|
||||
bool is_optimized_;
|
||||
bool PrototypeRequiresRuntimeLookup_;
|
||||
|
||||
bool serialized_ = false;
|
||||
@ -642,6 +645,7 @@ class JSFunctionData : public JSObjectData {
|
||||
ObjectData* prototype_ = nullptr;
|
||||
SharedFunctionInfoData* shared_ = nullptr;
|
||||
FeedbackVectorData* feedback_vector_ = nullptr;
|
||||
CodeData* code_ = nullptr;
|
||||
int initial_map_instance_size_with_min_slack_;
|
||||
};
|
||||
|
||||
@ -1261,6 +1265,7 @@ JSFunctionData::JSFunctionData(JSHeapBroker* broker, ObjectData** storage,
|
||||
has_initial_map_(object->has_prototype_slot() &&
|
||||
object->has_initial_map()),
|
||||
has_prototype_(object->has_prototype_slot() && object->has_prototype()),
|
||||
is_optimized_(object->IsOptimized()),
|
||||
PrototypeRequiresRuntimeLookup_(
|
||||
object->PrototypeRequiresRuntimeLookup()) {}
|
||||
|
||||
@ -1277,6 +1282,7 @@ void JSFunctionData::Serialize(JSHeapBroker* broker) {
|
||||
DCHECK_NULL(prototype_);
|
||||
DCHECK_NULL(shared_);
|
||||
DCHECK_NULL(feedback_vector_);
|
||||
DCHECK_NULL(code_);
|
||||
|
||||
context_ = broker->GetOrCreateData(function->context())->AsContext();
|
||||
native_context_ =
|
||||
@ -1286,6 +1292,7 @@ void JSFunctionData::Serialize(JSHeapBroker* broker) {
|
||||
? broker->GetOrCreateData(function->feedback_vector())
|
||||
->AsFeedbackVector()
|
||||
: nullptr;
|
||||
code_ = broker->GetOrCreateData(function->code())->AsCode();
|
||||
initial_map_ = has_initial_map()
|
||||
? broker->GetOrCreateData(function->initial_map())->AsMap()
|
||||
: nullptr;
|
||||
@ -2024,7 +2031,13 @@ class TemplateObjectDescriptionData : public HeapObjectData {
|
||||
class CodeData : public HeapObjectData {
|
||||
public:
|
||||
CodeData(JSHeapBroker* broker, ObjectData** storage, Handle<Code> object)
|
||||
: HeapObjectData(broker, storage, object) {}
|
||||
: HeapObjectData(broker, storage, object),
|
||||
inlined_bytecode_size_(object->inlined_bytecode_size()) {}
|
||||
|
||||
unsigned inlined_bytecode_size() const { return inlined_bytecode_size_; }
|
||||
|
||||
private:
|
||||
unsigned const inlined_bytecode_size_;
|
||||
};
|
||||
|
||||
#define DEFINE_IS_AND_AS(Name) \
|
||||
@ -3371,6 +3384,7 @@ BIMODAL_ACCESSOR_C(JSDataView, size_t, byte_offset)
|
||||
BIMODAL_ACCESSOR_C(JSFunction, bool, has_feedback_vector)
|
||||
BIMODAL_ACCESSOR_C(JSFunction, bool, has_initial_map)
|
||||
BIMODAL_ACCESSOR_C(JSFunction, bool, has_prototype)
|
||||
BIMODAL_ACCESSOR_C(JSFunction, bool, IsOptimized)
|
||||
BIMODAL_ACCESSOR_C(JSFunction, bool, PrototypeRequiresRuntimeLookup)
|
||||
BIMODAL_ACCESSOR(JSFunction, Context, context)
|
||||
BIMODAL_ACCESSOR(JSFunction, NativeContext, native_context)
|
||||
@ -3378,6 +3392,7 @@ BIMODAL_ACCESSOR(JSFunction, Map, initial_map)
|
||||
BIMODAL_ACCESSOR(JSFunction, Object, prototype)
|
||||
BIMODAL_ACCESSOR(JSFunction, SharedFunctionInfo, shared)
|
||||
BIMODAL_ACCESSOR(JSFunction, FeedbackVector, feedback_vector)
|
||||
BIMODAL_ACCESSOR(JSFunction, Code, code)
|
||||
|
||||
BIMODAL_ACCESSOR_C(JSGlobalObject, bool, IsDetached)
|
||||
|
||||
@ -3413,6 +3428,8 @@ BIMODAL_ACCESSOR(Map, Object, GetConstructor)
|
||||
BIMODAL_ACCESSOR(Map, HeapObject, GetBackPointer)
|
||||
BIMODAL_ACCESSOR_C(Map, bool, is_abandoned_prototype_map)
|
||||
|
||||
BIMODAL_ACCESSOR_C(Code, unsigned, inlined_bytecode_size)
|
||||
|
||||
#define DEF_NATIVE_CONTEXT_ACCESSOR(type, name) \
|
||||
BIMODAL_ACCESSOR(NativeContext, type, name)
|
||||
BROKER_NATIVE_CONTEXT_FIELDS(DEF_NATIVE_CONTEXT_ACCESSOR)
|
||||
|
@ -22,8 +22,8 @@ namespace compiler {
|
||||
} while (false)
|
||||
|
||||
namespace {
|
||||
bool IsSmall(BytecodeArrayRef const& bytecode) {
|
||||
return bytecode.length() <= FLAG_max_inlined_bytecode_size_small;
|
||||
bool IsSmall(int const size) {
|
||||
return size <= FLAG_max_inlined_bytecode_size_small;
|
||||
}
|
||||
|
||||
bool CanConsiderForInlining(JSHeapBroker* broker,
|
||||
@ -200,7 +200,16 @@ Reduction JSInliningHeuristic::Reduce(Node* node) {
|
||||
can_inline_candidate = true;
|
||||
BytecodeArrayRef bytecode = candidate.bytecode[i].value();
|
||||
candidate.total_size += bytecode.length();
|
||||
candidate_is_small = candidate_is_small && IsSmall(bytecode);
|
||||
unsigned inlined_bytecode_size = 0;
|
||||
if (candidate.functions[i].has_value()) {
|
||||
JSFunctionRef function = candidate.functions[i].value();
|
||||
if (function.IsOptimized()) {
|
||||
inlined_bytecode_size = function.code().inlined_bytecode_size();
|
||||
candidate.total_size += inlined_bytecode_size;
|
||||
}
|
||||
}
|
||||
candidate_is_small = candidate_is_small &&
|
||||
IsSmall(bytecode.length() + inlined_bytecode_size);
|
||||
}
|
||||
}
|
||||
if (!can_inline_candidate) return NoChange();
|
||||
|
@ -33,6 +33,10 @@ class JSInliningHeuristic final : public AdvancedReducer {
|
||||
// and inlines call sites that the heuristic determines to be important.
|
||||
void Finalize() final;
|
||||
|
||||
int total_inlined_bytecode_size() const {
|
||||
return total_inlined_bytecode_size_;
|
||||
}
|
||||
|
||||
private:
|
||||
// This limit currently matches what the old compiler did. We may want to
|
||||
// re-evaluate and come up with a proper limit for TurboFan.
|
||||
|
@ -1412,6 +1412,7 @@ struct InliningPhase {
|
||||
AddReducer(data, &graph_reducer, &inlining);
|
||||
}
|
||||
graph_reducer.ReduceGraph();
|
||||
info->set_inlined_bytecode_size(inlining.total_inlined_bytecode_size());
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -165,6 +165,7 @@ MaybeHandle<Code> Factory::CodeBuilder::BuildInternal(
|
||||
code->initialize_flags(kind_, has_unwinding_info, is_turbofanned_,
|
||||
stack_slots_, kIsNotOffHeapTrampoline);
|
||||
code->set_builtin_index(builtin_index_);
|
||||
code->set_inlined_bytecode_size(inlined_bytecode_size_);
|
||||
code->set_code_data_container(*data_container);
|
||||
code->set_deoptimization_data(*deoptimization_data_);
|
||||
code->set_source_position_table(*source_position_table_);
|
||||
|
@ -807,6 +807,11 @@ class V8_EXPORT_PRIVATE Factory : public FactoryBase<Factory> {
|
||||
return *this;
|
||||
}
|
||||
|
||||
CodeBuilder& set_inlined_bytecode_size(uint32_t size) {
|
||||
inlined_bytecode_size_ = size;
|
||||
return *this;
|
||||
}
|
||||
|
||||
CodeBuilder& set_source_position_table(Handle<ByteArray> table) {
|
||||
DCHECK(!table.is_null());
|
||||
source_position_table_ = table;
|
||||
@ -858,6 +863,7 @@ class V8_EXPORT_PRIVATE Factory : public FactoryBase<Factory> {
|
||||
|
||||
MaybeHandle<Object> self_reference_;
|
||||
int32_t builtin_index_ = Builtins::kNoBuiltinId;
|
||||
uint32_t inlined_bytecode_size_ = 0;
|
||||
int32_t kind_specific_flags_ = 0;
|
||||
Handle<ByteArray> source_position_table_;
|
||||
Handle<DeoptimizationData> deoptimization_data_ =
|
||||
|
@ -454,6 +454,17 @@ void Code::set_builtin_index(int index) {
|
||||
|
||||
bool Code::is_builtin() const { return builtin_index() != -1; }
|
||||
|
||||
unsigned Code::inlined_bytecode_size() const {
|
||||
DCHECK(kind() == OPTIMIZED_FUNCTION ||
|
||||
ReadField<unsigned>(kInlinedBytecodeSizeOffset) == 0);
|
||||
return ReadField<unsigned>(kInlinedBytecodeSizeOffset);
|
||||
}
|
||||
|
||||
void Code::set_inlined_bytecode_size(unsigned size) {
|
||||
DCHECK(kind() == OPTIMIZED_FUNCTION || size == 0);
|
||||
WriteField<unsigned>(kInlinedBytecodeSizeOffset, size);
|
||||
}
|
||||
|
||||
bool Code::has_safepoint_info() const {
|
||||
return is_turbofanned() || is_wasm_code();
|
||||
}
|
||||
|
@ -138,6 +138,9 @@ class Code : public HeapObject {
|
||||
inline void set_builtin_index(int id);
|
||||
inline bool is_builtin() const;
|
||||
|
||||
inline unsigned inlined_bytecode_size() const;
|
||||
inline void set_inlined_bytecode_size(unsigned size);
|
||||
|
||||
inline bool has_safepoint_info() const;
|
||||
|
||||
// [stack_slots]: If {has_safepoint_info()}, the number of stack slots
|
||||
@ -397,6 +400,7 @@ class Code : public HeapObject {
|
||||
FLAG_enable_embedded_constant_pool ? kIntSize : 0) \
|
||||
V(kCodeCommentsOffsetOffset, kIntSize) \
|
||||
V(kBuiltinIndexOffset, kIntSize) \
|
||||
V(kInlinedBytecodeSizeOffset, kIntSize) \
|
||||
V(kUnalignedHeaderSize, 0) \
|
||||
/* Add padding to align the instruction start following right after */ \
|
||||
/* the Code object header. */ \
|
||||
@ -409,22 +413,22 @@ class Code : public HeapObject {
|
||||
// This documents the amount of free space we have in each Code object header
|
||||
// due to padding for code alignment.
|
||||
#if V8_TARGET_ARCH_ARM64
|
||||
static constexpr int kHeaderPaddingSize = COMPRESS_POINTERS_BOOL ? 20 : 0;
|
||||
static constexpr int kHeaderPaddingSize = COMPRESS_POINTERS_BOOL ? 16 : 28;
|
||||
#elif V8_TARGET_ARCH_MIPS64
|
||||
static constexpr int kHeaderPaddingSize = 0;
|
||||
static constexpr int kHeaderPaddingSize = 28;
|
||||
#elif V8_TARGET_ARCH_X64
|
||||
static constexpr int kHeaderPaddingSize = COMPRESS_POINTERS_BOOL ? 20 : 0;
|
||||
static constexpr int kHeaderPaddingSize = COMPRESS_POINTERS_BOOL ? 16 : 28;
|
||||
#elif V8_TARGET_ARCH_ARM
|
||||
static constexpr int kHeaderPaddingSize = 20;
|
||||
static constexpr int kHeaderPaddingSize = 16;
|
||||
#elif V8_TARGET_ARCH_IA32
|
||||
static constexpr int kHeaderPaddingSize = 20;
|
||||
static constexpr int kHeaderPaddingSize = 16;
|
||||
#elif V8_TARGET_ARCH_MIPS
|
||||
static constexpr int kHeaderPaddingSize = 20;
|
||||
static constexpr int kHeaderPaddingSize = 16;
|
||||
#elif V8_TARGET_ARCH_PPC64
|
||||
static constexpr int kHeaderPaddingSize =
|
||||
FLAG_enable_embedded_constant_pool ? 28 : 0;
|
||||
FLAG_enable_embedded_constant_pool ? 24 : 28;
|
||||
#elif V8_TARGET_ARCH_S390X
|
||||
static constexpr int kHeaderPaddingSize = COMPRESS_POINTERS_BOOL ? 20 : 0;
|
||||
static constexpr int kHeaderPaddingSize = COMPRESS_POINTERS_BOOL ? 16 : 28;
|
||||
#else
|
||||
#error Unknown architecture.
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user