[compiler] Do not inline nested optimized functions when the inlining budget is insufficient

This CL records the inlined bytecode size in code objects and takes it
into consideration when calculating an inline candidate's size.

It can improve Speedometer2 by ~1% and JetStream2 by ~3% on a 9900K platform.

Contributed by tao.pan@intel.com

Change-Id: Icf31ca52ed5013d62a9c8d5dd550944ef3a4fbda
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2089021
Reviewed-by: Georg Neis <neis@chromium.org>
Reviewed-by: Ulan Degenbaev <ulan@chromium.org>
Reviewed-by: Mythri Alle <mythria@chromium.org>
Commit-Queue: Georg Neis <neis@chromium.org>
Cr-Commit-Position: refs/heads/master@{#67237}
Georg Neis, 2020-04-20 14:25:13 +02:00 (committed by Commit Bot)
parent cae3bc13d3
commit 8c68536601
11 changed files with 77 additions and 12 deletions
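
In outline, the diffs below implement simple size accounting: the inlining phase records how much bytecode it inlined into the resulting Code object, and the heuristic later charges an already-optimized candidate for that recorded amount. A minimal sketch of the accounting, with illustrative names rather than the exact V8 signatures:

// Sketch only (hypothetical helper): charge an optimized candidate for the
// bytecode previously inlined into its code, so nested inlining of already
// fattened functions runs out of budget sooner.
int EffectiveCandidateSize(int own_bytecode_length, bool is_optimized,
                           unsigned recorded_inlined_bytecode_size) {
  return own_bytecode_length +
         (is_optimized ? static_cast<int>(recorded_inlined_bytecode_size) : 0);
}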


@@ -248,6 +248,12 @@ class V8_EXPORT_PRIVATE OptimizedCompilationInfo final {
return optimization_id_;
}
unsigned inlined_bytecode_size() const { return inlined_bytecode_size_; }
void set_inlined_bytecode_size(unsigned size) {
inlined_bytecode_size_ = size;
}
struct InlinedFunctionHolder {
Handle<SharedFunctionInfo> shared_info;
Handle<BytecodeArray> bytecode_array; // Explicit to prevent flushing.
@@ -329,6 +335,7 @@ class V8_EXPORT_PRIVATE OptimizedCompilationInfo final {
InlinedFunctionList inlined_functions_;
int optimization_id_ = -1;
unsigned inlined_bytecode_size_ = 0;
// The current OSR frame for specialization or {nullptr}.
JavaScriptFrame* osr_frame_ = nullptr;


@@ -499,6 +499,7 @@ MaybeHandle<Code> CodeGenerator::FinalizeCode() {
MaybeHandle<Code> maybe_code =
Factory::CodeBuilder(isolate(), desc, info()->code_kind())
.set_builtin_index(info()->builtin_index())
.set_inlined_bytecode_size(info()->inlined_bytecode_size())
.set_source_position_table(source_positions)
.set_deoptimization_data(deopt_data)
.set_is_turbofanned()


@@ -319,6 +319,7 @@ class V8_EXPORT_PRIVATE JSFunctionRef : public JSObjectRef {
bool has_feedback_vector() const;
bool has_initial_map() const;
bool has_prototype() const;
bool IsOptimized() const;
bool PrototypeRequiresRuntimeLookup() const;
void Serialize();
@@ -331,6 +332,7 @@ class V8_EXPORT_PRIVATE JSFunctionRef : public JSObjectRef {
NativeContextRef native_context() const;
SharedFunctionInfoRef shared() const;
FeedbackVectorRef feedback_vector() const;
CodeRef code() const;
int InitialMapInstanceSizeWithMinSlack() const;
};
@@ -920,6 +922,8 @@ class CodeRef : public HeapObjectRef {
DEFINE_REF_CONSTRUCTOR(Code, HeapObjectRef)
Handle<Code> object() const;
unsigned inlined_bytecode_size() const;
};
class InternalizedStringRef : public StringRef {


@@ -610,6 +610,7 @@ class JSFunctionData : public JSObjectData {
bool has_feedback_vector() const { return has_feedback_vector_; }
bool has_initial_map() const { return has_initial_map_; }
bool has_prototype() const { return has_prototype_; }
bool IsOptimized() const { return is_optimized_; }
bool PrototypeRequiresRuntimeLookup() const {
return PrototypeRequiresRuntimeLookup_;
}
@@ -623,6 +624,7 @@ class JSFunctionData : public JSObjectData {
ObjectData* prototype() const { return prototype_; }
SharedFunctionInfoData* shared() const { return shared_; }
FeedbackVectorData* feedback_vector() const { return feedback_vector_; }
CodeData* code() const { return code_; }
int initial_map_instance_size_with_min_slack() const {
CHECK(serialized_);
return initial_map_instance_size_with_min_slack_;
@@ -632,6 +634,7 @@ class JSFunctionData : public JSObjectData {
bool has_feedback_vector_;
bool has_initial_map_;
bool has_prototype_;
bool is_optimized_;
bool PrototypeRequiresRuntimeLookup_;
bool serialized_ = false;
@@ -642,6 +645,7 @@ class JSFunctionData : public JSObjectData {
ObjectData* prototype_ = nullptr;
SharedFunctionInfoData* shared_ = nullptr;
FeedbackVectorData* feedback_vector_ = nullptr;
CodeData* code_ = nullptr;
int initial_map_instance_size_with_min_slack_;
};
@@ -1261,6 +1265,7 @@ JSFunctionData::JSFunctionData(JSHeapBroker* broker, ObjectData** storage,
has_initial_map_(object->has_prototype_slot() &&
object->has_initial_map()),
has_prototype_(object->has_prototype_slot() && object->has_prototype()),
is_optimized_(object->IsOptimized()),
PrototypeRequiresRuntimeLookup_(
object->PrototypeRequiresRuntimeLookup()) {}
@@ -1277,6 +1282,7 @@ void JSFunctionData::Serialize(JSHeapBroker* broker) {
DCHECK_NULL(prototype_);
DCHECK_NULL(shared_);
DCHECK_NULL(feedback_vector_);
DCHECK_NULL(code_);
context_ = broker->GetOrCreateData(function->context())->AsContext();
native_context_ =
@@ -1286,6 +1292,7 @@ void JSFunctionData::Serialize(JSHeapBroker* broker) {
? broker->GetOrCreateData(function->feedback_vector())
->AsFeedbackVector()
: nullptr;
code_ = broker->GetOrCreateData(function->code())->AsCode();
initial_map_ = has_initial_map()
? broker->GetOrCreateData(function->initial_map())->AsMap()
: nullptr;
@@ -2024,7 +2031,13 @@ class TemplateObjectDescriptionData : public HeapObjectData {
class CodeData : public HeapObjectData {
public:
CodeData(JSHeapBroker* broker, ObjectData** storage, Handle<Code> object)
-      : HeapObjectData(broker, storage, object) {}
+      : HeapObjectData(broker, storage, object),
+        inlined_bytecode_size_(object->inlined_bytecode_size()) {}
unsigned inlined_bytecode_size() const { return inlined_bytecode_size_; }
private:
unsigned const inlined_bytecode_size_;
};
#define DEFINE_IS_AND_AS(Name) \
@@ -3371,6 +3384,7 @@ BIMODAL_ACCESSOR_C(JSDataView, size_t, byte_offset)
BIMODAL_ACCESSOR_C(JSFunction, bool, has_feedback_vector)
BIMODAL_ACCESSOR_C(JSFunction, bool, has_initial_map)
BIMODAL_ACCESSOR_C(JSFunction, bool, has_prototype)
BIMODAL_ACCESSOR_C(JSFunction, bool, IsOptimized)
BIMODAL_ACCESSOR_C(JSFunction, bool, PrototypeRequiresRuntimeLookup)
BIMODAL_ACCESSOR(JSFunction, Context, context)
BIMODAL_ACCESSOR(JSFunction, NativeContext, native_context)
@@ -3378,6 +3392,7 @@ BIMODAL_ACCESSOR(JSFunction, Map, initial_map)
BIMODAL_ACCESSOR(JSFunction, Object, prototype)
BIMODAL_ACCESSOR(JSFunction, SharedFunctionInfo, shared)
BIMODAL_ACCESSOR(JSFunction, FeedbackVector, feedback_vector)
BIMODAL_ACCESSOR(JSFunction, Code, code)
BIMODAL_ACCESSOR_C(JSGlobalObject, bool, IsDetached)
@@ -3413,6 +3428,8 @@ BIMODAL_ACCESSOR(Map, Object, GetConstructor)
BIMODAL_ACCESSOR(Map, HeapObject, GetBackPointer)
BIMODAL_ACCESSOR_C(Map, bool, is_abandoned_prototype_map)
BIMODAL_ACCESSOR_C(Code, unsigned, inlined_bytecode_size)
#define DEF_NATIVE_CONTEXT_ACCESSOR(type, name) \
BIMODAL_ACCESSOR(NativeContext, type, name)
BROKER_NATIVE_CONTEXT_FIELDS(DEF_NATIVE_CONTEXT_ACCESSOR)


@@ -22,8 +22,8 @@ namespace compiler {
} while (false)
namespace {
-bool IsSmall(BytecodeArrayRef const& bytecode) {
-  return bytecode.length() <= FLAG_max_inlined_bytecode_size_small;
+bool IsSmall(int const size) {
+  return size <= FLAG_max_inlined_bytecode_size_small;
}
bool CanConsiderForInlining(JSHeapBroker* broker,
@@ -200,7 +200,16 @@ Reduction JSInliningHeuristic::Reduce(Node* node) {
can_inline_candidate = true;
BytecodeArrayRef bytecode = candidate.bytecode[i].value();
candidate.total_size += bytecode.length();
-candidate_is_small = candidate_is_small && IsSmall(bytecode);
+unsigned inlined_bytecode_size = 0;
+if (candidate.functions[i].has_value()) {
+  JSFunctionRef function = candidate.functions[i].value();
+  if (function.IsOptimized()) {
+    inlined_bytecode_size = function.code().inlined_bytecode_size();
+    candidate.total_size += inlined_bytecode_size;
+  }
+}
+candidate_is_small = candidate_is_small &&
+                     IsSmall(bytecode.length() + inlined_bytecode_size);
}
}
if (!can_inline_candidate) return NoChange();
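
A worked example of the new check (the threshold is an assumed value for illustration, not necessarily the default of FLAG_max_inlined_bytecode_size_small):

#include <cassert>

// Assumed small-candidate threshold, for illustration only.
constexpr int kSmallThreshold = 30;

constexpr bool IsSmall(int size) { return size <= kSmallThreshold; }

int main() {
  int bytecode_length = 25;  // the candidate's own bytecode
  unsigned inlined = 20;     // bytecode already inlined into its optimized code

  // Before this CL the candidate counted as small (25 <= 30); with the
  // recorded inlined bytecode charged too (25 + 20 = 45), it no longer does.
  assert(IsSmall(bytecode_length));
  assert(!IsSmall(bytecode_length + static_cast<int>(inlined)));
  return 0;
}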


@@ -33,6 +33,10 @@ class JSInliningHeuristic final : public AdvancedReducer {
// and inlines call sites that the heuristic determines to be important.
void Finalize() final;
int total_inlined_bytecode_size() const {
return total_inlined_bytecode_size_;
}
private:
// This limit currently matches what the old compiler did. We may want to
// re-evaluate and come up with a proper limit for TurboFan.


@@ -1412,6 +1412,7 @@ struct InliningPhase {
AddReducer(data, &graph_reducer, &inlining);
}
graph_reducer.ReduceGraph();
info->set_inlined_bytecode_size(inlining.total_inlined_bytecode_size());
}
};


@@ -165,6 +165,7 @@ MaybeHandle<Code> Factory::CodeBuilder::BuildInternal(
code->initialize_flags(kind_, has_unwinding_info, is_turbofanned_,
stack_slots_, kIsNotOffHeapTrampoline);
code->set_builtin_index(builtin_index_);
code->set_inlined_bytecode_size(inlined_bytecode_size_);
code->set_code_data_container(*data_container);
code->set_deoptimization_data(*deoptimization_data_);
code->set_source_position_table(*source_position_table_);


@@ -807,6 +807,11 @@ class V8_EXPORT_PRIVATE Factory : public FactoryBase<Factory> {
return *this;
}
CodeBuilder& set_inlined_bytecode_size(uint32_t size) {
inlined_bytecode_size_ = size;
return *this;
}
CodeBuilder& set_source_position_table(Handle<ByteArray> table) {
DCHECK(!table.is_null());
source_position_table_ = table;
@@ -858,6 +863,7 @@ class V8_EXPORT_PRIVATE Factory : public FactoryBase<Factory> {
MaybeHandle<Object> self_reference_;
int32_t builtin_index_ = Builtins::kNoBuiltinId;
uint32_t inlined_bytecode_size_ = 0;
int32_t kind_specific_flags_ = 0;
Handle<ByteArray> source_position_table_;
Handle<DeoptimizationData> deoptimization_data_ =


@@ -454,6 +454,17 @@ void Code::set_builtin_index(int index) {
bool Code::is_builtin() const { return builtin_index() != -1; }
unsigned Code::inlined_bytecode_size() const {
DCHECK(kind() == OPTIMIZED_FUNCTION ||
ReadField<unsigned>(kInlinedBytecodeSizeOffset) == 0);
return ReadField<unsigned>(kInlinedBytecodeSizeOffset);
}
void Code::set_inlined_bytecode_size(unsigned size) {
DCHECK(kind() == OPTIMIZED_FUNCTION || size == 0);
WriteField<unsigned>(kInlinedBytecodeSizeOffset, size);
}
bool Code::has_safepoint_info() const {
return is_turbofanned() || is_wasm_code();
}


@@ -138,6 +138,9 @@ class Code : public HeapObject {
inline void set_builtin_index(int id);
inline bool is_builtin() const;
inline unsigned inlined_bytecode_size() const;
inline void set_inlined_bytecode_size(unsigned size);
inline bool has_safepoint_info() const;
// [stack_slots]: If {has_safepoint_info()}, the number of stack slots
@@ -397,6 +400,7 @@
FLAG_enable_embedded_constant_pool ? kIntSize : 0) \
V(kCodeCommentsOffsetOffset, kIntSize) \
V(kBuiltinIndexOffset, kIntSize) \
V(kInlinedBytecodeSizeOffset, kIntSize) \
V(kUnalignedHeaderSize, 0) \
/* Add padding to align the instruction start following right after */ \
/* the Code object header. */ \
@@ -409,22 +413,22 @@
// This documents the amount of free space we have in each Code object header
// due to padding for code alignment.
#if V8_TARGET_ARCH_ARM64
-static constexpr int kHeaderPaddingSize = COMPRESS_POINTERS_BOOL ? 20 : 0;
+static constexpr int kHeaderPaddingSize = COMPRESS_POINTERS_BOOL ? 16 : 28;
#elif V8_TARGET_ARCH_MIPS64
-static constexpr int kHeaderPaddingSize = 0;
+static constexpr int kHeaderPaddingSize = 28;
#elif V8_TARGET_ARCH_X64
-static constexpr int kHeaderPaddingSize = COMPRESS_POINTERS_BOOL ? 20 : 0;
+static constexpr int kHeaderPaddingSize = COMPRESS_POINTERS_BOOL ? 16 : 28;
#elif V8_TARGET_ARCH_ARM
-static constexpr int kHeaderPaddingSize = 20;
+static constexpr int kHeaderPaddingSize = 16;
#elif V8_TARGET_ARCH_IA32
-static constexpr int kHeaderPaddingSize = 20;
+static constexpr int kHeaderPaddingSize = 16;
#elif V8_TARGET_ARCH_MIPS
-static constexpr int kHeaderPaddingSize = 20;
+static constexpr int kHeaderPaddingSize = 16;
#elif V8_TARGET_ARCH_PPC64
-static constexpr int kHeaderPaddingSize =
-    FLAG_enable_embedded_constant_pool ? 28 : 0;
+static constexpr int kHeaderPaddingSize =
+    FLAG_enable_embedded_constant_pool ? 24 : 28;
#elif V8_TARGET_ARCH_S390X
-static constexpr int kHeaderPaddingSize = COMPRESS_POINTERS_BOOL ? 20 : 0;
+static constexpr int kHeaderPaddingSize = COMPRESS_POINTERS_BOOL ? 16 : 28;
#else
#error Unknown architecture.
#endif
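
The padding updates above are consistent with a single 4-byte field joining the header. A quick self-contained check, assuming the instruction area is padded out to a 32-byte boundary (the wrap from 0 to 28 fits that assumption):

// Each padding value shrinks by the new field's 4 bytes, modulo the assumed
// 32-byte alignment unit.
constexpr int kAssumedAlignment = 32;
constexpr int NewPadding(int old_padding) {
  return (old_padding - 4 + kAssumedAlignment) % kAssumedAlignment;
}
static_assert(NewPadding(20) == 16, "x64/arm64 with pointer compression");
static_assert(NewPadding(0) == 28, "x64/arm64 without pointer compression");
static_assert(NewPadding(28) == 24, "ppc64 with embedded constant pool");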