[X64] replace far jump by near jump

Code size in the snapshot can be reduced by ~41KB

Contributed by kanghua.yu@intel.com

Bug: None
Cq-Include-Trybots: master.tryserver.chromium.linux:linux_chromium_rel_ng
Change-Id: Ib73af39fe97cd38728affea40c593236f15bf6e5
Reviewed-on: https://chromium-review.googlesource.com/588751
Commit-Queue: Pan Deng <pan.deng@intel.com>
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Cr-Commit-Position: refs/heads/master@{#47531}
This commit is contained in:
pan.deng@intel.com 2017-08-23 11:08:51 +08:00 committed by Commit Bot
parent cb3befad02
commit 093dcd9dad
12 changed files with 287 additions and 39 deletions

View File

@ -159,7 +159,8 @@ AssemblerBase::AssemblerBase(IsolateData isolate_data, void* buffer,
enabled_cpu_features_(0),
emit_debug_code_(FLAG_debug_code),
predictable_code_size_(false),
constant_pool_available_(false) {
constant_pool_available_(false),
jump_optimization_info_(nullptr) {
own_buffer_ = buffer == NULL;
if (buffer_size == 0) buffer_size = kMinimalBufferSize;
DCHECK(buffer_size > 0);

View File

@ -65,6 +65,26 @@ void SetUpJSCallerSavedCodeData();
// Return the code of the n-th saved register available to JavaScript.
int JSCallerSavedCode(int n);
// -----------------------------------------------------------------------------
// Optimization for far-jmp-like instructions that can be replaced by shorter
// encodings.
// State shared between two code-generation passes over the same code: a
// collection pass that records which far jumps have a displacement fitting in
// 8 bits, and an optimization pass that consults that record.
class JumpOptimizationInfo {
 public:
  // Stage queries. A newly constructed object is in the collection stage.
  bool is_collecting() const { return stage_ == Stage::kCollection; }
  bool is_optimizing() const { return stage_ == Stage::kOptimization; }
  // Switches to the optimization stage; there is no way back.
  void set_optimizing() { stage_ = Stage::kOptimization; }

  // Whether the collection pass found at least one jump worth shortening.
  bool is_optimizable() const { return optimizable_; }
  void set_optimizable() { optimizable_ = true; }

  // Bitmap with one bit per far jump, 32 jumps per word. Returned by mutable
  // reference so the assembler can both fill it and read it.
  std::vector<uint32_t>& farjmp_bitmap() { return farjmp_bitmap_; }

 private:
  enum class Stage { kCollection, kOptimization };

  Stage stage_ = Stage::kCollection;
  bool optimizable_ = false;
  std::vector<uint32_t> farjmp_bitmap_;
};
// -----------------------------------------------------------------------------
// Platform independent assembler base class.
@ -119,6 +139,13 @@ class AssemblerBase: public Malloced {
}
}
// Returns the jump-optimization state attached to this assembler, or nullptr
// if none has been set (the constructor initializes it to nullptr).
JumpOptimizationInfo* jump_optimization_info() {
return jump_optimization_info_;
}
// Attaches (or, with nullptr, detaches) the jump-optimization state. Only the
// pointer is stored; this class does not take ownership of the object.
void set_jump_optimization_info(JumpOptimizationInfo* jump_opt) {
jump_optimization_info_ = jump_opt;
}
// Overwrite a host NaN with a quiet target NaN. Used by mksnapshot for
// cross-snapshotting.
static void QuietNaN(HeapObject* nan) { }
@ -165,6 +192,8 @@ class AssemblerBase: public Malloced {
// if the pp register points to the current code object's constant pool.
bool constant_pool_available_;
JumpOptimizationInfo* jump_optimization_info_;
// Constant pool.
friend class FrameAndConstantPoolScope;
friend class ConstantPoolUnavailableScope;

View File

@ -154,9 +154,21 @@ Handle<Code> CodeAssembler::GenerateCode(CodeAssemblerState* state) {
RawMachineAssembler* rasm = state->raw_assembler_.get();
Schedule* schedule = rasm->Export();
JumpOptimizationInfo jump_opt;
Handle<Code> code = Pipeline::GenerateCodeForCodeStub(
rasm->isolate(), rasm->call_descriptor(), rasm->graph(), schedule,
state->flags_, state->name_);
state->flags_, state->name_,
rasm->isolate()->serializer_enabled() ? &jump_opt : nullptr);
if (jump_opt.is_optimizable()) {
jump_opt.set_optimizing();
// Regenerate machine code
code = Pipeline::GenerateCodeForCodeStub(
rasm->isolate(), rasm->call_descriptor(), rasm->graph(), schedule,
state->flags_, state->name_, &jump_opt);
}
state->code_generated_ = true;
return code;

View File

@ -38,7 +38,8 @@ class CodeGenerator::JumpTable final : public ZoneObject {
CodeGenerator::CodeGenerator(Zone* codegen_zone, Frame* frame, Linkage* linkage,
InstructionSequence* code, CompilationInfo* info,
base::Optional<OsrHelper> osr_helper,
int start_source_position)
int start_source_position,
JumpOptimizationInfo* jump_opt)
: zone_(codegen_zone),
frame_access_state_(nullptr),
linkage_(linkage),
@ -72,6 +73,7 @@ CodeGenerator::CodeGenerator(Zone* codegen_zone, Frame* frame, Linkage* linkage,
}
CreateFrameAccessState(frame);
CHECK_EQ(info->is_osr(), osr_helper_.has_value());
tasm_.set_jump_optimization_info(jump_opt);
}
Isolate* CodeGenerator::isolate() const { return info_->isolate(); }
@ -152,8 +154,11 @@ void CodeGenerator::AssembleCode() {
if (block->IsDeferred() == (deferred == 0)) {
continue;
}
// Align loop headers on 16-byte boundaries.
if (block->IsLoopHeader()) tasm()->Align(16);
if (block->IsLoopHeader() && !tasm()->jump_optimization_info()) {
tasm()->Align(16);
}
// Bind a label for a block.
current_block_ = block->rpo_number();
unwinding_info_writer_.BeginInstructionBlock(tasm()->pc_offset(), block);

View File

@ -82,7 +82,8 @@ class CodeGenerator final : public GapResolver::Assembler {
explicit CodeGenerator(Zone* codegen_zone, Frame* frame, Linkage* linkage,
InstructionSequence* code, CompilationInfo* info,
base::Optional<OsrHelper> osr_helper,
int start_source_position);
int start_source_position,
JumpOptimizationInfo* jump_opt);
// Generate native code. After calling AssembleCode, call FinalizeCode to
// produce the actual code object. If an error occurs during either phase,

View File

@ -142,7 +142,8 @@ class PipelineData {
// For machine graph testing entry point.
PipelineData(ZoneStats* zone_stats, CompilationInfo* info, Graph* graph,
Schedule* schedule, SourcePositionTable* source_positions)
Schedule* schedule, SourcePositionTable* source_positions,
JumpOptimizationInfo* jump_opt)
: isolate_(info->isolate()),
info_(info),
debug_name_(info_->GetDebugName()),
@ -156,8 +157,8 @@ class PipelineData {
codegen_zone_scope_(zone_stats_, ZONE_NAME),
codegen_zone_(codegen_zone_scope_.zone()),
register_allocation_zone_scope_(zone_stats_, ZONE_NAME),
register_allocation_zone_(register_allocation_zone_scope_.zone()) {
}
register_allocation_zone_(register_allocation_zone_scope_.zone()),
jump_optimization_info_(jump_opt) {}
// For register allocation testing entry point.
PipelineData(ZoneStats* zone_stats, CompilationInfo* info,
InstructionSequence* sequence)
@ -254,6 +255,10 @@ class PipelineData {
return protected_instructions_;
}
// Returns the jump-optimization state this pipeline data was given, or
// nullptr (the member's default) when jump optimization is not in use.
JumpOptimizationInfo* jump_optimization_info() const {
return jump_optimization_info_;
}
void DeleteGraphZone() {
if (graph_zone_ == nullptr) return;
graph_zone_scope_.Destroy();
@ -333,9 +338,9 @@ class PipelineData {
void InitializeCodeGenerator(Linkage* linkage) {
DCHECK_NULL(code_generator_);
code_generator_ =
new CodeGenerator(codegen_zone(), frame(), linkage, sequence(), info(),
osr_helper_, start_source_position_);
code_generator_ = new CodeGenerator(
codegen_zone(), frame(), linkage, sequence(), info(), osr_helper_,
start_source_position_, jump_optimization_info_);
}
void BeginPhaseKind(const char* phase_kind_name) {
@ -409,6 +414,8 @@ class PipelineData {
ZoneVector<trap_handler::ProtectedInstructionData>* protected_instructions_ =
nullptr;
JumpOptimizationInfo* jump_optimization_info_ = nullptr;
DISALLOW_COPY_AND_ASSIGN(PipelineData);
};
@ -1752,14 +1759,16 @@ Handle<Code> Pipeline::GenerateCodeForCodeStub(Isolate* isolate,
CallDescriptor* call_descriptor,
Graph* graph, Schedule* schedule,
Code::Flags flags,
const char* debug_name) {
const char* debug_name,
JumpOptimizationInfo* jump_opt) {
CompilationInfo info(CStrVector(debug_name), isolate, graph->zone(), flags);
if (isolate->serializer_enabled()) info.MarkAsSerializing();
// Construct a pipeline for scheduling and code generation.
ZoneStats zone_stats(isolate->allocator());
SourcePositionTable source_positions(graph);
PipelineData data(&zone_stats, &info, graph, schedule, &source_positions);
PipelineData data(&zone_stats, &info, graph, schedule, &source_positions,
jump_opt);
data.set_verify_graph(FLAG_verify_csa);
std::unique_ptr<PipelineStatistics> pipeline_statistics;
if (FLAG_turbo_stats || FLAG_turbo_stats_nvp) {
@ -1824,7 +1833,8 @@ Handle<Code> Pipeline::GenerateCodeForTesting(
// table, then remove this conditional allocation.
if (!source_positions)
source_positions = new (info->zone()) SourcePositionTable(graph);
PipelineData data(&zone_stats, info, graph, schedule, source_positions);
PipelineData data(&zone_stats, info, graph, schedule, source_positions,
nullptr);
std::unique_ptr<PipelineStatistics> pipeline_statistics;
if (FLAG_turbo_stats || FLAG_turbo_stats_nvp) {
pipeline_statistics.reset(new PipelineStatistics(info, &zone_stats));
@ -1901,6 +1911,14 @@ bool PipelineImpl::ScheduleAndSelectInstructions(Linkage* linkage,
}
bool verify_stub_graph = data->verify_graph();
// Jump optimization runs instruction selection twice, but the instruction
// selector mutates nodes like swapping the inputs of a load, which can
// violate the machine graph verification rules. So we skip the second
// verification on a graph that was already verified before.
auto jump_opt = data->jump_optimization_info();
if (jump_opt && jump_opt->is_optimizing()) {
verify_stub_graph = false;
}
if (verify_stub_graph ||
(FLAG_turbo_verify_machine_graph != nullptr &&
(!strcmp(FLAG_turbo_verify_machine_graph, "*") ||

View File

@ -17,6 +17,7 @@ namespace internal {
class CompilationInfo;
class CompilationJob;
class RegisterConfiguration;
class JumpOptimizationInfo;
namespace trap_handler {
struct ProtectedInstructionData;
@ -55,7 +56,8 @@ class Pipeline : public AllStatic {
CallDescriptor* call_descriptor,
Graph* graph, Schedule* schedule,
Code::Flags flags,
const char* debug_name);
const char* debug_name,
JumpOptimizationInfo* jump_opt);
// Run the entire pipeline and generate a handle to a code object suitable for
// testing.

View File

@ -352,6 +352,29 @@ void Assembler::GetCode(Isolate* isolate, CodeDesc* desc) {
desc->constant_pool_size = 0;
desc->unwinding_info_size = 0;
desc->unwinding_info = nullptr;
// Collection stage
auto jump_opt = jump_optimization_info();
if (jump_opt && jump_opt->is_collecting()) {
auto& bitmap = jump_opt->farjmp_bitmap();
int num = static_cast<int>(farjmp_positions_.size());
if (num && bitmap.empty()) {
bool can_opt = false;
bitmap.resize((num + 31) / 32, 0);
for (int i = 0; i < num; i++) {
int disp_pos = farjmp_positions_[i];
int disp = long_at(disp_pos);
if (is_int8(disp)) {
bitmap[i / 32] |= 1 << (i & 31);
can_opt = true;
}
}
if (can_opt) {
jump_opt->set_optimizable();
}
}
}
}
@ -1537,6 +1560,21 @@ void Assembler::bind_to(Label* L, int pos) {
L->UnuseNear();
}
}
// Optimization stage
auto jump_opt = jump_optimization_info();
if (jump_opt && jump_opt->is_optimizing()) {
auto it = label_farjmp_maps_.find(L);
if (it != label_farjmp_maps_.end()) {
auto& pos_vector = it->second;
for (auto fixup_pos : pos_vector) {
int disp = pos - (fixup_pos + sizeof(int8_t));
CHECK(is_int8(disp));
set_byte_at(fixup_pos, disp);
}
label_farjmp_maps_.erase(it);
}
}
L->bind_to(pos);
}
@ -1547,6 +1585,21 @@ void Assembler::bind(Label* L) {
bind_to(L, pc_offset());
}
// Remembers |pos| as a fixup position for a jump to |L| that was emitted in
// its short form. bind_to() patches the byte at each recorded position once
// the label is bound.
void Assembler::record_farjmp_position(Label* L, int pos) {
  // operator[] creates the per-label list on first use.
  label_farjmp_maps_[L].push_back(pos);
}
// Returns true if the far jump with index |idx| (in emission order) was marked
// in the jump-optimization bitmap as having a displacement that fits in 8
// bits. Always returns false when the code size must be predictable. Must only
// be called during the optimization stage, with jump optimization info set.
bool Assembler::is_optimizable_farjmp(int idx) {
  if (predictable_code_size()) return false;

  auto jump_opt = jump_optimization_info();
  CHECK(jump_opt->is_optimizing());

  auto& bitmap = jump_opt->farjmp_bitmap();
  // Reject a negative index before it is used as a subscript below.
  CHECK(idx >= 0);
  CHECK(idx < static_cast<int>(bitmap.size() * 32));

  // Build the mask as unsigned so that testing bit 31 does not shift a 1 into
  // the sign bit of a signed int.
  const uint32_t mask = uint32_t{1} << (idx & 31);
  return (bitmap[idx / 32] & mask) != 0;
}
void Assembler::call(Label* L) {
EnsureSpace ensure_space(this);
@ -1628,6 +1681,18 @@ void Assembler::jmp(Label* L, Label::Distance distance) {
EMIT(0xEB);
emit_near_disp(L);
} else {
auto jump_opt = jump_optimization_info();
if (V8_UNLIKELY(jump_opt)) {
if (jump_opt->is_optimizing() && is_optimizable_farjmp(farjmp_num_++)) {
EMIT(0xEB);
record_farjmp_position(L, pc_offset());
EMIT(0);
return;
}
if (jump_opt->is_collecting()) {
farjmp_positions_.push_back(pc_offset() + 1);
}
}
// 1110 1001 #32-bit disp.
EMIT(0xE9);
emit_disp(L, Displacement::UNCONDITIONAL_JUMP);
@ -1684,6 +1749,19 @@ void Assembler::j(Condition cc, Label* L, Label::Distance distance) {
EMIT(0x70 | cc);
emit_near_disp(L);
} else {
auto jump_opt = jump_optimization_info();
if (V8_UNLIKELY(jump_opt)) {
if (jump_opt->is_optimizing() && is_optimizable_farjmp(farjmp_num_++)) {
// 0111 tttn #8-bit disp
EMIT(0x70 | cc);
record_farjmp_position(L, pc_offset());
EMIT(0);
return;
}
if (jump_opt->is_collecting()) {
farjmp_positions_.push_back(pc_offset() + 2);
}
}
// 0000 1111 1000 tttn #32-bit disp
// Note: could eliminate cond. jumps to this jump if condition
// is the same however, seems to be rather unlikely case.

View File

@ -1807,6 +1807,11 @@ class Assembler : public AssemblerBase {
// record reloc info for current pc_
void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0);
// record the position of jmp/jcc instruction
void record_farjmp_position(Label* L, int pos);
bool is_optimizable_farjmp(int idx);
friend class CodePatcher;
friend class EnsureSpace;
@ -1830,6 +1835,11 @@ class Assembler : public AssemblerBase {
void AllocateAndInstallRequestedHeapObjects(Isolate* isolate);
std::forward_list<HeapObjectRequest> heap_object_requests_;
// Variables for this instance of assembler
int farjmp_num_ = 0;
std::deque<int> farjmp_positions_;
std::map<Label*, std::vector<int>> label_farjmp_maps_;
};

View File

@ -331,6 +331,29 @@ void Assembler::GetCode(Isolate* isolate, CodeDesc* desc) {
desc->constant_pool_size = 0;
desc->unwinding_info_size = 0;
desc->unwinding_info = nullptr;
// Collection stage
auto jump_opt = jump_optimization_info();
if (jump_opt && jump_opt->is_collecting()) {
auto& bitmap = jump_opt->farjmp_bitmap();
int num = static_cast<int>(farjmp_positions_.size());
if (num && bitmap.empty()) {
bool can_opt = false;
bitmap.resize((num + 31) / 32, 0);
for (int i = 0; i < num; i++) {
int disp_pos = farjmp_positions_[i];
int disp = long_at(disp_pos);
if (is_int8(disp)) {
bitmap[i / 32] |= 1 << (i & 31);
can_opt = true;
}
}
if (can_opt) {
jump_opt->set_optimizable();
}
}
}
}
@ -401,6 +424,21 @@ void Assembler::bind_to(Label* L, int pos) {
L->UnuseNear();
}
}
// Optimization stage
auto jump_opt = jump_optimization_info();
if (jump_opt && jump_opt->is_optimizing()) {
auto it = label_farjmp_maps_.find(L);
if (it != label_farjmp_maps_.end()) {
auto& pos_vector = it->second;
for (auto fixup_pos : pos_vector) {
int disp = pos - (fixup_pos + sizeof(int8_t));
CHECK(is_int8(disp));
set_byte_at(fixup_pos, disp);
}
label_farjmp_maps_.erase(it);
}
}
L->bind_to(pos);
}
@ -409,6 +447,21 @@ void Assembler::bind(Label* L) {
bind_to(L, pc_offset());
}
// Remembers |pos| as a fixup position for a jump to |L| that was emitted in
// its short form. bind_to() patches the byte at each recorded position once
// the label is bound.
void Assembler::record_farjmp_position(Label* L, int pos) {
  // operator[] creates the per-label list on first use.
  label_farjmp_maps_[L].push_back(pos);
}
// Returns true if the far jump with index |idx| (in emission order) was marked
// in the jump-optimization bitmap as having a displacement that fits in 8
// bits. Always returns false when the code size must be predictable. Must only
// be called during the optimization stage, with jump optimization info set.
bool Assembler::is_optimizable_farjmp(int idx) {
  if (predictable_code_size()) return false;

  auto jump_opt = jump_optimization_info();
  CHECK(jump_opt->is_optimizing());

  auto& bitmap = jump_opt->farjmp_bitmap();
  // Reject a negative index before it is used as a subscript below.
  CHECK(idx >= 0);
  CHECK(idx < static_cast<int>(bitmap.size() * 32));

  // Build the mask as unsigned so that testing bit 31 does not shift a 1 into
  // the sign bit of a signed int.
  const uint32_t mask = uint32_t{1} << (idx & 31);
  return (bitmap[idx / 32] & mask) != 0;
}
void Assembler::GrowBuffer() {
DCHECK(buffer_overflow());
@ -1268,19 +1321,34 @@ void Assembler::j(Condition cc, Label* L, Label::Distance distance) {
}
L->link_to(pc_offset(), Label::kNear);
emit(disp);
} else if (L->is_linked()) {
// 0000 1111 1000 tttn #32-bit disp.
emit(0x0F);
emit(0x80 | cc);
emitl(L->pos());
L->link_to(pc_offset() - sizeof(int32_t));
} else {
DCHECK(L->is_unused());
emit(0x0F);
emit(0x80 | cc);
int32_t current = pc_offset();
emitl(current);
L->link_to(current);
auto jump_opt = jump_optimization_info();
if (V8_UNLIKELY(jump_opt)) {
if (jump_opt->is_optimizing() && is_optimizable_farjmp(farjmp_num_++)) {
// 0111 tttn #8-bit disp
emit(0x70 | cc);
record_farjmp_position(L, pc_offset());
emit(0);
return;
}
if (jump_opt->is_collecting()) {
farjmp_positions_.push_back(pc_offset() + 2);
}
}
if (L->is_linked()) {
// 0000 1111 1000 tttn #32-bit disp.
emit(0x0F);
emit(0x80 | cc);
emitl(L->pos());
L->link_to(pc_offset() - sizeof(int32_t));
} else {
DCHECK(L->is_unused());
emit(0x0F);
emit(0x80 | cc);
int32_t current = pc_offset();
emitl(current);
L->link_to(current);
}
}
}
@ -1333,18 +1401,32 @@ void Assembler::jmp(Label* L, Label::Distance distance) {
}
L->link_to(pc_offset(), Label::kNear);
emit(disp);
} else if (L->is_linked()) {
// 1110 1001 #32-bit disp.
emit(0xE9);
emitl(L->pos());
L->link_to(pc_offset() - long_size);
} else {
// 1110 1001 #32-bit disp.
DCHECK(L->is_unused());
emit(0xE9);
int32_t current = pc_offset();
emitl(current);
L->link_to(current);
auto jump_opt = jump_optimization_info();
if (V8_UNLIKELY(jump_opt)) {
if (jump_opt->is_optimizing() && is_optimizable_farjmp(farjmp_num_++)) {
emit(0xEB);
record_farjmp_position(L, pc_offset());
emit(0);
return;
}
if (jump_opt->is_collecting()) {
farjmp_positions_.push_back(pc_offset() + 1);
}
}
if (L->is_linked()) {
// 1110 1001 #32-bit disp.
emit(0xE9);
emitl(L->pos());
L->link_to(pc_offset() - long_size);
} else {
// 1110 1001 #32-bit disp.
DCHECK(L->is_unused());
emit(0xE9);
int32_t current = pc_offset();
emitl(current);
L->link_to(current);
}
}
}

View File

@ -2494,6 +2494,11 @@ class Assembler : public AssemblerBase {
void bmi2l(SIMDPrefix pp, byte op, Register reg, Register vreg,
const Operand& rm);
// record the position of jmp/jcc instruction
void record_farjmp_position(Label* L, int pos);
bool is_optimizable_farjmp(int idx);
friend class CodePatcher;
friend class EnsureSpace;
friend class RegExpMacroAssemblerX64;
@ -2520,6 +2525,11 @@ class Assembler : public AssemblerBase {
void AllocateAndInstallRequestedHeapObjects(Isolate* isolate);
std::forward_list<HeapObjectRequest> heap_object_requests_;
// Variables for this instance of assembler
int farjmp_num_ = 0;
std::deque<int> farjmp_positions_;
std::map<Label*, std::vector<int>> label_farjmp_maps_;
};

View File

@ -28,7 +28,7 @@ class CodeGeneratorTester : public InitializedHandleScope {
sequence_(main_isolate(), &zone_, &blocks_),
frame_(descriptor_->CalculateFixedFrameSize()),
generator_(&zone_, &frame_, &linkage_, &sequence_, &info_,
base::Optional<OsrHelper>(), kNoSourcePosition) {
base::Optional<OsrHelper>(), kNoSourcePosition, nullptr) {
info_.set_prologue_offset(generator_.tasm()->pc_offset());
}