[turbofan] IA32 port of branch load poisoning.

The tricky part here is to take away one register from register
allocation for the mask. The only problem is with calls that need
an input operand to be passed in the poison register. For such calls,
we change the register constraint in the instruction selector
to pass the value in whatever place the register allocator sees fit.
During code generation, we then copy the value from that place
to the poison register. By that time, the mask is not necessary
(once we bake the mask into the target, it should be done before
this move).

For the branches, the mask update does not use cmov (unlike x64)
because cmov does not take an immediate and we do not have
a scratch register. Instead we use bit-twiddling tricks
(suggested by @tebbi). For example, here is the code for masking
register update after a bailout on non-zero:

  jnz deopt_bailout    ;; Bailout branch
  setnz bl             ;; These three instructions update the mask
  add  ebx, 255
  sar  ebx, 31

(On x64, the sequence is:

  jnz deopt_bailout
  mov r10, 0      ;; We have a scratch register for zero
  cmovnz r9, r10  ;; Set to zero if we execute this branch
                  ;; in branch mis-speculation
)


This CL also fixes a bug in register configuration, where we used
to wrongly restrict the array of register names.

Change-Id: I5fceff2faf8bdc527d9934afc284b749574ab69e
Bug: chromium:798964
Reviewed-on: https://chromium-review.googlesource.com/946251
Commit-Queue: Jaroslav Sevcik <jarin@chromium.org>
Reviewed-by: Benedikt Meurer <bmeurer@chromium.org>
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Cr-Commit-Position: refs/heads/master@{#51798}
This commit is contained in:
Jaroslav Sevcik 2018-03-07 07:35:45 +01:00 committed by Commit Bot
parent f24b9271de
commit 383ec7b506
9 changed files with 126 additions and 55 deletions

View File

@ -841,9 +841,9 @@ void CodeGenerator::RecordCallPosition(Instruction* instr) {
if (needs_frame_state) {
MarkLazyDeoptSite();
// If the frame state is present, it starts at argument 1 (just after the
// code address).
size_t frame_state_offset = 1;
// If the frame state is present, it starts at argument 2 - after
// the code address and the poison-alias index.
size_t frame_state_offset = 2;
FrameStateDescriptor* descriptor =
GetDeoptimizationEntry(instr, frame_state_offset).descriptor();
int pc_offset = tasm()->pc_offset();

View File

@ -285,6 +285,34 @@ class OutOfLineRecordWrite final : public OutOfLineCode {
Zone* zone_;
};
// Before emitting a call, copies the call input that the register allocator
// placed elsewhere into kSpeculationPoisonRegister, if any input was
// originally constrained to that register. Input 1 of every call carries
// the "poison-alias index" as an immediate: the index of the aliasing
// operand, or -1 if no input aliases the poison register (see the CL
// description at the top of this commit).
void MoveOperandIfAliasedWithPoisonRegister(Instruction* call_instruction,
CodeGenerator* gen) {
IA32OperandConverter i(gen, call_instruction);
// Read the poison-alias index from the fixed slot (input 1).
int const poison_index = i.InputInt32(1);
if (poison_index == -1) {
// No aliasing -> nothing to move.
return;
}
// By this point the mask is no longer needed, so plain moves into the
// poison register are safe (per the CL description above).
if (HasImmediateInput(call_instruction, poison_index)) {
gen->tasm()->mov(kSpeculationPoisonRegister,
i.InputImmediate(poison_index));
} else {
gen->tasm()->mov(kSpeculationPoisonRegister, i.InputOperand(poison_index));
}
}
// Masks the output register of a load with kSpeculationPoisonRegister when
// the instruction's MiscField marks the access as poisoned. Per the mask
// semantics described in the CL message (zero under branch
// mis-speculation), the and_ zeroes a mis-speculated load's result.
void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
InstructionCode opcode, Instruction* instr,
IA32OperandConverter& i) {
const MemoryAccessMode access_mode =
static_cast<MemoryAccessMode>(MiscField::decode(opcode));
if (access_mode == kMemoryAccessPoisoned) {
Register value = i.OutputRegister();
codegen->tasm()->and_(value, kSpeculationPoisonRegister);
}
}
} // namespace
#define ASSEMBLE_COMPARE(asm_instr) \
@ -549,6 +577,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
switch (arch_opcode) {
case kArchCallCodeObject: {
MoveOperandIfAliasedWithPoisonRegister(instr, this);
if (HasImmediateInput(instr, 0)) {
Handle<Code> code = i.InputCode(0);
__ call(code, RelocInfo::CODE_TARGET);
@ -566,6 +595,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArchCallWasmFunction: {
MoveOperandIfAliasedWithPoisonRegister(instr, this);
if (HasImmediateInput(instr, 0)) {
Address wasm_code = reinterpret_cast<Address>(
i.ToConstant(instr->InputAt(0)).ToInt32());
@ -592,6 +622,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArchTailCallCodeObjectFromJSFunction:
case kArchTailCallCodeObject: {
MoveOperandIfAliasedWithPoisonRegister(instr, this);
if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
no_reg, no_reg, no_reg);
@ -613,6 +644,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArchTailCallWasm: {
MoveOperandIfAliasedWithPoisonRegister(instr, this);
if (HasImmediateInput(instr, 0)) {
Address wasm_code = reinterpret_cast<Address>(
i.ToConstant(instr->InputAt(0)).ToInt32());
@ -634,6 +666,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArchTailCallAddress: {
MoveOperandIfAliasedWithPoisonRegister(instr, this);
CHECK(!HasImmediateInput(instr, 0));
Register reg = i.InputRegister(0);
if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
@ -646,6 +679,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArchCallJSFunction: {
MoveOperandIfAliasedWithPoisonRegister(instr, this);
Register func = i.InputRegister(0);
if (FLAG_debug_code) {
// Check the function's context matches the context argument.
@ -696,6 +730,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
AssemblePrepareTailCall();
break;
case kArchCallCFunction: {
MoveOperandIfAliasedWithPoisonRegister(instr, this);
int const num_parameters = MiscField::decode(instr->opcode());
if (HasImmediateInput(instr, 0)) {
ExternalReference ref = i.InputExternalReference(0);
@ -1460,9 +1495,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
case kIA32Movsxbl:
ASSEMBLE_MOVX(movsx_b);
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
case kIA32Movzxbl:
ASSEMBLE_MOVX(movzx_b);
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
case kIA32Movb: {
size_t index = 0;
@ -1472,13 +1509,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} else {
__ mov_b(operand, i.InputRegister(index));
}
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
}
case kIA32Movsxwl:
ASSEMBLE_MOVX(movsx_w);
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
case kIA32Movzxwl:
ASSEMBLE_MOVX(movzx_w);
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
case kIA32Movw: {
size_t index = 0;
@ -1488,11 +1528,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} else {
__ mov_w(operand, i.InputRegister(index));
}
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
break;
}
case kIA32Movl:
if (instr->HasOutput()) {
__ mov(i.OutputRegister(), i.MemoryOperand());
EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
} else {
size_t index = 0;
Operand operand = i.MemoryOperand(&index);
@ -3119,7 +3161,15 @@ void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
// Updates the speculation-poison mask after a branch. Instead of cmov
// (which on ia32 takes no immediate, and no scratch register is free),
// it uses the setcc/add/sar bit-twiddling sequence described in the CL
// message above: the mask becomes zero when this path is reached under
// branch mis-speculation, and stays all-ones otherwise.
void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
Instruction* instr) {
// NOTE(review): this UNREACHABLE() is the pre-change body retained by the
// diff rendering; the lines below are its replacement in this commit.
UNREACHABLE();
// TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
return;
}
// Negate so setcc produces 1 exactly when the guarded branch should have
// been taken (i.e. we are here mis-speculatively); add 255 then carries
// into the high bits and the arithmetic shift smears the sign bit,
// yielding 0; otherwise the all-ones mask is preserved.
condition = NegateFlagsCondition(condition);
__ setcc(FlagsConditionToCondition(condition), kSpeculationPoisonRegister);
__ add(kSpeculationPoisonRegister, Immediate(255));
__ sar(kSpeculationPoisonRegister, 31u);
}
void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
@ -3437,6 +3487,7 @@ void CodeGenerator::AssembleConstructFrame() {
if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
osr_pc_offset_ = __ pc_offset();
shrink_slots -= osr_helper()->UnoptimizedFrameSlots();
InitializePoisonForLoadsIfNeeded();
}
const RegList saves = call_descriptor->CalleeSavedRegisters();

View File

@ -2044,7 +2044,7 @@ InstructionSelector::AlignmentRequirements() {
}
// static
bool InstructionSelector::SupportsSpeculationPoisoning() { return false; }
// ia32 now implements branch load poisoning (this CL), so advertise
// support to the shared instruction-selector code.
bool InstructionSelector::SupportsSpeculationPoisoning() { return true; }
} // namespace compiler
} // namespace internal

View File

@ -809,10 +809,17 @@ void InstructionSelector::InitializeCallBuffer(Node* call, CallBuffer* buffer,
}
DCHECK_EQ(1u, buffer->instruction_args.size());
// Argument 1 is used for the poison-alias index (encoded in a word-sized
// immediate). This is the index of the operand that aliases with the poison
// register, or -1 if there is no aliasing.
buffer->instruction_args.push_back(g.TempImmediate(-1));
const size_t poison_alias_index = 1;
DCHECK_EQ(buffer->instruction_args.size() - 1, poison_alias_index);
// If the call needs a frame state, we insert the state information as
// follows (n is the number of value inputs to the frame state):
// arg 1 : deoptimization id.
// arg 2 - arg (n + 1) : value inputs to the frame state.
// arg 2 : deoptimization id.
// arg 3 - arg (n + 2) : value inputs to the frame state.
size_t frame_state_entries = 0;
USE(frame_state_entries); // frame_state_entries is only used for debug.
if (buffer->frame_state_descriptor != nullptr) {
@ -848,7 +855,7 @@ void InstructionSelector::InitializeCallBuffer(Node* call, CallBuffer* buffer,
&buffer->instruction_args, FrameStateInputKind::kStackSlot,
instruction_zone());
DCHECK_EQ(1 + frame_state_entries, buffer->instruction_args.size());
DCHECK_EQ(2 + frame_state_entries, buffer->instruction_args.size());
}
size_t input_count = static_cast<size_t>(buffer->input_count());
@ -871,8 +878,9 @@ void InstructionSelector::InitializeCallBuffer(Node* call, CallBuffer* buffer,
location, stack_param_delta);
}
InstructionOperand op = g.UseLocation(*iter, location);
if (UnallocatedOperand::cast(op).HasFixedSlotPolicy() && !call_tail) {
int stack_index = -UnallocatedOperand::cast(op).fixed_slot_index() - 1;
UnallocatedOperand unallocated = UnallocatedOperand::cast(op);
if (unallocated.HasFixedSlotPolicy() && !call_tail) {
int stack_index = -unallocated.fixed_slot_index() - 1;
if (static_cast<size_t>(stack_index) >= buffer->pushed_nodes.size()) {
buffer->pushed_nodes.resize(stack_index + 1);
}
@ -880,11 +888,23 @@ void InstructionSelector::InitializeCallBuffer(Node* call, CallBuffer* buffer,
buffer->pushed_nodes[stack_index] = param;
pushed_count++;
} else {
// If we do load poisoning and the linkage uses the poisoning register,
// then we request the input in memory location, and during code
// generation, we move the input to the register.
if (load_poisoning_ == LoadPoisoning::kDoPoison &&
unallocated.HasFixedRegisterPolicy()) {
int reg = unallocated.fixed_register_index();
if (reg == kSpeculationPoisonRegister.code()) {
buffer->instruction_args[poison_alias_index] = g.TempImmediate(
static_cast<int32_t>(buffer->instruction_args.size()));
op = g.Use(*iter);
}
}
buffer->instruction_args.push_back(op);
}
}
DCHECK_EQ(input_count, buffer->instruction_args.size() + pushed_count -
frame_state_entries);
frame_state_entries - 1);
if (V8_TARGET_ARCH_STORES_RETURN_ADDRESS_ON_STACK && call_tail &&
stack_param_delta != 0) {
// For tail calls that change the size of their parameter list and keep

View File

@ -423,6 +423,12 @@ void CEntryStub::Generate(MacroAssembler* masm) {
__ mov(Operand(ebp, StandardFrameConstants::kContextOffset), esi);
__ bind(&skip);
// Reset the masking register. This is done independently of the underlying
// feature flag {FLAG_branch_load_poisoning} to make the snapshot work with
// both configurations. It is safe to always do this, because the underlying
// register is caller-saved and can be arbitrarily clobbered.
__ ResetSpeculationPoisonRegister();
// Compute the handler entry address and jump to it.
__ mov(edi, Operand::StaticVariable(pending_handler_entrypoint_address));
__ jmp(edi);

View File

@ -1665,7 +1665,9 @@ void TurboAssembler::ComputeCodeStartAddress(Register dst) {
}
}
void TurboAssembler::ResetSpeculationPoisonRegister() { UNREACHABLE(); }
// Resets the poison mask to all-ones (-1), the neutral value: and-ing a
// loaded value with it is then a no-op on correctly-speculated paths.
void TurboAssembler::ResetSpeculationPoisonRegister() {
mov(kSpeculationPoisonRegister, Immediate(-1));
}
} // namespace internal
} // namespace v8

View File

@ -132,22 +132,10 @@ class ArchDefaultPoisoningRegisterConfiguration : public RegisterConfiguration {
get_num_allocatable_double_registers(),
InitializeGeneralRegisterCodes(), get_allocatable_double_codes(),
kSimpleFPAliasing ? AliasingKind::OVERLAP : AliasingKind::COMBINE,
InitializeGeneralRegisterNames(), kFloatRegisterNames,
kDoubleRegisterNames, kSimd128RegisterNames) {}
kGeneralRegisterNames, kFloatRegisterNames, kDoubleRegisterNames,
kSimd128RegisterNames) {}
private:
static char const* const* InitializeGeneralRegisterNames() {
int filtered_index = 0;
for (int i = 0; i < kMaxAllocatableGeneralRegisterCount; ++i) {
if (kAllocatableGeneralCodes[i] != kSpeculationPoisonRegister.code()) {
general_register_names_[filtered_index] = kGeneralRegisterNames[i];
filtered_index++;
}
}
DCHECK_EQ(filtered_index, kMaxAllocatableGeneralRegisterCount - 1);
return general_register_names_;
}
static const int* InitializeGeneralRegisterCodes() {
int filtered_index = 0;
for (int i = 0; i < kMaxAllocatableGeneralRegisterCount; ++i) {
@ -161,14 +149,10 @@ class ArchDefaultPoisoningRegisterConfiguration : public RegisterConfiguration {
return allocatable_general_codes_;
}
static const char*
general_register_names_[kMaxAllocatableGeneralRegisterCount - 1];
static int
allocatable_general_codes_[kMaxAllocatableGeneralRegisterCount - 1];
};
const char* ArchDefaultPoisoningRegisterConfiguration::general_register_names_
[kMaxAllocatableGeneralRegisterCount - 1];
int ArchDefaultPoisoningRegisterConfiguration::allocatable_general_codes_
[kMaxAllocatableGeneralRegisterCount - 1];

View File

@ -970,10 +970,15 @@ class CodeGeneratorTester {
Instruction* CreateTailCall(int stack_slot_delta) {
int optional_padding_slot = stack_slot_delta;
InstructionOperand callee[] = {
AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kTagged,
kReturnRegister0.code()),
ImmediateOperand(ImmediateOperand::INLINE, -1), // poison index.
ImmediateOperand(ImmediateOperand::INLINE, optional_padding_slot),
ImmediateOperand(ImmediateOperand::INLINE, stack_slot_delta)};
Instruction* tail_call = Instruction::New(zone_, kArchTailCallCodeObject, 0,
nullptr, 2, callee, 0, nullptr);
Instruction* tail_call =
Instruction::New(zone_, kArchTailCallCodeObject, 0, nullptr,
arraysize(callee), callee, 0, nullptr);
return tail_call;
}
@ -1046,10 +1051,12 @@ class CodeGeneratorTester {
AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kTagged,
kReturnRegister0.code()),
ImmediateOperand(ImmediateOperand::INLINE, -1), // poison index.
ImmediateOperand(ImmediateOperand::INLINE, optional_padding_slot),
ImmediateOperand(ImmediateOperand::INLINE, first_unused_stack_slot)};
Instruction* tail_call = Instruction::New(zone_, kArchTailCallCodeObject, 0,
nullptr, 3, callee, 0, nullptr);
Instruction* tail_call =
Instruction::New(zone_, kArchTailCallCodeObject, 0, nullptr,
arraysize(callee), callee, 0, nullptr);
sequence->AddInstruction(tail_call);
sequence->EndBlock(RpoNumber::FromInt(0));

View File

@ -463,8 +463,8 @@ TARGET_TEST_F(InstructionSelectorTest, CallStubWithDeopt) {
EXPECT_EQ(kArchCallCodeObject, call_instr->arch_opcode());
size_t num_operands =
1 + // Code object.
1 +
5 + // Frame state deopt id + one input for each value in frame state.
1 + // Poison index
6 + // Frame state deopt id + one input for each value in frame state.
1 + // Function.
1; // Context.
ASSERT_EQ(num_operands, call_instr->InputCount());
@ -473,23 +473,23 @@ TARGET_TEST_F(InstructionSelectorTest, CallStubWithDeopt) {
EXPECT_TRUE(call_instr->InputAt(0)->IsImmediate());
// Deoptimization id.
int32_t deopt_id_before = s.ToInt32(call_instr->InputAt(1));
int32_t deopt_id_before = s.ToInt32(call_instr->InputAt(2));
FrameStateDescriptor* desc_before =
s.GetFrameStateDescriptor(deopt_id_before);
EXPECT_EQ(bailout_id_before, desc_before->bailout_id());
EXPECT_EQ(1u, desc_before->parameters_count());
EXPECT_EQ(1u, desc_before->locals_count());
EXPECT_EQ(1u, desc_before->stack_count());
EXPECT_EQ(43, s.ToInt32(call_instr->InputAt(3)));
EXPECT_EQ(0, s.ToInt32(call_instr->InputAt(4))); // This should be a context.
EXPECT_EQ(43, s.ToInt32(call_instr->InputAt(4)));
EXPECT_EQ(0, s.ToInt32(call_instr->InputAt(5))); // This should be a context.
// We inserted 0 here.
EXPECT_EQ(0.5, s.ToFloat64(call_instr->InputAt(5)));
EXPECT_TRUE(s.ToHeapObject(call_instr->InputAt(6))->IsUndefined(isolate()));
EXPECT_EQ(0.5, s.ToFloat64(call_instr->InputAt(6)));
EXPECT_TRUE(s.ToHeapObject(call_instr->InputAt(7))->IsUndefined(isolate()));
// Function.
EXPECT_EQ(s.ToVreg(function_node), s.ToVreg(call_instr->InputAt(7)));
EXPECT_EQ(s.ToVreg(function_node), s.ToVreg(call_instr->InputAt(8)));
// Context.
EXPECT_EQ(s.ToVreg(context), s.ToVreg(call_instr->InputAt(8)));
EXPECT_EQ(s.ToVreg(context), s.ToVreg(call_instr->InputAt(9)));
EXPECT_EQ(kArchRet, s[index++]->arch_opcode());
@ -572,6 +572,7 @@ TARGET_TEST_F(InstructionSelectorTest, CallStubWithDeoptRecursiveFrameState) {
EXPECT_EQ(kArchCallCodeObject, call_instr->arch_opcode());
size_t num_operands =
1 + // Code object.
1 + // Poison index.
1 + // Frame state deopt id
6 + // One input for each value in frame state + context.
5 + // One input for each value in the parent frame state + context.
@ -582,7 +583,7 @@ TARGET_TEST_F(InstructionSelectorTest, CallStubWithDeoptRecursiveFrameState) {
EXPECT_TRUE(call_instr->InputAt(0)->IsImmediate());
// Deoptimization id.
int32_t deopt_id_before = s.ToInt32(call_instr->InputAt(1));
int32_t deopt_id_before = s.ToInt32(call_instr->InputAt(2));
FrameStateDescriptor* desc_before =
s.GetFrameStateDescriptor(deopt_id_before);
FrameStateDescriptor* desc_before_outer = desc_before->outer_state();
@ -591,25 +592,25 @@ TARGET_TEST_F(InstructionSelectorTest, CallStubWithDeoptRecursiveFrameState) {
EXPECT_EQ(1u, desc_before_outer->locals_count());
EXPECT_EQ(1u, desc_before_outer->stack_count());
// Values from parent environment.
EXPECT_EQ(63, s.ToInt32(call_instr->InputAt(3)));
EXPECT_EQ(63, s.ToInt32(call_instr->InputAt(4)));
// Context:
EXPECT_EQ(66, s.ToInt32(call_instr->InputAt(4)));
EXPECT_EQ(64, s.ToInt32(call_instr->InputAt(5)));
EXPECT_EQ(65, s.ToInt32(call_instr->InputAt(6)));
EXPECT_EQ(66, s.ToInt32(call_instr->InputAt(5)));
EXPECT_EQ(64, s.ToInt32(call_instr->InputAt(6)));
EXPECT_EQ(65, s.ToInt32(call_instr->InputAt(7)));
// Values from the nested frame.
EXPECT_EQ(1u, desc_before->parameters_count());
EXPECT_EQ(1u, desc_before->locals_count());
EXPECT_EQ(2u, desc_before->stack_count());
EXPECT_EQ(43, s.ToInt32(call_instr->InputAt(8)));
EXPECT_EQ(46, s.ToInt32(call_instr->InputAt(9)));
EXPECT_EQ(0.25, s.ToFloat64(call_instr->InputAt(10)));
EXPECT_EQ(44, s.ToInt32(call_instr->InputAt(11)));
EXPECT_EQ(45, s.ToInt32(call_instr->InputAt(12)));
EXPECT_EQ(43, s.ToInt32(call_instr->InputAt(9)));
EXPECT_EQ(46, s.ToInt32(call_instr->InputAt(10)));
EXPECT_EQ(0.25, s.ToFloat64(call_instr->InputAt(11)));
EXPECT_EQ(44, s.ToInt32(call_instr->InputAt(12)));
EXPECT_EQ(45, s.ToInt32(call_instr->InputAt(13)));
// Function.
EXPECT_EQ(s.ToVreg(function_node), s.ToVreg(call_instr->InputAt(13)));
EXPECT_EQ(s.ToVreg(function_node), s.ToVreg(call_instr->InputAt(14)));
// Context.
EXPECT_EQ(s.ToVreg(context2), s.ToVreg(call_instr->InputAt(14)));
EXPECT_EQ(s.ToVreg(context2), s.ToVreg(call_instr->InputAt(15)));
// Continuation.
EXPECT_EQ(kArchRet, s[index++]->arch_opcode());