Analyze complex cases for fragment interlocks.

If the begin/end interlock instructions are split across different functions
or appear inside control flow, we need to employ some serious workarounds.
This commit is contained in:
Hans-Kristian Arntzen 2019-09-04 11:20:25 +02:00
parent f577836439
commit 3f2ce375e1
2 changed files with 152 additions and 34 deletions

View File

@ -3798,7 +3798,12 @@ bool Compiler::CombinedImageSamplerDrefHandler::handle(spv::Op opcode, const uin
const CFG &Compiler::get_cfg_for_current_function() const const CFG &Compiler::get_cfg_for_current_function() const
{ {
assert(current_function); assert(current_function);
auto cfg_itr = function_cfgs.find(current_function->self); return get_cfg_for_function(current_function->self);
}
const CFG &Compiler::get_cfg_for_function(uint32_t id) const
{
auto cfg_itr = function_cfgs.find(id);
assert(cfg_itr != end(function_cfgs)); assert(cfg_itr != end(function_cfgs));
assert(cfg_itr->second); assert(cfg_itr->second);
return *cfg_itr->second; return *cfg_itr->second;
@ -4249,18 +4254,91 @@ void Compiler::analyze_non_block_pointer_types()
sort(begin(physical_storage_non_block_pointer_types), end(physical_storage_non_block_pointer_types)); sort(begin(physical_storage_non_block_pointer_types), end(physical_storage_non_block_pointer_types));
} }
// Prepass opcode handler for the interlock analysis.
// Remembers which function contains the invocation interlock instructions and raises
// the flags for the two cases the plain critical-section analysis cannot cope with:
//  - split_function_case: interlock instructions found in more than one function.
//  - control_flow_interlock: an interlock instruction sits in a block which does not
//    terminate control flow when walking from the function's entry block.
// Returns false (stop) once the split-function case is detected, true otherwise.
bool Compiler::InterlockedResourceAccessPrepassHandler::handle(Op op, const uint32_t *, uint32_t)
{
    const bool is_interlock_op = op == OpBeginInvocationInterlockEXT || op == OpEndInvocationInterlockEXT;
    if (!is_interlock_op)
        return true;

    const uint32_t enclosing_function = call_stack.back();

    if (interlock_function_id != 0 && interlock_function_id != enclosing_function)
    {
        // Most complex case, we have no sensible way of dealing with this
        // other than taking the 100% conservative approach, exit early.
        split_function_case = true;
        return false;
    }

    interlock_function_id = enclosing_function;

    // If this call is performed inside control flow we have a problem.
    const auto &function_cfg = compiler.get_cfg_for_function(interlock_function_id);
    const uint32_t entry_block_id = compiler.get<SPIRFunction>(interlock_function_id).entry_block;
    if (!function_cfg.node_terminates_control_flow_in_sub_graph(entry_block_id, current_block_id))
        control_flow_interlock = true;

    return true;
}
// Stores the ID of the given block in current_block_id. handle() passes that ID to the
// CFG query which decides whether an interlock instruction is inside control flow.
// NOTE(review): presumably invoked by the opcode traversal whenever it enters a block - confirm.
void Compiler::InterlockedResourceAccessPrepassHandler::set_current_block(const SPIRBlock &block)
{
current_block_id = block.self;
}
// Enters a callee during the prepass: pushes the callee's function ID onto call_stack.
// NOTE(review): assumes `args`/`length` describe the operands of the OpFunctionCall being
// followed (result type, result ID, function ID, call arguments...) - confirm against the
// traversal code.
// Returns false when the instruction is too short to carry a function ID.
bool Compiler::InterlockedResourceAccessPrepassHandler::begin_function_scope(const uint32_t *args, uint32_t length)
{
    // The callee is operand 2; operand 1 is the result ID of the call.
    // Entries of call_stack are later used as function IDs (CFG and SPIRFunction lookups
    // in handle()), so the function ID is what must be recorded here.
    if (length < 3)
        return false;
    call_stack.push_back(args[2]);
    return true;
}

// Leaves a callee during the prepass: drops the innermost entry of call_stack.
// Always returns true.
bool Compiler::InterlockedResourceAccessPrepassHandler::end_function_scope(const uint32_t *, uint32_t)
{
    call_stack.pop_back();
    return true;
}

// Enters a callee during the main analysis pass: pushes the callee's function ID onto
// call_stack.
// NOTE(review): assumes `args`/`length` describe the operands of the OpFunctionCall being
// followed (result type, result ID, function ID, call arguments...) - confirm against the
// traversal code.
// Returns false when the instruction is too short to carry a function ID.
bool Compiler::InterlockedResourceAccessHandler::begin_function_scope(const uint32_t *args, uint32_t length)
{
    // The callee is operand 2; operand 1 is the result ID of the call.
    // call_stack.back() is compared against interlock_function_id, which is a function ID,
    // so the function ID is what must be recorded here.
    if (length < 3)
        return false;
    call_stack.push_back(args[2]);
    return true;
}

// Leaves a callee during the main analysis pass: drops the innermost entry of call_stack.
// Always returns true.
bool Compiler::InterlockedResourceAccessHandler::end_function_scope(const uint32_t *, uint32_t)
{
    call_stack.pop_back();
    return true;
}
// Adds `id` to compiler.interlocked_resources when the current analysis mode says the
// access has to be treated as interlocked:
//  - simple mode (use_critical_section): only while in_crit_sec is set,
//  - control_flow_interlock: any access made while the innermost call_stack entry is
//    the function holding the interlock instructions,
//  - split_function_case: every access.
void Compiler::InterlockedResourceAccessHandler::access_potential_resource(uint32_t id)
{
    bool must_track = use_critical_section && in_crit_sec;

    // Only inspect the call stack when the earlier condition did not already decide.
    if (!must_track && control_flow_interlock)
        must_track = call_stack.back() == interlock_function_id;

    if (!must_track)
        must_track = split_function_case;

    if (must_track)
        compiler.interlocked_resources.insert(id);
}
bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length) bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
{ {
if (opcode == OpBeginInvocationInterlockEXT) // Only care about critical section analysis if we have simple case.
if (use_critical_section)
{ {
in_crit_sec = true; if (opcode == OpBeginInvocationInterlockEXT)
return true; {
} in_crit_sec = true;
return true;
}
if (opcode == OpEndInvocationInterlockEXT) if (opcode == OpEndInvocationInterlockEXT)
{ {
// End critical section--nothing more to do. // End critical section--nothing more to do.
return false; return false;
}
} }
// We need to figure out where images and buffers are loaded from, so do only the bare bones compilation we need. // We need to figure out where images and buffers are loaded from, so do only the bare bones compilation we need.
@ -4298,10 +4376,7 @@ bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_
break; break;
// fallthrough // fallthrough
case StorageClassStorageBuffer: case StorageClassStorageBuffer:
if (!in_crit_sec) access_potential_resource(var->self);
break;
compiler.interlocked_resources.insert(var->self);
break; break;
} }
break; break;
@ -4324,6 +4399,7 @@ bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_
uint32_t ptr = args[2]; uint32_t ptr = args[2];
compiler.set<SPIRExpression>(id, "", result_type, true); compiler.set<SPIRExpression>(id, "", result_type, true);
compiler.register_read(id, ptr, true); compiler.register_read(id, ptr, true);
compiler.ir.ids[id].set_allow_type_rewrite();
} }
break; break;
} }
@ -4340,6 +4416,7 @@ bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_
auto *var = compiler.maybe_get_backing_variable(ptr); auto *var = compiler.maybe_get_backing_variable(ptr);
if (var) if (var)
e.loaded_from = var->self; e.loaded_from = var->self;
break;
} }
case OpStore: case OpStore:
@ -4349,14 +4426,13 @@ bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_
if (length < 1) if (length < 1)
return false; return false;
if (!in_crit_sec)
break;
uint32_t ptr = args[0]; uint32_t ptr = args[0];
auto *var = compiler.maybe_get_backing_variable(ptr); auto *var = compiler.maybe_get_backing_variable(ptr);
if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant || if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant ||
var->storage == StorageClassStorageBuffer)) var->storage == StorageClassStorageBuffer))
compiler.interlocked_resources.insert(var->self); {
access_potential_resource(var->self);
}
break; break;
} }
@ -4366,23 +4442,26 @@ bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_
if (length < 2) if (length < 2)
return false; return false;
if (!in_crit_sec)
break;
uint32_t dst = args[0]; uint32_t dst = args[0];
uint32_t src = args[1]; uint32_t src = args[1];
auto *dst_var = compiler.maybe_get_backing_variable(dst); auto *dst_var = compiler.maybe_get_backing_variable(dst);
auto *src_var = compiler.maybe_get_backing_variable(src); auto *src_var = compiler.maybe_get_backing_variable(src);
if (dst_var && (dst_var->storage == StorageClassUniform || dst_var->storage == StorageClassStorageBuffer)) if (dst_var && (dst_var->storage == StorageClassUniform || dst_var->storage == StorageClassStorageBuffer))
compiler.interlocked_resources.insert(dst_var->self); access_potential_resource(dst_var->self);
if (src_var) if (src_var)
{ {
if (src_var->storage != StorageClassUniform && src_var->storage != StorageClassStorageBuffer) if (src_var->storage != StorageClassUniform && src_var->storage != StorageClassStorageBuffer)
break; break;
if (src_var->storage == StorageClassUniform && if (src_var->storage == StorageClassUniform &&
!compiler.has_decoration(compiler.get<SPIRType>(src_var->basetype).self, DecorationBufferBlock)) !compiler.has_decoration(compiler.get<SPIRType>(src_var->basetype).self, DecorationBufferBlock))
{
break; break;
compiler.interlocked_resources.insert(src_var->self); }
access_potential_resource(src_var->self);
} }
break; break;
@ -4394,9 +4473,6 @@ bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_
if (length < 3) if (length < 3)
return false; return false;
if (!in_crit_sec)
break;
uint32_t ptr = args[2]; uint32_t ptr = args[2];
auto *var = compiler.maybe_get_backing_variable(ptr); auto *var = compiler.maybe_get_backing_variable(ptr);
@ -4416,7 +4492,7 @@ bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_
// fallthrough // fallthrough
case StorageClassUniformConstant: case StorageClassUniformConstant:
case StorageClassStorageBuffer: case StorageClassStorageBuffer:
compiler.interlocked_resources.insert(var->self); access_potential_resource(var->self);
break; break;
} }
break; break;
@ -4439,14 +4515,13 @@ bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_
if (length < 3) if (length < 3)
return false; return false;
if (!in_crit_sec)
break;
uint32_t ptr = args[2]; uint32_t ptr = args[2];
auto *var = compiler.maybe_get_backing_variable(ptr); auto *var = compiler.maybe_get_backing_variable(ptr);
if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant || if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant ||
var->storage == StorageClassStorageBuffer)) var->storage == StorageClassStorageBuffer))
compiler.interlocked_resources.insert(var->self); {
access_potential_resource(var->self);
}
break; break;
} }
@ -4460,8 +4535,17 @@ bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_
void Compiler::analyze_interlocked_resource_usage() void Compiler::analyze_interlocked_resource_usage()
{ {
InterlockedResourceAccessHandler handler(*this); InterlockedResourceAccessPrepassHandler prepass_handler(*this, ir.default_entry_point);
traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), prepass_handler);
InterlockedResourceAccessHandler handler(*this, ir.default_entry_point);
handler.interlock_function_id = prepass_handler.interlock_function_id;
handler.split_function_case = prepass_handler.split_function_case;
handler.control_flow_interlock = prepass_handler.control_flow_interlock;
handler.use_critical_section = !handler.split_function_case && !handler.control_flow_interlock;
traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler); traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
interlocked_complex = !handler.use_critical_section;
} }
bool Compiler::type_is_array_of_pointers(const SPIRType &type) const bool Compiler::type_is_array_of_pointers(const SPIRType &type) const

View File

@ -884,10 +884,11 @@ protected:
void build_function_control_flow_graphs_and_analyze(); void build_function_control_flow_graphs_and_analyze();
std::unordered_map<uint32_t, std::unique_ptr<CFG>> function_cfgs; std::unordered_map<uint32_t, std::unique_ptr<CFG>> function_cfgs;
const CFG &get_cfg_for_current_function() const; const CFG &get_cfg_for_current_function() const;
const CFG &get_cfg_for_function(uint32_t id) const;
struct CFGBuilder : OpcodeHandler struct CFGBuilder : OpcodeHandler
{ {
CFGBuilder(Compiler &compiler_); explicit CFGBuilder(Compiler &compiler_);
bool follow_function_call(const SPIRFunction &func) override; bool follow_function_call(const SPIRFunction &func) override;
bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; bool handle(spv::Op op, const uint32_t *args, uint32_t length) override;
@ -932,7 +933,7 @@ protected:
struct PhysicalStorageBufferPointerHandler : OpcodeHandler struct PhysicalStorageBufferPointerHandler : OpcodeHandler
{ {
PhysicalStorageBufferPointerHandler(Compiler &compiler_); explicit PhysicalStorageBufferPointerHandler(Compiler &compiler_);
bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; bool handle(spv::Op op, const uint32_t *args, uint32_t length) override;
Compiler &compiler; Compiler &compiler;
std::unordered_set<uint32_t> types; std::unordered_set<uint32_t> types;
@ -951,20 +952,53 @@ protected:
// while inside the critical section must be placed in a raster order group. // while inside the critical section must be placed in a raster order group.
struct InterlockedResourceAccessHandler : OpcodeHandler struct InterlockedResourceAccessHandler : OpcodeHandler
{ {
InterlockedResourceAccessHandler(Compiler &compiler_) InterlockedResourceAccessHandler(Compiler &compiler_, uint32_t entry_point_id)
: compiler(compiler_) : compiler(compiler_)
{ {
call_stack.push_back(entry_point_id);
} }
bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; bool handle(spv::Op op, const uint32_t *args, uint32_t length) override;
bool begin_function_scope(const uint32_t *args, uint32_t length) override;
bool end_function_scope(const uint32_t *args, uint32_t length) override;
Compiler &compiler; Compiler &compiler;
bool in_crit_sec = false; bool in_crit_sec = false;
uint32_t interlock_function_id = 0;
bool split_function_case = false;
bool control_flow_interlock = false;
bool use_critical_section = false;
SmallVector<uint32_t> call_stack;
void access_potential_resource(uint32_t id);
};
struct InterlockedResourceAccessPrepassHandler : OpcodeHandler
{
InterlockedResourceAccessPrepassHandler(Compiler &compiler_, uint32_t entry_point_id)
: compiler(compiler_)
{
call_stack.push_back(entry_point_id);
}
void set_current_block(const SPIRBlock &block) override;
bool handle(spv::Op op, const uint32_t *args, uint32_t length) override;
bool begin_function_scope(const uint32_t *args, uint32_t length) override;
bool end_function_scope(const uint32_t *args, uint32_t length) override;
Compiler &compiler;
uint32_t interlock_function_id = 0;
uint32_t current_block_id = 0;
bool split_function_case = false;
bool control_flow_interlock = false;
SmallVector<uint32_t> call_stack;
}; };
void analyze_interlocked_resource_usage(); void analyze_interlocked_resource_usage();
// The set of all resources written while inside the critical section, if present. // The set of all resources written while inside the critical section, if present.
std::unordered_set<uint32_t> interlocked_resources; std::unordered_set<uint32_t> interlocked_resources;
bool interlocked_complex = false;
void make_constant_null(uint32_t id, uint32_t type); void make_constant_null(uint32_t id, uint32_t type);