Deal with case where a variable is dominated by inner part of a loop.

There is a risk that we try to preserve a loop variable through multiple
iterations, even though the dominating block is inside a loop.

Fix this by analyzing if a block starts off by writing to a variable. In
that case, there cannot be any preservation going on. If we don't, pretend the
loop header is reading the variable, which moves the variable to an
appropriate scope.
This commit is contained in:
Hans-Kristian Arntzen 2019-06-06 11:10:39 +02:00
parent 720681da39
commit 03d93abc1a
13 changed files with 244 additions and 4 deletions

View File

@ -0,0 +1,11 @@
#version 310 es
precision mediump float;
precision highp int;
layout(location = 0) out vec4 FragColor;
void main()
{
FragColor = vec4(1.0);
}

View File

@ -0,0 +1,15 @@
#version 460
#extension GL_NV_ray_tracing : require
layout(set = 0, binding = 0) uniform accelerationStructureNV as;
layout(set = 0, binding = 1, rgba32f) uniform writeonly image2D image;
layout(location = 0) rayPayloadNV vec4 payload;
layout(location = 0) callableDataNV float blend;
void main()
{
traceNV(as, 1u, 255u, 0u, 0u, 0u, vec3(0.0), 0.0, vec3(0.0, 0.0, -1.0), 100.0, 0);
executeCallableNV(0u, 0);
imageStore(image, ivec2(gl_LaunchIDNV.xy), payload + vec4(blend));
}

View File

@ -0,0 +1,16 @@
#version 460
#extension GL_NV_ray_tracing : require
layout(shaderRecordNV, std430) buffer sbt
{
vec3 direction;
float tmax;
} _20;
layout(set = 0, binding = 0) uniform accelerationStructureNV as;
void main()
{
traceNV(as, 0u, 255u, 0u, 1u, 0u, vec3(0.0), 0.0, _20.direction, _20.tmax, 0);
}

View File

@ -69,13 +69,13 @@ float4 fetch_attr(thread const attr_desc& desc, thread const int& vertex_id, thr
float4 result = float4(0.0, 0.0, 0.0, 1.0);
bool reverse_order = false;
int first_byte = (vertex_id * desc.stride) + desc.starting_offset;
uint4 tmp;
for (int n = 0; n < 4; n++)
{
if (n == desc.attribute_size)
{
break;
}
uint4 tmp;
switch (desc.type)
{
case 0:

View File

@ -40,9 +40,9 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]]
main0_out out = {};
out.gl_Position = _21.uMVP * in.aVertex;
out.vColor = float4(0.0);
Light_1 light;
for (int i = 0; i < 4; i++)
{
Light_1 light;
light.Position = float3(_21.lights[i].Position);
light.Radius = _21.lights[i].Radius;
light.Color = _21.lights[i].Color;

View File

@ -9,9 +9,9 @@ struct SceneOut
void _main(vec4 positions[3], SceneOut OUT)
{
SceneOut o;
for (int i = 0; i < 3; i++)
{
SceneOut o;
o.pos = positions[i];
gl_Position = o.pos;
EmitVertex();

View File

@ -16,10 +16,10 @@ void main()
{
gl_Position = mat4(UBO[0], UBO[1], UBO[2], UBO[3]) * aVertex;
vColor = vec4(0.0);
Light light;
for (int i = 0; i < 4; i++)
{
Light _52 = Light(UBO[i * 2 + 4].xyz, UBO[i * 2 + 4].w, UBO[i * 2 + 5]);
Light light;
light.Position = _52.Position;
light.Radius = _52.Radius;
light.Color = _52.Color;

View File

@ -0,0 +1,27 @@
#version 310 es
precision mediump float;
precision highp int;
layout(location = 0) out vec4 FragColor;
void main()
{
bool written = false;
float v;
for (mediump int i = 0; i < 4; i++)
{
float w = 0.0;
if (written)
{
w += v;
}
else
{
v = 20.0;
}
v += float(i);
written = true;
}
FragColor = vec4(1.0);
}

View File

@ -0,0 +1,21 @@
#version 310 es
precision mediump float;
layout(location = 0) out vec4 FragColor;
void main()
{
float v;
bool written = false;
for (int i = 0; i < 4; i++)
{
float w = 0.0;
if (written)
w += v;
else
v = 20.0;
v += float(i);
written = true;
}
FragColor = vec4(1.0);
}

View File

@ -152,6 +152,56 @@ void CFG::add_branch(uint32_t from, uint32_t to)
add_unique(succeeding_edges[from], to);
}
uint32_t CFG::find_loop_dominator(uint32_t block_id) const
{
while (block_id != 0)
{
auto itr = preceding_edges.find(block_id);
if (itr == end(preceding_edges))
return 0;
if (itr->second.empty())
return 0;
uint32_t pred_block_id = 0;
bool ignore_loop_header = false;
// If we are a merge block, go directly to the header block.
// Only consider a loop dominator if we are branching from inside a block to a loop header.
// NOTE: In the CFG we forced an edge from header to merge block always to support variable scopes properly.
for (auto &pred : itr->second)
{
auto &pred_block = compiler.get<SPIRBlock>(pred);
if (pred_block.merge == SPIRBlock::MergeLoop && pred_block.merge_block == block_id)
{
pred_block_id = pred;
ignore_loop_header = true;
break;
}
else if (pred_block.merge == SPIRBlock::MergeSelection && pred_block.next_block == block_id)
{
pred_block_id = pred;
break;
}
}
// No merge block means we can just pick any edge. Loop headers dominate the inner loop, so any path we
// take will lead there.
if (!pred_block_id)
pred_block_id = itr->second.front();
block_id = pred_block_id;
if (!ignore_loop_header && block_id)
{
auto &block = compiler.get<SPIRBlock>(block_id);
if (block.merge == SPIRBlock::MergeLoop)
return block_id;
}
}
return block_id;
}
DominatorBuilder::DominatorBuilder(const CFG &cfg_)
: cfg(cfg_)
{

View File

@ -93,6 +93,8 @@ public:
walk_from(seen_blocks, b, op);
}
uint32_t find_loop_dominator(uint32_t block) const;
private:
struct VisitOrder
{

View File

@ -3322,6 +3322,30 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA
// Add it to a per-block list of variables.
uint32_t dominating_block = builder.get_dominator();
// For variables whose dominating block is inside a loop, there is a risk that these variables
// actually need to be preserved across loop iterations. We can express this by adding
// a "read" access to the loop header.
// In the dominating block, we must see an OpStore or equivalent as the first access of an OpVariable.
// Should that fail, we look for the innermost loop header and tack on an access there.
// Phi nodes cannot have this problem.
if (dominating_block)
{
auto &variable = get<SPIRVariable>(var.first);
if (!variable.phi_variable)
{
bool preserve = may_read_undefined_variable_in_block(get<SPIRBlock>(dominating_block), var.first);
if (preserve)
{
uint32_t loop_dominator = cfg.find_loop_dominator(dominating_block);
if (loop_dominator)
{
builder.add_block(loop_dominator);
dominating_block = builder.get_dominator();
}
}
}
}
// If all blocks here are dead code, this will be 0, so the variable in question
// will be completely eliminated.
if (dominating_block)
@ -3513,6 +3537,79 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA
}
}
bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint32_t var)
{
for (auto &op : block.ops)
{
auto *ops = stream(op);
switch (op.op)
{
case OpStore:
case OpCopyMemory:
if (ops[0] == var)
return false;
break;
case OpAccessChain:
case OpInBoundsAccessChain:
case OpPtrAccessChain:
// Access chains are generally used to partially read and write. It's too hard to analyze
// if all constituents are written fully before continuing, so just assume it's preserved.
// This is the same as the parameter preservation analysis.
if (ops[2] == var)
return true;
break;
case OpSelect:
// Variable pointers.
// We might read before writing.
if (ops[3] == var || ops[4] == var)
return true;
break;
case OpPhi:
{
// Variable pointers.
// We might read before writing.
if (op.length < 2)
break;
uint32_t count = op.length - 2;
for (uint32_t i = 0; i < count; i += 2)
if (ops[i + 2] == var)
return true;
break;
}
case OpCopyObject:
case OpLoad:
if (ops[2] == var)
return true;
break;
case OpFunctionCall:
{
if (op.length < 3)
break;
// May read before writing.
uint32_t count = op.length - 3;
for (uint32_t i = 0; i < count; i++)
if (ops[i + 3] == var)
return true;
break;
}
default:
break;
}
}
// Not accessed somehow, at least not in a usual fashion.
// It's likely accessed in a branch, so assume we must preserve.
return true;
}
Bitset Compiler::get_buffer_block_flags(uint32_t id) const
{
return ir.get_buffer_block_flags(get<SPIRVariable>(id));

View File

@ -947,6 +947,7 @@ protected:
void analyze_variable_scope(SPIRFunction &function, AnalyzeVariableScopeAccessHandler &handler);
void find_function_local_luts(SPIRFunction &function, const AnalyzeVariableScopeAccessHandler &handler,
bool single_function);
bool may_read_undefined_variable_in_block(const SPIRBlock &block, uint32_t var);
void make_constant_null(uint32_t id, uint32_t type);