MSL: Rewrite propagated depth comparison state handling.

It is far cleaner, and more correct, to run the traversal twice.
This fixes a case where depth comparison state is propagated through multiple functions.
Hans-Kristian Arntzen 2019-10-24 16:34:51 +02:00
parent 830e24c4ba
commit 8066d13599
9 changed files with 115 additions and 63 deletions
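
To see why running the traversal twice settles the multi-function case, here is a minimal, self-contained C++ model of the propagation. The IDs and the driver loop are invented for illustration; add_dependency and add_hierarchy_to_comparison_ids mirror the CombinedImageSamplerUsageHandler hunks further down, but this is a sketch, not the actual handler.

#include <cstdint>
#include <cstdio>
#include <unordered_map>
#include <unordered_set>

static std::unordered_map<uint32_t, std::unordered_set<uint32_t>> dependency_hierarchy;
static std::unordered_set<uint32_t> comparison_ids;

// Record that dst was loaded from / bound to src, and eagerly forward any
// comparison state already known for src.
static void add_dependency(uint32_t dst, uint32_t src)
{
	dependency_hierarchy[dst].insert(src);
	if (comparison_ids.count(src))
		comparison_ids.insert(dst);
}

// Tag an ID and everything it transitively depends on as comparison resources.
static void add_hierarchy_to_comparison_ids(uint32_t id)
{
	comparison_ids.insert(id);
	for (uint32_t dep : dependency_hierarchy[id])
		add_hierarchy_to_comparison_ids(dep);
}

int main()
{
	// Invented IDs modeling the test shader below: uTexture is passed to
	// sample_normal (which forwards it to sample_normal2) and to sample_comp.
	enum : uint32_t { uTexture = 1, sn_tex = 2, sn2_tex = 3, sc_tex = 4 };

	for (int pass = 0; pass < 2; pass++)
	{
		dependency_hierarchy.clear(); // comparison_ids deliberately survives
		add_dependency(sn_tex, uTexture);  // main() calls sample_normal(uTexture)
		add_dependency(sn2_tex, sn_tex);   // which calls sample_normal2(tex)
		add_dependency(sc_tex, uTexture);  // main() calls sample_comp(uTexture)
		add_hierarchy_to_comparison_ids(sc_tex); // sample_compare happens here
	}

	// Pass 1 only tags sc_tex and uTexture: sn_tex and sn2_tex were recorded
	// before uTexture was known to be a comparison resource. Pass 2 replays
	// the dependencies, and the eager forwarding in add_dependency tags them.
	printf("sample_normal2's tex needs depth2d: %s\n",
	       comparison_ids.count(sn2_tex) ? "yes" : "no");
}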

View File

@@ -0,0 +1,23 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct main0_out
{
float FragColor [[color(0)]];
};
struct main0_in
{
float3 vUV [[user(locn0)]];
};
fragment main0_out main0(main0_in in [[stage_in]], depth2d<float> uTexture [[texture(0)]], sampler uSampler [[sampler(0)]], sampler uSamplerShadow [[sampler(1)]])
{
main0_out out = {};
out.FragColor = float4(uTexture.sample(uSampler, in.vUV.xy)).x;
out.FragColor += uTexture.sample_compare(uSamplerShadow, in.vUV.xy, in.vUV.z);
return out;
}

View File

@@ -0,0 +1,43 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct main0_out
{
float FragColor [[color(0)]];
};
struct main0_in
{
float3 vUV [[user(locn0)]];
};
static inline __attribute__((always_inline))
float sample_normal2(thread const depth2d<float> tex, thread sampler uSampler, thread float3& vUV)
{
return float4(tex.sample(uSampler, vUV.xy)).x;
}
static inline __attribute__((always_inline))
float sample_normal(thread const depth2d<float> tex, thread sampler uSampler, thread float3& vUV)
{
return sample_normal2(tex, uSampler, vUV);
}
static inline __attribute__((always_inline))
float sample_comp(thread const depth2d<float> tex, thread float3& vUV, thread sampler uSamplerShadow)
{
return tex.sample_compare(uSamplerShadow, vUV.xy, vUV.z);
}
fragment main0_out main0(main0_in in [[stage_in]], depth2d<float> uTexture [[texture(0)]], sampler uSampler [[sampler(0)]], sampler uSamplerShadow [[sampler(1)]])
{
main0_out out = {};
out.FragColor = sample_normal(uTexture, uSampler, in.vUV);
out.FragColor += sample_comp(uTexture, in.vUV, uSamplerShadow);
return out;
}

View File

@@ -0,0 +1,29 @@
#version 450
layout(set = 0, binding = 0) uniform texture2D uTexture;
layout(set = 0, binding = 1) uniform sampler uSampler;
layout(set = 0, binding = 2) uniform samplerShadow uSamplerShadow;
layout(location = 0) out float FragColor;
layout(location = 0) in vec3 vUV;
float sample_normal2(texture2D tex)
{
return texture(sampler2D(tex, uSampler), vUV.xy).x;
}
float sample_normal(texture2D tex)
{
return sample_normal2(tex);
}
float sample_comp(texture2D tex)
{
return texture(sampler2DShadow(tex, uSamplerShadow), vUV);
}
void main()
{
FragColor = sample_normal(uTexture);
FragColor += sample_comp(uTexture);
}
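
Note how, in the expected MSL output above, sample_normal2 declares its tex parameter as depth2d<float> even though that function never issues a compare: the sample_compare in sample_comp forces uTexture to become a depth texture, and that state has to reach a parameter two calls removed from main(). This is the multi-function case the commit message refers to.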

View File

@@ -3549,11 +3549,6 @@ Bitset Compiler::get_buffer_block_flags(VariableID id) const
return ir.get_buffer_block_flags(get<SPIRVariable>(id));
}
bool Compiler::supports_combined_samplers() const
{
return false; // default implementation
}
bool Compiler::get_common_basic_type(const SPIRType &type, SPIRType::BaseType &base_type)
{
if (type.basetype == SPIRType::Struct)
@@ -3791,6 +3786,13 @@ void Compiler::analyze_image_and_sampler_usage()
CombinedImageSamplerUsageHandler handler(*this, dref_handler.dref_combined_samplers);
traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
// Need to run this traversal twice. First time, we propagate any comparison sampler usage from leaf functions
// down to main().
// In the second pass, we can propagate forced depth state from main() back up into the leaf functions.
handler.dependency_hierarchy.clear();
traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
comparison_ids = move(handler.comparison_ids);
need_subpass_input = handler.need_subpass_input;
@@ -3906,6 +3908,14 @@ bool Compiler::CFGBuilder::follow_function_call(const SPIRFunction &func)
return false;
}
void Compiler::CombinedImageSamplerUsageHandler::add_dependency(uint32_t dst, uint32_t src)
{
dependency_hierarchy[dst].insert(src);
// Propagate up any comparison state if we're loading from one such variable.
if (comparison_ids.count(src))
comparison_ids.insert(dst);
}
bool Compiler::CombinedImageSamplerUsageHandler::begin_function_scope(const uint32_t *args, uint32_t length)
{
if (length < 3)
@@ -3918,7 +3928,7 @@ bool Compiler::CombinedImageSamplerUsageHandler::begin_function_scope(const uint
for (uint32_t i = 0; i < length; i++)
{
auto &argument = func.arguments[i];
dependency_hierarchy[argument.id].insert(arg[i]);
add_dependency(argument.id, arg[i]);
}
return true;
@@ -3929,39 +3939,10 @@ void Compiler::CombinedImageSamplerUsageHandler::add_hierarchy_to_comparison_ids
// Traverse the variable dependency hierarchy and tag everything in its path with comparison ids.
comparison_ids.insert(id);
// If the underlying resource has been used for comparison then duplicate loads of that resource must be too.
if (!compiler.supports_combined_samplers())
{
for (const auto &hierarchy : dependency_hierarchy)
{
if (hierarchy.second.find(id) != hierarchy.second.end())
comparison_ids.insert(hierarchy.first);
}
}
for (auto &dep_id : dependency_hierarchy[id])
add_hierarchy_to_comparison_ids(dep_id);
}
// If the underlying resource has been used for comparison then duplicate loads of that resource must be too.
bool Compiler::CombinedImageSamplerUsageHandler::dependent_used_for_comparison(uint32_t id) const
{
if (compiler.supports_combined_samplers())
return false;
auto hierarchy_iter = dependency_hierarchy.find(id);
if (hierarchy_iter != dependency_hierarchy.end())
{
for (uint32_t dependent_id : hierarchy_iter->second)
{
if (comparison_ids.find(dependent_id) != comparison_ids.end())
return true;
}
}
return false;
}
bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
{
switch (opcode)
@@ -3973,7 +3954,8 @@ bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_
{
if (length < 3)
return false;
dependency_hierarchy[args[1]].insert(args[2]);
add_dependency(args[1], args[2]);
// Ideally defer this to OpImageRead, but then we'd need to track loaded IDs.
// If we load an image, we're going to use it and there is little harm in declaring an unused gl_FragCoord.
@@ -4001,10 +3983,7 @@ bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_
uint32_t image = args[2];
uint32_t sampler = args[3];
bool dependent = !compiler.supports_combined_samplers() &&
(dependent_used_for_comparison(sampler) || dependent_used_for_comparison(image));
if (type.image.depth || dref_combined_samplers.count(result_id) != 0 || dependent)
if (type.image.depth || dref_combined_samplers.count(result_id) != 0)
{
add_hierarchy_to_comparison_ids(image);
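
With comparison state forwarded eagerly in add_dependency and settled by the second traversal, the per-call-site dependent_used_for_comparison() scan, and the supports_combined_samplers() hook it relied on, are no longer needed; the remaining hunks remove their declarations and overrides from Compiler, CompilerGLSL, and CompilerMSL.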

View File

@@ -612,9 +612,6 @@ protected:
void register_read(uint32_t expr, uint32_t chain, bool forwarded);
void register_write(uint32_t chain);
// Returns true if the target language supports combined texture-samplers. Returns false by default.
virtual bool supports_combined_samplers() const;
inline bool is_continue(uint32_t next) const
{
return (ir.block_meta[next] & ParsedIR::BLOCK_META_CONTINUE_BIT) != 0;
@@ -891,10 +888,7 @@ protected:
void add_hierarchy_to_comparison_ids(uint32_t ids);
bool need_subpass_input = false;
// If the underlying resource has been used for comparison then duplicate loads of that resource must be too.
// Returns true if a dependent resource in the dependency hierarchy of the specified image or sampler has been used for comparison.
bool dependent_used_for_comparison(uint32_t id) const;
void add_dependency(uint32_t dst, uint32_t src);
};
void build_function_control_flow_graphs_and_analyze();

View File

@@ -11164,11 +11164,6 @@ void CompilerGLSL::flatten_buffer_block(VariableID id)
flattened_buffer_blocks.insert(id);
}
bool CompilerGLSL::supports_combined_samplers() const
{
return true; // GLSL always supports combined texture-samplers.
}
bool CompilerGLSL::builtin_translates_to_nonarray(spv::BuiltIn /*builtin*/) const
{
return false; // GLSL itself does not need to translate array builtin types to non-array builtin types

View File

@@ -273,9 +273,6 @@ protected:
virtual std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t physical_type_id,
bool packed_type, bool row_major);
// Returns true, because GLSL always supports combined texture-samplers.
virtual bool supports_combined_samplers() const override;
virtual bool builtin_translates_to_nonarray(spv::BuiltIn builtin) const;
StringStream<> buffer;

View File

@@ -122,11 +122,6 @@ void CompilerMSL::set_fragment_output_components(uint32_t location, uint32_t com
fragment_output_components[location] = components;
}
bool CompilerMSL::supports_combined_samplers() const
{
return false; // Metal does not support combined texture-samplers
}
bool CompilerMSL::builtin_translates_to_nonarray(spv::BuiltIn builtin) const
{
return (builtin == BuiltInSampleMask);

View File

@@ -603,9 +603,6 @@ protected:
std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t physical_type_id,
bool is_packed, bool row_major) override;
// Returns false, because Metal does not support combined texture-samplers.
bool supports_combined_samplers() const override;
// Returns true for BuiltInSampleMask because gl_SampleMask[] is an array in SPIR-V, but [[sample_mask]] is a scalar in Metal.
bool builtin_translates_to_nonarray(spv::BuiltIn builtin) const override;