MSL: Rewrite propagated depth comparison state handling.
It is far cleaner, and more correct, to run the traversal twice. Fixes a case where we propagate depth state through multiple functions.
This commit is contained in:
parent
830e24c4ba
commit
8066d13599
@ -0,0 +1,23 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
// Return type of the fragment function main0: a single float, FragColor,
// tagged with the [[color(0)]] attribute.
struct main0_out
|
||||
{
|
||||
float FragColor [[color(0)]];
|
||||
};
|
||||
|
||||
// Stage-in input of the fragment function main0: a float3, vUV, tagged with
// the [[user(locn0)]] attribute.
struct main0_in
|
||||
{
|
||||
float3 vUV [[user(locn0)]];
|
||||
};
|
||||
|
||||
// Fragment entry point. Reads the same depth2d texture twice and sums the results
// into FragColor:
//   1. a regular sample at in.vUV.xy using uSampler (x component of the result), and
//   2. a comparison sample at in.vUV.xy using uSamplerShadow, with in.vUV.z passed
//      as the compare value.
fragment main0_out main0(main0_in in [[stage_in]], depth2d<float> uTexture [[texture(0)]], sampler uSampler [[sampler(0)]], sampler uSamplerShadow [[sampler(1)]])
|
||||
{
|
||||
main0_out out = {};
|
||||
out.FragColor = float4(uTexture.sample(uSampler, in.vUV.xy)).x;
|
||||
out.FragColor += uTexture.sample_compare(uSamplerShadow, in.vUV.xy, in.vUV.z);
|
||||
return out;
|
||||
}
|
||||
|
@ -0,0 +1,43 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
// Return type of the fragment function main0: a single float, FragColor,
// tagged with the [[color(0)]] attribute.
struct main0_out
|
||||
{
|
||||
float FragColor [[color(0)]];
|
||||
};
|
||||
|
||||
// Stage-in input of the fragment function main0: a float3, vUV, tagged with
// the [[user(locn0)]] attribute.
struct main0_in
|
||||
{
|
||||
float3 vUV [[user(locn0)]];
|
||||
};
|
||||
|
||||
// Performs a regular (non-comparison) sample of tex at vUV.xy using uSampler and
// returns the x component of the result. The texture parameter is typed
// depth2d<float> even though no comparison is performed in this function.
static inline __attribute__((always_inline))
|
||||
float sample_normal2(thread const depth2d<float> tex, thread sampler uSampler, thread float3& vUV)
|
||||
{
|
||||
return float4(tex.sample(uSampler, vUV.xy)).x;
|
||||
}
|
||||
|
||||
// Thin wrapper that forwards its arguments unchanged to sample_normal2 and
// returns that function's result.
static inline __attribute__((always_inline))
|
||||
float sample_normal(thread const depth2d<float> tex, thread sampler uSampler, thread float3& vUV)
|
||||
{
|
||||
return sample_normal2(tex, uSampler, vUV);
|
||||
}
|
||||
|
||||
// Performs a comparison sample of tex at vUV.xy using uSamplerShadow, passing
// vUV.z as the compare value, and returns the result.
static inline __attribute__((always_inline))
|
||||
float sample_comp(thread const depth2d<float> tex, thread float3& vUV, thread sampler uSamplerShadow)
|
||||
{
|
||||
return tex.sample_compare(uSamplerShadow, vUV.xy, vUV.z);
|
||||
}
|
||||
|
||||
// Fragment entry point. Passes the same depth2d texture to both helper chains and
// sums their results into FragColor: sample_normal (regular sample via uSampler)
// plus sample_comp (comparison sample via uSamplerShadow).
fragment main0_out main0(main0_in in [[stage_in]], depth2d<float> uTexture [[texture(0)]], sampler uSampler [[sampler(0)]], sampler uSamplerShadow [[sampler(1)]])
|
||||
{
|
||||
main0_out out = {};
|
||||
out.FragColor = sample_normal(uTexture, uSampler, in.vUV);
|
||||
out.FragColor += sample_comp(uTexture, in.vUV, uSamplerShadow);
|
||||
return out;
|
||||
}
|
||||
|
@ -0,0 +1,29 @@
|
||||
#version 450
|
||||
|
||||
layout(set = 0, binding = 0) uniform texture2D uTexture;
|
||||
layout(set = 0, binding = 1) uniform sampler uSampler;
|
||||
layout(set = 0, binding = 2) uniform samplerShadow uSamplerShadow;
|
||||
|
||||
layout(location = 0) out float FragColor;
|
||||
layout(location = 0) in vec3 vUV;
|
||||
|
||||
// Combines tex with the global uSampler into a sampler2D, samples it at vUV.xy,
// and returns the x component of the result.
float sample_normal2(texture2D tex)
|
||||
{
|
||||
return texture(sampler2D(tex, uSampler), vUV.xy).x;
|
||||
}
|
||||
|
||||
// Thin wrapper that forwards tex to sample_normal2 and returns its result,
// adding one more level of function call between main() and the texture read.
float sample_normal(texture2D tex)
|
||||
{
|
||||
return sample_normal2(tex);
|
||||
}
|
||||
|
||||
// Combines tex with the global uSamplerShadow into a sampler2DShadow and performs
// a shadow lookup with the full vec3 vUV as the coordinate argument.
float sample_comp(texture2D tex)
|
||||
{
|
||||
return texture(sampler2DShadow(tex, uSamplerShadow), vUV);
|
||||
}
|
||||
|
||||
// Entry point. Passes the same uTexture to both the regular-sampling helper chain
// (sample_normal) and the shadow-sampling helper (sample_comp), and writes the
// sum of the two results to FragColor.
void main()
|
||||
{
|
||||
FragColor = sample_normal(uTexture);
|
||||
FragColor += sample_comp(uTexture);
|
||||
}
|
@ -3549,11 +3549,6 @@ Bitset Compiler::get_buffer_block_flags(VariableID id) const
|
||||
return ir.get_buffer_block_flags(get<SPIRVariable>(id));
|
||||
}
|
||||
|
||||
// Base-class answer to "does the target language support combined
// texture-samplers?". Always returns false here.
bool Compiler::supports_combined_samplers() const
|
||||
{
|
||||
return false; // default implementation
|
||||
}
|
||||
|
||||
bool Compiler::get_common_basic_type(const SPIRType &type, SPIRType::BaseType &base_type)
|
||||
{
|
||||
if (type.basetype == SPIRType::Struct)
|
||||
@ -3791,6 +3786,13 @@ void Compiler::analyze_image_and_sampler_usage()
|
||||
|
||||
CombinedImageSamplerUsageHandler handler(*this, dref_handler.dref_combined_samplers);
|
||||
traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
|
||||
|
||||
// Need to run this traversal twice. First time, we propagate any comparison sampler usage from leaf functions
|
||||
// down to main().
|
||||
// In the second pass, we can propagate forced depth state coming from main() up into leaf functions.
|
||||
handler.dependency_hierarchy.clear();
|
||||
traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
|
||||
|
||||
comparison_ids = move(handler.comparison_ids);
|
||||
need_subpass_input = handler.need_subpass_input;
|
||||
|
||||
@ -3906,6 +3908,14 @@ bool Compiler::CFGBuilder::follow_function_call(const SPIRFunction &func)
|
||||
return false;
|
||||
}
|
||||
|
||||
// Records that `dst` depends on `src` by inserting `src` into
// dependency_hierarchy[dst]. If `src` is already present in comparison_ids,
// `dst` is added to comparison_ids as well.
void Compiler::CombinedImageSamplerUsageHandler::add_dependency(uint32_t dst, uint32_t src)
|
||||
{
|
||||
dependency_hierarchy[dst].insert(src);
|
||||
// Propagate up any comparison state if we're loading from one such variable.
|
||||
if (comparison_ids.count(src))
|
||||
comparison_ids.insert(dst);
|
||||
}
|
||||
|
||||
bool Compiler::CombinedImageSamplerUsageHandler::begin_function_scope(const uint32_t *args, uint32_t length)
|
||||
{
|
||||
if (length < 3)
|
||||
@ -3918,7 +3928,7 @@ bool Compiler::CombinedImageSamplerUsageHandler::begin_function_scope(const uint
|
||||
for (uint32_t i = 0; i < length; i++)
|
||||
{
|
||||
auto &argument = func.arguments[i];
|
||||
dependency_hierarchy[argument.id].insert(arg[i]);
|
||||
add_dependency(argument.id, arg[i]);
|
||||
}
|
||||
|
||||
return true;
|
||||
@ -3929,39 +3939,10 @@ void Compiler::CombinedImageSamplerUsageHandler::add_hierarchy_to_comparison_ids
|
||||
// Traverse the variable dependency hierarchy and tag everything in its path with comparison ids.
|
||||
comparison_ids.insert(id);
|
||||
|
||||
// If the underlying resource has been used for comparison then duplicate loads of that resource must be too.
|
||||
if (!compiler.supports_combined_samplers())
|
||||
{
|
||||
for (const auto &hierarchy : dependency_hierarchy)
|
||||
{
|
||||
if (hierarchy.second.find(id) != hierarchy.second.end())
|
||||
comparison_ids.insert(hierarchy.first);
|
||||
}
|
||||
}
|
||||
|
||||
for (auto &dep_id : dependency_hierarchy[id])
|
||||
add_hierarchy_to_comparison_ids(dep_id);
|
||||
}
|
||||
|
||||
// If the underlying resource has been used for comparison then duplicate loads of that resource must be too.
// Returns true when any ID stored in dependency_hierarchy[id] is present in
// comparison_ids. Returns false when compiler.supports_combined_samplers() is
// true, when `id` has no entry in dependency_hierarchy, or when none of its
// recorded dependencies is in comparison_ids. Only the direct entries for `id`
// are inspected; the lookup does not recurse.
|
||||
bool Compiler::CombinedImageSamplerUsageHandler::dependent_used_for_comparison(uint32_t id) const
|
||||
{
|
||||
// Targets with combined texture-samplers never report a dependent comparison use.
if (compiler.supports_combined_samplers())
|
||||
return false;
|
||||
|
||||
// find() is used so that a missing `id` does not insert an empty entry (the method is const).
auto hierarchy_iter = dependency_hierarchy.find(id);
|
||||
if (hierarchy_iter != dependency_hierarchy.end())
|
||||
{
|
||||
for (uint32_t dependent_id : hierarchy_iter->second)
|
||||
{
|
||||
if (comparison_ids.find(dependent_id) != comparison_ids.end())
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
|
||||
{
|
||||
switch (opcode)
|
||||
@ -3973,7 +3954,8 @@ bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_
|
||||
{
|
||||
if (length < 3)
|
||||
return false;
|
||||
dependency_hierarchy[args[1]].insert(args[2]);
|
||||
|
||||
add_dependency(args[1], args[2]);
|
||||
|
||||
// Ideally defer this to OpImageRead, but then we'd need to track loaded IDs.
|
||||
// If we load an image, we're going to use it and there is little harm in declaring an unused gl_FragCoord.
|
||||
@ -4001,10 +3983,7 @@ bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_
|
||||
uint32_t image = args[2];
|
||||
uint32_t sampler = args[3];
|
||||
|
||||
bool dependent = !compiler.supports_combined_samplers() &&
|
||||
(dependent_used_for_comparison(sampler) || dependent_used_for_comparison(image));
|
||||
|
||||
if (type.image.depth || dref_combined_samplers.count(result_id) != 0 || dependent)
|
||||
if (type.image.depth || dref_combined_samplers.count(result_id) != 0)
|
||||
{
|
||||
add_hierarchy_to_comparison_ids(image);
|
||||
|
||||
|
@ -612,9 +612,6 @@ protected:
|
||||
void register_read(uint32_t expr, uint32_t chain, bool forwarded);
|
||||
void register_write(uint32_t chain);
|
||||
|
||||
// Returns true if the target language supports combined texture-samplers. Returns false by default.
|
||||
virtual bool supports_combined_samplers() const;
|
||||
|
||||
inline bool is_continue(uint32_t next) const
|
||||
{
|
||||
return (ir.block_meta[next] & ParsedIR::BLOCK_META_CONTINUE_BIT) != 0;
|
||||
@ -891,10 +888,7 @@ protected:
|
||||
|
||||
void add_hierarchy_to_comparison_ids(uint32_t ids);
|
||||
bool need_subpass_input = false;
|
||||
|
||||
// If the underlying resource has been used for comparison then duplicate loads of that resource must be too.
|
||||
// Returns true if a dependent resource in the dependency hierarchy of the specified image or sampler has been used for comparison.
|
||||
bool dependent_used_for_comparison(uint32_t id) const;
|
||||
void add_dependency(uint32_t dst, uint32_t src);
|
||||
};
|
||||
|
||||
void build_function_control_flow_graphs_and_analyze();
|
||||
|
@ -11164,11 +11164,6 @@ void CompilerGLSL::flatten_buffer_block(VariableID id)
|
||||
flattened_buffer_blocks.insert(id);
|
||||
}
|
||||
|
||||
// GLSL backend answer to "does the target language support combined
// texture-samplers?". Always returns true.
bool CompilerGLSL::supports_combined_samplers() const
|
||||
{
|
||||
return true; // GLSL always supports combined texture-samplers.
|
||||
}
|
||||
|
||||
bool CompilerGLSL::builtin_translates_to_nonarray(spv::BuiltIn /*builtin*/) const
|
||||
{
|
||||
return false; // GLSL itself does not need to translate array builtin types to non-array builtin types
|
||||
|
@ -273,9 +273,6 @@ protected:
|
||||
virtual std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t physical_type_id,
|
||||
bool packed_type, bool row_major);
|
||||
|
||||
// Returns true, because GLSL always supports combined texture-samplers.
|
||||
virtual bool supports_combined_samplers() const override;
|
||||
|
||||
virtual bool builtin_translates_to_nonarray(spv::BuiltIn builtin) const;
|
||||
|
||||
StringStream<> buffer;
|
||||
|
@ -122,11 +122,6 @@ void CompilerMSL::set_fragment_output_components(uint32_t location, uint32_t com
|
||||
fragment_output_components[location] = components;
|
||||
}
|
||||
|
||||
// MSL backend answer to "does the target language support combined
// texture-samplers?". Always returns false.
bool CompilerMSL::supports_combined_samplers() const
|
||||
{
|
||||
return false; // Metal does not support combined texture-samplers
|
||||
}
|
||||
|
||||
bool CompilerMSL::builtin_translates_to_nonarray(spv::BuiltIn builtin) const
|
||||
{
|
||||
return (builtin == BuiltInSampleMask);
|
||||
|
@ -603,9 +603,6 @@ protected:
|
||||
std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t physical_type_id,
|
||||
bool is_packed, bool row_major) override;
|
||||
|
||||
// Returns false, because Metal does not support combined texture-samplers.
|
||||
bool supports_combined_samplers() const override;
|
||||
|
||||
// Returns true for BuiltInSampleMask because gl_SampleMask[] is an array in SPIR-V, but [[sample_mask]] is a scalar in Metal.
|
||||
bool builtin_translates_to_nonarray(spv::BuiltIn builtin) const override;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user