Merge pull request #1494 from cdavis5e/msl-tesc-tess-level-cast

MSL: Don't use a bitcast for tessellation levels in tesc shaders.
This commit is contained in:
Hans-Kristian Arntzen 2020-10-15 10:52:11 +02:00 committed by GitHub
commit 23a0cfc842
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 211 additions and 16 deletions

View File

@ -0,0 +1,35 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct main0_out
{
float4 gl_Position;
};
struct main0_in
{
uint3 m_82;
ushort2 m_86;
float4 gl_Position;
};
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]])
{
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]];
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
if (gl_InvocationID == 0)
{
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(2.0);
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(3.0);
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(4.0);
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(5.0);
spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(mix(float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0]), float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3]), 0.5));
spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(mix(float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2]), float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1]), 0.5));
}
gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;
}

View File

@ -0,0 +1,35 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct main0_out
{
float4 gl_Position;
};
struct main0_in
{
float4 gl_Position [[attribute(0)]];
};
kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
{
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
if (gl_InvocationID < spvIndirectParams[0])
gl_in[gl_InvocationID] = in;
threadgroup_barrier(mem_flags::mem_threadgroup);
if (gl_InvocationID >= 4)
return;
if (gl_InvocationID == 0)
{
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(2.0);
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(3.0);
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(4.0);
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(5.0);
spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(mix(float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0]), float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3]), 0.5));
spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(mix(float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2]), float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1]), 0.5));
}
gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;
}

View File

@ -0,0 +1,35 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct main0_out
{
float4 gl_Position;
};
struct main0_in
{
uint3 m_82;
ushort2 m_86;
float4 gl_Position;
};
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]])
{
device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]];
uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
if (gl_InvocationID == 0)
{
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(2.0);
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(3.0);
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(4.0);
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(5.0);
spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(mix(float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0]), float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3]), 0.5));
spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(mix(float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2]), float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1]), 0.5));
}
gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;
}

View File

@ -0,0 +1,35 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct main0_out
{
float4 gl_Position;
};
struct main0_in
{
float4 gl_Position [[attribute(0)]];
};
kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
{
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
if (gl_InvocationID < spvIndirectParams[0])
gl_in[gl_InvocationID] = in;
threadgroup_barrier(mem_flags::mem_threadgroup);
if (gl_InvocationID >= 4)
return;
if (gl_InvocationID == 0)
{
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(2.0);
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(3.0);
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(4.0);
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(5.0);
spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(mix(float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0]), float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3]), 0.5));
spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(mix(float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2]), float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1]), 0.5));
}
gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;
}

View File

@ -0,0 +1,17 @@
#version 450
layout(vertices = 4) out;
void main()
{
if (gl_InvocationID == 0)
{
gl_TessLevelOuter[0] = 2.0;
gl_TessLevelOuter[1] = 3.0;
gl_TessLevelOuter[2] = 4.0;
gl_TessLevelOuter[3] = 5.0;
gl_TessLevelInner[0] = mix(gl_TessLevelOuter[0], gl_TessLevelOuter[3], 0.5);
gl_TessLevelInner[1] = mix(gl_TessLevelOuter[2], gl_TessLevelOuter[1], 0.5);
}
gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;
}

View File

@ -0,0 +1,17 @@
#version 450
layout(vertices = 4) out;
void main()
{
if (gl_InvocationID == 0)
{
gl_TessLevelOuter[0] = 2.0;
gl_TessLevelOuter[1] = 3.0;
gl_TessLevelOuter[2] = 4.0;
gl_TessLevelOuter[3] = 5.0;
gl_TessLevelInner[0] = mix(gl_TessLevelOuter[0], gl_TessLevelOuter[3], 0.5);
gl_TessLevelInner[1] = mix(gl_TessLevelOuter[2], gl_TessLevelOuter[1], 0.5);
}
gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;
}

View File

@ -9313,8 +9313,8 @@ void CompilerGLSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_ex
auto lhs = to_dereferenced_expression(lhs_expression);
// We might need to bitcast in order to store to a builtin.
bitcast_to_builtin_store(lhs_expression, rhs, expression_type(rhs_expression));
// We might need to cast in order to store to a builtin.
cast_to_builtin_store(lhs_expression, rhs, expression_type(rhs_expression));
// Tries to optimize assignments like "<lhs> = <lhs> op expr".
// While this is purely cosmetic, this is important for legacy ESSL where loop
@ -9477,8 +9477,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
if (expr_type.vecsize > type.vecsize)
expr = enclose_expression(expr + vector_swizzle(type.vecsize, 0));
// We might need to bitcast in order to load from a builtin.
bitcast_from_builtin_load(ptr, expr, type);
// We might need to cast in order to load from a builtin.
cast_from_builtin_load(ptr, expr, type);
// We might be trying to load a gl_Position[N], where we should be
// doing float4[](gl_in[i].gl_Position, ...) instead.
@ -14484,7 +14484,7 @@ void CompilerGLSL::unroll_array_from_complex_load(uint32_t target_id, uint32_t s
}
}
void CompilerGLSL::bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type)
void CompilerGLSL::cast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type)
{
auto *var = maybe_get_backing_variable(source_id);
if (var)
@ -14536,7 +14536,7 @@ void CompilerGLSL::bitcast_from_builtin_load(uint32_t source_id, std::string &ex
expr = bitcast_expression(expr_type, expected_type, expr);
}
void CompilerGLSL::bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type)
void CompilerGLSL::cast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type)
{
// Only interested in standalone builtin variables.
if (!has_decoration(target_id, DecorationBuiltIn))

View File

@ -854,9 +854,9 @@ protected:
// Builtins in GLSL are always specific signedness, but the SPIR-V can declare them
// as either unsigned or signed.
// Sometimes we will need to automatically perform bitcasts on load and store to make this work.
virtual void bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type);
virtual void bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type);
// Sometimes we will need to automatically perform casts on load and store to make this work.
virtual void cast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type);
virtual void cast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type);
void unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr);
void convert_non_uniform_expression(const SPIRType &type, std::string &expr);

View File

@ -13299,7 +13299,7 @@ void CompilerMSL::remap_constexpr_sampler_by_binding(uint32_t desc_set, uint32_t
constexpr_samplers_by_binding[{ desc_set, binding }] = sampler;
}
void CompilerMSL::bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type)
void CompilerMSL::cast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type)
{
auto *var = maybe_get_backing_variable(source_id);
if (var)
@ -13311,6 +13311,7 @@ void CompilerMSL::bitcast_from_builtin_load(uint32_t source_id, std::string &exp
auto builtin = static_cast<BuiltIn>(get_decoration(source_id, DecorationBuiltIn));
auto expected_type = expr_type.basetype;
auto expected_width = expr_type.width;
switch (builtin)
{
case BuiltInGlobalInvocationId:
@ -13331,12 +13332,16 @@ void CompilerMSL::bitcast_from_builtin_load(uint32_t source_id, std::string &exp
case BuiltInBaseInstance:
case BuiltInBaseVertex:
expected_type = SPIRType::UInt;
expected_width = 32;
break;
case BuiltInTessLevelInner:
case BuiltInTessLevelOuter:
if (get_execution_model() == ExecutionModelTessellationControl)
{
expected_type = SPIRType::Half;
expected_width = 16;
}
break;
default:
@ -13344,7 +13349,17 @@ void CompilerMSL::bitcast_from_builtin_load(uint32_t source_id, std::string &exp
}
if (expected_type != expr_type.basetype)
expr = bitcast_expression(expr_type, expected_type, expr);
{
if (expected_width != expr_type.width)
{
// These are of different widths, so we cannot do a straight bitcast.
expr = join(type_to_glsl(expr_type), "(", expr, ")");
}
else
{
expr = bitcast_expression(expr_type, expected_type, expr);
}
}
if (builtin == BuiltInTessCoord && get_entry_point().flags.get(ExecutionModeQuads) && expr_type.vecsize == 3)
{
@ -13354,7 +13369,7 @@ void CompilerMSL::bitcast_from_builtin_load(uint32_t source_id, std::string &exp
}
}
void CompilerMSL::bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type)
void CompilerMSL::cast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type)
{
auto *var = maybe_get_backing_variable(target_id);
if (var)
@ -13366,6 +13381,7 @@ void CompilerMSL::bitcast_to_builtin_store(uint32_t target_id, std::string &expr
auto builtin = static_cast<BuiltIn>(get_decoration(target_id, DecorationBuiltIn));
auto expected_type = expr_type.basetype;
auto expected_width = expr_type.width;
switch (builtin)
{
case BuiltInLayer:
@ -13374,11 +13390,13 @@ void CompilerMSL::bitcast_to_builtin_store(uint32_t target_id, std::string &expr
case BuiltInPrimitiveId:
case BuiltInViewIndex:
expected_type = SPIRType::UInt;
expected_width = 32;
break;
case BuiltInTessLevelInner:
case BuiltInTessLevelOuter:
expected_type = SPIRType::Half;
expected_width = 16;
break;
default:
@ -13387,10 +13405,13 @@ void CompilerMSL::bitcast_to_builtin_store(uint32_t target_id, std::string &expr
if (expected_type != expr_type.basetype)
{
if (expected_type == SPIRType::Half && expr_type.basetype == SPIRType::Float)
if (expected_width != expr_type.width)
{
// These are of different widths, so we cannot do a straight bitcast.
expr = join("half(", expr, ")");
auto type = expr_type;
type.basetype = expected_type;
type.width = expected_width;
expr = join(type_to_glsl(type), "(", expr, ")");
}
else
{

View File

@ -844,8 +844,8 @@ protected:
bool does_shader_write_sample_mask = false;
void bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) override;
void bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) override;
void cast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) override;
void cast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) override;
void emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) override;
void analyze_sampled_image_usage();