2019-02-04 05:58:46 +00:00
|
|
|
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
2019-08-14 15:09:39 +00:00
|
|
|
#pragma clang diagnostic ignored "-Wmissing-braces"
|
|
|
|
#pragma clang diagnostic ignored "-Wunused-variable"
|
2019-02-04 05:58:46 +00:00
|
|
|
|
|
|
|
#include <metal_stdlib>
|
|
|
|
#include <simd/simd.h>
|
2019-08-14 15:09:39 +00:00
|
|
|
|
|
|
|
template <typename T, size_t Num>
|
|
|
|
struct unsafe_array
|
|
|
|
{
|
|
|
|
T __Elements[Num ? Num : 1];
|
|
|
|
|
|
|
|
constexpr size_t size() const thread { return Num; }
|
|
|
|
constexpr size_t max_size() const thread { return Num; }
|
|
|
|
constexpr bool empty() const thread { return Num == 0; }
|
|
|
|
|
|
|
|
constexpr size_t size() const device { return Num; }
|
|
|
|
constexpr size_t max_size() const device { return Num; }
|
|
|
|
constexpr bool empty() const device { return Num == 0; }
|
|
|
|
|
|
|
|
constexpr size_t size() const constant { return Num; }
|
|
|
|
constexpr size_t max_size() const constant { return Num; }
|
|
|
|
constexpr bool empty() const constant { return Num == 0; }
|
|
|
|
|
|
|
|
constexpr size_t size() const threadgroup { return Num; }
|
|
|
|
constexpr size_t max_size() const threadgroup { return Num; }
|
|
|
|
constexpr bool empty() const threadgroup { return Num == 0; }
|
|
|
|
|
|
|
|
thread T &operator[](size_t pos) thread
|
|
|
|
{
|
|
|
|
return __Elements[pos];
|
|
|
|
}
|
|
|
|
constexpr const thread T &operator[](size_t pos) const thread
|
|
|
|
{
|
|
|
|
return __Elements[pos];
|
|
|
|
}
|
|
|
|
|
|
|
|
device T &operator[](size_t pos) device
|
|
|
|
{
|
|
|
|
return __Elements[pos];
|
|
|
|
}
|
|
|
|
constexpr const device T &operator[](size_t pos) const device
|
|
|
|
{
|
|
|
|
return __Elements[pos];
|
|
|
|
}
|
|
|
|
|
|
|
|
constexpr const constant T &operator[](size_t pos) const constant
|
|
|
|
{
|
|
|
|
return __Elements[pos];
|
|
|
|
}
|
|
|
|
|
|
|
|
threadgroup T &operator[](size_t pos) threadgroup
|
|
|
|
{
|
|
|
|
return __Elements[pos];
|
|
|
|
}
|
|
|
|
constexpr const threadgroup T &operator[](size_t pos) const threadgroup
|
|
|
|
{
|
|
|
|
return __Elements[pos];
|
|
|
|
}
|
|
|
|
};
|
2019-02-04 05:58:46 +00:00
|
|
|
|
|
|
|
using namespace metal;
|
|
|
|
|
|
|
|
struct VertexOutput
|
|
|
|
{
|
|
|
|
float4 pos;
|
|
|
|
float2 uv;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct HSOut
|
|
|
|
{
|
|
|
|
float4 pos;
|
|
|
|
float2 uv;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct HSConstantOut
|
|
|
|
{
|
2019-08-14 15:09:39 +00:00
|
|
|
unsafe_array<float,3> EdgeTess;
|
2019-02-04 05:58:46 +00:00
|
|
|
float InsideTess;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct VertexOutput_1
|
|
|
|
{
|
|
|
|
float2 uv;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct HSOut_1
|
|
|
|
{
|
|
|
|
float2 uv;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct main0_out
|
|
|
|
{
|
|
|
|
HSOut_1 _entryPointOutput;
|
|
|
|
float4 gl_Position;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct main0_in
|
|
|
|
{
|
|
|
|
float2 VertexOutput_uv [[attribute(0)]];
|
|
|
|
float4 gl_Position [[attribute(1)]];
|
|
|
|
};
|
|
|
|
|
2019-08-14 15:09:39 +00:00
|
|
|
static inline __attribute__((always_inline))
|
|
|
|
HSOut _hs_main(thread const unsafe_array<VertexOutput,3> (&p), thread const uint& i)
|
2019-02-04 05:58:46 +00:00
|
|
|
{
|
|
|
|
HSOut _output;
|
|
|
|
_output.pos = p[i].pos;
|
|
|
|
_output.uv = p[i].uv;
|
|
|
|
return _output;
|
|
|
|
}
|
|
|
|
|
2019-08-14 15:09:39 +00:00
|
|
|
static inline __attribute__((always_inline))
|
|
|
|
HSConstantOut PatchHS(thread const unsafe_array<VertexOutput,3> (&_patch))
|
2019-02-04 05:58:46 +00:00
|
|
|
{
|
|
|
|
HSConstantOut _output;
|
|
|
|
_output.EdgeTess[0] = (float2(1.0) + _patch[0].uv).x;
|
|
|
|
_output.EdgeTess[1] = (float2(1.0) + _patch[0].uv).x;
|
|
|
|
_output.EdgeTess[2] = (float2(1.0) + _patch[0].uv).x;
|
|
|
|
_output.InsideTess = (float2(1.0) + _patch[0].uv).x;
|
|
|
|
return _output;
|
|
|
|
}
|
|
|
|
|
2019-04-10 17:37:04 +00:00
|
|
|
kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
|
2019-02-04 05:58:46 +00:00
|
|
|
{
|
|
|
|
device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3];
|
|
|
|
if (gl_InvocationID < spvIndirectParams[0])
|
|
|
|
gl_in[gl_InvocationID] = in;
|
|
|
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
MSL: Return early from helper tesc invocations.
Return after loading the input control point array if there are more
input points than output points, and this was one of the helper
invocations spun off to load the input points. I was hesitant to do this
initially, since the MSL spec has this to say about barriers:
> The `threadgroup_barrier` (or `simdgroup_barrier`) function must be
> encountered by all threads in a threadgroup (or SIMD-group) executing
> the kernel.
That is, if any thread executes the barrier, then all threads must
execute it, or the barrier'd invocations will hang. But, the key words
here seem to be "executing the kernel;" inactive invocations, those that
have already returned, need not encounter the barrier to prevent hangs.
Indeed, I've encountered no problems from doing this, at least on my
hardware. This also fixes a few CTS tests that were failing due to
execution ordering; apparently, my assumption that the later, invalid
data written by the helpers would get overwritten was wrong.
2019-02-24 18:06:54 +00:00
|
|
|
if (gl_InvocationID >= 3)
|
|
|
|
return;
|
2019-08-14 15:09:39 +00:00
|
|
|
unsafe_array<VertexOutput,3> p;
|
2019-02-04 05:58:46 +00:00
|
|
|
p[0].pos = gl_in[0].gl_Position;
|
|
|
|
p[0].uv = gl_in[0].VertexOutput_uv;
|
|
|
|
p[1].pos = gl_in[1].gl_Position;
|
|
|
|
p[1].uv = gl_in[1].VertexOutput_uv;
|
|
|
|
p[2].pos = gl_in[2].gl_Position;
|
|
|
|
p[2].uv = gl_in[2].VertexOutput_uv;
|
|
|
|
uint i = gl_InvocationID;
|
2019-08-14 15:09:39 +00:00
|
|
|
unsafe_array<VertexOutput,3> param;
|
|
|
|
param = p;
|
2019-02-04 05:58:46 +00:00
|
|
|
uint param_1 = i;
|
|
|
|
HSOut flattenTemp = _hs_main(param, param_1);
|
|
|
|
gl_out[gl_InvocationID].gl_Position = flattenTemp.pos;
|
|
|
|
gl_out[gl_InvocationID]._entryPointOutput.uv = flattenTemp.uv;
|
2019-08-14 15:09:39 +00:00
|
|
|
threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup);
|
2019-02-04 05:58:46 +00:00
|
|
|
if (int(gl_InvocationID) == 0)
|
|
|
|
{
|
2019-08-14 15:09:39 +00:00
|
|
|
unsafe_array<VertexOutput,3> param_2;
|
|
|
|
param_2 = p;
|
2019-02-04 05:58:46 +00:00
|
|
|
HSConstantOut _patchConstantResult = PatchHS(param_2);
|
|
|
|
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(_patchConstantResult.EdgeTess[0]);
|
|
|
|
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(_patchConstantResult.EdgeTess[1]);
|
|
|
|
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(_patchConstantResult.EdgeTess[2]);
|
|
|
|
spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(_patchConstantResult.InsideTess);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|