#pragma clang diagnostic ignored "-Wmissing-prototypes"

#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;
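
// Per output control point data, written to the spvOut device buffer.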
struct main0_out
{
    float4 gl_Position;
};
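
// Per-patch outputs, written once per patch to the spvPatchOut device buffer.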
struct main0_patchOut
{
    float3 vFoo;
};
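
// One input control point; gl_Position is fetched from attribute slot 0 of the stage-in data.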
struct main0_in
{
    float4 gl_Position [[attribute(0)]];
};
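
// Writes this invocation's output control point position as the sum of the first two
// input control points' positions.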
inline void set_position(device main0_out* thread & gl_out, thread uint& gl_InvocationID, threadgroup main0_in* thread & gl_in)
{
    gl_out[gl_InvocationID].gl_Position = gl_in[0].gl_Position + gl_in[1].gl_Position;
}
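
// Metal has no tessellation control stage, so the control shader runs as a compute
// kernel: one threadgroup per patch here, with threads covering the input and output
// control points. Results are written to the spvOut/spvPatchOut/spvTessLevel device
// buffers for the later tessellation stages to consume. The specific buffer binding
// indices (26-29) are a convention of this generated code, not something Metal requires.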
kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
{
    device main0_out* gl_out = &spvOut[gl_PrimitiveID * 1];
    device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
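
    // Stage this invocation's input control point into threadgroup memory so that
    // every invocation can read the full gl_in array after the barrier.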
    if (gl_InvocationID < spvIndirectParams[0])
        gl_in[gl_InvocationID] = in;
    threadgroup_barrier(mem_flags::mem_threadgroup);
    // Return early from helper invocations once the input control points are loaded.
    // When there are more input control points than output control points, the extra
    // invocations exist only to fill gl_in above. The MSL spec says threadgroup_barrier
    // "must be encountered by all threads in a threadgroup executing the kernel", but
    // invocations that have already returned are no longer executing the kernel, so
    // letting the helpers return here does not hang the remaining threads in practice.
    // Returning early also fixes execution-ordering issues where invalid data written
    // later by the helpers could overwrite valid results.
    if (gl_InvocationID >= 1)
        return;
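
    // Write the per-patch quad tessellation factors; insideTessellationFactor and
    // edgeTessellationFactor correspond to gl_TessLevelInner and gl_TessLevelOuter
    // in the original shader.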
    spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(8.8999996185302734375);
    spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(6.900000095367431640625);
    spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(8.8999996185302734375);
    spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(6.900000095367431640625);
    spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.900000095367431640625);
    spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(4.900000095367431640625);
    patchOut.vFoo = float3(1.0);
    set_position(gl_out, gl_InvocationID, gl_in);
}