2018-01-09 11:55:46 +00:00
|
|
|
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
|
|
|
|
|
|
|
#include <metal_stdlib>
|
|
|
|
#include <simd/simd.h>
|
|
|
|
|
|
|
|
using namespace metal;
|
|
|
|
|
2019-03-28 09:54:18 +00:00
|
|
|
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(4u, 1u, 1u);
|
2018-01-09 11:55:46 +00:00
|
|
|
|
2019-09-17 19:11:19 +00:00
|
|
|
static inline __attribute__((always_inline))
|
|
|
|
void barrier_shared()
|
2018-01-09 11:55:46 +00:00
|
|
|
{
|
|
|
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
|
|
|
}
|
|
|
|
|
2019-09-17 19:11:19 +00:00
|
|
|
static inline __attribute__((always_inline))
|
|
|
|
void full_barrier()
|
2018-01-09 11:55:46 +00:00
|
|
|
{
|
MSL: Add support for subgroup operations.
Some support for subgroups is present starting in Metal 2.0 on both iOS
and macOS. macOS gains more complete support in 10.14 (Metal 2.1).
Some restrictions are present. On iOS and on macOS 10.13, the
implementation of `OpGroupNonUniformElect` is incorrect: if thread 0 has
already terminated or is not executing a conditional branch, the first
thread that *is* will falsely believe itself not to be. Unfortunately,
this operation is part of the "basic" feature set; without it, subgroups
cannot be supported at all.
The `SubgroupSize` and `SubgroupLocalInvocationId` builtins are only
available in compute shaders (and, by extension, tessellation control
shaders), despite SPIR-V making them available in all stages. This
limits the usefulness of some of the subgroup operations in fragment
shaders.
Although Metal on macOS supports some clustered, inclusive, and
exclusive operations, it does not support them all. In particular,
inclusive and exclusive min, max, and, or, and xor; as well as cluster
sizes other than 4 are not supported. If this becomes a problem, they
could be emulated, but at a significant performance cost due to the need
for non-uniform operations.
2019-05-15 21:03:30 +00:00
|
|
|
threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
|
|
|
|
}
|
|
|
|
|
2019-09-17 19:11:19 +00:00
|
|
|
static inline __attribute__((always_inline))
|
|
|
|
void image_barrier()
|
MSL: Add support for subgroup operations.
Some support for subgroups is present starting in Metal 2.0 on both iOS
and macOS. macOS gains more complete support in 10.14 (Metal 2.1).
Some restrictions are present. On iOS and on macOS 10.13, the
implementation of `OpGroupNonUniformElect` is incorrect: if thread 0 has
already terminated or is not executing a conditional branch, the first
thread that *is* will falsely believe itself not to be. Unfortunately,
this operation is part of the "basic" feature set; without it, subgroups
cannot be supported at all.
The `SubgroupSize` and `SubgroupLocalInvocationId` builtins are only
available in compute shaders (and, by extension, tessellation control
shaders), despite SPIR-V making them available in all stages. This
limits the usefulness of some of the subgroup operations in fragment
shaders.
Although Metal on macOS supports some clustered, inclusive, and
exclusive operations, it does not support them all. In particular,
inclusive and exclusive min, max, and, or, and xor; as well as cluster
sizes other than 4 are not supported. If this becomes a problem, they
could be emulated, but at a significant performance cost due to the need
for non-uniform operations.
2019-05-15 21:03:30 +00:00
|
|
|
{
|
|
|
|
threadgroup_barrier(mem_flags::mem_texture);
|
2018-01-09 11:55:46 +00:00
|
|
|
}
|
|
|
|
|
2019-09-17 19:11:19 +00:00
|
|
|
static inline __attribute__((always_inline))
|
|
|
|
void buffer_barrier()
|
2018-01-09 11:55:46 +00:00
|
|
|
{
|
MSL: Add support for subgroup operations.
Some support for subgroups is present starting in Metal 2.0 on both iOS
and macOS. macOS gains more complete support in 10.14 (Metal 2.1).
Some restrictions are present. On iOS and on macOS 10.13, the
implementation of `OpGroupNonUniformElect` is incorrect: if thread 0 has
already terminated or is not executing a conditional branch, the first
thread that *is* will falsely believe itself not to be. Unfortunately,
this operation is part of the "basic" feature set; without it, subgroups
cannot be supported at all.
The `SubgroupSize` and `SubgroupLocalInvocationId` builtins are only
available in compute shaders (and, by extension, tessellation control
shaders), despite SPIR-V making them available in all stages. This
limits the usefulness of some of the subgroup operations in fragment
shaders.
Although Metal on macOS supports some clustered, inclusive, and
exclusive operations, it does not support them all. In particular,
inclusive and exclusive min, max, and, or, and xor; as well as cluster
sizes other than 4 are not supported. If this becomes a problem, they
could be emulated, but at a significant performance cost due to the need
for non-uniform operations.
2019-05-15 21:03:30 +00:00
|
|
|
threadgroup_barrier(mem_flags::mem_device);
|
2018-01-09 11:55:46 +00:00
|
|
|
}
|
|
|
|
|
2019-09-17 19:11:19 +00:00
|
|
|
static inline __attribute__((always_inline))
|
|
|
|
void group_barrier()
|
2018-01-09 11:55:46 +00:00
|
|
|
{
|
MSL: Add support for subgroup operations.
Some support for subgroups is present starting in Metal 2.0 on both iOS
and macOS. macOS gains more complete support in 10.14 (Metal 2.1).
Some restrictions are present. On iOS and on macOS 10.13, the
implementation of `OpGroupNonUniformElect` is incorrect: if thread 0 has
already terminated or is not executing a conditional branch, the first
thread that *is* will falsely believe itself not to be. Unfortunately,
this operation is part of the "basic" feature set; without it, subgroups
cannot be supported at all.
The `SubgroupSize` and `SubgroupLocalInvocationId` builtins are only
available in compute shaders (and, by extension, tessellation control
shaders), despite SPIR-V making them available in all stages. This
limits the usefulness of some of the subgroup operations in fragment
shaders.
Although Metal on macOS supports some clustered, inclusive, and
exclusive operations, it does not support them all. In particular,
inclusive and exclusive min, max, and, or, and xor; as well as cluster
sizes other than 4 are not supported. If this becomes a problem, they
could be emulated, but at a significant performance cost due to the need
for non-uniform operations.
2019-05-15 21:03:30 +00:00
|
|
|
threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
|
2018-01-09 11:55:46 +00:00
|
|
|
}
|
|
|
|
|
2019-09-17 19:11:19 +00:00
|
|
|
static inline __attribute__((always_inline))
|
|
|
|
void barrier_shared_exec()
|
2018-01-09 11:55:46 +00:00
|
|
|
{
|
|
|
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
|
|
|
}
|
|
|
|
|
2019-09-17 19:11:19 +00:00
|
|
|
static inline __attribute__((always_inline))
|
|
|
|
void full_barrier_exec()
|
2018-01-09 11:55:46 +00:00
|
|
|
{
|
MSL: Add support for subgroup operations.
Some support for subgroups is present starting in Metal 2.0 on both iOS
and macOS. macOS gains more complete support in 10.14 (Metal 2.1).
Some restrictions are present. On iOS and on macOS 10.13, the
implementation of `OpGroupNonUniformElect` is incorrect: if thread 0 has
already terminated or is not executing a conditional branch, the first
thread that *is* will falsely believe itself not to be. Unfortunately,
this operation is part of the "basic" feature set; without it, subgroups
cannot be supported at all.
The `SubgroupSize` and `SubgroupLocalInvocationId` builtins are only
available in compute shaders (and, by extension, tessellation control
shaders), despite SPIR-V making them available in all stages. This
limits the usefulness of some of the subgroup operations in fragment
shaders.
Although Metal on macOS supports some clustered, inclusive, and
exclusive operations, it does not support them all. In particular,
inclusive and exclusive min, max, and, or, and xor; as well as cluster
sizes other than 4 are not supported. If this becomes a problem, they
could be emulated, but at a significant performance cost due to the need
for non-uniform operations.
2019-05-15 21:03:30 +00:00
|
|
|
threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
|
|
|
|
}
|
|
|
|
|
2019-09-17 19:11:19 +00:00
|
|
|
static inline __attribute__((always_inline))
|
|
|
|
void image_barrier_exec()
|
MSL: Add support for subgroup operations.
Some support for subgroups is present starting in Metal 2.0 on both iOS
and macOS. macOS gains more complete support in 10.14 (Metal 2.1).
Some restrictions are present. On iOS and on macOS 10.13, the
implementation of `OpGroupNonUniformElect` is incorrect: if thread 0 has
already terminated or is not executing a conditional branch, the first
thread that *is* will falsely believe itself not to be. Unfortunately,
this operation is part of the "basic" feature set; without it, subgroups
cannot be supported at all.
The `SubgroupSize` and `SubgroupLocalInvocationId` builtins are only
available in compute shaders (and, by extension, tessellation control
shaders), despite SPIR-V making them available in all stages. This
limits the usefulness of some of the subgroup operations in fragment
shaders.
Although Metal on macOS supports some clustered, inclusive, and
exclusive operations, it does not support them all. In particular,
inclusive and exclusive min, max, and, or, and xor; as well as cluster
sizes other than 4 are not supported. If this becomes a problem, they
could be emulated, but at a significant performance cost due to the need
for non-uniform operations.
2019-05-15 21:03:30 +00:00
|
|
|
{
|
|
|
|
threadgroup_barrier(mem_flags::mem_texture);
|
2018-01-09 11:55:46 +00:00
|
|
|
}
|
|
|
|
|
2019-09-17 19:11:19 +00:00
|
|
|
static inline __attribute__((always_inline))
|
|
|
|
void buffer_barrier_exec()
|
2018-01-09 11:55:46 +00:00
|
|
|
{
|
MSL: Add support for subgroup operations.
Some support for subgroups is present starting in Metal 2.0 on both iOS
and macOS. macOS gains more complete support in 10.14 (Metal 2.1).
Some restrictions are present. On iOS and on macOS 10.13, the
implementation of `OpGroupNonUniformElect` is incorrect: if thread 0 has
already terminated or is not executing a conditional branch, the first
thread that *is* will falsely believe itself not to be. Unfortunately,
this operation is part of the "basic" feature set; without it, subgroups
cannot be supported at all.
The `SubgroupSize` and `SubgroupLocalInvocationId` builtins are only
available in compute shaders (and, by extension, tessellation control
shaders), despite SPIR-V making them available in all stages. This
limits the usefulness of some of the subgroup operations in fragment
shaders.
Although Metal on macOS supports some clustered, inclusive, and
exclusive operations, it does not support them all. In particular,
inclusive and exclusive min, max, and, or, and xor; as well as cluster
sizes other than 4 are not supported. If this becomes a problem, they
could be emulated, but at a significant performance cost due to the need
for non-uniform operations.
2019-05-15 21:03:30 +00:00
|
|
|
threadgroup_barrier(mem_flags::mem_device);
|
2018-01-09 11:55:46 +00:00
|
|
|
}
|
|
|
|
|
2019-09-17 19:11:19 +00:00
|
|
|
static inline __attribute__((always_inline))
|
|
|
|
void group_barrier_exec()
|
2018-01-09 11:55:46 +00:00
|
|
|
{
|
MSL: Add support for subgroup operations.
Some support for subgroups is present starting in Metal 2.0 on both iOS
and macOS. macOS gains more complete support in 10.14 (Metal 2.1).
Some restrictions are present. On iOS and on macOS 10.13, the
implementation of `OpGroupNonUniformElect` is incorrect: if thread 0 has
already terminated or is not executing a conditional branch, the first
thread that *is* will falsely believe itself not to be. Unfortunately,
this operation is part of the "basic" feature set; without it, subgroups
cannot be supported at all.
The `SubgroupSize` and `SubgroupLocalInvocationId` builtins are only
available in compute shaders (and, by extension, tessellation control
shaders), despite SPIR-V making them available in all stages. This
limits the usefulness of some of the subgroup operations in fragment
shaders.
Although Metal on macOS supports some clustered, inclusive, and
exclusive operations, it does not support them all. In particular,
inclusive and exclusive min, max, and, or, and xor; as well as cluster
sizes other than 4 are not supported. If this becomes a problem, they
could be emulated, but at a significant performance cost due to the need
for non-uniform operations.
2019-05-15 21:03:30 +00:00
|
|
|
threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
|
2018-01-09 11:55:46 +00:00
|
|
|
}
|
|
|
|
|
2019-09-17 19:11:19 +00:00
|
|
|
static inline __attribute__((always_inline))
|
|
|
|
void exec_barrier()
|
2018-01-09 11:55:46 +00:00
|
|
|
{
|
|
|
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
|
|
|
}
|
|
|
|
|
|
|
|
kernel void main0()
|
|
|
|
{
|
|
|
|
barrier_shared();
|
|
|
|
full_barrier();
|
MSL: Add support for subgroup operations.
Some support for subgroups is present starting in Metal 2.0 on both iOS
and macOS. macOS gains more complete support in 10.14 (Metal 2.1).
Some restrictions are present. On iOS and on macOS 10.13, the
implementation of `OpGroupNonUniformElect` is incorrect: if thread 0 has
already terminated or is not executing a conditional branch, the first
thread that *is* will falsely believe itself not to be. Unfortunately,
this operation is part of the "basic" feature set; without it, subgroups
cannot be supported at all.
The `SubgroupSize` and `SubgroupLocalInvocationId` builtins are only
available in compute shaders (and, by extension, tessellation control
shaders), despite SPIR-V making them available in all stages. This
limits the usefulness of some of the subgroup operations in fragment
shaders.
Although Metal on macOS supports some clustered, inclusive, and
exclusive operations, it does not support them all. In particular,
inclusive and exclusive min, max, and, or, and xor; as well as cluster
sizes other than 4 are not supported. If this becomes a problem, they
could be emulated, but at a significant performance cost due to the need
for non-uniform operations.
2019-05-15 21:03:30 +00:00
|
|
|
image_barrier();
|
2018-01-09 11:55:46 +00:00
|
|
|
buffer_barrier();
|
|
|
|
group_barrier();
|
|
|
|
barrier_shared_exec();
|
|
|
|
full_barrier_exec();
|
MSL: Add support for subgroup operations.
Some support for subgroups is present starting in Metal 2.0 on both iOS
and macOS. macOS gains more complete support in 10.14 (Metal 2.1).
Some restrictions are present. On iOS and on macOS 10.13, the
implementation of `OpGroupNonUniformElect` is incorrect: if thread 0 has
already terminated or is not executing a conditional branch, the first
thread that *is* will falsely believe itself not to be. Unfortunately,
this operation is part of the "basic" feature set; without it, subgroups
cannot be supported at all.
The `SubgroupSize` and `SubgroupLocalInvocationId` builtins are only
available in compute shaders (and, by extension, tessellation control
shaders), despite SPIR-V making them available in all stages. This
limits the usefulness of some of the subgroup operations in fragment
shaders.
Although Metal on macOS supports some clustered, inclusive, and
exclusive operations, it does not support them all. In particular,
inclusive and exclusive min, max, and, or, and xor; as well as cluster
sizes other than 4 are not supported. If this becomes a problem, they
could be emulated, but at a significant performance cost due to the need
for non-uniform operations.
2019-05-15 21:03:30 +00:00
|
|
|
image_barrier_exec();
|
2018-01-09 11:55:46 +00:00
|
|
|
buffer_barrier_exec();
|
|
|
|
group_barrier_exec();
|
|
|
|
exec_barrier();
|
|
|
|
}
|
|
|
|
|