Merge pull request #2271 from KhronosGroup/fix-2264

MSL: Handle volatile properly for emulated image atomics.
This commit is contained in:
Hans-Kristian Arntzen 2024-01-23 14:43:04 +01:00 committed by GitHub
commit 03b485dc47
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 419 additions and 6 deletions

View File

@ -0,0 +1,22 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
// The required alignment of a linear texture of R32Uint format.
// May be supplied through function constant 65535; when that constant is not defined the value falls back to 4.
constant uint spvLinearTextureAlignmentOverride [[function_constant(65535)]];
constant uint spvLinearTextureAlignment = is_function_constant_defined(spvLinearTextureAlignmentOverride) ? spvLinearTextureAlignmentOverride : 4;
// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics
// Evaluates to pitch * tc.y + tc.x, where pitch is the texture width rounded up using the
// mask (spvLinearTextureAlignment / 4 - 1).
// NOTE(review): the mask-based round-up is only exact when spvLinearTextureAlignment / 4 is a power of two,
// and the division by 4 presumably converts a byte alignment into 32-bit elements - confirm.
#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + spvLinearTextureAlignment / 4 - 1) & ~( spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x)
// Workgroup size constant (256 x 1 x 1); tagged [[maybe_unused]] and not referenced elsewhere in this shader.
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
// Compute kernel entry point.
// rw_spd_global_atomic        - 2D uint texture bound at texture(0); only used here to derive the buffer index.
// rw_spd_global_atomic_atomic - volatile device atomic_uint buffer bound at buffer(0) that the atomic is performed on.
// Does one relaxed atomic fetch-add of 1 on the buffer element that spvImage2DAtomicCoord yields for texel (0,0).
// The fetch-add result is kept in _43, which is not read afterwards.
kernel void main0(texture2d<uint> rw_spd_global_atomic [[texture(0)]], volatile device atomic_uint* rw_spd_global_atomic_atomic [[buffer(0)]])
{
uint _43 = atomic_fetch_add_explicit((volatile device atomic_uint*)&rw_spd_global_atomic_atomic[spvImage2DAtomicCoord(int2(0), rw_spd_global_atomic)], 1u, memory_order_relaxed);
}

View File

@ -0,0 +1,28 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
// The required alignment of a linear texture of R32Uint format.
// May be supplied through function constant 65535; when that constant is not defined the value falls back to 4.
constant uint spvLinearTextureAlignmentOverride [[function_constant(65535)]];
constant uint spvLinearTextureAlignment = is_function_constant_defined(spvLinearTextureAlignmentOverride) ? spvLinearTextureAlignmentOverride : 4;
// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics
// Evaluates to pitch * tc.y + tc.x, where pitch is the texture width rounded up using the
// mask (spvLinearTextureAlignment / 4 - 1).
// NOTE(review): the mask-based round-up is only exact when spvLinearTextureAlignment / 4 is a power of two,
// and the division by 4 presumably converts a byte alignment into 32-bit elements - confirm.
#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + spvLinearTextureAlignment / 4 - 1) & ~( spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x)
// Workgroup size constant (256 x 1 x 1); tagged [[maybe_unused]] and not referenced elsewhere in this shader.
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
// Resource struct that main0 receives as a constant reference at buffer(1).
// Members are identified by their [[id(n)]] attributes.
struct spvDescriptorSetBuffer1
{
// id(0): the 2D uint texture.
texture2d<uint> rw_spd_global_atomic [[id(0)]];
// id(1): volatile device pointer to the atomic_uint data that the atomic operation is performed on.
volatile device atomic_uint* rw_spd_global_atomic_atomic [[id(1)]];
};
// Compute kernel entry point; all resources arrive through spvDescriptorSet1 (buffer(1)).
// Does one relaxed atomic fetch-add of 1 on the element of rw_spd_global_atomic_atomic that
// spvImage2DAtomicCoord yields for texel (0,0) of rw_spd_global_atomic.
// The fetch-add result is kept in _43, which is not read afterwards.
kernel void main0(constant spvDescriptorSetBuffer1& spvDescriptorSet1 [[buffer(1)]])
{
uint _43 = atomic_fetch_add_explicit((volatile device atomic_uint*)&spvDescriptorSet1.rw_spd_global_atomic_atomic[spvImage2DAtomicCoord(int2(0), spvDescriptorSet1.rw_spd_global_atomic)], 1u, memory_order_relaxed);
}

View File

@ -0,0 +1,22 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
// The required alignment of a linear texture of R32Uint format.
// May be supplied through function constant 65535; when that constant is not defined the value falls back to 4.
constant uint spvLinearTextureAlignmentOverride [[function_constant(65535)]];
constant uint spvLinearTextureAlignment = is_function_constant_defined(spvLinearTextureAlignmentOverride) ? spvLinearTextureAlignmentOverride : 4;
// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics
// Evaluates to pitch * tc.y + tc.x, where pitch is the texture width rounded up using the
// mask (spvLinearTextureAlignment / 4 - 1).
// NOTE(review): the mask-based round-up is only exact when spvLinearTextureAlignment / 4 is a power of two,
// and the division by 4 presumably converts a byte alignment into 32-bit elements - confirm.
#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + spvLinearTextureAlignment / 4 - 1) & ~( spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x)
// Workgroup size constant (256 x 1 x 1); tagged [[maybe_unused]] and not referenced elsewhere in this shader.
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
// Compute kernel entry point.
// rw_spd_global_atomic        - 2D uint texture bound at texture(0); only used here to derive the buffer index.
// rw_spd_global_atomic_atomic - volatile device atomic_uint buffer bound at buffer(0) that the atomic is performed on.
// Does one relaxed atomic fetch-add of 1 on the buffer element that spvImage2DAtomicCoord yields for texel (0,0).
// The fetch-add result is kept in _43, which is not read afterwards.
kernel void main0(texture2d<uint> rw_spd_global_atomic [[texture(0)]], volatile device atomic_uint* rw_spd_global_atomic_atomic [[buffer(0)]])
{
uint _43 = atomic_fetch_add_explicit((volatile device atomic_uint*)&rw_spd_global_atomic_atomic[spvImage2DAtomicCoord(int2(0), rw_spd_global_atomic)], 1u, memory_order_relaxed);
}

View File

@ -0,0 +1,20 @@
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
// Workgroup size constant (256 x 1 x 1); tagged [[maybe_unused]] and not referenced elsewhere in this shader.
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
// Resource struct that main0 receives as a constant reference at buffer(1).
// Holds a single member, identified by [[id(0)]].
struct spvDescriptorSetBuffer1
{
// id(0): 2D uint texture with read_write access; the atomic is issued directly on this texture.
texture2d<uint, access::read_write> rw_spd_global_atomic [[id(0)]];
};
// Compute kernel entry point; the texture arrives through spvDescriptorSet1 (buffer(1)).
// Calls the texture's atomic_fetch_add member with value 1 at coordinate (0,0).
// The .x component of the result is kept in _43, which is not read afterwards.
kernel void main0(constant spvDescriptorSetBuffer1& spvDescriptorSet1 [[buffer(1)]])
{
uint _43 = spvDescriptorSet1.rw_spd_global_atomic.atomic_fetch_add(uint2(int2(0)), 1u).x;
}

View File

@ -0,0 +1,15 @@
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
// Workgroup size constant (256 x 1 x 1); tagged [[maybe_unused]] and not referenced elsewhere in this shader.
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
// Compute kernel entry point.
// rw_spd_global_atomic - 2D uint texture with read_write access, bound at texture(0).
// Calls the texture's atomic_fetch_add member with value 1 at coordinate (0,0).
// The .x component of the result is kept in _43, which is not read afterwards.
kernel void main0(texture2d<uint, access::read_write> rw_spd_global_atomic [[texture(0)]])
{
uint _43 = rw_spd_global_atomic.atomic_fetch_add(uint2(int2(0)), 1u).x;
}

View File

@ -0,0 +1,38 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
// The required alignment of a linear texture of R32Uint format.
// May be supplied through function constant 65535; when that constant is not defined the value falls back to 4.
constant uint spvLinearTextureAlignmentOverride [[function_constant(65535)]];
constant uint spvLinearTextureAlignment = is_function_constant_defined(spvLinearTextureAlignmentOverride) ? spvLinearTextureAlignmentOverride : 4;
// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics
// Evaluates to pitch * tc.y + tc.x, where pitch is the texture width rounded up using the
// mask (spvLinearTextureAlignment / 4 - 1).
// NOTE(review): the mask-based round-up is only exact when spvLinearTextureAlignment / 4 is a power of two,
// and the division by 4 presumably converts a byte alignment into 32-bit elements - confirm.
#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + spvLinearTextureAlignment / 4 - 1) & ~( spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x)
// Workgroup size constant (256 x 1 x 1); tagged [[maybe_unused]] and not referenced elsewhere in this shader.
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
// Increments the counter stored for texel (0,0) and reports the fetch-add result to the caller.
// spdCounter                  - out: receives the value returned by atomic_fetch_add_explicit.
// rw_spd_global_atomic        - texture used only to derive the buffer index via spvImage2DAtomicCoord.
// rw_spd_global_atomic_atomic - volatile device atomic_uint buffer the relaxed fetch-add of 1 is applied to.
static inline __attribute__((always_inline))
void SPD_IncreaseAtomicCounter(thread uint& spdCounter, texture2d<uint> rw_spd_global_atomic, volatile device atomic_uint* rw_spd_global_atomic_atomic)
{
uint _25 = atomic_fetch_add_explicit((volatile device atomic_uint*)&rw_spd_global_atomic_atomic[spvImage2DAtomicCoord(int2(0), rw_spd_global_atomic)], 1u, memory_order_relaxed);
spdCounter = _25;
}
// Forwards the texture and its atomic buffer to SPD_IncreaseAtomicCounter.
// The counter value is passed through the temporary `param` and copied back into `v`;
// `v` is not read after that assignment.
static inline __attribute__((always_inline))
void ComputeAutoExposure(texture2d<uint> rw_spd_global_atomic, volatile device atomic_uint* rw_spd_global_atomic_atomic)
{
uint v = 0u;
uint param = v;
SPD_IncreaseAtomicCounter(param, rw_spd_global_atomic, rw_spd_global_atomic_atomic);
v = param;
}
// Compute kernel entry point.
// rw_spd_global_atomic        - 2D uint texture bound at texture(0).
// rw_spd_global_atomic_atomic - volatile device atomic_uint buffer bound at buffer(0).
// Passes both resources straight to ComputeAutoExposure.
kernel void main0(texture2d<uint> rw_spd_global_atomic [[texture(0)]], volatile device atomic_uint* rw_spd_global_atomic_atomic [[buffer(0)]])
{
ComputeAutoExposure(rw_spd_global_atomic, rw_spd_global_atomic_atomic);
}

View File

@ -0,0 +1,44 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
// The required alignment of a linear texture of R32Uint format.
// May be supplied through function constant 65535; when that constant is not defined the value falls back to 4.
constant uint spvLinearTextureAlignmentOverride [[function_constant(65535)]];
constant uint spvLinearTextureAlignment = is_function_constant_defined(spvLinearTextureAlignmentOverride) ? spvLinearTextureAlignmentOverride : 4;
// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics
// Evaluates to pitch * tc.y + tc.x, where pitch is the texture width rounded up using the
// mask (spvLinearTextureAlignment / 4 - 1).
// NOTE(review): the mask-based round-up is only exact when spvLinearTextureAlignment / 4 is a power of two,
// and the division by 4 presumably converts a byte alignment into 32-bit elements - confirm.
#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + spvLinearTextureAlignment / 4 - 1) & ~( spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x)
// Workgroup size constant (256 x 1 x 1); tagged [[maybe_unused]] and not referenced elsewhere in this shader.
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
// Resource struct that main0 receives as a constant reference at buffer(1).
// Members are identified by their [[id(n)]] attributes.
struct spvDescriptorSetBuffer1
{
// id(0): the 2D uint texture.
texture2d<uint> rw_spd_global_atomic [[id(0)]];
// id(1): volatile device pointer to the atomic_uint data that the atomic operation is performed on.
volatile device atomic_uint* rw_spd_global_atomic_atomic [[id(1)]];
};
// Increments the counter stored for texel (0,0) and reports the fetch-add result to the caller.
// spdCounter                  - out: receives the value returned by atomic_fetch_add_explicit.
// rw_spd_global_atomic        - texture used only to derive the buffer index via spvImage2DAtomicCoord.
// rw_spd_global_atomic_atomic - volatile device atomic_uint buffer the relaxed fetch-add of 1 is applied to.
static inline __attribute__((always_inline))
void SPD_IncreaseAtomicCounter(thread uint& spdCounter, texture2d<uint> rw_spd_global_atomic, volatile device atomic_uint* rw_spd_global_atomic_atomic)
{
uint _25 = atomic_fetch_add_explicit((volatile device atomic_uint*)&rw_spd_global_atomic_atomic[spvImage2DAtomicCoord(int2(0), rw_spd_global_atomic)], 1u, memory_order_relaxed);
spdCounter = _25;
}
// Forwards the texture and its atomic buffer to SPD_IncreaseAtomicCounter.
// The counter value is passed through the temporary `param` and copied back into `v`;
// `v` is not read after that assignment.
static inline __attribute__((always_inline))
void ComputeAutoExposure(texture2d<uint> rw_spd_global_atomic, volatile device atomic_uint* rw_spd_global_atomic_atomic)
{
uint v = 0u;
uint param = v;
SPD_IncreaseAtomicCounter(param, rw_spd_global_atomic, rw_spd_global_atomic_atomic);
v = param;
}
// Compute kernel entry point; all resources arrive through spvDescriptorSet1 (buffer(1)).
// Unpacks the texture and its atomic buffer from the struct and hands them to ComputeAutoExposure.
kernel void main0(constant spvDescriptorSetBuffer1& spvDescriptorSet1 [[buffer(1)]])
{
ComputeAutoExposure(spvDescriptorSet1.rw_spd_global_atomic, spvDescriptorSet1.rw_spd_global_atomic_atomic);
}

View File

@ -0,0 +1,38 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
// The required alignment of a linear texture of R32Uint format.
// May be supplied through function constant 65535; when that constant is not defined the value falls back to 4.
constant uint spvLinearTextureAlignmentOverride [[function_constant(65535)]];
constant uint spvLinearTextureAlignment = is_function_constant_defined(spvLinearTextureAlignmentOverride) ? spvLinearTextureAlignmentOverride : 4;
// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics
// Evaluates to pitch * tc.y + tc.x, where pitch is the texture width rounded up using the
// mask (spvLinearTextureAlignment / 4 - 1).
// NOTE(review): the mask-based round-up is only exact when spvLinearTextureAlignment / 4 is a power of two,
// and the division by 4 presumably converts a byte alignment into 32-bit elements - confirm.
#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + spvLinearTextureAlignment / 4 - 1) & ~( spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x)
// Workgroup size constant (256 x 1 x 1); tagged [[maybe_unused]] and not referenced elsewhere in this shader.
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
// Increments the counter stored for texel (0,0) and reports the fetch-add result to the caller.
// spdCounter                  - out: receives the value returned by atomic_fetch_add_explicit.
// rw_spd_global_atomic        - texture used only to derive the buffer index via spvImage2DAtomicCoord.
// rw_spd_global_atomic_atomic - volatile device atomic_uint buffer the relaxed fetch-add of 1 is applied to.
static inline __attribute__((always_inline))
void SPD_IncreaseAtomicCounter(thread uint& spdCounter, texture2d<uint> rw_spd_global_atomic, volatile device atomic_uint* rw_spd_global_atomic_atomic)
{
uint _25 = atomic_fetch_add_explicit((volatile device atomic_uint*)&rw_spd_global_atomic_atomic[spvImage2DAtomicCoord(int2(0), rw_spd_global_atomic)], 1u, memory_order_relaxed);
spdCounter = _25;
}
// Forwards the texture and its atomic buffer to SPD_IncreaseAtomicCounter.
// The counter value is passed through the temporary `param` and copied back into `v`;
// `v` is not read after that assignment.
static inline __attribute__((always_inline))
void ComputeAutoExposure(texture2d<uint> rw_spd_global_atomic, volatile device atomic_uint* rw_spd_global_atomic_atomic)
{
uint v = 0u;
uint param = v;
SPD_IncreaseAtomicCounter(param, rw_spd_global_atomic, rw_spd_global_atomic_atomic);
v = param;
}
// Compute kernel entry point.
// rw_spd_global_atomic        - 2D uint texture bound at texture(0).
// rw_spd_global_atomic_atomic - volatile device atomic_uint buffer bound at buffer(0).
// Passes both resources straight to ComputeAutoExposure.
kernel void main0(texture2d<uint> rw_spd_global_atomic [[texture(0)]], volatile device atomic_uint* rw_spd_global_atomic_atomic [[buffer(0)]])
{
ComputeAutoExposure(rw_spd_global_atomic, rw_spd_global_atomic_atomic);
}

View File

@ -0,0 +1,37 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
// Workgroup size constant (256 x 1 x 1); tagged [[maybe_unused]] and not referenced elsewhere in this shader.
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
// Resource struct that main0 receives as a constant reference at buffer(1).
// Holds a single member, identified by [[id(0)]].
struct spvDescriptorSetBuffer1
{
// id(0): 2D uint texture with read_write access; the atomic is issued directly on this texture.
texture2d<uint, access::read_write> rw_spd_global_atomic [[id(0)]];
};
// Increments the counter at texel (0,0) with the texture's atomic_fetch_add member (value 1).
// spdCounter           - out: receives the .x component of the atomic_fetch_add result.
// rw_spd_global_atomic - read_write 2D uint texture the atomic is issued on.
static inline __attribute__((always_inline))
void SPD_IncreaseAtomicCounter(thread uint& spdCounter, texture2d<uint, access::read_write> rw_spd_global_atomic)
{
uint _25 = rw_spd_global_atomic.atomic_fetch_add(uint2(int2(0)), 1u).x;
spdCounter = _25;
}
// Forwards the texture to SPD_IncreaseAtomicCounter.
// The counter value is passed through the temporary `param` and copied back into `v`;
// `v` is not read after that assignment.
static inline __attribute__((always_inline))
void ComputeAutoExposure(texture2d<uint, access::read_write> rw_spd_global_atomic)
{
uint v = 0u;
uint param = v;
SPD_IncreaseAtomicCounter(param, rw_spd_global_atomic);
v = param;
}
// Compute kernel entry point; the texture arrives through spvDescriptorSet1 (buffer(1)).
// Hands the struct's texture member to ComputeAutoExposure.
kernel void main0(constant spvDescriptorSetBuffer1& spvDescriptorSet1 [[buffer(1)]])
{
ComputeAutoExposure(spvDescriptorSet1.rw_spd_global_atomic);
}

View File

@ -0,0 +1,32 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
// Workgroup size constant (256 x 1 x 1); tagged [[maybe_unused]] and not referenced elsewhere in this shader.
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
// Increments the counter at texel (0,0) with the texture's atomic_fetch_add member (value 1).
// spdCounter           - out: receives the .x component of the atomic_fetch_add result.
// rw_spd_global_atomic - read_write 2D uint texture the atomic is issued on.
static inline __attribute__((always_inline))
void SPD_IncreaseAtomicCounter(thread uint& spdCounter, texture2d<uint, access::read_write> rw_spd_global_atomic)
{
uint _25 = rw_spd_global_atomic.atomic_fetch_add(uint2(int2(0)), 1u).x;
spdCounter = _25;
}
// Forwards the texture to SPD_IncreaseAtomicCounter.
// The counter value is passed through the temporary `param` and copied back into `v`;
// `v` is not read after that assignment.
static inline __attribute__((always_inline))
void ComputeAutoExposure(texture2d<uint, access::read_write> rw_spd_global_atomic)
{
uint v = 0u;
uint param = v;
SPD_IncreaseAtomicCounter(param, rw_spd_global_atomic);
v = param;
}
// Compute kernel entry point.
// rw_spd_global_atomic - 2D uint texture with read_write access, bound at texture(0).
// Passes the texture straight to ComputeAutoExposure.
kernel void main0(texture2d<uint, access::read_write> rw_spd_global_atomic [[texture(0)]])
{
ComputeAutoExposure(rw_spd_global_atomic);
}

View File

@ -0,0 +1,20 @@
#version 450
// r32ui storage image at set 1, binding 0, declared coherent.
// The functions below perform an atomic add on its texel (0,0).
layout (set = 1, binding = 0, r32ui) coherent uniform uimage2D rw_spd_global_atomic;
// Atomically adds 1 to texel (0,0) of rw_spd_global_atomic and writes the value
// returned by imageAtomicAdd into spdCounter.
void SPD_IncreaseAtomicCounter(inout uint spdCounter)
{
spdCounter = imageAtomicAdd(rw_spd_global_atomic, ivec2(0,0), 1);
}
// Runs the counter increment on a local initialised to 0; the local is not read afterwards.
void ComputeAutoExposure() {
uint v = 0;
SPD_IncreaseAtomicCounter(v);
}
// Workgroup dimensions: 256 x 1 x 1.
layout (local_size_x = 256, local_size_y = 1, local_size_z = 1) in;
// Entry point: unconditionally calls ComputeAutoExposure().
void main()
{
ComputeAutoExposure();
}

View File

@ -0,0 +1,20 @@
#version 450
// r32ui storage image at set 1, binding 0, declared coherent.
// The functions below perform an atomic add on its texel (0,0).
layout (set = 1, binding = 0, r32ui) coherent uniform uimage2D rw_spd_global_atomic;
// Atomically adds 1 to texel (0,0) of rw_spd_global_atomic and writes the value
// returned by imageAtomicAdd into spdCounter.
void SPD_IncreaseAtomicCounter(inout uint spdCounter)
{
spdCounter = imageAtomicAdd(rw_spd_global_atomic, ivec2(0,0), 1);
}
// Runs the counter increment on a local initialised to 0; the local is not read afterwards.
void ComputeAutoExposure() {
uint v = 0;
SPD_IncreaseAtomicCounter(v);
}
// Workgroup dimensions: 256 x 1 x 1.
layout (local_size_x = 256, local_size_y = 1, local_size_z = 1) in;
// Entry point: unconditionally calls ComputeAutoExposure().
void main()
{
ComputeAutoExposure();
}

View File

@ -0,0 +1,20 @@
#version 450
// r32ui storage image at set 1, binding 0, declared coherent.
// The functions below perform an atomic add on its texel (0,0).
layout (set = 1, binding = 0, r32ui) coherent uniform uimage2D rw_spd_global_atomic;
// Atomically adds 1 to texel (0,0) of rw_spd_global_atomic and writes the value
// returned by imageAtomicAdd into spdCounter.
void SPD_IncreaseAtomicCounter(inout uint spdCounter)
{
spdCounter = imageAtomicAdd(rw_spd_global_atomic, ivec2(0,0), 1);
}
// Runs the counter increment on a local initialised to 0; the local is not read afterwards.
void ComputeAutoExposure() {
uint v = 0;
SPD_IncreaseAtomicCounter(v);
}
// Workgroup dimensions: 256 x 1 x 1.
layout (local_size_x = 256, local_size_y = 1, local_size_z = 1) in;
// Entry point: unconditionally calls ComputeAutoExposure().
void main()
{
ComputeAutoExposure();
}

View File

@ -0,0 +1,20 @@
#version 450
// r32ui storage image at set 1, binding 0, declared coherent.
// The functions below perform an atomic add on its texel (0,0).
layout (set = 1, binding = 0, r32ui) coherent uniform uimage2D rw_spd_global_atomic;
// Atomically adds 1 to texel (0,0) of rw_spd_global_atomic and writes the value
// returned by imageAtomicAdd into spdCounter.
void SPD_IncreaseAtomicCounter(inout uint spdCounter)
{
spdCounter = imageAtomicAdd(rw_spd_global_atomic, ivec2(0,0), 1);
}
// Runs the counter increment on a local initialised to 0; the local is not read afterwards.
void ComputeAutoExposure() {
uint v = 0;
SPD_IncreaseAtomicCounter(v);
}
// Workgroup dimensions: 256 x 1 x 1.
layout (local_size_x = 256, local_size_y = 1, local_size_z = 1) in;
// Entry point: unconditionally calls ComputeAutoExposure().
void main()
{
ComputeAutoExposure();
}

View File

@ -0,0 +1,20 @@
#version 450
// r32ui storage image at set 1, binding 0, declared coherent.
// The functions below perform an atomic add on its texel (0,0).
layout (set = 1, binding = 0, r32ui) coherent uniform uimage2D rw_spd_global_atomic;
// Atomically adds 1 to texel (0,0) of rw_spd_global_atomic and writes the value
// returned by imageAtomicAdd into spdCounter.
void SPD_IncreaseAtomicCounter(inout uint spdCounter)
{
spdCounter = imageAtomicAdd(rw_spd_global_atomic, ivec2(0,0), 1);
}
// Runs the counter increment on a local initialised to 0; the local is not read afterwards.
void ComputeAutoExposure() {
uint v = 0;
SPD_IncreaseAtomicCounter(v);
}
// Workgroup dimensions: 256 x 1 x 1.
layout (local_size_x = 256, local_size_y = 1, local_size_z = 1) in;
// Entry point: unconditionally calls ComputeAutoExposure().
void main()
{
ComputeAutoExposure();
}

View File

@ -1938,10 +1938,14 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
// When using the pointer, we need to know which variable it is actually loaded from.
uint32_t base_id = ops[2];
auto *var = maybe_get_backing_variable(base_id);
if (var && atomic_image_vars_emulated.count(var->self))
if (var)
{
if (!get<SPIRType>(var->basetype).array.empty())
SPIRV_CROSS_THROW("Cannot emulate array of storage images with atomics. Use MSL 3.1 for native support.");
if (atomic_image_vars_emulated.count(var->self) &&
!get<SPIRType>(var->basetype).array.empty())
{
SPIRV_CROSS_THROW(
"Cannot emulate array of storage images with atomics. Use MSL 3.1 for native support.");
}
if (global_var_ids.find(base_id) != global_var_ids.end())
added_arg_ids.insert(base_id);
@ -10194,6 +10198,9 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
// Emulate texture2D atomic operations
if (res_type.storage == StorageClassUniformConstant && res_type.basetype == SPIRType::Image)
{
auto &flags = ir.get_decoration_bitset(var->self);
if (decoration_flags_signal_volatile(flags))
exp += "volatile ";
exp += "device";
}
else
@ -12850,6 +12857,11 @@ string CompilerMSL::get_argument_address_space(const SPIRVariable &argument)
return get_type_address_space(type, argument.self, true);
}
// Decides whether a set of decoration flags calls for a "volatile " prefix in the emitted MSL.
// Returns true when DecorationVolatile is set, otherwise the state of DecorationCoherent.
bool CompilerMSL::decoration_flags_signal_volatile(const Bitset &flags)
{
	if (flags.get(DecorationVolatile))
		return true;

	return flags.get(DecorationCoherent);
}
string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bool argument)
{
// This can be called for variable pointer contexts as well, so be very careful about which method we choose.
@ -12959,7 +12971,7 @@ string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bo
addr_space = type.pointer || (argument && type.basetype == SPIRType::ControlPointArray) ? "thread" : "";
}
return join(flags.get(DecorationVolatile) || flags.get(DecorationCoherent) ? "volatile " : "", addr_space);
return join(decoration_flags_signal_volatile(flags) ? "volatile " : "", addr_space);
}
const char *CompilerMSL::to_restrict(uint32_t id, bool space)
@ -13693,7 +13705,9 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
// Emulate texture2D atomic operations
if (atomic_image_vars_emulated.count(var.self))
{
ep_args += ", device atomic_" + type_to_glsl(get<SPIRType>(basetype.image.type), 0);
auto &flags = ir.get_decoration_bitset(var.self);
const char *cv_flags = decoration_flags_signal_volatile(flags) ? "volatile " : "";
ep_args += join(", ", cv_flags, "device atomic_", type_to_glsl(get<SPIRType>(basetype.image.type), 0));
ep_args += "* " + r.name + "_atomic";
ep_args += " [[buffer(" + convert_to_string(r.secondary_index) + ")";
if (interlocked_resources.count(var_id))
@ -14788,7 +14802,9 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
auto *backing_var = maybe_get_backing_variable(name_id);
if (backing_var && atomic_image_vars_emulated.count(backing_var->self))
{
decl += ", device atomic_" + type_to_glsl(get<SPIRType>(var_type.image.type), 0);
auto &flags = ir.get_decoration_bitset(backing_var->self);
const char *cv_flags = decoration_flags_signal_volatile(flags) ? "volatile " : "";
decl += join(", ", cv_flags, "device atomic_", type_to_glsl(get<SPIRType>(var_type.image.type), 0));
decl += "* " + to_expression(name_id) + "_atomic";
}

View File

@ -1045,6 +1045,7 @@ protected:
bool validate_member_packing_rules_msl(const SPIRType &type, uint32_t index) const;
std::string get_argument_address_space(const SPIRVariable &argument);
std::string get_type_address_space(const SPIRType &type, uint32_t id, bool argument = false);
// True when `flags` contains DecorationVolatile or DecorationCoherent.
static bool decoration_flags_signal_volatile(const Bitset &flags);
const char *to_restrict(uint32_t id, bool space);
SPIRType &get_stage_in_struct_type();
SPIRType &get_stage_out_struct_type();