From efbe7ca16fef170bcd5724bc79646b804abf4507 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Fri, 5 Apr 2019 21:28:57 -0400 Subject: [PATCH] MSL: Fix infinite CAS loop on atomic_compare_exchange_weak_explicit(). --- reference/opt/shaders-msl/comp/atomic.comp | 8 ++++---- reference/shaders-msl/comp/atomic.comp | 8 ++++---- spirv_msl.cpp | 10 +++++++--- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/reference/opt/shaders-msl/comp/atomic.comp b/reference/opt/shaders-msl/comp/atomic.comp index f77922ac..0315f3b1 100644 --- a/reference/opt/shaders-msl/comp/atomic.comp +++ b/reference/opt/shaders-msl/comp/atomic.comp @@ -27,7 +27,7 @@ kernel void main0(device SSBO& ssbo [[buffer(2)]]) do { _32 = 10u; - } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed)); + } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed) && _32 == 10u); int _36 = atomic_fetch_add_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); int _38 = atomic_fetch_or_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); int _40 = atomic_fetch_xor_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); @@ -39,7 +39,7 @@ kernel void main0(device SSBO& ssbo [[buffer(2)]]) do { _52 = 10; - } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed)); + } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed) && _52 == 10); shared_u32 = 10u; shared_i32 = 10; uint _57 = atomic_fetch_add_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); @@ -53,7 +53,7 @@ kernel void main0(device SSBO& ssbo [[buffer(2)]]) do { _64 = 10u; - } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed)); + } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed) && _64 == 10u); int _65 = atomic_fetch_add_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); int _66 = atomic_fetch_or_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); int _67 = atomic_fetch_xor_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); @@ -65,6 +65,6 @@ kernel void main0(device SSBO& ssbo [[buffer(2)]]) do { _72 = 10; - } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_int*)&shared_i32, &_72, 2, memory_order_relaxed, memory_order_relaxed)); + } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_int*)&shared_i32, &_72, 2, memory_order_relaxed, memory_order_relaxed) && _72 == 10); } diff --git a/reference/shaders-msl/comp/atomic.comp b/reference/shaders-msl/comp/atomic.comp index f77922ac..0315f3b1 100644 --- a/reference/shaders-msl/comp/atomic.comp +++ b/reference/shaders-msl/comp/atomic.comp @@ -27,7 +27,7 @@ kernel void main0(device SSBO& ssbo [[buffer(2)]]) do { _32 = 10u; - } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed)); + } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed) && _32 == 10u); int _36 = atomic_fetch_add_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); int _38 = atomic_fetch_or_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); int _40 = atomic_fetch_xor_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); @@ -39,7 +39,7 @@ kernel void main0(device SSBO& ssbo [[buffer(2)]]) do { _52 = 10; - } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed)); + } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed) && _52 == 10); shared_u32 = 10u; shared_i32 = 10; uint _57 = atomic_fetch_add_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); @@ -53,7 +53,7 @@ kernel void main0(device SSBO& ssbo [[buffer(2)]]) do { _64 = 10u; - } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed)); + } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed) && _64 == 10u); int _65 = atomic_fetch_add_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); int _66 = atomic_fetch_or_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); int _67 = atomic_fetch_xor_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); @@ -65,6 +65,6 @@ kernel void main0(device SSBO& ssbo [[buffer(2)]]) do { _72 = 10; - } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_int*)&shared_i32, &_72, 2, memory_order_relaxed, memory_order_relaxed)); + } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_int*)&shared_i32, &_72, 2, memory_order_relaxed, memory_order_relaxed) && _72 == 10); } diff --git a/spirv_msl.cpp b/spirv_msl.cpp index ac60d753..72fb872a 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -4043,13 +4043,17 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, exp += get_memory_order(mem_order_2); exp += ")"; - // MSL only supports the weak atomic compare exchange, - // so emit a CAS loop here. + // MSL only supports the weak atomic compare exchange, so emit a CAS loop here. + // The MSL function returns false if the atomic write fails OR the comparison test fails, + // so we must validate that it wasn't the comparison test that failed before continuing + // the CAS loop, otherwise it will loop infinitely, with the comparison test always failing. + // The function updates the comparitor value from the memory value, so the additional + // comparison test evaluates the memory value against the expected value. statement(variable_decl(type, to_name(result_id)), ";"); statement("do"); begin_scope(); statement(to_name(result_id), " = ", to_expression(op1), ";"); - end_scope_decl(join("while (!", exp, ")")); + end_scope_decl(join("while (!", exp, " && ", to_name(result_id), " == ", to_enclosed_expression(op1), ")")); set(result_id, to_name(result_id), result_type, true); } else