diff --git a/CMakeLists.txt b/CMakeLists.txt index d7e211de..f0298763 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -323,7 +323,7 @@ if (SPIRV_CROSS_STATIC) endif() set(spirv-cross-abi-major 0) -set(spirv-cross-abi-minor 24) +set(spirv-cross-abi-minor 25) set(spirv-cross-abi-patch 0) if (SPIRV_CROSS_SHARED) diff --git a/main.cpp b/main.cpp index f19bc9b5..7620144a 100644 --- a/main.cpp +++ b/main.cpp @@ -522,6 +522,7 @@ struct CLIArguments bool msl_dispatch_base = false; bool msl_decoration_binding = false; bool msl_force_active_argument_buffer_resources = false; + bool msl_force_native_arrays = false; bool glsl_emit_push_constant_as_ubo = false; bool glsl_emit_ubo_as_plain_uniforms = false; bool vulkan_glsl_disable_ext_samplerless_texture_functions = false; @@ -616,6 +617,7 @@ static void print_help() "\t[--msl-inline-uniform-block ]\n" "\t[--msl-decoration-binding]\n" "\t[--msl-force-active-argument-buffer-resources]\n" + "\t[--msl-force-native-arrays]\n" "\t[--hlsl]\n" "\t[--reflect]\n" "\t[--shader-model]\n" @@ -806,6 +808,7 @@ static string compile_iteration(const CLIArguments &args, std::vector msl_opts.dispatch_base = args.msl_dispatch_base; msl_opts.enable_decoration_binding = args.msl_decoration_binding; msl_opts.force_active_argument_buffer_resources = args.msl_force_active_argument_buffer_resources; + msl_opts.force_native_arrays = args.msl_force_native_arrays; msl_comp->set_msl_options(msl_opts); for (auto &v : args.msl_discrete_descriptor_sets) msl_comp->add_discrete_descriptor_set(v); @@ -1164,6 +1167,9 @@ static int main_inner(int argc, char *argv[]) uint32_t binding = parser.next_uint(); args.msl_inline_uniform_blocks.push_back(make_pair(desc_set, binding)); }); + cbs.add("--msl-force-native-arrays", [&args](CLIParser &) { + args.msl_force_native_arrays = true; + }); cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); }); cbs.add("--rename-entry-point", [&args](CLIParser &parser) { auto old_name = parser.next_string(); diff --git a/reference/opt/shaders-msl/comp/composite-array-initialization.force-native-array.comp b/reference/opt/shaders-msl/comp/composite-array-initialization.force-native-array.comp new file mode 100644 index 00000000..8f150e47 --- /dev/null +++ b/reference/opt/shaders-msl/comp/composite-array-initialization.force-native-array.comp @@ -0,0 +1,94 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct Data +{ + float a; + float b; +}; + +constant float X_tmp [[function_constant(0)]]; +constant float X = is_function_constant_defined(X_tmp) ? X_tmp : 4.0; + +struct Data_1 +{ + float a; + float b; +}; + +struct SSBO +{ + Data_1 outdata[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(2u, 1u, 1u); + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +kernel void main0(device SSBO& _53 [[buffer(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +{ + Data _25[2] = { Data{ 1.0, 2.0 }, Data{ 3.0, 4.0 } }; + + Data _31[2] = { Data{ X, 2.0 }, Data{ 3.0, 5.0 } }; + Data data2[2]; + spvArrayCopyFromStackToStack1(data2, _31); + _53.outdata[gl_WorkGroupID.x].a = _25[gl_LocalInvocationID.x].a + data2[gl_LocalInvocationID.x].a; + _53.outdata[gl_WorkGroupID.x].b = _25[gl_LocalInvocationID.x].b + data2[gl_LocalInvocationID.x].b; +} + diff --git a/reference/opt/shaders-msl/comp/copy-array-of-arrays.force-native-array.comp b/reference/opt/shaders-msl/comp/copy-array-of-arrays.force-native-array.comp new file mode 100644 index 00000000..cb396cff --- /dev/null +++ b/reference/opt/shaders-msl/comp/copy-array-of-arrays.force-native-array.comp @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct BUF +{ + int a; + float b; + float c; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device BUF& o [[buffer(0)]]) +{ + o.a = 4; + o.b = o.c; +} + diff --git a/reference/opt/shaders-msl/vert/return-array.force-native-array.vert b/reference/opt/shaders-msl/vert/return-array.force-native-array.vert new file mode 100644 index 00000000..ce13349a --- /dev/null +++ b/reference/opt/shaders-msl/vert/return-array.force-native-array.vert @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 vInput1 [[attribute(1)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + out.gl_Position = float4(10.0) + in.vInput1; + return out; +} + diff --git a/reference/shaders-msl-no-opt/vert/pass-array-by-value.force-native-array.vert b/reference/shaders-msl-no-opt/vert/pass-array-by-value.force-native-array.vert new file mode 100644 index 00000000..d686d02b --- /dev/null +++ b/reference/shaders-msl-no-opt/vert/pass-array-by-value.force-native-array.vert @@ -0,0 +1,103 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +constant float4 _68[4] = { float4(0.0), float4(1.0), float4(2.0), float4(3.0) }; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + int Index1 [[attribute(0)]]; + int Index2 [[attribute(1)]]; +}; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +static inline __attribute__((always_inline)) +float4 consume_constant_arrays2(thread const float4 (&positions)[4], thread const float4 (&positions2)[4], thread int& Index1, thread int& Index2) +{ + float4 indexable[4]; + spvArrayCopyFromStackToStack1(indexable, positions); + float4 indexable_1[4]; + spvArrayCopyFromStackToStack1(indexable_1, positions2); + return indexable[Index1] + indexable_1[Index2]; +} + +static inline __attribute__((always_inline)) +float4 consume_constant_arrays(thread const float4 (&positions)[4], thread const float4 (&positions2)[4], thread int& Index1, thread int& Index2) +{ + return consume_constant_arrays2(positions, positions2, Index1, Index2); +} + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + float4 _68_array_copy[4] = { float4(0.0), float4(1.0), float4(2.0), float4(3.0) }; + main0_out out = {}; + float4 LUT2[4]; + LUT2[0] = float4(10.0); + LUT2[1] = float4(11.0); + LUT2[2] = float4(12.0); + LUT2[3] = float4(13.0); + out.gl_Position = consume_constant_arrays(_68_array_copy, LUT2, in.Index1, in.Index2); + return out; +} + diff --git a/reference/shaders-msl/comp/composite-array-initialization.force-native-array.comp b/reference/shaders-msl/comp/composite-array-initialization.force-native-array.comp new file mode 100644 index 00000000..f8e6ef92 --- /dev/null +++ b/reference/shaders-msl/comp/composite-array-initialization.force-native-array.comp @@ -0,0 +1,104 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct Data +{ + float a; + float b; +}; + +constant float X_tmp [[function_constant(0)]]; +constant float X = is_function_constant_defined(X_tmp) ? X_tmp : 4.0; + +struct Data_1 +{ + float a; + float b; +}; + +struct SSBO +{ + Data_1 outdata[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(2u, 1u, 1u); + +constant Data _25[2] = { Data{ 1.0, 2.0 }, Data{ 3.0, 4.0 } }; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +static inline __attribute__((always_inline)) +Data combine(thread const Data& a, thread const Data& b) +{ + return Data{ a.a + b.a, a.b + b.b }; +} + +kernel void main0(device SSBO& _53 [[buffer(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +{ + Data data[2] = { Data{ 1.0, 2.0 }, Data{ 3.0, 4.0 } }; + Data _31[2] = { Data{ X, 2.0 }, Data{ 3.0, 5.0 } }; + Data data2[2]; + spvArrayCopyFromStackToStack1(data2, _31); + Data param = data[gl_LocalInvocationID.x]; + Data param_1 = data2[gl_LocalInvocationID.x]; + Data _73 = combine(param, param_1); + _53.outdata[gl_WorkGroupID.x].a = _73.a; + _53.outdata[gl_WorkGroupID.x].b = _73.b; +} + diff --git a/reference/shaders-msl/comp/copy-array-of-arrays.force-native-array.comp b/reference/shaders-msl/comp/copy-array-of-arrays.force-native-array.comp new file mode 100644 index 00000000..5f8b0330 --- /dev/null +++ b/reference/shaders-msl/comp/copy-array-of-arrays.force-native-array.comp @@ -0,0 +1,202 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct BUF +{ + int a; + float b; + float c; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +constant float _16[2] = { 1.0, 2.0 }; +constant float _19[2] = { 3.0, 4.0 }; +constant float _20[2][2] = { { 1.0, 2.0 }, { 3.0, 4.0 } }; +constant float _21[2][2][2] = { { { 1.0, 2.0 }, { 3.0, 4.0 } }, { { 1.0, 2.0 }, { 3.0, 4.0 } } }; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToStack2(thread T (&dst)[A][B], constant T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromConstantToStack1(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup2(threadgroup T (&dst)[A][B], constant T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromConstantToThreadGroup1(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromStackToStack2(thread T (&dst)[A][B], thread const T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromStackToStack1(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup2(threadgroup T (&dst)[A][B], thread const T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromStackToThreadGroup1(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack2(thread T (&dst)[A][B], threadgroup const T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromThreadGroupToStack1(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup2(threadgroup T (&dst)[A][B], threadgroup const T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromThreadGroupToThreadGroup1(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromConstantToStack3(thread T (&dst)[A][B][C], constant T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromConstantToStack2(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup3(threadgroup T (&dst)[A][B][C], constant T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromConstantToThreadGroup2(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromStackToStack3(thread T (&dst)[A][B][C], thread const T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromStackToStack2(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup3(threadgroup T (&dst)[A][B][C], thread const T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromStackToThreadGroup2(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack3(thread T (&dst)[A][B][C], threadgroup const T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromThreadGroupToStack2(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup3(threadgroup T (&dst)[A][B][C], threadgroup const T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromThreadGroupToThreadGroup2(dst[i], src[i]); + } +} + +kernel void main0(device BUF& o [[buffer(0)]]) +{ + float c[2][2][2]; + spvArrayCopyFromConstantToStack3(c, _21); + o.a = int(c[1][1][1]); + float _43[2] = { o.b, o.c }; + float _48[2] = { o.b, o.b }; + float _49[2][2] = { { _43[0], _43[1] }, { _48[0], _48[1] } }; + float _54[2] = { o.c, o.c }; + float _59[2] = { o.c, o.b }; + float _60[2][2] = { { _54[0], _54[1] }, { _59[0], _59[1] } }; + float _61[2][2][2] = { { { _49[0][0], _49[0][1] }, { _49[1][0], _49[1][1] } }, { { _60[0][0], _60[0][1] }, { _60[1][0], _60[1][1] } } }; + float d[2][2][2]; + spvArrayCopyFromStackToStack3(d, _61); + float e[2][2][2]; + spvArrayCopyFromStackToStack3(e, d); + o.b = e[1][0][1]; +} + diff --git a/reference/shaders-msl/vert/return-array.force-native-array.vert b/reference/shaders-msl/vert/return-array.force-native-array.vert new file mode 100644 index 00000000..32388cb4 --- /dev/null +++ b/reference/shaders-msl/vert/return-array.force-native-array.vert @@ -0,0 +1,100 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +constant float4 _20[2] = { float4(10.0), float4(20.0) }; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 vInput0 [[attribute(0)]]; + float4 vInput1 [[attribute(1)]]; +}; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +static inline __attribute__((always_inline)) +void test(thread float4 (&SPIRV_Cross_return_value)[2]) +{ + spvArrayCopyFromConstantToStack1(SPIRV_Cross_return_value, _20); +} + +static inline __attribute__((always_inline)) +void test2(thread float4 (&SPIRV_Cross_return_value)[2], thread float4& vInput0, thread float4& vInput1) +{ + float4 foobar[2]; + foobar[0] = vInput0; + foobar[1] = vInput1; + spvArrayCopyFromStackToStack1(SPIRV_Cross_return_value, foobar); +} + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + float4 _42[2]; + test(_42); + float4 _44[2]; + test2(_44, in.vInput0, in.vInput1); + out.gl_Position = _42[0] + _44[1]; + return out; +} + diff --git a/shaders-msl-no-opt/vert/pass-array-by-value.force-native-array.vert b/shaders-msl-no-opt/vert/pass-array-by-value.force-native-array.vert new file mode 100644 index 00000000..2c142a78 --- /dev/null +++ b/shaders-msl-no-opt/vert/pass-array-by-value.force-native-array.vert @@ -0,0 +1,26 @@ +#version 310 es + +layout(location = 0) in int Index1; +layout(location = 1) in int Index2; + +vec4 consume_constant_arrays2(const vec4 positions[4], const vec4 positions2[4]) +{ + return positions[Index1] + positions2[Index2]; +} + +vec4 consume_constant_arrays(const vec4 positions[4], const vec4 positions2[4]) +{ + return consume_constant_arrays2(positions, positions2); +} + +const vec4 LUT1[] = vec4[](vec4(0.0), vec4(1.0), vec4(2.0), vec4(3.0)); + +void main() +{ + vec4 LUT2[4]; + LUT2[0] = vec4(10.0); + LUT2[1] = vec4(11.0); + LUT2[2] = vec4(12.0); + LUT2[3] = vec4(13.0); + gl_Position = consume_constant_arrays(LUT1, LUT2); +} diff --git a/shaders-msl/comp/composite-array-initialization.force-native-array.comp b/shaders-msl/comp/composite-array-initialization.force-native-array.comp new file mode 100644 index 00000000..1ecf4bcd --- /dev/null +++ b/shaders-msl/comp/composite-array-initialization.force-native-array.comp @@ -0,0 +1,28 @@ +#version 450 +layout(local_size_x = 2) in; + +struct Data +{ + float a; + float b; +}; + +layout(std430, binding = 0) buffer SSBO +{ + Data outdata[]; +}; + +layout(constant_id = 0) const float X = 4.0; + +Data data[2] = Data[](Data(1.0, 2.0), Data(3.0, 4.0)); +Data data2[2] = Data[](Data(X, 2.0), Data(3.0, 5.0)); + +Data combine(Data a, Data b) +{ + return Data(a.a + b.a, a.b + b.b); +} + +void main() +{ + outdata[gl_WorkGroupID.x] = combine(data[gl_LocalInvocationID.x], data2[gl_LocalInvocationID.x]); +} diff --git a/shaders-msl/comp/copy-array-of-arrays.force-native-array.comp b/shaders-msl/comp/copy-array-of-arrays.force-native-array.comp new file mode 100644 index 00000000..edf87195 --- /dev/null +++ b/shaders-msl/comp/copy-array-of-arrays.force-native-array.comp @@ -0,0 +1,21 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0, std430) buffer BUF +{ + int a; + float b; + float c; +} o; + +void main() +{ + const float a[2][2][2] = float[][][](float[][](float[](1.0, 2.0), float[](3.0, 4.0)), float[][](float[](1.0, 2.0), float[](3.0, 4.0))); + float b[2][2][2] = a; + float c[2][2][2] = b; + o.a = int(c[1][1][1]); + + float d[2][2][2] = float[][][](float[][](float[](o.b, o.c), float[](o.b, o.b)), float[][](float[](o.c, o.c), float[](o.c, o.b))); + float e[2][2][2] = d; + o.b = e[1][0][1]; +} diff --git a/shaders-msl/vert/return-array.force-native-array.vert b/shaders-msl/vert/return-array.force-native-array.vert new file mode 100644 index 00000000..70846011 --- /dev/null +++ b/shaders-msl/vert/return-array.force-native-array.vert @@ -0,0 +1,22 @@ +#version 310 es + +layout(location = 0) in vec4 vInput0; +layout(location = 1) in vec4 vInput1; + +vec4[2] test() +{ + return vec4[](vec4(10.0), vec4(20.0)); +} + +vec4[2] test2() +{ + vec4 foobar[2]; + foobar[0] = vInput0; + foobar[1] = vInput1; + return foobar; +} + +void main() +{ + gl_Position = test()[0] + test2()[1]; +} diff --git a/spirv_common.hpp b/spirv_common.hpp index 58e0f42e..24ae4e2e 100644 --- a/spirv_common.hpp +++ b/spirv_common.hpp @@ -939,6 +939,11 @@ struct SPIRFunction : IVariant // Intentionally not a small vector, this one is rare, and std::function can be large. Vector> fixup_hooks_in; + // On function entry, make sure to copy a constant array into thread addr space to work around + // the case where we are passing a constant array by value to a function on backends which do not + // consider arrays value types. + SmallVector constant_arrays_needed_on_stack; + bool active = false; bool flush_undeclared = true; bool do_combined_parameters = true; diff --git a/spirv_cross_c.cpp b/spirv_cross_c.cpp index f653cd2b..9d094731 100644 --- a/spirv_cross_c.cpp +++ b/spirv_cross_c.cpp @@ -597,6 +597,10 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c case SPVC_COMPILER_OPTION_MSL_FORCE_ACTIVE_ARGUMENT_BUFFER_RESOURCES: options->msl.force_active_argument_buffer_resources = value != 0; break; + + case SPVC_COMPILER_OPTION_MSL_FORCE_NATIVE_ARRAYS: + options->msl.force_native_arrays = value != 0; + break; #endif default: diff --git a/spirv_cross_c.h b/spirv_cross_c.h index 1d7afd6a..c7cbe12e 100644 --- a/spirv_cross_c.h +++ b/spirv_cross_c.h @@ -33,7 +33,7 @@ extern "C" { /* Bumped if ABI or API breaks backwards compatibility. */ #define SPVC_C_API_VERSION_MAJOR 0 /* Bumped if APIs or enumerations are added in a backwards compatible way. */ -#define SPVC_C_API_VERSION_MINOR 24 +#define SPVC_C_API_VERSION_MINOR 25 /* Bumped if internal implementation details change. */ #define SPVC_C_API_VERSION_PATCH 0 @@ -572,6 +572,7 @@ typedef enum spvc_compiler_option SPVC_COMPILER_OPTION_MSL_EMULATE_CUBEMAP_ARRAY = 48 | SPVC_COMPILER_OPTION_MSL_BIT, SPVC_COMPILER_OPTION_MSL_ENABLE_DECORATION_BINDING = 49 | SPVC_COMPILER_OPTION_MSL_BIT, SPVC_COMPILER_OPTION_MSL_FORCE_ACTIVE_ARGUMENT_BUFFER_RESOURCES = 50 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_FORCE_NATIVE_ARRAYS = 51 | SPVC_COMPILER_OPTION_MSL_BIT, SPVC_COMPILER_OPTION_INT_MAX = 0x7fffffff } spvc_compiler_option; diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index cce5a6eb..cb36f8fb 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -3697,7 +3697,7 @@ string CompilerGLSL::constant_expression(const SPIRConstant &c) { res = type_to_glsl_constructor(type) + "{ "; } - else if (backend.use_initializer_list && backend.use_typed_initializer_list && !type.array.empty()) + else if (backend.use_initializer_list && backend.use_typed_initializer_list && backend.array_is_value_type && !type.array.empty()) { res = type_to_glsl_constructor(type) + "({ "; needs_trailing_tracket = true; @@ -8686,7 +8686,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // This path cannot be used for arithmetic. if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct && out_type.array.empty()) constructor_op += type_to_glsl_constructor(get(result_type)); - else if (backend.use_typed_initializer_list && !out_type.array.empty()) + else if (backend.use_typed_initializer_list && backend.array_is_value_type && !out_type.array.empty()) { // MSL path. Array constructor is baked into type here, do not use _constructor variant. constructor_op += type_to_glsl_constructor(get(result_type)) + "("; @@ -11751,6 +11751,14 @@ void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags) current_function = &func; auto &entry_block = get(func.entry_block); + sort(begin(func.constant_arrays_needed_on_stack), end(func.constant_arrays_needed_on_stack)); + for (auto &array : func.constant_arrays_needed_on_stack) + { + auto &c = get(array); + auto &type = get(c.constant_type); + statement(variable_decl(type, join("_", array, "_array_copy")), " = ", constant_expression(c), ";"); + } + for (auto &v : func.local_variables) { auto &var = get(v); diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 8deeb430..515cb06e 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -890,7 +890,7 @@ void CompilerMSL::emit_entry_point_declarations() SPIRV_CROSS_THROW("Runtime arrays with dynamic offsets are not supported yet."); else { - use_builtin_array = true; + is_using_builtin_array = true; statement(get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(var_id), name, type_to_array_glsl(type), " ="); @@ -921,7 +921,7 @@ void CompilerMSL::emit_entry_point_declarations() } end_scope_decl(); statement_no_indent(""); - use_builtin_array = false; + is_using_builtin_array = false; } } else @@ -979,15 +979,17 @@ string CompilerMSL::compile() backend.native_row_major_matrix = false; backend.unsized_array_supported = false; backend.can_declare_arrays_inline = false; - backend.can_return_array = true; // <-- Allow Metal to use the array template backend.allow_truncated_access_chain = true; - backend.array_is_value_type = true; // <-- Allow Metal to use the array template to make arrays a value type backend.comparison_image_samples_scalar = true; backend.native_pointers = true; backend.nonuniform_qualifier = ""; backend.support_small_type_sampling_result = true; backend.supports_empty_struct = true; + // Allow Metal to use the array template unless we force it off. + backend.can_return_array = !msl_options.force_native_arrays; + backend.array_is_value_type = !msl_options.force_native_arrays; + capture_output_to_buffer = msl_options.capture_output_to_buffer; is_rasterization_disabled = msl_options.disable_rasterization || capture_output_to_buffer; @@ -6728,7 +6730,7 @@ void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageCla // If threadgroup storage qualifiers are *not* used: // Avoid spvCopy* wrapper functions; Otherwise, spvUnsafeArray<> template cannot be used with that storage qualifier. - if (lhs_thread && rhs_thread && !use_builtin_array) + if (lhs_thread && rhs_thread && !using_builtin_array()) { statement(lhs, " = ", to_expression(rhs_id), ";"); } @@ -6782,9 +6784,9 @@ void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageCla SPIRV_CROSS_THROW("Unknown storage class used for copying arrays."); // Pass internal array of spvUnsafeArray<> into wrapper functions - if (lhs_thread) + if (lhs_thread && !msl_options.force_native_arrays) statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ".elements, ", to_expression(rhs_id), ");"); - else if (rhs_thread) + else if (rhs_thread && !msl_options.force_native_arrays) statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ".elements);"); else statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ");"); @@ -7234,11 +7236,31 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &) auto &type = get(func.return_type); - decl += func_type_decl(type); + if (!type.array.empty() && msl_options.force_native_arrays) + { + // We cannot return native arrays in MSL, so "return" through an out variable. + decl += "void"; + } + else + { + decl += func_type_decl(type); + } + decl += " "; decl += to_name(func.self); decl += "("; + if (!type.array.empty() && msl_options.force_native_arrays) + { + // Fake arrays returns by writing to an out array instead. + decl += "thread "; + decl += type_to_glsl(type); + decl += " (&SPIRV_Cross_return_value)"; + decl += type_to_array_glsl(type); + if (!func.arguments.empty()) + decl += ", "; + } + if (processing_entry_point) { if (msl_options.argument_buffers) @@ -8183,7 +8205,29 @@ string CompilerMSL::to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_ if (is_dynamic_img_sampler && !arg_is_dynamic_img_sampler) arg_str = join("spvDynamicImageSampler<", type_to_glsl(get(type.image.type)), ">("); - arg_str += CompilerGLSL::to_func_call_arg(arg, id); + auto *c = maybe_get(id); + if (msl_options.force_native_arrays && c && !get(c->constant_type).array.empty()) + { + // If we are passing a constant array directly to a function for some reason, + // the callee will expect an argument in thread const address space + // (since we can only bind to arrays with references in MSL). + // To resolve this, we must emit a copy in this address space. + // This kind of code gen should be rare enough that performance is not a real concern. + // Inline the SPIR-V to avoid this kind of suboptimal codegen. + // + // We risk calling this inside a continue block (invalid code), + // so just create a thread local copy in the current function. + arg_str = join("_", id, "_array_copy"); + auto &constants = current_function->constant_arrays_needed_on_stack; + auto itr = find(begin(constants), end(constants), ID(id)); + if (itr == end(constants)) + { + force_recompile(); + constants.push_back(id); + } + } + else + arg_str += CompilerGLSL::to_func_call_arg(arg, id); // Need to check the base variable in case we need to apply a qualified alias. uint32_t var_id = 0; @@ -8458,9 +8502,9 @@ string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_ // address space. // Array of resources should also be declared as builtin arrays. if (has_member_decoration(type.self, index, DecorationOffset)) - use_builtin_array = true; + is_using_builtin_array = true; else if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary)) - use_builtin_array = true; + is_using_builtin_array = true; if (member_is_packed_physical_type(type, index)) { @@ -8516,14 +8560,14 @@ string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_ { BuiltIn builtin = BuiltInMax; if (is_member_builtin(type, index, &builtin)) - use_builtin_array = true; + is_using_builtin_array = true; array_type = type_to_array_glsl(physical_type); } auto result = join(pack_pfx, type_to_glsl(*declared_type, orig_id), " ", qualifier, to_member_name(type, index), member_attribute_qualifier(type, index), array_type, ";"); - use_builtin_array = false; + is_using_builtin_array = false; return result; } @@ -9400,7 +9444,7 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) SPIRV_CROSS_THROW("Unsized arrays of buffers are not supported in MSL."); // Allow Metal to use the array template to make arrays a value type - use_builtin_array = true; + is_using_builtin_array = true; buffer_arrays.push_back(var_id); for (uint32_t i = 0; i < array_size; ++i) { @@ -9413,7 +9457,7 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) ep_args += ", raster_order_group(0)"; ep_args += "]]"; } - use_builtin_array = false; + is_using_builtin_array = false; } else { @@ -9979,9 +10023,9 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) // Allow Metal to use the array template to make arrays a value type string address_space = get_argument_address_space(var); bool builtin = is_builtin_variable(var); - use_builtin_array = builtin; + is_using_builtin_array = builtin; if (address_space == "threadgroup") - use_builtin_array = true; + is_using_builtin_array = true; if (var.basevariable && (var.basevariable == stage_in_ptr_var_id || var.basevariable == stage_out_ptr_var_id)) decl += type_to_glsl(type, arg.id); @@ -9989,7 +10033,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) decl += builtin_type_decl(static_cast(get_decoration(arg.id, DecorationBuiltIn)), arg.id); else if ((storage == StorageClassUniform || storage == StorageClassStorageBuffer) && is_array(type)) { - use_builtin_array = true; + is_using_builtin_array = true; decl += join(type_to_glsl(type, arg.id), "*"); } else if (is_dynamic_img_sampler) @@ -10007,10 +10051,34 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) (storage == StorageClassFunction || storage == StorageClassGeneric)) { // If the argument is a pure value and not an opaque type, we will pass by value. - if (!address_space.empty()) - decl = join(address_space, " ", decl); - decl += " "; - decl += to_expression(name_id); + if (msl_options.force_native_arrays && is_array(type)) + { + // We are receiving an array by value. This is problematic. + // We cannot be sure of the target address space since we are supposed to receive a copy, + // but this is not possible with MSL without some extra work. + // We will have to assume we're getting a reference in thread address space. + // If we happen to get a reference in constant address space, the caller must emit a copy and pass that. + // Thread const therefore becomes the only logical choice, since we cannot "create" a constant array from + // non-constant arrays, but we can create thread const from constant. + decl = string("thread const ") + decl; + decl += " (&"; + const char *restrict_kw = to_restrict(name_id); + if (*restrict_kw) + { + decl += " "; + decl += restrict_kw; + } + decl += to_expression(name_id); + decl += ")"; + decl += type_to_array_glsl(type); + } + else + { + if (!address_space.empty()) + decl = join(address_space, " ", decl); + decl += " "; + decl += to_expression(name_id); + } } else if (is_array(type) && !type_is_image) { @@ -10086,7 +10154,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) decl += "* " + to_expression(name_id) + "_atomic"; } - use_builtin_array = false; + is_using_builtin_array = false; return decl; } @@ -10571,7 +10639,7 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id) if (type.vecsize > 1) type_name += to_string(type.vecsize); - if (type.array.empty() || use_builtin_array) + if (type.array.empty() || using_builtin_array()) { return type_name; } @@ -10607,7 +10675,7 @@ string CompilerMSL::type_to_array_glsl(const SPIRType &type) } default: { - if (use_builtin_array) + if (using_builtin_array()) return CompilerGLSL::type_to_array_glsl(type); else return ""; @@ -10620,12 +10688,12 @@ std::string CompilerMSL::variable_decl(const SPIRVariable &variable) { if (variable.storage == StorageClassWorkgroup) { - use_builtin_array = true; + is_using_builtin_array = true; } std::string expr = CompilerGLSL::variable_decl(variable); if (variable.storage == StorageClassWorkgroup) { - use_builtin_array = false; + is_using_builtin_array = false; } return expr; } @@ -12710,3 +12778,8 @@ void CompilerMSL::activate_argument_buffer_resources() active_interface_variables.insert(self); }); } + +bool CompilerMSL::using_builtin_array() const +{ + return msl_options.force_native_arrays || is_using_builtin_array; +} diff --git a/spirv_msl.hpp b/spirv_msl.hpp index 59144574..6b021c66 100644 --- a/spirv_msl.hpp +++ b/spirv_msl.hpp @@ -312,6 +312,11 @@ public: // and would otherwise declare a different IAB. bool force_active_argument_buffer_resources = false; + // Forces the use of plain arrays, which works around certain driver bugs on certain versions + // of Intel Macbooks. See https://github.com/KhronosGroup/SPIRV-Cross/issues/1210. + // May reduce performance in scenarios where arrays are copied around as value-types. + bool force_native_arrays = false; + bool is_ios() { return platform == iOS; @@ -827,7 +832,10 @@ protected: bool has_sampled_images = false; bool builtin_declaration = false; // Handle HLSL-style 0-based vertex/instance index. - bool use_builtin_array = false; // Force the use of C style array declaration. + + bool is_using_builtin_array = false; // Force the use of C style array declaration. + bool using_builtin_array() const; + bool is_rasterization_disabled = false; bool capture_output_to_buffer = false; bool needs_swizzle_buffer_def = false; diff --git a/test_shaders.py b/test_shaders.py index 9f0fdc53..f7a1b2da 100755 --- a/test_shaders.py +++ b/test_shaders.py @@ -254,6 +254,8 @@ def cross_compile_msl(shader, spirv, opt, iterations, paths): msl_args.append('0') msl_args.append('--msl-device-argument-buffer') msl_args.append('1') + if '.force-native-array.' in shader: + msl_args.append('--msl-force-native-arrays') subprocess.check_call(msl_args)