Merge pull request #321 from brenwill/master

Enhancements to MSL compute and entry point naming.
Hans-Kristian Arntzen 2017-11-11 09:57:49 +01:00 committed by GitHub
commit 2b8fe8d711
73 changed files with 2763 additions and 229 deletions

.gitignore
View File

@ -13,5 +13,6 @@
*.opensdf
*.shader
*.a
*.bc
!CMakeLists.txt

View File

@ -447,9 +447,11 @@ struct CLIArguments
const char *cpp_interface_name = nullptr;
uint32_t version = 0;
uint32_t shader_model = 0;
uint32_t msl_version = 0;
bool es = false;
bool set_version = false;
bool set_shader_model = false;
bool set_msl_version = false;
bool set_es = false;
bool dump_resources = false;
bool force_temporary = false;
@ -483,7 +485,7 @@ static void print_help()
"[--version <GLSL version>] [--dump-resources] [--help] [--force-temporary] "
"[--vulkan-semantics] [--flatten-ubo] [--fixup-clipspace] [--flip-vert-y] [--iterations iter] "
"[--cpp] [--cpp-interface-name <name>] "
"[--msl] "
"[--msl] [--msl-version <MMmmpp>]"
"[--hlsl] [--shader-model] [--hlsl-enable-compat] "
"[--separate-shader-objects]"
"[--pls-in format input-name] [--pls-out format output-name] [--remap source_name target_name "
@ -686,6 +688,10 @@ static int main_inner(int argc, char *argv[])
args.shader_model = parser.next_uint();
args.set_shader_model = true;
});
cbs.add("--msl-version", [&args](CLIParser &parser) {
args.msl_version = parser.next_uint();
args.set_msl_version = true;
});
cbs.add("--remove-unused-variables", [&args](CLIParser &) { args.remove_unused = true; });
@ -725,6 +731,8 @@ static int main_inner(int argc, char *argv[])
auto *msl_comp = static_cast<CompilerMSL *>(compiler.get());
auto msl_opts = msl_comp->get_options();
if (args.set_msl_version)
msl_opts.msl_version = args.msl_version;
msl_comp->set_options(msl_opts);
}
else if (args.hlsl)
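
For context, the new --msl-version flag above simply forwards an MMmmpp-packed integer into CompilerMSL's options before compiling. A minimal sketch of the same path through the C++ API follows; the helper name, the spirv_binary argument, and the assumption that 10200 encodes MSL 1.2 are illustrative only and not part of this commit.

// Sketch only: mirrors what main.cpp does with --msl-version.
#include "spirv_msl.hpp"
#include <cstdint>
#include <string>
#include <vector>

std::string compile_to_msl(std::vector<uint32_t> spirv_binary)
{
    spirv_cross::CompilerMSL msl(std::move(spirv_binary));
    auto opts = msl.get_options();   // CompilerMSL::Options, same accessors as in the hunk above
    opts.msl_version = 10200;        // assumed MMmmpp encoding for MSL 1.2
    msl.set_options(opts);
    return msl.compile();            // emits MSL targeting the requested version
}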

View File

@ -0,0 +1,29 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct _3
{
int4 _m0;
uint4 _m1;
};
struct _4
{
uint4 _m0;
int4 _m1;
};
kernel void main0(device _3& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]])
{
_6._m0 = _5._m1 + uint4(_5._m0);
_6._m0 = uint4(_5._m0) + _5._m1;
_6._m0 = _5._m1 + _5._m1;
_6._m0 = uint4(_5._m0 + _5._m0);
_6._m1 = int4(_5._m1 + _5._m1);
_6._m1 = _5._m0 + _5._m0;
_6._m1 = int4(_5._m1) + _5._m0;
_6._m1 = _5._m0 + int4(_5._m1);
}

View File

@ -0,0 +1,29 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct _3
{
int4 _m0;
uint4 _m1;
};
struct _4
{
uint4 _m0;
int4 _m1;
};
kernel void main0(device _3& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]])
{
_6._m0 = uint4(int4(_5._m1) >> _5._m0);
_6._m0 = uint4(_5._m0 >> int4(_5._m1));
_6._m0 = uint4(int4(_5._m1) >> int4(_5._m1));
_6._m0 = uint4(_5._m0 >> _5._m0);
_6._m1 = int4(_5._m1) >> int4(_5._m1);
_6._m1 = _5._m0 >> _5._m0;
_6._m1 = int4(_5._m1) >> _5._m0;
_6._m1 = _5._m0 >> int4(_5._m1);
}

View File

@ -0,0 +1,29 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct _3
{
int4 _m0;
uint4 _m1;
};
struct _4
{
uint4 _m0;
int4 _m1;
};
kernel void main0(device _3& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]])
{
_6._m0 = uint4(int4(_5._m1) / _5._m0);
_6._m0 = uint4(_5._m0 / int4(_5._m1));
_6._m0 = uint4(int4(_5._m1) / int4(_5._m1));
_6._m0 = uint4(_5._m0 / _5._m0);
_6._m1 = int4(_5._m1) / int4(_5._m1);
_6._m1 = _5._m0 / _5._m0;
_6._m1 = int4(_5._m1) / _5._m0;
_6._m1 = _5._m0 / int4(_5._m1);
}

View File

@ -0,0 +1,29 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct _3
{
int4 _m0;
uint4 _m1;
};
struct _4
{
uint4 _m0;
int4 _m1;
};
kernel void main0(device _3& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]])
{
_6._m0 = _5._m1 >> uint4(_5._m0);
_6._m0 = uint4(_5._m0) >> _5._m1;
_6._m0 = _5._m1 >> _5._m1;
_6._m0 = uint4(_5._m0) >> uint4(_5._m0);
_6._m1 = int4(_5._m1 >> _5._m1);
_6._m1 = int4(uint4(_5._m0) >> uint4(_5._m0));
_6._m1 = int4(_5._m1 >> uint4(_5._m0));
_6._m1 = int4(uint4(_5._m0) >> _5._m1);
}

View File

@ -0,0 +1,29 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct _6
{
int4 _m0;
uint4 _m1;
};
struct _7
{
uint4 _m0;
int4 _m1;
};
kernel void main0(device _6& _8 [[buffer(0)]], device _7& _9 [[buffer(1)]])
{
_9._m0 = _8._m1 + uint4(_8._m0);
_9._m0 = uint4(_8._m0) + _8._m1;
_9._m0 = _8._m1 + _8._m1;
_9._m0 = uint4(_8._m0 + _8._m0);
_9._m1 = int4(_8._m1 + _8._m1);
_9._m1 = _8._m0 + _8._m0;
_9._m1 = int4(_8._m1) + _8._m0;
_9._m1 = _8._m0 + int4(_8._m1);
}

View File

@ -0,0 +1,21 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct SSBO0
{
float scalar;
float2 vec2_val;
float3 vec3_val;
float4 vec4_val;
};
kernel void main0(device SSBO0& _4 [[buffer(0)]])
{
_4.scalar = float(half(_4.scalar));
_4.vec2_val = float2(half2(_4.vec2_val));
_4.vec3_val = float3(half3(_4.vec3_val));
_4.vec4_val = float4(half4(_4.vec4_val));
}

View File

@ -0,0 +1,40 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
constant uint3 gl_WorkGroupSize = uint3(8u, 8u, 1u);
struct UBO
{
float4 uInvSize;
float4 uScale;
};
float jacobian(thread const float2& dDdx, thread const float2& dDdy)
{
return ((1.0 + dDdx.x) * (1.0 + dDdy.y)) - (dDdx.y * dDdy.x);
}
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], constant UBO& _46 [[buffer(0)]], texture2d<float> uHeight [[texture(0)]], sampler uHeightSmplr [[sampler(0)]], texture2d<float> uDisplacement [[texture(1)]], sampler uDisplacementSmplr [[sampler(1)]], texture2d<float, access::write> iHeightDisplacement [[texture(2)]], texture2d<float, access::write> iGradJacobian [[texture(3)]])
{
float4 uv = (float2(gl_GlobalInvocationID.xy) * _46.uInvSize.xy).xyxy + (_46.uInvSize * 0.5);
float h = uHeight.sample(uHeightSmplr, uv.xy, level(0.0)).x;
float x0 = uHeight.sample(uHeightSmplr, uv.xy, level(0.0), int2(-1, 0)).x;
float x1 = uHeight.sample(uHeightSmplr, uv.xy, level(0.0), int2(1, 0)).x;
float y0 = uHeight.sample(uHeightSmplr, uv.xy, level(0.0), int2(0, -1)).x;
float y1 = uHeight.sample(uHeightSmplr, uv.xy, level(0.0), int2(0, 1)).x;
float2 grad = (_46.uScale.xy * 0.5) * float2(x1 - x0, y1 - y0);
float2 displacement = uDisplacement.sample(uDisplacementSmplr, uv.zw, level(0.0)).xy * 1.2000000476837158203125;
float2 dDdx = (uDisplacement.sample(uDisplacementSmplr, uv.zw, level(0.0), int2(1, 0)).xy - uDisplacement.sample(uDisplacementSmplr, uv.zw, level(0.0), int2(-1, 0)).xy) * 0.60000002384185791015625;
float2 dDdy = (uDisplacement.sample(uDisplacementSmplr, uv.zw, level(0.0), int2(0, 1)).xy - uDisplacement.sample(uDisplacementSmplr, uv.zw, level(0.0), int2(0, -1)).xy) * 0.60000002384185791015625;
float2 param = dDdx * _46.uScale.z;
float2 param_1 = dDdy * _46.uScale.z;
float j = jacobian(param, param_1);
displacement = float2(0.0);
iHeightDisplacement.write(float4(h, displacement, 0.0), uint2(int2(gl_GlobalInvocationID.xy)));
iGradJacobian.write(float4(grad, j, 0.0), uint2(int2(gl_GlobalInvocationID.xy)));
}

View File

@ -0,0 +1,34 @@
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
struct SSBO
{
float4 in_data[1];
};
struct SSBO2
{
float4 out_data[1];
};
struct SSBO3
{
uint counter;
};
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device SSBO& _23 [[buffer(0)]], device SSBO2& _45 [[buffer(1)]], device SSBO3& _48 [[buffer(2)]])
{
uint ident = gl_GlobalInvocationID.x;
float4 idata = _23.in_data[ident];
if (dot(idata, float4(1.0, 5.0, 6.0, 2.0)) > 8.19999980926513671875)
{
uint _52 = atomic_fetch_add_explicit((volatile device atomic_uint*)&(_48.counter), 1u, memory_order_relaxed);
_45.out_data[_52] = idata;
}
}

View File

@ -0,0 +1,47 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
// Implementation of the GLSL findLSB() function
template<typename T>
T findLSB(T x)
{
return select(ctz(x), T(-1), x == T(0));
}
// Implementation of the signed GLSL findMSB() function
template<typename T>
T findSMSB(T x)
{
T v = select(x, T(-1) - x, x < T(0));
return select(clz(T(0)) - (clz(v) + T(1)), T(-1), v == T(0));
}
// Implementation of the unsigned GLSL findMSB() function
template<typename T>
T findUMSB(T x)
{
return select(clz(T(0)) - (clz(x) + T(1)), T(-1), x == T(0));
}
kernel void main0()
{
int signed_value = 0;
uint unsigned_value = 0u;
int s = extract_bits(signed_value, 5, 20);
uint u = extract_bits(unsigned_value, 6, 21);
s = insert_bits(s, 40, 5, 4);
u = insert_bits(u, 60u, 5, 4);
u = reverse_bits(u);
s = reverse_bits(s);
int v0 = popcount(u);
int v1 = popcount(s);
int v2 = findUMSB(u);
int v3 = findSMSB(s);
int v4 = findLSB(u);
int v5 = findLSB(s);
}

View File

@ -0,0 +1,78 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
void out_test_0(thread const int& cond, thread int& i)
{
if (cond == 0)
{
i = 40;
}
else
{
i = 60;
}
}
void out_test_1(thread const int& cond, thread int& i)
{
switch (cond)
{
case 40:
{
i = 40;
break;
}
default:
{
i = 70;
break;
}
}
}
void inout_test_0(thread const int& cond, thread int& i)
{
if (cond == 0)
{
i = 40;
}
}
void inout_test_1(thread const int& cond, thread int& i)
{
switch (cond)
{
case 40:
{
i = 40;
break;
}
}
}
kernel void main0()
{
int cond = 40;
int i = 50;
int param = cond;
int param_1 = i;
out_test_0(param, param_1);
i = param_1;
int param_2 = cond;
int param_3 = i;
out_test_1(param_2, param_3);
i = param_3;
int param_4 = cond;
int param_5 = i;
inout_test_0(param_4, param_5);
i = param_5;
int param_6 = cond;
int param_7 = i;
inout_test_1(param_6, param_7);
i = param_7;
}

View File

@ -0,0 +1,15 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct SSBO
{
float4 value;
};
kernel void main0(device SSBO& _10 [[buffer(0)]])
{
_10.value = float4(20.0);
}

View File

@ -0,0 +1,15 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct SSBO
{
int4 value;
};
kernel void main0(device SSBO& _10 [[buffer(0)]], texture2d<int> uImage [[texture(0)]])
{
_10.value = uImage.read(uint2(int2(10)));
}

View File

@ -0,0 +1,36 @@
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
constant uint3 gl_WorkGroupSize = uint3(4u, 1u, 1u);
struct SSBO
{
float in_data[1];
};
struct SSBO2
{
float out_data[1];
};
struct SSBO3
{
uint count;
};
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device SSBO& _22 [[buffer(0)]], device SSBO2& _38 [[buffer(1)]], device SSBO3& _41 [[buffer(2)]])
{
uint ident = gl_GlobalInvocationID.x;
float idata = _22.in_data[ident];
if (idata > 12.0)
{
uint _45 = atomic_fetch_add_explicit((volatile device atomic_uint*)&(_41.count), 1u, memory_order_relaxed);
_38.out_data[_45] = idata;
}
}

View File

@ -0,0 +1,29 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct SSBO
{
float4x4 mvp;
float4 in_data[1];
};
struct SSBO2
{
float4 out_data[1];
};
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device SSBO& _28 [[buffer(0)]], device SSBO2& _52 [[buffer(1)]])
{
uint ident = gl_GlobalInvocationID.x;
int i = 0;
float4 idat = _28.in_data[ident];
do
{
idat = _28.mvp * idat;
i++;
} while (i < 16);
_52.out_data[ident] = idat;
}

View File

@ -0,0 +1,11 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
kernel void main0(texture2d<float> uImageIn [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], texture2d<float, access::write> uImageOut [[texture(1)]])
{
float4 v = uImageIn.read(uint2((int2(gl_GlobalInvocationID.xy) + int2(uImageIn.get_width(), uImageIn.get_height()))));
uImageOut.write(v, uint2(int2(gl_GlobalInvocationID.xy)));
}

View File

@ -0,0 +1,21 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct SSBO
{
float4 out_data[1];
};
kernel void main0(device SSBO& _27 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
float4 v;
v.x = 10.0;
v.y = 30.0;
v.z = 70.0;
v.w = 90.0;
_27.out_data[gl_GlobalInvocationID.x] = v;
_27.out_data[gl_GlobalInvocationID.x].y = 20.0;
}

View File

@ -0,0 +1,107 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct SSBO
{
float4x4 mvp;
float4 in_data[1];
};
struct SSBO2
{
float4 out_data[1];
};
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device SSBO& _24 [[buffer(0)]], device SSBO2& _177 [[buffer(1)]])
{
uint ident = gl_GlobalInvocationID.x;
float4 idat = _24.in_data[ident];
int k = 0;
uint i = 0u;
if (idat.y == 20.0)
{
do
{
k *= 2;
i++;
} while (i < ident);
}
switch (k)
{
case 10:
{
for (;;)
{
i++;
if (i > 10u)
{
break;
}
continue;
}
break;
}
default:
{
for (;;)
{
i += 2u;
if (i > 20u)
{
break;
}
continue;
}
break;
}
}
while (k < 10)
{
idat *= 2.0;
k++;
}
for (uint i_1 = 0u; i_1 < 16u; i_1++, k++)
{
for (uint j = 0u; j < 30u; j++)
{
idat = _24.mvp * idat;
}
}
k = 0;
for (;;)
{
k++;
if (k > 10)
{
k += 2;
}
else
{
k += 3;
continue;
}
k += 10;
continue;
}
k = 0;
do
{
k++;
} while (k > 10);
int l = 0;
for (;;)
{
if (l == 5)
{
l++;
continue;
}
idat += float4(1.0);
l++;
continue;
}
_177.out_data[ident] = idat;
}

View File

@ -0,0 +1,16 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct SSBO2
{
float3x3 out_data[1];
};
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device SSBO2& _22 [[buffer(0)]])
{
uint ident = gl_GlobalInvocationID.x;
_22.out_data[ident] = float3x3(float3(10.0), float3(20.0), float3(40.0));
}

View File

@ -0,0 +1,35 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct SSBO
{
float4 in_data[1];
};
struct SSBO2
{
float4 out_data[1];
};
// Implementation of the GLSL mod() function, which is slightly different than Metal fmod()
template<typename Tx, typename Ty>
Tx mod(Tx x, Ty y)
{
return x - y * floor(x / y);
}
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device SSBO& _23 [[buffer(0)]], device SSBO2& _33 [[buffer(1)]])
{
uint ident = gl_GlobalInvocationID.x;
float4 v = mod(_23.in_data[ident], _33.out_data[ident]);
_33.out_data[ident] = v;
uint4 vu = as_type<uint4>(_23.in_data[ident]) % as_type<uint4>(_33.out_data[ident]);
_33.out_data[ident] = as_type<float4>(vu);
int4 vi = as_type<int4>(_23.in_data[ident]) % as_type<int4>(_33.out_data[ident]);
_33.out_data[ident] = as_type<float4>(vi);
}

View File

@ -0,0 +1,24 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct SSBO
{
float4 in_data[1];
};
struct SSBO2
{
float4 out_data[1];
};
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device SSBO& _23 [[buffer(0)]], device SSBO2& _35 [[buffer(1)]])
{
uint ident = gl_GlobalInvocationID.x;
float4 i;
float4 _31 = modf(_23.in_data[ident], i);
float4 v = _31;
_35.out_data[ident] = v;
}

View File

@ -0,0 +1,36 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct SSBO2
{
float4 out_data[1];
};
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device SSBO2& _27 [[buffer(0)]])
{
uint ident = gl_GlobalInvocationID.x;
if (ident == 2u)
{
_27.out_data[ident] = float4(20.0);
}
else
{
if (ident == 4u)
{
_27.out_data[ident] = float4(10.0);
return;
}
}
for (int i = 0; i < 20; i++)
{
if (i == 10)
{
break;
}
return;
}
_27.out_data[ident] = float4(10.0);
}

View File

@ -0,0 +1,29 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct SSBO
{
int a;
};
kernel void main0(device SSBO& _9 [[buffer(0)]])
{
_9.a += 10;
_9.a -= 10;
_9.a *= 10;
_9.a /= 10;
_9.a = _9.a << 2;
_9.a = _9.a >> 3;
_9.a &= 40;
_9.a ^= 10;
_9.a %= 40;
_9.a |= 1;
bool c = false;
bool d = true;
c = c && d;
d = d || c;
_9.a = int(c && d);
}

View File

@ -0,0 +1,27 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
constant uint3 gl_WorkGroupSize = uint3(4u, 1u, 1u);
struct SSBO
{
float in_data[1];
};
struct SSBO2
{
float out_data[1];
};
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device SSBO& _22 [[buffer(0)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], device SSBO2& _44 [[buffer(1)]])
{
uint ident = gl_GlobalInvocationID.x;
float idata = _22.in_data[ident];
threadgroup float sShared[4];
sShared[gl_LocalInvocationIndex] = idata;
threadgroup_barrier(mem_flags::mem_threadgroup);
_44.out_data[ident] = sShared[(4u - gl_LocalInvocationIndex) - 1u];
}

View File

@ -0,0 +1,26 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct Foo
{
float4x4 m;
};
struct SSBO2
{
Foo out_data[1];
};
struct SSBO
{
Foo in_data[1];
};
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device SSBO2& _23 [[buffer(0)]], device SSBO& _30 [[buffer(1)]])
{
uint ident = gl_GlobalInvocationID.x;
_23.out_data[ident].m = _30.in_data[ident].m * _30.in_data[ident].m;
}

View File

@ -0,0 +1,100 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct S0
{
float2 a[1];
float b;
};
struct S1
{
packed_float3 a;
float b;
};
struct S2
{
float3 a[1];
float b;
};
struct S3
{
float2 a;
float b;
};
struct S4
{
float2 c;
};
struct Content
{
S0 m0s[1];
S1 m1s[1];
S2 m2s[1];
S0 m0;
S1 m1;
S2 m2;
S3 m3;
char pad7[4];
float m4;
S4 m3s[8];
};
struct SSBO1
{
Content content;
Content content1[2];
Content content2;
char pad3[8];
float2x2 m0;
float2x2 m1;
float2x3 m2[4];
float3x2 m3;
float2x2 m4;
float2x2 m5[9];
float2x3 m6[4][2];
float3x2 m7;
float array[1];
};
struct SSBO0
{
Content content;
Content content1[2];
Content content2;
float array[1];
};
kernel void main0(device SSBO1& ssbo_430 [[buffer(0)]], device SSBO0& ssbo_140 [[buffer(1)]])
{
ssbo_430.content.m0s[0].a[0] = ssbo_140.content.m0s[0].a[0];
ssbo_430.content.m0s[0].b = ssbo_140.content.m0s[0].b;
ssbo_430.content.m1s[0].a = ssbo_140.content.m1s[0].a;
ssbo_430.content.m1s[0].b = ssbo_140.content.m1s[0].b;
ssbo_430.content.m2s[0].a[0] = ssbo_140.content.m2s[0].a[0];
ssbo_430.content.m2s[0].b = ssbo_140.content.m2s[0].b;
ssbo_430.content.m0.a[0] = ssbo_140.content.m0.a[0];
ssbo_430.content.m0.b = ssbo_140.content.m0.b;
ssbo_430.content.m1.a = ssbo_140.content.m1.a;
ssbo_430.content.m1.b = ssbo_140.content.m1.b;
ssbo_430.content.m2.a[0] = ssbo_140.content.m2.a[0];
ssbo_430.content.m2.b = ssbo_140.content.m2.b;
ssbo_430.content.m3.a = ssbo_140.content.m3.a;
ssbo_430.content.m3.b = ssbo_140.content.m3.b;
ssbo_430.content.m4 = ssbo_140.content.m4;
ssbo_430.content.m3s[0].c = ssbo_140.content.m3s[0].c;
ssbo_430.content.m3s[1].c = ssbo_140.content.m3s[1].c;
ssbo_430.content.m3s[2].c = ssbo_140.content.m3s[2].c;
ssbo_430.content.m3s[3].c = ssbo_140.content.m3s[3].c;
ssbo_430.content.m3s[4].c = ssbo_140.content.m3s[4].c;
ssbo_430.content.m3s[5].c = ssbo_140.content.m3s[5].c;
ssbo_430.content.m3s[6].c = ssbo_140.content.m3s[6].c;
ssbo_430.content.m3s[7].c = ssbo_140.content.m3s[7].c;
}

View File

@ -0,0 +1,51 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct SSBO
{
float4x4 mvp;
float4 in_data[1];
};
struct SSBO2
{
float4 out_data[1];
};
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device SSBO& _24 [[buffer(0)]], device SSBO2& _89 [[buffer(1)]])
{
uint ident = gl_GlobalInvocationID.x;
float4 idat = _24.in_data[ident];
int k = 0;
for (;;)
{
int _39 = k;
int _40 = _39 + 1;
k = _40;
if (_40 < 10)
{
idat *= 2.0;
k++;
continue;
}
else
{
break;
}
}
for (uint i = 0u; i < 16u; i++, k++)
{
for (uint j = 0u; j < 30u; j++)
{
idat = _24.mvp * idat;
}
}
do
{
k++;
} while (k > 10);
_89.out_data[ident] = idat;
}

View File

@ -0,0 +1,53 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct S0
{
float4 a;
};
struct S1
{
float4 a;
};
struct SSBO0
{
S0 s0s[1];
};
struct SSBO1
{
S1 s1s[1];
};
struct SSBO2
{
float4 outputs[1];
};
float4 overload(thread const S0& s0)
{
return s0.a;
}
float4 overload(thread const S1& s1)
{
return s1.a;
}
kernel void main0(device SSBO0& _36 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device SSBO1& _55 [[buffer(1)]], device SSBO2& _66 [[buffer(2)]])
{
S0 s0;
s0.a = _36.s0s[gl_GlobalInvocationID.x].a;
S1 s1;
s1.a = _55.s1s[gl_GlobalInvocationID.x].a;
S0 param = s0;
S1 param_1 = s1;
_66.outputs[gl_GlobalInvocationID.x] = overload(param) + overload(param_1);
}

View File

@ -0,0 +1,20 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct SSBO2
{
uint outputs[1];
};
struct SSBO
{
uint inputs[1];
};
kernel void main0(device SSBO2& _10 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device SSBO& _23 [[buffer(1)]])
{
_10.outputs[gl_GlobalInvocationID.x] = _23.inputs[gl_GlobalInvocationID.x] / 29u;
}

View File

@ -5,7 +5,7 @@ using namespace metal;
struct Light
{
float3 Position;
packed_float3 Position;
float Radius;
float4 Color;
};

View File

@ -39,7 +39,7 @@ vertex main0_out main0(constant UBO& _22 [[buffer(0)]])
out.oA = _22.A;
out.oB = float4(_22.B0, _22.B1);
out.oC = float4(_22.C0, _22.C1);
out.oD = float4(float3(_22.D0), _22.D1);
out.oD = float4(_22.D0, _22.D1);
out.oE = float4(_22.E0, _22.E1, _22.E2, _22.E3);
out.oF = float4(_22.F0, _22.F1, _22.F2);
return out;

View File

@ -5,7 +5,7 @@ using namespace metal;
struct Light
{
float3 Position;
packed_float3 Position;
float Radius;
float4 Color;
};

View File

@ -5,7 +5,7 @@ using namespace metal;
struct Light
{
float3 Position;
packed_float3 Position;
float Radius;
float4 Color;
};

View File

@ -47,15 +47,15 @@ T degrees(T r)
template<typename T>
T findLSB(T x)
{
return select(ctz(x), -1, x == 0);
return select(ctz(x), T(-1), x == T(0));
}
// Implementation of the signed GLSL findMSB() function
template<typename T>
T findSMSB(T x)
{
T v = select(x, -1 - x, x < 0);
return select(clz(0) - (clz(v) + 1), -1, v == 0);
T v = select(x, T(-1) - x, x < T(0));
return select(clz(T(0)) - (clz(v) + T(1)), T(-1), v == T(0));
}
// Returns the determinant of a 2x2 matrix.

View File

@ -31,7 +31,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]]
main0_out out = {};
out.gl_Position = _18.mvp * in.aVertex;
out.vNormal = in.aNormal;
out.vColor = float3(_18.color) * _18.opacity;
out.vColor = _18.color * _18.opacity;
out.vSize = _18.targSize * _18.opacity;
return out;
}

View File

@ -0,0 +1,79 @@
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 1
; Bound: 30
; Schema: 0
OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %func "main"
OpExecutionMode %func LocalSize 1 1 1
OpSource ESSL 310
OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
OpSourceExtension "GL_GOOGLE_include_directive"
OpMemberDecorate %input_struct 0 Offset 0
OpMemberDecorate %input_struct 1 Offset 16
OpMemberDecorate %output_struct 0 Offset 0
OpMemberDecorate %output_struct 1 Offset 16
OpDecorate %input_struct BufferBlock
OpDecorate %inputs DescriptorSet 0
OpDecorate %inputs Binding 0
OpDecorate %inputs Restrict
OpDecorate %output_struct BufferBlock
OpDecorate %outputs DescriptorSet 0
OpDecorate %outputs Binding 1
OpDecorate %outputs Restrict
%void = OpTypeVoid
%main_func = OpTypeFunction %void
%uint = OpTypeInt 32 0
%uvec4 = OpTypeVector %uint 4
%int = OpTypeInt 32 1
%ivec4 = OpTypeVector %int 4
%ivec4_ptr = OpTypePointer Uniform %ivec4
%uvec4_ptr = OpTypePointer Uniform %uvec4
%zero = OpConstant %int 0
%one = OpConstant %int 1
%input_struct = OpTypeStruct %ivec4 %uvec4
%input_struct_ptr = OpTypePointer Uniform %input_struct
%inputs = OpVariable %input_struct_ptr Uniform
%output_struct = OpTypeStruct %uvec4 %ivec4
%output_struct_ptr = OpTypePointer Uniform %output_struct
%outputs = OpVariable %output_struct_ptr Uniform
%func = OpFunction %void None %main_func
%block = OpLabel
%input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero
%input0_ptr = OpAccessChain %uvec4_ptr %inputs %one
%input1 = OpLoad %ivec4 %input1_ptr
%input0 = OpLoad %uvec4 %input0_ptr
%output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero
%output_ptr_ivec4 = OpAccessChain %ivec4_ptr %outputs %one
; Test all variants of IAdd
%result_iadd_0 = OpIAdd %uvec4 %input0 %input1
%result_iadd_1 = OpIAdd %uvec4 %input1 %input0
%result_iadd_2 = OpIAdd %uvec4 %input0 %input0
%result_iadd_3 = OpIAdd %uvec4 %input1 %input1
%result_iadd_4 = OpIAdd %ivec4 %input0 %input0
%result_iadd_5 = OpIAdd %ivec4 %input1 %input1
%result_iadd_6 = OpIAdd %ivec4 %input0 %input1
%result_iadd_7 = OpIAdd %ivec4 %input1 %input0
OpStore %output_ptr_uvec4 %result_iadd_0
OpStore %output_ptr_uvec4 %result_iadd_1
OpStore %output_ptr_uvec4 %result_iadd_2
OpStore %output_ptr_uvec4 %result_iadd_3
OpStore %output_ptr_ivec4 %result_iadd_4
OpStore %output_ptr_ivec4 %result_iadd_5
OpStore %output_ptr_ivec4 %result_iadd_6
OpStore %output_ptr_ivec4 %result_iadd_7
OpReturn
OpFunctionEnd

View File

@ -0,0 +1,77 @@
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 1
; Bound: 30
; Schema: 0
OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %func "main"
OpExecutionMode %func LocalSize 1 1 1
OpSource ESSL 310
OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
OpSourceExtension "GL_GOOGLE_include_directive"
OpMemberDecorate %input_struct 0 Offset 0
OpMemberDecorate %input_struct 1 Offset 16
OpMemberDecorate %output_struct 0 Offset 0
OpMemberDecorate %output_struct 1 Offset 16
OpDecorate %input_struct BufferBlock
OpDecorate %inputs DescriptorSet 0
OpDecorate %inputs Binding 0
OpDecorate %output_struct BufferBlock
OpDecorate %outputs DescriptorSet 0
OpDecorate %outputs Binding 1
%void = OpTypeVoid
%main_func = OpTypeFunction %void
%uint = OpTypeInt 32 0
%uvec4 = OpTypeVector %uint 4
%int = OpTypeInt 32 1
%ivec4 = OpTypeVector %int 4
%ivec4_ptr = OpTypePointer Uniform %ivec4
%uvec4_ptr = OpTypePointer Uniform %uvec4
%zero = OpConstant %int 0
%one = OpConstant %int 1
%input_struct = OpTypeStruct %ivec4 %uvec4
%input_struct_ptr = OpTypePointer Uniform %input_struct
%inputs = OpVariable %input_struct_ptr Uniform
%output_struct = OpTypeStruct %uvec4 %ivec4
%output_struct_ptr = OpTypePointer Uniform %output_struct
%outputs = OpVariable %output_struct_ptr Uniform
%func = OpFunction %void None %main_func
%block = OpLabel
%input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero
%input0_ptr = OpAccessChain %uvec4_ptr %inputs %one
%input1 = OpLoad %ivec4 %input1_ptr
%input0 = OpLoad %uvec4 %input0_ptr
%output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero
%output_ptr_ivec4 = OpAccessChain %ivec4_ptr %outputs %one
; Test all variants of ShiftRightArithmetic
%result_iadd_0 = OpShiftRightArithmetic %uvec4 %input0 %input1
%result_iadd_1 = OpShiftRightArithmetic %uvec4 %input1 %input0
%result_iadd_2 = OpShiftRightArithmetic %uvec4 %input0 %input0
%result_iadd_3 = OpShiftRightArithmetic %uvec4 %input1 %input1
%result_iadd_4 = OpShiftRightArithmetic %ivec4 %input0 %input0
%result_iadd_5 = OpShiftRightArithmetic %ivec4 %input1 %input1
%result_iadd_6 = OpShiftRightArithmetic %ivec4 %input0 %input1
%result_iadd_7 = OpShiftRightArithmetic %ivec4 %input1 %input0
OpStore %output_ptr_uvec4 %result_iadd_0
OpStore %output_ptr_uvec4 %result_iadd_1
OpStore %output_ptr_uvec4 %result_iadd_2
OpStore %output_ptr_uvec4 %result_iadd_3
OpStore %output_ptr_ivec4 %result_iadd_4
OpStore %output_ptr_ivec4 %result_iadd_5
OpStore %output_ptr_ivec4 %result_iadd_6
OpStore %output_ptr_ivec4 %result_iadd_7
OpReturn
OpFunctionEnd

View File

@ -0,0 +1,77 @@
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 1
; Bound: 30
; Schema: 0
OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %func "main"
OpExecutionMode %func LocalSize 1 1 1
OpSource ESSL 310
OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
OpSourceExtension "GL_GOOGLE_include_directive"
OpMemberDecorate %input_struct 0 Offset 0
OpMemberDecorate %input_struct 1 Offset 16
OpMemberDecorate %output_struct 0 Offset 0
OpMemberDecorate %output_struct 1 Offset 16
OpDecorate %input_struct BufferBlock
OpDecorate %inputs DescriptorSet 0
OpDecorate %inputs Binding 0
OpDecorate %output_struct BufferBlock
OpDecorate %outputs DescriptorSet 0
OpDecorate %outputs Binding 1
%void = OpTypeVoid
%main_func = OpTypeFunction %void
%uint = OpTypeInt 32 0
%uvec4 = OpTypeVector %uint 4
%int = OpTypeInt 32 1
%ivec4 = OpTypeVector %int 4
%ivec4_ptr = OpTypePointer Uniform %ivec4
%uvec4_ptr = OpTypePointer Uniform %uvec4
%zero = OpConstant %int 0
%one = OpConstant %int 1
%input_struct = OpTypeStruct %ivec4 %uvec4
%input_struct_ptr = OpTypePointer Uniform %input_struct
%inputs = OpVariable %input_struct_ptr Uniform
%output_struct = OpTypeStruct %uvec4 %ivec4
%output_struct_ptr = OpTypePointer Uniform %output_struct
%outputs = OpVariable %output_struct_ptr Uniform
%func = OpFunction %void None %main_func
%block = OpLabel
%input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero
%input0_ptr = OpAccessChain %uvec4_ptr %inputs %one
%input1 = OpLoad %ivec4 %input1_ptr
%input0 = OpLoad %uvec4 %input0_ptr
%output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero
%output_ptr_ivec4 = OpAccessChain %ivec4_ptr %outputs %one
; Test all variants of SDiv
%result_iadd_0 = OpSDiv %uvec4 %input0 %input1
%result_iadd_1 = OpSDiv %uvec4 %input1 %input0
%result_iadd_2 = OpSDiv %uvec4 %input0 %input0
%result_iadd_3 = OpSDiv %uvec4 %input1 %input1
%result_iadd_4 = OpSDiv %ivec4 %input0 %input0
%result_iadd_5 = OpSDiv %ivec4 %input1 %input1
%result_iadd_6 = OpSDiv %ivec4 %input0 %input1
%result_iadd_7 = OpSDiv %ivec4 %input1 %input0
OpStore %output_ptr_uvec4 %result_iadd_0
OpStore %output_ptr_uvec4 %result_iadd_1
OpStore %output_ptr_uvec4 %result_iadd_2
OpStore %output_ptr_uvec4 %result_iadd_3
OpStore %output_ptr_ivec4 %result_iadd_4
OpStore %output_ptr_ivec4 %result_iadd_5
OpStore %output_ptr_ivec4 %result_iadd_6
OpStore %output_ptr_ivec4 %result_iadd_7
OpReturn
OpFunctionEnd

View File

@ -0,0 +1,77 @@
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 1
; Bound: 30
; Schema: 0
OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %func "main"
OpExecutionMode %func LocalSize 1 1 1
OpSource ESSL 310
OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
OpSourceExtension "GL_GOOGLE_include_directive"
OpMemberDecorate %input_struct 0 Offset 0
OpMemberDecorate %input_struct 1 Offset 16
OpMemberDecorate %output_struct 0 Offset 0
OpMemberDecorate %output_struct 1 Offset 16
OpDecorate %input_struct BufferBlock
OpDecorate %inputs DescriptorSet 0
OpDecorate %inputs Binding 0
OpDecorate %output_struct BufferBlock
OpDecorate %outputs DescriptorSet 0
OpDecorate %outputs Binding 1
%void = OpTypeVoid
%main_func = OpTypeFunction %void
%uint = OpTypeInt 32 0
%uvec4 = OpTypeVector %uint 4
%int = OpTypeInt 32 1
%ivec4 = OpTypeVector %int 4
%ivec4_ptr = OpTypePointer Uniform %ivec4
%uvec4_ptr = OpTypePointer Uniform %uvec4
%zero = OpConstant %int 0
%one = OpConstant %int 1
%input_struct = OpTypeStruct %ivec4 %uvec4
%input_struct_ptr = OpTypePointer Uniform %input_struct
%inputs = OpVariable %input_struct_ptr Uniform
%output_struct = OpTypeStruct %uvec4 %ivec4
%output_struct_ptr = OpTypePointer Uniform %output_struct
%outputs = OpVariable %output_struct_ptr Uniform
%func = OpFunction %void None %main_func
%block = OpLabel
%input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero
%input0_ptr = OpAccessChain %uvec4_ptr %inputs %one
%input1 = OpLoad %ivec4 %input1_ptr
%input0 = OpLoad %uvec4 %input0_ptr
%output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero
%output_ptr_ivec4 = OpAccessChain %ivec4_ptr %outputs %one
; Test all variants of ShiftRightLogical
%result_iadd_0 = OpShiftRightLogical %uvec4 %input0 %input1
%result_iadd_1 = OpShiftRightLogical %uvec4 %input1 %input0
%result_iadd_2 = OpShiftRightLogical %uvec4 %input0 %input0
%result_iadd_3 = OpShiftRightLogical %uvec4 %input1 %input1
%result_iadd_4 = OpShiftRightLogical %ivec4 %input0 %input0
%result_iadd_5 = OpShiftRightLogical %ivec4 %input1 %input1
%result_iadd_6 = OpShiftRightLogical %ivec4 %input0 %input1
%result_iadd_7 = OpShiftRightLogical %ivec4 %input1 %input0
OpStore %output_ptr_uvec4 %result_iadd_0
OpStore %output_ptr_uvec4 %result_iadd_1
OpStore %output_ptr_uvec4 %result_iadd_2
OpStore %output_ptr_uvec4 %result_iadd_3
OpStore %output_ptr_ivec4 %result_iadd_4
OpStore %output_ptr_ivec4 %result_iadd_5
OpStore %output_ptr_ivec4 %result_iadd_6
OpStore %output_ptr_ivec4 %result_iadd_7
OpReturn
OpFunctionEnd

View File

@ -0,0 +1,97 @@
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 1
; Bound: 30
; Schema: 0
OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %func_alt "main2" %frag_in %frag_out
OpEntryPoint GLCompute %func "main"
OpExecutionMode %func LocalSize 1 1 1
OpSource ESSL 310
OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
OpSourceExtension "GL_GOOGLE_include_directive"
OpMemberDecorate %input_struct 0 Offset 0
OpMemberDecorate %input_struct 1 Offset 16
OpMemberDecorate %output_struct 0 Offset 0
OpMemberDecorate %output_struct 1 Offset 16
OpDecorate %input_struct BufferBlock
OpDecorate %inputs DescriptorSet 0
OpDecorate %inputs Binding 0
OpDecorate %inputs Restrict
OpDecorate %output_struct BufferBlock
OpDecorate %outputs DescriptorSet 0
OpDecorate %outputs Binding 1
OpDecorate %outputs Restrict
OpDecorate %frag_in Location 0
OpDecorate %frag_out Location 0
%void = OpTypeVoid
%main_func = OpTypeFunction %void
%uint = OpTypeInt 32 0
%uvec4 = OpTypeVector %uint 4
%int = OpTypeInt 32 1
%ivec4 = OpTypeVector %int 4
%ivec4_ptr = OpTypePointer Uniform %ivec4
%uvec4_ptr = OpTypePointer Uniform %uvec4
%float = OpTypeFloat 32
%vec4 = OpTypeVector %float 4
%vec4_input_ptr = OpTypePointer Input %vec4
%vec4_output_ptr = OpTypePointer Output %vec4
%zero = OpConstant %int 0
%one = OpConstant %int 1
%input_struct = OpTypeStruct %ivec4 %uvec4
%input_struct_ptr = OpTypePointer Uniform %input_struct
%inputs = OpVariable %input_struct_ptr Uniform
%output_struct = OpTypeStruct %uvec4 %ivec4
%output_struct_ptr = OpTypePointer Uniform %output_struct
%outputs = OpVariable %output_struct_ptr Uniform
%frag_in = OpVariable %vec4_input_ptr Input
%frag_out = OpVariable %vec4_output_ptr Output
%func = OpFunction %void None %main_func
%block = OpLabel
%input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero
%input0_ptr = OpAccessChain %uvec4_ptr %inputs %one
%input1 = OpLoad %ivec4 %input1_ptr
%input0 = OpLoad %uvec4 %input0_ptr
%output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero
%output_ptr_ivec4 = OpAccessChain %ivec4_ptr %outputs %one
; Test all variants of IAdd
%result_iadd_0 = OpIAdd %uvec4 %input0 %input1
%result_iadd_1 = OpIAdd %uvec4 %input1 %input0
%result_iadd_2 = OpIAdd %uvec4 %input0 %input0
%result_iadd_3 = OpIAdd %uvec4 %input1 %input1
%result_iadd_4 = OpIAdd %ivec4 %input0 %input0
%result_iadd_5 = OpIAdd %ivec4 %input1 %input1
%result_iadd_6 = OpIAdd %ivec4 %input0 %input1
%result_iadd_7 = OpIAdd %ivec4 %input1 %input0
OpStore %output_ptr_uvec4 %result_iadd_0
OpStore %output_ptr_uvec4 %result_iadd_1
OpStore %output_ptr_uvec4 %result_iadd_2
OpStore %output_ptr_uvec4 %result_iadd_3
OpStore %output_ptr_ivec4 %result_iadd_4
OpStore %output_ptr_ivec4 %result_iadd_5
OpStore %output_ptr_ivec4 %result_iadd_6
OpStore %output_ptr_ivec4 %result_iadd_7
OpReturn
OpFunctionEnd
%func_alt = OpFunction %void None %main_func
%block_alt = OpLabel
%frag_input_value = OpLoad %vec4 %frag_in
OpStore %frag_out %frag_input_value
OpReturn
OpFunctionEnd

View File

@ -0,0 +1,67 @@
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 1
; Bound: 38
; Schema: 0
OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %4 "main"
OpExecutionMode %4 LocalSize 1 1 1
OpSource ESSL 310
OpName %4 "main"
OpName %10 "SSBO0"
OpMemberName %10 0 "scalar"
OpMemberName %10 1 "vec2_val"
OpMemberName %10 2 "vec3_val"
OpMemberName %10 3 "vec4_val"
OpName %12 ""
OpMemberDecorate %10 0 Offset 0
OpMemberDecorate %10 1 Offset 8
OpMemberDecorate %10 2 Offset 16
OpMemberDecorate %10 3 Offset 32
OpDecorate %10 BufferBlock
OpDecorate %12 DescriptorSet 0
OpDecorate %12 Binding 0
%2 = OpTypeVoid
%3 = OpTypeFunction %2
%6 = OpTypeFloat 32
%7 = OpTypeVector %6 2
%8 = OpTypeVector %6 3
%9 = OpTypeVector %6 4
%10 = OpTypeStruct %6 %7 %8 %9
%11 = OpTypePointer Uniform %10
%12 = OpVariable %11 Uniform
%13 = OpTypeInt 32 1
%14 = OpConstant %13 0
%15 = OpTypePointer Uniform %6
%20 = OpConstant %13 1
%21 = OpTypePointer Uniform %7
%26 = OpConstant %13 2
%27 = OpTypePointer Uniform %8
%32 = OpConstant %13 3
%33 = OpTypePointer Uniform %9
%4 = OpFunction %2 None %3
%5 = OpLabel
%16 = OpAccessChain %15 %12 %14
%17 = OpLoad %6 %16
%18 = OpQuantizeToF16 %6 %17
%19 = OpAccessChain %15 %12 %14
OpStore %19 %18
%22 = OpAccessChain %21 %12 %20
%23 = OpLoad %7 %22
%24 = OpQuantizeToF16 %7 %23
%25 = OpAccessChain %21 %12 %20
OpStore %25 %24
%28 = OpAccessChain %27 %12 %26
%29 = OpLoad %8 %28
%30 = OpQuantizeToF16 %8 %29
%31 = OpAccessChain %27 %12 %26
OpStore %31 %30
%34 = OpAccessChain %33 %12 %32
%35 = OpLoad %9 %34
%36 = OpQuantizeToF16 %9 %35
%37 = OpAccessChain %33 %12 %32
OpStore %37 %36
OpReturn
OpFunctionEnd

View File

@ -0,0 +1,55 @@
#version 310 es
layout(local_size_x = 8, local_size_y = 8) in;
layout(binding = 0) uniform sampler2D uHeight;
layout(binding = 1) uniform sampler2D uDisplacement;
layout(rgba16f, binding = 2) uniform writeonly mediump image2D iHeightDisplacement;
layout(rgba16f, binding = 3) uniform writeonly mediump image2D iGradJacobian;
layout(binding = 4) uniform UBO
{
vec4 uInvSize;
vec4 uScale;
};
mediump float jacobian(mediump vec2 dDdx, mediump vec2 dDdy)
{
return (1.0 + dDdx.x) * (1.0 + dDdy.y) - dDdx.y * dDdy.x;
}
#define LAMBDA 1.2
void main()
{
vec4 uv = (vec2(gl_GlobalInvocationID.xy) * uInvSize.xy).xyxy + 0.5 * uInvSize;
float h = textureLod(uHeight, uv.xy, 0.0).x;
// Compute the heightmap gradient by simple differentiation.
float x0 = textureLodOffset(uHeight, uv.xy, 0.0, ivec2(-1, 0)).x;
float x1 = textureLodOffset(uHeight, uv.xy, 0.0, ivec2(+1, 0)).x;
float y0 = textureLodOffset(uHeight, uv.xy, 0.0, ivec2(0, -1)).x;
float y1 = textureLodOffset(uHeight, uv.xy, 0.0, ivec2(0, +1)).x;
vec2 grad = uScale.xy * 0.5 * vec2(x1 - x0, y1 - y0);
// Displacement map must be sampled with a different offset since it's a smaller texture.
vec2 displacement = LAMBDA * textureLod(uDisplacement, uv.zw, 0.0).xy;
// Compute jacobian.
vec2 dDdx = 0.5 * LAMBDA * (
textureLodOffset(uDisplacement, uv.zw, 0.0, ivec2(+1, 0)).xy -
textureLodOffset(uDisplacement, uv.zw, 0.0, ivec2(-1, 0)).xy);
vec2 dDdy = 0.5 * LAMBDA * (
textureLodOffset(uDisplacement, uv.zw, 0.0, ivec2(0, +1)).xy -
textureLodOffset(uDisplacement, uv.zw, 0.0, ivec2(0, -1)).xy);
float j = jacobian(dDdx * uScale.z, dDdy * uScale.z);
displacement = vec2(0.0);
// Read by vertex shader/tess shader.
imageStore(iHeightDisplacement, ivec2(gl_GlobalInvocationID.xy), vec4(h, displacement, 0.0));
// Read by fragment shader.
imageStore(iGradJacobian, ivec2(gl_GlobalInvocationID.xy), vec4(grad, j, 0.0));
}

View File

@ -0,0 +1,28 @@
#version 310 es
layout(local_size_x = 1) in;
layout(std430, binding = 0) readonly buffer SSBO
{
vec4 in_data[];
};
layout(std430, binding = 1) writeonly buffer SSBO2
{
vec4 out_data[];
};
layout(std430, binding = 2) buffer SSBO3
{
uint counter;
};
void main()
{
uint ident = gl_GlobalInvocationID.x;
vec4 idata = in_data[ident];
if (dot(idata, vec4(1.0, 5.0, 6.0, 2.0)) > 8.2)
{
out_data[atomicAdd(counter, 1u)] = idata;
}
}

View File

@ -0,0 +1,23 @@
#version 310 es
void main()
{
int signed_value = 0;
uint unsigned_value = 0u;
int s = bitfieldExtract(signed_value, 5, 20);
uint u = bitfieldExtract(unsigned_value, 6, 21);
s = bitfieldInsert(s, 40, 5, 4);
u = bitfieldInsert(u, 60u, 5, 4);
u = bitfieldReverse(u);
s = bitfieldReverse(s);
int v0 = bitCount(u);
int v1 = bitCount(s);
int v2 = findMSB(u);
int v3 = findMSB(s);
int v4 = findLSB(u);
int v5 = findLSB(s);
}

View File

@ -0,0 +1,54 @@
#version 310 es
// We write in all paths (and no reads), so should just be out.
void out_test_0(int cond, inout int i)
{
if (cond == 0)
i = 40;
else
i = 60;
}
// We write in all paths (and no reads), so should just be out.
void out_test_1(int cond, inout int i)
{
switch (cond)
{
case 40:
i = 40;
break;
default:
i = 70;
break;
}
}
// We don't write in all paths, so should be inout.
void inout_test_0(int cond, inout int i)
{
if (cond == 0)
i = 40;
}
void inout_test_1(int cond, inout int i)
{
switch (cond)
{
case 40:
i = 40;
break;
}
}
void main()
{
int cond = 40;
int i = 50;
out_test_0(cond, i);
out_test_1(cond, i);
inout_test_0(cond, i);
inout_test_1(cond, i);
}

View File

@ -0,0 +1,12 @@
#version 310 es
layout(local_size_x = 1) in;
layout(binding = 1) coherent restrict writeonly buffer SSBO
{
vec4 value;
};
void main()
{
value = vec4(20.0);
}

View File

@ -0,0 +1,14 @@
#version 310 es
layout(local_size_x = 1) in;
layout(binding = 1) coherent restrict writeonly buffer SSBO
{
ivec4 value;
};
layout(r32i, binding = 3) coherent readonly restrict uniform mediump iimage2D uImage;
void main()
{
value = imageLoad(uImage, ivec2(10));
}

View File

@ -0,0 +1,26 @@
#version 310 es
layout(local_size_x = 4) in;
layout(std430, binding = 0) readonly buffer SSBO
{
float in_data[];
};
layout(std430, binding = 1) writeonly buffer SSBO2
{
float out_data[];
};
layout(std430, binding = 2) buffer SSBO3
{
uint count;
};
void main()
{
uint ident = gl_GlobalInvocationID.x;
float idata = in_data[ident];
if (idata > 12.0)
out_data[atomicAdd(count, 1u)] = idata;
}

View File

@ -0,0 +1,31 @@
#version 310 es
layout(local_size_x = 1) in;
layout(std430, binding = 0) readonly buffer SSBO
{
mat4 mvp;
vec4 in_data[];
};
layout(std430, binding = 1) writeonly buffer SSBO2
{
vec4 out_data[];
};
int i;
void main()
{
uint ident = gl_GlobalInvocationID.x;
i = 0;
vec4 idat = in_data[ident];
do
{
idat = mvp * idat;
i++;
} while(i < 16);
out_data[ident] = idat;
}

View File

@ -0,0 +1,12 @@
#version 310 es
layout(local_size_x = 1) in;
layout(rgba8, binding = 0) uniform readonly mediump image2D uImageIn;
layout(rgba8, binding = 1) uniform writeonly mediump image2D uImageOut;
void main()
{
vec4 v = imageLoad(uImageIn, ivec2(gl_GlobalInvocationID.xy) + imageSize(uImageIn));
imageStore(uImageOut, ivec2(gl_GlobalInvocationID.xy), v);
}

View File

@ -0,0 +1,18 @@
#version 310 es
layout(local_size_x = 1) in;
layout(std430, binding = 0) writeonly buffer SSBO
{
vec4 out_data[];
};
void main()
{
vec4 v;
v.x = 10.0;
v.y = 30.0;
v.z = 70.0;
v.w = 90.0;
out_data[gl_GlobalInvocationID.x] = v;
out_data[gl_GlobalInvocationID.x].y = 20.0;
}

View File

@ -0,0 +1,98 @@
#version 310 es
layout(local_size_x = 1) in;
layout(std430, binding = 0) readonly buffer SSBO
{
mat4 mvp;
vec4 in_data[];
};
layout(std430, binding = 1) writeonly buffer SSBO2
{
vec4 out_data[];
};
void main()
{
uint ident = gl_GlobalInvocationID.x;
vec4 idat = in_data[ident];
int k = 0;
uint i = 0u;
if (idat.y == 20.0)
{
do
{
k = k * 2;
i++;
} while (i < ident);
}
switch (k)
{
case 10:
for (;;)
{
i++;
if (i > 10u)
break;
}
break;
default:
for (;;)
{
i += 2u;
if (i > 20u)
break;
}
break;
}
while (k < 10)
{
idat *= 2.0;
k++;
}
for (uint i = 0u; i < 16u; i++, k++)
for (uint j = 0u; j < 30u; j++)
idat = mvp * idat;
k = 0;
for (;;)
{
k++;
if (k > 10)
{
k += 2;
}
else
{
k += 3;
continue;
}
k += 10;
}
k = 0;
do
{
k++;
} while (k > 10);
int l = 0;
for (;; l++)
{
if (l == 5)
{
continue;
}
idat += 1.0;
}
out_data[ident] = idat;
}

View File

@ -0,0 +1,14 @@
#version 310 es
layout(local_size_x = 1) in;
layout(std430, binding = 1) writeonly buffer SSBO2
{
mat3 out_data[];
};
void main()
{
uint ident = gl_GlobalInvocationID.x;
out_data[ident] = mat3(vec3(10.0), vec3(20.0), vec3(40.0));
}

shaders-msl/comp/mod.comp
View File

@ -0,0 +1,26 @@
#version 310 es
layout(local_size_x = 1) in;
layout(std430, binding = 0) readonly buffer SSBO
{
vec4 in_data[];
};
layout(std430, binding = 1) writeonly buffer SSBO2
{
vec4 out_data[];
};
void main()
{
uint ident = gl_GlobalInvocationID.x;
vec4 v = mod(in_data[ident], out_data[ident]);
out_data[ident] = v;
uvec4 vu = floatBitsToUint(in_data[ident]) % floatBitsToUint(out_data[ident]);
out_data[ident] = uintBitsToFloat(vu);
ivec4 vi = floatBitsToInt(in_data[ident]) % floatBitsToInt(out_data[ident]);
out_data[ident] = intBitsToFloat(vi);
}

View File

@ -0,0 +1,23 @@
#version 310 es
layout(local_size_x = 1) in;
layout(std430, binding = 0) readonly buffer SSBO
{
vec4 in_data[];
};
layout(std430, binding = 1) writeonly buffer SSBO2
{
vec4 out_data[];
};
void main()
{
uint ident = gl_GlobalInvocationID.x;
vec4 i;
//vec4 v = frexp(in_data[ident], i);
//out_data[ident] = ldexp(v, i);
vec4 v = modf(in_data[ident], i);
out_data[ident] = v;
}

View File

@ -0,0 +1,33 @@
#version 310 es
layout(local_size_x = 1) in;
layout(std430, binding = 1) writeonly buffer SSBO2
{
vec4 out_data[];
};
void main()
{
uint ident = gl_GlobalInvocationID.x;
if (ident == 2u)
{
out_data[ident] = vec4(20.0);
}
else if (ident == 4u)
{
out_data[ident] = vec4(10.0);
return;
}
for (int i = 0; i < 20; i++)
{
if (i == 10)
break;
return;
}
out_data[ident] = vec4(10.0);
}

View File

@ -0,0 +1,27 @@
#version 310 es
layout(local_size_x = 1) in;
layout(std430, binding = 0) buffer SSBO
{
int a;
};
void main()
{
a += 10;
a -= 10;
a *= 10;
a /= 10;
a <<= 2;
a >>= 3;
a &= 40;
a ^= 10;
a %= 40;
a |= 1;
bool c = false;
bool d = true;
c = c && d;
d = d || c;
a = c && d ? 1 : 0;
}

View File

@ -0,0 +1,27 @@
#version 310 es
layout(local_size_x = 4) in;
shared float sShared[gl_WorkGroupSize.x];
layout(std430, binding = 0) readonly buffer SSBO
{
float in_data[];
};
layout(std430, binding = 1) writeonly buffer SSBO2
{
float out_data[];
};
void main()
{
uint ident = gl_GlobalInvocationID.x;
float idata = in_data[ident];
sShared[gl_LocalInvocationIndex] = idata;
memoryBarrierShared();
barrier();
out_data[ident] = sShared[gl_WorkGroupSize.x - gl_LocalInvocationIndex - 1u];
}

View File

@ -0,0 +1,24 @@
#version 310 es
layout(local_size_x = 1) in;
struct Foo
{
mat4 m;
};
layout(std430, binding = 0) readonly buffer SSBO
{
Foo in_data[];
};
layout(std430, binding = 1) writeonly buffer SSBO2
{
Foo out_data[];
};
void main()
{
uint ident = gl_GlobalInvocationID.x;
out_data[ident].m = in_data[ident].m * in_data[ident].m;
}

View File

@ -0,0 +1,76 @@
#version 310 es
layout(local_size_x = 1) in;
struct S0
{
vec2 a[1];
float b;
};
struct S1
{
vec3 a;
float b;
};
struct S2
{
vec3 a[1];
float b;
};
struct S3
{
vec2 a;
float b;
};
struct S4
{
vec2 c;
};
struct Content
{
S0 m0s[1];
S1 m1s[1];
S2 m2s[1];
S0 m0;
S1 m1;
S2 m2;
S3 m3;
float m4;
S4 m3s[8];
};
layout(binding = 1, std430) buffer SSBO1
{
Content content;
Content content1[2];
Content content2;
layout(column_major) mat2 m0;
layout(column_major) mat2 m1;
layout(column_major) mat2x3 m2[4];
layout(column_major) mat3x2 m3;
layout(row_major) mat2 m4;
layout(row_major) mat2 m5[9];
layout(row_major) mat2x3 m6[4][2];
layout(row_major) mat3x2 m7;
float array[];
} ssbo_430;
layout(binding = 0, std140) buffer SSBO0
{
Content content;
Content content1[2];
Content content2;
float array[];
} ssbo_140;
void main()
{
ssbo_430.content = ssbo_140.content;
}

View File

@ -0,0 +1,40 @@
#version 310 es
layout(local_size_x = 1) in;
layout(std430, binding = 0) readonly buffer SSBO
{
mat4 mvp;
vec4 in_data[];
};
layout(std430, binding = 1) writeonly buffer SSBO2
{
vec4 out_data[];
};
void main()
{
uint ident = gl_GlobalInvocationID.x;
vec4 idat = in_data[ident];
int k = 0;
// Continue with side effects.
while (++k < 10)
{
idat *= 2.0;
k++;
}
// Again used here ...
for (uint i = 0u; i < 16u; i++, k++)
for (uint j = 0u; j < 30u; j++)
idat = mvp * idat;
do
{
k++;
} while (k > 10);
out_data[ident] = idat;
}

View File

@ -0,0 +1,45 @@
#version 310 es
layout(local_size_x = 1) in;
struct S0
{
vec4 a;
};
struct S1
{
vec4 a;
};
vec4 overload(S0 s0)
{
return s0.a;
}
vec4 overload(S1 s1)
{
return s1.a;
}
layout(std430, binding = 0) buffer SSBO0
{
S0 s0s[];
};
layout(std430, binding = 1) buffer SSBO1
{
S1 s1s[];
};
layout(std430, binding = 2) buffer SSBO2
{
vec4 outputs[];
};
void main()
{
S0 s0 = s0s[gl_GlobalInvocationID.x];
S1 s1 = s1s[gl_GlobalInvocationID.x];
outputs[gl_GlobalInvocationID.x] = overload(s0) + overload(s1);
}

View File

@ -0,0 +1,17 @@
#version 310 es
layout(local_size_x = 1) in;
layout(std430, binding = 0) buffer SSBO
{
uint inputs[];
};
layout(std430, binding = 0) buffer SSBO2
{
uint outputs[];
};
void main()
{
outputs[gl_GlobalInvocationID.x] = inputs[gl_GlobalInvocationID.x] / 29u;
}

View File

@ -341,9 +341,10 @@ struct SPIRExtension : IVariant
// so in order to avoid conflicts, we can't stick them in the ids array.
struct SPIREntryPoint
{
SPIREntryPoint(uint32_t self_, spv::ExecutionModel execution_model, std::string entry_name)
SPIREntryPoint(uint32_t self_, spv::ExecutionModel execution_model, const std::string &entry_name)
: self(self_)
, name(std::move(entry_name))
, name(entry_name)
, orig_name(entry_name)
, model(execution_model)
{
}
@ -351,6 +352,7 @@ struct SPIREntryPoint
uint32_t self = 0;
std::string name;
std::string orig_name;
std::vector<uint32_t> interface_variables;
uint64_t flags = 0;
@ -858,7 +860,9 @@ struct SPIRConstant : IVariant
uint32_t constant_type;
ConstantMatrix m;
bool specialization = false; // If the constant is a specialization constant (i.e. created with OpSpecConstant*).
bool specialization = false; // If this constant is a specialization constant (i.e. created with OpSpecConstant*).
bool is_used_as_array_length =
false; // If this constant is used as an array length which creates specialization restrictions on some backends.
// For composites which are constant arrays, etc.
std::vector<uint32_t> subconstants;
@ -916,6 +920,10 @@ public:
{
return type;
}
uint32_t get_id() const
{
return holder ? holder->self : 0;
}
bool empty() const
{
return !holder;

View File

@ -171,9 +171,8 @@ void CompilerCPP::emit_resources()
auto &type = get<SPIRType>(var.basetype);
if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassUniform &&
!is_hidden_variable(var) &&
(meta[type.self].decoration.decoration_flags &
((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))))
!is_hidden_variable(var) && (meta[type.self].decoration.decoration_flags &
((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))))
{
emit_buffer_block(var);
}

View File

@ -168,7 +168,7 @@ bool Compiler::block_is_pure(const SPIRBlock &block)
case OpMemoryBarrier:
return false;
// OpExtInst is potentially impure depending on extension, but GLSL builtins are at least pure.
// OpExtInst is potentially impure depending on extension, but GLSL builtins are at least pure.
default:
break;
@ -457,7 +457,7 @@ bool Compiler::is_hidden_variable(const SPIRVariable &var, bool include_builtins
// Combined image samplers are always considered active as they are "magic" variables.
if (find_if(begin(combined_image_samplers), end(combined_image_samplers), [&var](const CombinedImageSampler &samp) {
return samp.combined_id == var.self;
}) != end(combined_image_samplers))
}) != end(combined_image_samplers))
{
return false;
}
@ -1498,18 +1498,21 @@ void Compiler::parse(const Instruction &instruction)
case OpTypeArray:
{
uint32_t id = ops[0];
auto &base = get<SPIRType>(ops[1]);
auto &arraybase = set<SPIRType>(id);
arraybase = base;
uint32_t tid = ops[1];
auto &base = get<SPIRType>(tid);
auto *c = maybe_get<SPIRConstant>(ops[2]);
arraybase = base;
arraybase.parent_type = tid;
uint32_t cid = ops[2];
mark_used_as_array_length(cid);
auto *c = maybe_get<SPIRConstant>(cid);
bool literal = c && !c->specialization;
arraybase.array_size_literal.push_back(literal);
arraybase.array.push_back(literal ? c->scalar() : ops[2]);
arraybase.parent_type = ops[1];
arraybase.array.push_back(literal ? c->scalar() : cid);
// Do NOT set arraybase.self!
break;
}
@ -2523,7 +2526,7 @@ vector<string> Compiler::get_entry_points() const
{
vector<string> entries;
for (auto &entry : entry_points)
entries.push_back(entry.second.name);
entries.push_back(entry.second.orig_name);
return entries;
}
@ -2536,8 +2539,9 @@ void Compiler::set_entry_point(const std::string &name)
SPIREntryPoint &Compiler::get_entry_point(const std::string &name)
{
auto itr =
find_if(begin(entry_points), end(entry_points),
[&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool { return entry.second.name == name; });
find_if(begin(entry_points), end(entry_points), [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool {
return entry.second.orig_name == name;
});
if (itr == end(entry_points))
SPIRV_CROSS_THROW("Entry point does not exist.");
@ -2548,8 +2552,9 @@ SPIREntryPoint &Compiler::get_entry_point(const std::string &name)
const SPIREntryPoint &Compiler::get_entry_point(const std::string &name) const
{
auto itr =
find_if(begin(entry_points), end(entry_points),
[&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool { return entry.second.name == name; });
find_if(begin(entry_points), end(entry_points), [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool {
return entry.second.orig_name == name;
});
if (itr == end(entry_points))
SPIRV_CROSS_THROW("Entry point does not exist.");
@ -2557,6 +2562,11 @@ const SPIREntryPoint &Compiler::get_entry_point(const std::string &name) const
return itr->second;
}
const string &Compiler::get_cleansed_entry_point_name(const std::string &name) const
{
return get_entry_point(name).name;
}
const SPIREntryPoint &Compiler::get_entry_point() const
{
return entry_points.find(entry_point)->second;
@ -2713,7 +2723,7 @@ void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIR
[&param](const SPIRFunction::CombinedImageSamplerParameter &p) {
return param.image_id == p.image_id && param.sampler_id == p.sampler_id &&
param.global_image == p.global_image && param.global_sampler == p.global_sampler;
});
});
if (itr == end(caller.combined_parameters))
{
@ -2850,7 +2860,7 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar
auto itr = find_if(begin(compiler.combined_image_samplers), end(compiler.combined_image_samplers),
[image_id, sampler_id](const CombinedImageSampler &combined) {
return combined.image_id == image_id && combined.sampler_id == sampler_id;
});
});
if (itr == end(compiler.combined_image_samplers))
{
@ -2926,6 +2936,31 @@ const SPIRConstant &Compiler::get_constant(uint32_t id) const
return get<SPIRConstant>(id);
}
// Recursively marks any constants referenced by the specified constant instruction as being used
// as an array length. The id must be a constant instruction (SPIRConstant or SPIRConstantOp).
void Compiler::mark_used_as_array_length(uint32_t id)
{
switch (ids[id].get_type())
{
case TypeConstant:
get<SPIRConstant>(id).is_used_as_array_length = true;
break;
case TypeConstantOp:
{
auto &cop = get<SPIRConstantOp>(id);
for (uint32_t arg_id : cop.arguments)
mark_used_as_array_length(arg_id);
}
case TypeUndef:
return;
default:
SPIRV_CROSS_THROW("Array lengths must be a constant instruction (OpConstant.. or OpSpecConstant...).");
}
}
static bool exists_unaccessed_path_to_return(const CFG &cfg, uint32_t block, const unordered_set<uint32_t> &blocks)
{
// This block accesses the variable.
@ -3167,8 +3202,8 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry)
break;
}
// Atomics shouldn't be able to access function-local variables.
// Some GLSL builtins access a pointer.
// Atomics shouldn't be able to access function-local variables.
// Some GLSL builtins access a pointer.
default:
break;

View File

@ -268,6 +268,16 @@ public:
const SPIREntryPoint &get_entry_point(const std::string &name) const;
SPIREntryPoint &get_entry_point(const std::string &name);
// Some shader languages restrict the names that can be given to entry points, and the
// corresponding backend will automatically rename an entry point name, during the call
// to compile() if it is illegal. For example, the common entry point name main() is
// illegal in MSL, and is renamed to an alternate name by the MSL backend.
// Given the original entry point name contained in the SPIR-V, this function returns
// the name, as updated by the backend during the call to compile(). If the name is not
// illegal, and has not been renamed, or if this function is called before compile(),
// this function will simply return the same name.
const std::string &get_cleansed_entry_point_name(const std::string &name) const;
// Query and modify OpExecutionMode.
uint64_t get_execution_mode_mask() const;
void unset_execution_mode(spv::ExecutionMode mode);
@ -497,6 +507,7 @@ protected:
bool expression_is_lvalue(uint32_t id) const;
bool variable_storage_is_aliased(const SPIRVariable &var);
SPIRVariable *maybe_get_backing_variable(uint32_t chain);
void mark_used_as_array_length(uint32_t id);
void register_read(uint32_t expr, uint32_t chain, bool forwarded);
void register_write(uint32_t chain);

View File

@ -2686,7 +2686,7 @@ string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id)
if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
[result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
return tmp.first == result_type && tmp.second == result_id;
}) == end(header.declare_temporary))
{
header.declare_temporary.emplace_back(result_type, result_id);
force_recompile = true;
@ -2913,9 +2913,8 @@ void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result
uint32_t op2, uint32_t op3, const char *op)
{
bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
emit_op(result_type, result_id,
join(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(op2), ", ",
to_expression(op3), ")"),
emit_op(result_type, result_id, join(op, "(", to_expression(op0), ", ", to_expression(op1), ", ",
to_expression(op2), ", ", to_expression(op3), ")"),
forward);
inherit_expression_dependencies(result_id, op0);
@ -3556,15 +3555,6 @@ string CompilerGLSL::to_function_args(uint32_t img, const SPIRType &imgtype, boo
return farg_str;
}
// Some languages may have additional standard library functions whose names conflict
// with a function defined in the body of the shader. Subclasses can override to rename
// the function name defined in the shader to avoid conflict with the language standard
// functions (eg. MSL includes saturate()).
string CompilerGLSL::clean_func_name(string func_name)
{
return func_name;
}
void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t)
{
GLSLstd450 op = static_cast<GLSLstd450>(eop);
@ -4052,6 +4042,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
bool access_chain_is_arrayed = false;
bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base);
bool vector_is_packed = false;
bool pending_array_enclose = false;
bool dimension_flatten = false;
@ -4148,12 +4139,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
}
}
if (member_is_packed_type(*type, index))
{
auto &membertype = get<SPIRType>(type->member_types[index]);
expr = unpack_expression_type(expr, membertype);
}
vector_is_packed = member_is_packed_type(*type, index);
row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index);
type = &get<SPIRType>(type->member_types[index]);
}
@ -4179,6 +4165,9 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
// Vector -> Scalar
else if (type->vecsize > 1)
{
if (vector_is_packed)
expr = unpack_expression_type(expr, *type);
if (index_is_literal)
{
expr += ".";
@ -5081,7 +5070,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
string funexpr;
vector<string> arglist;
funexpr += clean_func_name(to_name(func)) + "(";
funexpr += to_name(func) + "(";
for (uint32_t i = 0; i < length; i++)
{
// Do not pass in separate images or samplers if we're remapping
@ -5893,8 +5882,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
register_read(ops[1], ops[2], should_forward(ops[2]));
break;
// OpAtomicStore unimplemented. Not sure what would use that.
// OpAtomicLoad seems to only be relevant for atomic counters.
case OpAtomicIIncrement:
forced_temporaries.insert(ops[1]);
@ -6453,7 +6442,7 @@ bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, u
// Checks whether the member is a packed data type that might need to be unpacked.
// GLSL does not define packed data types, but certain subclasses do.
bool CompilerGLSL::member_is_packed_type(const SPIRType &type, uint32_t index)
bool CompilerGLSL::member_is_packed_type(const SPIRType &type, uint32_t index) const
{
return has_member_decoration(type.self, index, DecorationCPacked);
}
@ -7015,11 +7004,11 @@ void CompilerGLSL::emit_function_prototype(SPIRFunction &func, uint64_t return_f
if (func.self == entry_point)
{
decl += clean_func_name("main");
decl += "main";
processing_entry_point = true;
}
else
decl += clean_func_name(to_name(func.self));
decl += to_name(func.self);
decl += "(";
vector<string> arglist;

View File

@ -211,7 +211,6 @@ protected:
uint32_t grad_x, uint32_t grad_y, uint32_t lod, uint32_t coffset,
uint32_t offset, uint32_t bias, uint32_t comp, uint32_t sample,
bool *p_forward);
virtual std::string clean_func_name(std::string func_name);
virtual void emit_buffer_block(const SPIRVariable &type);
virtual void emit_push_constant_block(const SPIRVariable &var);
virtual void emit_uniform(const SPIRVariable &var);
@ -283,7 +282,7 @@ protected:
bool is_non_native_row_major_matrix(uint32_t id);
bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index);
bool member_is_packed_type(const SPIRType &type, uint32_t index);
bool member_is_packed_type(const SPIRType &type, uint32_t index) const;
virtual std::string convert_row_major_matrix(std::string exp_str);
std::unordered_set<std::string> local_variable_names;
@ -391,7 +390,7 @@ protected:
std::string to_member_name(const SPIRType &type, uint32_t index);
std::string type_to_glsl_constructor(const SPIRType &type);
std::string argument_decl(const SPIRFunction::Parameter &arg);
std::string to_qualifiers_glsl(uint32_t id);
virtual std::string to_qualifiers_glsl(uint32_t id);
const char *to_precision_qualifiers_glsl(uint32_t id);
virtual const char *to_storage_qualifiers_glsl(const SPIRVariable &var);
const char *flags_to_precision_qualifiers_glsl(const SPIRType &type, uint64_t flags);
@ -423,7 +422,7 @@ protected:
// and force recompile.
bool check_atomic_image(uint32_t id);
void replace_illegal_names();
virtual void replace_illegal_names();
void replace_fragment_output(SPIRVariable &var);
void replace_fragment_outputs();

View File

@ -1052,10 +1052,7 @@ void CompilerHLSL::emit_resources()
if (requires_op_fmod)
{
static const char *types[] = {
"float",
"float2",
"float3",
"float4",
"float", "float2", "float3", "float4",
};
for (auto &type : types)

View File

@ -18,7 +18,6 @@
#include "GLSL.std.450.h"
#include <algorithm>
#include <cassert>
#include <numeric>
using namespace spv;
@ -31,9 +30,6 @@ CompilerMSL::CompilerMSL(vector<uint32_t> spirv_, vector<MSLVertexAttr> *p_vtx_a
vector<MSLResourceBinding> *p_res_bindings)
: CompilerGLSL(move(spirv_))
{
populate_func_name_overrides();
populate_var_name_overrides();
if (p_vtx_attrs)
for (auto &va : *p_vtx_attrs)
vtx_attrs_by_location[va.location] = &va;
@ -47,9 +43,6 @@ CompilerMSL::CompilerMSL(const uint32_t *ir, size_t word_count, MSLVertexAttr *p
MSLResourceBinding *p_res_bindings, size_t res_bindings_count)
: CompilerGLSL(ir, word_count)
{
populate_func_name_overrides();
populate_var_name_overrides();
if (p_vtx_attrs)
for (size_t i = 0; i < vtx_attrs_count; i++)
vtx_attrs_by_location[p_vtx_attrs[i].location] = &p_vtx_attrs[i];
@ -59,27 +52,12 @@ CompilerMSL::CompilerMSL(const uint32_t *ir, size_t word_count, MSLVertexAttr *p
resource_bindings.push_back(&p_res_bindings[i]);
}
// Populate the collection of function names that need to be overridden
void CompilerMSL::populate_func_name_overrides()
{
func_name_overrides["main"] = "main0";
func_name_overrides["saturate"] = "saturate0";
}
void CompilerMSL::populate_var_name_overrides()
{
var_name_overrides["kernel"] = "kernel0";
var_name_overrides["bias"] = "bias0";
}
string CompilerMSL::compile()
{
// Force a classic "C" locale, reverts when function returns
ClassicLocale classic_locale;
// Set main function name if it was explicitly set
if (!options.entry_point_name.empty())
set_name(entry_point, options.entry_point_name);
replace_illegal_names();
non_stage_in_input_var_ids.clear();
struct_member_padding.clear();
@ -102,6 +80,14 @@ string CompilerMSL::compile()
localize_global_variables();
extract_global_variables_from_functions();
// Mark any non-stage-in structs to be tightly packed.
mark_packable_structs();
// Metal does not allow dynamic array lengths.
// Resolve any specialization constants that are used for array lengths.
if (options.resolve_specialized_array_lengths)
resolve_specialized_array_lengths();
// Do not deal with GLES-isms like precision, older extensions and such.
CompilerGLSL::options.vulkan_semantics = true;
CompilerGLSL::options.es = false;
@ -114,6 +100,7 @@ string CompilerMSL::compile()
backend.swizzle_is_function = false;
backend.shared_is_implied = false;
backend.native_row_major_matrix = false;
backend.flexible_member_array_supported = false;
uint32_t pass_count = 0;
do
@ -184,7 +171,7 @@ void CompilerMSL::preprocess_op_codes()
}
}
// Move the Private global variables to the entry function.
// Move the Private and Workgroup global variables to the entry function.
// Non-constant variables cannot have global scope in Metal.
void CompilerMSL::localize_global_variables()
{
@ -192,11 +179,12 @@ void CompilerMSL::localize_global_variables()
auto iter = global_variables.begin();
while (iter != global_variables.end())
{
uint32_t gv_id = *iter;
auto &gbl_var = get<SPIRVariable>(gv_id);
if (gbl_var.storage == StorageClassPrivate)
uint32_t v_id = *iter;
auto &var = get<SPIRVariable>(v_id);
if (var.storage == StorageClassPrivate || var.storage == StorageClassWorkgroup)
{
entry_func.add_local_variable(gv_id);
var.storage = StorageClassFunction;
entry_func.add_local_variable(v_id);
iter = global_variables.erase(iter);
}
else
@ -204,6 +192,21 @@ void CompilerMSL::localize_global_variables()
}
}
// Metal does not allow dynamic array lengths.
// Turn off specialization of any constants that are used for array lengths.
void CompilerMSL::resolve_specialized_array_lengths()
{
for (auto &id : ids)
{
if (id.get_type() == TypeConstant)
{
auto &c = id.get<SPIRConstant>();
if (c.is_used_as_array_length)
c.specialization = false;
}
}
}
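A hedged sketch of how a caller could opt out of this resolution through the new option added in spirv_msl.hpp below (the spirv_words variable is hypothetical):

spirv_cross::CompilerMSL msl(std::move(spirv_words));
auto msl_opts = msl.get_options();
msl_opts.resolve_specialized_array_lengths = false; // default is true
msl.set_options(msl_opts);
std::string source = msl.compile();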
// For any global variable accessed directly by a function,
// extract that variable and add it as an argument to that function.
void CompilerMSL::extract_global_variables_from_functions()
@ -313,6 +316,54 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
}
}
// For all variables that are some form of non-input-output interface block, mark that all the structs
// that are recursively contained within the type referenced by that variable should be packed tightly.
void CompilerMSL::mark_packable_structs()
{
for (auto &id : ids)
{
if (id.get_type() == TypeVariable)
{
auto &var = id.get<SPIRVariable>();
if (var.storage != StorageClassFunction && !is_hidden_variable(var))
{
auto &type = get<SPIRType>(var.basetype);
if (type.pointer &&
(type.storage == StorageClassUniform || type.storage == StorageClassUniformConstant ||
type.storage == StorageClassPushConstant || type.storage == StorageClassStorageBuffer) &&
(has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)))
mark_as_packable(type);
}
}
}
}
// If the specified type is a struct, it and any nested structs
// are marked as packable with the DecorationCPacked decoration.
void CompilerMSL::mark_as_packable(SPIRType &type)
{
// If this is not the base type (eg. it's a pointer or array), tunnel down
if (type.parent_type)
{
mark_as_packable(get<SPIRType>(type.parent_type));
return;
}
if (type.basetype == SPIRType::Struct)
{
set_decoration(type.self, DecorationCPacked);
// Recurse
size_t mbr_cnt = type.member_types.size();
for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
{
uint32_t mbr_type_id = type.member_types[mbr_idx];
auto &mbr_type = get<SPIRType>(mbr_type_id);
mark_as_packable(mbr_type);
}
}
}
// If a vertex attribute exists at the location, it is marked as being used by this shader
void CompilerMSL::mark_location_as_used_by_shader(uint32_t location, StorageClass storage)
{
@ -668,13 +719,14 @@ void CompilerMSL::align_struct(SPIRType &ib_type)
MemberSorter member_sorter(ib_type, meta[ib_type_id], MemberSorter::Offset);
member_sorter.sort();
uint32_t curr_offset = 0;
uint32_t curr_offset;
uint32_t mbr_cnt = uint32_t(ib_type.member_types.size());
// Test the alignment of each member, and if a member should be closer to the previous
// member than the default spacing expects, it is likely that the previous member is in
// a packed format. If so, and the previous member is packable, pack it.
// For example...this applies to any 3-element vector that is followed by a scalar.
curr_offset = 0;
for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
{
// Align current offset to the current member's default alignment.
@ -697,6 +749,7 @@ void CompilerMSL::align_struct(SPIRType &ib_type)
// Test the alignment of each member, and if a member is positioned farther than its
// alignment and the end of the previous member, add a dummy padding member that will
// be added before the current member when the declaration of this struct is emitted.
curr_offset = 0;
for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
{
// Align current offset to the current member's default alignment.
@ -816,7 +869,7 @@ void CompilerMSL::emit_custom_functions()
statement("template<typename T>");
statement("T findLSB(T x)");
begin_scope();
statement("return select(ctz(x), -1, x == 0);");
statement("return select(ctz(x), T(-1), x == T(0));");
end_scope();
statement("");
break;
@ -826,7 +879,7 @@ void CompilerMSL::emit_custom_functions()
statement("template<typename T>");
statement("T findUMSB(T x)");
begin_scope();
statement("return select(clz(0) - (clz(x) + 1), -1, x == 0);");
statement("return select(clz(T(0)) - (clz(x) + T(1)), T(-1), x == T(0));");
end_scope();
statement("");
break;
@ -836,8 +889,19 @@ void CompilerMSL::emit_custom_functions()
statement("template<typename T>");
statement("T findSMSB(T x)");
begin_scope();
statement("T v = select(x, -1 - x, x < 0);");
statement("return select(clz(0) - (clz(v) + 1), -1, v == 0);");
statement("T v = select(x, T(-1) - x, x < T(0));");
statement("return select(clz(T(0)) - (clz(v) + T(1)), T(-1), v == T(0));");
end_scope();
statement("");
break;
case SPVFuncImplArrayCopy:
statement("// Implementation of an array copy function to cover GLSL's ability to copy an array via "
"assignment. ");
statement("template<typename T>");
statement("void spvArrayCopy(thread T* dst, thread const T* src, uint count)");
begin_scope();
statement("for (uint i = 0; i < count; *dst++ = *src++, i++);");
end_scope();
statement("");
break;
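Pieced together from the statement() calls above and the call emitted by maybe_emit_array_assignment() later in this file, the helper shows up in the generated MSL roughly as follows (array names and element count are illustrative):

// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment.
template<typename T>
void spvArrayCopy(thread T* dst, thread const T* src, uint count)
{
	for (uint i = 0; i < count; *dst++ = *src++, i++);
}

// ...and at a call site where the GLSL wrote "dstArr = srcArr;" for a 4-element array:
spvArrayCopy(dstArr, srcArr, 4);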
@ -979,51 +1043,41 @@ void CompilerMSL::emit_custom_functions()
void CompilerMSL::emit_resources()
{
// Output all basic struct types which are not Block or BufferBlock as these are declared inplace
// when such variables are instantiated.
// Output non-interface structs. These include local function structs
// and structs nested within uniform and read-write buffers.
unordered_set<uint32_t> declared_structs;
for (auto &id : ids)
{
if (id.get_type() == TypeType)
{
auto &type = id.get<SPIRType>();
if (type.basetype == SPIRType::Struct && type.array.empty() && !type.pointer &&
!has_decoration(type.self, DecorationBlock) && !has_decoration(type.self, DecorationBufferBlock))
uint32_t type_id = type.self;
bool is_struct = (type.basetype == SPIRType::Struct) && type.array.empty();
bool is_block =
has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
bool is_basic_struct = is_struct && !type.pointer && !is_block;
bool is_interface = (type.storage == StorageClassInput || type.storage == StorageClassOutput ||
type.storage == StorageClassUniformConstant);
bool is_non_interface_block = is_struct && type.pointer && is_block && !is_interface;
bool is_declarable_struct = is_basic_struct || is_non_interface_block;
// Align and emit declarable structs...but avoid declaring each more than once.
if (is_declarable_struct && declared_structs.count(type_id) == 0)
{
declared_structs.insert(type_id);
if (has_decoration(type_id, DecorationCPacked))
align_struct(type);
emit_struct(type);
}
}
}
// Output Uniform buffers and constants
unordered_set<uint32_t> declared_interface_structs;
for (auto &id : ids)
{
if (id.get_type() == TypeVariable)
{
auto &var = id.get<SPIRVariable>();
auto &type = get<SPIRType>(var.basetype);
if (var.storage != StorageClassFunction && type.pointer &&
(type.storage == StorageClassUniform || type.storage == StorageClassUniformConstant ||
type.storage == StorageClassPushConstant || type.storage == StorageClassStorageBuffer) &&
(has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)) &&
!is_hidden_variable(var))
{
// Avoid declaring the same struct multiple times.
if (declared_interface_structs.count(type.self) == 0)
{
align_struct(type);
emit_struct(type);
declared_interface_structs.insert(type.self);
}
}
}
}
declare_undefined_values();
// Output interface blocks.
// Output interface structs.
emit_interface_block(stage_in_var_id);
for (auto &nsi_var : non_stage_in_input_var_ids)
emit_interface_block(nsi_var.second);
@ -1071,7 +1125,8 @@ void CompilerMSL::emit_specialization_constants()
// the work group size at compile time in SPIR-V, and [[threads_per_threadgroup]] would need to be passed around as a global.
// The work group size may be a specialization constant.
if (workgroup_size_id)
statement("constant uint3 gl_WorkGroupSize = ", constant_expression(get<SPIRConstant>(workgroup_size_id)), ";");
statement("constant uint3 ", builtin_to_glsl(BuiltInWorkgroupSize, StorageClassWorkgroup), " = ",
constant_expression(get<SPIRConstant>(workgroup_size_id)), ";");
if (!spec_consts.empty() || workgroup_size_id)
statement("");
@ -1090,7 +1145,6 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
#define BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
#define BFOP_CAST(op, type) \
emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
#define BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
#define UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)
auto ops = stream(instruction);
@ -1434,74 +1488,185 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
}
case OpStore:
{
if (!maybe_emit_input_struct_assignment(ops[0], ops[1]))
CompilerGLSL::emit_instruction(instruction);
if (maybe_emit_input_struct_assignment(ops[0], ops[1]))
break;
if (maybe_emit_array_assignment(ops[0], ops[1]))
break;
CompilerGLSL::emit_instruction(instruction);
break;
}
// OpOuterProduct
// Compute barriers
case OpMemoryBarrier:
emit_barrier(0, ops[0], ops[1]);
break;
case OpControlBarrier:
// In GLSL a memory barrier is often followed by a control barrier.
// But in MSL, memory barriers are also control barriers, so don't
// emit a simple control barrier if a memory barrier has just been emitted.
if (previous_instruction_opcode != OpMemoryBarrier)
emit_barrier(ops[0], ops[1], ops[2]);
break;
// OpOuterProduct
default:
CompilerGLSL::emit_instruction(instruction);
break;
}
previous_instruction_opcode = opcode;
}
// Since MSL does not allow structs to be nested within the stage_in struct, the original
// input structs are flattened into a single stage_in struct by add_interface_block.
// As a result, if the LHS and RHS represent an assignment of an entire input struct,
// we must perform this member-by-member, mapping to the flattened stage_in struct.
void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem)
{
if (get_entry_point().model != ExecutionModelGLCompute)
return;
string bar_stmt = "threadgroup_barrier(mem_flags::";
uint32_t mem_sem = id_mem_sem ? get<SPIRConstant>(id_mem_sem).scalar() : MemorySemanticsMaskNone;
switch (mem_sem)
{
case MemorySemanticsCrossWorkgroupMemoryMask:
bar_stmt += "mem_device";
break;
case MemorySemanticsSubgroupMemoryMask:
case MemorySemanticsWorkgroupMemoryMask:
case MemorySemanticsAtomicCounterMemoryMask:
bar_stmt += "mem_threadgroup";
break;
case MemorySemanticsImageMemoryMask:
bar_stmt += "mem_texture";
break;
case MemorySemanticsAcquireMask:
case MemorySemanticsReleaseMask:
case MemorySemanticsAcquireReleaseMask:
case MemorySemanticsSequentiallyConsistentMask:
case MemorySemanticsUniformMemoryMask:
case MemorySemanticsMaskNone:
default:
bar_stmt += "mem_none";
break;
}
if (options.supports_msl_version(2))
{
bar_stmt += ", ";
// Use the wider of the two scopes (smaller value)
uint32_t exe_scope = id_exe_scope ? get<SPIRConstant>(id_exe_scope).scalar() : ScopeInvocation;
uint32_t mem_scope = id_mem_scope ? get<SPIRConstant>(id_mem_scope).scalar() : ScopeInvocation;
uint32_t scope = min(exe_scope, mem_scope);
switch (scope)
{
case ScopeCrossDevice:
case ScopeDevice:
bar_stmt += "memory_scope_device";
break;
case ScopeSubgroup:
case ScopeInvocation:
bar_stmt += "memory_scope_simdgroup";
break;
case ScopeWorkgroup:
default:
bar_stmt += "memory_scope_threadgroup";
break;
}
}
bar_stmt += ");";
statement(bar_stmt);
}
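For illustration, the statements built here come out in the generated MSL like the lines below; which flag and scope are chosen depends on the SPIR-V memory semantics and on whether options.supports_msl_version(2) holds:

// Workgroup memory semantics targeting MSL 1.x:
threadgroup_barrier(mem_flags::mem_threadgroup);

// Device memory semantics when the MSL 2.0 scope argument is available:
threadgroup_barrier(mem_flags::mem_device, memory_scope_device);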
// Since MSL does not allow structs to be nested within the stage_in struct, the original input
// structs are flattened into a single stage_in struct by add_interface_block. As a result,
// if the LHS and RHS represent an assignment of an entire input struct, we must perform this
// member-by-member, mapping each RHS member to its name in the flattened stage_in struct.
// Returns whether the struct assignment was emitted.
bool CompilerMSL::maybe_emit_input_struct_assignment(uint32_t id_lhs, uint32_t id_rhs)
{
auto *p_v_lhs = maybe_get_backing_variable(id_lhs);
// We only care about assignments of an entire struct
uint32_t type_id = expression_type_id(id_rhs);
auto &type = get<SPIRType>(type_id);
if (type.basetype != SPIRType::Struct)
return false;
// We only care about assignments from Input variables
auto *p_v_rhs = maybe_get_backing_variable(id_rhs);
if (!(p_v_rhs && p_v_rhs->storage == StorageClassInput))
return false;
if (p_v_lhs && p_v_rhs && p_v_rhs->storage == StorageClassInput)
// Get the ID of the type of the underlying RHS variable.
// This will be an Input OpTypePointer containing the qualified member names.
uint32_t tid_v_rhs = p_v_rhs->basetype;
// Ensure the LHS variable has been declared
auto *p_v_lhs = maybe_get_backing_variable(id_lhs);
if (p_v_lhs)
flush_variable_declaration(p_v_lhs->self);
size_t mbr_cnt = type.member_types.size();
for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
{
uint32_t tid_lhs = p_v_lhs->basetype;
uint32_t tid_rhs = p_v_rhs->basetype;
string expr;
auto &t_lhs = get<SPIRType>(tid_lhs);
auto &t_rhs = get<SPIRType>(tid_rhs);
//LHS
expr += to_name(id_lhs);
expr += ".";
expr += to_member_name(type, mbr_idx);
if (t_lhs.basetype == SPIRType::Struct && t_rhs.basetype == SPIRType::Struct)
expr += " = ";
//RHS
string qual_mbr_name = get_member_qualified_name(tid_v_rhs, mbr_idx);
if (qual_mbr_name.empty())
{
size_t mbr_cnt = t_rhs.member_types.size();
assert(t_lhs.member_types.size() == mbr_cnt);
flush_variable_declaration(p_v_lhs->self);
for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
{
string expr;
//LHS
expr += to_name(id_lhs);
expr += ".";
expr += to_member_name(t_lhs, mbr_idx);
expr += " = ";
//RHS
string qual_mbr_name = get_member_qualified_name(tid_rhs, mbr_idx);
if (qual_mbr_name.empty())
{
expr += to_name(id_rhs);
expr += ".";
expr += to_member_name(t_rhs, mbr_idx);
}
else
expr += qual_mbr_name;
statement(expr, ";");
}
return true;
expr += to_name(id_rhs);
expr += ".";
expr += to_member_name(type, mbr_idx);
}
else
expr += qual_mbr_name;
statement(expr, ";");
}
return false;
return true;
}
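As a concrete illustration (struct and member names are hypothetical), storing an entire Input struct is emitted one member at a time, with each RHS member resolved to its flattened stage_in name when one is recorded:

v_in.color = in.m_color; // RHS resolved via get_member_qualified_name()
v_in.uv = vtx.uv;        // no qualified name recorded, so "<rhs>.<member>" is used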
// Since MSL does not allow arrays to be copied via simple variable assignment,
// if the LHS and RHS represent an assignment of an entire array, it must be
// implemented by calling an array copy function.
// Returns whether the array assignment was emitted.
bool CompilerMSL::maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs)
{
// Assignment from an array initializer is fine.
if (ids[id_rhs].get_type() == TypeConstant)
return false;
// We only care about assignments of an entire array
auto &type = expression_type(id_rhs);
if (type.array.size() == 0)
return false;
// Ensure the LHS variable has been declared
auto *p_v_lhs = maybe_get_backing_variable(id_lhs);
if (p_v_lhs)
flush_variable_declaration(p_v_lhs->self);
statement("spvArrayCopy(", to_expression(id_lhs), ", ", to_expression(id_rhs), ", ", to_array_size(type, 0), ");");
register_write(id_lhs);
return true;
}
// Emits one of the atomic functions. In MSL, the atomic functions operate on pointers
@ -1604,7 +1769,7 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
emit_unary_func_op(result_type, id, args[0], "pack_float_to_unorm2x16");
break;
case GLSLstd450PackHalf2x16:
emit_unary_func_op(result_type, id, args[0], "pack_half_to_snorm2x16");
emit_unary_func_op(result_type, id, args[0], "unsupported_GLSLstd450PackHalf2x16"); // Currently unsupported
break;
case GLSLstd450UnpackSnorm4x8:
@ -1620,7 +1785,7 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
emit_unary_func_op(result_type, id, args[0], "unpack_unorm2x16_to_float");
break;
case GLSLstd450UnpackHalf2x16:
emit_unary_func_op(result_type, id, args[0], "unpack_snorm2x16_to_half");
emit_unary_func_op(result_type, id, args[0], "unsupported_GLSLstd450UnpackHalf2x16"); // Currently unsupported
break;
case GLSLstd450PackDouble2x32:
@ -1650,10 +1815,10 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
break;
}
// TODO:
// GLSLstd450InterpolateAtCentroid (centroid_no_perspective qualifier)
// GLSLstd450InterpolateAtSample (sample_no_perspective qualifier)
// GLSLstd450InterpolateAtOffset
default:
CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
@ -1686,7 +1851,7 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, uint64_t)
auto &type = get<SPIRType>(func.return_type);
decl += func_type_decl(type);
decl += " ";
decl += clean_func_name(to_name(func.self));
decl += to_name(func.self);
decl += "(";
@ -2290,14 +2455,7 @@ string CompilerMSL::func_type_decl(SPIRType &type)
return entry_type + " " + return_type;
}
// Ensures the function name is not "main", which is illegal in MSL
string CompilerMSL::clean_func_name(string func_name)
{
auto iter = func_name_overrides.find(func_name);
return (iter != func_name_overrides.end()) ? iter->second : func_name;
}
// In MSL address space qualifiers are required for all pointer or reference arguments
// In MSL, address space qualifiers are required for all pointer or reference arguments
string CompilerMSL::get_argument_address_space(const SPIRVariable &argument)
{
const auto &type = get<SPIRType>(argument.basetype);
@ -2477,20 +2635,33 @@ uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::Base
// Returns the name of the entry point of this shader
string CompilerMSL::get_entry_point_name()
{
return clean_func_name(to_name(entry_point));
return to_name(entry_point);
}
string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
{
auto &var = get<SPIRVariable>(arg.id);
auto &type = expression_type(arg.id);
bool constref = !arg.alias_global_variable && (!type.pointer || arg.write_count == 0);
// TODO: Check if this arg is a uniform pointer
bool pointer = type.storage == StorageClassUniformConstant;
auto &var = get<SPIRVariable>(arg.id);
return join(constref ? "const " : "", type_to_glsl(type), pointer ? " " : "& ", to_name(var.self),
type_to_array_glsl(type));
string decl;
if (constref)
decl += "const ";
decl += type_to_glsl(type);
if (is_array(type))
decl += "*";
else if (!pointer)
decl += "&";
decl += " ";
decl += to_name(var.self);
return decl;
}
// If we're currently in the entry point function, and the object
@ -2525,17 +2696,79 @@ string CompilerMSL::to_qualified_member_name(const SPIRType &type, uint32_t inde
// if the first chars are _ and a digit, which indicate a transient name.
string CompilerMSL::ensure_valid_name(string name, string pfx)
{
if (name.size() >= 2 && name[0] == '_' && isdigit(name[1]))
return (name.size() >= 2 && name[0] == '_' && isdigit(name[1])) ? (pfx + name) : name;
}
// Replace all names that match MSL keywords or Metal Standard Library functions.
void CompilerMSL::replace_illegal_names()
{
static const unordered_set<string> keywords = {
"kernel", "bias",
};
static const unordered_set<string> illegal_func_names = {
"main", "saturate",
};
for (auto &id : ids)
{
return join(pfx, name);
switch (id.get_type())
{
case TypeVariable:
{
auto &dec = meta[id.get_id()].decoration;
if (keywords.find(dec.alias) != end(keywords))
dec.alias += "0";
break;
}
case TypeFunction:
{
auto &dec = meta[id.get_id()].decoration;
if (illegal_func_names.find(dec.alias) != end(illegal_func_names))
dec.alias += "0";
break;
}
case TypeType:
{
for (auto &mbr_dec : meta[id.get_id()].members)
if (keywords.find(mbr_dec.alias) != end(keywords))
mbr_dec.alias += "0";
break;
}
default:
break;
}
}
else
for (auto &entry : entry_points)
{
auto iter = var_name_overrides.find(name);
return (iter != var_name_overrides.end()) ? iter->second : name;
// Change both the entry point name and the alias, to keep them synced.
string &ep_name = entry.second.name;
if (illegal_func_names.find(ep_name) != end(illegal_func_names))
{
ep_name += "0";
meta[entry.first].decoration.alias = ep_name;
}
}
}
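A standalone sketch of the same suffixing rule, for reference only (not part of the compiler; the reserved set simply mirrors the two sets above):

#include <string>
#include <unordered_set>

std::string msl_safe_name(std::string name)
{
	static const std::unordered_set<std::string> reserved = { "kernel", "bias", "main", "saturate" };
	if (reserved.count(name))
		name += "0"; // e.g. "main" -> "main0", "kernel" -> "kernel0"
	return name;
}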
string CompilerMSL::to_qualifiers_glsl(uint32_t id)
{
string quals;
auto &type = expression_type(id);
if (type.storage == StorageClassWorkgroup)
quals += "threadgroup ";
return quals;
}
// The optional id parameter indicates the object whose type we are trying
// to find the description for. Most type descriptions do not
// depend on a specific object's use of that type.
@ -2880,12 +3113,11 @@ string CompilerMSL::built_in_func_arg(BuiltIn builtin, bool prefix_comma)
string bi_arg;
if (prefix_comma)
bi_arg += ", ";
bi_arg += builtin_type_decl(builtin);
assert(builtin == BuiltInVertexIndex || builtin == BuiltInInstanceIndex);
bi_arg += " " + builtin_to_glsl(builtin, StorageClassInput);
bi_arg += " [[" + builtin_qualifier(builtin) + "]]";
return bi_arg;
}
@ -2905,9 +3137,6 @@ size_t CompilerMSL::get_declared_struct_member_size(const SPIRType &struct_type,
case SPIRType::Sampler:
SPIRV_CROSS_THROW("Querying size of opaque object.");
case SPIRType::Struct:
return get_declared_struct_size(type);
default:
{
size_t component_size = type.width / 8;
@ -2915,8 +3144,12 @@ size_t CompilerMSL::get_declared_struct_member_size(const SPIRType &struct_type,
unsigned columns = type.columns;
// For arrays, we can use ArrayStride to get an easy check.
// Runtime arrays will have zero size so force to min of one.
if (!type.array.empty())
return type_struct_member_array_stride(struct_type, index) * type.array.back();
return type_struct_member_array_stride(struct_type, index) * max(type.array.back(), 1U);
if (type.basetype == SPIRType::Struct)
return get_declared_struct_size(type);
if (columns == 1) // An unpacked 3-element vector is the same size as a 4-element vector.
{
@ -2967,11 +3200,14 @@ size_t CompilerMSL::get_declared_struct_member_alignment(const SPIRType &struct_
{
// Alignment of packed type is the same as the underlying component size.
// Alignment of unpacked type is the same as the type size (or one matrix column).
auto dec_mask = get_member_decoration_mask(struct_type.self, index);
if (dec_mask & (1ull << DecorationCPacked))
if (member_is_packed_type(struct_type, index))
return type.width / 8;
else
return get_declared_struct_member_size(struct_type, index) / type.columns;
{
// Divide by array size and column count. Runtime arrays will have zero size so force to min of one.
uint32_t array_size = type.array.empty() ? 1 : max(type.array.back(), 1U);
return get_declared_struct_member_size(struct_type, index) / (type.columns * array_size);
}
}
}
}
@ -2981,7 +3217,7 @@ bool CompilerMSL::skip_argument(uint32_t) const
return false;
}
bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, uint32_t /*length*/)
bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, uint32_t length)
{
// Since MSL exists in a single execution scope, function prototype declarations are not
// needed, and clutter the output. If secondary functions are output (either as a SPIR-V
@ -2989,12 +3225,11 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui
// suppress_missing_prototypes to suppress compiler warnings of missing function prototypes.
// Mark if the input requires the implementation of an SPIR-V function that does not exist in Metal.
SPVFuncImpl spv_func = compiler.get_spv_func_impl(opcode, args);
SPVFuncImpl spv_func = get_spv_func_impl(opcode, args);
if (spv_func != SPVFuncImplNone)
{
compiler.spv_function_implementations.insert(spv_func);
suppress_missing_prototypes = true;
return true;
}
switch (opcode)
@ -3026,21 +3261,38 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui
break;
}
// Keep track of the instruction return types, mapped by ID
if (length > 1)
result_types[args[1]] = args[0];
return true;
}
// Returns an enumeration of a SPIR-V function that needs to be output for certain Op codes.
CompilerMSL::SPVFuncImpl CompilerMSL::get_spv_func_impl(Op opcode, const uint32_t *args)
CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op opcode, const uint32_t *args)
{
switch (opcode)
{
case OpFMod:
return SPVFuncImplMod;
case OpStore:
{
// Get the result type of the RHS. Since this is run as a pre-processing stage,
// we must extract the result type directly from the Instruction, rather than the ID.
uint32_t id_rhs = args[1];
uint32_t type_id_rhs = result_types[id_rhs];
if ((compiler.ids[id_rhs].get_type() != TypeConstant) && type_id_rhs &&
compiler.is_array(compiler.get<SPIRType>(type_id_rhs)))
return SPVFuncImplArrayCopy;
break;
}
case OpExtInst:
{
uint32_t extension_set = args[2];
if (get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
if (compiler.get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
{
GLSLstd450 op_450 = static_cast<GLSLstd450>(args[3]);
switch (op_450)
@ -3057,7 +3309,7 @@ CompilerMSL::SPVFuncImpl CompilerMSL::get_spv_func_impl(Op opcode, const uint32_
return SPVFuncImplFindUMsb;
case GLSLstd450MatrixInverse:
{
auto &mat_type = get<SPIRType>(args[0]);
auto &mat_type = compiler.get<SPIRType>(args[0]);
switch (mat_type.columns)
{
case 2:

View File

@ -18,6 +18,7 @@
#define SPIRV_CROSS_MSL_HPP
#include "spirv_glsl.hpp"
#include <limits>
#include <map>
#include <set>
#include <unordered_map>
@ -63,7 +64,7 @@ using MSLStructMemberKey = uint64_t;
// Special constant used in a MSLResourceBinding desc_set
// element to indicate the bindings for the push constants.
static const uint32_t kPushConstDescSet = UINT32_MAX;
static const uint32_t kPushConstDescSet = std::numeric_limits<uint32_t>::max();
// Special constant used in a MSLResourceBinding binding
// element to indicate the bindings for the push constants.
@ -76,8 +77,24 @@ public:
// Options for compiling to Metal Shading Language
struct Options
{
uint32_t msl_version = make_msl_version(1, 2);
bool enable_point_size_builtin = true;
std::string entry_point_name;
bool resolve_specialized_array_lengths = true;
void set_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0)
{
msl_version = make_msl_version(major, minor, patch);
}
bool supports_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0)
{
return msl_version >= make_msl_version(major, minor, patch);
}
static uint32_t make_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0)
{
return (major * 10000) + (minor * 100) + patch;
}
};
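A small worked example of the MMmmpp encoding defined above; the values follow directly from make_msl_version():

spirv_cross::CompilerMSL::Options opts;
opts.set_msl_version(1, 2);                      // msl_version == 10200 (the default)
opts.set_msl_version(2);                         // msl_version == 20000
bool has_msl2 = opts.supports_msl_version(2, 0); // true only when msl_version >= 20000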
const Options &get_options() const
@ -101,6 +118,7 @@ public:
SPVFuncImplFindILsb,
SPVFuncImplFindSMsb,
SPVFuncImplFindUMsb,
SPVFuncImplArrayCopy,
SPVFuncImplInverse2x2,
SPVFuncImplInverse3x3,
SPVFuncImplInverse4x4,
@ -165,10 +183,15 @@ protected:
std::string unpack_expression_type(std::string expr_str, const SPIRType &type) override;
std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type) override;
bool skip_argument(uint32_t id) const override;
std::string to_qualifiers_glsl(uint32_t id) override;
void replace_illegal_names() override;
void preprocess_op_codes();
void localize_global_variables();
void extract_global_variables_from_functions();
void resolve_specialized_array_lengths();
void mark_packable_structs();
void mark_as_packable(SPIRType &type);
std::unordered_map<uint32_t, std::set<uint32_t>> function_global_vars;
void extract_global_variables_from_function(uint32_t func_id, std::set<uint32_t> &added_arg_ids,
@ -181,12 +204,10 @@ protected:
void emit_resources();
void emit_specialization_constants();
void emit_interface_block(uint32_t ib_var_id);
void populate_func_name_overrides();
void populate_var_name_overrides();
bool maybe_emit_input_struct_assignment(uint32_t id_lhs, uint32_t id_rhs);
bool maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs);
std::string func_type_decl(SPIRType &type);
std::string clean_func_name(std::string func_name) override;
std::string entry_point_args(bool append_comma);
std::string get_entry_point_name();
std::string to_qualified_member_name(const SPIRType &type, uint32_t index);
@ -210,17 +231,15 @@ protected:
void align_struct(SPIRType &ib_type);
bool is_member_packable(SPIRType &ib_type, uint32_t index);
MSLStructMemberKey get_struct_member_key(uint32_t type_id, uint32_t index);
SPVFuncImpl get_spv_func_impl(spv::Op opcode, const uint32_t *args);
std::string get_argument_address_space(const SPIRVariable &argument);
void emit_atomic_func_op(uint32_t result_type, uint32_t result_id, const char *op, uint32_t mem_order_1,
uint32_t mem_order_2, bool has_mem_order_2, uint32_t op0, uint32_t op1 = 0,
bool op1_is_pointer = false, uint32_t op2 = 0);
const char *get_memory_order(uint32_t spv_mem_sem);
void add_pragma_line(const std::string &line);
void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem);
Options options;
std::unordered_map<std::string, std::string> func_name_overrides;
std::unordered_map<std::string, std::string> var_name_overrides;
std::set<SPVFuncImpl> spv_function_implementations;
std::unordered_map<uint32_t, MSLVertexAttr *> vtx_attrs_by_location;
std::map<uint32_t, uint32_t> non_stage_in_input_var_ids;
@ -238,6 +257,7 @@ protected:
std::string stage_out_var_name = "out";
std::string stage_uniform_var_name = "uniforms";
std::string sampler_name_suffix = "Smplr";
spv::Op previous_instruction_opcode = spv::OpNop;
// OpcodeHandler that handles several MSL preprocessing operations.
struct OpCodePreprocessor : OpcodeHandler
@ -248,8 +268,10 @@ protected:
}
bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override;
CompilerMSL::SPVFuncImpl get_spv_func_impl(spv::Op opcode, const uint32_t *args);
CompilerMSL &compiler;
std::unordered_map<uint32_t, uint32_t> result_types;
bool suppress_missing_prototypes = false;
bool uses_atomics = false;
};