Merge pull request #321 from brenwill/master
Enhancements to MSL compute and entry point naming.
This commit is contained in:
commit
2b8fe8d711
1
.gitignore
vendored
1
.gitignore
vendored
@ -13,5 +13,6 @@
|
||||
*.opensdf
|
||||
*.shader
|
||||
*.a
|
||||
*.bc
|
||||
|
||||
!CMakeLists.txt
|
||||
|
10
main.cpp
10
main.cpp
@ -447,9 +447,11 @@ struct CLIArguments
|
||||
const char *cpp_interface_name = nullptr;
|
||||
uint32_t version = 0;
|
||||
uint32_t shader_model = 0;
|
||||
uint32_t msl_version = 0;
|
||||
bool es = false;
|
||||
bool set_version = false;
|
||||
bool set_shader_model = false;
|
||||
bool set_msl_version = false;
|
||||
bool set_es = false;
|
||||
bool dump_resources = false;
|
||||
bool force_temporary = false;
|
||||
@ -483,7 +485,7 @@ static void print_help()
|
||||
"[--version <GLSL version>] [--dump-resources] [--help] [--force-temporary] "
|
||||
"[--vulkan-semantics] [--flatten-ubo] [--fixup-clipspace] [--flip-vert-y] [--iterations iter] "
|
||||
"[--cpp] [--cpp-interface-name <name>] "
|
||||
"[--msl] "
|
||||
"[--msl] [--msl-version <MMmmpp>]"
|
||||
"[--hlsl] [--shader-model] [--hlsl-enable-compat] "
|
||||
"[--separate-shader-objects]"
|
||||
"[--pls-in format input-name] [--pls-out format output-name] [--remap source_name target_name "
|
||||
@ -686,6 +688,10 @@ static int main_inner(int argc, char *argv[])
|
||||
args.shader_model = parser.next_uint();
|
||||
args.set_shader_model = true;
|
||||
});
|
||||
cbs.add("--msl-version", [&args](CLIParser &parser) {
|
||||
args.msl_version = parser.next_uint();
|
||||
args.set_msl_version = true;
|
||||
});
|
||||
|
||||
cbs.add("--remove-unused-variables", [&args](CLIParser &) { args.remove_unused = true; });
|
||||
|
||||
@ -725,6 +731,8 @@ static int main_inner(int argc, char *argv[])
|
||||
|
||||
auto *msl_comp = static_cast<CompilerMSL *>(compiler.get());
|
||||
auto msl_opts = msl_comp->get_options();
|
||||
if (args.set_msl_version)
|
||||
msl_opts.msl_version = args.msl_version;
|
||||
msl_comp->set_options(msl_opts);
|
||||
}
|
||||
else if (args.hlsl)
|
||||
|
29
reference/shaders-msl/asm/comp/bitcast_iadd.asm.comp
Normal file
29
reference/shaders-msl/asm/comp/bitcast_iadd.asm.comp
Normal file
@ -0,0 +1,29 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct _3
|
||||
{
|
||||
int4 _m0;
|
||||
uint4 _m1;
|
||||
};
|
||||
|
||||
struct _4
|
||||
{
|
||||
uint4 _m0;
|
||||
int4 _m1;
|
||||
};
|
||||
|
||||
kernel void main0(device _3& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]])
|
||||
{
|
||||
_6._m0 = _5._m1 + uint4(_5._m0);
|
||||
_6._m0 = uint4(_5._m0) + _5._m1;
|
||||
_6._m0 = _5._m1 + _5._m1;
|
||||
_6._m0 = uint4(_5._m0 + _5._m0);
|
||||
_6._m1 = int4(_5._m1 + _5._m1);
|
||||
_6._m1 = _5._m0 + _5._m0;
|
||||
_6._m1 = int4(_5._m1) + _5._m0;
|
||||
_6._m1 = _5._m0 + int4(_5._m1);
|
||||
}
|
||||
|
29
reference/shaders-msl/asm/comp/bitcast_sar.asm.comp
Normal file
29
reference/shaders-msl/asm/comp/bitcast_sar.asm.comp
Normal file
@ -0,0 +1,29 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct _3
|
||||
{
|
||||
int4 _m0;
|
||||
uint4 _m1;
|
||||
};
|
||||
|
||||
struct _4
|
||||
{
|
||||
uint4 _m0;
|
||||
int4 _m1;
|
||||
};
|
||||
|
||||
kernel void main0(device _3& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]])
|
||||
{
|
||||
_6._m0 = uint4(int4(_5._m1) >> _5._m0);
|
||||
_6._m0 = uint4(_5._m0 >> int4(_5._m1));
|
||||
_6._m0 = uint4(int4(_5._m1) >> int4(_5._m1));
|
||||
_6._m0 = uint4(_5._m0 >> _5._m0);
|
||||
_6._m1 = int4(_5._m1) >> int4(_5._m1);
|
||||
_6._m1 = _5._m0 >> _5._m0;
|
||||
_6._m1 = int4(_5._m1) >> _5._m0;
|
||||
_6._m1 = _5._m0 >> int4(_5._m1);
|
||||
}
|
||||
|
29
reference/shaders-msl/asm/comp/bitcast_sdiv.asm.comp
Normal file
29
reference/shaders-msl/asm/comp/bitcast_sdiv.asm.comp
Normal file
@ -0,0 +1,29 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct _3
|
||||
{
|
||||
int4 _m0;
|
||||
uint4 _m1;
|
||||
};
|
||||
|
||||
struct _4
|
||||
{
|
||||
uint4 _m0;
|
||||
int4 _m1;
|
||||
};
|
||||
|
||||
kernel void main0(device _3& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]])
|
||||
{
|
||||
_6._m0 = uint4(int4(_5._m1) / _5._m0);
|
||||
_6._m0 = uint4(_5._m0 / int4(_5._m1));
|
||||
_6._m0 = uint4(int4(_5._m1) / int4(_5._m1));
|
||||
_6._m0 = uint4(_5._m0 / _5._m0);
|
||||
_6._m1 = int4(_5._m1) / int4(_5._m1);
|
||||
_6._m1 = _5._m0 / _5._m0;
|
||||
_6._m1 = int4(_5._m1) / _5._m0;
|
||||
_6._m1 = _5._m0 / int4(_5._m1);
|
||||
}
|
||||
|
29
reference/shaders-msl/asm/comp/bitcast_slr.asm.comp
Normal file
29
reference/shaders-msl/asm/comp/bitcast_slr.asm.comp
Normal file
@ -0,0 +1,29 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct _3
|
||||
{
|
||||
int4 _m0;
|
||||
uint4 _m1;
|
||||
};
|
||||
|
||||
struct _4
|
||||
{
|
||||
uint4 _m0;
|
||||
int4 _m1;
|
||||
};
|
||||
|
||||
kernel void main0(device _3& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]])
|
||||
{
|
||||
_6._m0 = _5._m1 >> uint4(_5._m0);
|
||||
_6._m0 = uint4(_5._m0) >> _5._m1;
|
||||
_6._m0 = _5._m1 >> _5._m1;
|
||||
_6._m0 = uint4(_5._m0) >> uint4(_5._m0);
|
||||
_6._m1 = int4(_5._m1 >> _5._m1);
|
||||
_6._m1 = int4(uint4(_5._m0) >> uint4(_5._m0));
|
||||
_6._m1 = int4(_5._m1 >> uint4(_5._m0));
|
||||
_6._m1 = int4(uint4(_5._m0) >> _5._m1);
|
||||
}
|
||||
|
29
reference/shaders-msl/asm/comp/multiple-entry.asm.comp
Normal file
29
reference/shaders-msl/asm/comp/multiple-entry.asm.comp
Normal file
@ -0,0 +1,29 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct _6
|
||||
{
|
||||
int4 _m0;
|
||||
uint4 _m1;
|
||||
};
|
||||
|
||||
struct _7
|
||||
{
|
||||
uint4 _m0;
|
||||
int4 _m1;
|
||||
};
|
||||
|
||||
kernel void main0(device _6& _8 [[buffer(0)]], device _7& _9 [[buffer(1)]])
|
||||
{
|
||||
_9._m0 = _8._m1 + uint4(_8._m0);
|
||||
_9._m0 = uint4(_8._m0) + _8._m1;
|
||||
_9._m0 = _8._m1 + _8._m1;
|
||||
_9._m0 = uint4(_8._m0 + _8._m0);
|
||||
_9._m1 = int4(_8._m1 + _8._m1);
|
||||
_9._m1 = _8._m0 + _8._m0;
|
||||
_9._m1 = int4(_8._m1) + _8._m0;
|
||||
_9._m1 = _8._m0 + int4(_8._m1);
|
||||
}
|
||||
|
21
reference/shaders-msl/asm/comp/quantize.asm.comp
Normal file
21
reference/shaders-msl/asm/comp/quantize.asm.comp
Normal file
@ -0,0 +1,21 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct SSBO0
|
||||
{
|
||||
float scalar;
|
||||
float2 vec2_val;
|
||||
float3 vec3_val;
|
||||
float4 vec4_val;
|
||||
};
|
||||
|
||||
kernel void main0(device SSBO0& _4 [[buffer(0)]])
|
||||
{
|
||||
_4.scalar = float(half(_4.scalar));
|
||||
_4.vec2_val = float2(half2(_4.vec2_val));
|
||||
_4.vec3_val = float3(half3(_4.vec3_val));
|
||||
_4.vec4_val = float4(half4(_4.vec4_val));
|
||||
}
|
||||
|
40
reference/shaders-msl/comp/bake_gradient.comp
Normal file
40
reference/shaders-msl/comp/bake_gradient.comp
Normal file
@ -0,0 +1,40 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
constant uint3 gl_WorkGroupSize = uint3(8u, 8u, 1u);
|
||||
|
||||
struct UBO
|
||||
{
|
||||
float4 uInvSize;
|
||||
float4 uScale;
|
||||
};
|
||||
|
||||
float jacobian(thread const float2& dDdx, thread const float2& dDdy)
|
||||
{
|
||||
return ((1.0 + dDdx.x) * (1.0 + dDdy.y)) - (dDdx.y * dDdy.x);
|
||||
}
|
||||
|
||||
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], constant UBO& _46 [[buffer(0)]], texture2d<float> uHeight [[texture(0)]], sampler uHeightSmplr [[sampler(0)]], texture2d<float> uDisplacement [[texture(1)]], sampler uDisplacementSmplr [[sampler(1)]], texture2d<float, access::write> iHeightDisplacement [[texture(2)]], texture2d<float, access::write> iGradJacobian [[texture(3)]])
|
||||
{
|
||||
float4 uv = (float2(gl_GlobalInvocationID.xy) * _46.uInvSize.xy).xyxy + (_46.uInvSize * 0.5);
|
||||
float h = uHeight.sample(uHeightSmplr, uv.xy, level(0.0)).x;
|
||||
float x0 = uHeight.sample(uHeightSmplr, uv.xy, level(0.0), int2(-1, 0)).x;
|
||||
float x1 = uHeight.sample(uHeightSmplr, uv.xy, level(0.0), int2(1, 0)).x;
|
||||
float y0 = uHeight.sample(uHeightSmplr, uv.xy, level(0.0), int2(0, -1)).x;
|
||||
float y1 = uHeight.sample(uHeightSmplr, uv.xy, level(0.0), int2(0, 1)).x;
|
||||
float2 grad = (_46.uScale.xy * 0.5) * float2(x1 - x0, y1 - y0);
|
||||
float2 displacement = uDisplacement.sample(uDisplacementSmplr, uv.zw, level(0.0)).xy * 1.2000000476837158203125;
|
||||
float2 dDdx = (uDisplacement.sample(uDisplacementSmplr, uv.zw, level(0.0), int2(1, 0)).xy - uDisplacement.sample(uDisplacementSmplr, uv.zw, level(0.0), int2(-1, 0)).xy) * 0.60000002384185791015625;
|
||||
float2 dDdy = (uDisplacement.sample(uDisplacementSmplr, uv.zw, level(0.0), int2(0, 1)).xy - uDisplacement.sample(uDisplacementSmplr, uv.zw, level(0.0), int2(0, -1)).xy) * 0.60000002384185791015625;
|
||||
float2 param = dDdx * _46.uScale.z;
|
||||
float2 param_1 = dDdy * _46.uScale.z;
|
||||
float j = jacobian(param, param_1);
|
||||
displacement = float2(0.0);
|
||||
iHeightDisplacement.write(float4(h, displacement, 0.0), uint2(int2(gl_GlobalInvocationID.xy)));
|
||||
iGradJacobian.write(float4(grad, j, 0.0), uint2(int2(gl_GlobalInvocationID.xy)));
|
||||
}
|
||||
|
34
reference/shaders-msl/comp/basic.comp
Normal file
34
reference/shaders-msl/comp/basic.comp
Normal file
@ -0,0 +1,34 @@
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
#include <metal_atomic>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct SSBO
|
||||
{
|
||||
float4 in_data[1];
|
||||
};
|
||||
|
||||
struct SSBO2
|
||||
{
|
||||
float4 out_data[1];
|
||||
};
|
||||
|
||||
struct SSBO3
|
||||
{
|
||||
uint counter;
|
||||
};
|
||||
|
||||
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device SSBO& _23 [[buffer(0)]], device SSBO2& _45 [[buffer(1)]], device SSBO3& _48 [[buffer(2)]])
|
||||
{
|
||||
uint ident = gl_GlobalInvocationID.x;
|
||||
float4 idata = _23.in_data[ident];
|
||||
if (dot(idata, float4(1.0, 5.0, 6.0, 2.0)) > 8.19999980926513671875)
|
||||
{
|
||||
uint _52 = atomic_fetch_add_explicit((volatile device atomic_uint*)&(_48.counter), 1u, memory_order_relaxed);
|
||||
_45.out_data[_52] = idata;
|
||||
}
|
||||
}
|
||||
|
47
reference/shaders-msl/comp/bitfield.comp
Normal file
47
reference/shaders-msl/comp/bitfield.comp
Normal file
@ -0,0 +1,47 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
// Implementation of the GLSL findLSB() function
|
||||
template<typename T>
|
||||
T findLSB(T x)
|
||||
{
|
||||
return select(ctz(x), T(-1), x == T(0));
|
||||
}
|
||||
|
||||
// Implementation of the signed GLSL findMSB() function
|
||||
template<typename T>
|
||||
T findSMSB(T x)
|
||||
{
|
||||
T v = select(x, T(-1) - x, x < T(0));
|
||||
return select(clz(T(0)) - (clz(v) + T(1)), T(-1), v == T(0));
|
||||
}
|
||||
|
||||
// Implementation of the unsigned GLSL findMSB() function
|
||||
template<typename T>
|
||||
T findUMSB(T x)
|
||||
{
|
||||
return select(clz(T(0)) - (clz(x) + T(1)), T(-1), x == T(0));
|
||||
}
|
||||
|
||||
kernel void main0()
|
||||
{
|
||||
int signed_value = 0;
|
||||
uint unsigned_value = 0u;
|
||||
int s = extract_bits(signed_value, 5, 20);
|
||||
uint u = extract_bits(unsigned_value, 6, 21);
|
||||
s = insert_bits(s, 40, 5, 4);
|
||||
u = insert_bits(u, 60u, 5, 4);
|
||||
u = reverse_bits(u);
|
||||
s = reverse_bits(s);
|
||||
int v0 = popcount(u);
|
||||
int v1 = popcount(s);
|
||||
int v2 = findUMSB(u);
|
||||
int v3 = findSMSB(s);
|
||||
int v4 = findLSB(u);
|
||||
int v5 = findLSB(s);
|
||||
}
|
||||
|
78
reference/shaders-msl/comp/cfg-preserve-parameter.comp
Normal file
78
reference/shaders-msl/comp/cfg-preserve-parameter.comp
Normal file
@ -0,0 +1,78 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
void out_test_0(thread const int& cond, thread int& i)
|
||||
{
|
||||
if (cond == 0)
|
||||
{
|
||||
i = 40;
|
||||
}
|
||||
else
|
||||
{
|
||||
i = 60;
|
||||
}
|
||||
}
|
||||
|
||||
void out_test_1(thread const int& cond, thread int& i)
|
||||
{
|
||||
switch (cond)
|
||||
{
|
||||
case 40:
|
||||
{
|
||||
i = 40;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
i = 70;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void inout_test_0(thread const int& cond, thread int& i)
|
||||
{
|
||||
if (cond == 0)
|
||||
{
|
||||
i = 40;
|
||||
}
|
||||
}
|
||||
|
||||
void inout_test_1(thread const int& cond, thread int& i)
|
||||
{
|
||||
switch (cond)
|
||||
{
|
||||
case 40:
|
||||
{
|
||||
i = 40;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
kernel void main0()
|
||||
{
|
||||
int cond = 40;
|
||||
int i = 50;
|
||||
int param = cond;
|
||||
int param_1 = i;
|
||||
out_test_0(param, param_1);
|
||||
i = param_1;
|
||||
int param_2 = cond;
|
||||
int param_3 = i;
|
||||
out_test_1(param_2, param_3);
|
||||
i = param_3;
|
||||
int param_4 = cond;
|
||||
int param_5 = i;
|
||||
inout_test_0(param_4, param_5);
|
||||
i = param_5;
|
||||
int param_6 = cond;
|
||||
int param_7 = i;
|
||||
inout_test_1(param_6, param_7);
|
||||
i = param_7;
|
||||
}
|
||||
|
15
reference/shaders-msl/comp/coherent-block.comp
Normal file
15
reference/shaders-msl/comp/coherent-block.comp
Normal file
@ -0,0 +1,15 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct SSBO
|
||||
{
|
||||
float4 value;
|
||||
};
|
||||
|
||||
kernel void main0(device SSBO& _10 [[buffer(0)]])
|
||||
{
|
||||
_10.value = float4(20.0);
|
||||
}
|
||||
|
15
reference/shaders-msl/comp/coherent-image.comp
Normal file
15
reference/shaders-msl/comp/coherent-image.comp
Normal file
@ -0,0 +1,15 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct SSBO
|
||||
{
|
||||
int4 value;
|
||||
};
|
||||
|
||||
kernel void main0(device SSBO& _10 [[buffer(0)]], texture2d<int> uImage [[texture(0)]])
|
||||
{
|
||||
_10.value = uImage.read(uint2(int2(10)));
|
||||
}
|
||||
|
36
reference/shaders-msl/comp/culling.comp
Normal file
36
reference/shaders-msl/comp/culling.comp
Normal file
@ -0,0 +1,36 @@
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
#include <metal_atomic>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
constant uint3 gl_WorkGroupSize = uint3(4u, 1u, 1u);
|
||||
|
||||
struct SSBO
|
||||
{
|
||||
float in_data[1];
|
||||
};
|
||||
|
||||
struct SSBO2
|
||||
{
|
||||
float out_data[1];
|
||||
};
|
||||
|
||||
struct SSBO3
|
||||
{
|
||||
uint count;
|
||||
};
|
||||
|
||||
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device SSBO& _22 [[buffer(0)]], device SSBO2& _38 [[buffer(1)]], device SSBO3& _41 [[buffer(2)]])
|
||||
{
|
||||
uint ident = gl_GlobalInvocationID.x;
|
||||
float idata = _22.in_data[ident];
|
||||
if (idata > 12.0)
|
||||
{
|
||||
uint _45 = atomic_fetch_add_explicit((volatile device atomic_uint*)&(_41.count), 1u, memory_order_relaxed);
|
||||
_38.out_data[_45] = idata;
|
||||
}
|
||||
}
|
||||
|
29
reference/shaders-msl/comp/dowhile.comp
Normal file
29
reference/shaders-msl/comp/dowhile.comp
Normal file
@ -0,0 +1,29 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct SSBO
|
||||
{
|
||||
float4x4 mvp;
|
||||
float4 in_data[1];
|
||||
};
|
||||
|
||||
struct SSBO2
|
||||
{
|
||||
float4 out_data[1];
|
||||
};
|
||||
|
||||
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device SSBO& _28 [[buffer(0)]], device SSBO2& _52 [[buffer(1)]])
|
||||
{
|
||||
uint ident = gl_GlobalInvocationID.x;
|
||||
int i = 0;
|
||||
float4 idat = _28.in_data[ident];
|
||||
do
|
||||
{
|
||||
idat = _28.mvp * idat;
|
||||
i++;
|
||||
} while (i < 16);
|
||||
_52.out_data[ident] = idat;
|
||||
}
|
||||
|
11
reference/shaders-msl/comp/image.comp
Normal file
11
reference/shaders-msl/comp/image.comp
Normal file
@ -0,0 +1,11 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
kernel void main0(texture2d<float> uImageIn [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], texture2d<float, access::write> uImageOut [[texture(1)]])
|
||||
{
|
||||
float4 v = uImageIn.read(uint2((int2(gl_GlobalInvocationID.xy) + int2(uImageIn.get_width(), uImageIn.get_height()))));
|
||||
uImageOut.write(v, uint2(int2(gl_GlobalInvocationID.xy)));
|
||||
}
|
||||
|
21
reference/shaders-msl/comp/insert.comp
Normal file
21
reference/shaders-msl/comp/insert.comp
Normal file
@ -0,0 +1,21 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct SSBO
|
||||
{
|
||||
float4 out_data[1];
|
||||
};
|
||||
|
||||
kernel void main0(device SSBO& _27 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
||||
{
|
||||
float4 v;
|
||||
v.x = 10.0;
|
||||
v.y = 30.0;
|
||||
v.z = 70.0;
|
||||
v.w = 90.0;
|
||||
_27.out_data[gl_GlobalInvocationID.x] = v;
|
||||
_27.out_data[gl_GlobalInvocationID.x].y = 20.0;
|
||||
}
|
||||
|
107
reference/shaders-msl/comp/loop.comp
Normal file
107
reference/shaders-msl/comp/loop.comp
Normal file
@ -0,0 +1,107 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct SSBO
|
||||
{
|
||||
float4x4 mvp;
|
||||
float4 in_data[1];
|
||||
};
|
||||
|
||||
struct SSBO2
|
||||
{
|
||||
float4 out_data[1];
|
||||
};
|
||||
|
||||
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device SSBO& _24 [[buffer(0)]], device SSBO2& _177 [[buffer(1)]])
|
||||
{
|
||||
uint ident = gl_GlobalInvocationID.x;
|
||||
float4 idat = _24.in_data[ident];
|
||||
int k = 0;
|
||||
uint i = 0u;
|
||||
if (idat.y == 20.0)
|
||||
{
|
||||
do
|
||||
{
|
||||
k *= 2;
|
||||
i++;
|
||||
} while (i < ident);
|
||||
}
|
||||
switch (k)
|
||||
{
|
||||
case 10:
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
i++;
|
||||
if (i > 10u)
|
||||
{
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
i += 2u;
|
||||
if (i > 20u)
|
||||
{
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
while (k < 10)
|
||||
{
|
||||
idat *= 2.0;
|
||||
k++;
|
||||
}
|
||||
for (uint i_1 = 0u; i_1 < 16u; i_1++, k++)
|
||||
{
|
||||
for (uint j = 0u; j < 30u; j++)
|
||||
{
|
||||
idat = _24.mvp * idat;
|
||||
}
|
||||
}
|
||||
k = 0;
|
||||
for (;;)
|
||||
{
|
||||
k++;
|
||||
if (k > 10)
|
||||
{
|
||||
k += 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
k += 3;
|
||||
continue;
|
||||
}
|
||||
k += 10;
|
||||
continue;
|
||||
}
|
||||
k = 0;
|
||||
do
|
||||
{
|
||||
k++;
|
||||
} while (k > 10);
|
||||
int l = 0;
|
||||
for (;;)
|
||||
{
|
||||
if (l == 5)
|
||||
{
|
||||
l++;
|
||||
continue;
|
||||
}
|
||||
idat += float4(1.0);
|
||||
l++;
|
||||
continue;
|
||||
}
|
||||
_177.out_data[ident] = idat;
|
||||
}
|
||||
|
16
reference/shaders-msl/comp/mat3.comp
Normal file
16
reference/shaders-msl/comp/mat3.comp
Normal file
@ -0,0 +1,16 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct SSBO2
|
||||
{
|
||||
float3x3 out_data[1];
|
||||
};
|
||||
|
||||
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device SSBO2& _22 [[buffer(0)]])
|
||||
{
|
||||
uint ident = gl_GlobalInvocationID.x;
|
||||
_22.out_data[ident] = float3x3(float3(10.0), float3(20.0), float3(40.0));
|
||||
}
|
||||
|
35
reference/shaders-msl/comp/mod.comp
Normal file
35
reference/shaders-msl/comp/mod.comp
Normal file
@ -0,0 +1,35 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct SSBO
|
||||
{
|
||||
float4 in_data[1];
|
||||
};
|
||||
|
||||
struct SSBO2
|
||||
{
|
||||
float4 out_data[1];
|
||||
};
|
||||
|
||||
// Implementation of the GLSL mod() function, which is slightly different than Metal fmod()
|
||||
template<typename Tx, typename Ty>
|
||||
Tx mod(Tx x, Ty y)
|
||||
{
|
||||
return x - y * floor(x / y);
|
||||
}
|
||||
|
||||
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device SSBO& _23 [[buffer(0)]], device SSBO2& _33 [[buffer(1)]])
|
||||
{
|
||||
uint ident = gl_GlobalInvocationID.x;
|
||||
float4 v = mod(_23.in_data[ident], _33.out_data[ident]);
|
||||
_33.out_data[ident] = v;
|
||||
uint4 vu = as_type<uint4>(_23.in_data[ident]) % as_type<uint4>(_33.out_data[ident]);
|
||||
_33.out_data[ident] = as_type<float4>(vu);
|
||||
int4 vi = as_type<int4>(_23.in_data[ident]) % as_type<int4>(_33.out_data[ident]);
|
||||
_33.out_data[ident] = as_type<float4>(vi);
|
||||
}
|
||||
|
24
reference/shaders-msl/comp/modf.comp
Normal file
24
reference/shaders-msl/comp/modf.comp
Normal file
@ -0,0 +1,24 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct SSBO
|
||||
{
|
||||
float4 in_data[1];
|
||||
};
|
||||
|
||||
struct SSBO2
|
||||
{
|
||||
float4 out_data[1];
|
||||
};
|
||||
|
||||
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device SSBO& _23 [[buffer(0)]], device SSBO2& _35 [[buffer(1)]])
|
||||
{
|
||||
uint ident = gl_GlobalInvocationID.x;
|
||||
float4 i;
|
||||
float4 _31 = modf(_23.in_data[ident], i);
|
||||
float4 v = _31;
|
||||
_35.out_data[ident] = v;
|
||||
}
|
||||
|
36
reference/shaders-msl/comp/return.comp
Normal file
36
reference/shaders-msl/comp/return.comp
Normal file
@ -0,0 +1,36 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct SSBO2
|
||||
{
|
||||
float4 out_data[1];
|
||||
};
|
||||
|
||||
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device SSBO2& _27 [[buffer(0)]])
|
||||
{
|
||||
uint ident = gl_GlobalInvocationID.x;
|
||||
if (ident == 2u)
|
||||
{
|
||||
_27.out_data[ident] = float4(20.0);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (ident == 4u)
|
||||
{
|
||||
_27.out_data[ident] = float4(10.0);
|
||||
return;
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < 20; i++)
|
||||
{
|
||||
if (i == 10)
|
||||
{
|
||||
break;
|
||||
}
|
||||
return;
|
||||
}
|
||||
_27.out_data[ident] = float4(10.0);
|
||||
}
|
||||
|
29
reference/shaders-msl/comp/rmw-opt.comp
Normal file
29
reference/shaders-msl/comp/rmw-opt.comp
Normal file
@ -0,0 +1,29 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct SSBO
|
||||
{
|
||||
int a;
|
||||
};
|
||||
|
||||
kernel void main0(device SSBO& _9 [[buffer(0)]])
|
||||
{
|
||||
_9.a += 10;
|
||||
_9.a -= 10;
|
||||
_9.a *= 10;
|
||||
_9.a /= 10;
|
||||
_9.a = _9.a << 2;
|
||||
_9.a = _9.a >> 3;
|
||||
_9.a &= 40;
|
||||
_9.a ^= 10;
|
||||
_9.a %= 40;
|
||||
_9.a |= 1;
|
||||
bool c = false;
|
||||
bool d = true;
|
||||
c = c && d;
|
||||
d = d || c;
|
||||
_9.a = int(c && d);
|
||||
}
|
||||
|
27
reference/shaders-msl/comp/shared.comp
Normal file
27
reference/shaders-msl/comp/shared.comp
Normal file
@ -0,0 +1,27 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
constant uint3 gl_WorkGroupSize = uint3(4u, 1u, 1u);
|
||||
|
||||
struct SSBO
|
||||
{
|
||||
float in_data[1];
|
||||
};
|
||||
|
||||
struct SSBO2
|
||||
{
|
||||
float out_data[1];
|
||||
};
|
||||
|
||||
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device SSBO& _22 [[buffer(0)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], device SSBO2& _44 [[buffer(1)]])
|
||||
{
|
||||
uint ident = gl_GlobalInvocationID.x;
|
||||
float idata = _22.in_data[ident];
|
||||
threadgroup float sShared[4];
|
||||
sShared[gl_LocalInvocationIndex] = idata;
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
_44.out_data[ident] = sShared[(4u - gl_LocalInvocationIndex) - 1u];
|
||||
}
|
||||
|
26
reference/shaders-msl/comp/struct-layout.comp
Normal file
26
reference/shaders-msl/comp/struct-layout.comp
Normal file
@ -0,0 +1,26 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct Foo
|
||||
{
|
||||
float4x4 m;
|
||||
};
|
||||
|
||||
struct SSBO2
|
||||
{
|
||||
Foo out_data[1];
|
||||
};
|
||||
|
||||
struct SSBO
|
||||
{
|
||||
Foo in_data[1];
|
||||
};
|
||||
|
||||
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device SSBO2& _23 [[buffer(0)]], device SSBO& _30 [[buffer(1)]])
|
||||
{
|
||||
uint ident = gl_GlobalInvocationID.x;
|
||||
_23.out_data[ident].m = _30.in_data[ident].m * _30.in_data[ident].m;
|
||||
}
|
||||
|
100
reference/shaders-msl/comp/struct-packing.comp
Normal file
100
reference/shaders-msl/comp/struct-packing.comp
Normal file
@ -0,0 +1,100 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct S0
|
||||
{
|
||||
float2 a[1];
|
||||
float b;
|
||||
};
|
||||
|
||||
struct S1
|
||||
{
|
||||
packed_float3 a;
|
||||
float b;
|
||||
};
|
||||
|
||||
struct S2
|
||||
{
|
||||
float3 a[1];
|
||||
float b;
|
||||
};
|
||||
|
||||
struct S3
|
||||
{
|
||||
float2 a;
|
||||
float b;
|
||||
};
|
||||
|
||||
struct S4
|
||||
{
|
||||
float2 c;
|
||||
};
|
||||
|
||||
struct Content
|
||||
{
|
||||
S0 m0s[1];
|
||||
S1 m1s[1];
|
||||
S2 m2s[1];
|
||||
S0 m0;
|
||||
S1 m1;
|
||||
S2 m2;
|
||||
S3 m3;
|
||||
char pad7[4];
|
||||
float m4;
|
||||
S4 m3s[8];
|
||||
};
|
||||
|
||||
struct SSBO1
|
||||
{
|
||||
Content content;
|
||||
Content content1[2];
|
||||
Content content2;
|
||||
char pad3[8];
|
||||
float2x2 m0;
|
||||
float2x2 m1;
|
||||
float2x3 m2[4];
|
||||
float3x2 m3;
|
||||
float2x2 m4;
|
||||
float2x2 m5[9];
|
||||
float2x3 m6[4][2];
|
||||
float3x2 m7;
|
||||
float array[1];
|
||||
};
|
||||
|
||||
struct SSBO0
|
||||
{
|
||||
Content content;
|
||||
Content content1[2];
|
||||
Content content2;
|
||||
float array[1];
|
||||
};
|
||||
|
||||
kernel void main0(device SSBO1& ssbo_430 [[buffer(0)]], device SSBO0& ssbo_140 [[buffer(1)]])
|
||||
{
|
||||
ssbo_430.content.m0s[0].a[0] = ssbo_140.content.m0s[0].a[0];
|
||||
ssbo_430.content.m0s[0].b = ssbo_140.content.m0s[0].b;
|
||||
ssbo_430.content.m1s[0].a = ssbo_140.content.m1s[0].a;
|
||||
ssbo_430.content.m1s[0].b = ssbo_140.content.m1s[0].b;
|
||||
ssbo_430.content.m2s[0].a[0] = ssbo_140.content.m2s[0].a[0];
|
||||
ssbo_430.content.m2s[0].b = ssbo_140.content.m2s[0].b;
|
||||
ssbo_430.content.m0.a[0] = ssbo_140.content.m0.a[0];
|
||||
ssbo_430.content.m0.b = ssbo_140.content.m0.b;
|
||||
ssbo_430.content.m1.a = ssbo_140.content.m1.a;
|
||||
ssbo_430.content.m1.b = ssbo_140.content.m1.b;
|
||||
ssbo_430.content.m2.a[0] = ssbo_140.content.m2.a[0];
|
||||
ssbo_430.content.m2.b = ssbo_140.content.m2.b;
|
||||
ssbo_430.content.m3.a = ssbo_140.content.m3.a;
|
||||
ssbo_430.content.m3.b = ssbo_140.content.m3.b;
|
||||
ssbo_430.content.m4 = ssbo_140.content.m4;
|
||||
ssbo_430.content.m3s[0].c = ssbo_140.content.m3s[0].c;
|
||||
ssbo_430.content.m3s[1].c = ssbo_140.content.m3s[1].c;
|
||||
ssbo_430.content.m3s[2].c = ssbo_140.content.m3s[2].c;
|
||||
ssbo_430.content.m3s[3].c = ssbo_140.content.m3s[3].c;
|
||||
ssbo_430.content.m3s[4].c = ssbo_140.content.m3s[4].c;
|
||||
ssbo_430.content.m3s[5].c = ssbo_140.content.m3s[5].c;
|
||||
ssbo_430.content.m3s[6].c = ssbo_140.content.m3s[6].c;
|
||||
ssbo_430.content.m3s[7].c = ssbo_140.content.m3s[7].c;
|
||||
}
|
||||
|
51
reference/shaders-msl/comp/torture-loop.comp
Normal file
51
reference/shaders-msl/comp/torture-loop.comp
Normal file
@ -0,0 +1,51 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct SSBO
|
||||
{
|
||||
float4x4 mvp;
|
||||
float4 in_data[1];
|
||||
};
|
||||
|
||||
struct SSBO2
|
||||
{
|
||||
float4 out_data[1];
|
||||
};
|
||||
|
||||
kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device SSBO& _24 [[buffer(0)]], device SSBO2& _89 [[buffer(1)]])
|
||||
{
|
||||
uint ident = gl_GlobalInvocationID.x;
|
||||
float4 idat = _24.in_data[ident];
|
||||
int k = 0;
|
||||
for (;;)
|
||||
{
|
||||
int _39 = k;
|
||||
int _40 = _39 + 1;
|
||||
k = _40;
|
||||
if (_40 < 10)
|
||||
{
|
||||
idat *= 2.0;
|
||||
k++;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (uint i = 0u; i < 16u; i++, k++)
|
||||
{
|
||||
for (uint j = 0u; j < 30u; j++)
|
||||
{
|
||||
idat = _24.mvp * idat;
|
||||
}
|
||||
}
|
||||
do
|
||||
{
|
||||
k++;
|
||||
} while (k > 10);
|
||||
_89.out_data[ident] = idat;
|
||||
}
|
||||
|
53
reference/shaders-msl/comp/type-alias.comp
Normal file
53
reference/shaders-msl/comp/type-alias.comp
Normal file
@ -0,0 +1,53 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct S0
|
||||
{
|
||||
float4 a;
|
||||
};
|
||||
|
||||
struct S1
|
||||
{
|
||||
float4 a;
|
||||
};
|
||||
|
||||
struct SSBO0
|
||||
{
|
||||
S0 s0s[1];
|
||||
};
|
||||
|
||||
struct SSBO1
|
||||
{
|
||||
S1 s1s[1];
|
||||
};
|
||||
|
||||
struct SSBO2
|
||||
{
|
||||
float4 outputs[1];
|
||||
};
|
||||
|
||||
float4 overload(thread const S0& s0)
|
||||
{
|
||||
return s0.a;
|
||||
}
|
||||
|
||||
float4 overload(thread const S1& s1)
|
||||
{
|
||||
return s1.a;
|
||||
}
|
||||
|
||||
kernel void main0(device SSBO0& _36 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device SSBO1& _55 [[buffer(1)]], device SSBO2& _66 [[buffer(2)]])
|
||||
{
|
||||
S0 s0;
|
||||
s0.a = _36.s0s[gl_GlobalInvocationID.x].a;
|
||||
S1 s1;
|
||||
s1.a = _55.s1s[gl_GlobalInvocationID.x].a;
|
||||
S0 param = s0;
|
||||
S1 param_1 = s1;
|
||||
_66.outputs[gl_GlobalInvocationID.x] = overload(param) + overload(param_1);
|
||||
}
|
||||
|
20
reference/shaders-msl/comp/udiv.comp
Normal file
20
reference/shaders-msl/comp/udiv.comp
Normal file
@ -0,0 +1,20 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct SSBO2
|
||||
{
|
||||
uint outputs[1];
|
||||
};
|
||||
|
||||
struct SSBO
|
||||
{
|
||||
uint inputs[1];
|
||||
};
|
||||
|
||||
kernel void main0(device SSBO2& _10 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device SSBO& _23 [[buffer(1)]])
|
||||
{
|
||||
_10.outputs[gl_GlobalInvocationID.x] = _23.inputs[gl_GlobalInvocationID.x] / 29u;
|
||||
}
|
||||
|
@ -5,7 +5,7 @@ using namespace metal;
|
||||
|
||||
struct Light
|
||||
{
|
||||
float3 Position;
|
||||
packed_float3 Position;
|
||||
float Radius;
|
||||
float4 Color;
|
||||
};
|
||||
|
@ -39,7 +39,7 @@ vertex main0_out main0(constant UBO& _22 [[buffer(0)]])
|
||||
out.oA = _22.A;
|
||||
out.oB = float4(_22.B0, _22.B1);
|
||||
out.oC = float4(_22.C0, _22.C1);
|
||||
out.oD = float4(float3(_22.D0), _22.D1);
|
||||
out.oD = float4(_22.D0, _22.D1);
|
||||
out.oE = float4(_22.E0, _22.E1, _22.E2, _22.E3);
|
||||
out.oF = float4(_22.F0, _22.F1, _22.F2);
|
||||
return out;
|
||||
|
@ -5,7 +5,7 @@ using namespace metal;
|
||||
|
||||
struct Light
|
||||
{
|
||||
float3 Position;
|
||||
packed_float3 Position;
|
||||
float Radius;
|
||||
float4 Color;
|
||||
};
|
||||
|
@ -5,7 +5,7 @@ using namespace metal;
|
||||
|
||||
struct Light
|
||||
{
|
||||
float3 Position;
|
||||
packed_float3 Position;
|
||||
float Radius;
|
||||
float4 Color;
|
||||
};
|
||||
|
@ -47,15 +47,15 @@ T degrees(T r)
|
||||
template<typename T>
|
||||
T findLSB(T x)
|
||||
{
|
||||
return select(ctz(x), -1, x == 0);
|
||||
return select(ctz(x), T(-1), x == T(0));
|
||||
}
|
||||
|
||||
// Implementation of the signed GLSL findMSB() function
|
||||
template<typename T>
|
||||
T findSMSB(T x)
|
||||
{
|
||||
T v = select(x, -1 - x, x < 0);
|
||||
return select(clz(0) - (clz(v) + 1), -1, v == 0);
|
||||
T v = select(x, T(-1) - x, x < T(0));
|
||||
return select(clz(T(0)) - (clz(v) + T(1)), T(-1), v == T(0));
|
||||
}
|
||||
|
||||
// Returns the determinant of a 2x2 matrix.
|
||||
|
@ -31,7 +31,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]]
|
||||
main0_out out = {};
|
||||
out.gl_Position = _18.mvp * in.aVertex;
|
||||
out.vNormal = in.aNormal;
|
||||
out.vColor = float3(_18.color) * _18.opacity;
|
||||
out.vColor = _18.color * _18.opacity;
|
||||
out.vSize = _18.targSize * _18.opacity;
|
||||
return out;
|
||||
}
|
||||
|
79
shaders-msl/asm/comp/bitcast_iadd.asm.comp
Normal file
79
shaders-msl/asm/comp/bitcast_iadd.asm.comp
Normal file
@ -0,0 +1,79 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos Glslang Reference Front End; 1
|
||||
; Bound: 30
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %func "main"
|
||||
OpExecutionMode %func LocalSize 1 1 1
|
||||
OpSource ESSL 310
|
||||
OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
|
||||
OpSourceExtension "GL_GOOGLE_include_directive"
|
||||
OpMemberDecorate %input_struct 0 Offset 0
|
||||
OpMemberDecorate %input_struct 1 Offset 16
|
||||
OpMemberDecorate %output_struct 0 Offset 0
|
||||
OpMemberDecorate %output_struct 1 Offset 16
|
||||
OpDecorate %input_struct BufferBlock
|
||||
OpDecorate %inputs DescriptorSet 0
|
||||
OpDecorate %inputs Binding 0
|
||||
OpDecorate %inputs Restrict
|
||||
OpDecorate %output_struct BufferBlock
|
||||
OpDecorate %outputs DescriptorSet 0
|
||||
OpDecorate %outputs Binding 1
|
||||
OpDecorate %outputs Restrict
|
||||
|
||||
%void = OpTypeVoid
|
||||
%main_func = OpTypeFunction %void
|
||||
|
||||
%uint = OpTypeInt 32 0
|
||||
%uvec4 = OpTypeVector %uint 4
|
||||
|
||||
%int = OpTypeInt 32 1
|
||||
%ivec4 = OpTypeVector %int 4
|
||||
|
||||
%ivec4_ptr = OpTypePointer Uniform %ivec4
|
||||
%uvec4_ptr = OpTypePointer Uniform %uvec4
|
||||
|
||||
%zero = OpConstant %int 0
|
||||
%one = OpConstant %int 1
|
||||
|
||||
%input_struct = OpTypeStruct %ivec4 %uvec4
|
||||
%input_struct_ptr = OpTypePointer Uniform %input_struct
|
||||
%inputs = OpVariable %input_struct_ptr Uniform
|
||||
%output_struct = OpTypeStruct %uvec4 %ivec4
|
||||
%output_struct_ptr = OpTypePointer Uniform %output_struct
|
||||
%outputs = OpVariable %output_struct_ptr Uniform
|
||||
|
||||
%func = OpFunction %void None %main_func
|
||||
%block = OpLabel
|
||||
|
||||
%input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero
|
||||
%input0_ptr = OpAccessChain %uvec4_ptr %inputs %one
|
||||
%input1 = OpLoad %ivec4 %input1_ptr
|
||||
%input0 = OpLoad %uvec4 %input0_ptr
|
||||
|
||||
%output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero
|
||||
%output_ptr_ivec4 = OpAccessChain %ivec4_ptr %outputs %one
|
||||
|
||||
; Test all variants of IAdd
|
||||
%result_iadd_0 = OpIAdd %uvec4 %input0 %input1
|
||||
%result_iadd_1 = OpIAdd %uvec4 %input1 %input0
|
||||
%result_iadd_2 = OpIAdd %uvec4 %input0 %input0
|
||||
%result_iadd_3 = OpIAdd %uvec4 %input1 %input1
|
||||
%result_iadd_4 = OpIAdd %ivec4 %input0 %input0
|
||||
%result_iadd_5 = OpIAdd %ivec4 %input1 %input1
|
||||
%result_iadd_6 = OpIAdd %ivec4 %input0 %input1
|
||||
%result_iadd_7 = OpIAdd %ivec4 %input1 %input0
|
||||
OpStore %output_ptr_uvec4 %result_iadd_0
|
||||
OpStore %output_ptr_uvec4 %result_iadd_1
|
||||
OpStore %output_ptr_uvec4 %result_iadd_2
|
||||
OpStore %output_ptr_uvec4 %result_iadd_3
|
||||
OpStore %output_ptr_ivec4 %result_iadd_4
|
||||
OpStore %output_ptr_ivec4 %result_iadd_5
|
||||
OpStore %output_ptr_ivec4 %result_iadd_6
|
||||
OpStore %output_ptr_ivec4 %result_iadd_7
|
||||
|
||||
OpReturn
|
||||
OpFunctionEnd
|
77
shaders-msl/asm/comp/bitcast_sar.asm.comp
Normal file
77
shaders-msl/asm/comp/bitcast_sar.asm.comp
Normal file
@ -0,0 +1,77 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos Glslang Reference Front End; 1
|
||||
; Bound: 30
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %func "main"
|
||||
OpExecutionMode %func LocalSize 1 1 1
|
||||
OpSource ESSL 310
|
||||
OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
|
||||
OpSourceExtension "GL_GOOGLE_include_directive"
|
||||
OpMemberDecorate %input_struct 0 Offset 0
|
||||
OpMemberDecorate %input_struct 1 Offset 16
|
||||
OpMemberDecorate %output_struct 0 Offset 0
|
||||
OpMemberDecorate %output_struct 1 Offset 16
|
||||
OpDecorate %input_struct BufferBlock
|
||||
OpDecorate %inputs DescriptorSet 0
|
||||
OpDecorate %inputs Binding 0
|
||||
OpDecorate %output_struct BufferBlock
|
||||
OpDecorate %outputs DescriptorSet 0
|
||||
OpDecorate %outputs Binding 1
|
||||
|
||||
%void = OpTypeVoid
|
||||
%main_func = OpTypeFunction %void
|
||||
|
||||
%uint = OpTypeInt 32 0
|
||||
%uvec4 = OpTypeVector %uint 4
|
||||
|
||||
%int = OpTypeInt 32 1
|
||||
%ivec4 = OpTypeVector %int 4
|
||||
|
||||
%ivec4_ptr = OpTypePointer Uniform %ivec4
|
||||
%uvec4_ptr = OpTypePointer Uniform %uvec4
|
||||
|
||||
%zero = OpConstant %int 0
|
||||
%one = OpConstant %int 1
|
||||
|
||||
%input_struct = OpTypeStruct %ivec4 %uvec4
|
||||
%input_struct_ptr = OpTypePointer Uniform %input_struct
|
||||
%inputs = OpVariable %input_struct_ptr Uniform
|
||||
%output_struct = OpTypeStruct %uvec4 %ivec4
|
||||
%output_struct_ptr = OpTypePointer Uniform %output_struct
|
||||
%outputs = OpVariable %output_struct_ptr Uniform
|
||||
|
||||
%func = OpFunction %void None %main_func
|
||||
%block = OpLabel
|
||||
|
||||
%input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero
|
||||
%input0_ptr = OpAccessChain %uvec4_ptr %inputs %one
|
||||
%input1 = OpLoad %ivec4 %input1_ptr
|
||||
%input0 = OpLoad %uvec4 %input0_ptr
|
||||
|
||||
%output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero
|
||||
%output_ptr_ivec4 = OpAccessChain %ivec4_ptr %outputs %one
|
||||
|
||||
; Test all variants of ShiftRightArithmetic
|
||||
%result_iadd_0 = OpShiftRightArithmetic %uvec4 %input0 %input1
|
||||
%result_iadd_1 = OpShiftRightArithmetic %uvec4 %input1 %input0
|
||||
%result_iadd_2 = OpShiftRightArithmetic %uvec4 %input0 %input0
|
||||
%result_iadd_3 = OpShiftRightArithmetic %uvec4 %input1 %input1
|
||||
%result_iadd_4 = OpShiftRightArithmetic %ivec4 %input0 %input0
|
||||
%result_iadd_5 = OpShiftRightArithmetic %ivec4 %input1 %input1
|
||||
%result_iadd_6 = OpShiftRightArithmetic %ivec4 %input0 %input1
|
||||
%result_iadd_7 = OpShiftRightArithmetic %ivec4 %input1 %input0
|
||||
OpStore %output_ptr_uvec4 %result_iadd_0
|
||||
OpStore %output_ptr_uvec4 %result_iadd_1
|
||||
OpStore %output_ptr_uvec4 %result_iadd_2
|
||||
OpStore %output_ptr_uvec4 %result_iadd_3
|
||||
OpStore %output_ptr_ivec4 %result_iadd_4
|
||||
OpStore %output_ptr_ivec4 %result_iadd_5
|
||||
OpStore %output_ptr_ivec4 %result_iadd_6
|
||||
OpStore %output_ptr_ivec4 %result_iadd_7
|
||||
|
||||
OpReturn
|
||||
OpFunctionEnd
|
77
shaders-msl/asm/comp/bitcast_sdiv.asm.comp
Normal file
77
shaders-msl/asm/comp/bitcast_sdiv.asm.comp
Normal file
@ -0,0 +1,77 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos Glslang Reference Front End; 1
|
||||
; Bound: 30
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %func "main"
|
||||
OpExecutionMode %func LocalSize 1 1 1
|
||||
OpSource ESSL 310
|
||||
OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
|
||||
OpSourceExtension "GL_GOOGLE_include_directive"
|
||||
OpMemberDecorate %input_struct 0 Offset 0
|
||||
OpMemberDecorate %input_struct 1 Offset 16
|
||||
OpMemberDecorate %output_struct 0 Offset 0
|
||||
OpMemberDecorate %output_struct 1 Offset 16
|
||||
OpDecorate %input_struct BufferBlock
|
||||
OpDecorate %inputs DescriptorSet 0
|
||||
OpDecorate %inputs Binding 0
|
||||
OpDecorate %output_struct BufferBlock
|
||||
OpDecorate %outputs DescriptorSet 0
|
||||
OpDecorate %outputs Binding 1
|
||||
|
||||
%void = OpTypeVoid
|
||||
%main_func = OpTypeFunction %void
|
||||
|
||||
%uint = OpTypeInt 32 0
|
||||
%uvec4 = OpTypeVector %uint 4
|
||||
|
||||
%int = OpTypeInt 32 1
|
||||
%ivec4 = OpTypeVector %int 4
|
||||
|
||||
%ivec4_ptr = OpTypePointer Uniform %ivec4
|
||||
%uvec4_ptr = OpTypePointer Uniform %uvec4
|
||||
|
||||
%zero = OpConstant %int 0
|
||||
%one = OpConstant %int 1
|
||||
|
||||
%input_struct = OpTypeStruct %ivec4 %uvec4
|
||||
%input_struct_ptr = OpTypePointer Uniform %input_struct
|
||||
%inputs = OpVariable %input_struct_ptr Uniform
|
||||
%output_struct = OpTypeStruct %uvec4 %ivec4
|
||||
%output_struct_ptr = OpTypePointer Uniform %output_struct
|
||||
%outputs = OpVariable %output_struct_ptr Uniform
|
||||
|
||||
%func = OpFunction %void None %main_func
|
||||
%block = OpLabel
|
||||
|
||||
%input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero
|
||||
%input0_ptr = OpAccessChain %uvec4_ptr %inputs %one
|
||||
%input1 = OpLoad %ivec4 %input1_ptr
|
||||
%input0 = OpLoad %uvec4 %input0_ptr
|
||||
|
||||
%output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero
|
||||
%output_ptr_ivec4 = OpAccessChain %ivec4_ptr %outputs %one
|
||||
|
||||
; Test all variants of SDiv
|
||||
%result_iadd_0 = OpSDiv %uvec4 %input0 %input1
|
||||
%result_iadd_1 = OpSDiv %uvec4 %input1 %input0
|
||||
%result_iadd_2 = OpSDiv %uvec4 %input0 %input0
|
||||
%result_iadd_3 = OpSDiv %uvec4 %input1 %input1
|
||||
%result_iadd_4 = OpSDiv %ivec4 %input0 %input0
|
||||
%result_iadd_5 = OpSDiv %ivec4 %input1 %input1
|
||||
%result_iadd_6 = OpSDiv %ivec4 %input0 %input1
|
||||
%result_iadd_7 = OpSDiv %ivec4 %input1 %input0
|
||||
OpStore %output_ptr_uvec4 %result_iadd_0
|
||||
OpStore %output_ptr_uvec4 %result_iadd_1
|
||||
OpStore %output_ptr_uvec4 %result_iadd_2
|
||||
OpStore %output_ptr_uvec4 %result_iadd_3
|
||||
OpStore %output_ptr_ivec4 %result_iadd_4
|
||||
OpStore %output_ptr_ivec4 %result_iadd_5
|
||||
OpStore %output_ptr_ivec4 %result_iadd_6
|
||||
OpStore %output_ptr_ivec4 %result_iadd_7
|
||||
|
||||
OpReturn
|
||||
OpFunctionEnd
|
77
shaders-msl/asm/comp/bitcast_slr.asm.comp
Normal file
77
shaders-msl/asm/comp/bitcast_slr.asm.comp
Normal file
@ -0,0 +1,77 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos Glslang Reference Front End; 1
|
||||
; Bound: 30
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %func "main"
|
||||
OpExecutionMode %func LocalSize 1 1 1
|
||||
OpSource ESSL 310
|
||||
OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
|
||||
OpSourceExtension "GL_GOOGLE_include_directive"
|
||||
OpMemberDecorate %input_struct 0 Offset 0
|
||||
OpMemberDecorate %input_struct 1 Offset 16
|
||||
OpMemberDecorate %output_struct 0 Offset 0
|
||||
OpMemberDecorate %output_struct 1 Offset 16
|
||||
OpDecorate %input_struct BufferBlock
|
||||
OpDecorate %inputs DescriptorSet 0
|
||||
OpDecorate %inputs Binding 0
|
||||
OpDecorate %output_struct BufferBlock
|
||||
OpDecorate %outputs DescriptorSet 0
|
||||
OpDecorate %outputs Binding 1
|
||||
|
||||
%void = OpTypeVoid
|
||||
%main_func = OpTypeFunction %void
|
||||
|
||||
%uint = OpTypeInt 32 0
|
||||
%uvec4 = OpTypeVector %uint 4
|
||||
|
||||
%int = OpTypeInt 32 1
|
||||
%ivec4 = OpTypeVector %int 4
|
||||
|
||||
%ivec4_ptr = OpTypePointer Uniform %ivec4
|
||||
%uvec4_ptr = OpTypePointer Uniform %uvec4
|
||||
|
||||
%zero = OpConstant %int 0
|
||||
%one = OpConstant %int 1
|
||||
|
||||
%input_struct = OpTypeStruct %ivec4 %uvec4
|
||||
%input_struct_ptr = OpTypePointer Uniform %input_struct
|
||||
%inputs = OpVariable %input_struct_ptr Uniform
|
||||
%output_struct = OpTypeStruct %uvec4 %ivec4
|
||||
%output_struct_ptr = OpTypePointer Uniform %output_struct
|
||||
%outputs = OpVariable %output_struct_ptr Uniform
|
||||
|
||||
%func = OpFunction %void None %main_func
|
||||
%block = OpLabel
|
||||
|
||||
%input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero
|
||||
%input0_ptr = OpAccessChain %uvec4_ptr %inputs %one
|
||||
%input1 = OpLoad %ivec4 %input1_ptr
|
||||
%input0 = OpLoad %uvec4 %input0_ptr
|
||||
|
||||
%output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero
|
||||
%output_ptr_ivec4 = OpAccessChain %ivec4_ptr %outputs %one
|
||||
|
||||
; Test all variants of ShiftRightLogical
|
||||
%result_iadd_0 = OpShiftRightLogical %uvec4 %input0 %input1
|
||||
%result_iadd_1 = OpShiftRightLogical %uvec4 %input1 %input0
|
||||
%result_iadd_2 = OpShiftRightLogical %uvec4 %input0 %input0
|
||||
%result_iadd_3 = OpShiftRightLogical %uvec4 %input1 %input1
|
||||
%result_iadd_4 = OpShiftRightLogical %ivec4 %input0 %input0
|
||||
%result_iadd_5 = OpShiftRightLogical %ivec4 %input1 %input1
|
||||
%result_iadd_6 = OpShiftRightLogical %ivec4 %input0 %input1
|
||||
%result_iadd_7 = OpShiftRightLogical %ivec4 %input1 %input0
|
||||
OpStore %output_ptr_uvec4 %result_iadd_0
|
||||
OpStore %output_ptr_uvec4 %result_iadd_1
|
||||
OpStore %output_ptr_uvec4 %result_iadd_2
|
||||
OpStore %output_ptr_uvec4 %result_iadd_3
|
||||
OpStore %output_ptr_ivec4 %result_iadd_4
|
||||
OpStore %output_ptr_ivec4 %result_iadd_5
|
||||
OpStore %output_ptr_ivec4 %result_iadd_6
|
||||
OpStore %output_ptr_ivec4 %result_iadd_7
|
||||
|
||||
OpReturn
|
||||
OpFunctionEnd
|
97
shaders-msl/asm/comp/multiple-entry.asm.comp
Normal file
97
shaders-msl/asm/comp/multiple-entry.asm.comp
Normal file
@ -0,0 +1,97 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos Glslang Reference Front End; 1
|
||||
; Bound: 30
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint Fragment %func_alt "main2" %frag_in %frag_out
|
||||
OpEntryPoint GLCompute %func "main"
|
||||
OpExecutionMode %func LocalSize 1 1 1
|
||||
OpSource ESSL 310
|
||||
OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
|
||||
OpSourceExtension "GL_GOOGLE_include_directive"
|
||||
OpMemberDecorate %input_struct 0 Offset 0
|
||||
OpMemberDecorate %input_struct 1 Offset 16
|
||||
OpMemberDecorate %output_struct 0 Offset 0
|
||||
OpMemberDecorate %output_struct 1 Offset 16
|
||||
OpDecorate %input_struct BufferBlock
|
||||
OpDecorate %inputs DescriptorSet 0
|
||||
OpDecorate %inputs Binding 0
|
||||
OpDecorate %inputs Restrict
|
||||
OpDecorate %output_struct BufferBlock
|
||||
OpDecorate %outputs DescriptorSet 0
|
||||
OpDecorate %outputs Binding 1
|
||||
OpDecorate %outputs Restrict
|
||||
OpDecorate %frag_in Location 0
|
||||
OpDecorate %frag_out Location 0
|
||||
|
||||
%void = OpTypeVoid
|
||||
%main_func = OpTypeFunction %void
|
||||
|
||||
%uint = OpTypeInt 32 0
|
||||
%uvec4 = OpTypeVector %uint 4
|
||||
|
||||
%int = OpTypeInt 32 1
|
||||
%ivec4 = OpTypeVector %int 4
|
||||
|
||||
%ivec4_ptr = OpTypePointer Uniform %ivec4
|
||||
%uvec4_ptr = OpTypePointer Uniform %uvec4
|
||||
|
||||
%float = OpTypeFloat 32
|
||||
%vec4 = OpTypeVector %float 4
|
||||
%vec4_input_ptr = OpTypePointer Input %vec4
|
||||
%vec4_output_ptr = OpTypePointer Output %vec4
|
||||
|
||||
%zero = OpConstant %int 0
|
||||
%one = OpConstant %int 1
|
||||
|
||||
%input_struct = OpTypeStruct %ivec4 %uvec4
|
||||
%input_struct_ptr = OpTypePointer Uniform %input_struct
|
||||
%inputs = OpVariable %input_struct_ptr Uniform
|
||||
%output_struct = OpTypeStruct %uvec4 %ivec4
|
||||
%output_struct_ptr = OpTypePointer Uniform %output_struct
|
||||
%outputs = OpVariable %output_struct_ptr Uniform
|
||||
|
||||
%frag_in = OpVariable %vec4_input_ptr Input
|
||||
%frag_out = OpVariable %vec4_output_ptr Output
|
||||
|
||||
%func = OpFunction %void None %main_func
|
||||
%block = OpLabel
|
||||
|
||||
%input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero
|
||||
%input0_ptr = OpAccessChain %uvec4_ptr %inputs %one
|
||||
%input1 = OpLoad %ivec4 %input1_ptr
|
||||
%input0 = OpLoad %uvec4 %input0_ptr
|
||||
|
||||
%output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero
|
||||
%output_ptr_ivec4 = OpAccessChain %ivec4_ptr %outputs %one
|
||||
|
||||
; Test all variants of IAdd
|
||||
%result_iadd_0 = OpIAdd %uvec4 %input0 %input1
|
||||
%result_iadd_1 = OpIAdd %uvec4 %input1 %input0
|
||||
%result_iadd_2 = OpIAdd %uvec4 %input0 %input0
|
||||
%result_iadd_3 = OpIAdd %uvec4 %input1 %input1
|
||||
%result_iadd_4 = OpIAdd %ivec4 %input0 %input0
|
||||
%result_iadd_5 = OpIAdd %ivec4 %input1 %input1
|
||||
%result_iadd_6 = OpIAdd %ivec4 %input0 %input1
|
||||
%result_iadd_7 = OpIAdd %ivec4 %input1 %input0
|
||||
OpStore %output_ptr_uvec4 %result_iadd_0
|
||||
OpStore %output_ptr_uvec4 %result_iadd_1
|
||||
OpStore %output_ptr_uvec4 %result_iadd_2
|
||||
OpStore %output_ptr_uvec4 %result_iadd_3
|
||||
OpStore %output_ptr_ivec4 %result_iadd_4
|
||||
OpStore %output_ptr_ivec4 %result_iadd_5
|
||||
OpStore %output_ptr_ivec4 %result_iadd_6
|
||||
OpStore %output_ptr_ivec4 %result_iadd_7
|
||||
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
|
||||
%func_alt = OpFunction %void None %main_func
|
||||
%block_alt = OpLabel
|
||||
%frag_input_value = OpLoad %vec4 %frag_in
|
||||
OpStore %frag_out %frag_input_value
|
||||
OpReturn
|
||||
OpFunctionEnd
|
67
shaders-msl/asm/comp/quantize.asm.comp
Normal file
67
shaders-msl/asm/comp/quantize.asm.comp
Normal file
@ -0,0 +1,67 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos Glslang Reference Front End; 1
|
||||
; Bound: 38
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %4 "main"
|
||||
OpExecutionMode %4 LocalSize 1 1 1
|
||||
OpSource ESSL 310
|
||||
OpName %4 "main"
|
||||
OpName %10 "SSBO0"
|
||||
OpMemberName %10 0 "scalar"
|
||||
OpMemberName %10 1 "vec2_val"
|
||||
OpMemberName %10 2 "vec3_val"
|
||||
OpMemberName %10 3 "vec4_val"
|
||||
OpName %12 ""
|
||||
OpMemberDecorate %10 0 Offset 0
|
||||
OpMemberDecorate %10 1 Offset 8
|
||||
OpMemberDecorate %10 2 Offset 16
|
||||
OpMemberDecorate %10 3 Offset 32
|
||||
OpDecorate %10 BufferBlock
|
||||
OpDecorate %12 DescriptorSet 0
|
||||
OpDecorate %12 Binding 0
|
||||
%2 = OpTypeVoid
|
||||
%3 = OpTypeFunction %2
|
||||
%6 = OpTypeFloat 32
|
||||
%7 = OpTypeVector %6 2
|
||||
%8 = OpTypeVector %6 3
|
||||
%9 = OpTypeVector %6 4
|
||||
%10 = OpTypeStruct %6 %7 %8 %9
|
||||
%11 = OpTypePointer Uniform %10
|
||||
%12 = OpVariable %11 Uniform
|
||||
%13 = OpTypeInt 32 1
|
||||
%14 = OpConstant %13 0
|
||||
%15 = OpTypePointer Uniform %6
|
||||
%20 = OpConstant %13 1
|
||||
%21 = OpTypePointer Uniform %7
|
||||
%26 = OpConstant %13 2
|
||||
%27 = OpTypePointer Uniform %8
|
||||
%32 = OpConstant %13 3
|
||||
%33 = OpTypePointer Uniform %9
|
||||
%4 = OpFunction %2 None %3
|
||||
%5 = OpLabel
|
||||
%16 = OpAccessChain %15 %12 %14
|
||||
%17 = OpLoad %6 %16
|
||||
%18 = OpQuantizeToF16 %6 %17
|
||||
%19 = OpAccessChain %15 %12 %14
|
||||
OpStore %19 %18
|
||||
%22 = OpAccessChain %21 %12 %20
|
||||
%23 = OpLoad %7 %22
|
||||
%24 = OpQuantizeToF16 %7 %23
|
||||
%25 = OpAccessChain %21 %12 %20
|
||||
OpStore %25 %24
|
||||
%28 = OpAccessChain %27 %12 %26
|
||||
%29 = OpLoad %8 %28
|
||||
%30 = OpQuantizeToF16 %8 %29
|
||||
%31 = OpAccessChain %27 %12 %26
|
||||
OpStore %31 %30
|
||||
%34 = OpAccessChain %33 %12 %32
|
||||
%35 = OpLoad %9 %34
|
||||
%36 = OpQuantizeToF16 %9 %35
|
||||
%37 = OpAccessChain %33 %12 %32
|
||||
OpStore %37 %36
|
||||
OpReturn
|
||||
OpFunctionEnd
|
55
shaders-msl/comp/bake_gradient.comp
Normal file
55
shaders-msl/comp/bake_gradient.comp
Normal file
@ -0,0 +1,55 @@
|
||||
#version 310 es
|
||||
|
||||
layout(local_size_x = 8, local_size_y = 8) in;
|
||||
|
||||
layout(binding = 0) uniform sampler2D uHeight;
|
||||
layout(binding = 1) uniform sampler2D uDisplacement;
|
||||
layout(rgba16f, binding = 2) uniform writeonly mediump image2D iHeightDisplacement;
|
||||
layout(rgba16f, binding = 3) uniform writeonly mediump image2D iGradJacobian;
|
||||
|
||||
layout(binding = 4) uniform UBO
|
||||
{
|
||||
vec4 uInvSize;
|
||||
vec4 uScale;
|
||||
};
|
||||
|
||||
mediump float jacobian(mediump vec2 dDdx, mediump vec2 dDdy)
|
||||
{
|
||||
return (1.0 + dDdx.x) * (1.0 + dDdy.y) - dDdx.y * dDdy.x;
|
||||
}
|
||||
#define LAMBDA 1.2
|
||||
|
||||
void main()
|
||||
{
|
||||
vec4 uv = (vec2(gl_GlobalInvocationID.xy) * uInvSize.xy).xyxy + 0.5 * uInvSize;
|
||||
|
||||
float h = textureLod(uHeight, uv.xy, 0.0).x;
|
||||
|
||||
// Compute the heightmap gradient by simple differentiation.
|
||||
float x0 = textureLodOffset(uHeight, uv.xy, 0.0, ivec2(-1, 0)).x;
|
||||
float x1 = textureLodOffset(uHeight, uv.xy, 0.0, ivec2(+1, 0)).x;
|
||||
float y0 = textureLodOffset(uHeight, uv.xy, 0.0, ivec2(0, -1)).x;
|
||||
float y1 = textureLodOffset(uHeight, uv.xy, 0.0, ivec2(0, +1)).x;
|
||||
vec2 grad = uScale.xy * 0.5 * vec2(x1 - x0, y1 - y0);
|
||||
|
||||
// Displacement map must be sampled with a different offset since it's a smaller texture.
|
||||
vec2 displacement = LAMBDA * textureLod(uDisplacement, uv.zw, 0.0).xy;
|
||||
|
||||
// Compute jacobian.
|
||||
vec2 dDdx = 0.5 * LAMBDA * (
|
||||
textureLodOffset(uDisplacement, uv.zw, 0.0, ivec2(+1, 0)).xy -
|
||||
textureLodOffset(uDisplacement, uv.zw, 0.0, ivec2(-1, 0)).xy);
|
||||
vec2 dDdy = 0.5 * LAMBDA * (
|
||||
textureLodOffset(uDisplacement, uv.zw, 0.0, ivec2(0, +1)).xy -
|
||||
textureLodOffset(uDisplacement, uv.zw, 0.0, ivec2(0, -1)).xy);
|
||||
float j = jacobian(dDdx * uScale.z, dDdy * uScale.z);
|
||||
|
||||
displacement = vec2(0.0);
|
||||
|
||||
// Read by vertex shader/tess shader.
|
||||
imageStore(iHeightDisplacement, ivec2(gl_GlobalInvocationID.xy), vec4(h, displacement, 0.0));
|
||||
|
||||
// Read by fragment shader.
|
||||
imageStore(iGradJacobian, ivec2(gl_GlobalInvocationID.xy), vec4(grad, j, 0.0));
|
||||
}
|
||||
|
28
shaders-msl/comp/basic.comp
Normal file
28
shaders-msl/comp/basic.comp
Normal file
@ -0,0 +1,28 @@
|
||||
#version 310 es
|
||||
layout(local_size_x = 1) in;
|
||||
|
||||
layout(std430, binding = 0) readonly buffer SSBO
|
||||
{
|
||||
vec4 in_data[];
|
||||
};
|
||||
|
||||
layout(std430, binding = 1) writeonly buffer SSBO2
|
||||
{
|
||||
vec4 out_data[];
|
||||
};
|
||||
|
||||
layout(std430, binding = 2) buffer SSBO3
|
||||
{
|
||||
uint counter;
|
||||
};
|
||||
|
||||
void main()
|
||||
{
|
||||
uint ident = gl_GlobalInvocationID.x;
|
||||
vec4 idata = in_data[ident];
|
||||
if (dot(idata, vec4(1.0, 5.0, 6.0, 2.0)) > 8.2)
|
||||
{
|
||||
out_data[atomicAdd(counter, 1u)] = idata;
|
||||
}
|
||||
}
|
||||
|
23
shaders-msl/comp/bitfield.comp
Normal file
23
shaders-msl/comp/bitfield.comp
Normal file
@ -0,0 +1,23 @@
|
||||
#version 310 es
|
||||
|
||||
void main()
|
||||
{
|
||||
int signed_value = 0;
|
||||
uint unsigned_value = 0u;
|
||||
|
||||
int s = bitfieldExtract(signed_value, 5, 20);
|
||||
uint u = bitfieldExtract(unsigned_value, 6, 21);
|
||||
s = bitfieldInsert(s, 40, 5, 4);
|
||||
u = bitfieldInsert(u, 60u, 5, 4);
|
||||
|
||||
u = bitfieldReverse(u);
|
||||
s = bitfieldReverse(s);
|
||||
|
||||
int v0 = bitCount(u);
|
||||
int v1 = bitCount(s);
|
||||
|
||||
int v2 = findMSB(u);
|
||||
int v3 = findMSB(s);
|
||||
int v4 = findLSB(u);
|
||||
int v5 = findLSB(s);
|
||||
}
|
54
shaders-msl/comp/cfg-preserve-parameter.comp
Normal file
54
shaders-msl/comp/cfg-preserve-parameter.comp
Normal file
@ -0,0 +1,54 @@
|
||||
#version 310 es
|
||||
|
||||
// We write in all paths (and no reads), so should just be out.
|
||||
void out_test_0(int cond, inout int i)
|
||||
{
|
||||
if (cond == 0)
|
||||
i = 40;
|
||||
else
|
||||
i = 60;
|
||||
}
|
||||
|
||||
// We write in all paths (and no reads), so should just be out.
|
||||
void out_test_1(int cond, inout int i)
|
||||
{
|
||||
switch (cond)
|
||||
{
|
||||
case 40:
|
||||
i = 40;
|
||||
break;
|
||||
|
||||
default:
|
||||
i = 70;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// We don't write in all paths, so should be inout.
|
||||
void inout_test_0(int cond, inout int i)
|
||||
{
|
||||
if (cond == 0)
|
||||
i = 40;
|
||||
}
|
||||
|
||||
void inout_test_1(int cond, inout int i)
|
||||
{
|
||||
switch (cond)
|
||||
{
|
||||
case 40:
|
||||
i = 40;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void main()
|
||||
{
|
||||
int cond = 40;
|
||||
int i = 50;
|
||||
|
||||
out_test_0(cond, i);
|
||||
out_test_1(cond, i);
|
||||
inout_test_0(cond, i);
|
||||
inout_test_1(cond, i);
|
||||
}
|
12
shaders-msl/comp/coherent-block.comp
Normal file
12
shaders-msl/comp/coherent-block.comp
Normal file
@ -0,0 +1,12 @@
|
||||
#version 310 es
|
||||
layout(local_size_x = 1) in;
|
||||
|
||||
layout(binding = 1) coherent restrict writeonly buffer SSBO
|
||||
{
|
||||
vec4 value;
|
||||
};
|
||||
|
||||
void main()
|
||||
{
|
||||
value = vec4(20.0);
|
||||
}
|
14
shaders-msl/comp/coherent-image.comp
Normal file
14
shaders-msl/comp/coherent-image.comp
Normal file
@ -0,0 +1,14 @@
|
||||
#version 310 es
|
||||
layout(local_size_x = 1) in;
|
||||
|
||||
layout(binding = 1) coherent restrict writeonly buffer SSBO
|
||||
{
|
||||
ivec4 value;
|
||||
};
|
||||
|
||||
layout(r32i, binding = 3) coherent readonly restrict uniform mediump iimage2D uImage;
|
||||
|
||||
void main()
|
||||
{
|
||||
value = imageLoad(uImage, ivec2(10));
|
||||
}
|
26
shaders-msl/comp/culling.comp
Normal file
26
shaders-msl/comp/culling.comp
Normal file
@ -0,0 +1,26 @@
|
||||
#version 310 es
|
||||
layout(local_size_x = 4) in;
|
||||
|
||||
layout(std430, binding = 0) readonly buffer SSBO
|
||||
{
|
||||
float in_data[];
|
||||
};
|
||||
|
||||
layout(std430, binding = 1) writeonly buffer SSBO2
|
||||
{
|
||||
float out_data[];
|
||||
};
|
||||
|
||||
layout(std430, binding = 2) buffer SSBO3
|
||||
{
|
||||
uint count;
|
||||
};
|
||||
|
||||
void main()
|
||||
{
|
||||
uint ident = gl_GlobalInvocationID.x;
|
||||
float idata = in_data[ident];
|
||||
if (idata > 12.0)
|
||||
out_data[atomicAdd(count, 1u)] = idata;
|
||||
}
|
||||
|
31
shaders-msl/comp/dowhile.comp
Normal file
31
shaders-msl/comp/dowhile.comp
Normal file
@ -0,0 +1,31 @@
|
||||
#version 310 es
|
||||
layout(local_size_x = 1) in;
|
||||
|
||||
layout(std430, binding = 0) readonly buffer SSBO
|
||||
{
|
||||
mat4 mvp;
|
||||
vec4 in_data[];
|
||||
};
|
||||
|
||||
layout(std430, binding = 1) writeonly buffer SSBO2
|
||||
{
|
||||
vec4 out_data[];
|
||||
};
|
||||
|
||||
int i;
|
||||
|
||||
void main()
|
||||
{
|
||||
uint ident = gl_GlobalInvocationID.x;
|
||||
|
||||
i = 0;
|
||||
vec4 idat = in_data[ident];
|
||||
do
|
||||
{
|
||||
idat = mvp * idat;
|
||||
i++;
|
||||
} while(i < 16);
|
||||
|
||||
out_data[ident] = idat;
|
||||
}
|
||||
|
12
shaders-msl/comp/image.comp
Normal file
12
shaders-msl/comp/image.comp
Normal file
@ -0,0 +1,12 @@
|
||||
#version 310 es
|
||||
layout(local_size_x = 1) in;
|
||||
|
||||
layout(rgba8, binding = 0) uniform readonly mediump image2D uImageIn;
|
||||
layout(rgba8, binding = 1) uniform writeonly mediump image2D uImageOut;
|
||||
|
||||
void main()
|
||||
{
|
||||
vec4 v = imageLoad(uImageIn, ivec2(gl_GlobalInvocationID.xy) + imageSize(uImageIn));
|
||||
imageStore(uImageOut, ivec2(gl_GlobalInvocationID.xy), v);
|
||||
}
|
||||
|
18
shaders-msl/comp/insert.comp
Normal file
18
shaders-msl/comp/insert.comp
Normal file
@ -0,0 +1,18 @@
|
||||
#version 310 es
|
||||
layout(local_size_x = 1) in;
|
||||
|
||||
layout(std430, binding = 0) writeonly buffer SSBO
|
||||
{
|
||||
vec4 out_data[];
|
||||
};
|
||||
|
||||
void main()
|
||||
{
|
||||
vec4 v;
|
||||
v.x = 10.0;
|
||||
v.y = 30.0;
|
||||
v.z = 70.0;
|
||||
v.w = 90.0;
|
||||
out_data[gl_GlobalInvocationID.x] = v;
|
||||
out_data[gl_GlobalInvocationID.x].y = 20.0;
|
||||
}
|
98
shaders-msl/comp/loop.comp
Normal file
98
shaders-msl/comp/loop.comp
Normal file
@ -0,0 +1,98 @@
|
||||
#version 310 es
|
||||
layout(local_size_x = 1) in;
|
||||
|
||||
layout(std430, binding = 0) readonly buffer SSBO
|
||||
{
|
||||
mat4 mvp;
|
||||
vec4 in_data[];
|
||||
};
|
||||
|
||||
layout(std430, binding = 1) writeonly buffer SSBO2
|
||||
{
|
||||
vec4 out_data[];
|
||||
};
|
||||
|
||||
void main()
|
||||
{
|
||||
uint ident = gl_GlobalInvocationID.x;
|
||||
vec4 idat = in_data[ident];
|
||||
|
||||
int k = 0;
|
||||
uint i = 0u;
|
||||
|
||||
if (idat.y == 20.0)
|
||||
{
|
||||
do
|
||||
{
|
||||
k = k * 2;
|
||||
i++;
|
||||
} while (i < ident);
|
||||
}
|
||||
|
||||
switch (k)
|
||||
{
|
||||
case 10:
|
||||
for (;;)
|
||||
{
|
||||
i++;
|
||||
if (i > 10u)
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
for (;;)
|
||||
{
|
||||
i += 2u;
|
||||
if (i > 20u)
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
while (k < 10)
|
||||
{
|
||||
idat *= 2.0;
|
||||
k++;
|
||||
}
|
||||
|
||||
for (uint i = 0u; i < 16u; i++, k++)
|
||||
for (uint j = 0u; j < 30u; j++)
|
||||
idat = mvp * idat;
|
||||
|
||||
k = 0;
|
||||
for (;;)
|
||||
{
|
||||
k++;
|
||||
if (k > 10)
|
||||
{
|
||||
k += 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
k += 3;
|
||||
continue;
|
||||
}
|
||||
|
||||
k += 10;
|
||||
}
|
||||
|
||||
k = 0;
|
||||
do
|
||||
{
|
||||
k++;
|
||||
} while (k > 10);
|
||||
|
||||
int l = 0;
|
||||
for (;; l++)
|
||||
{
|
||||
if (l == 5)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
idat += 1.0;
|
||||
}
|
||||
out_data[ident] = idat;
|
||||
}
|
||||
|
14
shaders-msl/comp/mat3.comp
Normal file
14
shaders-msl/comp/mat3.comp
Normal file
@ -0,0 +1,14 @@
|
||||
#version 310 es
|
||||
layout(local_size_x = 1) in;
|
||||
|
||||
layout(std430, binding = 1) writeonly buffer SSBO2
|
||||
{
|
||||
mat3 out_data[];
|
||||
};
|
||||
|
||||
void main()
|
||||
{
|
||||
uint ident = gl_GlobalInvocationID.x;
|
||||
out_data[ident] = mat3(vec3(10.0), vec3(20.0), vec3(40.0));
|
||||
}
|
||||
|
26
shaders-msl/comp/mod.comp
Normal file
26
shaders-msl/comp/mod.comp
Normal file
@ -0,0 +1,26 @@
|
||||
#version 310 es
|
||||
layout(local_size_x = 1) in;
|
||||
|
||||
layout(std430, binding = 0) readonly buffer SSBO
|
||||
{
|
||||
vec4 in_data[];
|
||||
};
|
||||
|
||||
layout(std430, binding = 1) writeonly buffer SSBO2
|
||||
{
|
||||
vec4 out_data[];
|
||||
};
|
||||
|
||||
void main()
|
||||
{
|
||||
uint ident = gl_GlobalInvocationID.x;
|
||||
vec4 v = mod(in_data[ident], out_data[ident]);
|
||||
out_data[ident] = v;
|
||||
|
||||
uvec4 vu = floatBitsToUint(in_data[ident]) % floatBitsToUint(out_data[ident]);
|
||||
out_data[ident] = uintBitsToFloat(vu);
|
||||
|
||||
ivec4 vi = floatBitsToInt(in_data[ident]) % floatBitsToInt(out_data[ident]);
|
||||
out_data[ident] = intBitsToFloat(vi);
|
||||
}
|
||||
|
23
shaders-msl/comp/modf.comp
Normal file
23
shaders-msl/comp/modf.comp
Normal file
@ -0,0 +1,23 @@
|
||||
#version 310 es
|
||||
layout(local_size_x = 1) in;
|
||||
|
||||
layout(std430, binding = 0) readonly buffer SSBO
|
||||
{
|
||||
vec4 in_data[];
|
||||
};
|
||||
|
||||
layout(std430, binding = 1) writeonly buffer SSBO2
|
||||
{
|
||||
vec4 out_data[];
|
||||
};
|
||||
|
||||
void main()
|
||||
{
|
||||
uint ident = gl_GlobalInvocationID.x;
|
||||
vec4 i;
|
||||
//vec4 v = frexp(in_data[ident], i);
|
||||
//out_data[ident] = ldexp(v, i);
|
||||
vec4 v = modf(in_data[ident], i);
|
||||
out_data[ident] = v;
|
||||
}
|
||||
|
33
shaders-msl/comp/return.comp
Normal file
33
shaders-msl/comp/return.comp
Normal file
@ -0,0 +1,33 @@
|
||||
#version 310 es
|
||||
layout(local_size_x = 1) in;
|
||||
|
||||
layout(std430, binding = 1) writeonly buffer SSBO2
|
||||
{
|
||||
vec4 out_data[];
|
||||
};
|
||||
|
||||
void main()
|
||||
{
|
||||
uint ident = gl_GlobalInvocationID.x;
|
||||
|
||||
if (ident == 2u)
|
||||
{
|
||||
out_data[ident] = vec4(20.0);
|
||||
}
|
||||
else if (ident == 4u)
|
||||
{
|
||||
out_data[ident] = vec4(10.0);
|
||||
return;
|
||||
}
|
||||
|
||||
for (int i = 0; i < 20; i++)
|
||||
{
|
||||
if (i == 10)
|
||||
break;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
out_data[ident] = vec4(10.0);
|
||||
}
|
||||
|
27
shaders-msl/comp/rmw-opt.comp
Normal file
27
shaders-msl/comp/rmw-opt.comp
Normal file
@ -0,0 +1,27 @@
|
||||
#version 310 es
|
||||
layout(local_size_x = 1) in;
|
||||
|
||||
layout(std430, binding = 0) buffer SSBO
|
||||
{
|
||||
int a;
|
||||
};
|
||||
|
||||
void main()
|
||||
{
|
||||
a += 10;
|
||||
a -= 10;
|
||||
a *= 10;
|
||||
a /= 10;
|
||||
a <<= 2;
|
||||
a >>= 3;
|
||||
a &= 40;
|
||||
a ^= 10;
|
||||
a %= 40;
|
||||
a |= 1;
|
||||
|
||||
bool c = false;
|
||||
bool d = true;
|
||||
c = c && d;
|
||||
d = d || c;
|
||||
a = c && d ? 1 : 0;
|
||||
}
|
27
shaders-msl/comp/shared.comp
Normal file
27
shaders-msl/comp/shared.comp
Normal file
@ -0,0 +1,27 @@
|
||||
#version 310 es
|
||||
layout(local_size_x = 4) in;
|
||||
|
||||
shared float sShared[gl_WorkGroupSize.x];
|
||||
|
||||
layout(std430, binding = 0) readonly buffer SSBO
|
||||
{
|
||||
float in_data[];
|
||||
};
|
||||
|
||||
layout(std430, binding = 1) writeonly buffer SSBO2
|
||||
{
|
||||
float out_data[];
|
||||
};
|
||||
|
||||
void main()
|
||||
{
|
||||
uint ident = gl_GlobalInvocationID.x;
|
||||
float idata = in_data[ident];
|
||||
|
||||
sShared[gl_LocalInvocationIndex] = idata;
|
||||
memoryBarrierShared();
|
||||
barrier();
|
||||
|
||||
out_data[ident] = sShared[gl_WorkGroupSize.x - gl_LocalInvocationIndex - 1u];
|
||||
}
|
||||
|
24
shaders-msl/comp/struct-layout.comp
Normal file
24
shaders-msl/comp/struct-layout.comp
Normal file
@ -0,0 +1,24 @@
|
||||
#version 310 es
|
||||
layout(local_size_x = 1) in;
|
||||
|
||||
struct Foo
|
||||
{
|
||||
mat4 m;
|
||||
};
|
||||
|
||||
layout(std430, binding = 0) readonly buffer SSBO
|
||||
{
|
||||
Foo in_data[];
|
||||
};
|
||||
|
||||
layout(std430, binding = 1) writeonly buffer SSBO2
|
||||
{
|
||||
Foo out_data[];
|
||||
};
|
||||
|
||||
void main()
|
||||
{
|
||||
uint ident = gl_GlobalInvocationID.x;
|
||||
out_data[ident].m = in_data[ident].m * in_data[ident].m;
|
||||
}
|
||||
|
76
shaders-msl/comp/struct-packing.comp
Normal file
76
shaders-msl/comp/struct-packing.comp
Normal file
@ -0,0 +1,76 @@
|
||||
#version 310 es
|
||||
layout(local_size_x = 1) in;
|
||||
|
||||
struct S0
|
||||
{
|
||||
vec2 a[1];
|
||||
float b;
|
||||
};
|
||||
|
||||
struct S1
|
||||
{
|
||||
vec3 a;
|
||||
float b;
|
||||
};
|
||||
|
||||
struct S2
|
||||
{
|
||||
vec3 a[1];
|
||||
float b;
|
||||
};
|
||||
|
||||
struct S3
|
||||
{
|
||||
vec2 a;
|
||||
float b;
|
||||
};
|
||||
|
||||
struct S4
|
||||
{
|
||||
vec2 c;
|
||||
};
|
||||
|
||||
struct Content
|
||||
{
|
||||
S0 m0s[1];
|
||||
S1 m1s[1];
|
||||
S2 m2s[1];
|
||||
S0 m0;
|
||||
S1 m1;
|
||||
S2 m2;
|
||||
S3 m3;
|
||||
float m4;
|
||||
|
||||
S4 m3s[8];
|
||||
};
|
||||
|
||||
layout(binding = 1, std430) buffer SSBO1
|
||||
{
|
||||
Content content;
|
||||
Content content1[2];
|
||||
Content content2;
|
||||
|
||||
layout(column_major) mat2 m0;
|
||||
layout(column_major) mat2 m1;
|
||||
layout(column_major) mat2x3 m2[4];
|
||||
layout(column_major) mat3x2 m3;
|
||||
layout(row_major) mat2 m4;
|
||||
layout(row_major) mat2 m5[9];
|
||||
layout(row_major) mat2x3 m6[4][2];
|
||||
layout(row_major) mat3x2 m7;
|
||||
float array[];
|
||||
} ssbo_430;
|
||||
|
||||
layout(binding = 0, std140) buffer SSBO0
|
||||
{
|
||||
Content content;
|
||||
Content content1[2];
|
||||
Content content2;
|
||||
float array[];
|
||||
} ssbo_140;
|
||||
|
||||
void main()
|
||||
{
|
||||
ssbo_430.content = ssbo_140.content;
|
||||
}
|
||||
|
40
shaders-msl/comp/torture-loop.comp
Normal file
40
shaders-msl/comp/torture-loop.comp
Normal file
@ -0,0 +1,40 @@
|
||||
#version 310 es
|
||||
layout(local_size_x = 1) in;
|
||||
|
||||
layout(std430, binding = 0) readonly buffer SSBO
|
||||
{
|
||||
mat4 mvp;
|
||||
vec4 in_data[];
|
||||
};
|
||||
|
||||
layout(std430, binding = 1) writeonly buffer SSBO2
|
||||
{
|
||||
vec4 out_data[];
|
||||
};
|
||||
|
||||
void main()
|
||||
{
|
||||
uint ident = gl_GlobalInvocationID.x;
|
||||
vec4 idat = in_data[ident];
|
||||
|
||||
int k = 0;
|
||||
|
||||
// Continue with side effects.
|
||||
while (++k < 10)
|
||||
{
|
||||
idat *= 2.0;
|
||||
k++;
|
||||
}
|
||||
|
||||
// Again used here ...
|
||||
for (uint i = 0u; i < 16u; i++, k++)
|
||||
for (uint j = 0u; j < 30u; j++)
|
||||
idat = mvp * idat;
|
||||
|
||||
do
|
||||
{
|
||||
k++;
|
||||
} while (k > 10);
|
||||
out_data[ident] = idat;
|
||||
}
|
||||
|
45
shaders-msl/comp/type-alias.comp
Normal file
45
shaders-msl/comp/type-alias.comp
Normal file
@ -0,0 +1,45 @@
|
||||
#version 310 es
|
||||
layout(local_size_x = 1) in;
|
||||
|
||||
struct S0
|
||||
{
|
||||
vec4 a;
|
||||
};
|
||||
|
||||
struct S1
|
||||
{
|
||||
vec4 a;
|
||||
};
|
||||
|
||||
vec4 overload(S0 s0)
|
||||
{
|
||||
return s0.a;
|
||||
}
|
||||
|
||||
vec4 overload(S1 s1)
|
||||
{
|
||||
return s1.a;
|
||||
}
|
||||
|
||||
layout(std430, binding = 0) buffer SSBO0
|
||||
{
|
||||
S0 s0s[];
|
||||
};
|
||||
|
||||
layout(std430, binding = 1) buffer SSBO1
|
||||
{
|
||||
S1 s1s[];
|
||||
};
|
||||
|
||||
layout(std430, binding = 2) buffer SSBO2
|
||||
{
|
||||
vec4 outputs[];
|
||||
};
|
||||
|
||||
|
||||
void main()
|
||||
{
|
||||
S0 s0 = s0s[gl_GlobalInvocationID.x];
|
||||
S1 s1 = s1s[gl_GlobalInvocationID.x];
|
||||
outputs[gl_GlobalInvocationID.x] = overload(s0) + overload(s1);
|
||||
}
|
17
shaders-msl/comp/udiv.comp
Normal file
17
shaders-msl/comp/udiv.comp
Normal file
@ -0,0 +1,17 @@
|
||||
#version 310 es
|
||||
layout(local_size_x = 1) in;
|
||||
|
||||
layout(std430, binding = 0) buffer SSBO
|
||||
{
|
||||
uint inputs[];
|
||||
};
|
||||
|
||||
layout(std430, binding = 0) buffer SSBO2
|
||||
{
|
||||
uint outputs[];
|
||||
};
|
||||
|
||||
void main()
|
||||
{
|
||||
outputs[gl_GlobalInvocationID.x] = inputs[gl_GlobalInvocationID.x] / 29u;
|
||||
}
|
@ -341,9 +341,10 @@ struct SPIRExtension : IVariant
|
||||
// so in order to avoid conflicts, we can't stick them in the ids array.
|
||||
struct SPIREntryPoint
|
||||
{
|
||||
SPIREntryPoint(uint32_t self_, spv::ExecutionModel execution_model, std::string entry_name)
|
||||
SPIREntryPoint(uint32_t self_, spv::ExecutionModel execution_model, const std::string &entry_name)
|
||||
: self(self_)
|
||||
, name(std::move(entry_name))
|
||||
, name(entry_name)
|
||||
, orig_name(entry_name)
|
||||
, model(execution_model)
|
||||
{
|
||||
}
|
||||
@ -351,6 +352,7 @@ struct SPIREntryPoint
|
||||
|
||||
uint32_t self = 0;
|
||||
std::string name;
|
||||
std::string orig_name;
|
||||
std::vector<uint32_t> interface_variables;
|
||||
|
||||
uint64_t flags = 0;
|
||||
@ -858,7 +860,9 @@ struct SPIRConstant : IVariant
|
||||
|
||||
uint32_t constant_type;
|
||||
ConstantMatrix m;
|
||||
bool specialization = false; // If the constant is a specialization constant (i.e. created with OpSpecConstant*).
|
||||
bool specialization = false; // If this constant is a specialization constant (i.e. created with OpSpecConstant*).
|
||||
bool is_used_as_array_length =
|
||||
false; // If this constant is used as an array length which creates specialization restrictions on some backends.
|
||||
|
||||
// For composites which are constant arrays, etc.
|
||||
std::vector<uint32_t> subconstants;
|
||||
@ -916,6 +920,10 @@ public:
|
||||
{
|
||||
return type;
|
||||
}
|
||||
uint32_t get_id() const
|
||||
{
|
||||
return holder ? holder->self : 0;
|
||||
}
|
||||
bool empty() const
|
||||
{
|
||||
return !holder;
|
||||
|
@ -171,9 +171,8 @@ void CompilerCPP::emit_resources()
|
||||
auto &type = get<SPIRType>(var.basetype);
|
||||
|
||||
if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassUniform &&
|
||||
!is_hidden_variable(var) &&
|
||||
(meta[type.self].decoration.decoration_flags &
|
||||
((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))))
|
||||
!is_hidden_variable(var) && (meta[type.self].decoration.decoration_flags &
|
||||
((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))))
|
||||
{
|
||||
emit_buffer_block(var);
|
||||
}
|
||||
|
@ -168,7 +168,7 @@ bool Compiler::block_is_pure(const SPIRBlock &block)
|
||||
case OpMemoryBarrier:
|
||||
return false;
|
||||
|
||||
// OpExtInst is potentially impure depending on extension, but GLSL builtins are at least pure.
|
||||
// OpExtInst is potentially impure depending on extension, but GLSL builtins are at least pure.
|
||||
|
||||
default:
|
||||
break;
|
||||
@ -457,7 +457,7 @@ bool Compiler::is_hidden_variable(const SPIRVariable &var, bool include_builtins
|
||||
// Combined image samplers are always considered active as they are "magic" variables.
|
||||
if (find_if(begin(combined_image_samplers), end(combined_image_samplers), [&var](const CombinedImageSampler &samp) {
|
||||
return samp.combined_id == var.self;
|
||||
}) != end(combined_image_samplers))
|
||||
}) != end(combined_image_samplers))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
@ -1498,18 +1498,21 @@ void Compiler::parse(const Instruction &instruction)
|
||||
case OpTypeArray:
|
||||
{
|
||||
uint32_t id = ops[0];
|
||||
|
||||
auto &base = get<SPIRType>(ops[1]);
|
||||
auto &arraybase = set<SPIRType>(id);
|
||||
|
||||
arraybase = base;
|
||||
uint32_t tid = ops[1];
|
||||
auto &base = get<SPIRType>(tid);
|
||||
|
||||
auto *c = maybe_get<SPIRConstant>(ops[2]);
|
||||
arraybase = base;
|
||||
arraybase.parent_type = tid;
|
||||
|
||||
uint32_t cid = ops[2];
|
||||
mark_used_as_array_length(cid);
|
||||
auto *c = maybe_get<SPIRConstant>(cid);
|
||||
bool literal = c && !c->specialization;
|
||||
|
||||
arraybase.array_size_literal.push_back(literal);
|
||||
arraybase.array.push_back(literal ? c->scalar() : ops[2]);
|
||||
arraybase.parent_type = ops[1];
|
||||
arraybase.array.push_back(literal ? c->scalar() : cid);
|
||||
// Do NOT set arraybase.self!
|
||||
break;
|
||||
}
|
||||
@ -2523,7 +2526,7 @@ vector<string> Compiler::get_entry_points() const
|
||||
{
|
||||
vector<string> entries;
|
||||
for (auto &entry : entry_points)
|
||||
entries.push_back(entry.second.name);
|
||||
entries.push_back(entry.second.orig_name);
|
||||
return entries;
|
||||
}
|
||||
|
||||
@ -2536,8 +2539,9 @@ void Compiler::set_entry_point(const std::string &name)
|
||||
SPIREntryPoint &Compiler::get_entry_point(const std::string &name)
|
||||
{
|
||||
auto itr =
|
||||
find_if(begin(entry_points), end(entry_points),
|
||||
[&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool { return entry.second.name == name; });
|
||||
find_if(begin(entry_points), end(entry_points), [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool {
|
||||
return entry.second.orig_name == name;
|
||||
});
|
||||
|
||||
if (itr == end(entry_points))
|
||||
SPIRV_CROSS_THROW("Entry point does not exist.");
|
||||
@ -2548,8 +2552,9 @@ SPIREntryPoint &Compiler::get_entry_point(const std::string &name)
|
||||
const SPIREntryPoint &Compiler::get_entry_point(const std::string &name) const
|
||||
{
|
||||
auto itr =
|
||||
find_if(begin(entry_points), end(entry_points),
|
||||
[&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool { return entry.second.name == name; });
|
||||
find_if(begin(entry_points), end(entry_points), [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool {
|
||||
return entry.second.orig_name == name;
|
||||
});
|
||||
|
||||
if (itr == end(entry_points))
|
||||
SPIRV_CROSS_THROW("Entry point does not exist.");
|
||||
@ -2557,6 +2562,11 @@ const SPIREntryPoint &Compiler::get_entry_point(const std::string &name) const
|
||||
return itr->second;
|
||||
}
|
||||
|
||||
const string &Compiler::get_cleansed_entry_point_name(const std::string &name) const
|
||||
{
|
||||
return get_entry_point(name).name;
|
||||
}
|
||||
|
||||
const SPIREntryPoint &Compiler::get_entry_point() const
|
||||
{
|
||||
return entry_points.find(entry_point)->second;
|
||||
@ -2713,7 +2723,7 @@ void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIR
|
||||
[¶m](const SPIRFunction::CombinedImageSamplerParameter &p) {
|
||||
return param.image_id == p.image_id && param.sampler_id == p.sampler_id &&
|
||||
param.global_image == p.global_image && param.global_sampler == p.global_sampler;
|
||||
});
|
||||
});
|
||||
|
||||
if (itr == end(caller.combined_parameters))
|
||||
{
|
||||
@ -2850,7 +2860,7 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar
|
||||
auto itr = find_if(begin(compiler.combined_image_samplers), end(compiler.combined_image_samplers),
|
||||
[image_id, sampler_id](const CombinedImageSampler &combined) {
|
||||
return combined.image_id == image_id && combined.sampler_id == sampler_id;
|
||||
});
|
||||
});
|
||||
|
||||
if (itr == end(compiler.combined_image_samplers))
|
||||
{
|
||||
@ -2926,6 +2936,31 @@ const SPIRConstant &Compiler::get_constant(uint32_t id) const
|
||||
return get<SPIRConstant>(id);
|
||||
}
|
||||
|
||||
// Recursively marks any constants referenced by the specified constant instruction as being used
|
||||
// as an array length. The id must be a constant instruction (SPIRConstant or SPIRConstantOp).
|
||||
void Compiler::mark_used_as_array_length(uint32_t id)
|
||||
{
|
||||
switch (ids[id].get_type())
|
||||
{
|
||||
case TypeConstant:
|
||||
get<SPIRConstant>(id).is_used_as_array_length = true;
|
||||
break;
|
||||
|
||||
case TypeConstantOp:
|
||||
{
|
||||
auto &cop = get<SPIRConstantOp>(id);
|
||||
for (uint32_t arg_id : cop.arguments)
|
||||
mark_used_as_array_length(arg_id);
|
||||
}
|
||||
|
||||
case TypeUndef:
|
||||
return;
|
||||
|
||||
default:
|
||||
SPIRV_CROSS_THROW("Array lengths must be a constant instruction (OpConstant.. or OpSpecConstant...).");
|
||||
}
|
||||
}
|
||||
|
||||
static bool exists_unaccessed_path_to_return(const CFG &cfg, uint32_t block, const unordered_set<uint32_t> &blocks)
|
||||
{
|
||||
// This block accesses the variable.
|
||||
@ -3167,8 +3202,8 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry)
|
||||
break;
|
||||
}
|
||||
|
||||
// Atomics shouldn't be able to access function-local variables.
|
||||
// Some GLSL builtins access a pointer.
|
||||
// Atomics shouldn't be able to access function-local variables.
|
||||
// Some GLSL builtins access a pointer.
|
||||
|
||||
default:
|
||||
break;
|
||||
|
@ -268,6 +268,16 @@ public:
|
||||
const SPIREntryPoint &get_entry_point(const std::string &name) const;
|
||||
SPIREntryPoint &get_entry_point(const std::string &name);
|
||||
|
||||
// Some shader languages restrict the names that can be given to entry points, and the
|
||||
// corresponding backend will automatically rename an entry point name, during the call
|
||||
// to compile() if it is illegal. For example, the common entry point name main() is
|
||||
// illegal in MSL, and is renamed to an alternate name by the MSL backend.
|
||||
// Given the original entry point name contained in the SPIR-V, this function returns
|
||||
// the name, as updated by the backend during the call to compile(). If the name is not
|
||||
// illegal, and has not been renamed, or if this function is called before compile(),
|
||||
// this function will simply return the same name.
|
||||
const std::string &get_cleansed_entry_point_name(const std::string &name) const;
|
||||
|
||||
// Query and modify OpExecutionMode.
|
||||
uint64_t get_execution_mode_mask() const;
|
||||
void unset_execution_mode(spv::ExecutionMode mode);
|
||||
@ -497,6 +507,7 @@ protected:
|
||||
bool expression_is_lvalue(uint32_t id) const;
|
||||
bool variable_storage_is_aliased(const SPIRVariable &var);
|
||||
SPIRVariable *maybe_get_backing_variable(uint32_t chain);
|
||||
void mark_used_as_array_length(uint32_t id);
|
||||
|
||||
void register_read(uint32_t expr, uint32_t chain, bool forwarded);
|
||||
void register_write(uint32_t chain);
|
||||
|
@ -2686,7 +2686,7 @@ string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id)
|
||||
if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
|
||||
[result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
|
||||
return tmp.first == result_type && tmp.second == result_id;
|
||||
}) == end(header.declare_temporary))
|
||||
}) == end(header.declare_temporary))
|
||||
{
|
||||
header.declare_temporary.emplace_back(result_type, result_id);
|
||||
force_recompile = true;
|
||||
@ -2913,9 +2913,8 @@ void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result
|
||||
uint32_t op2, uint32_t op3, const char *op)
|
||||
{
|
||||
bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
|
||||
emit_op(result_type, result_id,
|
||||
join(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(op2), ", ",
|
||||
to_expression(op3), ")"),
|
||||
emit_op(result_type, result_id, join(op, "(", to_expression(op0), ", ", to_expression(op1), ", ",
|
||||
to_expression(op2), ", ", to_expression(op3), ")"),
|
||||
forward);
|
||||
|
||||
inherit_expression_dependencies(result_id, op0);
|
||||
@ -3556,15 +3555,6 @@ string CompilerGLSL::to_function_args(uint32_t img, const SPIRType &imgtype, boo
|
||||
return farg_str;
|
||||
}
|
||||
|
||||
// Some languages may have additional standard library functions whose names conflict
|
||||
// with a function defined in the body of the shader. Subclasses can override to rename
|
||||
// the function name defined in the shader to avoid conflict with the language standard
|
||||
// functions (eg. MSL includes saturate()).
|
||||
string CompilerGLSL::clean_func_name(string func_name)
|
||||
{
|
||||
return func_name;
|
||||
}
|
||||
|
||||
void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t)
|
||||
{
|
||||
GLSLstd450 op = static_cast<GLSLstd450>(eop);
|
||||
@ -4052,6 +4042,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
|
||||
|
||||
bool access_chain_is_arrayed = false;
|
||||
bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base);
|
||||
bool vector_is_packed = false;
|
||||
bool pending_array_enclose = false;
|
||||
bool dimension_flatten = false;
|
||||
|
||||
@ -4148,12 +4139,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
|
||||
}
|
||||
}
|
||||
|
||||
if (member_is_packed_type(*type, index))
|
||||
{
|
||||
auto &membertype = get<SPIRType>(type->member_types[index]);
|
||||
expr = unpack_expression_type(expr, membertype);
|
||||
}
|
||||
|
||||
vector_is_packed = member_is_packed_type(*type, index);
|
||||
row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index);
|
||||
type = &get<SPIRType>(type->member_types[index]);
|
||||
}
|
||||
@ -4179,6 +4165,9 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
|
||||
// Vector -> Scalar
|
||||
else if (type->vecsize > 1)
|
||||
{
|
||||
if (vector_is_packed)
|
||||
expr = unpack_expression_type(expr, *type);
|
||||
|
||||
if (index_is_literal)
|
||||
{
|
||||
expr += ".";
|
||||
@ -5081,7 +5070,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
||||
|
||||
string funexpr;
|
||||
vector<string> arglist;
|
||||
funexpr += clean_func_name(to_name(func)) + "(";
|
||||
funexpr += to_name(func) + "(";
|
||||
for (uint32_t i = 0; i < length; i++)
|
||||
{
|
||||
// Do not pass in separate images or samplers if we're remapping
|
||||
@ -5893,8 +5882,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
||||
register_read(ops[1], ops[2], should_forward(ops[2]));
|
||||
break;
|
||||
|
||||
// OpAtomicStore unimplemented. Not sure what would use that.
|
||||
// OpAtomicLoad seems to only be relevant for atomic counters.
|
||||
// OpAtomicStore unimplemented. Not sure what would use that.
|
||||
// OpAtomicLoad seems to only be relevant for atomic counters.
|
||||
|
||||
case OpAtomicIIncrement:
|
||||
forced_temporaries.insert(ops[1]);
|
||||
@ -6453,7 +6442,7 @@ bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, u
|
||||
|
||||
// Checks whether the member is in packed data type, that might need to be unpacked.
|
||||
// GLSL does not define packed data types, but certain subclasses do.
|
||||
bool CompilerGLSL::member_is_packed_type(const SPIRType &type, uint32_t index)
|
||||
bool CompilerGLSL::member_is_packed_type(const SPIRType &type, uint32_t index) const
|
||||
{
|
||||
return has_member_decoration(type.self, index, DecorationCPacked);
|
||||
}
|
||||
@ -7015,11 +7004,11 @@ void CompilerGLSL::emit_function_prototype(SPIRFunction &func, uint64_t return_f
|
||||
|
||||
if (func.self == entry_point)
|
||||
{
|
||||
decl += clean_func_name("main");
|
||||
decl += "main";
|
||||
processing_entry_point = true;
|
||||
}
|
||||
else
|
||||
decl += clean_func_name(to_name(func.self));
|
||||
decl += to_name(func.self);
|
||||
|
||||
decl += "(";
|
||||
vector<string> arglist;
|
||||
|
@ -211,7 +211,6 @@ protected:
|
||||
uint32_t grad_x, uint32_t grad_y, uint32_t lod, uint32_t coffset,
|
||||
uint32_t offset, uint32_t bias, uint32_t comp, uint32_t sample,
|
||||
bool *p_forward);
|
||||
virtual std::string clean_func_name(std::string func_name);
|
||||
virtual void emit_buffer_block(const SPIRVariable &type);
|
||||
virtual void emit_push_constant_block(const SPIRVariable &var);
|
||||
virtual void emit_uniform(const SPIRVariable &var);
|
||||
@ -283,7 +282,7 @@ protected:
|
||||
|
||||
bool is_non_native_row_major_matrix(uint32_t id);
|
||||
bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index);
|
||||
bool member_is_packed_type(const SPIRType &type, uint32_t index);
|
||||
bool member_is_packed_type(const SPIRType &type, uint32_t index) const;
|
||||
virtual std::string convert_row_major_matrix(std::string exp_str);
|
||||
|
||||
std::unordered_set<std::string> local_variable_names;
|
||||
@ -391,7 +390,7 @@ protected:
|
||||
std::string to_member_name(const SPIRType &type, uint32_t index);
|
||||
std::string type_to_glsl_constructor(const SPIRType &type);
|
||||
std::string argument_decl(const SPIRFunction::Parameter &arg);
|
||||
std::string to_qualifiers_glsl(uint32_t id);
|
||||
virtual std::string to_qualifiers_glsl(uint32_t id);
|
||||
const char *to_precision_qualifiers_glsl(uint32_t id);
|
||||
virtual const char *to_storage_qualifiers_glsl(const SPIRVariable &var);
|
||||
const char *flags_to_precision_qualifiers_glsl(const SPIRType &type, uint64_t flags);
|
||||
@ -423,7 +422,7 @@ protected:
|
||||
// and force recompile.
|
||||
bool check_atomic_image(uint32_t id);
|
||||
|
||||
void replace_illegal_names();
|
||||
virtual void replace_illegal_names();
|
||||
|
||||
void replace_fragment_output(SPIRVariable &var);
|
||||
void replace_fragment_outputs();
|
||||
|
@ -1052,10 +1052,7 @@ void CompilerHLSL::emit_resources()
|
||||
if (requires_op_fmod)
|
||||
{
|
||||
static const char *types[] = {
|
||||
"float",
|
||||
"float2",
|
||||
"float3",
|
||||
"float4",
|
||||
"float", "float2", "float3", "float4",
|
||||
};
|
||||
|
||||
for (auto &type : types)
|
||||
|
564
spirv_msl.cpp
564
spirv_msl.cpp
@ -18,7 +18,6 @@
|
||||
#include "GLSL.std.450.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <numeric>
|
||||
|
||||
using namespace spv;
|
||||
@ -31,9 +30,6 @@ CompilerMSL::CompilerMSL(vector<uint32_t> spirv_, vector<MSLVertexAttr> *p_vtx_a
|
||||
vector<MSLResourceBinding> *p_res_bindings)
|
||||
: CompilerGLSL(move(spirv_))
|
||||
{
|
||||
populate_func_name_overrides();
|
||||
populate_var_name_overrides();
|
||||
|
||||
if (p_vtx_attrs)
|
||||
for (auto &va : *p_vtx_attrs)
|
||||
vtx_attrs_by_location[va.location] = &va;
|
||||
@ -47,9 +43,6 @@ CompilerMSL::CompilerMSL(const uint32_t *ir, size_t word_count, MSLVertexAttr *p
|
||||
MSLResourceBinding *p_res_bindings, size_t res_bindings_count)
|
||||
: CompilerGLSL(ir, word_count)
|
||||
{
|
||||
populate_func_name_overrides();
|
||||
populate_var_name_overrides();
|
||||
|
||||
if (p_vtx_attrs)
|
||||
for (size_t i = 0; i < vtx_attrs_count; i++)
|
||||
vtx_attrs_by_location[p_vtx_attrs[i].location] = &p_vtx_attrs[i];
|
||||
@ -59,27 +52,12 @@ CompilerMSL::CompilerMSL(const uint32_t *ir, size_t word_count, MSLVertexAttr *p
|
||||
resource_bindings.push_back(&p_res_bindings[i]);
|
||||
}
|
||||
|
||||
// Populate the collection of function names that need to be overridden
|
||||
void CompilerMSL::populate_func_name_overrides()
|
||||
{
|
||||
func_name_overrides["main"] = "main0";
|
||||
func_name_overrides["saturate"] = "saturate0";
|
||||
}
|
||||
|
||||
void CompilerMSL::populate_var_name_overrides()
|
||||
{
|
||||
var_name_overrides["kernel"] = "kernel0";
|
||||
var_name_overrides["bias"] = "bias0";
|
||||
}
|
||||
|
||||
string CompilerMSL::compile()
|
||||
{
|
||||
// Force a classic "C" locale, reverts when function returns
|
||||
ClassicLocale classic_locale;
|
||||
|
||||
// Set main function name if it was explicitly set
|
||||
if (!options.entry_point_name.empty())
|
||||
set_name(entry_point, options.entry_point_name);
|
||||
replace_illegal_names();
|
||||
|
||||
non_stage_in_input_var_ids.clear();
|
||||
struct_member_padding.clear();
|
||||
@ -102,6 +80,14 @@ string CompilerMSL::compile()
|
||||
localize_global_variables();
|
||||
extract_global_variables_from_functions();
|
||||
|
||||
// Mark any non-stage-in structs to be tightly packed.
|
||||
mark_packable_structs();
|
||||
|
||||
// Metal does not allow dynamic array lengths.
|
||||
// Resolve any specialization constants that are used for array lengths.
|
||||
if (options.resolve_specialized_array_lengths)
|
||||
resolve_specialized_array_lengths();
|
||||
|
||||
// Do not deal with GLES-isms like precision, older extensions and such.
|
||||
CompilerGLSL::options.vulkan_semantics = true;
|
||||
CompilerGLSL::options.es = false;
|
||||
@ -114,6 +100,7 @@ string CompilerMSL::compile()
|
||||
backend.swizzle_is_function = false;
|
||||
backend.shared_is_implied = false;
|
||||
backend.native_row_major_matrix = false;
|
||||
backend.flexible_member_array_supported = false;
|
||||
|
||||
uint32_t pass_count = 0;
|
||||
do
|
||||
@ -184,7 +171,7 @@ void CompilerMSL::preprocess_op_codes()
|
||||
}
|
||||
}
|
||||
|
||||
// Move the Private global variables to the entry function.
|
||||
// Move the Private and Workgroup global variables to the entry function.
|
||||
// Non-constant variables cannot have global scope in Metal.
|
||||
void CompilerMSL::localize_global_variables()
|
||||
{
|
||||
@ -192,11 +179,12 @@ void CompilerMSL::localize_global_variables()
|
||||
auto iter = global_variables.begin();
|
||||
while (iter != global_variables.end())
|
||||
{
|
||||
uint32_t gv_id = *iter;
|
||||
auto &gbl_var = get<SPIRVariable>(gv_id);
|
||||
if (gbl_var.storage == StorageClassPrivate)
|
||||
uint32_t v_id = *iter;
|
||||
auto &var = get<SPIRVariable>(v_id);
|
||||
if (var.storage == StorageClassPrivate || var.storage == StorageClassWorkgroup)
|
||||
{
|
||||
entry_func.add_local_variable(gv_id);
|
||||
var.storage = StorageClassFunction;
|
||||
entry_func.add_local_variable(v_id);
|
||||
iter = global_variables.erase(iter);
|
||||
}
|
||||
else
|
||||
@ -204,6 +192,21 @@ void CompilerMSL::localize_global_variables()
|
||||
}
|
||||
}
|
||||
|
||||
// Metal does not allow dynamic array lengths.
|
||||
// Turn off specialization of any constants that are used for array lengths.
|
||||
void CompilerMSL::resolve_specialized_array_lengths()
|
||||
{
|
||||
for (auto &id : ids)
|
||||
{
|
||||
if (id.get_type() == TypeConstant)
|
||||
{
|
||||
auto &c = id.get<SPIRConstant>();
|
||||
if (c.is_used_as_array_length)
|
||||
c.specialization = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// For any global variable accessed directly by a function,
|
||||
// extract that variable and add it as an argument to that function.
|
||||
void CompilerMSL::extract_global_variables_from_functions()
|
||||
@ -313,6 +316,54 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
|
||||
}
|
||||
}
|
||||
|
||||
// For all variables that are some form of non-input-output interface block, mark that all the structs
|
||||
// that are recursively contained within the type referenced by that variable should be packed tightly.
|
||||
void CompilerMSL::mark_packable_structs()
|
||||
{
|
||||
for (auto &id : ids)
|
||||
{
|
||||
if (id.get_type() == TypeVariable)
|
||||
{
|
||||
auto &var = id.get<SPIRVariable>();
|
||||
if (var.storage != StorageClassFunction && !is_hidden_variable(var))
|
||||
{
|
||||
auto &type = get<SPIRType>(var.basetype);
|
||||
if (type.pointer &&
|
||||
(type.storage == StorageClassUniform || type.storage == StorageClassUniformConstant ||
|
||||
type.storage == StorageClassPushConstant || type.storage == StorageClassStorageBuffer) &&
|
||||
(has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)))
|
||||
mark_as_packable(type);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If the specified type is a struct, it and any nested structs
|
||||
// are marked as packable with the DecorationCPacked decoration,
|
||||
void CompilerMSL::mark_as_packable(SPIRType &type)
|
||||
{
|
||||
// If this is not the base type (eg. it's a pointer or array), tunnel down
|
||||
if (type.parent_type)
|
||||
{
|
||||
mark_as_packable(get<SPIRType>(type.parent_type));
|
||||
return;
|
||||
}
|
||||
|
||||
if (type.basetype == SPIRType::Struct)
|
||||
{
|
||||
set_decoration(type.self, DecorationCPacked);
|
||||
|
||||
// Recurse
|
||||
size_t mbr_cnt = type.member_types.size();
|
||||
for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
|
||||
{
|
||||
uint32_t mbr_type_id = type.member_types[mbr_idx];
|
||||
auto &mbr_type = get<SPIRType>(mbr_type_id);
|
||||
mark_as_packable(mbr_type);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If a vertex attribute exists at the location, it is marked as being used by this shader
|
||||
void CompilerMSL::mark_location_as_used_by_shader(uint32_t location, StorageClass storage)
|
||||
{
|
||||
@ -668,13 +719,14 @@ void CompilerMSL::align_struct(SPIRType &ib_type)
|
||||
MemberSorter member_sorter(ib_type, meta[ib_type_id], MemberSorter::Offset);
|
||||
member_sorter.sort();
|
||||
|
||||
uint32_t curr_offset = 0;
|
||||
uint32_t curr_offset;
|
||||
uint32_t mbr_cnt = uint32_t(ib_type.member_types.size());
|
||||
|
||||
// Test the alignment of each member, and if a member should be closer to the previous
|
||||
// member than the default spacing expects, it is likely that the previous member is in
|
||||
// a packed format. If so, and the previous member is packable, pack it.
|
||||
// For example...this applies to any 3-element vector that is followed by a scalar.
|
||||
curr_offset = 0;
|
||||
for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
|
||||
{
|
||||
// Align current offset to the current member's default alignment.
|
||||
@ -697,6 +749,7 @@ void CompilerMSL::align_struct(SPIRType &ib_type)
|
||||
// Test the alignment of each member, and if a member is positioned farther than its
|
||||
// alignment and the end of the previous member, add a dummy padding member that will
|
||||
// be added before the current member when the delaration of this struct is emitted.
|
||||
curr_offset = 0;
|
||||
for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
|
||||
{
|
||||
// Align current offset to the current member's default alignment.
|
||||
@ -816,7 +869,7 @@ void CompilerMSL::emit_custom_functions()
|
||||
statement("template<typename T>");
|
||||
statement("T findLSB(T x)");
|
||||
begin_scope();
|
||||
statement("return select(ctz(x), -1, x == 0);");
|
||||
statement("return select(ctz(x), T(-1), x == T(0));");
|
||||
end_scope();
|
||||
statement("");
|
||||
break;
|
||||
@ -826,7 +879,7 @@ void CompilerMSL::emit_custom_functions()
|
||||
statement("template<typename T>");
|
||||
statement("T findUMSB(T x)");
|
||||
begin_scope();
|
||||
statement("return select(clz(0) - (clz(x) + 1), -1, x == 0);");
|
||||
statement("return select(clz(T(0)) - (clz(x) + T(1)), T(-1), x == T(0));");
|
||||
end_scope();
|
||||
statement("");
|
||||
break;
|
||||
@ -836,8 +889,19 @@ void CompilerMSL::emit_custom_functions()
|
||||
statement("template<typename T>");
|
||||
statement("T findSMSB(T x)");
|
||||
begin_scope();
|
||||
statement("T v = select(x, -1 - x, x < 0);");
|
||||
statement("return select(clz(0) - (clz(v) + 1), -1, v == 0);");
|
||||
statement("T v = select(x, T(-1) - x, x < T(0));");
|
||||
statement("return select(clz(T(0)) - (clz(v) + T(1)), T(-1), v == T(0));");
|
||||
end_scope();
|
||||
statement("");
|
||||
break;
|
||||
|
||||
case SPVFuncImplArrayCopy:
|
||||
statement("// Implementation of an array copy function to cover GLSL's ability to copy an array via "
|
||||
"assignment. ");
|
||||
statement("template<typename T>");
|
||||
statement("void spvArrayCopy(thread T* dst, thread const T* src, uint count)");
|
||||
begin_scope();
|
||||
statement("for (uint i = 0; i < count; *dst++ = *src++, i++);");
|
||||
end_scope();
|
||||
statement("");
|
||||
break;
|
||||
@ -979,51 +1043,41 @@ void CompilerMSL::emit_custom_functions()
|
||||
|
||||
void CompilerMSL::emit_resources()
|
||||
{
|
||||
|
||||
// Output all basic struct types which are not Block or BufferBlock as these are declared inplace
|
||||
// when such variables are instantiated.
|
||||
// Output non-interface structs. These include local function structs
|
||||
// and structs nested within uniform and read-write buffers.
|
||||
unordered_set<uint32_t> declared_structs;
|
||||
for (auto &id : ids)
|
||||
{
|
||||
if (id.get_type() == TypeType)
|
||||
{
|
||||
auto &type = id.get<SPIRType>();
|
||||
if (type.basetype == SPIRType::Struct && type.array.empty() && !type.pointer &&
|
||||
!has_decoration(type.self, DecorationBlock) && !has_decoration(type.self, DecorationBufferBlock))
|
||||
uint32_t type_id = type.self;
|
||||
|
||||
bool is_struct = (type.basetype == SPIRType::Struct) && type.array.empty();
|
||||
bool is_block =
|
||||
has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
|
||||
bool is_basic_struct = is_struct && !type.pointer && !is_block;
|
||||
|
||||
bool is_interface = (type.storage == StorageClassInput || type.storage == StorageClassOutput ||
|
||||
type.storage == StorageClassUniformConstant);
|
||||
bool is_non_interface_block = is_struct && type.pointer && is_block && !is_interface;
|
||||
|
||||
bool is_declarable_struct = is_basic_struct || is_non_interface_block;
|
||||
|
||||
// Align and emit declarable structs...but avoid declaring each more than once.
|
||||
if (is_declarable_struct && declared_structs.count(type_id) == 0)
|
||||
{
|
||||
declared_structs.insert(type_id);
|
||||
|
||||
if (has_decoration(type_id, DecorationCPacked))
|
||||
align_struct(type);
|
||||
|
||||
emit_struct(type);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Output Uniform buffers and constants
|
||||
unordered_set<uint32_t> declared_interface_structs;
|
||||
for (auto &id : ids)
|
||||
{
|
||||
if (id.get_type() == TypeVariable)
|
||||
{
|
||||
auto &var = id.get<SPIRVariable>();
|
||||
auto &type = get<SPIRType>(var.basetype);
|
||||
|
||||
if (var.storage != StorageClassFunction && type.pointer &&
|
||||
(type.storage == StorageClassUniform || type.storage == StorageClassUniformConstant ||
|
||||
type.storage == StorageClassPushConstant || type.storage == StorageClassStorageBuffer) &&
|
||||
(has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)) &&
|
||||
!is_hidden_variable(var))
|
||||
{
|
||||
// Avoid declaring the same struct multiple times.
|
||||
if (declared_interface_structs.count(type.self) == 0)
|
||||
{
|
||||
align_struct(type);
|
||||
emit_struct(type);
|
||||
declared_interface_structs.insert(type.self);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
declare_undefined_values();
|
||||
|
||||
// Output interface blocks.
|
||||
// Output interface structs.
|
||||
emit_interface_block(stage_in_var_id);
|
||||
for (auto &nsi_var : non_stage_in_input_var_ids)
|
||||
emit_interface_block(nsi_var.second);
|
||||
@ -1071,7 +1125,8 @@ void CompilerMSL::emit_specialization_constants()
|
||||
// the work group size at compile time in SPIR-V, and [[threads_per_threadgroup]] would need to be passed around as a global.
|
||||
// The work group size may be a specialization constant.
|
||||
if (workgroup_size_id)
|
||||
statement("constant uint3 gl_WorkGroupSize = ", constant_expression(get<SPIRConstant>(workgroup_size_id)), ";");
|
||||
statement("constant uint3 ", builtin_to_glsl(BuiltInWorkgroupSize, StorageClassWorkgroup), " = ",
|
||||
constant_expression(get<SPIRConstant>(workgroup_size_id)), ";");
|
||||
|
||||
if (!spec_consts.empty() || workgroup_size_id)
|
||||
statement("");
|
||||
@ -1090,7 +1145,6 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
|
||||
#define BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
|
||||
#define BFOP_CAST(op, type) \
|
||||
emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
|
||||
#define BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
|
||||
#define UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)
|
||||
|
||||
auto ops = stream(instruction);
|
||||
@ -1434,74 +1488,185 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
|
||||
}
|
||||
|
||||
case OpStore:
|
||||
{
|
||||
if (!maybe_emit_input_struct_assignment(ops[0], ops[1]))
|
||||
CompilerGLSL::emit_instruction(instruction);
|
||||
if (maybe_emit_input_struct_assignment(ops[0], ops[1]))
|
||||
break;
|
||||
|
||||
if (maybe_emit_array_assignment(ops[0], ops[1]))
|
||||
break;
|
||||
|
||||
CompilerGLSL::emit_instruction(instruction);
|
||||
break;
|
||||
}
|
||||
|
||||
// OpOuterProduct
|
||||
// Compute barriers
|
||||
case OpMemoryBarrier:
|
||||
emit_barrier(0, ops[0], ops[1]);
|
||||
break;
|
||||
|
||||
case OpControlBarrier:
|
||||
// In GLSL a memory barrier is often followed by a control barrier.
|
||||
// But in MSL, memory barriers are also control barriers, so don't
|
||||
// emit a simple control barrier if a memory barrier has just been emitted.
|
||||
if (previous_instruction_opcode != OpMemoryBarrier)
|
||||
emit_barrier(ops[0], ops[1], ops[2]);
|
||||
break;
|
||||
|
||||
// OpOuterProduct
|
||||
|
||||
default:
|
||||
CompilerGLSL::emit_instruction(instruction);
|
||||
break;
|
||||
}
|
||||
|
||||
previous_instruction_opcode = opcode;
|
||||
}
|
||||
|
||||
// Since MSL does not allow structs to be nested within the stage_in struct, the original
|
||||
// input structs are flattened into a single stage_in struct by add_interface_block.
|
||||
// As a result, if the LHS and RHS represent an assignment of an entire input struct,
|
||||
// we must perform this member-by-member, mapping to the flattened stage_in struct.
|
||||
void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem)
|
||||
{
|
||||
if (get_entry_point().model != ExecutionModelGLCompute)
|
||||
return;
|
||||
|
||||
string bar_stmt = "threadgroup_barrier(mem_flags::";
|
||||
|
||||
uint32_t mem_sem = id_mem_sem ? get<SPIRConstant>(id_mem_sem).scalar() : MemorySemanticsMaskNone;
|
||||
switch (mem_sem)
|
||||
{
|
||||
case MemorySemanticsCrossWorkgroupMemoryMask:
|
||||
bar_stmt += "mem_device";
|
||||
break;
|
||||
|
||||
case MemorySemanticsSubgroupMemoryMask:
|
||||
case MemorySemanticsWorkgroupMemoryMask:
|
||||
case MemorySemanticsAtomicCounterMemoryMask:
|
||||
bar_stmt += "mem_threadgroup";
|
||||
break;
|
||||
|
||||
case MemorySemanticsImageMemoryMask:
|
||||
bar_stmt += "mem_texture";
|
||||
break;
|
||||
|
||||
case MemorySemanticsAcquireMask:
|
||||
case MemorySemanticsReleaseMask:
|
||||
case MemorySemanticsAcquireReleaseMask:
|
||||
case MemorySemanticsSequentiallyConsistentMask:
|
||||
case MemorySemanticsUniformMemoryMask:
|
||||
case MemorySemanticsMaskNone:
|
||||
default:
|
||||
bar_stmt += "mem_none";
|
||||
break;
|
||||
}
|
||||
|
||||
if (options.supports_msl_version(2))
|
||||
{
|
||||
bar_stmt += ", ";
|
||||
|
||||
// Use the wider of the two scopes (smaller value)
|
||||
uint32_t exe_scope = id_exe_scope ? get<SPIRConstant>(id_exe_scope).scalar() : ScopeInvocation;
|
||||
uint32_t mem_scope = id_mem_scope ? get<SPIRConstant>(id_mem_scope).scalar() : ScopeInvocation;
|
||||
uint32_t scope = min(exe_scope, mem_scope);
|
||||
switch (scope)
|
||||
{
|
||||
case ScopeCrossDevice:
|
||||
case ScopeDevice:
|
||||
bar_stmt += "memory_scope_device";
|
||||
break;
|
||||
|
||||
case ScopeSubgroup:
|
||||
case ScopeInvocation:
|
||||
bar_stmt += "memory_scope_simdgroup";
|
||||
break;
|
||||
|
||||
case ScopeWorkgroup:
|
||||
default:
|
||||
bar_stmt += "memory_scope_threadgroup";
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bar_stmt += ");";
|
||||
|
||||
statement(bar_stmt);
|
||||
}
|
||||
|
||||
// Since MSL does not allow structs to be nested within the stage_in struct, the original input
|
||||
// structs are flattened into a single stage_in struct by add_interface_block. As a result,
|
||||
// if the LHS and RHS represent an assignment of an entire input struct, we must perform this
|
||||
// member-by-member, mapping each RHS member to its name in the flattened stage_in struct.
|
||||
// Returns whether the struct assignment was emitted.
|
||||
bool CompilerMSL::maybe_emit_input_struct_assignment(uint32_t id_lhs, uint32_t id_rhs)
|
||||
{
|
||||
auto *p_v_lhs = maybe_get_backing_variable(id_lhs);
|
||||
// We only care about assignments of an entire struct
|
||||
uint32_t type_id = expression_type_id(id_rhs);
|
||||
auto &type = get<SPIRType>(type_id);
|
||||
if (type.basetype != SPIRType::Struct)
|
||||
return false;
|
||||
|
||||
// We only care about assignments from Input variables
|
||||
auto *p_v_rhs = maybe_get_backing_variable(id_rhs);
|
||||
if (!(p_v_rhs && p_v_rhs->storage == StorageClassInput))
|
||||
return false;
|
||||
|
||||
if (p_v_lhs && p_v_rhs && p_v_rhs->storage == StorageClassInput)
|
||||
// Get the ID of the type of the underlying RHS variable.
|
||||
// This will be an Input OpTypePointer containing the qualified member names.
|
||||
uint32_t tid_v_rhs = p_v_rhs->basetype;
|
||||
|
||||
// Ensure the LHS variable has been declared
|
||||
auto *p_v_lhs = maybe_get_backing_variable(id_lhs);
|
||||
if (p_v_lhs)
|
||||
flush_variable_declaration(p_v_lhs->self);
|
||||
|
||||
size_t mbr_cnt = type.member_types.size();
|
||||
for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
|
||||
{
|
||||
uint32_t tid_lhs = p_v_lhs->basetype;
|
||||
uint32_t tid_rhs = p_v_rhs->basetype;
|
||||
string expr;
|
||||
|
||||
auto &t_lhs = get<SPIRType>(tid_lhs);
|
||||
auto &t_rhs = get<SPIRType>(tid_rhs);
|
||||
//LHS
|
||||
expr += to_name(id_lhs);
|
||||
expr += ".";
|
||||
expr += to_member_name(type, mbr_idx);
|
||||
|
||||
if (t_lhs.basetype == SPIRType::Struct && t_rhs.basetype == SPIRType::Struct)
|
||||
expr += " = ";
|
||||
|
||||
//RHS
|
||||
string qual_mbr_name = get_member_qualified_name(tid_v_rhs, mbr_idx);
|
||||
if (qual_mbr_name.empty())
|
||||
{
|
||||
size_t mbr_cnt = t_rhs.member_types.size();
|
||||
assert(t_lhs.member_types.size() == mbr_cnt);
|
||||
|
||||
flush_variable_declaration(p_v_lhs->self);
|
||||
|
||||
for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
|
||||
{
|
||||
string expr;
|
||||
|
||||
//LHS
|
||||
expr += to_name(id_lhs);
|
||||
expr += ".";
|
||||
expr += to_member_name(t_lhs, mbr_idx);
|
||||
|
||||
expr += " = ";
|
||||
|
||||
//RHS
|
||||
string qual_mbr_name = get_member_qualified_name(tid_rhs, mbr_idx);
|
||||
if (qual_mbr_name.empty())
|
||||
{
|
||||
expr += to_name(id_rhs);
|
||||
expr += ".";
|
||||
expr += to_member_name(t_rhs, mbr_idx);
|
||||
}
|
||||
else
|
||||
expr += qual_mbr_name;
|
||||
|
||||
statement(expr, ";");
|
||||
}
|
||||
return true;
|
||||
expr += to_name(id_rhs);
|
||||
expr += ".";
|
||||
expr += to_member_name(type, mbr_idx);
|
||||
}
|
||||
else
|
||||
expr += qual_mbr_name;
|
||||
|
||||
statement(expr, ";");
|
||||
}
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Since MSL does not allow arrays to be copied via simple variable assignment,
|
||||
// if the LHS and RHS represent an assignment of an entire array, it must be
|
||||
// implemented by calling an array copy function.
|
||||
// Returns whether the struct assignment was emitted.
|
||||
bool CompilerMSL::maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs)
|
||||
{
|
||||
// Assignment from an array initializer is fine.
|
||||
if (ids[id_rhs].get_type() == TypeConstant)
|
||||
return false;
|
||||
|
||||
// We only care about assignments of an entire array
|
||||
auto &type = expression_type(id_rhs);
|
||||
if (type.array.size() == 0)
|
||||
return false;
|
||||
|
||||
// Ensure the LHS variable has been declared
|
||||
auto *p_v_lhs = maybe_get_backing_variable(id_lhs);
|
||||
if (p_v_lhs)
|
||||
flush_variable_declaration(p_v_lhs->self);
|
||||
|
||||
statement("spvArrayCopy(", to_expression(id_lhs), ", ", to_expression(id_rhs), ", ", to_array_size(type, 0), ");");
|
||||
register_write(id_lhs);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Emits one of the atomic functions. In MSL, the atomic functions operate on pointers
|
||||
@ -1604,7 +1769,7 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
|
||||
emit_unary_func_op(result_type, id, args[0], "pack_float_to_unorm2x16");
|
||||
break;
|
||||
case GLSLstd450PackHalf2x16:
|
||||
emit_unary_func_op(result_type, id, args[0], "pack_half_to_snorm2x16");
|
||||
emit_unary_func_op(result_type, id, args[0], "unsupported_GLSLstd450PackHalf2x16"); // Currently unsupported
|
||||
break;
|
||||
|
||||
case GLSLstd450UnpackSnorm4x8:
|
||||
@ -1620,7 +1785,7 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
|
||||
emit_unary_func_op(result_type, id, args[0], "unpack_unorm2x16_to_float");
|
||||
break;
|
||||
case GLSLstd450UnpackHalf2x16:
|
||||
emit_unary_func_op(result_type, id, args[0], "unpack_snorm2x16_to_half");
|
||||
emit_unary_func_op(result_type, id, args[0], "unsupported_GLSLstd450UnpackHalf2x16"); // Currently unsupported
|
||||
break;
|
||||
|
||||
case GLSLstd450PackDouble2x32:
|
||||
@ -1650,10 +1815,10 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
|
||||
break;
|
||||
}
|
||||
|
||||
// TODO:
|
||||
// GLSLstd450InterpolateAtCentroid (centroid_no_perspective qualifier)
|
||||
// GLSLstd450InterpolateAtSample (sample_no_perspective qualifier)
|
||||
// GLSLstd450InterpolateAtOffset
|
||||
// TODO:
|
||||
// GLSLstd450InterpolateAtCentroid (centroid_no_perspective qualifier)
|
||||
// GLSLstd450InterpolateAtSample (sample_no_perspective qualifier)
|
||||
// GLSLstd450InterpolateAtOffset
|
||||
|
||||
default:
|
||||
CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
|
||||
@ -1686,7 +1851,7 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, uint64_t)
|
||||
auto &type = get<SPIRType>(func.return_type);
|
||||
decl += func_type_decl(type);
|
||||
decl += " ";
|
||||
decl += clean_func_name(to_name(func.self));
|
||||
decl += to_name(func.self);
|
||||
|
||||
decl += "(";
|
||||
|
||||
@ -2290,14 +2455,7 @@ string CompilerMSL::func_type_decl(SPIRType &type)
|
||||
return entry_type + " " + return_type;
|
||||
}
|
||||
|
||||
// Ensures the function name is not "main", which is illegal in MSL
|
||||
string CompilerMSL::clean_func_name(string func_name)
|
||||
{
|
||||
auto iter = func_name_overrides.find(func_name);
|
||||
return (iter != func_name_overrides.end()) ? iter->second : func_name;
|
||||
}
|
||||
|
||||
// In MSL address space qualifiers are required for all pointer or reference arguments
|
||||
// In MSL, address space qualifiers are required for all pointer or reference arguments
|
||||
string CompilerMSL::get_argument_address_space(const SPIRVariable &argument)
|
||||
{
|
||||
const auto &type = get<SPIRType>(argument.basetype);
|
||||
@ -2477,20 +2635,33 @@ uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::Base
|
||||
// Returns the name of the entry point of this shader
|
||||
string CompilerMSL::get_entry_point_name()
|
||||
{
|
||||
return clean_func_name(to_name(entry_point));
|
||||
return to_name(entry_point);
|
||||
}
|
||||
|
||||
string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
|
||||
{
|
||||
auto &var = get<SPIRVariable>(arg.id);
|
||||
auto &type = expression_type(arg.id);
|
||||
bool constref = !arg.alias_global_variable && (!type.pointer || arg.write_count == 0);
|
||||
|
||||
// TODO: Check if this arg is an uniform pointer
|
||||
bool pointer = type.storage == StorageClassUniformConstant;
|
||||
|
||||
auto &var = get<SPIRVariable>(arg.id);
|
||||
return join(constref ? "const " : "", type_to_glsl(type), pointer ? " " : "& ", to_name(var.self),
|
||||
type_to_array_glsl(type));
|
||||
string decl;
|
||||
if (constref)
|
||||
decl += "const ";
|
||||
|
||||
decl += type_to_glsl(type);
|
||||
|
||||
if (is_array(type))
|
||||
decl += "*";
|
||||
else if (!pointer)
|
||||
decl += "&";
|
||||
|
||||
decl += " ";
|
||||
decl += to_name(var.self);
|
||||
|
||||
return decl;
|
||||
}
|
||||
|
||||
// If we're currently in the entry point function, and the object
|
||||
@ -2525,17 +2696,79 @@ string CompilerMSL::to_qualified_member_name(const SPIRType &type, uint32_t inde
|
||||
// if the first chars are _ and a digit, which indicate a transient name.
|
||||
string CompilerMSL::ensure_valid_name(string name, string pfx)
|
||||
{
|
||||
if (name.size() >= 2 && name[0] == '_' && isdigit(name[1]))
|
||||
return (name.size() >= 2 && name[0] == '_' && isdigit(name[1])) ? (pfx + name) : name;
|
||||
}
|
||||
|
||||
// Replace all names that match MSL keywords or Metal Standard Library functions.
|
||||
void CompilerMSL::replace_illegal_names()
|
||||
{
|
||||
static const unordered_set<string> keywords = {
|
||||
"kernel", "bias",
|
||||
};
|
||||
|
||||
static const unordered_set<string> illegal_func_names = {
|
||||
"main", "saturate",
|
||||
};
|
||||
|
||||
for (auto &id : ids)
|
||||
{
|
||||
return join(pfx, name);
|
||||
switch (id.get_type())
|
||||
{
|
||||
case TypeVariable:
|
||||
{
|
||||
auto &dec = meta[id.get_id()].decoration;
|
||||
if (keywords.find(dec.alias) != end(keywords))
|
||||
dec.alias += "0";
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case TypeFunction:
|
||||
{
|
||||
auto &dec = meta[id.get_id()].decoration;
|
||||
if (illegal_func_names.find(dec.alias) != end(illegal_func_names))
|
||||
dec.alias += "0";
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case TypeType:
|
||||
{
|
||||
for (auto &mbr_dec : meta[id.get_id()].members)
|
||||
if (keywords.find(mbr_dec.alias) != end(keywords))
|
||||
mbr_dec.alias += "0";
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
|
||||
for (auto &entry : entry_points)
|
||||
{
|
||||
auto iter = var_name_overrides.find(name);
|
||||
return (iter != var_name_overrides.end()) ? iter->second : name;
|
||||
// Change both the entry point name and the alias, to keep them synced.
|
||||
string &ep_name = entry.second.name;
|
||||
if (illegal_func_names.find(ep_name) != end(illegal_func_names))
|
||||
{
|
||||
ep_name += "0";
|
||||
meta[entry.first].decoration.alias = ep_name;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
string CompilerMSL::to_qualifiers_glsl(uint32_t id)
|
||||
{
|
||||
string quals;
|
||||
|
||||
auto &type = expression_type(id);
|
||||
if (type.storage == StorageClassWorkgroup)
|
||||
quals += "threadgroup ";
|
||||
|
||||
return quals;
|
||||
}
|
||||
|
||||
// The optional id parameter indicates the object whose type we are trying
|
||||
// to find the description for. It is optional. Most type descriptions do not
|
||||
// depend on a specific object's use of that type.
|
||||
@ -2880,12 +3113,11 @@ string CompilerMSL::built_in_func_arg(BuiltIn builtin, bool prefix_comma)
|
||||
string bi_arg;
|
||||
if (prefix_comma)
|
||||
bi_arg += ", ";
|
||||
|
||||
bi_arg += builtin_type_decl(builtin);
|
||||
|
||||
assert(builtin == BuiltInVertexIndex || builtin == BuiltInInstanceIndex);
|
||||
bi_arg += " " + builtin_to_glsl(builtin, StorageClassInput);
|
||||
|
||||
bi_arg += " [[" + builtin_qualifier(builtin) + "]]";
|
||||
|
||||
return bi_arg;
|
||||
}
|
||||
|
||||
@ -2905,9 +3137,6 @@ size_t CompilerMSL::get_declared_struct_member_size(const SPIRType &struct_type,
|
||||
case SPIRType::Sampler:
|
||||
SPIRV_CROSS_THROW("Querying size of opaque object.");
|
||||
|
||||
case SPIRType::Struct:
|
||||
return get_declared_struct_size(type);
|
||||
|
||||
default:
|
||||
{
|
||||
size_t component_size = type.width / 8;
|
||||
@ -2915,8 +3144,12 @@ size_t CompilerMSL::get_declared_struct_member_size(const SPIRType &struct_type,
|
||||
unsigned columns = type.columns;
|
||||
|
||||
// For arrays, we can use ArrayStride to get an easy check.
|
||||
// Runtime arrays will have zero size so force to min of one.
|
||||
if (!type.array.empty())
|
||||
return type_struct_member_array_stride(struct_type, index) * type.array.back();
|
||||
return type_struct_member_array_stride(struct_type, index) * max(type.array.back(), 1U);
|
||||
|
||||
if (type.basetype == SPIRType::Struct)
|
||||
return get_declared_struct_size(type);
|
||||
|
||||
if (columns == 1) // An unpacked 3-element vector is the same size as a 4-element vector.
|
||||
{
|
||||
@ -2967,11 +3200,14 @@ size_t CompilerMSL::get_declared_struct_member_alignment(const SPIRType &struct_
|
||||
{
|
||||
// Alignment of packed type is the same as the underlying component size.
|
||||
// Alignment of unpacked type is the same as the type size (or one matrix column).
|
||||
auto dec_mask = get_member_decoration_mask(struct_type.self, index);
|
||||
if (dec_mask & (1ull << DecorationCPacked))
|
||||
if (member_is_packed_type(struct_type, index))
|
||||
return type.width / 8;
|
||||
else
|
||||
return get_declared_struct_member_size(struct_type, index) / type.columns;
|
||||
{
|
||||
// Divide by array size and colum count. Runtime arrays will have zero size so force to min of one.
|
||||
uint32_t array_size = type.array.empty() ? 1 : max(type.array.back(), 1U);
|
||||
return get_declared_struct_member_size(struct_type, index) / (type.columns * array_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2981,7 +3217,7 @@ bool CompilerMSL::skip_argument(uint32_t) const
|
||||
return false;
|
||||
}
|
||||
|
||||
bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, uint32_t /*length*/)
|
||||
bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, uint32_t length)
|
||||
{
|
||||
// Since MSL exists in a single execution scope, function prototype declarations are not
|
||||
// needed, and clutter the output. If secondary functions are output (either as a SPIR-V
|
||||
@ -2989,12 +3225,11 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui
|
||||
// suppress_missing_prototypes to suppress compiler warnings of missing function prototypes.
|
||||
|
||||
// Mark if the input requires the implementation of an SPIR-V function that does not exist in Metal.
|
||||
SPVFuncImpl spv_func = compiler.get_spv_func_impl(opcode, args);
|
||||
SPVFuncImpl spv_func = get_spv_func_impl(opcode, args);
|
||||
if (spv_func != SPVFuncImplNone)
|
||||
{
|
||||
compiler.spv_function_implementations.insert(spv_func);
|
||||
suppress_missing_prototypes = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
switch (opcode)
|
||||
@ -3026,21 +3261,38 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui
|
||||
break;
|
||||
}
|
||||
|
||||
// Keep track of the instruction return types, mapped by ID
|
||||
if (length > 1)
|
||||
result_types[args[1]] = args[0];
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns an enumeration of a SPIR-V function that needs to be output for certain Op codes.
|
||||
CompilerMSL::SPVFuncImpl CompilerMSL::get_spv_func_impl(Op opcode, const uint32_t *args)
|
||||
CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op opcode, const uint32_t *args)
|
||||
{
|
||||
switch (opcode)
|
||||
{
|
||||
case OpFMod:
|
||||
return SPVFuncImplMod;
|
||||
|
||||
case OpStore:
|
||||
{
|
||||
// Get the result type of the RHS. Since this is run as a pre-processing stage,
|
||||
// we must extract the result type directly from the Instruction, rather than the ID.
|
||||
uint32_t id_rhs = args[1];
|
||||
uint32_t type_id_rhs = result_types[id_rhs];
|
||||
if ((compiler.ids[id_rhs].get_type() != TypeConstant) && type_id_rhs &&
|
||||
compiler.is_array(compiler.get<SPIRType>(type_id_rhs)))
|
||||
return SPVFuncImplArrayCopy;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case OpExtInst:
|
||||
{
|
||||
uint32_t extension_set = args[2];
|
||||
if (get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
|
||||
if (compiler.get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
|
||||
{
|
||||
GLSLstd450 op_450 = static_cast<GLSLstd450>(args[3]);
|
||||
switch (op_450)
|
||||
@ -3057,7 +3309,7 @@ CompilerMSL::SPVFuncImpl CompilerMSL::get_spv_func_impl(Op opcode, const uint32_
|
||||
return SPVFuncImplFindUMsb;
|
||||
case GLSLstd450MatrixInverse:
|
||||
{
|
||||
auto &mat_type = get<SPIRType>(args[0]);
|
||||
auto &mat_type = compiler.get<SPIRType>(args[0]);
|
||||
switch (mat_type.columns)
|
||||
{
|
||||
case 2:
|
||||
|
@ -18,6 +18,7 @@
|
||||
#define SPIRV_CROSS_MSL_HPP
|
||||
|
||||
#include "spirv_glsl.hpp"
|
||||
#include <limits>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <unordered_map>
|
||||
@ -63,7 +64,7 @@ using MSLStructMemberKey = uint64_t;
|
||||
|
||||
// Special constant used in a MSLResourceBinding desc_set
|
||||
// element to indicate the bindings for the push constants.
|
||||
static const uint32_t kPushConstDescSet = UINT32_MAX;
|
||||
static const uint32_t kPushConstDescSet = std::numeric_limits<uint32_t>::max();
|
||||
|
||||
// Special constant used in a MSLResourceBinding binding
|
||||
// element to indicate the bindings for the push constants.
|
||||
@ -76,8 +77,24 @@ public:
|
||||
// Options for compiling to Metal Shading Language
|
||||
struct Options
|
||||
{
|
||||
uint32_t msl_version = make_msl_version(1, 2);
|
||||
bool enable_point_size_builtin = true;
|
||||
std::string entry_point_name;
|
||||
bool resolve_specialized_array_lengths = true;
|
||||
|
||||
void set_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0)
|
||||
{
|
||||
msl_version = make_msl_version(major, minor, patch);
|
||||
}
|
||||
|
||||
bool supports_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0)
|
||||
{
|
||||
return msl_version >= make_msl_version(major, minor, patch);
|
||||
}
|
||||
|
||||
static uint32_t make_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0)
|
||||
{
|
||||
return (major * 10000) + (minor * 100) + patch;
|
||||
}
|
||||
};
|
||||
|
||||
const Options &get_options() const
|
||||
@ -101,6 +118,7 @@ public:
|
||||
SPVFuncImplFindILsb,
|
||||
SPVFuncImplFindSMsb,
|
||||
SPVFuncImplFindUMsb,
|
||||
SPVFuncImplArrayCopy,
|
||||
SPVFuncImplInverse2x2,
|
||||
SPVFuncImplInverse3x3,
|
||||
SPVFuncImplInverse4x4,
|
||||
@ -165,10 +183,15 @@ protected:
|
||||
std::string unpack_expression_type(std::string expr_str, const SPIRType &type) override;
|
||||
std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type) override;
|
||||
bool skip_argument(uint32_t id) const override;
|
||||
std::string to_qualifiers_glsl(uint32_t id) override;
|
||||
void replace_illegal_names() override;
|
||||
|
||||
void preprocess_op_codes();
|
||||
void localize_global_variables();
|
||||
void extract_global_variables_from_functions();
|
||||
void resolve_specialized_array_lengths();
|
||||
void mark_packable_structs();
|
||||
void mark_as_packable(SPIRType &type);
|
||||
|
||||
std::unordered_map<uint32_t, std::set<uint32_t>> function_global_vars;
|
||||
void extract_global_variables_from_function(uint32_t func_id, std::set<uint32_t> &added_arg_ids,
|
||||
@ -181,12 +204,10 @@ protected:
|
||||
void emit_resources();
|
||||
void emit_specialization_constants();
|
||||
void emit_interface_block(uint32_t ib_var_id);
|
||||
void populate_func_name_overrides();
|
||||
void populate_var_name_overrides();
|
||||
bool maybe_emit_input_struct_assignment(uint32_t id_lhs, uint32_t id_rhs);
|
||||
bool maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs);
|
||||
|
||||
std::string func_type_decl(SPIRType &type);
|
||||
std::string clean_func_name(std::string func_name) override;
|
||||
std::string entry_point_args(bool append_comma);
|
||||
std::string get_entry_point_name();
|
||||
std::string to_qualified_member_name(const SPIRType &type, uint32_t index);
|
||||
@ -210,17 +231,15 @@ protected:
|
||||
void align_struct(SPIRType &ib_type);
|
||||
bool is_member_packable(SPIRType &ib_type, uint32_t index);
|
||||
MSLStructMemberKey get_struct_member_key(uint32_t type_id, uint32_t index);
|
||||
SPVFuncImpl get_spv_func_impl(spv::Op opcode, const uint32_t *args);
|
||||
std::string get_argument_address_space(const SPIRVariable &argument);
|
||||
void emit_atomic_func_op(uint32_t result_type, uint32_t result_id, const char *op, uint32_t mem_order_1,
|
||||
uint32_t mem_order_2, bool has_mem_order_2, uint32_t op0, uint32_t op1 = 0,
|
||||
bool op1_is_pointer = false, uint32_t op2 = 0);
|
||||
const char *get_memory_order(uint32_t spv_mem_sem);
|
||||
void add_pragma_line(const std::string &line);
|
||||
void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem);
|
||||
|
||||
Options options;
|
||||
std::unordered_map<std::string, std::string> func_name_overrides;
|
||||
std::unordered_map<std::string, std::string> var_name_overrides;
|
||||
std::set<SPVFuncImpl> spv_function_implementations;
|
||||
std::unordered_map<uint32_t, MSLVertexAttr *> vtx_attrs_by_location;
|
||||
std::map<uint32_t, uint32_t> non_stage_in_input_var_ids;
|
||||
@ -238,6 +257,7 @@ protected:
|
||||
std::string stage_out_var_name = "out";
|
||||
std::string stage_uniform_var_name = "uniforms";
|
||||
std::string sampler_name_suffix = "Smplr";
|
||||
spv::Op previous_instruction_opcode = spv::OpNop;
|
||||
|
||||
// OpcodeHandler that handles several MSL preprocessing operations.
|
||||
struct OpCodePreprocessor : OpcodeHandler
|
||||
@ -248,8 +268,10 @@ protected:
|
||||
}
|
||||
|
||||
bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override;
|
||||
CompilerMSL::SPVFuncImpl get_spv_func_impl(spv::Op opcode, const uint32_t *args);
|
||||
|
||||
CompilerMSL &compiler;
|
||||
std::unordered_map<uint32_t, uint32_t> result_types;
|
||||
bool suppress_missing_prototypes = false;
|
||||
bool uses_atomics = false;
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user