From 2583321657e9b54e68ecf6f7d348f51c6d6ca7c6 Mon Sep 17 00:00:00 2001 From: Chip Davis Date: Wed, 19 Sep 2018 20:36:33 -0500 Subject: [PATCH] MSL: Add an option to insert texture swizzles into generated shaders. It's intended to be used with MoltenVK to support arbitrary `VkComponentMapping` settings. The idea is that MoltenVK will pass a buffer (which it set to some buffer index that isn't being used) containing packed versions of the `VkComponentMapping` struct, one for each sampled image. Yes, this is horribly ugly. It is unfortunately necessary. Much of the ugliness is to support swizzling gather operations, where we need to alter the component that the gather operates on--something complicated by the `gather()` method requiring the passed-in component to be a constant expression. It doesn't even support swizzling gathers on depth textures, though I could add that if it turns out we need it. --- main.cpp | 3 + .../asm/frag/texture-access.swizzle.asm.frag | 157 ++++++++ .../frag/texture-access.swizzle.frag | 157 ++++++++ .../asm/frag/texture-access.swizzle.asm.frag | 364 ++++++++++++++++++ .../frag/texture-access.swizzle.frag | 79 ++++ spirv_common.hpp | 1 + spirv_msl.cpp | 199 +++++++++- spirv_msl.hpp | 3 + test_shaders.py | 2 + 9 files changed, 962 insertions(+), 3 deletions(-) create mode 100644 reference/shaders-msl-no-opt/asm/frag/texture-access.swizzle.asm.frag create mode 100644 reference/shaders-msl-no-opt/frag/texture-access.swizzle.frag create mode 100644 shaders-msl-no-opt/asm/frag/texture-access.swizzle.asm.frag create mode 100644 shaders-msl-no-opt/frag/texture-access.swizzle.frag diff --git a/main.cpp b/main.cpp index f39f4c25..1596e095 100644 --- a/main.cpp +++ b/main.cpp @@ -489,6 +489,7 @@ struct CLIArguments bool yflip = false; bool sso = false; bool support_nonzero_baseinstance = true; + bool msl_swizzle_texture_samples = false; vector pls_in; vector pls_out; vector remaps; @@ -703,6 +704,7 @@ static int main_inner(int argc, char *argv[]) cbs.add("--vulkan-semantics", [&args](CLIParser &) { args.vulkan_semantics = true; }); cbs.add("--flatten-multidimensional-arrays", [&args](CLIParser &) { args.flatten_multidimensional_arrays = true; }); cbs.add("--no-420pack-extension", [&args](CLIParser &) { args.use_420pack_extension = false; }); + cbs.add("--msl-swizzle-texture-samples", [&args](CLIParser &) { args.msl_swizzle_texture_samples = true; }); cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); }); cbs.add("--rename-entry-point", [&args](CLIParser &parser) { auto old_name = parser.next_string(); @@ -822,6 +824,7 @@ static int main_inner(int argc, char *argv[]) auto msl_opts = msl_comp->get_msl_options(); if (args.set_msl_version) msl_opts.msl_version = args.msl_version; + msl_opts.swizzle_texture_samples = args.msl_swizzle_texture_samples; msl_comp->set_msl_options(msl_opts); } else if (args.hlsl) diff --git a/reference/shaders-msl-no-opt/asm/frag/texture-access.swizzle.asm.frag b/reference/shaders-msl-no-opt/asm/frag/texture-access.swizzle.asm.frag new file mode 100644 index 00000000..ed062be2 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/texture-access.swizzle.asm.frag @@ -0,0 +1,157 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +// Returns 2D texture coords corresponding to 1D texel buffer coords +uint2 spvTexelBufferCoord(uint tc) +{ + return uint2(tc % 4096, tc / 4096); +} + +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +} ; + +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) +{ + return static_cast(x); +} +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) +{ + return static_cast(x); +} + +template +inline T spvGetSwizzle(vec x, spvSwizzle s) +{ + switch (s) + { + case spvSwizzle::zero: + return 0; + case spvSwizzle::one: + return 1; + case spvSwizzle::red: + return x.r; + case spvSwizzle::green: + return x.g; + case spvSwizzle::blue: + return x.b; + case spvSwizzle::alpha: + return x.a; + default: + break; + } + return 0; +} + +// Wrapper function that swizzles texture samples and fetches. +template +inline vec spvTextureSwizzle(vec x, uint s) +{ + if (!s) + return x; + return vec(spvGetSwizzle(x, spvSwizzle((s >> 0) & 0x7)), spvGetSwizzle(x, spvSwizzle((s >> 3) & 0x7)), spvGetSwizzle(x, spvSwizzle((s >> 6) & 0x7)), spvGetSwizzle(x, spvSwizzle((s >> 9) & 0x7))); +} + +template +inline T spvTextureSwizzle(T x, uint s) +{ + return spvTextureSwizzle(vec(x, 0, 0, 1), s).x; +} + +// Wrapper function that swizzles texture gathers. +template +inline vec spvGatherSwizzle(sampler s, thread Tex& t, Ts... params, component c, uint sw) METAL_CONST_ARG(c) +{ + if (sw) + { + switch (spvSwizzle((sw >> (uint(c) * 3)) & 0x7)) + { + case spvSwizzle::none: + break; + case spvSwizzle::zero: + return vec(0, 0, 0, 0); + case spvSwizzle::one: + return vec(1, 1, 1, 1); + case spvSwizzle::red: + return t.gather(s, spvForward(params)..., component::x); + case spvSwizzle::green: + return t.gather(s, spvForward(params)..., component::y); + case spvSwizzle::blue: + return t.gather(s, spvForward(params)..., component::z); + case spvSwizzle::alpha: + return t.gather(s, spvForward(params)..., component::w); + } + } + switch (c) + { + case component::x: + return t.gather(s, spvForward(params)..., component::x); + case component::y: + return t.gather(s, spvForward(params)..., component::y); + case component::z: + return t.gather(s, spvForward(params)..., component::z); + case component::w: + return t.gather(s, spvForward(params)..., component::w); + } +} + +fragment void main0(constant uint32_t* spvSwizzleConst [[buffer(0)]], texture1d tex1d [[texture(0)]], texture2d tex2d [[texture(1)]], texture3d tex3d [[texture(2)]], texturecube texCube [[texture(3)]], texture2d_array tex2dArray [[texture(4)]], texturecube_array texCubeArray [[texture(5)]], texture2d texBuffer [[texture(6)]], depth2d depth2d [[texture(7)]], depthcube depthCube [[texture(8)]], depth2d_array depth2dArray [[texture(9)]], depthcube_array depthCubeArray [[texture(10)]], sampler tex1dSamp [[sampler(0)]], sampler tex2dSamp [[sampler(1)]], sampler tex3dSamp [[sampler(2)]], sampler texCubeSamp [[sampler(3)]], sampler tex2dArraySamp [[sampler(4)]], sampler texCubeArraySamp [[sampler(5)]], sampler depth2dSamp [[sampler(7)]], sampler depthCubeSamp [[sampler(8)]], sampler depth2dArraySamp [[sampler(9)]], sampler depthCubeArraySamp [[sampler(10)]]) +{ + float4 c = spvTextureSwizzle(tex1d.sample(tex1dSamp, 0.0), spvSwizzleConst[0]); + c = spvTextureSwizzle(tex2d.sample(tex2dSamp, float2(0.0)), spvSwizzleConst[1]); + c = spvTextureSwizzle(tex3d.sample(tex3dSamp, float3(0.0)), spvSwizzleConst[2]); + c = spvTextureSwizzle(texCube.sample(texCubeSamp, float3(0.0)), spvSwizzleConst[3]); + c = spvTextureSwizzle(tex2dArray.sample(tex2dArraySamp, float3(0.0).xy, uint(round(float3(0.0).z))), spvSwizzleConst[4]); + c = spvTextureSwizzle(texCubeArray.sample(texCubeArraySamp, float4(0.0).xyz, uint(round(float4(0.0).w))), spvSwizzleConst[5]); + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSamp, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z), spvSwizzleConst[7]); + c.x = spvTextureSwizzle(depthCube.sample_compare(depthCubeSamp, float4(0.0, 0.0, 0.0, 1.0).xyz, float4(0.0, 0.0, 0.0, 1.0).w), spvSwizzleConst[8]); + c.x = spvTextureSwizzle(depth2dArray.sample_compare(depth2dArraySamp, float4(0.0, 0.0, 0.0, 1.0).xy, uint(round(float4(0.0, 0.0, 0.0, 1.0).z)), float4(0.0, 0.0, 0.0, 1.0).w), spvSwizzleConst[9]); + c.x = spvTextureSwizzle(depthCubeArray.sample_compare(depthCubeArraySamp, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0), spvSwizzleConst[10]); + c = spvTextureSwizzle(tex1d.sample(tex1dSamp, float2(0.0, 1.0).x / float2(0.0, 1.0).y), spvSwizzleConst[0]); + c = spvTextureSwizzle(tex2d.sample(tex2dSamp, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z), spvSwizzleConst[1]); + c = spvTextureSwizzle(tex3d.sample(tex3dSamp, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w), spvSwizzleConst[2]); + float4 _152 = float4(0.0, 0.0, 1.0, 1.0); + _152.z = float4(0.0, 0.0, 1.0, 1.0).w; + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSamp, _152.xy / _152.z, float4(0.0, 0.0, 1.0, 1.0).z), spvSwizzleConst[7]); + c = spvTextureSwizzle(tex1d.sample(tex1dSamp, 0.0), spvSwizzleConst[0]); + c = spvTextureSwizzle(tex2d.sample(tex2dSamp, float2(0.0), level(0.0)), spvSwizzleConst[1]); + c = spvTextureSwizzle(tex3d.sample(tex3dSamp, float3(0.0), level(0.0)), spvSwizzleConst[2]); + c = spvTextureSwizzle(texCube.sample(texCubeSamp, float3(0.0), level(0.0)), spvSwizzleConst[3]); + c = spvTextureSwizzle(tex2dArray.sample(tex2dArraySamp, float3(0.0).xy, uint(round(float3(0.0).z)), level(0.0)), spvSwizzleConst[4]); + c = spvTextureSwizzle(texCubeArray.sample(texCubeArraySamp, float4(0.0).xyz, uint(round(float4(0.0).w)), level(0.0)), spvSwizzleConst[5]); + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSamp, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z, level(0.0)), spvSwizzleConst[7]); + c = spvTextureSwizzle(tex1d.sample(tex1dSamp, float2(0.0, 1.0).x / float2(0.0, 1.0).y), spvSwizzleConst[0]); + c = spvTextureSwizzle(tex2d.sample(tex2dSamp, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z, level(0.0)), spvSwizzleConst[1]); + c = spvTextureSwizzle(tex3d.sample(tex3dSamp, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w, level(0.0)), spvSwizzleConst[2]); + float4 _202 = float4(0.0, 0.0, 1.0, 1.0); + _202.z = float4(0.0, 0.0, 1.0, 1.0).w; + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSamp, _202.xy / _202.z, float4(0.0, 0.0, 1.0, 1.0).z, level(0.0)), spvSwizzleConst[7]); + c = spvTextureSwizzle(tex1d.read(uint(0)), spvSwizzleConst[0]); + c = spvTextureSwizzle(tex2d.read(uint2(int2(0)), 0), spvSwizzleConst[1]); + c = spvTextureSwizzle(tex3d.read(uint3(int3(0)), 0), spvSwizzleConst[2]); + c = spvTextureSwizzle(tex2dArray.read(uint2(int3(0).xy), uint(int3(0).z), 0), spvSwizzleConst[4]); + c = texBuffer.read(spvTexelBufferCoord(0)); + c = spvGatherSwizzle, float2, int2>(tex2dSamp, tex2d, float2(0.0), int2(0), component::x, spvSwizzleConst[1]); + c = spvGatherSwizzle, float3>(texCubeSamp, texCube, float3(0.0), component::y, spvSwizzleConst[3]); + c = spvGatherSwizzle, float2, uint, int2>(tex2dArraySamp, tex2dArray, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0), component::z, spvSwizzleConst[4]); + c = spvGatherSwizzle, float3, uint>(texCubeArraySamp, texCubeArray, float4(0.0).xyz, uint(round(float4(0.0).w)), component::w, spvSwizzleConst[5]); + c = depth2d.gather_compare(depth2dSamp, float2(0.0), 1.0); + c = depthCube.gather_compare(depthCubeSamp, float3(0.0), 1.0); + c = depth2dArray.gather_compare(depth2dArraySamp, float3(0.0).xy, uint(round(float3(0.0).z)), 1.0); + c = depthCubeArray.gather_compare(depthCubeArraySamp, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0); +} + diff --git a/reference/shaders-msl-no-opt/frag/texture-access.swizzle.frag b/reference/shaders-msl-no-opt/frag/texture-access.swizzle.frag new file mode 100644 index 00000000..e4387018 --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/texture-access.swizzle.frag @@ -0,0 +1,157 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +// Returns 2D texture coords corresponding to 1D texel buffer coords +uint2 spvTexelBufferCoord(uint tc) +{ + return uint2(tc % 4096, tc / 4096); +} + +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +} ; + +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) +{ + return static_cast(x); +} +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) +{ + return static_cast(x); +} + +template +inline T spvGetSwizzle(vec x, spvSwizzle s) +{ + switch (s) + { + case spvSwizzle::zero: + return 0; + case spvSwizzle::one: + return 1; + case spvSwizzle::red: + return x.r; + case spvSwizzle::green: + return x.g; + case spvSwizzle::blue: + return x.b; + case spvSwizzle::alpha: + return x.a; + default: + break; + } + return 0; +} + +// Wrapper function that swizzles texture samples and fetches. +template +inline vec spvTextureSwizzle(vec x, uint s) +{ + if (!s) + return x; + return vec(spvGetSwizzle(x, spvSwizzle((s >> 0) & 0x7)), spvGetSwizzle(x, spvSwizzle((s >> 3) & 0x7)), spvGetSwizzle(x, spvSwizzle((s >> 6) & 0x7)), spvGetSwizzle(x, spvSwizzle((s >> 9) & 0x7))); +} + +template +inline T spvTextureSwizzle(T x, uint s) +{ + return spvTextureSwizzle(vec(x, 0, 0, 1), s).x; +} + +// Wrapper function that swizzles texture gathers. +template +inline vec spvGatherSwizzle(sampler s, thread Tex& t, Ts... params, component c, uint sw) METAL_CONST_ARG(c) +{ + if (sw) + { + switch (spvSwizzle((sw >> (uint(c) * 3)) & 0x7)) + { + case spvSwizzle::none: + break; + case spvSwizzle::zero: + return vec(0, 0, 0, 0); + case spvSwizzle::one: + return vec(1, 1, 1, 1); + case spvSwizzle::red: + return t.gather(s, spvForward(params)..., component::x); + case spvSwizzle::green: + return t.gather(s, spvForward(params)..., component::y); + case spvSwizzle::blue: + return t.gather(s, spvForward(params)..., component::z); + case spvSwizzle::alpha: + return t.gather(s, spvForward(params)..., component::w); + } + } + switch (c) + { + case component::x: + return t.gather(s, spvForward(params)..., component::x); + case component::y: + return t.gather(s, spvForward(params)..., component::y); + case component::z: + return t.gather(s, spvForward(params)..., component::z); + case component::w: + return t.gather(s, spvForward(params)..., component::w); + } +} + +fragment void main0(constant uint32_t* spvSwizzleConst [[buffer(0)]], texture1d tex1d [[texture(0)]], texture2d tex2d [[texture(1)]], texture3d tex3d [[texture(2)]], texturecube texCube [[texture(3)]], texture2d_array tex2dArray [[texture(4)]], texturecube_array texCubeArray [[texture(5)]], texture2d texBuffer [[texture(6)]], depth2d depth2d [[texture(7)]], depthcube depthCube [[texture(8)]], depth2d_array depth2dArray [[texture(9)]], depthcube_array depthCubeArray [[texture(10)]], sampler tex1dSmplr [[sampler(0)]], sampler tex2dSmplr [[sampler(1)]], sampler tex3dSmplr [[sampler(2)]], sampler texCubeSmplr [[sampler(3)]], sampler tex2dArraySmplr [[sampler(4)]], sampler texCubeArraySmplr [[sampler(5)]], sampler depth2dSmplr [[sampler(7)]], sampler depthCubeSmplr [[sampler(8)]], sampler depth2dArraySmplr [[sampler(9)]], sampler depthCubeArraySmplr [[sampler(10)]]) +{ + float4 c = spvTextureSwizzle(tex1d.sample(tex1dSmplr, 0.0), spvSwizzleConst[0]); + c = spvTextureSwizzle(tex2d.sample(tex2dSmplr, float2(0.0)), spvSwizzleConst[1]); + c = spvTextureSwizzle(tex3d.sample(tex3dSmplr, float3(0.0)), spvSwizzleConst[2]); + c = spvTextureSwizzle(texCube.sample(texCubeSmplr, float3(0.0)), spvSwizzleConst[3]); + c = spvTextureSwizzle(tex2dArray.sample(tex2dArraySmplr, float3(0.0).xy, uint(round(float3(0.0).z))), spvSwizzleConst[4]); + c = spvTextureSwizzle(texCubeArray.sample(texCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w))), spvSwizzleConst[5]); + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z), spvSwizzleConst[7]); + c.x = spvTextureSwizzle(depthCube.sample_compare(depthCubeSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz, float4(0.0, 0.0, 0.0, 1.0).w), spvSwizzleConst[8]); + c.x = spvTextureSwizzle(depth2dArray.sample_compare(depth2dArraySmplr, float4(0.0, 0.0, 0.0, 1.0).xy, uint(round(float4(0.0, 0.0, 0.0, 1.0).z)), float4(0.0, 0.0, 0.0, 1.0).w), spvSwizzleConst[9]); + c.x = spvTextureSwizzle(depthCubeArray.sample_compare(depthCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0), spvSwizzleConst[10]); + c = spvTextureSwizzle(tex1d.sample(tex1dSmplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y), spvSwizzleConst[0]); + c = spvTextureSwizzle(tex2d.sample(tex2dSmplr, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z), spvSwizzleConst[1]); + c = spvTextureSwizzle(tex3d.sample(tex3dSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w), spvSwizzleConst[2]); + float4 _100 = float4(0.0, 0.0, 1.0, 1.0); + _100.z = float4(0.0, 0.0, 1.0, 1.0).w; + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, _100.xy / _100.z, float4(0.0, 0.0, 1.0, 1.0).z), spvSwizzleConst[7]); + c = spvTextureSwizzle(tex1d.sample(tex1dSmplr, 0.0), spvSwizzleConst[0]); + c = spvTextureSwizzle(tex2d.sample(tex2dSmplr, float2(0.0), level(0.0)), spvSwizzleConst[1]); + c = spvTextureSwizzle(tex3d.sample(tex3dSmplr, float3(0.0), level(0.0)), spvSwizzleConst[2]); + c = spvTextureSwizzle(texCube.sample(texCubeSmplr, float3(0.0), level(0.0)), spvSwizzleConst[3]); + c = spvTextureSwizzle(tex2dArray.sample(tex2dArraySmplr, float3(0.0).xy, uint(round(float3(0.0).z)), level(0.0)), spvSwizzleConst[4]); + c = spvTextureSwizzle(texCubeArray.sample(texCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w)), level(0.0)), spvSwizzleConst[5]); + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z, level(0.0)), spvSwizzleConst[7]); + c = spvTextureSwizzle(tex1d.sample(tex1dSmplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y), spvSwizzleConst[0]); + c = spvTextureSwizzle(tex2d.sample(tex2dSmplr, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z, level(0.0)), spvSwizzleConst[1]); + c = spvTextureSwizzle(tex3d.sample(tex3dSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w, level(0.0)), spvSwizzleConst[2]); + float4 _128 = float4(0.0, 0.0, 1.0, 1.0); + _128.z = float4(0.0, 0.0, 1.0, 1.0).w; + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, _128.xy / _128.z, float4(0.0, 0.0, 1.0, 1.0).z, level(0.0)), spvSwizzleConst[7]); + c = spvTextureSwizzle(tex1d.read(uint(0)), spvSwizzleConst[0]); + c = spvTextureSwizzle(tex2d.read(uint2(int2(0)), 0), spvSwizzleConst[1]); + c = spvTextureSwizzle(tex3d.read(uint3(int3(0)), 0), spvSwizzleConst[2]); + c = spvTextureSwizzle(tex2dArray.read(uint2(int3(0).xy), uint(int3(0).z), 0), spvSwizzleConst[4]); + c = texBuffer.read(spvTexelBufferCoord(0)); + c = spvGatherSwizzle, float2, int2>(tex2dSmplr, tex2d, float2(0.0), int2(0), component::x, spvSwizzleConst[1]); + c = spvGatherSwizzle, float3>(texCubeSmplr, texCube, float3(0.0), component::y, spvSwizzleConst[3]); + c = spvGatherSwizzle, float2, uint, int2>(tex2dArraySmplr, tex2dArray, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0), component::z, spvSwizzleConst[4]); + c = spvGatherSwizzle, float3, uint>(texCubeArraySmplr, texCubeArray, float4(0.0).xyz, uint(round(float4(0.0).w)), component::w, spvSwizzleConst[5]); + c = depth2d.gather_compare(depth2dSmplr, float2(0.0), 1.0); + c = depthCube.gather_compare(depthCubeSmplr, float3(0.0), 1.0); + c = depth2dArray.gather_compare(depth2dArraySmplr, float3(0.0).xy, uint(round(float3(0.0).z)), 1.0); + c = depthCubeArray.gather_compare(depthCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0); +} + diff --git a/shaders-msl-no-opt/asm/frag/texture-access.swizzle.asm.frag b/shaders-msl-no-opt/asm/frag/texture-access.swizzle.asm.frag new file mode 100644 index 00000000..4c1408b3 --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/texture-access.swizzle.asm.frag @@ -0,0 +1,364 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos Glslang Reference Front End; 6 +; Bound: 247 +; Schema: 0 + OpCapability Shader + OpCapability Sampled1D + OpCapability SampledCubeArray + OpCapability SampledBuffer + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %c "c" + OpName %tex1d "tex1d" + OpName %tex2d "tex2d" + OpName %tex3d "tex3d" + OpName %texCube "texCube" + OpName %tex2dArray "tex2dArray" + OpName %texCubeArray "texCubeArray" + OpName %depth2d "depth2d" + OpName %depthCube "depthCube" + OpName %depth2dArray "depth2dArray" + OpName %depthCubeArray "depthCubeArray" + OpName %texBuffer "texBuffer" + OpName %tex1dSamp "tex1dSamp" + OpName %tex2dSamp "tex2dSamp" + OpName %tex3dSamp "tex3dSamp" + OpName %texCubeSamp "texCubeSamp" + OpName %tex2dArraySamp "tex2dArraySamp" + OpName %texCubeArraySamp "texCubeArraySamp" + OpName %depth2dSamp "depth2dSamp" + OpName %depthCubeSamp "depthCubeSamp" + OpName %depth2dArraySamp "depth2dArraySamp" + OpName %depthCubeArraySamp "depthCubeArraySamp" + OpDecorate %tex1d DescriptorSet 0 + OpDecorate %tex1d Binding 0 + OpDecorate %tex2d DescriptorSet 0 + OpDecorate %tex2d Binding 1 + OpDecorate %tex3d DescriptorSet 0 + OpDecorate %tex3d Binding 2 + OpDecorate %texCube DescriptorSet 0 + OpDecorate %texCube Binding 3 + OpDecorate %tex2dArray DescriptorSet 0 + OpDecorate %tex2dArray Binding 4 + OpDecorate %texCubeArray DescriptorSet 0 + OpDecorate %texCubeArray Binding 5 + OpDecorate %depth2d DescriptorSet 0 + OpDecorate %depth2d Binding 7 + OpDecorate %depthCube DescriptorSet 0 + OpDecorate %depthCube Binding 8 + OpDecorate %depth2dArray DescriptorSet 0 + OpDecorate %depth2dArray Binding 9 + OpDecorate %depthCubeArray DescriptorSet 0 + OpDecorate %depthCubeArray Binding 10 + OpDecorate %texBuffer DescriptorSet 0 + OpDecorate %texBuffer Binding 6 + OpDecorate %tex1dSamp DescriptorSet 1 + OpDecorate %tex1dSamp Binding 0 + OpDecorate %tex2dSamp DescriptorSet 1 + OpDecorate %tex2dSamp Binding 1 + OpDecorate %tex3dSamp DescriptorSet 1 + OpDecorate %tex3dSamp Binding 2 + OpDecorate %texCubeSamp DescriptorSet 1 + OpDecorate %texCubeSamp Binding 3 + OpDecorate %tex2dArraySamp DescriptorSet 1 + OpDecorate %tex2dArraySamp Binding 4 + OpDecorate %texCubeArraySamp DescriptorSet 1 + OpDecorate %texCubeArraySamp Binding 5 + OpDecorate %depth2dSamp DescriptorSet 1 + OpDecorate %depth2dSamp Binding 7 + OpDecorate %depthCubeSamp DescriptorSet 1 + OpDecorate %depthCubeSamp Binding 8 + OpDecorate %depth2dArraySamp DescriptorSet 1 + OpDecorate %depth2dArraySamp Binding 9 + OpDecorate %depthCubeArraySamp DescriptorSet 1 + OpDecorate %depthCubeArraySamp Binding 10 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Function_v4float = OpTypePointer Function %v4float + %10 = OpTypeImage %float 1D 0 0 0 1 Unknown + %11 = OpTypeSampledImage %10 + %12 = OpTypeSampler +%_ptr_UniformConstant_10 = OpTypePointer UniformConstant %10 + %tex1d = OpVariable %_ptr_UniformConstant_10 UniformConstant +%_ptr_UniformConstant_12 = OpTypePointer UniformConstant %12 + %tex1dSamp = OpVariable %_ptr_UniformConstant_12 UniformConstant + %float_0 = OpConstant %float 0 + %17 = OpTypeImage %float 2D 0 0 0 1 Unknown + %18 = OpTypeSampledImage %17 +%_ptr_UniformConstant_17 = OpTypePointer UniformConstant %17 + %tex2d = OpVariable %_ptr_UniformConstant_17 UniformConstant + %tex2dSamp = OpVariable %_ptr_UniformConstant_12 UniformConstant + %v2float = OpTypeVector %float 2 + %23 = OpConstantComposite %v2float %float_0 %float_0 + %25 = OpTypeImage %float 3D 0 0 0 1 Unknown + %26 = OpTypeSampledImage %25 +%_ptr_UniformConstant_25 = OpTypePointer UniformConstant %25 + %tex3d = OpVariable %_ptr_UniformConstant_25 UniformConstant + %tex3dSamp = OpVariable %_ptr_UniformConstant_12 UniformConstant + %v3float = OpTypeVector %float 3 + %31 = OpConstantComposite %v3float %float_0 %float_0 %float_0 + %33 = OpTypeImage %float Cube 0 0 0 1 Unknown + %34 = OpTypeSampledImage %33 +%_ptr_UniformConstant_33 = OpTypePointer UniformConstant %33 + %texCube = OpVariable %_ptr_UniformConstant_33 UniformConstant +%texCubeSamp = OpVariable %_ptr_UniformConstant_12 UniformConstant + %39 = OpTypeImage %float 2D 0 1 0 1 Unknown + %40 = OpTypeSampledImage %39 +%_ptr_UniformConstant_39 = OpTypePointer UniformConstant %39 + %tex2dArray = OpVariable %_ptr_UniformConstant_39 UniformConstant +%tex2dArraySamp = OpVariable %_ptr_UniformConstant_12 UniformConstant + %45 = OpTypeImage %float Cube 0 1 0 1 Unknown + %46 = OpTypeSampledImage %45 +%_ptr_UniformConstant_45 = OpTypePointer UniformConstant %45 +%texCubeArray = OpVariable %_ptr_UniformConstant_45 UniformConstant +%texCubeArraySamp = OpVariable %_ptr_UniformConstant_12 UniformConstant + %50 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %52 = OpTypeImage %float 2D 1 0 0 1 Unknown + %53 = OpTypeSampledImage %52 +%_ptr_UniformConstant_52 = OpTypePointer UniformConstant %52 + %depth2d = OpVariable %_ptr_UniformConstant_52 UniformConstant +%depth2dSamp = OpVariable %_ptr_UniformConstant_12 UniformConstant + %float_1 = OpConstant %float 1 + %58 = OpConstantComposite %v3float %float_0 %float_0 %float_1 + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Function_float = OpTypePointer Function %float + %65 = OpTypeImage %float Cube 1 0 0 1 Unknown + %66 = OpTypeSampledImage %65 +%_ptr_UniformConstant_65 = OpTypePointer UniformConstant %65 + %depthCube = OpVariable %_ptr_UniformConstant_65 UniformConstant +%depthCubeSamp = OpVariable %_ptr_UniformConstant_12 UniformConstant + %70 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_1 + %74 = OpTypeImage %float 2D 1 1 0 1 Unknown + %75 = OpTypeSampledImage %74 +%_ptr_UniformConstant_74 = OpTypePointer UniformConstant %74 +%depth2dArray = OpVariable %_ptr_UniformConstant_74 UniformConstant +%depth2dArraySamp = OpVariable %_ptr_UniformConstant_12 UniformConstant + %82 = OpTypeImage %float Cube 1 1 0 1 Unknown + %83 = OpTypeSampledImage %82 +%_ptr_UniformConstant_82 = OpTypePointer UniformConstant %82 +%depthCubeArray = OpVariable %_ptr_UniformConstant_82 UniformConstant +%depthCubeArraySamp = OpVariable %_ptr_UniformConstant_12 UniformConstant + %97 = OpConstantComposite %v2float %float_0 %float_1 + %98 = OpConstantComposite %v4float %float_0 %float_0 %float_1 %float_1 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %v2int = OpTypeVector %int 2 + %138 = OpConstantComposite %v2int %int_0 %int_0 + %v3int = OpTypeVector %int 3 + %143 = OpConstantComposite %v3int %int_0 %int_0 %int_0 + %149 = OpTypeImage %float Buffer 0 0 0 1 Unknown +%_ptr_UniformConstant_149 = OpTypePointer UniformConstant %149 + %texBuffer = OpVariable %_ptr_UniformConstant_149 UniformConstant + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 + %int_3 = OpConstant %int 3 + %main = OpFunction %void None %3 + %5 = OpLabel + %c = OpVariable %_ptr_Function_v4float Function + %13 = OpLoad %10 %tex1d + %14 = OpLoad %12 %tex1dSamp + %15 = OpSampledImage %11 %13 %14 + %16 = OpImageSampleImplicitLod %v4float %15 %float_0 + OpStore %c %16 + %19 = OpLoad %17 %tex2d + %20 = OpLoad %12 %tex2dSamp + %21 = OpSampledImage %18 %19 %20 + %24 = OpImageSampleImplicitLod %v4float %21 %23 + OpStore %c %24 + %27 = OpLoad %25 %tex3d + %28 = OpLoad %12 %tex3dSamp + %29 = OpSampledImage %26 %27 %28 + %32 = OpImageSampleImplicitLod %v4float %29 %31 + OpStore %c %32 + %35 = OpLoad %33 %texCube + %36 = OpLoad %12 %texCubeSamp + %37 = OpSampledImage %34 %35 %36 + %38 = OpImageSampleImplicitLod %v4float %37 %31 + OpStore %c %38 + %41 = OpLoad %39 %tex2dArray + %42 = OpLoad %12 %tex2dArraySamp + %43 = OpSampledImage %40 %41 %42 + %44 = OpImageSampleImplicitLod %v4float %43 %31 + OpStore %c %44 + %47 = OpLoad %45 %texCubeArray + %48 = OpLoad %12 %texCubeArraySamp + %49 = OpSampledImage %46 %47 %48 + %51 = OpImageSampleImplicitLod %v4float %49 %50 + OpStore %c %51 + %54 = OpLoad %52 %depth2d + %55 = OpLoad %12 %depth2dSamp + %56 = OpSampledImage %53 %54 %55 + %59 = OpCompositeExtract %float %58 2 + %60 = OpImageSampleDrefImplicitLod %float %56 %58 %59 + %64 = OpAccessChain %_ptr_Function_float %c %uint_0 + OpStore %64 %60 + %67 = OpLoad %65 %depthCube + %68 = OpLoad %12 %depthCubeSamp + %69 = OpSampledImage %66 %67 %68 + %71 = OpCompositeExtract %float %70 3 + %72 = OpImageSampleDrefImplicitLod %float %69 %70 %71 + %73 = OpAccessChain %_ptr_Function_float %c %uint_0 + OpStore %73 %72 + %76 = OpLoad %74 %depth2dArray + %77 = OpLoad %12 %depth2dArraySamp + %78 = OpSampledImage %75 %76 %77 + %79 = OpCompositeExtract %float %70 3 + %80 = OpImageSampleDrefImplicitLod %float %78 %70 %79 + %81 = OpAccessChain %_ptr_Function_float %c %uint_0 + OpStore %81 %80 + %84 = OpLoad %82 %depthCubeArray + %85 = OpLoad %12 %depthCubeArraySamp + %86 = OpSampledImage %83 %84 %85 + %87 = OpImageSampleDrefImplicitLod %float %86 %50 %float_1 + %88 = OpAccessChain %_ptr_Function_float %c %uint_0 + OpStore %88 %87 + %89 = OpLoad %10 %tex1d + %90 = OpLoad %12 %tex1dSamp + %91 = OpSampledImage %11 %89 %90 + %92 = OpImageSampleProjImplicitLod %v4float %91 %97 + OpStore %c %92 + %93 = OpLoad %17 %tex2d + %94 = OpLoad %12 %tex2dSamp + %95 = OpSampledImage %18 %93 %94 + %96 = OpImageSampleProjImplicitLod %v4float %95 %58 + OpStore %c %96 + %99 = OpLoad %25 %tex3d + %100 = OpLoad %12 %tex3dSamp + %101 = OpSampledImage %26 %99 %100 + %102 = OpImageSampleProjImplicitLod %v4float %101 %70 + OpStore %c %102 + %103 = OpLoad %52 %depth2d + %104 = OpLoad %12 %depth2dSamp + %105 = OpSampledImage %53 %103 %104 + %106 = OpCompositeExtract %float %98 2 + %107 = OpCompositeExtract %float %98 3 + %108 = OpCompositeInsert %v4float %107 %98 2 + %109 = OpImageSampleProjDrefImplicitLod %float %105 %108 %106 + %110 = OpAccessChain %_ptr_Function_float %c %uint_0 + OpStore %110 %109 + %111 = OpLoad %10 %tex1d + %112 = OpLoad %12 %tex1dSamp + %113 = OpSampledImage %11 %111 %112 + %114 = OpImageSampleExplicitLod %v4float %113 %float_0 Lod %float_0 + OpStore %c %114 + %115 = OpLoad %17 %tex2d + %116 = OpLoad %12 %tex2dSamp + %117 = OpSampledImage %18 %115 %116 + %118 = OpImageSampleExplicitLod %v4float %117 %23 Lod %float_0 + OpStore %c %118 + %119 = OpLoad %25 %tex3d + %120 = OpLoad %12 %tex3dSamp + %121 = OpSampledImage %26 %119 %120 + %122 = OpImageSampleExplicitLod %v4float %121 %31 Lod %float_0 + OpStore %c %122 + %123 = OpLoad %33 %texCube + %124 = OpLoad %12 %texCubeSamp + %125 = OpSampledImage %34 %123 %124 + %126 = OpImageSampleExplicitLod %v4float %125 %31 Lod %float_0 + OpStore %c %126 + %127 = OpLoad %39 %tex2dArray + %128 = OpLoad %12 %tex2dArraySamp + %129 = OpSampledImage %40 %127 %128 + %130 = OpImageSampleExplicitLod %v4float %129 %31 Lod %float_0 + OpStore %c %130 + %131 = OpLoad %45 %texCubeArray + %132 = OpLoad %12 %texCubeArraySamp + %133 = OpSampledImage %46 %131 %132 + %134 = OpImageSampleExplicitLod %v4float %133 %50 Lod %float_0 + OpStore %c %134 + %135 = OpLoad %52 %depth2d + %136 = OpLoad %12 %depth2dSamp + %137 = OpSampledImage %53 %135 %136 + %139 = OpCompositeExtract %float %58 2 + %140 = OpImageSampleDrefExplicitLod %float %137 %58 %139 Lod %float_0 + %141 = OpAccessChain %_ptr_Function_float %c %uint_0 + OpStore %141 %140 + %142 = OpLoad %10 %tex1d + %144 = OpLoad %12 %tex1dSamp + %145 = OpSampledImage %11 %142 %144 + %146 = OpImageSampleProjExplicitLod %v4float %145 %97 Lod %float_0 + OpStore %c %146 + %147 = OpLoad %17 %tex2d + %148 = OpLoad %12 %tex2dSamp + %150 = OpSampledImage %18 %147 %148 + %151 = OpImageSampleProjExplicitLod %v4float %150 %58 Lod %float_0 + OpStore %c %151 + %152 = OpLoad %25 %tex3d + %153 = OpLoad %12 %tex3dSamp + %154 = OpSampledImage %26 %152 %153 + %155 = OpImageSampleProjExplicitLod %v4float %154 %70 Lod %float_0 + OpStore %c %155 + %156 = OpLoad %52 %depth2d + %157 = OpLoad %12 %depth2dSamp + %158 = OpSampledImage %53 %156 %157 + %159 = OpCompositeExtract %float %98 2 + %160 = OpCompositeExtract %float %98 3 + %161 = OpCompositeInsert %v4float %160 %98 2 + %162 = OpImageSampleProjDrefExplicitLod %float %158 %161 %159 Lod %float_0 + %163 = OpAccessChain %_ptr_Function_float %c %uint_0 + OpStore %163 %162 + %164 = OpLoad %10 %tex1d + %165 = OpImageFetch %v4float %164 %int_0 Lod %int_0 + OpStore %c %165 + %166 = OpLoad %17 %tex2d + %167 = OpImageFetch %v4float %166 %138 Lod %int_0 + OpStore %c %167 + %168 = OpLoad %25 %tex3d + %169 = OpImageFetch %v4float %168 %143 Lod %int_0 + OpStore %c %169 + %170 = OpLoad %39 %tex2dArray + %171 = OpImageFetch %v4float %170 %143 Lod %int_0 + OpStore %c %171 + %172 = OpLoad %149 %texBuffer + %173 = OpImageFetch %v4float %172 %int_0 + OpStore %c %173 + %174 = OpLoad %17 %tex2d + %175 = OpLoad %12 %tex2dSamp + %176 = OpSampledImage %18 %174 %175 + %177 = OpImageGather %v4float %176 %23 %int_0 + OpStore %c %177 + %178 = OpLoad %33 %texCube + %179 = OpLoad %12 %texCubeSamp + %180 = OpSampledImage %34 %178 %179 + %181 = OpImageGather %v4float %180 %31 %int_1 + OpStore %c %181 + %182 = OpLoad %39 %tex2dArray + %183 = OpLoad %12 %tex2dArraySamp + %184 = OpSampledImage %40 %182 %183 + %185 = OpImageGather %v4float %184 %31 %int_2 + OpStore %c %185 + %186 = OpLoad %45 %texCubeArray + %187 = OpLoad %12 %texCubeArraySamp + %188 = OpSampledImage %46 %186 %187 + %189 = OpImageGather %v4float %188 %50 %int_3 + OpStore %c %189 + %190 = OpLoad %52 %depth2d + %191 = OpLoad %12 %depth2dSamp + %192 = OpSampledImage %53 %190 %191 + %193 = OpImageDrefGather %v4float %192 %23 %float_1 + OpStore %c %193 + %194 = OpLoad %65 %depthCube + %195 = OpLoad %12 %depthCubeSamp + %196 = OpSampledImage %66 %194 %195 + %197 = OpImageDrefGather %v4float %196 %31 %float_1 + OpStore %c %197 + %198 = OpLoad %74 %depth2dArray + %199 = OpLoad %12 %depth2dArraySamp + %200 = OpSampledImage %75 %198 %199 + %201 = OpImageDrefGather %v4float %200 %31 %float_1 + OpStore %c %201 + %202 = OpLoad %82 %depthCubeArray + %203 = OpLoad %12 %depthCubeArraySamp + %204 = OpSampledImage %83 %202 %203 + %205 = OpImageDrefGather %v4float %204 %50 %float_1 + OpStore %c %205 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/frag/texture-access.swizzle.frag b/shaders-msl-no-opt/frag/texture-access.swizzle.frag new file mode 100644 index 00000000..b09ebed7 --- /dev/null +++ b/shaders-msl-no-opt/frag/texture-access.swizzle.frag @@ -0,0 +1,79 @@ +#version 450 + +layout(binding = 0) uniform sampler1D tex1d; +layout(binding = 1) uniform sampler2D tex2d; +layout(binding = 2) uniform sampler3D tex3d; +layout(binding = 3) uniform samplerCube texCube; +layout(binding = 4) uniform sampler2DArray tex2dArray; +layout(binding = 5) uniform samplerCubeArray texCubeArray; +layout(binding = 6) uniform samplerBuffer texBuffer; + +layout(binding = 7) uniform sampler2DShadow depth2d; +layout(binding = 8) uniform samplerCubeShadow depthCube; +layout(binding = 9) uniform sampler2DArrayShadow depth2dArray; +layout(binding = 10) uniform samplerCubeArrayShadow depthCubeArray; + +void main() +{ + // OpImageSampleImplicitLod + vec4 c = texture(tex1d, 0.0); + c = texture(tex2d, vec2(0.0, 0.0)); + c = texture(tex3d, vec3(0.0, 0.0, 0.0)); + c = texture(texCube, vec3(0.0, 0.0, 0.0)); + c = texture(tex2dArray, vec3(0.0, 0.0, 0.0)); + c = texture(texCubeArray, vec4(0.0, 0.0, 0.0, 0.0)); + + // OpImageSampleDrefImplicitLod + c.r = texture(depth2d, vec3(0.0, 0.0, 1.0)); + c.r = texture(depthCube, vec4(0.0, 0.0, 0.0, 1.0)); + c.r = texture(depth2dArray, vec4(0.0, 0.0, 0.0, 1.0)); + c.r = texture(depthCubeArray, vec4(0.0, 0.0, 0.0, 0.0), 1.0); + + // OpImageSampleProjImplicitLod + c = textureProj(tex1d, vec2(0.0, 1.0)); + c = textureProj(tex2d, vec3(0.0, 0.0, 1.0)); + c = textureProj(tex3d, vec4(0.0, 0.0, 0.0, 1.0)); + + // OpImageSampleProjDrefImplicitLod + c.r = textureProj(depth2d, vec4(0.0, 0.0, 1.0, 1.0)); + + // OpImageSampleExplicitLod + c = textureLod(tex1d, 0.0, 0.0); + c = textureLod(tex2d, vec2(0.0, 0.0), 0.0); + c = textureLod(tex3d, vec3(0.0, 0.0, 0.0), 0.0); + c = textureLod(texCube, vec3(0.0, 0.0, 0.0), 0.0); + c = textureLod(tex2dArray, vec3(0.0, 0.0, 0.0), 0.0); + c = textureLod(texCubeArray, vec4(0.0, 0.0, 0.0, 0.0), 0.0); + + // OpImageSampleDrefExplicitLod + c.r = textureLod(depth2d, vec3(0.0, 0.0, 1.0), 0.0); + + // OpImageSampleProjExplicitLod + c = textureProjLod(tex1d, vec2(0.0, 1.0), 0.0); + c = textureProjLod(tex2d, vec3(0.0, 0.0, 1.0), 0.0); + c = textureProjLod(tex3d, vec4(0.0, 0.0, 0.0, 1.0), 0.0); + + // OpImageSampleProjDrefExplicitLod + c.r = textureProjLod(depth2d, vec4(0.0, 0.0, 1.0, 1.0), 0.0); + + // OpImageFetch + c = texelFetch(tex1d, 0, 0); + c = texelFetch(tex2d, ivec2(0, 0), 0); + c = texelFetch(tex3d, ivec3(0, 0, 0), 0); + c = texelFetch(tex2dArray, ivec3(0, 0, 0), 0); + + // Show that this transformation doesn't apply to Buffer images. + c = texelFetch(texBuffer, 0); + + // OpImageGather + c = textureGather(tex2d, vec2(0.0, 0.0), 0); + c = textureGather(texCube, vec3(0.0, 0.0, 0.0), 1); + c = textureGather(tex2dArray, vec3(0.0, 0.0, 0.0), 2); + c = textureGather(texCubeArray, vec4(0.0, 0.0, 0.0, 0.0), 3); + + // OpImageDrefGather + c = textureGather(depth2d, vec2(0.0, 0.0), 1.0); + c = textureGather(depthCube, vec3(0.0, 0.0, 0.0), 1.0); + c = textureGather(depth2dArray, vec3(0.0, 0.0, 0.0), 1.0); + c = textureGather(depthCubeArray, vec4(0.0, 0.0, 0.0, 0.0), 1.0); +} diff --git a/spirv_common.hpp b/spirv_common.hpp index 9544e3ca..6560880a 100644 --- a/spirv_common.hpp +++ b/spirv_common.hpp @@ -1237,6 +1237,7 @@ struct Meta Decoration decoration; std::vector members; uint32_t sampler = 0; + uint32_t image = 0; std::unordered_map decoration_word_offset; diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 49db9518..31693463 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -1584,6 +1584,113 @@ void CompilerMSL::emit_custom_functions() statement(""); break; + case SPVFuncImplTextureSwizzle: + statement("enum class spvSwizzle : uint"); + begin_scope(); + statement("none = 0,"); + statement("zero,"); + statement("one,"); + statement("red,"); + statement("green,"); + statement("blue,"); + statement("alpha"); + end_scope_decl(""); + statement(""); + statement("template struct spvRemoveReference { typedef T type; };"); + statement("template struct spvRemoveReference { typedef T type; };"); + statement("template struct spvRemoveReference { typedef T type; };"); + statement("template inline constexpr thread T&& spvForward(thread typename " + "spvRemoveReference::type& x)"); + begin_scope(); + statement("return static_cast(x);"); + end_scope(); + statement("template inline constexpr thread T&& spvForward(thread typename " + "spvRemoveReference::type&& x)"); + begin_scope(); + statement("return static_cast(x);"); + end_scope(); + statement(""); + statement("template"); + statement("inline T spvGetSwizzle(vec x, spvSwizzle s)"); + begin_scope(); + statement("switch (s)"); + begin_scope(); + statement("case spvSwizzle::zero:"); + statement(" return 0;"); + statement("case spvSwizzle::one:"); + statement(" return 1;"); + statement("case spvSwizzle::red:"); + statement(" return x.r;"); + statement("case spvSwizzle::green:"); + statement(" return x.g;"); + statement("case spvSwizzle::blue:"); + statement(" return x.b;"); + statement("case spvSwizzle::alpha:"); + statement(" return x.a;"); + statement("default:"); + statement(" break;"); + end_scope(); + statement("return 0;"); + end_scope(); + statement(""); + statement("// Wrapper function that swizzles texture samples and fetches."); + statement("template"); + statement("inline vec spvTextureSwizzle(vec x, uint s)"); + begin_scope(); + statement("if (!s)"); + statement(" return x;"); + statement( + "return vec(spvGetSwizzle(x, spvSwizzle((s >> 0) & 0x7)), spvGetSwizzle(x, spvSwizzle((s >> 3) & " + "0x7)), spvGetSwizzle(x, spvSwizzle((s >> 6) & 0x7)), spvGetSwizzle(x, spvSwizzle((s >> 9) & 0x7)));"); + end_scope(); + statement(""); + statement("template"); + statement("inline T spvTextureSwizzle(T x, uint s)"); + begin_scope(); + statement("return spvTextureSwizzle(vec(x, 0, 0, 1), s).x;"); + end_scope(); + statement(""); + statement("// Wrapper function that swizzles texture gathers."); + statement("template"); + statement("inline vec spvGatherSwizzle(sampler s, thread Tex& t, Ts... params, component c, uint sw) " + "METAL_CONST_ARG(c)"); + begin_scope(); + statement("if (sw)"); + begin_scope(); + statement("switch (spvSwizzle((sw >> (uint(c) * 3)) & 0x7))"); + begin_scope(); + statement("case spvSwizzle::none:"); + statement(" break;"); + statement("case spvSwizzle::zero:"); + statement(" return vec(0, 0, 0, 0);"); + statement("case spvSwizzle::one:"); + statement(" return vec(1, 1, 1, 1);"); + statement("case spvSwizzle::red:"); + statement(" return t.gather(s, spvForward(params)..., component::x);"); + statement("case spvSwizzle::green:"); + statement(" return t.gather(s, spvForward(params)..., component::y);"); + statement("case spvSwizzle::blue:"); + statement(" return t.gather(s, spvForward(params)..., component::z);"); + statement("case spvSwizzle::alpha:"); + statement(" return t.gather(s, spvForward(params)..., component::w);"); + end_scope(); + end_scope(); + // texture::gather insists on its component parameter being a constant + // expression, so we need this silly workaround just to compile the shader. + statement("switch (c)"); + begin_scope(); + statement("case component::x:"); + statement(" return t.gather(s, spvForward(params)..., component::x);"); + statement("case component::y:"); + statement(" return t.gather(s, spvForward(params)..., component::y);"); + statement("case component::z:"); + statement(" return t.gather(s, spvForward(params)..., component::z);"); + statement("case component::w:"); + statement(" return t.gather(s, spvForward(params)..., component::w);"); + end_scope(); + end_scope(); + statement(""); + default: break; } @@ -2680,11 +2787,41 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &) } // Returns the texture sampling function string for the specified image and sampling characteristics. -string CompilerMSL::to_function_name(uint32_t img, const SPIRType &, bool is_fetch, bool is_gather, bool, bool, bool, - bool, bool has_dref, uint32_t) +string CompilerMSL::to_function_name(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool, bool, + bool, bool, bool has_dref, uint32_t) { + // Special-case gather. We have to alter the component being looked up + // in the swizzle case. + if (msl_options.swizzle_texture_samples && is_gather && !imgtype.image.depth) + { + string fname = "spvGatherSwizzle<" + type_to_glsl(get(imgtype.image.type)) + ", " + type_to_glsl(imgtype); + // Add the arg types ourselves. Yes, this sucks, but Clang can't + // deduce template pack parameters in the middle of an argument list. + switch (imgtype.image.dim) + { + case Dim2D: + fname += ", float2"; + if (imgtype.image.arrayed) + fname += ", uint"; + fname += ", int2"; + break; + case DimCube: + fname += ", float3"; + if (imgtype.image.arrayed) + fname += ", uint"; + break; + default: + SPIRV_CROSS_THROW("Invalid texture dimension for gather op."); + } + fname += ">"; + return fname; + } + // Texture reference string fname = to_expression(img) + "."; + if (msl_options.swizzle_texture_samples && !is_gather && imgtype.image.sampled == 1 && + imgtype.image.dim != DimBuffer) + fname = "spvTextureSwizzle(" + fname; // Texture function and sampler if (is_fetch) @@ -2701,7 +2838,7 @@ string CompilerMSL::to_function_name(uint32_t img, const SPIRType &, bool is_fet } // Returns the function args for a texture sampling function for the specified image and sampling characteristics. -string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool, bool is_proj, +string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, uint32_t coord, uint32_t, uint32_t dref, uint32_t grad_x, uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias, uint32_t comp, uint32_t sample, bool *p_forward) @@ -2710,6 +2847,13 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool if (!is_fetch) farg_str += to_sampler_expression(img); + if (msl_options.swizzle_texture_samples && is_gather && !imgtype.image.depth) + { + if (!farg_str.empty()) + farg_str += ", "; + farg_str += to_expression(img); + } + // Texture coordinates bool forward = should_forward(coord); auto coord_expr = to_enclosed_expression(coord); @@ -2958,6 +3102,22 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool farg_str += to_expression(sample); } + if (msl_options.swizzle_texture_samples && imgtype.image.sampled == 1 && imgtype.image.dim != DimBuffer && + (!is_gather || !imgtype.image.depth)) + { + // Add the swizzle constant from the swizzle buffer. + if (!is_gather) + farg_str += ")"; + // Get the original input variable for this image. + uint32_t img_var = img; + if (meta[img].image) + img_var = meta[img].image; + if (auto *var = maybe_get_backing_variable(img_var)) + img_var = var->self; + farg_str += ", spvSwizzleConst[" + + convert_to_string(get_metal_resource_index(get(img_var), SPIRType::Image)) + "]"; + } + *p_forward = forward; return farg_str; @@ -3003,6 +3163,7 @@ void CompilerMSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id { set(result_id, to_expression(image_id), result_type, true); meta[result_id].sampler = samp_id; + meta[result_id].image = image_id; } // Returns a string representation of the ID, usable as a function arg. @@ -3514,6 +3675,7 @@ string CompilerMSL::entry_point_args(bool append_comma) }; vector resources; + bool has_sampled_image = false; for (auto &id : ids) { @@ -3530,6 +3692,9 @@ string CompilerMSL::entry_point_args(bool append_comma) { if (type.basetype == SPIRType::SampledImage) { + if (type.image.dim != DimBuffer) + has_sampled_image = true; + resources.push_back( { &id, to_name(var_id), SPIRType::Image, get_metal_resource_index(var, SPIRType::Image) }); @@ -3542,6 +3707,9 @@ string CompilerMSL::entry_point_args(bool append_comma) else if (constexpr_samplers.count(var_id) == 0) { // constexpr samplers are not declared as resources. + if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer) + has_sampled_image = true; + resources.push_back( { &id, to_name(var_id), type.basetype, get_metal_resource_index(var, type.basetype) }); } @@ -3553,6 +3721,15 @@ string CompilerMSL::entry_point_args(bool append_comma) return tie(lhs.basetype, lhs.index) < tie(rhs.basetype, rhs.index); }); + if (msl_options.swizzle_texture_samples && has_sampled_image) + { + // Declare a buffer to hold the swizzle constants. + if (!ep_args.empty()) + ep_args += ", "; + ep_args += "constant uint32_t* spvSwizzleConst [[buffer(" + + convert_to_string(msl_options.swizzle_constants_buffer_index) + ")]]"; + } + for (auto &r : resources) { auto &var = r.id->get(); @@ -4568,9 +4745,25 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o if (tid && compiler.get(tid).image.dim == DimBuffer) return SPVFuncImplTexelBufferCoords; + if (opcode == OpImageFetch && compiler.msl_options.swizzle_texture_samples) + return SPVFuncImplTextureSwizzle; + break; } + case OpImageSampleExplicitLod: + case OpImageSampleProjExplicitLod: + case OpImageSampleDrefExplicitLod: + case OpImageSampleProjDrefExplicitLod: + case OpImageSampleImplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageSampleDrefImplicitLod: + case OpImageSampleProjDrefImplicitLod: + case OpImageGather: + if (compiler.msl_options.swizzle_texture_samples) + return SPVFuncImplTextureSwizzle; + break; + case OpCompositeConstruct: { auto &type = compiler.get(args[0]); diff --git a/spirv_msl.hpp b/spirv_msl.hpp index cc5319fd..744ff30a 100644 --- a/spirv_msl.hpp +++ b/spirv_msl.hpp @@ -152,9 +152,11 @@ public: Platform platform = macOS; uint32_t msl_version = make_msl_version(1, 2); uint32_t texel_buffer_texture_width = 4096; // Width of 2D Metal textures used as 1D texel buffers + uint32_t swizzle_constants_buffer_index = 0; bool enable_point_size_builtin = true; bool disable_rasterization = false; bool resolve_specialized_array_lengths = true; + bool swizzle_texture_samples = false; bool is_ios() { @@ -241,6 +243,7 @@ public: SPVFuncImplRowMajor3x4, SPVFuncImplRowMajor4x2, SPVFuncImplRowMajor4x3, + SPVFuncImplTextureSwizzle, SPVFuncImplArrayCopyMultidimMax = 6 }; diff --git a/test_shaders.py b/test_shaders.py index c3657ba5..cdb49ce9 100755 --- a/test_shaders.py +++ b/test_shaders.py @@ -137,6 +137,8 @@ def cross_compile_msl(shader, spirv, opt): msl_args = [spirv_cross_path, '--entry', 'main', '--output', msl_path, spirv_path, '--msl'] msl_args.append('--msl-version') msl_args.append(path_to_msl_standard_cli(shader)) + if '.swizzle.' in shader: + msl_args.append('--msl-swizzle-texture-samples') subprocess.check_call(msl_args)