diff --git a/.travis.yml b/.travis.yml index 86974118..345107b3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,7 +13,7 @@ env: - GLSLANG_REV=f0bc598dd7871689f25514b22a82f7455d762bef SPIRV_TOOLS_REV=40e9c60ffea56f45f388835e6945b01d4d8b022d before_script: - - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install python3; fi + - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; brew install python3; fi - git clone https://github.com/KhronosGroup/glslang.git glslang - git clone https://github.com/KhronosGroup/SPIRV-Tools SPIRV-Tools - git clone https://github.com/KhronosGroup/SPIRV-Headers.git SPIRV-Tools/external/spirv-headers diff --git a/main.cpp b/main.cpp index 82b2ee56..6542f259 100644 --- a/main.cpp +++ b/main.cpp @@ -590,6 +590,17 @@ void rename_interface_variable(Compiler &compiler, const vector<Resource> &resou if (loc != rename.location) continue; + auto &type = compiler.get_type(v.base_type_id); + + // This is more of a friendly variant. If we need to rename interface variables, we might have to rename + // structs as well and make sure all the names match up. + if (type.basetype == SPIRType::Struct) + { + compiler.set_name(v.base_type_id, join("SPIRV_Cross_Interface_Location", rename.location)); + for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) + compiler.set_member_name(v.base_type_id, i, join("InterfaceMember", i)); + } + compiler.set_name(v.id, rename.variable_name); } } diff --git a/reference/shaders-hlsl/asm/comp/specialization-constant-workgroup.asm.comp b/reference/shaders-hlsl/asm/comp/specialization-constant-workgroup.asm.comp index c8ebaa8b..8243347b 100644 --- a/reference/shaders-hlsl/asm/comp/specialization-constant-workgroup.asm.comp +++ b/reference/shaders-hlsl/asm/comp/specialization-constant-workgroup.asm.comp @@ -1,6 +1,6 @@ -const uint _5 = 9u; -const uint _6 = 4u; -const uint3 gl_WorkGroupSize = uint3(_5, 20u, _6); +static const uint _5 = 9u; +static const uint _6 = 4u; +static const uint3 gl_WorkGroupSize = uint3(_5, 20u, _6); RWByteAddressBuffer _4 : register(u0); diff --git a/reference/shaders-hlsl/asm/comp/storage-buffer-basic.asm.comp b/reference/shaders-hlsl/asm/comp/storage-buffer-basic.asm.comp index 0d96bf5b..1887eaa8 100644 --- a/reference/shaders-hlsl/asm/comp/storage-buffer-basic.asm.comp +++ b/reference/shaders-hlsl/asm/comp/storage-buffer-basic.asm.comp @@ -1,6 +1,6 @@ -const uint _3 = 1u; -const uint _4 = 3u; -const uint3 gl_WorkGroupSize = uint3(_3, 2u, _4); +static const uint _3 = 1u; +static const uint _4 = 3u; +static const uint3 gl_WorkGroupSize = uint3(_3, 2u, _4); RWByteAddressBuffer _8 : register(u0); RWByteAddressBuffer _9 : register(u1); diff --git a/reference/shaders-hlsl/asm/frag/cbuffer-stripped.asm.frag b/reference/shaders-hlsl/asm/frag/cbuffer-stripped.asm.frag new file mode 100644 index 00000000..e097ab20 --- /dev/null +++ b/reference/shaders-hlsl/asm/frag/cbuffer-stripped.asm.frag @@ -0,0 +1,31 @@ +cbuffer _5 : register(b0) +{ + column_major float2x4 _4_m0 : packoffset(c0); + float4 _4_m1 : packoffset(c4); +}; + +static float2 _3; + +struct SPIRV_Cross_Output +{ + float2 _3 : SV_Target0; +}; + +float2 _23() +{ + float2 _25 = mul(_4_m0, _4_m1); + return _25; +} + +void frag_main() +{ + _3 = _23(); +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output._3 = _3; + return stage_output; +} diff --git a/reference/shaders-hlsl/asm/vert/empty-struct-composite.asm.vert b/reference/shaders-hlsl/asm/vert/empty-struct-composite.asm.vert new file mode 100644 index 00000000..103ff46a
--- /dev/null +++ b/reference/shaders-hlsl/asm/vert/empty-struct-composite.asm.vert @@ -0,0 +1,8 @@ +void vert_main() +{ +} + +void main() +{ + vert_main(); +} diff --git a/reference/shaders-hlsl/comp/atomic.comp b/reference/shaders-hlsl/comp/atomic.comp new file mode 100644 index 00000000..382d4298 --- /dev/null +++ b/reference/shaders-hlsl/comp/atomic.comp @@ -0,0 +1,90 @@ +RWByteAddressBuffer ssbo : register(u2); +RWTexture2D<uint> uImage : register(u0); +RWTexture2D<int> iImage : register(u1); + +groupshared int int_atomic; +groupshared uint uint_atomic; +groupshared int int_atomic_array[1]; +groupshared uint uint_atomic_array[1]; + +void comp_main() +{ + uint _19; + InterlockedAdd(uImage[int2(1, 5)], 1u, _19); + uint _27; + InterlockedAdd(uImage[int2(1, 5)], 1u, _27); + int _28 = int(_27); + iImage[int2(1, 6)] = int4(_28, _28, _28, _28).x; + uint _32; + InterlockedOr(uImage[int2(1, 5)], 1u, _32); + uint _34; + InterlockedXor(uImage[int2(1, 5)], 1u, _34); + uint _36; + InterlockedAnd(uImage[int2(1, 5)], 1u, _36); + uint _38; + InterlockedMin(uImage[int2(1, 5)], 1u, _38); + uint _40; + InterlockedMax(uImage[int2(1, 5)], 1u, _40); + uint _44; + InterlockedCompareExchange(uImage[int2(1, 5)], 10u, 2u, _44); + int _47; + InterlockedAdd(iImage[int2(1, 6)], 1, _47); + int _49; + InterlockedOr(iImage[int2(1, 6)], 1, _49); + int _51; + InterlockedXor(iImage[int2(1, 6)], 1, _51); + int _53; + InterlockedAnd(iImage[int2(1, 6)], 1, _53); + int _55; + InterlockedMin(iImage[int2(1, 6)], 1, _55); + int _57; + InterlockedMax(iImage[int2(1, 6)], 1, _57); + int _61; + InterlockedCompareExchange(iImage[int2(1, 5)], 10, 2, _61); + uint _68; + ssbo.InterlockedAdd(0, 1u, _68); + uint _70; + ssbo.InterlockedOr(0, 1u, _70); + uint _72; + ssbo.InterlockedXor(0, 1u, _72); + uint _74; + ssbo.InterlockedAnd(0, 1u, _74); + uint _76; + ssbo.InterlockedMin(0, 1u, _76); + uint _78; + ssbo.InterlockedMax(0, 1u, _78); + uint _80; + ssbo.InterlockedExchange(0, 1u, _80); + uint _82; + ssbo.InterlockedCompareExchange(0, 10u, 2u, _82); + int _85; + ssbo.InterlockedAdd(4, 1, _85); + int _87; + ssbo.InterlockedOr(4, 1, _87); + int _89; + ssbo.InterlockedXor(4, 1, _89); + int _91; + ssbo.InterlockedAnd(4, 1, _91); + int _93; + ssbo.InterlockedMin(4, 1, _93); + int _95; + ssbo.InterlockedMax(4, 1, _95); + int _97; + ssbo.InterlockedExchange(4, 1, _97); + int _99; + ssbo.InterlockedCompareExchange(4, 10, 2, _99); + int _102; + InterlockedAdd(int_atomic, 10, _102); + uint _105; + InterlockedAdd(uint_atomic, 10u, _105); + int _110; + InterlockedAdd(int_atomic_array[0], 10, _110); + uint _115; + InterlockedAdd(uint_atomic_array[0], 10u, _115); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl/comp/builtins.comp b/reference/shaders-hlsl/comp/builtins.comp index 45b6c030..5d84883b 100644 --- a/reference/shaders-hlsl/comp/builtins.comp +++ b/reference/shaders-hlsl/comp/builtins.comp @@ -1,4 +1,4 @@ -const uint3 gl_WorkGroupSize = uint3(8u, 4u, 2u); +static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 2u); static uint3 gl_WorkGroupID; static uint3 gl_LocalInvocationID; diff --git a/reference/shaders-hlsl/comp/image.comp b/reference/shaders-hlsl/comp/image.comp new file mode 100644 index 00000000..cb084e22 --- /dev/null +++ b/reference/shaders-hlsl/comp/image.comp @@ -0,0 +1,65 @@ +RWTexture2D<float> uImageInF : register(u0); +RWTexture2D<float> uImageOutF : register(u1); +RWTexture2D<int> uImageInI : register(u2); +RWTexture2D<int> uImageOutI : register(u3); +RWTexture2D<uint> uImageInU : register(u4); +RWTexture2D<uint> uImageOutU : 
register(u5); +RWBuffer<float> uImageInBuffer : register(u6); +RWBuffer<float> uImageOutBuffer : register(u7); +RWTexture2D<float2> uImageInF2 : register(u0); +RWTexture2D<float2> uImageOutF2 : register(u1); +RWTexture2D<int2> uImageInI2 : register(u2); +RWTexture2D<int2> uImageOutI2 : register(u3); +RWTexture2D<uint2> uImageInU2 : register(u4); +RWTexture2D<uint2> uImageOutU2 : register(u5); +RWBuffer<float2> uImageInBuffer2 : register(u6); +RWBuffer<float2> uImageOutBuffer2 : register(u7); +RWTexture2D<float4> uImageInF4 : register(u0); +RWTexture2D<float4> uImageOutF4 : register(u1); +RWTexture2D<int4> uImageInI4 : register(u2); +RWTexture2D<int4> uImageOutI4 : register(u3); +RWTexture2D<uint4> uImageInU4 : register(u4); +RWTexture2D<uint4> uImageOutU4 : register(u5); +RWBuffer<float4> uImageInBuffer4 : register(u6); +RWBuffer<float4> uImageOutBuffer4 : register(u7); + +static uint3 gl_GlobalInvocationID; +struct SPIRV_Cross_Input +{ + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; +}; + +void comp_main() +{ + float4 f = float4(uImageInF[int2(gl_GlobalInvocationID.xy)]); + uImageOutF[int2(gl_GlobalInvocationID.xy)] = f.x; + int4 i = int4(uImageInI[int2(gl_GlobalInvocationID.xy)]); + uImageOutI[int2(gl_GlobalInvocationID.xy)] = i.x; + uint4 u = uint4(uImageInU[int2(gl_GlobalInvocationID.xy)]); + uImageOutU[int2(gl_GlobalInvocationID.xy)] = u.x; + float4 b = float4(uImageInBuffer[int(gl_GlobalInvocationID.x)]); + uImageOutBuffer[int(gl_GlobalInvocationID.x)] = b.x; + float4 f2 = uImageInF2[int2(gl_GlobalInvocationID.xy)].xyyy; + uImageOutF2[int2(gl_GlobalInvocationID.xy)] = f2.xy; + int4 i2 = uImageInI2[int2(gl_GlobalInvocationID.xy)].xyyy; + uImageOutI2[int2(gl_GlobalInvocationID.xy)] = i2.xy; + uint4 u2 = uImageInU2[int2(gl_GlobalInvocationID.xy)].xyyy; + uImageOutU2[int2(gl_GlobalInvocationID.xy)] = u2.xy; + float4 b2 = uImageInBuffer2[int(gl_GlobalInvocationID.x)].xyyy; + uImageOutBuffer2[int(gl_GlobalInvocationID.x)] = b2.xy; + float4 f4 = uImageInF4[int2(gl_GlobalInvocationID.xy)]; + uImageOutF4[int2(gl_GlobalInvocationID.xy)] = f4; + int4 i4 = uImageInI4[int2(gl_GlobalInvocationID.xy)]; + uImageOutI4[int2(gl_GlobalInvocationID.xy)] = i4; + uint4 u4 = uImageInU4[int2(gl_GlobalInvocationID.xy)]; + uImageOutU4[int2(gl_GlobalInvocationID.xy)] = u4; + float4 b4 = uImageInBuffer4[int(gl_GlobalInvocationID.x)]; + uImageOutBuffer4[int(gl_GlobalInvocationID.x)] = b4; +} + +[numthreads(1, 1, 1)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + comp_main(); +} diff --git a/reference/shaders-hlsl/comp/rwbuffer-matrix.comp b/reference/shaders-hlsl/comp/rwbuffer-matrix.comp new file mode 100644 index 00000000..4ae22364 --- /dev/null +++ b/reference/shaders-hlsl/comp/rwbuffer-matrix.comp @@ -0,0 +1,136 @@ +RWByteAddressBuffer _28 : register(u0); +cbuffer _68 : register(b1) +{ + int UBO_index0 : packoffset(c0); + int UBO_index1 : packoffset(c0.y); +}; + +void row_to_col() +{ + float4x4 _55 = asfloat(uint4x4(_28.Load(64), _28.Load(80), _28.Load(96), _28.Load(112), _28.Load(68), _28.Load(84), _28.Load(100), _28.Load(116), _28.Load(72), _28.Load(88), _28.Load(104), _28.Load(120), _28.Load(76), _28.Load(92), _28.Load(108), _28.Load(124))); + _28.Store4(0, asuint(_55[0])); + _28.Store4(16, asuint(_55[1])); + _28.Store4(32, asuint(_55[2])); + _28.Store4(48, asuint(_55[3])); + float2x2 _58 = asfloat(uint2x2(_28.Load(144), _28.Load(152), _28.Load(148), _28.Load(156))); + _28.Store2(128, asuint(_58[0])); + _28.Store2(136, asuint(_58[1])); + float2x3 _61 = asfloat(uint2x3(_28.Load(192), _28.Load(200), _28.Load(208), _28.Load(196), _28.Load(204), _28.Load(212))); 
_28.Store3(160, asuint(_61[0])); + _28.Store3(176, asuint(_61[1])); + float3x2 _64 = asfloat(uint3x2(_28.Load(240), _28.Load(256), _28.Load(244), _28.Load(260), _28.Load(248), _28.Load(264))); + _28.Store2(216, asuint(_64[0])); + _28.Store2(224, asuint(_64[1])); + _28.Store2(232, asuint(_64[2])); +} + +void col_to_row() +{ + float4x4 _34 = asfloat(uint4x4(_28.Load4(0), _28.Load4(16), _28.Load4(32), _28.Load4(48))); + _28.Store(64, asuint(_34[0].x)); + _28.Store(68, asuint(_34[1].x)); + _28.Store(72, asuint(_34[2].x)); + _28.Store(76, asuint(_34[3].x)); + _28.Store(80, asuint(_34[0].y)); + _28.Store(84, asuint(_34[1].y)); + _28.Store(88, asuint(_34[2].y)); + _28.Store(92, asuint(_34[3].y)); + _28.Store(96, asuint(_34[0].z)); + _28.Store(100, asuint(_34[1].z)); + _28.Store(104, asuint(_34[2].z)); + _28.Store(108, asuint(_34[3].z)); + _28.Store(112, asuint(_34[0].w)); + _28.Store(116, asuint(_34[1].w)); + _28.Store(120, asuint(_34[2].w)); + _28.Store(124, asuint(_34[3].w)); + float2x2 _40 = asfloat(uint2x2(_28.Load2(128), _28.Load2(136))); + _28.Store(144, asuint(_40[0].x)); + _28.Store(148, asuint(_40[1].x)); + _28.Store(152, asuint(_40[0].y)); + _28.Store(156, asuint(_40[1].y)); + float2x3 _46 = asfloat(uint2x3(_28.Load3(160), _28.Load3(176))); + _28.Store(192, asuint(_46[0].x)); + _28.Store(196, asuint(_46[1].x)); + _28.Store(200, asuint(_46[0].y)); + _28.Store(204, asuint(_46[1].y)); + _28.Store(208, asuint(_46[0].z)); + _28.Store(212, asuint(_46[1].z)); + float3x2 _52 = asfloat(uint3x2(_28.Load2(216), _28.Load2(224), _28.Load2(232))); + _28.Store(240, asuint(_52[0].x)); + _28.Store(244, asuint(_52[1].x)); + _28.Store(248, asuint(_52[2].x)); + _28.Store(256, asuint(_52[0].y)); + _28.Store(260, asuint(_52[1].y)); + _28.Store(264, asuint(_52[2].y)); +} + +void write_dynamic_index_row() +{ + _28.Store(UBO_index0 * 4 + UBO_index1 * 16 + 64, asuint(1.0f)); + _28.Store(UBO_index0 * 4 + UBO_index1 * 8 + 144, asuint(2.0f)); + _28.Store(UBO_index0 * 4 + UBO_index1 * 8 + 192, asuint(3.0f)); + _28.Store(UBO_index0 * 4 + UBO_index1 * 16 + 240, asuint(4.0f)); + _28.Store(UBO_index0 * 4 + 64, asuint(float4(1.0f, 1.0f, 1.0f, 1.0f).x)); + _28.Store(UBO_index0 * 4 + 80, asuint(float4(1.0f, 1.0f, 1.0f, 1.0f).y)); + _28.Store(UBO_index0 * 4 + 96, asuint(float4(1.0f, 1.0f, 1.0f, 1.0f).z)); + _28.Store(UBO_index0 * 4 + 112, asuint(float4(1.0f, 1.0f, 1.0f, 1.0f).w)); + _28.Store(UBO_index0 * 4 + 144, asuint(float2(2.0f, 2.0f).x)); + _28.Store(UBO_index0 * 4 + 152, asuint(float2(2.0f, 2.0f).y)); + _28.Store(UBO_index0 * 4 + 192, asuint(float3(3.0f, 3.0f, 3.0f).x)); + _28.Store(UBO_index0 * 4 + 200, asuint(float3(3.0f, 3.0f, 3.0f).y)); + _28.Store(UBO_index0 * 4 + 208, asuint(float3(3.0f, 3.0f, 3.0f).z)); + _28.Store(UBO_index0 * 4 + 240, asuint(float2(4.0f, 4.0f).x)); + _28.Store(UBO_index0 * 4 + 256, asuint(float2(4.0f, 4.0f).y)); +} + +void write_dynamic_index_col() +{ + _28.Store(UBO_index0 * 16 + UBO_index1 * 4 + 0, asuint(1.0f)); + _28.Store(UBO_index0 * 8 + UBO_index1 * 4 + 128, asuint(2.0f)); + _28.Store(UBO_index0 * 16 + UBO_index1 * 4 + 160, asuint(3.0f)); + _28.Store(UBO_index0 * 8 + UBO_index1 * 4 + 216, asuint(4.0f)); + _28.Store4(UBO_index0 * 16 + 0, asuint(float4(1.0f, 1.0f, 1.0f, 1.0f))); + _28.Store2(UBO_index0 * 8 + 128, asuint(float2(2.0f, 2.0f))); + _28.Store3(UBO_index0 * 16 + 160, asuint(float3(3.0f, 3.0f, 3.0f))); + _28.Store2(UBO_index0 * 8 + 216, asuint(float2(4.0f, 4.0f))); +} + +void read_dynamic_index_row() +{ + float a0 = asfloat(_28.Load(UBO_index0 * 4 + UBO_index1 * 16 + 64)); + 
float a1 = asfloat(_28.Load(UBO_index0 * 4 + UBO_index1 * 8 + 144)); + float a2 = asfloat(_28.Load(UBO_index0 * 4 + UBO_index1 * 8 + 192)); + float a3 = asfloat(_28.Load(UBO_index0 * 4 + UBO_index1 * 16 + 240)); + float4 v0 = asfloat(uint4(_28.Load(UBO_index0 * 4 + 64), _28.Load(UBO_index0 * 4 + 80), _28.Load(UBO_index0 * 4 + 96), _28.Load(UBO_index0 * 4 + 112))); + float2 v1 = asfloat(uint2(_28.Load(UBO_index0 * 4 + 144), _28.Load(UBO_index0 * 4 + 152))); + float3 v2 = asfloat(uint3(_28.Load(UBO_index0 * 4 + 192), _28.Load(UBO_index0 * 4 + 200), _28.Load(UBO_index0 * 4 + 208))); + float2 v3 = asfloat(uint2(_28.Load(UBO_index0 * 4 + 240), _28.Load(UBO_index0 * 4 + 256))); +} + +void read_dynamic_index_col() +{ + float a0 = asfloat(_28.Load(UBO_index0 * 16 + UBO_index1 * 4 + 0)); + float a1 = asfloat(_28.Load(UBO_index0 * 8 + UBO_index1 * 4 + 128)); + float a2 = asfloat(_28.Load(UBO_index0 * 16 + UBO_index1 * 4 + 160)); + float a3 = asfloat(_28.Load(UBO_index0 * 8 + UBO_index1 * 4 + 216)); + float4 v0 = asfloat(_28.Load4(UBO_index0 * 16 + 0)); + float2 v1 = asfloat(_28.Load2(UBO_index0 * 8 + 128)); + float3 v2 = asfloat(_28.Load3(UBO_index0 * 16 + 160)); + float2 v3 = asfloat(_28.Load2(UBO_index0 * 8 + 216)); +} + +void comp_main() +{ + row_to_col(); + col_to_row(); + write_dynamic_index_row(); + write_dynamic_index_col(); + read_dynamic_index_row(); + read_dynamic_index_col(); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl/comp/shared.comp b/reference/shaders-hlsl/comp/shared.comp new file mode 100644 index 00000000..40ba1e46 --- /dev/null +++ b/reference/shaders-hlsl/comp/shared.comp @@ -0,0 +1,31 @@ +static const uint3 gl_WorkGroupSize = uint3(4u, 1u, 1u); + +ByteAddressBuffer _22 : register(u0); +RWByteAddressBuffer _44 : register(u1); + +static uint3 gl_GlobalInvocationID; +static uint gl_LocalInvocationIndex; +struct SPIRV_Cross_Input +{ + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; + uint gl_LocalInvocationIndex : SV_GroupIndex; +}; + +groupshared float sShared[4]; + +void comp_main() +{ + uint ident = gl_GlobalInvocationID.x; + float idata = asfloat(_22.Load(ident * 4 + 0)); + sShared[gl_LocalInvocationIndex] = idata; + GroupMemoryBarrierWithGroupSync(); + _44.Store(ident * 4 + 0, asuint(sShared[(4u - gl_LocalInvocationIndex) - 1u])); +} + +[numthreads(4, 1, 1)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex; + comp_main(); +} diff --git a/reference/shaders-hlsl/frag/constant-buffer-array.frag b/reference/shaders-hlsl/frag/constant-buffer-array.frag deleted file mode 100644 index 23c0e460..00000000 --- a/reference/shaders-hlsl/frag/constant-buffer-array.frag +++ /dev/null @@ -1,51 +0,0 @@ -struct _CBO -{ - float4 a; - float4 b; - float4 c; - float4 d; -}; - -cbuffer CBO : register(b4) -{ - _CBO cbo[2][4]; -}; -struct _PushMe -{ - float4 a; - float4 b; - float4 c; - float4 d; -}; - -cbuffer PushMe -{ - _PushMe push; -}; - -static float4 FragColor; - -struct SPIRV_Cross_Output -{ - float4 FragColor : SV_Target0; -}; - -void frag_main() -{ - FragColor = cbo[1][2].a; - FragColor += cbo[1][2].b; - FragColor += cbo[1][2].c; - FragColor += cbo[1][2].d; - FragColor += push.a; - FragColor += push.b; - FragColor += push.c; - FragColor += push.d; -} - -SPIRV_Cross_Output main() -{ - frag_main(); - SPIRV_Cross_Output stage_output; - stage_output.FragColor = FragColor; - return stage_output; -} diff --git 
a/reference/shaders-hlsl/frag/constant-buffer-array.sm51.frag b/reference/shaders-hlsl/frag/constant-buffer-array.sm51.frag index f3daa9e2..72c707e5 100644 --- a/reference/shaders-hlsl/frag/constant-buffer-array.sm51.frag +++ b/reference/shaders-hlsl/frag/constant-buffer-array.sm51.frag @@ -1,4 +1,4 @@ -struct CBO +struct CBO_1 { float4 a; float4 b; @@ -6,17 +6,15 @@ struct CBO float4 d; }; -ConstantBuffer<CBO> cbo[2][4] : register(b4); -struct PushMe +ConstantBuffer<CBO_1> cbo[2][4] : register(b4); +cbuffer push { - float4 a; - float4 b; - float4 c; - float4 d; + float4 PushMe_a : packoffset(c0); + float4 PushMe_b : packoffset(c1); + float4 PushMe_c : packoffset(c2); + float4 PushMe_d : packoffset(c3); }; -ConstantBuffer<PushMe> push; - static float4 FragColor; struct SPIRV_Cross_Output @@ -30,10 +28,10 @@ void frag_main() FragColor += cbo[1][2].b; FragColor += cbo[1][2].c; FragColor += cbo[1][2].d; - FragColor += push.a; - FragColor += push.b; - FragColor += push.c; - FragColor += push.d; + FragColor += PushMe_a; + FragColor += PushMe_b; + FragColor += PushMe_c; + FragColor += PushMe_d; } SPIRV_Cross_Output main() diff --git a/reference/shaders-hlsl/frag/mod.frag b/reference/shaders-hlsl/frag/mod.frag new file mode 100644 index 00000000..43407cbb --- /dev/null +++ b/reference/shaders-hlsl/frag/mod.frag @@ -0,0 +1,71 @@ +static float4 a4; +static float4 b4; +static float3 a3; +static float3 b3; +static float2 a2; +static float2 b2; +static float a1; +static float b1; +static float4 FragColor; + +struct SPIRV_Cross_Input +{ + float4 a4 : TEXCOORD0; + float3 a3 : TEXCOORD1; + float2 a2 : TEXCOORD2; + float a1 : TEXCOORD3; + float4 b4 : TEXCOORD4; + float3 b3 : TEXCOORD5; + float2 b2 : TEXCOORD6; + float b1 : TEXCOORD7; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +float mod(float x, float y) +{ + return x - y * floor(x / y); +} + +float2 mod(float2 x, float2 y) +{ + return x - y * floor(x / y); +} + +float3 mod(float3 x, float3 y) +{ + return x - y * floor(x / y); +} + +float4 mod(float4 x, float4 y) +{ + return x - y * floor(x / y); +} + +void frag_main() +{ + float4 m0 = mod(a4, b4); + float3 m1 = mod(a3, b3); + float2 m2 = mod(a2, b2); + float m3 = mod(a1, b1); + FragColor = ((m0 + m1.xyzx) + m2.xyxy) + float4(m3, m3, m3, m3); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + a4 = stage_input.a4; + b4 = stage_input.b4; + a3 = stage_input.a3; + b3 = stage_input.b3; + a2 = stage_input.a2; + b2 = stage_input.b2; + a1 = stage_input.a1; + b1 = stage_input.b1; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/frag/partial-write-preserve.frag b/reference/shaders-hlsl/frag/partial-write-preserve.frag index 20f52c1d..ebe3ad1a 100644 --- a/reference/shaders-hlsl/frag/partial-write-preserve.frag +++ b/reference/shaders-hlsl/frag/partial-write-preserve.frag @@ -4,14 +4,9 @@ struct B float b; }; -struct _UBO +cbuffer _42 : register(b0) { - int some_value; -}; - -cbuffer UBO : register(b0) -{ - _UBO _42; + int UBO_some_value : packoffset(c0); }; void partial_inout(inout float4 x) @@ -27,7 +22,7 @@ void complete_inout(out float4 x) void branchy_inout(inout float4 v) { v.y = 20.0f; - if (_42.some_value == 20) + if (UBO_some_value == 20) { v = float4(50.0f, 50.0f, 50.0f, 50.0f); } @@ -35,7 +30,7 @@ void branchy_inout_2(out float4 v) { - if (_42.some_value == 20) + if (UBO_some_value == 20) { v = float4(50.0f, 50.0f, 50.0f, 50.0f); } diff --git 
a/reference/shaders-hlsl/frag/resources.frag b/reference/shaders-hlsl/frag/resources.frag index df9ce711..12237101 100644 --- a/reference/shaders-hlsl/frag/resources.frag +++ b/reference/shaders-hlsl/frag/resources.frag @@ -1,20 +1,10 @@ -struct _CBuffer +cbuffer cbuf : register(b3) { - float4 a; + float4 CBuffer_a : packoffset(c0); }; - -cbuffer CBuffer : register(b3) +cbuffer registers { - _CBuffer cbuf; -}; -struct _PushMe -{ - float4 d; -}; - -cbuffer PushMe -{ - _PushMe registers; + float4 PushMe_d : packoffset(c0); }; Texture2D<float4> uSampledImage : register(t4); SamplerState _uSampledImage_sampler : register(s4); @@ -38,7 +28,7 @@ void frag_main() { float4 c0 = uSampledImage.Sample(_uSampledImage_sampler, vTex); float4 c1 = uTexture.Sample(uSampler, vTex); - float4 c2 = cbuf.a + registers.d; + float4 c2 = CBuffer_a + PushMe_d; FragColor = (c0 + c1) + c2; } diff --git a/reference/shaders-hlsl/frag/spec-constant.frag b/reference/shaders-hlsl/frag/spec-constant.frag index 63873d29..539ca24f 100644 --- a/reference/shaders-hlsl/frag/spec-constant.frag +++ b/reference/shaders-hlsl/frag/spec-constant.frag @@ -1,11 +1,11 @@ -const float a = 1.0f; -const float b = 2.0f; -const int c = 3; -const int d = 4; -const uint e = 5u; -const uint f = 6u; -const bool g = false; -const bool h = true; +static const float a = 1.0f; +static const float b = 2.0f; +static const int c = 3; +static const int d = 4; +static const uint e = 5u; +static const uint f = 6u; +static const bool g = false; +static const bool h = true; struct Foo { diff --git a/reference/shaders-hlsl/vert/basic.vert b/reference/shaders-hlsl/vert/basic.vert index a779f2df..61aa8a55 100644 --- a/reference/shaders-hlsl/vert/basic.vert +++ b/reference/shaders-hlsl/vert/basic.vert @@ -1,11 +1,6 @@ -struct _UBO +cbuffer _16 { - float4x4 uMVP; -}; - -cbuffer UBO -{ - _UBO _16; + row_major float4x4 UBO_uMVP : packoffset(c0); }; static float4 gl_Position; @@ -27,7 +22,7 @@ struct SPIRV_Cross_Output void vert_main() { - gl_Position = mul(aVertex, _16.uMVP); + gl_Position = mul(aVertex, UBO_uMVP); vNormal = aNormal; } diff --git a/reference/shaders-hlsl/vert/texture_buffer.vert b/reference/shaders-hlsl/vert/texture_buffer.vert new file mode 100644 index 00000000..1c92f6fe --- /dev/null +++ b/reference/shaders-hlsl/vert/texture_buffer.vert @@ -0,0 +1,21 @@ +Buffer<float4> uSamp : register(t4); +RWBuffer<float4> uSampo : register(u5); + +static float4 gl_Position; +struct SPIRV_Cross_Output +{ + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = uSamp.Load(10) + uSampo[100]; +} + +SPIRV_Cross_Output main() +{ + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + return stage_output; +} diff --git a/reference/shaders-msl/asm/vert/empty-struct-composite.asm.vert b/reference/shaders-msl/asm/vert/empty-struct-composite.asm.vert new file mode 100644 index 00000000..9e024c20 --- /dev/null +++ b/reference/shaders-msl/asm/vert/empty-struct-composite.asm.vert @@ -0,0 +1,9 @@ +#include <metal_stdlib> +#include <simd/simd.h> + +using namespace metal; + +vertex void main0() +{ +} + diff --git a/reference/shaders/asm/vert/empty-struct-composite.asm.vert b/reference/shaders/asm/vert/empty-struct-composite.asm.vert new file mode 100644 index 00000000..05ce10ad --- /dev/null +++ b/reference/shaders/asm/vert/empty-struct-composite.asm.vert @@ -0,0 +1,6 @@ +#version 450 + +void main() +{ +} + diff --git a/reference/shaders/comp/struct-packing.comp b/reference/shaders/comp/struct-packing.comp index a6ea8d58..3c30aa60 100644 --- 
a/reference/shaders/comp/struct-packing.comp +++ b/reference/shaders/comp/struct-packing.comp @@ -64,6 +64,14 @@ layout(binding = 0, std140) buffer SSBO0 Content content; Content content1[2]; Content content2; + mat2 m0; + mat2 m1; + mat2x3 m2[4]; + mat3x2 m3; + layout(row_major) mat2 m4; + layout(row_major) mat2 m5[9]; + layout(row_major) mat2x3 m6[4][2]; + layout(row_major) mat3x2 m7; float array[]; } ssbo_140; diff --git a/reference/shaders/desktop-only/comp/enhanced-layouts.comp b/reference/shaders/desktop-only/comp/enhanced-layouts.comp new file mode 100644 index 00000000..ba37ca23 --- /dev/null +++ b/reference/shaders/desktop-only/comp/enhanced-layouts.comp @@ -0,0 +1,40 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +struct Foo +{ + int a; + int b; + int c; +}; + +layout(binding = 1, std140) buffer SSBO1 +{ + layout(offset = 4) int a; + layout(offset = 8) int b; + layout(offset = 16) Foo foo; + layout(offset = 48) int c[8]; +} ssbo1; + +layout(binding = 2, std430) buffer SSBO2 +{ + layout(offset = 4) int a; + layout(offset = 8) int b; + layout(offset = 16) Foo foo; + layout(offset = 48) int c[8]; +} ssbo2; + +layout(binding = 0, std140) uniform UBO +{ + layout(offset = 4) int a; + layout(offset = 8) int b; + layout(offset = 16) Foo foo; + layout(offset = 48) int c[8]; +} ubo; + +void main() +{ + ssbo1.a = ssbo2.a; + ssbo1.b = ubo.b; +} + diff --git a/shaders-hlsl/asm/frag/cbuffer-stripped.asm.frag b/shaders-hlsl/asm/frag/cbuffer-stripped.asm.frag new file mode 100644 index 00000000..d778034b --- /dev/null +++ b/shaders-hlsl/asm/frag/cbuffer-stripped.asm.frag @@ -0,0 +1,55 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 1 +; Bound: 34 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %_entryPointOutput + OpExecutionMode %main OriginUpperLeft + OpSource HLSL 500 + OpMemberDecorate %UBO 0 RowMajor + OpMemberDecorate %UBO 0 Offset 0 + OpMemberDecorate %UBO 0 MatrixStride 16 + OpMemberDecorate %UBO 1 Offset 64 + OpDecorate %UBO Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %_entryPointOutput Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 + %8 = OpTypeFunction %v2float +%_ptr_Function_v2float = OpTypePointer Function %v2float + %v4float = OpTypeVector %float 4 +%mat2v4float = OpTypeMatrix %v4float 2 + %UBO = OpTypeStruct %mat2v4float %v4float +%_ptr_Uniform_UBO = OpTypePointer Uniform %UBO + %_ = OpVariable %_ptr_Uniform_UBO Uniform + %int = OpTypeInt 32 1 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %int_0 = OpConstant %int 0 +%_ptr_Uniform_mat2v4float = OpTypePointer Uniform %mat2v4float +%_ptr_Output_v2float = OpTypePointer Output %v2float +%_entryPointOutput = OpVariable %_ptr_Output_v2float Output + %main = OpFunction %void None %3 + %5 = OpLabel + %33 = OpFunctionCall %v2float %_main_ + OpStore %_entryPointOutput %33 + OpReturn + OpFunctionEnd + %_main_ = OpFunction %v2float None %8 + %10 = OpLabel + %a0 = OpVariable %_ptr_Function_v2float Function + %21 = OpAccessChain %_ptr_Uniform_v4float %_ %int_1 + %22 = OpLoad %v4float %21 + %25 = OpAccessChain %_ptr_Uniform_mat2v4float %_ %int_0 + %26 = OpLoad %mat2v4float %25 + %27 = OpVectorTimesMatrix %v2float %22 %26 + OpStore %a0 %27 + %28 = OpLoad %v2float %a0 + OpReturnValue %28 + OpFunctionEnd diff --git 
a/shaders-hlsl/asm/vert/empty-struct-composite.asm.vert b/shaders-hlsl/asm/vert/empty-struct-composite.asm.vert new file mode 100644 index 00000000..37a2d879 --- /dev/null +++ b/shaders-hlsl/asm/vert/empty-struct-composite.asm.vert @@ -0,0 +1,37 @@ +; SPIR-V +; Version: 1.1 +; Generator: Google rspirv; 0 +; Bound: 17 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %2 "main" + OpExecutionMode %2 OriginUpperLeft + OpName %Test "Test" + OpName %t "t" + OpName %retvar "retvar" + OpName %main "main" + OpName %retvar_0 "retvar" + %void = OpTypeVoid + %6 = OpTypeFunction %void + %Test = OpTypeStruct +%_ptr_Function_Test = OpTypePointer Function %Test +%_ptr_Function_void = OpTypePointer Function %void + %2 = OpFunction %void None %6 + %7 = OpLabel + %t = OpVariable %_ptr_Function_Test Function + %retvar = OpVariable %_ptr_Function_void Function + OpBranch %4 + %4 = OpLabel + %13 = OpCompositeConstruct %Test + OpStore %t %13 + OpReturn + OpFunctionEnd + %main = OpFunction %void None %6 + %15 = OpLabel + %retvar_0 = OpVariable %_ptr_Function_void Function + OpBranch %14 + %14 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl/comp/atomic.comp b/shaders-hlsl/comp/atomic.comp new file mode 100644 index 00000000..6f69ec72 --- /dev/null +++ b/shaders-hlsl/comp/atomic.comp @@ -0,0 +1,66 @@ +#version 310 es +#extension GL_OES_shader_image_atomic : require +layout(local_size_x = 1) in; + +layout(r32ui, binding = 0) uniform highp uimage2D uImage; +layout(r32i, binding = 1) uniform highp iimage2D iImage; +layout(binding = 2, std430) buffer SSBO +{ + uint u32; + int i32; +} ssbo; + +shared int int_atomic; +shared uint uint_atomic; +shared int int_atomic_array[1]; +shared uint uint_atomic_array[1]; + +void main() +{ + imageAtomicAdd(uImage, ivec2(1, 5), 1u); + + // Test that we do not invalidate OpImage variables which are loaded from UniformConstant + // address space. 
+ imageStore(iImage, ivec2(1, 6), ivec4(imageAtomicAdd(uImage, ivec2(1, 5), 1u))); + + imageAtomicOr(uImage, ivec2(1, 5), 1u); + imageAtomicXor(uImage, ivec2(1, 5), 1u); + imageAtomicAnd(uImage, ivec2(1, 5), 1u); + imageAtomicMin(uImage, ivec2(1, 5), 1u); + imageAtomicMax(uImage, ivec2(1, 5), 1u); + //imageAtomicExchange(uImage, ivec2(1, 5), 1u); + imageAtomicCompSwap(uImage, ivec2(1, 5), 10u, 2u); + + imageAtomicAdd(iImage, ivec2(1, 6), 1); + imageAtomicOr(iImage, ivec2(1, 6), 1); + imageAtomicXor(iImage, ivec2(1, 6), 1); + imageAtomicAnd(iImage, ivec2(1, 6), 1); + imageAtomicMin(iImage, ivec2(1, 6), 1); + imageAtomicMax(iImage, ivec2(1, 6), 1); + //imageAtomicExchange(iImage, ivec2(1, 5), 1u); + imageAtomicCompSwap(iImage, ivec2(1, 5), 10, 2); + + atomicAdd(ssbo.u32, 1u); + atomicOr(ssbo.u32, 1u); + atomicXor(ssbo.u32, 1u); + atomicAnd(ssbo.u32, 1u); + atomicMin(ssbo.u32, 1u); + atomicMax(ssbo.u32, 1u); + atomicExchange(ssbo.u32, 1u); + atomicCompSwap(ssbo.u32, 10u, 2u); + + atomicAdd(ssbo.i32, 1); + atomicOr(ssbo.i32, 1); + atomicXor(ssbo.i32, 1); + atomicAnd(ssbo.i32, 1); + atomicMin(ssbo.i32, 1); + atomicMax(ssbo.i32, 1); + atomicExchange(ssbo.i32, 1); + atomicCompSwap(ssbo.i32, 10, 2); + + atomicAdd(int_atomic, 10); + atomicAdd(uint_atomic, 10u); + atomicAdd(int_atomic_array[0], 10); + atomicAdd(uint_atomic_array[0], 10u); +} + diff --git a/shaders-hlsl/comp/image.comp b/shaders-hlsl/comp/image.comp new file mode 100644 index 00000000..218af74d --- /dev/null +++ b/shaders-hlsl/comp/image.comp @@ -0,0 +1,69 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(r32f, binding = 0) uniform readonly image2D uImageInF; +layout(r32f, binding = 1) uniform writeonly image2D uImageOutF; +layout(r32i, binding = 2) uniform readonly iimage2D uImageInI; +layout(r32i, binding = 3) uniform writeonly iimage2D uImageOutI; +layout(r32ui, binding = 4) uniform readonly uimage2D uImageInU; +layout(r32ui, binding = 5) uniform writeonly uimage2D uImageOutU; +layout(r32f, binding = 6) uniform readonly imageBuffer uImageInBuffer; +layout(r32f, binding = 7) uniform writeonly imageBuffer uImageOutBuffer; + +layout(rg32f, binding = 0) uniform readonly image2D uImageInF2; +layout(rg32f, binding = 1) uniform writeonly image2D uImageOutF2; +layout(rg32i, binding = 2) uniform readonly iimage2D uImageInI2; +layout(rg32i, binding = 3) uniform writeonly iimage2D uImageOutI2; +layout(rg32ui, binding = 4) uniform readonly uimage2D uImageInU2; +layout(rg32ui, binding = 5) uniform writeonly uimage2D uImageOutU2; +layout(rg32f, binding = 6) uniform readonly imageBuffer uImageInBuffer2; +layout(rg32f, binding = 7) uniform writeonly imageBuffer uImageOutBuffer2; + +layout(rgba32f, binding = 0) uniform readonly image2D uImageInF4; +layout(rgba32f, binding = 1) uniform writeonly image2D uImageOutF4; +layout(rgba32i, binding = 2) uniform readonly iimage2D uImageInI4; +layout(rgba32i, binding = 3) uniform writeonly iimage2D uImageOutI4; +layout(rgba32ui, binding = 4) uniform readonly uimage2D uImageInU4; +layout(rgba32ui, binding = 5) uniform writeonly uimage2D uImageOutU4; +layout(rgba32f, binding = 6) uniform readonly imageBuffer uImageInBuffer4; +layout(rgba32f, binding = 7) uniform writeonly imageBuffer uImageOutBuffer4; + +void main() +{ + vec4 f = imageLoad(uImageInF, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutF, ivec2(gl_GlobalInvocationID.xy), f); + + ivec4 i = imageLoad(uImageInI, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutI, ivec2(gl_GlobalInvocationID.xy), i); + + uvec4 u = 
imageLoad(uImageInU, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutU, ivec2(gl_GlobalInvocationID.xy), u); + + vec4 b = imageLoad(uImageInBuffer, int(gl_GlobalInvocationID.x)); + imageStore(uImageOutBuffer, int(gl_GlobalInvocationID.x), b); + + vec4 f2 = imageLoad(uImageInF2, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutF2, ivec2(gl_GlobalInvocationID.xy), f2); + + ivec4 i2 = imageLoad(uImageInI2, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutI2, ivec2(gl_GlobalInvocationID.xy), i2); + + uvec4 u2 = imageLoad(uImageInU2, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutU2, ivec2(gl_GlobalInvocationID.xy), u2); + + vec4 b2 = imageLoad(uImageInBuffer2, int(gl_GlobalInvocationID.x)); + imageStore(uImageOutBuffer2, int(gl_GlobalInvocationID.x), b2); + + vec4 f4 = imageLoad(uImageInF4, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutF4, ivec2(gl_GlobalInvocationID.xy), f4); + + ivec4 i4 = imageLoad(uImageInI4, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutI4, ivec2(gl_GlobalInvocationID.xy), i4); + + uvec4 u4 = imageLoad(uImageInU4, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutU4, ivec2(gl_GlobalInvocationID.xy), u4); + + vec4 b4 = imageLoad(uImageInBuffer4, int(gl_GlobalInvocationID.x)); + imageStore(uImageOutBuffer4, int(gl_GlobalInvocationID.x), b4); +} + diff --git a/shaders-hlsl/comp/rwbuffer-matrix.comp b/shaders-hlsl/comp/rwbuffer-matrix.comp new file mode 100644 index 00000000..0e722e0a --- /dev/null +++ b/shaders-hlsl/comp/rwbuffer-matrix.comp @@ -0,0 +1,104 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std140, binding = 1) uniform UBO +{ + int index0; + int index1; +}; + +layout(binding = 0, std430) buffer SSBO +{ + layout(column_major) mat4 mcol; + layout(row_major) mat4 mrow; + + layout(column_major) mat2 mcol2x2; + layout(row_major) mat2 mrow2x2; + + layout(column_major) mat2x3 mcol2x3; + layout(row_major) mat2x3 mrow2x3; + + layout(column_major) mat3x2 mcol3x2; + layout(row_major) mat3x2 mrow3x2; +}; + +void col_to_row() +{ + // Load column-major, store row-major. + mrow = mcol; + mrow2x2 = mcol2x2; + mrow2x3 = mcol2x3; + mrow3x2 = mcol3x2; +} + +void row_to_col() +{ + // Load row-major, store column-major. 
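+ // (RWByteAddressBuffer has no matrix accessors, so the row-major side of these copies is accessed one 32-bit scalar at a time in the generated HLSL; see row_to_col() in the reference output above.)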
+ mcol = mrow; + mcol2x2 = mrow2x2; + mcol2x3 = mrow2x3; + mcol3x2 = mrow3x2; +} + +void write_dynamic_index_row() +{ + mrow[index0][index1] = 1.0; + mrow2x2[index0][index1] = 2.0; + mrow2x3[index0][index1] = 3.0; + mrow3x2[index0][index1] = 4.0; + + mrow[index0] = vec4(1.0); + mrow2x2[index0] = vec2(2.0); + mrow2x3[index0] = vec3(3.0); + mrow3x2[index0] = vec2(4.0); +} + +void write_dynamic_index_col() +{ + mcol[index0][index1] = 1.0; + mcol2x2[index0][index1] = 2.0; + mcol2x3[index0][index1] = 3.0; + mcol3x2[index0][index1] = 4.0; + + mcol[index0] = vec4(1.0); + mcol2x2[index0] = vec2(2.0); + mcol2x3[index0] = vec3(3.0); + mcol3x2[index0] = vec2(4.0); +} + +void read_dynamic_index_row() +{ + float a0 = mrow[index0][index1]; + float a1 = mrow2x2[index0][index1]; + float a2 = mrow2x3[index0][index1]; + float a3 = mrow3x2[index0][index1]; + + vec4 v0 = mrow[index0]; + vec2 v1 = mrow2x2[index0]; + vec3 v2 = mrow2x3[index0]; + vec2 v3 = mrow3x2[index0]; +} + +void read_dynamic_index_col() +{ + float a0 = mcol[index0][index1]; + float a1 = mcol2x2[index0][index1]; + float a2 = mcol2x3[index0][index1]; + float a3 = mcol3x2[index0][index1]; + + vec4 v0 = mcol[index0]; + vec2 v1 = mcol2x2[index0]; + vec3 v2 = mcol2x3[index0]; + vec2 v3 = mcol3x2[index0]; +} + +void main() +{ + row_to_col(); + col_to_row(); + write_dynamic_index_row(); + write_dynamic_index_col(); + read_dynamic_index_row(); + read_dynamic_index_col(); +} + diff --git a/shaders-hlsl/comp/shared.comp b/shaders-hlsl/comp/shared.comp new file mode 100644 index 00000000..4deff935 --- /dev/null +++ b/shaders-hlsl/comp/shared.comp @@ -0,0 +1,27 @@ +#version 310 es +layout(local_size_x = 4) in; + +shared float sShared[gl_WorkGroupSize.x]; + +layout(std430, binding = 0) readonly buffer SSBO +{ + float in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + float out_data[]; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + float idata = in_data[ident]; + + sShared[gl_LocalInvocationIndex] = idata; + memoryBarrierShared(); + barrier(); + + out_data[ident] = sShared[gl_WorkGroupSize.x - gl_LocalInvocationIndex - 1u]; +} + diff --git a/shaders-hlsl/frag/constant-buffer-array.frag b/shaders-hlsl/frag/constant-buffer-array.frag deleted file mode 100644 index d60002a0..00000000 --- a/shaders-hlsl/frag/constant-buffer-array.frag +++ /dev/null @@ -1,32 +0,0 @@ -#version 450 - -layout(std140, binding = 4) uniform CBO -{ - vec4 a; - vec4 b; - vec4 c; - vec4 d; -} cbo[2][4]; - -layout(std430, push_constant) uniform PushMe -{ - vec4 a; - vec4 b; - vec4 c; - vec4 d; -} push; - -layout(location = 0) out vec4 FragColor; - -void main() -{ - FragColor = cbo[1][2].a; - FragColor += cbo[1][2].b; - FragColor += cbo[1][2].c; - FragColor += cbo[1][2].d; - FragColor += push.a; - FragColor += push.b; - FragColor += push.c; - FragColor += push.d; -} - diff --git a/shaders-hlsl/frag/mod.frag b/shaders-hlsl/frag/mod.frag new file mode 100644 index 00000000..32edb618 --- /dev/null +++ b/shaders-hlsl/frag/mod.frag @@ -0,0 +1,22 @@ +#version 310 es +precision mediump float; + +layout(location = 0) in vec4 a4; +layout(location = 1) in vec3 a3; +layout(location = 2) in vec2 a2; +layout(location = 3) in float a1; +layout(location = 4) in vec4 b4; +layout(location = 5) in vec3 b3; +layout(location = 6) in vec2 b2; +layout(location = 7) in float b1; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + vec4 m0 = mod(a4, b4); + vec3 m1 = mod(a3, b3); + vec2 m2 = mod(a2, b2); + float m3 = mod(a1, b1); + FragColor = m0 + m1.xyzx + 
m2.xyxy + m3; +} diff --git a/shaders-hlsl/vert/texture_buffer.vert b/shaders-hlsl/vert/texture_buffer.vert new file mode 100644 index 00000000..b071e0c9 --- /dev/null +++ b/shaders-hlsl/vert/texture_buffer.vert @@ -0,0 +1,9 @@ +#version 450 + +layout(binding = 4) uniform samplerBuffer uSamp; +layout(rgba32f, binding = 5) uniform readonly imageBuffer uSampo; + +void main() +{ + gl_Position = texelFetch(uSamp, 10) + imageLoad(uSampo, 100); +} diff --git a/shaders-msl/asm/vert/empty-struct-composite.asm.vert b/shaders-msl/asm/vert/empty-struct-composite.asm.vert new file mode 100644 index 00000000..37a2d879 --- /dev/null +++ b/shaders-msl/asm/vert/empty-struct-composite.asm.vert @@ -0,0 +1,37 @@ +; SPIR-V +; Version: 1.1 +; Generator: Google rspirv; 0 +; Bound: 17 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %2 "main" + OpExecutionMode %2 OriginUpperLeft + OpName %Test "Test" + OpName %t "t" + OpName %retvar "retvar" + OpName %main "main" + OpName %retvar_0 "retvar" + %void = OpTypeVoid + %6 = OpTypeFunction %void + %Test = OpTypeStruct +%_ptr_Function_Test = OpTypePointer Function %Test +%_ptr_Function_void = OpTypePointer Function %void + %2 = OpFunction %void None %6 + %7 = OpLabel + %t = OpVariable %_ptr_Function_Test Function + %retvar = OpVariable %_ptr_Function_void Function + OpBranch %4 + %4 = OpLabel + %13 = OpCompositeConstruct %Test + OpStore %t %13 + OpReturn + OpFunctionEnd + %main = OpFunction %void None %6 + %15 = OpLabel + %retvar_0 = OpVariable %_ptr_Function_void Function + OpBranch %14 + %14 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders/asm/vert/empty-struct-composite.asm.vert b/shaders/asm/vert/empty-struct-composite.asm.vert new file mode 100644 index 00000000..37a2d879 --- /dev/null +++ b/shaders/asm/vert/empty-struct-composite.asm.vert @@ -0,0 +1,37 @@ +; SPIR-V +; Version: 1.1 +; Generator: Google rspirv; 0 +; Bound: 17 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %2 "main" + OpExecutionMode %2 OriginUpperLeft + OpName %Test "Test" + OpName %t "t" + OpName %retvar "retvar" + OpName %main "main" + OpName %retvar_0 "retvar" + %void = OpTypeVoid + %6 = OpTypeFunction %void + %Test = OpTypeStruct +%_ptr_Function_Test = OpTypePointer Function %Test +%_ptr_Function_void = OpTypePointer Function %void + %2 = OpFunction %void None %6 + %7 = OpLabel + %t = OpVariable %_ptr_Function_Test Function + %retvar = OpVariable %_ptr_Function_void Function + OpBranch %4 + %4 = OpLabel + %13 = OpCompositeConstruct %Test + OpStore %t %13 + OpReturn + OpFunctionEnd + %main = OpFunction %void None %6 + %15 = OpLabel + %retvar_0 = OpVariable %_ptr_Function_void Function + OpBranch %14 + %14 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders/comp/struct-packing.comp b/shaders/comp/struct-packing.comp index 04b933dd..53a54e49 100644 --- a/shaders/comp/struct-packing.comp +++ b/shaders/comp/struct-packing.comp @@ -66,6 +66,16 @@ layout(binding = 0, std140) buffer SSBO0 Content content; Content content1[2]; Content content2; + + layout(column_major) mat2 m0; + layout(column_major) mat2 m1; + layout(column_major) mat2x3 m2[4]; + layout(column_major) mat3x2 m3; + layout(row_major) mat2 m4; + layout(row_major) mat2 m5[9]; + layout(row_major) mat2x3 m6[4][2]; + layout(row_major) mat3x2 m7; + float array[]; } ssbo_140; diff --git a/shaders/desktop-only/comp/enhanced-layouts.comp 
b/shaders/desktop-only/comp/enhanced-layouts.comp new file mode 100644 index 00000000..470b73e9 --- /dev/null +++ b/shaders/desktop-only/comp/enhanced-layouts.comp @@ -0,0 +1,39 @@ +#version 450 + +struct Foo +{ + int a; + int b; + int c; +}; + +layout(std140, binding = 0) uniform UBO +{ + layout(offset = 4) int a; + layout(offset = 8) int b; + layout(offset = 16) Foo foo; + layout(offset = 48) int c[8]; +} ubo; + +layout(std140, binding = 1) buffer SSBO1 +{ + layout(offset = 4) int a; + layout(offset = 8) int b; + layout(offset = 16) Foo foo; + layout(offset = 48) int c[8]; +} ssbo1; + +layout(std430, binding = 2) buffer SSBO2 +{ + layout(offset = 4) int a; + layout(offset = 8) int b; + layout(offset = 16) Foo foo; + layout(offset = 48) int c[8]; +} ssbo2; + +void main() +{ + ssbo1.a = ssbo2.a; + ssbo1.b = ubo.b; +} + diff --git a/spirv_common.hpp b/spirv_common.hpp index e30f32a3..1e15ae40 100644 --- a/spirv_common.hpp +++ b/spirv_common.hpp @@ -291,7 +291,7 @@ struct SPIRType : IVariant std::vector<uint32_t> member_types; - struct Image + struct ImageType { uint32_t type; spv::Dim dim; @@ -640,7 +640,8 @@ struct SPIRAccessChain : IVariant int32_t static_index; uint32_t loaded_from = 0; - bool need_transpose = false; + uint32_t matrix_stride = 0; + bool row_major_matrix = false; bool immutable = false; }; diff --git a/spirv_cpp.cpp b/spirv_cpp.cpp index 9100a2ba..e65fb99c 100644 --- a/spirv_cpp.cpp +++ b/spirv_cpp.cpp @@ -171,8 +171,9 @@ void CompilerCPP::emit_resources() auto &type = get<SPIRType>(var.basetype); if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassUniform && - !is_hidden_variable(var) && (meta[type.self].decoration.decoration_flags & - ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock)))) + !is_hidden_variable(var) && + (meta[type.self].decoration.decoration_flags & + ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock)))) { emit_buffer_block(var); } diff --git a/spirv_cross.cpp b/spirv_cross.cpp index 7e504e30..ecee6763 100644 --- a/spirv_cross.cpp +++ b/spirv_cross.cpp @@ -27,7 +27,7 @@ using namespace spirv_cross; #define log(...) fprintf(stderr, __VA_ARGS__) -static string ensure_valid_identifier(const string &name) +static string ensure_valid_identifier(const string &name, bool member) { // Functions in glslangValidator are mangled with name( stuff. // Normally, we would never see '(' in any legal identifiers, so just strip them out. @@ -37,12 +37,26 @@ static string ensure_valid_identifier(const string &name) { auto &c = str[i]; - // _ variables are reserved by the internal implementation, - // otherwise, make sure the name is a valid identifier. - if (i == 0 || (str[0] == '_' && i == 1)) - c = isalpha(c) ? c : '_'; + if (member) + { + // _m variables are reserved by the internal implementation, + // otherwise, make sure the name is a valid identifier. + if (i == 0) + c = isalpha(c) ? c : '_'; + else if (i == 2 && str[0] == '_' && str[1] == 'm') + c = isalpha(c) ? c : '_'; + else + c = isalnum(c) ? c : '_'; + } else - c = isalnum(c) ? c : '_'; + { + // _ variables are reserved by the internal implementation, + // otherwise, make sure the name is a valid identifier. + if (i == 0 || (str[0] == '_' && i == 1)) + c = isalpha(c) ? c : '_'; + else + c = isalnum(c) ? c : '_'; + } } return str; } @@ -154,7 +168,7 @@ bool Compiler::block_is_pure(const SPIRBlock &block) case OpMemoryBarrier: return false; - // OpExtInst is potentially impure depending on extension, but GLSL builtins are at least pure. 
+ // OpExtInst is potentially impure depending on extension, but GLSL builtins are at least pure. default: break; @@ -443,7 +457,7 @@ bool Compiler::is_hidden_variable(const SPIRVariable &var, bool include_builtins // Combined image samplers are always considered active as they are "magic" variables. if (find_if(begin(combined_image_samplers), end(combined_image_samplers), [&var](const CombinedImageSampler &samp) { return samp.combined_id == var.self; - }) != end(combined_image_samplers)) + }) != end(combined_image_samplers)) { return false; } @@ -899,7 +913,7 @@ void Compiler::set_name(uint32_t id, const std::string &name) if (name[0] == '_' && name.size() >= 2 && isdigit(name[1])) return; - str = ensure_valid_identifier(name); + str = ensure_valid_identifier(name, false); } const SPIRType &Compiler::get_type(uint32_t id) const @@ -960,10 +974,10 @@ void Compiler::set_member_name(uint32_t id, uint32_t index, const std::string &n return; // Reserved for unnamed members. - if (name[0] == '_' && name.size() >= 2 && isdigit(name[1])) + if (name[0] == '_' && name.size() >= 3 && name[1] == 'm' && isdigit(name[2])) return; - str = ensure_valid_identifier(name); + str = ensure_valid_identifier(name, true); } const std::string &Compiler::get_member_name(uint32_t id, uint32_t index) const @@ -2710,7 +2724,7 @@ void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIR [¶m](const SPIRFunction::CombinedImageSamplerParameter &p) { return param.image_id == p.image_id && param.sampler_id == p.sampler_id && param.global_image == p.global_image && param.global_sampler == p.global_sampler; - }); + }); if (itr == end(caller.combined_parameters)) { @@ -2847,7 +2861,7 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar auto itr = find_if(begin(compiler.combined_image_samplers), end(compiler.combined_image_samplers), [image_id, sampler_id](const CombinedImageSampler &combined) { return combined.image_id == image_id && combined.sampler_id == sampler_id; - }); + }); if (itr == end(compiler.combined_image_samplers)) { @@ -3187,8 +3201,8 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry) break; } - // Atomics shouldn't be able to access function-local variables. - // Some GLSL builtins access a pointer. + // Atomics shouldn't be able to access function-local variables. + // Some GLSL builtins access a pointer. 
default: break; diff --git a/spirv_cross.hpp b/spirv_cross.hpp index 93eae84c..7dab692a 100644 --- a/spirv_cross.hpp +++ b/spirv_cross.hpp @@ -98,6 +98,16 @@ struct BufferRange size_t range; }; +enum BufferPackingStandard +{ + BufferPackingStd140, + BufferPackingStd430, + BufferPackingStd140EnhancedLayout, + BufferPackingStd430EnhancedLayout, + BufferPackingHLSLCbuffer, + BufferPackingHLSLCbufferPackOffset +}; + class Compiler { public: diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index 1cf559f6..cf9d441e 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -25,8 +25,65 @@ using namespace spv; using namespace spirv_cross; using namespace std; +static bool packing_is_vec4_padded(BufferPackingStandard packing) +{ + switch (packing) + { + case BufferPackingHLSLCbuffer: + case BufferPackingHLSLCbufferPackOffset: + case BufferPackingStd140: + case BufferPackingStd140EnhancedLayout: + return true; + + default: + return false; + } +} + +static bool packing_is_hlsl(BufferPackingStandard packing) +{ + switch (packing) + { + case BufferPackingHLSLCbuffer: + case BufferPackingHLSLCbufferPackOffset: + return true; + + default: + return false; + } +} + +static bool packing_has_flexible_offset(BufferPackingStandard packing) +{ + switch (packing) + { + case BufferPackingStd140: + case BufferPackingStd430: + case BufferPackingHLSLCbuffer: + return false; + + default: + return true; + } +} + +static BufferPackingStandard packing_to_substruct_packing(BufferPackingStandard packing) +{ + switch (packing) + { + case BufferPackingStd140EnhancedLayout: + return BufferPackingStd140; + case BufferPackingStd430EnhancedLayout: + return BufferPackingStd430; + case BufferPackingHLSLCbufferPackOffset: + return BufferPackingHLSLCbuffer; + default: + return packing; + } +} + // Sanitizes underscores for GLSL where multiple underscores in a row are not allowed. -static string sanitize_underscores(const string &str) +string CompilerGLSL::sanitize_underscores(const string &str) { string res; res.reserve(str.size()); @@ -349,12 +406,18 @@ void CompilerGLSL::emit_header() auto &execution = get_entry_point(); statement("#version ", options.version, options.es && options.version > 100 ? " es" : ""); - // Needed for binding = # on UBOs, etc. - if (!options.es && options.version < 420 && options.enable_420pack_extension) + if (!options.es && options.version < 420) { - statement("#ifdef GL_ARB_shading_language_420pack"); - statement("#extension GL_ARB_shading_language_420pack : require"); - statement("#endif"); + // Needed for binding = # on UBOs, etc. + if (options.enable_420pack_extension) + { + statement("#ifdef GL_ARB_shading_language_420pack"); + statement("#extension GL_ARB_shading_language_420pack : require"); + statement("#endif"); + } + // Needed for: layout(early_fragment_tests) in; + if (execution.flags & (1ull << ExecutionModeEarlyFragmentTests)) + require_extension("GL_ARB_shader_image_load_store"); } for (auto &ext : forced_extensions) @@ -647,9 +710,14 @@ string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index) //if (flags & (1ull << DecorationColMajor)) // attr.push_back("column_major"); - if (dec.decoration_flags & (1ull << DecorationLocation)) + if ((dec.decoration_flags & (1ull << DecorationLocation)) != 0 && can_use_io_location(type.storage)) attr.push_back(join("location = ", dec.location)); + // DecorationCPacked is set by layout_for_variable earlier to mark that we need to emit offset qualifiers. + // This is only done selectively in GLSL as needed. 
+ if (has_decoration(type.self, DecorationCPacked) && (dec.decoration_flags & (1ull << DecorationOffset)) != 0) + attr.push_back(join("offset = ", dec.offset)); + if (attr.empty()) return ""; @@ -779,7 +847,7 @@ const char *CompilerGLSL::format_to_glsl(spv::ImageFormat format) } } -uint32_t CompilerGLSL::type_to_std430_base_size(const SPIRType &type) +uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPackingStandard) { switch (type.basetype) { @@ -792,9 +860,23 @@ uint32_t CompilerGLSL::type_to_std430_base_size(const SPIRType &type) } } -uint32_t CompilerGLSL::type_to_std430_alignment(const SPIRType &type, uint64_t flags) +uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, uint64_t flags, BufferPackingStandard packing) { - const uint32_t base_alignment = type_to_std430_base_size(type); + const uint32_t base_alignment = type_to_packed_base_size(type, packing); + + if (!type.array.empty()) + { + uint32_t minimum_alignment = 1; + if (packing_is_vec4_padded(packing)) + minimum_alignment = 16; + + auto *tmp = &get<SPIRType>(type.parent_type); + while (!tmp->array.empty()) + tmp = &get<SPIRType>(tmp->parent_type); + + // Get the alignment of the base type, then maybe round up. + return max(minimum_alignment, type_to_packed_alignment(*tmp, flags, packing)); + } if (type.basetype == SPIRType::Struct) { @@ -803,13 +885,23 @@ uint32_t CompilerGLSL::type_to_std430_alignment(const SPIRType &type, uint64_t f for (uint32_t i = 0; i < type.member_types.size(); i++) { auto member_flags = meta[type.self].members.at(i).decoration_flags; - alignment = max(alignment, type_to_std430_alignment(get<SPIRType>(type.member_types[i]), member_flags)); + alignment = + max(alignment, type_to_packed_alignment(get<SPIRType>(type.member_types[i]), member_flags, packing)); } + // In std140, struct alignment is rounded up to 16. + if (packing_is_vec4_padded(packing)) + alignment = max(alignment, 16u); + return alignment; } else { + // Vectors are *not* aligned in HLSL, but there's an extra rule where vectors cannot straddle + // a vec4, this is handled outside since that part knows our current offset. + if (type.columns == 1 && packing_is_hlsl(packing)) + return base_alignment; + // From 7.6.2.2 in GL 4.5 core spec. // Rule 1 if (type.vecsize == 1 && type.columns == 1) @@ -829,7 +921,9 @@ uint32_t CompilerGLSL::type_to_std430_alignment(const SPIRType &type, uint64_t f // vectors. if ((flags & (1ull << DecorationColMajor)) && type.columns > 1) { - if (type.vecsize == 3) + if (packing_is_vec4_padded(packing)) + return 4 * base_alignment; + else if (type.vecsize == 3) return 4 * base_alignment; else return type.vecsize * base_alignment; @@ -840,7 +934,9 @@ uint32_t CompilerGLSL::type_to_std430_alignment(const SPIRType &type, uint64_t f // Rule 7. if ((flags & (1ull << DecorationRowMajor)) && type.vecsize > 1) { - if (type.columns == 3) + if (packing_is_vec4_padded(packing)) + return 4 * base_alignment; + else if (type.columns == 3) return 4 * base_alignment; else return type.columns * base_alignment; @@ -849,26 +945,40 @@ uint32_t CompilerGLSL::type_to_std430_alignment(const SPIRType &type, uint64_t f // Rule 8 implied. } - SPIRV_CROSS_THROW("Did not find suitable std430 rule for type. Bogus decorations?"); + SPIRV_CROSS_THROW("Did not find suitable rule for type. 
Bogus decorations?"); } -uint32_t CompilerGLSL::type_to_std430_array_stride(const SPIRType &type, uint64_t flags) +uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, uint64_t flags, BufferPackingStandard packing) { // Array stride is equal to aligned size of the underlying type. - SPIRType tmp = type; - tmp.array.pop_back(); - tmp.array_size_literal.pop_back(); - uint32_t size = type_to_std430_size(tmp, flags); - uint32_t alignment = type_to_std430_alignment(tmp, flags); - return (size + alignment - 1) & ~(alignment - 1); + uint32_t parent = type.parent_type; + assert(parent); + + auto &tmp = get(parent); + + uint32_t size = type_to_packed_size(tmp, flags, packing); + if (tmp.array.empty()) + { + uint32_t alignment = type_to_packed_alignment(type, flags, packing); + return (size + alignment - 1) & ~(alignment - 1); + } + else + { + // For multidimensional arrays, array stride always matches size of subtype. + // The alignment cannot change because multidimensional arrays are basically N * M array elements. + return size; + } } -uint32_t CompilerGLSL::type_to_std430_size(const SPIRType &type, uint64_t flags) +uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, uint64_t flags, BufferPackingStandard packing) { if (!type.array.empty()) - return to_array_size_literal(type, uint32_t(type.array.size()) - 1) * type_to_std430_array_stride(type, flags); + { + return to_array_size_literal(type, uint32_t(type.array.size()) - 1) * + type_to_packed_array_stride(type, flags, packing); + } - const uint32_t base_alignment = type_to_std430_base_size(type); + const uint32_t base_alignment = type_to_packed_base_size(type, packing); uint32_t size = 0; if (type.basetype == SPIRType::Struct) @@ -880,18 +990,18 @@ uint32_t CompilerGLSL::type_to_std430_size(const SPIRType &type, uint64_t flags) auto member_flags = meta[type.self].members.at(i).decoration_flags; auto &member_type = get(type.member_types[i]); - uint32_t std430_alignment = type_to_std430_alignment(member_type, member_flags); - uint32_t alignment = max(std430_alignment, pad_alignment); + uint32_t packed_alignment = type_to_packed_alignment(member_type, member_flags, packing); + uint32_t alignment = max(packed_alignment, pad_alignment); // The next member following a struct member is aligned to the base alignment of the struct that came before. // GL 4.5 spec, 7.6.2.2. 
if (member_type.basetype == SPIRType::Struct) - pad_alignment = std430_alignment; + pad_alignment = packed_alignment; else pad_alignment = 1; size = (size + alignment - 1) & ~(alignment - 1); - size += type_to_std430_size(member_type, member_flags); + size += type_to_packed_size(member_type, member_flags, packing); } } else @@ -901,7 +1011,9 @@ uint32_t CompilerGLSL::type_to_std430_size(const SPIRType &type, uint64_t flags) if ((flags & (1ull << DecorationColMajor)) && type.columns > 1) { - if (type.vecsize == 3) + if (packing_is_vec4_padded(packing)) + size = type.columns * 4 * base_alignment; + else if (type.vecsize == 3) size = type.columns * 4 * base_alignment; else size = type.columns * type.vecsize * base_alignment; @@ -909,7 +1021,9 @@ uint32_t CompilerGLSL::type_to_std430_size(const SPIRType &type, uint64_t flags) if ((flags & (1ull << DecorationRowMajor)) && type.vecsize > 1) { - if (type.columns == 3) + if (packing_is_vec4_padded(packing)) + size = type.vecsize * 4 * base_alignment; + else if (type.columns == 3) size = type.vecsize * 4 * base_alignment; else size = type.vecsize * type.columns * base_alignment; @@ -919,7 +1033,7 @@ uint32_t CompilerGLSL::type_to_std430_size(const SPIRType &type, uint64_t flags) return size; } -bool CompilerGLSL::ssbo_is_std430_packing(const SPIRType &type) +bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing) { // This is very tricky and error prone, but try to be exhaustive and correct here. // SPIR-V doesn't directly say if we're using std430 or std140. @@ -943,32 +1057,76 @@ bool CompilerGLSL::ssbo_is_std430_packing(const SPIRType &type) auto member_flags = meta[type.self].members.at(i).decoration_flags; // Verify alignment rules. - uint32_t std430_alignment = type_to_std430_alignment(memb_type, member_flags); - uint32_t alignment = max(std430_alignment, pad_alignment); + uint32_t packed_alignment = type_to_packed_alignment(memb_type, member_flags, packing); + uint32_t packed_size = type_to_packed_size(memb_type, member_flags, packing); + + if (packing_is_hlsl(packing)) + { + // If a member straddles across a vec4 boundary, alignment is actually vec4. + uint32_t begin_word = offset / 16; + uint32_t end_word = (offset + packed_size - 1) / 16; + if (begin_word != end_word) + packed_alignment = max(packed_alignment, 16u); + } + + uint32_t alignment = max(packed_alignment, pad_alignment); offset = (offset + alignment - 1) & ~(alignment - 1); // The next member following a struct member is aligned to the base alignment of the struct that came before. // GL 4.5 spec, 7.6.2.2. if (memb_type.basetype == SPIRType::Struct) - pad_alignment = std430_alignment; + pad_alignment = packed_alignment; else pad_alignment = 1; - uint32_t actual_offset = type_struct_member_offset(type, i); - if (actual_offset != offset) // This cannot be std430. - return false; + // We only care about offsets in std140, std430, etc ... + // For EnhancedLayout variants, we have the flexibility to choose our own offsets. + if (!packing_has_flexible_offset(packing)) + { + uint32_t actual_offset = type_struct_member_offset(type, i); + if (actual_offset != offset) // This cannot be the packing we're looking for. + return false; + } // Verify array stride rules. 
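Before moving on to the array-stride checks below, the HLSL straddle test from the hunk above in isolation (hypothetical helper name, same begin_word/end_word arithmetic): a member keeps its scalar alignment only if it fits inside a single 16-byte row.

#include <cassert>
#include <cstdint>

// Effective HLSL alignment for a member of the given size at a tentative
// offset: if the member would cross a 16-byte boundary, it must instead
// start on the next 16-byte boundary.
static uint32_t hlsl_effective_alignment(uint32_t offset, uint32_t size, uint32_t packed_alignment)
{
    uint32_t begin_word = offset / 16;
    uint32_t end_word = (offset + size - 1) / 16;
    return begin_word != end_word ? 16u : packed_alignment;
}

int main()
{
    // float3 at offset 4 occupies bytes [4, 16): fits in one row.
    assert(hlsl_effective_alignment(4, 12, 4) == 4);
    // float3 at offset 8 would occupy bytes [8, 20): straddles, align to 16.
    assert(hlsl_effective_alignment(8, 12, 4) == 16);
}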
if (!memb_type.array.empty() &&
- type_to_std430_array_stride(memb_type, member_flags) != type_struct_member_array_stride(type, i))
+ type_to_packed_array_stride(memb_type, member_flags, packing) != type_struct_member_array_stride(type, i))
return false;

- // Verify that sub-structs also follow std430 rules.
- if (!memb_type.member_types.empty() && !ssbo_is_std430_packing(memb_type))
+ // Verify that sub-structs also follow packing rules.
+ // We cannot use enhanced layouts on substructs, so they better be up to spec.
+ auto substruct_packing = packing_to_substruct_packing(packing);
+
+ if (!memb_type.member_types.empty() && !buffer_is_packing_standard(memb_type, substruct_packing))
return false;

// Bump size.
- offset += type_to_std430_size(memb_type, member_flags);
+ offset += packed_size;
+ }
+
+ return true;
+}
+
+bool CompilerGLSL::can_use_io_location(StorageClass storage)
+{
+ // Location specifiers are mandatory in SPIR-V, but they aren't really supported in earlier versions of GLSL.
+ // Be very explicit here about how to solve the issue.
+ if ((get_execution_model() != ExecutionModelVertex && storage == StorageClassInput) ||
+ (get_execution_model() != ExecutionModelFragment && storage == StorageClassOutput))
+ {
+ if (!options.es && options.version < 410 && !options.separate_shader_objects)
+ return false;
+ else if (options.es && options.version < 310)
+ return false;
+ }
+
+ if ((get_execution_model() == ExecutionModelVertex && storage == StorageClassInput) ||
+ (get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput))
+ {
+ if (options.es && options.version < 300)
+ return false;
+ else if (!options.es && options.version < 330)
+ return false;
+ }

 return true;
@@ -1004,32 +1162,16 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
 attr.push_back(join("input_attachment_index = ", dec.input_attachment));
 }

- if (flags & (1ull << DecorationLocation))
+ if ((flags & (1ull << DecorationLocation)) != 0 && can_use_io_location(var.storage))
 {
- bool can_use_varying_location = true;
+ uint64_t combined_decoration = 0;
+ for (uint32_t i = 0; i < meta[type.self].members.size(); i++)
+ combined_decoration |= combined_decoration_for_member(type, i);

- // Location specifiers are must have in SPIR-V, but they aren't really supported in earlier versions of GLSL.
- // Be very explicit here about how to solve the issue.
- if ((get_execution_model() != ExecutionModelVertex && var.storage == StorageClassInput) ||
- (get_execution_model() != ExecutionModelFragment && var.storage == StorageClassOutput))
- {
- if (!options.es && options.version < 410 && !options.separate_shader_objects)
- can_use_varying_location = false;
- else if (options.es && options.version < 310)
- can_use_varying_location = false;
- }
-
- if (can_use_varying_location)
- {
- uint64_t combined_decoration = 0;
- for (uint32_t i = 0; i < meta[type.self].members.size(); i++)
- combined_decoration |= combined_decoration_for_member(type, i);
-
- // If our members have location decorations, we don't need to
- // emit location decorations at the top as well (looks weird).
- if ((combined_decoration & (1ull << DecorationLocation)) == 0)
- attr.push_back(join("location = ", dec.location));
- }
+ // If our members have location decorations, we don't need to
+ // emit location decorations at the top as well (looks weird).
+ if ((combined_decoration & (1ull << DecorationLocation)) == 0)
+ attr.push_back(join("location = ", dec.location));
 }

 // set = 0 is the default.
Do not emit set = decoration in regular GLSL output, but
@@ -1052,15 +1194,80 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
 if (flags & (1ull << DecorationOffset))
 attr.push_back(join("offset = ", dec.offset));

+ bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant;
+ bool ssbo_block = var.storage == StorageClassStorageBuffer ||
+ (var.storage == StorageClassUniform && (typeflags & (1ull << DecorationBufferBlock)));
+
 // Instead of adding explicit offsets for every element here, just assume we're using std140 or std430.
 // If SPIR-V does not comply with either layout, we cannot really work around it.
 if (var.storage == StorageClassUniform && (typeflags & (1ull << DecorationBlock)))
- attr.push_back("std140");
- else if (var.storage == StorageClassStorageBuffer ||
- (var.storage == StorageClassUniform && (typeflags & (1ull << DecorationBufferBlock))))
- attr.push_back(ssbo_is_std430_packing(type) ? "std430" : "std140");
- else if (options.vulkan_semantics && var.storage == StorageClassPushConstant)
- attr.push_back(ssbo_is_std430_packing(type) ? "std430" : "std140");
+ {
+ if (buffer_is_packing_standard(type, BufferPackingStd140))
+ attr.push_back("std140");
+ else if (buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout))
+ {
+ attr.push_back("std140");
+ // Fallback time. We might be able to use GL_ARB_enhanced_layouts to deal with this difference;
+ // however, we can only use layout(offset) on the block itself, not any substructs, so the substructs better be the appropriate layout.
+ // Enhanced layouts seem to always work in Vulkan GLSL, so no need for extensions there.
+ if (options.es && !options.vulkan_semantics)
+ SPIRV_CROSS_THROW("Uniform buffer block cannot be expressed as plain std140. ES targets do "
+ "not support GL_ARB_enhanced_layouts.");
+ if (!options.es && !options.vulkan_semantics && options.version < 440)
+ require_extension("GL_ARB_enhanced_layouts");
+
+ // This is a very late stage to check for this, but use this otherwise unused decoration to mark that we should emit
+ // explicit offsets for this block type.
+ // layout_for_variable() will be called before the actual buffer emit.
+ // The alternative is a full pass before codegen where we deduce this decoration,
+ // but then we are just doing the exact same work twice, and more complexity.
+ set_decoration(type.self, DecorationCPacked);
+ }
+ else
+ {
+ SPIRV_CROSS_THROW("Uniform buffer cannot be expressed as std140, even with enhanced layouts. You can try "
+ "flattening this block to "
+ "support a more flexible layout.");
+ }
+ }
+ else if (push_constant_block || ssbo_block)
+ {
+ if (buffer_is_packing_standard(type, BufferPackingStd430))
+ attr.push_back("std430");
+ else if (buffer_is_packing_standard(type, BufferPackingStd140))
+ attr.push_back("std140");
+ else if (buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout))
+ {
+ attr.push_back("std140");
+
+ // Fallback time. We might be able to use GL_ARB_enhanced_layouts to deal with this difference;
+ // however, we can only use layout(offset) on the block itself, not any substructs, so the substructs better be the appropriate layout.
+ // Enhanced layouts seem to always work in Vulkan GLSL, so no need for extensions there.
+ if (options.es && !options.vulkan_semantics)
+ SPIRV_CROSS_THROW("Buffer block cannot be expressed as either std430 or std140. ES targets do "
+ "not support GL_ARB_enhanced_layouts.");
+ if (!options.es && !options.vulkan_semantics && options.version < 440)
+ require_extension("GL_ARB_enhanced_layouts");
+
+ set_decoration(type.self, DecorationCPacked);
+ }
+ else if (buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
+ {
+ attr.push_back("std430");
+ if (options.es && !options.vulkan_semantics)
+ SPIRV_CROSS_THROW("Buffer block cannot be expressed as either std430 or std140. ES targets do "
+ "not support GL_ARB_enhanced_layouts.");
+ if (!options.es && !options.vulkan_semantics && options.version < 440)
+ require_extension("GL_ARB_enhanced_layouts");
+
+ set_decoration(type.self, DecorationCPacked);
+ }
+ else
+ {
+ SPIRV_CROSS_THROW("Buffer block cannot be expressed as either std430 or std140, even with enhanced "
+ "layouts. You can try flattening this block to support a more flexible layout.");
+ }
+ }

 // For images, the type itself adds a layout qualifer.
 // Only emit the format for storage images.
@@ -1249,16 +1456,6 @@ void CompilerGLSL::emit_flattened_io_block(const SPIRVariable &var, const char *
 if (!type.array.empty())
 SPIRV_CROSS_THROW("Array of varying structs cannot be flattened to legacy-compatible varyings.");

- // Block names should never alias.
- auto block_name = to_name(type.self, false);
-
- // Shaders never use the block by interface name, so we don't
- // have to track this other than updating name caches.
- if (resource_names.find(block_name) != end(resource_names))
- block_name = get_fallback_name(type.self);
- else
- resource_names.insert(block_name);
-
 auto old_flags = meta[type.self].decoration.decoration_flags;
 // Emit the members as if they are part of a block to get all qualifiers.
 meta[type.self].decoration.decoration_flags |= 1ull << DecorationBlock;
@@ -1276,7 +1473,8 @@ void CompilerGLSL::emit_flattened_io_block(const SPIRVariable &var, const char *
 // Replace member name while emitting it so it encodes both struct name and member name.
 // Sanitize underscores because joining the two identifiers might create more than 1 underscore in a row,
 // which is not allowed.
- auto member_name = get_member_name(type.self, i);
+ auto backup_name = get_member_name(type.self, i);
+ auto member_name = to_member_name(type, i);
 set_member_name(type.self, i, sanitize_underscores(join(to_name(type.self), "_", member_name)));
 emit_struct_member(type, member, i, qual);
 // Restore member name.
@@ -1506,17 +1704,15 @@ void CompilerGLSL::replace_fragment_outputs()
 }
 }

-string CompilerGLSL::remap_swizzle(uint32_t result_type, uint32_t input_components, uint32_t expr)
+string CompilerGLSL::remap_swizzle(const SPIRType &out_type, uint32_t input_components, const string &expr)
{
- auto &out_type = get(result_type);
-
 if (out_type.vecsize == input_components)
- return to_expression(expr);
+ return expr;
 else if (input_components == 1)
- return join(type_to_glsl(out_type), "(", to_expression(expr), ")");
+ return join(type_to_glsl(out_type), "(", expr, ")");
 else
 {
- auto e = to_enclosed_expression(expr) + ".";
+ auto e = enclose_expression(expr) + ".";
 // Just clamp the swizzle index if we have more outputs than inputs.
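The clamping loop below builds swizzles such as ".xyyy" when widening two components to four; a minimal standalone equivalent of just the swizzle construction (illustrative helper, not SPIRV-Cross API):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <string>

// Builds the clamped swizzle used when an expression has fewer components
// than the destination: indices past the last input component repeat it.
static std::string clamped_swizzle(uint32_t out_components, uint32_t in_components)
{
    const char swz[] = { 'x', 'y', 'z', 'w' }; // out_components <= 4 assumed
    std::string s = ".";
    for (uint32_t c = 0; c < out_components; c++)
        s += swz[std::min(c, in_components - 1)];
    return s;
}

int main()
{
    assert(clamped_swizzle(4, 2) == ".xyyy"); // vec2 widened to vec4
    assert(clamped_swizzle(4, 3) == ".xyzz");
}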
for (uint32_t c = 0; c < out_type.vecsize; c++) e += index_to_swizzle(min(c, input_components - 1)); @@ -2278,7 +2474,7 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t if (splat) { - if (type_to_std430_base_size(type) == 8) + if (type.width == 64) { uint64_t ident = c.scalar_u64(vector, 0); for (uint32_t i = 1; i < c.vector_size(); i++) @@ -2490,7 +2686,7 @@ string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id) if (find_if(begin(header.declare_temporary), end(header.declare_temporary), [result_type, result_id](const pair &tmp) { return tmp.first == result_type && tmp.second == result_id; - }) == end(header.declare_temporary)) + }) == end(header.declare_temporary)) { header.declare_temporary.emplace_back(result_type, result_id); force_recompile = true; @@ -2717,8 +2913,9 @@ void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result uint32_t op2, uint32_t op3, const char *op) { bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3); - emit_op(result_type, result_id, join(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", - to_expression(op2), ", ", to_expression(op3), ")"), + emit_op(result_type, result_id, + join(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(op2), ", ", + to_expression(op3), ")"), forward); inherit_expression_dependencies(result_id, op0); @@ -4019,8 +4216,8 @@ string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32 { if (flattened_buffer_blocks.count(base)) { - uint32_t matrix_stride; - bool need_transpose; + uint32_t matrix_stride = 0; + bool need_transpose = false; flattened_access_chain_offset(expression_type(base), indices, count, 0, 16, &need_transpose, &matrix_stride); if (out_need_transpose) @@ -4255,10 +4452,11 @@ std::pair CompilerGLSL::flattened_access_chain_offset(con assert(type->basetype == SPIRType::Struct); uint32_t type_id = 0; - uint32_t matrix_stride = 0; - std::string expr; - bool row_major_matrix_needs_conversion = false; + + // Inherit matrix information in case we are access chaining a vector which might have come from a row major layout. + bool row_major_matrix_needs_conversion = need_transpose ? *need_transpose : false; + uint32_t matrix_stride = out_matrix_stride ? *out_matrix_stride : 0; for (uint32_t i = 0; i < count; i++) { @@ -4328,11 +4526,29 @@ std::pair CompilerGLSL::flattened_access_chain_offset(con // Matrix -> Vector else if (type->columns > 1) { - if (ids[index].get_type() != TypeConstant) - SPIRV_CROSS_THROW("Cannot flatten dynamic matrix indexing!"); + auto *constant = maybe_get(index); + if (constant) + { + index = get(index).scalar(); + offset += index * (row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride); + } + else + { + uint32_t indexing_stride = row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride; + // Dynamic array access. + if (indexing_stride % word_stride) + { + SPIRV_CROSS_THROW( + "Matrix stride for dynamic indexing must be divisible by the size of a 4-component vector. " + "Likely culprit here is a row-major matrix being accessed dynamically. " + "This cannot be flattened. Try using std140 layout instead."); + } - index = get(index).scalar(); - offset += index * (row_major_matrix_needs_conversion ? 
type->width / 8 : matrix_stride); + expr += to_enclosed_expression(index); + expr += " * "; + expr += convert_to_string(indexing_stride / word_stride); + expr += " + "; + } uint32_t parent_type = type->parent_type; type = &get(type->parent_type); @@ -4341,11 +4557,29 @@ std::pair CompilerGLSL::flattened_access_chain_offset(con // Vector -> Scalar else if (type->vecsize > 1) { - if (ids[index].get_type() != TypeConstant) - SPIRV_CROSS_THROW("Cannot flatten dynamic vector indexing!"); + auto *constant = maybe_get(index); + if (constant) + { + index = get(index).scalar(); + offset += index * (row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8)); + } + else + { + uint32_t indexing_stride = row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8); - index = get(index).scalar(); - offset += index * (row_major_matrix_needs_conversion ? matrix_stride : type->width / 8); + // Dynamic array access. + if (indexing_stride % word_stride) + { + SPIRV_CROSS_THROW( + "Stride for dynamic vector indexing must be divisible by the size of a 4-component vector. " + "This cannot be flattened in legacy targets."); + } + + expr += to_enclosed_expression(index); + expr += " * "; + expr += convert_to_string(indexing_stride / word_stride); + expr += " + "; + } uint32_t parent_type = type->parent_type; type = &get(type->parent_type); @@ -4679,6 +4913,14 @@ bool CompilerGLSL::optimize_read_modify_write(const string &lhs, const string &r return true; } +void CompilerGLSL::emit_block_instructions(const SPIRBlock &block) +{ + current_emitting_block = █ + for (auto &op : block.ops) + emit_instruction(op); + current_emitting_block = nullptr; +} + void CompilerGLSL::emit_instruction(const Instruction &instruction) { auto ops = stream(instruction); @@ -4771,16 +5013,20 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) } else { - auto lhs = to_expression(ops[0]); auto rhs = to_expression(ops[1]); + // Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null. + if (!rhs.empty()) + { + auto lhs = to_expression(ops[0]); - // Tries to optimize assignments like " = op expr". - // While this is purely cosmetic, this is important for legacy ESSL where loop - // variable increments must be in either i++ or i += const-expr. - // Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0. - if (!optimize_read_modify_write(lhs, rhs)) - statement(lhs, " = ", rhs, ";"); - register_write(ops[0]); + // Tries to optimize assignments like " = op expr". + // While this is purely cosmetic, this is important for legacy ESSL where loop + // variable increments must be in either i++ or i += const-expr. + // Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0. 
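As a loose sketch of that folding (plain strings and single-character operators only; this is not the actual SPIRV-Cross detection logic): "lhs = lhs <op> expr" becomes "lhs <op>= expr", so legacy ESSL loop increments come out as "i += 1" rather than "i = i + 1".

#include <cassert>
#include <string>

// Fold a self-assignment of the form "lhs = lhs <op> expr" into "lhs <op>= expr".
static std::string fold_self_assign(const std::string &lhs, const std::string &rhs)
{
    bool matches = rhs.compare(0, lhs.size(), lhs) == 0 && rhs.size() >= lhs.size() + 3 &&
                   rhs[lhs.size()] == ' ' && rhs[lhs.size() + 2] == ' ';
    if (!matches)
        return lhs + " = " + rhs; // not a read-modify-write pattern
    char op = rhs[lhs.size() + 1];
    return lhs + " " + op + "= " + rhs.substr(lhs.size() + 3);
}

int main()
{
    assert(fold_self_assign("i", "i + 1") == "i += 1");
    assert(fold_self_assign("a", "b + 1") == "a = b + 1");
}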
+ if (!optimize_read_modify_write(lhs, rhs)) + statement(lhs, " = ", rhs, ";"); + register_write(ops[0]); + } } break; } @@ -4898,16 +5144,28 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) const auto *elems = &ops[2]; length -= 2; - if (!length) - SPIRV_CROSS_THROW("Invalid input to OpCompositeConstruct."); - bool forward = true; for (uint32_t i = 0; i < length; i++) forward = forward && should_forward(elems[i]); - auto &in_type = expression_type(elems[0]); auto &out_type = get(result_type); + if (!length) + { + if (out_type.basetype == SPIRType::Struct) + { + // It is technically allowed to make a blank struct, + // but we cannot make a meaningful expression out of it in high level languages, + // so make it a blank expression. + emit_op(result_type, id, "", forward); + break; + } + else + SPIRV_CROSS_THROW("Invalid input to OpCompositeConstruct."); + } + + auto &in_type = expression_type(elems[0]); + // Only splat if we have vector constructors. // Arrays and structs must be initialized properly in full. bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct; @@ -5625,8 +5883,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) register_read(ops[1], ops[2], should_forward(ops[2])); break; - // OpAtomicStore unimplemented. Not sure what would use that. - // OpAtomicLoad seems to only be relevant for atomic counters. + // OpAtomicStore unimplemented. Not sure what would use that. + // OpAtomicLoad seems to only be relevant for atomic counters. case OpAtomicIIncrement: forced_temporaries.insert(ops[1]); @@ -5870,14 +6128,14 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // since ImageRead always returns 4-component vectors and the backing type is opaque. if (!var->remapped_components) SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly."); - imgexpr = remap_swizzle(result_type, var->remapped_components, ops[2]); + imgexpr = remap_swizzle(get(result_type), var->remapped_components, to_expression(ops[2])); } else { // PLS input could have different number of components than what the SPIR expects, swizzle to // the appropriate vector size. 
uint32_t components = pls_format_to_components(itr->format); - imgexpr = remap_swizzle(result_type, components, ops[2]); + imgexpr = remap_swizzle(get(result_type), components, to_expression(ops[2])); } pure = true; } @@ -5918,6 +6176,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)"); } } + imgexpr = remap_swizzle(get(result_type), 4, imgexpr); pure = true; } else @@ -5935,6 +6194,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) } else imgexpr = join("imageLoad(", to_expression(ops[2]), ", ", to_expression(ops[3]), ")"); + + imgexpr = remap_swizzle(get(result_type), 4, imgexpr); pure = false; } @@ -5983,6 +6244,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) } auto &type = expression_type(ops[0]); + auto &value_type = expression_type(ops[2]); + auto store_type = value_type; + store_type.vecsize = 4; + if (type.image.ms) { uint32_t operands = ops[3]; @@ -5990,11 +6255,11 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used."); uint32_t samples = ops[4]; statement("imageStore(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(samples), - ", ", to_expression(ops[2]), ");"); + ", ", remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");"); } else - statement("imageStore(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), - ");"); + statement("imageStore(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", + remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");"); if (var && variable_storage_is_aliased(*var)) flush_all_aliased_variables(); @@ -6037,6 +6302,11 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) if (get_entry_point().model == ExecutionModelGLCompute) { uint32_t mem = get(ops[2]).scalar(); + + // We cannot forward any loads beyond the memory barrier. + if (mem) + flush_all_active_variables(); + if (mem == MemorySemanticsWorkgroupMemoryMask) statement("memoryBarrierShared();"); else if (mem) @@ -6870,12 +7140,15 @@ void CompilerGLSL::emit_function(SPIRFunction &func, uint64_t return_flags) // Don't declare variable until first use to declutter the GLSL output quite a lot. // If we don't touch the variable before first branch, // declare it then since we need variable declaration to be in top scope. - var.deferred_declaration = true; + // Never declare empty structs. They have no meaningful representation. + auto &type = get(var.basetype); + bool empty_struct = type.basetype == SPIRType::Struct && type.member_types.empty(); + var.deferred_declaration = !empty_struct; } } else { - // HACK: SPIRV likes to use samplers and images as local variables, but GLSL does not allow this. + // HACK: SPIR-V in older glslang output likes to use samplers and images as local variables, but GLSL does not allow this. // For these types (non-lvalue), we enforce forwarding through a shadowed variable. // This means that when we OpStore to these variables, we just write in the expression ID directly. // This breaks any kind of branching, since the variable must be statically assigned. @@ -7110,8 +7383,7 @@ string CompilerGLSL::emit_continue_block(uint32_t continue_block) { propagate_loop_dominators(*block); // Write out all instructions we have in this block. 
- for (auto &op : block->ops) - emit_instruction(op); + emit_block_instructions(*block); // For plain branchless for/while continue blocks. if (block->next_block) @@ -7182,8 +7454,7 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method // If we're trying to create a true for loop, // we need to make sure that all opcodes before branch statement do not actually emit any code. // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead. - for (auto &op : block.ops) - emit_instruction(op); + emit_block_instructions(block); bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries); @@ -7234,8 +7505,7 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method // If we're trying to create a true for loop, // we need to make sure that all opcodes before branch statement do not actually emit any code. // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead. - for (auto &op : child.ops) - emit_instruction(op); + emit_block_instructions(child); bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries); @@ -7341,8 +7611,8 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) { statement("do"); begin_scope(); - for (auto &op : block.ops) - emit_instruction(op); + + emit_block_instructions(block); } else if (block.merge == SPIRBlock::MergeLoop) { @@ -7354,13 +7624,12 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) statement("for (;;)"); begin_scope(); - for (auto &op : block.ops) - emit_instruction(op); + + emit_block_instructions(block); } else { - for (auto &op : block.ops) - emit_instruction(op); + emit_block_instructions(block); } // If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp index 73a0d062..9bd0cf26 100644 --- a/spirv_glsl.hpp +++ b/spirv_glsl.hpp @@ -181,7 +181,12 @@ protected: // Virtualize methods which need to be overridden by subclass targets like C++ and such. virtual void emit_function_prototype(SPIRFunction &func, uint64_t return_flags); + + // Kinda ugly way to let opcodes peek at their neighbor instructions for trivial peephole scenarios. 
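The pattern the member below enables, reduced to a self-contained sketch (the struct definitions are hypothetical stand-ins for the real SPIRBlock/Instruction types):

#include <cassert>
#include <cstddef>
#include <vector>

struct Instruction { unsigned op; };
struct Block { std::vector<Instruction> ops; };

// Given the block currently being emitted, an instruction can look one step
// ahead, which is all the trivial peephole cases need.
static const Instruction *next_in_block(const Block &block, const Instruction &instr)
{
    size_t offset = size_t(&instr - block.ops.data());
    return offset + 1 < block.ops.size() ? &block.ops[offset + 1] : nullptr;
}

int main()
{
    Block b{ { { 1 }, { 2 } } };
    assert(next_in_block(b, b.ops[0])->op == 2);
    assert(next_in_block(b, b.ops[1]) == nullptr);
}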
+ const SPIRBlock *current_emitting_block = nullptr; + virtual void emit_instruction(const Instruction &instr); + void emit_block_instructions(const SPIRBlock &block); virtual void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args, uint32_t count); virtual void emit_header(); @@ -375,7 +380,7 @@ protected: uint32_t *matrix_stride = nullptr); const char *index_to_swizzle(uint32_t index); - std::string remap_swizzle(uint32_t result_type, uint32_t input_components, uint32_t expr); + std::string remap_swizzle(const SPIRType &result_type, uint32_t input_components, const std::string &expr); std::string declare_temporary(uint32_t type, uint32_t id); void append_global_func_args(const SPIRFunction &func, uint32_t index, std::vector &arglist); std::string to_expression(uint32_t id); @@ -397,11 +402,11 @@ protected: std::string to_combined_image_sampler(uint32_t image_id, uint32_t samp_id); virtual bool skip_argument(uint32_t id) const; - bool ssbo_is_std430_packing(const SPIRType &type); - uint32_t type_to_std430_base_size(const SPIRType &type); - uint32_t type_to_std430_alignment(const SPIRType &type, uint64_t flags); - uint32_t type_to_std430_array_stride(const SPIRType &type, uint64_t flags); - uint32_t type_to_std430_size(const SPIRType &type, uint64_t flags); + bool buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing); + uint32_t type_to_packed_base_size(const SPIRType &type, BufferPackingStandard packing); + uint32_t type_to_packed_alignment(const SPIRType &type, uint64_t flags, BufferPackingStandard packing); + uint32_t type_to_packed_array_stride(const SPIRType &type, uint64_t flags, BufferPackingStandard packing); + uint32_t type_to_packed_size(const SPIRType &type, uint64_t flags, BufferPackingStandard packing); std::string bitcast_glsl(const SPIRType &result_type, uint32_t arg); virtual std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type); @@ -485,6 +490,10 @@ protected: void declare_undefined_values(); + static std::string sanitize_underscores(const std::string &str); + + bool can_use_io_location(spv::StorageClass storage); + private: void init() { diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp index bc00235d..3bd2931b 100644 --- a/spirv_hlsl.cpp +++ b/spirv_hlsl.cpp @@ -23,6 +23,134 @@ using namespace spv; using namespace spirv_cross; using namespace std; +static unsigned image_format_to_components(ImageFormat fmt) +{ + switch (fmt) + { + case ImageFormatR8: + case ImageFormatR16: + case ImageFormatR8Snorm: + case ImageFormatR16Snorm: + case ImageFormatR16f: + case ImageFormatR32f: + case ImageFormatR8i: + case ImageFormatR16i: + case ImageFormatR32i: + case ImageFormatR8ui: + case ImageFormatR16ui: + case ImageFormatR32ui: + return 1; + + case ImageFormatRg8: + case ImageFormatRg16: + case ImageFormatRg8Snorm: + case ImageFormatRg16Snorm: + case ImageFormatRg16f: + case ImageFormatRg32f: + case ImageFormatRg8i: + case ImageFormatRg16i: + case ImageFormatRg32i: + case ImageFormatRg8ui: + case ImageFormatRg16ui: + case ImageFormatRg32ui: + return 2; + + case ImageFormatR11fG11fB10f: + return 3; + + case ImageFormatRgba8: + case ImageFormatRgba16: + case ImageFormatRgb10A2: + case ImageFormatRgba8Snorm: + case ImageFormatRgba16Snorm: + case ImageFormatRgba16f: + case ImageFormatRgba32f: + case ImageFormatRgba8i: + case ImageFormatRgba16i: + case ImageFormatRgba32i: + case ImageFormatRgba8ui: + case ImageFormatRgba16ui: + case ImageFormatRgba32ui: + case ImageFormatRgb10a2ui: + return 4; + 
+ default:
+ SPIRV_CROSS_THROW("Unrecognized typed image format.");
+ }
+}
+
+static string image_format_to_type(ImageFormat fmt)
+{
+ switch (fmt)
+ {
+ case ImageFormatR8:
+ case ImageFormatR16:
+ return "unorm float";
+ case ImageFormatRg8:
+ case ImageFormatRg16:
+ return "unorm float2";
+ case ImageFormatRgba8:
+ case ImageFormatRgba16:
+ return "unorm float4";
+ case ImageFormatRgb10A2:
+ return "unorm float4";
+
+ case ImageFormatR8Snorm:
+ case ImageFormatR16Snorm:
+ return "snorm float";
+ case ImageFormatRg8Snorm:
+ case ImageFormatRg16Snorm:
+ return "snorm float2";
+ case ImageFormatRgba8Snorm:
+ case ImageFormatRgba16Snorm:
+ return "snorm float4";
+
+ case ImageFormatR16f:
+ case ImageFormatR32f:
+ return "float";
+ case ImageFormatRg16f:
+ case ImageFormatRg32f:
+ return "float2";
+ case ImageFormatRgba16f:
+ case ImageFormatRgba32f:
+ return "float4";
+
+ case ImageFormatR11fG11fB10f:
+ return "float3";
+
+ case ImageFormatR8i:
+ case ImageFormatR16i:
+ case ImageFormatR32i:
+ return "int";
+ case ImageFormatRg8i:
+ case ImageFormatRg16i:
+ case ImageFormatRg32i:
+ return "int2";
+ case ImageFormatRgba8i:
+ case ImageFormatRgba16i:
+ case ImageFormatRgba32i:
+ return "int4";
+
+ case ImageFormatR8ui:
+ case ImageFormatR16ui:
+ case ImageFormatR32ui:
+ return "uint";
+ case ImageFormatRg8ui:
+ case ImageFormatRg16ui:
+ case ImageFormatRg32ui:
+ return "uint2";
+ case ImageFormatRgba8ui:
+ case ImageFormatRgba16ui:
+ case ImageFormatRgba32ui:
+ return "uint4";
+ case ImageFormatRgb10a2ui:
+ return "uint4";
+
+ default:
+ SPIRV_CROSS_THROW("Unrecognized typed image format.");
+ }
+}
+
 // Returns true if an arithmetic operation does not change behavior depending on signedness.
 static bool opcode_is_sign_invariant(Op opcode)
 {
@@ -48,20 +176,26 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type)
 {
 auto &imagetype = get(type.image.type);
 const char *dim = nullptr;
+ bool typed_load = false;
 uint32_t components = 4;
 switch (type.image.dim)
 {
 case Dim1D:
+ typed_load = type.image.sampled == 2;
 dim = "1D";
 break;
 case Dim2D:
+ typed_load = type.image.sampled == 2;
 dim = "2D";
 break;
 case Dim3D:
+ typed_load = type.image.sampled == 2;
 dim = "3D";
 break;
 case DimCube:
+ if (type.image.sampled == 2)
+ SPIRV_CROSS_THROW("RWTextureCube does not exist in HLSL.");
 dim = "Cube";
 break;
 case DimRect:
@@ -70,10 +204,7 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type)
 if (type.image.sampled == 1)
 return join("Buffer<", type_to_glsl(imagetype), components, ">");
 else if (type.image.sampled == 2)
- {
- SPIRV_CROSS_THROW("RWBuffer is not implemented yet for HLSL.");
- //return join("RWBuffer<", type_to_glsl(imagetype), components, ">");
- }
+ return join("RWBuffer<", image_format_to_type(type.image.format), ">");
 else
 SPIRV_CROSS_THROW("Sampler buffers must be either sampled or unsampled. Cannot deduce in runtime.");
 case DimSubpassData:
@@ -84,7 +215,9 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type)
 }
 const char *arrayed = type.image.arrayed ? "Array" : "";
 const char *ms = type.image.ms ? "MS" : "";
- return join("Texture", dim, ms, arrayed, "<", type_to_glsl(imagetype), components, ">");
+ const char *rw = typed_load ? "RW" : "";
+ return join(rw, "Texture", dim, ms, arrayed, "<",
+ typed_load ? 
image_format_to_type(type.image.format) : join(type_to_glsl(imagetype), components), ">"); } string CompilerHLSL::image_type_hlsl_legacy(const SPIRType &type) @@ -644,14 +777,15 @@ void CompilerHLSL::emit_specialization_constants() auto &type = get(c.constant_type); auto name = to_name(c.self); - statement("const ", variable_decl(type, name), " = ", constant_expression(c), ";"); + statement("static const ", variable_decl(type, name), " = ", constant_expression(c), ";"); emitted = true; } } if (workgroup_size_id) { - statement("const uint3 gl_WorkGroupSize = ", constant_expression(get(workgroup_size_id)), ";"); + statement("static const uint3 gl_WorkGroupSize = ", constant_expression(get(workgroup_size_id)), + ";"); emitted = true; } @@ -893,7 +1027,19 @@ void CompilerHLSL::emit_resources() if (var.storage != StorageClassOutput) { add_resource_name(var.self); - statement("static ", variable_decl(var), ";"); + + const char *storage = nullptr; + switch (var.storage) + { + case StorageClassWorkgroup: + storage = "groupshared"; + break; + + default: + storage = "static"; + break; + } + statement(storage, " ", variable_decl(var), ";"); emitted = true; } } @@ -905,11 +1051,21 @@ void CompilerHLSL::emit_resources() if (requires_op_fmod) { - statement("float mod(float x, float y)"); - begin_scope(); - statement("return x - y * floor(x / y);"); - end_scope(); - statement(""); + static const char *types[] = { + "float", + "float2", + "float3", + "float4", + }; + + for (auto &type : types) + { + statement(type, " mod(", type, " x, ", type, " y)"); + begin_scope(); + statement("return x - y * floor(x / y);"); + end_scope(); + statement(""); + } } if (requires_textureProj) @@ -1017,11 +1173,57 @@ void CompilerHLSL::emit_resources() } } -string CompilerHLSL::layout_for_member(const SPIRType &, uint32_t) +string CompilerHLSL::layout_for_member(const SPIRType &type, uint32_t index) { + auto flags = combined_decoration_for_member(type, index); + + bool is_block = (meta[type.self].decoration.decoration_flags & + ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) != 0; + + if (!is_block) + return ""; + + // Flip the convention. HLSL is a bit odd in that the memory layout is column major ... but the language API is "row-major". + // The way to deal with this is to multiply everything in inverse order, and reverse the memory layout. 
+ if (flags & (1ull << DecorationColMajor)) + return "row_major "; + else if (flags & (1ull << DecorationRowMajor)) + return "column_major "; + return ""; } +void CompilerHLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, + const string &qualifier) +{ + auto &membertype = get(member_type_id); + + uint64_t memberflags = 0; + auto &memb = meta[type.self].members; + if (index < memb.size()) + memberflags = memb[index].decoration_flags; + + string qualifiers; + bool is_block = (meta[type.self].decoration.decoration_flags & + ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) != 0; + if (is_block) + qualifiers = to_interpolation_qualifiers(memberflags); + + string packing_offset; + if (has_decoration(type.self, DecorationCPacked) && has_member_decoration(type.self, index, DecorationOffset)) + { + uint32_t offset = memb[index].offset; + if (offset & 3) + SPIRV_CROSS_THROW("Cannot pack on tighter bounds than 4 bytes in HLSL."); + + static const char *packing_swizzle[] = { "", ".y", ".z", ".w" }; + packing_offset = join(" : packoffset(c", offset / 16, packing_swizzle[(offset & 15) >> 2], ")"); + } + + statement(layout_for_member(type, index), qualifiers, qualifier, + variable_decl(membertype, to_member_name(type, index)), packing_offset, ";"); +} + void CompilerHLSL::emit_buffer_block(const SPIRVariable &var) { auto &type = get(var.basetype); @@ -1038,43 +1240,53 @@ void CompilerHLSL::emit_buffer_block(const SPIRVariable &var) } else { - add_resource_name(type.self); - add_resource_name(var.self); - - string struct_name; - if (options.shader_model >= 51) - struct_name = to_name(type.self); - else - struct_name = join("_", to_name(type.self)); - - // First, declare the struct of the UBO. - statement("struct ", struct_name); - begin_scope(); - - type.member_name_cache.clear(); - - uint32_t i = 0; - for (auto &member : type.member_types) + if (type.array.empty()) { - add_member_name(type, i); - emit_struct_member(type, member, i); - i++; - } - end_scope_decl(); - statement(""); + if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset)) + set_decoration(type.self, DecorationCPacked); + else + SPIRV_CROSS_THROW("cbuffer cannot be expressed with either HLSL packing layout or packoffset."); - if (options.shader_model >= 51) // SM 5.1 uses ConstantBuffer instead of cbuffer. - { - statement("ConstantBuffer<", struct_name, "> ", to_name(var.self), type_to_array_glsl(type), - to_resource_binding(var), ";"); - } - else - { - statement("cbuffer ", to_name(type.self), to_resource_binding(var)); + // Flatten the top-level struct so we can use packoffset, + // this restriction is similar to GLSL where layout(offset) is not possible on sub-structs. 
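For reference, the packoffset arithmetic in emit_struct_member above, as a standalone sketch (hypothetical helper name): each register c<N> spans 16 bytes, and .x/.y/.z/.w address its 4-byte lanes.

#include <cassert>
#include <cstdint>
#include <string>

// byte offset -> HLSL packoffset register, matching the code above.
static std::string to_packoffset(uint32_t offset)
{
    static const char *swizzle[] = { "", ".y", ".z", ".w" };
    assert((offset & 3) == 0); // tighter than 4-byte packing is not expressible
    return "c" + std::to_string(offset / 16) + swizzle[(offset & 15) >> 2];
}

int main()
{
    assert(to_packoffset(0) == "c0");
    assert(to_packoffset(20) == "c1.y"); // 16 + 4: second register, lane y
    assert(to_packoffset(28) == "c1.w");
}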
+ flattened_structs.insert(var.self);
+
+ type.member_name_cache.clear();
+ add_resource_name(var.self);
+ statement("cbuffer ", to_name(var.self), to_resource_binding(var));
 begin_scope();
- statement(struct_name, " ", to_name(var.self), type_to_array_glsl(type), ";");
+
+ uint32_t i = 0;
+ for (auto &member : type.member_types)
+ {
+ add_member_name(type, i);
+ auto backup_name = get_member_name(type.self, i);
+ auto member_name = to_member_name(type, i);
+ set_member_name(type.self, i, sanitize_underscores(join(to_name(type.self), "_", member_name)));
+ emit_struct_member(type, member, i, "");
+ set_member_name(type.self, i, backup_name);
+ i++;
+ }
+ end_scope_decl();
 }
+ else
+ {
+ if (options.shader_model < 51)
+ SPIRV_CROSS_THROW(
+ "Need ConstantBuffer to use arrays of UBOs, but this is only supported in SM 5.1.");
+
+ // ConstantBuffer does not support packoffset, so it is unusable unless everything aligns as we expect.
+ if (!buffer_is_packing_standard(type, BufferPackingHLSLCbuffer))
+ SPIRV_CROSS_THROW("HLSL ConstantBuffer cannot be expressed with normal HLSL packing rules.");
+
+ add_resource_name(type.self);
+ add_resource_name(var.self);
+
+ emit_struct(get(type.self));
+ statement("ConstantBuffer<", to_name(type.self), "> ", to_name(var.self), type_to_array_glsl(type),
+ to_resource_binding(var), ";");
+ }
 }
}
@@ -1790,10 +2002,16 @@ string CompilerHLSL::to_resource_binding(const SPIRVariable &var)
 switch (type.basetype)
 {
 case SPIRType::SampledImage:
- case SPIRType::Image:
 space = "t"; // SRV
 break;

+ case SPIRType::Image:
+ if (type.image.sampled == 2)
+ space = "u"; // UAV
+ else
+ space = "t"; // SRV
+ break;
+
 case SPIRType::Sampler:
 space = "s";
 break;
@@ -1806,20 +2024,10 @@ string CompilerHLSL::to_resource_binding(const SPIRVariable &var)
 if (has_decoration(type.self, DecorationBufferBlock))
 space = "u"; // UAV
 else if (has_decoration(type.self, DecorationBlock))
- {
- if (options.shader_model >= 40)
- space = "b"; // Constant buffers
- else
- space = "c"; // Constant buffers
- }
+ space = "b"; // Constant buffers
 }
 else if (storage == StorageClassPushConstant)
- {
- if (options.shader_model >= 40)
- space = "b"; // Constant buffers
- else
- space = "c"; // Constant buffers
- }
+ space = "b"; // Constant buffers
 else if (storage == StorageClassStorageBuffer)
 space = "u"; // UAV
@@ -1984,36 +2192,117 @@ string CompilerHLSL::read_access_chain(const SPIRAccessChain &chain)
 target_type.vecsize = type.vecsize;
 target_type.columns = type.columns;

- // FIXME: Transposition?
- if (type.columns != 1)
- SPIRV_CROSS_THROW("Reading matrices from ByteAddressBuffer not yet supported.");
-
 if (type.basetype == SPIRType::Struct)
 SPIRV_CROSS_THROW("Reading structs from ByteAddressBuffer not yet supported.");

 if (type.width != 32)
 SPIRV_CROSS_THROW("Reading types other than 32-bit from ByteAddressBuffer not yet supported.");

+ if (!type.array.empty())
+ SPIRV_CROSS_THROW("Reading arrays from ByteAddressBuffer not yet supported.");
+
+ string load_expr;
+
+ // Load a vector or scalar.
+ if (type.columns == 1 && !chain.row_major_matrix) { - case 1: - load_op = "Load"; - break; - case 2: - load_op = "Load2"; - break; - case 3: - load_op = "Load3"; - break; - case 4: - load_op = "Load4"; - break; - default: - SPIRV_CROSS_THROW("Unknown vector size."); + const char *load_op = nullptr; + switch (type.vecsize) + { + case 1: + load_op = "Load"; + break; + case 2: + load_op = "Load2"; + break; + case 3: + load_op = "Load3"; + break; + case 4: + load_op = "Load4"; + break; + default: + SPIRV_CROSS_THROW("Unknown vector size."); + } + + load_expr = join(chain.base, ".", load_op, "(", chain.dynamic_index, chain.static_index, ")"); + } + else if (type.columns == 1) + { + // Strided load since we are loading a column from a row-major matrix. + if (type.vecsize > 1) + { + load_expr = type_to_glsl(target_type); + load_expr += "("; + } + + for (uint32_t r = 0; r < type.vecsize; r++) + { + load_expr += + join(chain.base, ".Load(", chain.dynamic_index, chain.static_index + r * chain.matrix_stride, ")"); + if (r + 1 < type.vecsize) + load_expr += ", "; + } + + if (type.vecsize > 1) + load_expr += ")"; + } + else if (!chain.row_major_matrix) + { + // Load a matrix, column-major, the easy case. + const char *load_op = nullptr; + switch (type.vecsize) + { + case 1: + load_op = "Load"; + break; + case 2: + load_op = "Load2"; + break; + case 3: + load_op = "Load3"; + break; + case 4: + load_op = "Load4"; + break; + default: + SPIRV_CROSS_THROW("Unknown vector size."); + } + + // Note, this loading style in HLSL is *actually* row-major, but we always treat matrices as transposed in this backend, + // so row-major is technically column-major ... + load_expr = type_to_glsl(target_type); + load_expr += "("; + for (uint32_t c = 0; c < type.columns; c++) + { + load_expr += join(chain.base, ".", load_op, "(", chain.dynamic_index, + chain.static_index + c * chain.matrix_stride, ")"); + if (c + 1 < type.columns) + load_expr += ", "; + } + load_expr += ")"; + } + else + { + // Pick out elements one by one ... Hopefully compilers are smart enough to recognize this pattern + // considering HLSL is "row-major decl", but "column-major" memory layout (basically implicit transpose model, ugh) ... + + load_expr = type_to_glsl(target_type); + load_expr += "("; + for (uint32_t c = 0; c < type.columns; c++) + { + for (uint32_t r = 0; r < type.vecsize; r++) + { + load_expr += join(chain.base, ".Load(", chain.dynamic_index, + chain.static_index + c * (type.width / 8) + r * chain.matrix_stride, ")"); + + if ((r + 1 < type.vecsize) || (c + 1 < type.columns)) + load_expr += ", "; + } + } + load_expr += ")"; } - auto load_expr = join(chain.base, ".", load_op, "(", chain.dynamic_index, chain.static_index, ")"); auto bitcast_op = bitcast_glsl_op(type, target_type); if (!bitcast_op.empty()) load_expr = join(bitcast_op, "(", load_expr, ")"); @@ -2035,27 +2324,38 @@ void CompilerHLSL::emit_load(const Instruction &instruction) auto load_expr = read_access_chain(*chain); bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries); + + // Do not forward complex load sequences like matrices, structs and arrays. + auto &type = get(result_type); + if (type.columns > 1 || !type.array.empty() || type.basetype == SPIRType::Struct) + forward = false; + auto &e = emit_op(result_type, id, load_expr, forward, true); - e.need_transpose = false; // TODO: Forward this somehow. 
+ e.need_transpose = false;
 register_read(id, ptr, forward);
 }
 else
 CompilerGLSL::emit_instruction(instruction);
}

-void CompilerHLSL::emit_store(const Instruction &instruction)
+void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t value)
{
- auto ops = stream(instruction);
- auto *chain = maybe_get(ops[0]);
- if (chain)
+ auto &type = get(chain.basetype);
+
+ SPIRType target_type;
+ target_type.basetype = SPIRType::UInt;
+ target_type.vecsize = type.vecsize;
+ target_type.columns = type.columns;
+
+ if (type.basetype == SPIRType::Struct)
+ SPIRV_CROSS_THROW("Writing structs to RWByteAddressBuffer not yet supported.");
+ if (type.width != 32)
+ SPIRV_CROSS_THROW("Writing types other than 32-bit to RWByteAddressBuffer not yet supported.");
+ if (!type.array.empty())
+ SPIRV_CROSS_THROW("Writing arrays to RWByteAddressBuffer not yet supported.");
+
+ if (type.columns == 1 && !chain.row_major_matrix)
 {
- auto &type = expression_type(ops[0]);
-
- SPIRType target_type;
- target_type.basetype = SPIRType::UInt;
- target_type.vecsize = type.vecsize;
- target_type.columns = type.columns;
-
 const char *store_op = nullptr;
 switch (type.vecsize)
 {
@@ -2075,20 +2375,87 @@ void CompilerHLSL::emit_store(const Instruction &instruction)
 SPIRV_CROSS_THROW("Unknown vector size.");
 }

- if (type.columns != 1)
- SPIRV_CROSS_THROW("Writing matrices to RWByteAddressBuffer not yet supported.");
- if (type.basetype == SPIRType::Struct)
- SPIRV_CROSS_THROW("Writing structs to RWByteAddressBuffer not yet supported.");
- if (type.width != 32)
- SPIRV_CROSS_THROW("Writing types other than 32-bit to RWByteAddressBuffer not yet supported.");
-
- auto store_expr = to_expression(ops[1]);
+ auto store_expr = to_expression(value);
 auto bitcast_op = bitcast_glsl_op(target_type, type);
 if (!bitcast_op.empty())
 store_expr = join(bitcast_op, "(", store_expr, ")");
- statement(chain->base, ".", store_op, "(", chain->dynamic_index, chain->static_index, ", ", store_expr, ");");
- register_write(ops[0]);
+ statement(chain.base, ".", store_op, "(", chain.dynamic_index, chain.static_index, ", ", store_expr, ");");
 }
+ else if (type.columns == 1)
+ {
+ // Strided store.
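The loop that follows emits one Store per component, matrix_stride bytes apart. Its output shape, sketched with hypothetical names (buf, v) standing in for the generated HLSL:

#include <cstdio>

// Strided store of a 3-component vector whose elements sit matrix_stride
// bytes apart in memory (a column of a row-major matrix).
int main()
{
    const unsigned base = 64, matrix_stride = 16;
    const char swz[] = { 'x', 'y', 'z', 'w' };
    for (unsigned r = 0; r < 3; r++)
        printf("buf.Store(%u, asuint(v.%c));\n", base + matrix_stride * r, swz[r]);
    // buf.Store(64, asuint(v.x));
    // buf.Store(80, asuint(v.y));
    // buf.Store(96, asuint(v.z));
}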
+ for (uint32_t r = 0; r < type.vecsize; r++) + { + auto store_expr = to_enclosed_expression(value); + if (type.vecsize > 1) + { + store_expr += "."; + store_expr += index_to_swizzle(r); + } + + auto bitcast_op = bitcast_glsl_op(target_type, type); + if (!bitcast_op.empty()) + store_expr = join(bitcast_op, "(", store_expr, ")"); + statement(chain.base, ".Store(", chain.dynamic_index, chain.static_index + chain.matrix_stride * r, ", ", + store_expr, ");"); + } + } + else if (!chain.row_major_matrix) + { + const char *store_op = nullptr; + switch (type.vecsize) + { + case 1: + store_op = "Store"; + break; + case 2: + store_op = "Store2"; + break; + case 3: + store_op = "Store3"; + break; + case 4: + store_op = "Store4"; + break; + default: + SPIRV_CROSS_THROW("Unknown vector size."); + } + + for (uint32_t c = 0; c < type.columns; c++) + { + auto store_expr = join(to_enclosed_expression(value), "[", c, "]"); + auto bitcast_op = bitcast_glsl_op(target_type, type); + if (!bitcast_op.empty()) + store_expr = join(bitcast_op, "(", store_expr, ")"); + statement(chain.base, ".", store_op, "(", chain.dynamic_index, chain.static_index + c * chain.matrix_stride, + ", ", store_expr, ");"); + } + } + else + { + for (uint32_t r = 0; r < type.vecsize; r++) + { + for (uint32_t c = 0; c < type.columns; c++) + { + auto store_expr = join(to_enclosed_expression(value), "[", c, "].", index_to_swizzle(r)); + auto bitcast_op = bitcast_glsl_op(target_type, type); + if (!bitcast_op.empty()) + store_expr = join(bitcast_op, "(", store_expr, ")"); + statement(chain.base, ".Store(", chain.dynamic_index, + chain.static_index + c * (type.width / 8) + r * chain.matrix_stride, ", ", store_expr, ");"); + } + } + } + + register_write(chain.self); +} + +void CompilerHLSL::emit_store(const Instruction &instruction) +{ + auto ops = stream(instruction); + auto *chain = maybe_get(ops[0]); + if (chain) + write_access_chain(*chain, ops[1]); else CompilerGLSL::emit_instruction(instruction); } @@ -2147,19 +2514,29 @@ void CompilerHLSL::emit_access_chain(const Instruction &instruction) } uint32_t matrix_stride = 0; - bool need_transpose = false; + bool row_major_matrix = false; + + // Inherit matrix information. + if (chain) + { + matrix_stride = chain->matrix_stride; + row_major_matrix = chain->row_major_matrix; + } + auto offsets = flattened_access_chain_offset(*basetype, &ops[3 + to_plain_buffer_length], - length - 3 - to_plain_buffer_length, 0, 1, &need_transpose, &matrix_stride); + length - 3 - to_plain_buffer_length, 0, 1, &row_major_matrix, &matrix_stride); auto &e = set(ops[1], ops[0], type.storage, base, offsets.first, offsets.second); + e.row_major_matrix = row_major_matrix; + e.matrix_stride = matrix_stride; + e.immutable = should_forward(ops[2]); + if (chain) { e.dynamic_index += chain->dynamic_index; e.static_index += chain->static_index; } - - e.immutable = should_forward(ops[2]); } else { @@ -2167,6 +2544,101 @@ void CompilerHLSL::emit_access_chain(const Instruction &instruction) } } +void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op) +{ + const char *atomic_op = nullptr; + auto value_expr = to_expression(ops[op == OpAtomicCompareExchange ? 
6 : 5]);
+
+ switch (op)
+ {
+ case OpAtomicISub:
+ atomic_op = "InterlockedAdd";
+ value_expr = join("-", enclose_expression(value_expr));
+ break;
+
+ case OpAtomicSMin:
+ case OpAtomicUMin:
+ atomic_op = "InterlockedMin";
+ break;
+
+ case OpAtomicSMax:
+ case OpAtomicUMax:
+ atomic_op = "InterlockedMax";
+ break;
+
+ case OpAtomicAnd:
+ atomic_op = "InterlockedAnd";
+ break;
+
+ case OpAtomicOr:
+ atomic_op = "InterlockedOr";
+ break;
+
+ case OpAtomicXor:
+ atomic_op = "InterlockedXor";
+ break;
+
+ case OpAtomicIAdd:
+ atomic_op = "InterlockedAdd";
+ break;
+
+ case OpAtomicExchange:
+ atomic_op = "InterlockedExchange";
+ break;
+
+ case OpAtomicCompareExchange:
+ if (length < 8)
+ SPIRV_CROSS_THROW("Not enough data for opcode.");
+ atomic_op = "InterlockedCompareExchange";
+ value_expr = join(to_expression(ops[7]), ", ", value_expr);
+ break;
+
+ default:
+ SPIRV_CROSS_THROW("Unknown atomic opcode.");
+ }
+
+ if (length < 6)
+ SPIRV_CROSS_THROW("Not enough data for opcode.");
+
+ uint32_t result_type = ops[0];
+ uint32_t id = ops[1];
+ forced_temporaries.insert(ops[1]);
+
+ auto &type = get(result_type);
+ statement(variable_decl(type, to_name(id)), ";");
+
+ auto &data_type = expression_type(ops[2]);
+ auto *chain = maybe_get(ops[2]);
+ SPIRType::BaseType expression_type;
+ if (data_type.storage == StorageClassImage || !chain)
+ {
+ statement(atomic_op, "(", to_expression(ops[2]), ", ", value_expr, ", ", to_name(id), ");");
+ expression_type = data_type.basetype;
+ }
+ else
+ {
+ // RWByteAddressBuffer is always uint in its underlying type.
+ expression_type = SPIRType::UInt;
+ statement(chain->base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ", value_expr, ", ",
+ to_name(id), ");");
+ }
+
+ auto expr = bitcast_expression(type, expression_type, to_name(id));
+ set(id, expr, result_type, true);
+ flush_all_atomic_capable_variables();
+ register_read(ops[1], ops[2], should_forward(ops[2]));
+}
+
+const Instruction *CompilerHLSL::get_next_instruction_in_block(const Instruction &instr)
+{
+ // FIXME: This is kind of hacky. There should be a cleaner way.
+ uint32_t offset = uint32_t(&instr - current_emitting_block->ops.data());
+ if ((offset + 1) < current_emitting_block->ops.size())
+ return &current_emitting_block->ops[offset + 1];
+ else
+ return nullptr;
+}
+
 void CompilerHLSL::emit_instruction(const Instruction &instruction)
 {
 auto ops = stream(instruction);
@@ -2491,6 +2963,126 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 break;
 }

+ case OpImageRead:
+ {
+ uint32_t result_type = ops[0];
+ uint32_t id = ops[1];
+ auto *var = maybe_get_backing_variable(ops[2]);
+ auto imgexpr = join(to_expression(ops[2]), "[", to_expression(ops[3]), "]");
+
+ // The underlying image type in HLSL depends on the image format, unlike GLSL, where all image loads
+ // return a "vec4" and the format only affects how the data is interpreted.
+ if (var)
+ imgexpr = remap_swizzle(get(result_type),
+ image_format_to_components(get(var->basetype).image.format), imgexpr);
+
+ if (var && var->forwardable)
+ {
+ auto &e = emit_op(result_type, id, imgexpr, true);
+ e.loaded_from = var->self;
+ var->dependees.push_back(id);
+ }
+ else
+ emit_op(result_type, id, imgexpr, false);
+ break;
+ }
+
+ case OpImageWrite:
+ {
+ auto *var = maybe_get_backing_variable(ops[0]);
+
+ // The underlying image type in HLSL depends on the image format, unlike GLSL, where all image stores
+ // take a "vec4" and the format only affects how the data is interpreted.
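Both directions use the same narrowing or widening. For loads it behaves as sketched below (illustrative names, mirroring how remap_swizzle is used in this diff): an R32f RWTexture2D<float> load is splatted back to the four components SPIR-V expects, while two- and three-component formats get a clamped swizzle.

#include <cassert>
#include <cstdint>
#include <string>

// Widen a typed-UAV load back to 4 components: 1 component splats via a
// constructor, 2 or 3 get a clamped swizzle, 4 passes through untouched.
static std::string widen_load(const std::string &expr, uint32_t format_components)
{
    if (format_components == 4)
        return expr;
    if (format_components == 1)
        return "float4(" + expr + ")";
    static const char *tails[] = { "", ".xyyy", ".xyzz" };
    return expr + tails[format_components - 1];
}

int main()
{
    assert(widen_load("img[coord]", 1) == "float4(img[coord])"); // e.g. R32f
    assert(widen_load("img[coord]", 2) == "img[coord].xyyy");    // e.g. Rg32f
    assert(widen_load("img[coord]", 4) == "img[coord]");         // e.g. Rgba32f
}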
+ auto value_expr = to_expression(ops[2]); + if (var) + { + auto &type = get(var->basetype); + auto narrowed_type = get(type.image.type); + narrowed_type.vecsize = image_format_to_components(type.image.format); + value_expr = remap_swizzle(narrowed_type, expression_type(ops[2]).vecsize, value_expr); + } + + statement(to_expression(ops[0]), "[", to_expression(ops[1]), "] = ", value_expr, ";"); + if (var && variable_storage_is_aliased(*var)) + flush_all_aliased_variables(); + break; + } + + case OpImageTexelPointer: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + auto &e = + set(id, join(to_expression(ops[2]), "[", to_expression(ops[3]), "]"), result_type, true); + + // When using the pointer, we need to know which variable it is actually loaded from. + auto *var = maybe_get_backing_variable(ops[2]); + e.loaded_from = var ? var->self : 0; + break; + } + + case OpAtomicCompareExchange: + case OpAtomicExchange: + case OpAtomicISub: + case OpAtomicSMin: + case OpAtomicUMin: + case OpAtomicSMax: + case OpAtomicUMax: + case OpAtomicAnd: + case OpAtomicOr: + case OpAtomicXor: + case OpAtomicIAdd: + { + emit_atomic(ops, instruction.length, opcode); + break; + } + + case OpMemoryBarrier: + { + uint32_t mem = get(ops[1]).scalar(); + + // If the next instruction is OpControlBarrier and it does what we need, this opcode can be a noop. + const Instruction *next = get_next_instruction_in_block(instruction); + if (next && next->op == OpControlBarrier) + { + auto *next_ops = stream(*next); + uint32_t next_mem = get(next_ops[2]).scalar(); + next_mem |= MemorySemanticsWorkgroupMemoryMask; // Barrier in HLSL always implies GroupSync. + if ((next_mem & mem) == mem) + break; + } + + // We cannot forward any loads beyond the memory barrier. + if (mem) + flush_all_active_variables(); + + if (mem == MemorySemanticsWorkgroupMemoryMask) + statement("GroupMemoryBarrier();"); + else if (mem) + statement("DeviceMemoryBarrier();"); + break; + } + + case OpControlBarrier: + { + uint32_t mem = get(ops[2]).scalar(); + + // We cannot forward any loads beyond the memory barrier. + if (mem) + flush_all_active_variables(); + + if (mem == MemorySemanticsWorkgroupMemoryMask) + statement("GroupMemoryBarrierWithGroupSync();"); + else if (mem) + statement("DeviceMemoryBarrierWithGroupSync();"); + else + { + // There is no "GroupSync" standalone function. 
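(Stepping back to the OpMemoryBarrier fusion above: it reduces to the check below, with the mask value taken from the SPIR-V headers. A standalone sketch, not the SPIRV-Cross implementation.)

#include <cassert>
#include <cstdint>

// MemorySemanticsWorkgroupMemoryMask from spv.hpp.
enum : uint32_t { WorkgroupMemoryMask = 0x100 };

// An OpMemoryBarrier can be dropped when the OpControlBarrier that follows
// already covers its semantics, because the HLSL group-sync intrinsics
// imply the corresponding memory barrier.
static bool memory_barrier_redundant(uint32_t mem, uint32_t next_control_mem)
{
    next_control_mem |= WorkgroupMemoryMask; // barrier in HLSL implies GroupSync
    return (next_control_mem & mem) == mem;
}

int main()
{
    // memoryBarrierShared(); barrier(); -> one GroupMemoryBarrierWithGroupSync().
    assert(memory_barrier_redundant(WorkgroupMemoryMask, 0));
}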
+ statement("GroupMemoryBarrierWithGroupSync();"); + } + break; + } + default: CompilerGLSL::emit_instruction(instruction); break; diff --git a/spirv_hlsl.hpp b/spirv_hlsl.hpp index b2598722..11134a88 100644 --- a/spirv_hlsl.hpp +++ b/spirv_hlsl.hpp @@ -91,7 +91,13 @@ private: void emit_access_chain(const Instruction &instruction); void emit_load(const Instruction &instruction); std::string read_access_chain(const SPIRAccessChain &chain); + void write_access_chain(const SPIRAccessChain &chain, uint32_t value); void emit_store(const Instruction &instruction); + void emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op); + const Instruction *get_next_instruction_in_block(const Instruction &instr); + + void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, + const std::string &qualifier) override; const char *to_storage_qualifiers_glsl(const SPIRVariable &var) override; diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 75858de3..98491dbf 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -1515,7 +1515,7 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) emit_barrier(ops[0], ops[1], ops[2]); break; - // OpOuterProduct + // OpOuterProduct default: CompilerGLSL::emit_instruction(instruction); @@ -1820,10 +1820,10 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, break; } - // TODO: - // GLSLstd450InterpolateAtCentroid (centroid_no_perspective qualifier) - // GLSLstd450InterpolateAtSample (sample_no_perspective qualifier) - // GLSLstd450InterpolateAtOffset + // TODO: + // GLSLstd450InterpolateAtCentroid (centroid_no_perspective qualifier) + // GLSLstd450InterpolateAtSample (sample_no_perspective qualifier) + // GLSLstd450InterpolateAtOffset default: CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
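A closing note on the mod() helpers emitted in emit_resources above: GLSL mod() and HLSL fmod() disagree for negative operands (mod() takes the sign of y, fmod() the sign of x), which is why the generated HLSL defines its own helper per vector width instead of mapping to fmod(). A minimal demonstration of the difference:

#include <cmath>
#include <cstdio>

// Same definition the generated HLSL uses: x - y * floor(x / y).
static float glsl_mod(float x, float y)
{
    return x - y * std::floor(x / y);
}

int main()
{
    printf("glsl_mod(-1, 3) = %.1f\n", glsl_mod(-1.0f, 3.0f));       // 2.0
    printf("fmod(-1, 3)     = %.1f\n", std::fmod(-1.0f, 3.0f));      // -1.0
}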