From 52c7c2dab6802a94d358fac49d752b28a0d99823 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Mon, 20 Jun 2022 20:21:00 -0400 Subject: [PATCH 1/4] MSL: Add support for SPV_KHR_physical_storage_buffer extension. - Determine sizing and alignments of pointers to types as distinct from the size and alignment of the types themselves. - Declare all buffer pointers in the MSL device address space. - Support struct pointer recursion, where structs can contain pointers to themselves or to a parent struct. - Add SPIRType::was_forward_referenced to track if a type was forward referenced, to help emit MSL structs in the correct dependency order. - Handle pointers to pointers that are not just arrays of arrays. --- ...ddress-recursive-struct-pointers.msl2.comp | 94 +++++++++++++++++++ .../comp/buffer_device_address.msl2.comp | 52 ++++++++++ .../vert/buffer_device_address.msl2.vert | 59 ++++++++++++ ...ddress-recursive-struct-pointers.msl2.comp | 65 +++++++++++++ .../comp/buffer_device_address.msl2.comp | 40 ++++++++ .../vert/buffer_device_address.msl2.vert | 46 +++++++++ ...ddress-recursive-struct-pointers.msl2.comp | 64 +++++++++++++ .../comp/buffer_device_address.msl2.comp | 86 +++++++++++++++++ .../vert/buffer_device_address.msl2.vert | 83 ++++++++++++++++ spirv_common.hpp | 1 + spirv_msl.cpp | 55 +++++++++-- spirv_parser.cpp | 11 +++ 12 files changed, 647 insertions(+), 9 deletions(-) create mode 100644 reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp create mode 100644 reference/opt/shaders-msl/comp/buffer_device_address.msl2.comp create mode 100644 reference/opt/shaders-msl/vert/buffer_device_address.msl2.vert create mode 100644 reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp create mode 100644 reference/shaders-msl/comp/buffer_device_address.msl2.comp create mode 100644 reference/shaders-msl/vert/buffer_device_address.msl2.vert create mode 100644 shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp create mode 100644 shaders-msl/comp/buffer_device_address.msl2.comp create mode 100644 shaders-msl/vert/buffer_device_address.msl2.vert diff --git a/reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp b/reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp new file mode 100644 index 00000000..7b4564c4 --- /dev/null +++ b/reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp @@ -0,0 +1,94 @@ +#include +#include + +using namespace metal; + +struct t21 +{ + int4 m0[2]; + int m1; + device t21* m2[2]; + device t21* m3; + float2x4 m4; +}; + +struct t24 +{ + int4 m0[2]; + int m1; + device t21* m2[2]; + device t21* m3; + float2x4 m4; +}; + +struct t35 +{ + int m0[32]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(constant t24& u24 [[buffer(0)]], constant t35& u35 [[buffer(1)]], texture2d v295 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + int v8 = 0; + int _30 = 0 | (u24.m0[0].x - 0); + v8 = _30; + int _44 = _30 | (u24.m0[u35.m0[1]].x - 1); + v8 = _44; + int _50 = _44 | (u24.m1 - 2); + v8 = _50; + int _60 = _50 | int(u24.m4[0u][0] - 3.0); + v8 = _60; + int _68 = _60 | int(u24.m4[1u][0] - 5.0); + v8 = _68; + int _75 = _68 | int(u24.m4[0u][1] - 4.0); + v8 = _75; + int _82 = _75 | int(u24.m4[1u][1] - 6.0); + v8 = _82; + int _92 = _82 | (u24.m2[0]->m0[0].x - 3); + v8 = _92; + int _101 = _92 | (u24.m2[0]->m0[u35.m0[1]].x - 4); + v8 = 
_101; + int _109 = _101 | (u24.m2[0]->m1 - 5); + v8 = _109; + int _118 = _109 | int(u24.m2[0]->m4[0u][0] - 6.0); + v8 = _118; + int _127 = _118 | int(u24.m2[0]->m4[1u][0] - 8.0); + v8 = _127; + int _136 = _127 | int(u24.m2[0]->m4[0u][1] - 7.0); + v8 = _136; + int _145 = _136 | int(u24.m2[0]->m4[1u][1] - 9.0); + v8 = _145; + int _155 = _145 | (u24.m2[u35.m0[1]]->m0[0].x - 6); + v8 = _155; + int _167 = _155 | (u24.m2[u35.m0[1]]->m0[u35.m0[1]].x - 7); + v8 = _167; + int _177 = _167 | (u24.m2[u35.m0[1]]->m1 - 8); + v8 = _177; + int _187 = _177 | int(u24.m2[u35.m0[1]]->m4[0u][0] - 9.0); + v8 = _187; + int _198 = _187 | int(u24.m2[u35.m0[1]]->m4[1u][0] - 11.0); + v8 = _198; + int _209 = _198 | int(u24.m2[u35.m0[1]]->m4[0u][1] - 10.0); + v8 = _209; + int _220 = _209 | int(u24.m2[u35.m0[1]]->m4[1u][1] - 12.0); + v8 = _220; + int _228 = _220 | (u24.m3->m0[0].x - 9); + v8 = _228; + int _238 = _228 | (u24.m3->m0[u35.m0[1]].x - 10); + v8 = _238; + int _246 = _238 | (u24.m3->m1 - 11); + v8 = _246; + int _254 = _246 | int(u24.m3->m4[0u][0] - 12.0); + v8 = _254; + int _263 = _254 | int(u24.m3->m4[1u][0] - 14.0); + v8 = _263; + int _272 = _263 | int(u24.m3->m4[0u][1] - 13.0); + v8 = _272; + int _281 = _272 | int(u24.m3->m4[1u][1] - 15.0); + v8 = _281; + uint4 _292 = select(uint4(1u, 0u, 0u, 1u), uint4(0u), bool4(_281 != 0)); + uint4 v284 = _292; + v295.write(_292, uint2(int2(gl_GlobalInvocationID.xy))); +} + diff --git a/reference/opt/shaders-msl/comp/buffer_device_address.msl2.comp b/reference/opt/shaders-msl/comp/buffer_device_address.msl2.comp new file mode 100644 index 00000000..ed18a406 --- /dev/null +++ b/reference/opt/shaders-msl/comp/buffer_device_address.msl2.comp @@ -0,0 +1,52 @@ +#include +#include + +using namespace metal; + +struct Position +{ + float2 positions[1]; +}; + +struct PositionReferences +{ + device Position* buffers[1]; +}; + +struct Registers +{ + device PositionReferences* references; + float fract_time; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 8u, 1u); + +kernel void main0(constant Registers& registers [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_NumWorkGroups [[threadgroups_per_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +{ + uint2 local_offset = gl_GlobalInvocationID.xy; + uint _19 = local_offset.y; + uint _29 = local_offset.x; + uint _30 = ((_19 * 8u) * gl_NumWorkGroups.x) + _29; + uint local_index = _30; + uint slice = gl_WorkGroupID.z; + device Position* positions = registers.references->buffers[gl_WorkGroupID.z]; + float _66 = float(gl_WorkGroupID.z); + float _70 = fract(fma(_66, 0.100000001490116119384765625, registers.fract_time)); + float _71 = 6.283125400543212890625 * _70; + float offset = _71; + float2 pos = float2(local_offset); + float _83 = sin(fma(2.2000000476837158203125, pos.x, _71)); + pos.x = fma(0.20000000298023223876953125, _83, pos.x); + float _97 = sin(fma(2.25, pos.y, _70 * 12.56625080108642578125)); + pos.y = fma(0.20000000298023223876953125, _97, pos.y); + float _111 = cos(fma(1.7999999523162841796875, pos.y, _70 * 18.849376678466796875)); + pos.x = fma(0.20000000298023223876953125, _111, pos.x); + float _125 = cos(fma(2.849999904632568359375, pos.x, _70 * 25.1325016021728515625)); + pos.y = fma(0.20000000298023223876953125, _125, pos.y); + float _133 = sin(_71); + pos.x = fma(0.5, _133, pos.x); + float _142 = sin(fma(6.283125400543212890625, _70, 0.300000011920928955078125)); + pos.y = fma(0.5, _142, pos.y); + 
registers.references->buffers[gl_WorkGroupID.z]->positions[_30] = (pos / ((float2(8.0) * float2(gl_NumWorkGroups.xy)) - float2(1.0))) - float2(0.5); +} + diff --git a/reference/opt/shaders-msl/vert/buffer_device_address.msl2.vert b/reference/opt/shaders-msl/vert/buffer_device_address.msl2.vert new file mode 100644 index 00000000..ff8b5f06 --- /dev/null +++ b/reference/opt/shaders-msl/vert/buffer_device_address.msl2.vert @@ -0,0 +1,59 @@ +#include +#include + +using namespace metal; + +struct Position +{ + float2 positions[1]; +}; + +struct PositionReferences +{ + device Position* buffers[1]; +}; + +struct Registers +{ + float4x4 view_projection; + device PositionReferences* references; +}; + +struct main0_out +{ + float4 out_color [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +vertex main0_out main0(constant Registers& registers [[buffer(0)]], uint gl_InstanceIndex [[instance_id]], uint gl_VertexIndex [[vertex_id]]) +{ + main0_out out = {}; + int slice = int(gl_InstanceIndex); + const device Position* positions = registers.references->buffers[int(gl_InstanceIndex)]; + float2 _45 = registers.references->buffers[int(gl_InstanceIndex)]->positions[int(gl_VertexIndex)] * 2.5; + float2 pos = _45; + float2 _60 = _45 + ((float2(float(int(gl_InstanceIndex) % 8), float(int(gl_InstanceIndex) / 8)) - float2(3.5)) * 3.0); + pos = _60; + out.gl_Position = registers.view_projection * float4(_60, 0.0, 1.0); + int _82 = int(gl_VertexIndex) % 16; + int index_x = _82; + int _85 = int(gl_VertexIndex) / 16; + int index_y = _85; + float _92 = sin(float(_82)); + float _94 = fma(0.300000011920928955078125, _92, 0.5); + float r = _94; + float _98 = sin(float(_85)); + float _100 = fma(0.300000011920928955078125, _98, 0.5); + float g = _100; + int _105 = (_82 ^ _85) & 1; + int checkerboard = _105; + float _107 = float(_105); + float _111 = fma(_107, 0.800000011920928955078125, 0.20000000298023223876953125); + float _113 = _94 * _111; + r = _113; + float _119 = _100 * _111; + g = _119; + out.out_color = float4(_113, _119, 0.1500000059604644775390625, 1.0); + return out; +} + diff --git a/reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp b/reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp new file mode 100644 index 00000000..e18b4a5c --- /dev/null +++ b/reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp @@ -0,0 +1,65 @@ +#include +#include + +using namespace metal; + +struct t21 +{ + int4 m0[2]; + int m1; + device t21* m2[2]; + device t21* m3; + float2x4 m4; +}; + +struct t24 +{ + int4 m0[2]; + int m1; + device t21* m2[2]; + device t21* m3; + float2x4 m4; +}; + +struct t35 +{ + int m0[32]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(constant t24& u24 [[buffer(0)]], constant t35& u35 [[buffer(1)]], texture2d v295 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + int v8 = 0; + v8 |= (u24.m0[0].x - 0); + v8 |= (u24.m0[u35.m0[1]].x - 1); + v8 |= (u24.m1 - 2); + v8 |= int(u24.m4[0u][0] - 3.0); + v8 |= int(u24.m4[1u][0] - 5.0); + v8 |= int(u24.m4[0u][1] - 4.0); + v8 |= int(u24.m4[1u][1] - 6.0); + v8 |= (u24.m2[0]->m0[0].x - 3); + v8 |= (u24.m2[0]->m0[u35.m0[1]].x - 4); + v8 |= (u24.m2[0]->m1 - 5); + v8 |= int(u24.m2[0]->m4[0u][0] - 6.0); + v8 |= int(u24.m2[0]->m4[1u][0] - 8.0); + v8 |= int(u24.m2[0]->m4[0u][1] - 7.0); + v8 |= int(u24.m2[0]->m4[1u][1] - 9.0); + v8 |= (u24.m2[u35.m0[1]]->m0[0].x - 6); + v8 |= 
(u24.m2[u35.m0[1]]->m0[u35.m0[1]].x - 7); + v8 |= (u24.m2[u35.m0[1]]->m1 - 8); + v8 |= int(u24.m2[u35.m0[1]]->m4[0u][0] - 9.0); + v8 |= int(u24.m2[u35.m0[1]]->m4[1u][0] - 11.0); + v8 |= int(u24.m2[u35.m0[1]]->m4[0u][1] - 10.0); + v8 |= int(u24.m2[u35.m0[1]]->m4[1u][1] - 12.0); + v8 |= (u24.m3->m0[0].x - 9); + v8 |= (u24.m3->m0[u35.m0[1]].x - 10); + v8 |= (u24.m3->m1 - 11); + v8 |= int(u24.m3->m4[0u][0] - 12.0); + v8 |= int(u24.m3->m4[1u][0] - 14.0); + v8 |= int(u24.m3->m4[0u][1] - 13.0); + v8 |= int(u24.m3->m4[1u][1] - 15.0); + uint4 v284 = select(uint4(1u, 0u, 0u, 1u), uint4(0u), bool4(v8 != 0)); + v295.write(v284, uint2(int2(gl_GlobalInvocationID.xy))); +} + diff --git a/reference/shaders-msl/comp/buffer_device_address.msl2.comp b/reference/shaders-msl/comp/buffer_device_address.msl2.comp new file mode 100644 index 00000000..523fcecd --- /dev/null +++ b/reference/shaders-msl/comp/buffer_device_address.msl2.comp @@ -0,0 +1,40 @@ +#include +#include + +using namespace metal; + +struct Position +{ + float2 positions[1]; +}; + +struct PositionReferences +{ + device Position* buffers[1]; +}; + +struct Registers +{ + device PositionReferences* references; + float fract_time; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 8u, 1u); + +kernel void main0(constant Registers& registers [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_NumWorkGroups [[threadgroups_per_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +{ + uint2 local_offset = gl_GlobalInvocationID.xy; + uint local_index = ((local_offset.y * 8u) * gl_NumWorkGroups.x) + local_offset.x; + uint slice = gl_WorkGroupID.z; + device Position* positions = registers.references->buffers[slice]; + float offset = 6.283125400543212890625 * fract(registers.fract_time + (float(slice) * 0.100000001490116119384765625)); + float2 pos = float2(local_offset); + pos.x += (0.20000000298023223876953125 * sin((2.2000000476837158203125 * pos.x) + offset)); + pos.y += (0.20000000298023223876953125 * sin((2.25 * pos.y) + (2.0 * offset))); + pos.x += (0.20000000298023223876953125 * cos((1.7999999523162841796875 * pos.y) + (3.0 * offset))); + pos.y += (0.20000000298023223876953125 * cos((2.849999904632568359375 * pos.x) + (4.0 * offset))); + pos.x += (0.5 * sin(offset)); + pos.y += (0.5 * sin(offset + 0.300000011920928955078125)); + positions->positions[local_index] = (pos / ((float2(8.0) * float2(gl_NumWorkGroups.xy)) - float2(1.0))) - float2(0.5); +} + diff --git a/reference/shaders-msl/vert/buffer_device_address.msl2.vert b/reference/shaders-msl/vert/buffer_device_address.msl2.vert new file mode 100644 index 00000000..3c4d65a5 --- /dev/null +++ b/reference/shaders-msl/vert/buffer_device_address.msl2.vert @@ -0,0 +1,46 @@ +#include +#include + +using namespace metal; + +struct Position +{ + float2 positions[1]; +}; + +struct PositionReferences +{ + device Position* buffers[1]; +}; + +struct Registers +{ + float4x4 view_projection; + device PositionReferences* references; +}; + +struct main0_out +{ + float4 out_color [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +vertex main0_out main0(constant Registers& registers [[buffer(0)]], uint gl_InstanceIndex [[instance_id]], uint gl_VertexIndex [[vertex_id]]) +{ + main0_out out = {}; + int slice = int(gl_InstanceIndex); + const device Position* positions = registers.references->buffers[slice]; + float2 pos = positions->positions[int(gl_VertexIndex)] * 2.5; + pos += ((float2(float(slice % 8), float(slice / 8)) - float2(3.5)) * 3.0); + 
out.gl_Position = registers.view_projection * float4(pos, 0.0, 1.0); + int index_x = int(gl_VertexIndex) % 16; + int index_y = int(gl_VertexIndex) / 16; + float r = 0.5 + (0.300000011920928955078125 * sin(float(index_x))); + float g = 0.5 + (0.300000011920928955078125 * sin(float(index_y))); + int checkerboard = (index_x ^ index_y) & 1; + r *= ((float(checkerboard) * 0.800000011920928955078125) + 0.20000000298023223876953125); + g *= ((float(checkerboard) * 0.800000011920928955078125) + 0.20000000298023223876953125); + out.out_color = float4(r, g, 0.1500000059604644775390625, 1.0); + return out; +} + diff --git a/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp b/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp new file mode 100644 index 00000000..2bb19eed --- /dev/null +++ b/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp @@ -0,0 +1,64 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer t21; +layout(buffer_reference, buffer_reference_align = 16, std140) buffer t21 +{ + int m0[2]; + int m1; + layout(row_major) t21 m2[2]; + layout(row_major) t21 m3; + layout(row_major) mat2 m4; +}; + +layout(set = 0, binding = 1, std140) uniform t24 +{ + int m0[2]; + int m1; + layout(row_major) t21 m2[2]; + layout(row_major) t21 m3; + layout(row_major) mat2 m4; +} u24; + +layout(push_constant, std430) uniform t35 +{ + int m0[32]; +} u35; + +layout(set = 0, binding = 0, r32ui) uniform writeonly uimage2D v295; + +void main() +{ + int v8 = 0; + v8 |= (u24.m0[0] - 0); + v8 |= (u24.m0[u35.m0[1]] - 1); + v8 |= (u24.m1 - 2); + v8 |= int(u24.m4[0].x - 3.0); + v8 |= int(u24.m4[0].y - 5.0); + v8 |= int(u24.m4[1].x - 4.0); + v8 |= int(u24.m4[1].y - 6.0); + v8 |= (u24.m2[0].m0[0] - 3); + v8 |= (u24.m2[0].m0[u35.m0[1]] - 4); + v8 |= (u24.m2[0].m1 - 5); + v8 |= int(u24.m2[0].m4[0].x - 6.0); + v8 |= int(u24.m2[0].m4[0].y - 8.0); + v8 |= int(u24.m2[0].m4[1].x - 7.0); + v8 |= int(u24.m2[0].m4[1].y - 9.0); + v8 |= (u24.m2[u35.m0[1]].m0[0] - 6); + v8 |= (u24.m2[u35.m0[1]].m0[u35.m0[1]] - 7); + v8 |= (u24.m2[u35.m0[1]].m1 - 8); + v8 |= int(u24.m2[u35.m0[1]].m4[0].x - 9.0); + v8 |= int(u24.m2[u35.m0[1]].m4[0].y - 11.0); + v8 |= int(u24.m2[u35.m0[1]].m4[1].x - 10.0); + v8 |= int(u24.m2[u35.m0[1]].m4[1].y - 12.0); + v8 |= (u24.m3.m0[0] - 9); + v8 |= (u24.m3.m0[u35.m0[1]] - 10); + v8 |= (u24.m3.m1 - 11); + v8 |= int(u24.m3.m4[0].x - 12.0); + v8 |= int(u24.m3.m4[0].y - 14.0); + v8 |= int(u24.m3.m4[1].x - 13.0); + v8 |= int(u24.m3.m4[1].y - 15.0); + uvec4 v284 = mix(uvec4(1u, 0u, 0u, 1u), uvec4(0u), bvec4(v8 != 0)); + imageStore(v295, ivec2(gl_GlobalInvocationID.xy), v284); +} diff --git a/shaders-msl/comp/buffer_device_address.msl2.comp b/shaders-msl/comp/buffer_device_address.msl2.comp new file mode 100644 index 00000000..14ac1ef9 --- /dev/null +++ b/shaders-msl/comp/buffer_device_address.msl2.comp @@ -0,0 +1,86 @@ +/* Copyright (c) 2021, Arm Limited and Contributors + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#version 450 + +// Allows buffer_reference. +#extension GL_EXT_buffer_reference : require + +layout(local_size_x = 8, local_size_y = 8) in; + +// If we mark a buffer as buffer_reference, this is treated as a pointer type. +// A variable with the type Position is a 64-bit pointer to the data within. +// We can freely cast between pointer types if we wish, but that is not necessary in this sample. +// buffer_reference_align is used to let the underlying implementation know which alignment to expect. +// The pointer can have scalar alignment, which is something the compiler cannot know unless you tell it. +// It is best to use vector alignment when you can for optimal performance, but scalar alignment is sometimes useful. +// With SSBOs, the API has a minimum offset alignment which guarantees a minimum level of alignment from API side. + +// It is possible to forward reference a pointer, so you can contain a pointer to yourself inside a struct. +// Useful if you need something like a linked list on the GPU. +// Here it's not particularly useful, but something to know about. +layout(buffer_reference) buffer Position; + +layout(std430, buffer_reference, buffer_reference_align = 8) writeonly buffer Position +{ + vec2 positions[]; +}; + +layout(std430, buffer_reference, buffer_reference_align = 8) readonly buffer PositionReferences +{ + // This buffer contains an array of pointers to other buffers. + Position buffers[]; +}; + +// In push constant we place a pointer to VBO pointers, spicy! +// This way we don't need any descriptor sets, but there's nothing wrong with combining use of descriptor sets and buffer device addresses. +// It is mostly done for convenience here. +layout(push_constant) uniform Registers +{ + PositionReferences references; + // A buffer reference is 64-bit, so offset of fract_time is 8 bytes. + float fract_time; +} registers; + +void main() +{ + // Every slice is a 8x8 grid of vertices which we update here in compute. + uvec2 local_offset = gl_GlobalInvocationID.xy; + uint local_index = local_offset.y * gl_WorkGroupSize.x * gl_NumWorkGroups.x + local_offset.x; + uint slice = gl_WorkGroupID.z; + + restrict Position positions = registers.references.buffers[slice]; + + // This is a trivial wave-like function. Arbitrary for demonstration purposes. + const float TWO_PI = 3.1415628 * 2.0; + float offset = TWO_PI * fract(registers.fract_time + float(slice) * 0.1); + + // Simple grid. + vec2 pos = vec2(local_offset); + + // Wobble, wobble. + pos.x += 0.2 * sin(2.2 * pos.x + offset); + pos.y += 0.2 * sin(2.25 * pos.y + 2.0 * offset); + pos.x += 0.2 * cos(1.8 * pos.y + 3.0 * offset); + pos.y += 0.2 * cos(2.85 * pos.x + 4.0 * offset); + pos.x += 0.5 * sin(offset); + pos.y += 0.5 * sin(offset + 0.3); + + // Center the mesh in [-0.5, 0.5] range. + // Here we write to a raw pointer. + // Be aware, there is no robustness support for buffer_device_address since we don't have a complete descriptor! 
+ positions.positions[local_index] = pos / (vec2(gl_WorkGroupSize.xy) * vec2(gl_NumWorkGroups.xy) - 1.0) - 0.5; +} diff --git a/shaders-msl/vert/buffer_device_address.msl2.vert b/shaders-msl/vert/buffer_device_address.msl2.vert new file mode 100644 index 00000000..ffc88713 --- /dev/null +++ b/shaders-msl/vert/buffer_device_address.msl2.vert @@ -0,0 +1,83 @@ +/* Copyright (c) 2021, Arm Limited and Contributors + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#version 450 + +// Allows buffer_reference. +#extension GL_EXT_buffer_reference : require + +// Since we did not enable vertexPipelineStoresAndAtomics, we must mark everything readonly. +layout(std430, buffer_reference, buffer_reference_align = 8) readonly buffer Position +{ + vec2 positions[]; +}; + +layout(std430, buffer_reference, buffer_reference_align = 8) readonly buffer PositionReferences +{ + // Represents an array of pointers, where each pointer points to its own VBO (Position). + // The size of a pointer (VkDeviceAddress) is always 8 in Vulkan. + Position buffers[]; +}; + +layout(push_constant) uniform Registers +{ + mat4 view_projection; + + // This is a pointer to an array of pointers, essentially: + // const VBO * const *vbos + PositionReferences references; +} registers; + +// Flat shading looks a little cooler here :) +layout(location = 0) flat out vec4 out_color; + +void main() +{ + int slice = gl_InstanceIndex; + + // One VBO per instance, load the VBO pointer. + // The cool thing here is that a compute shader could hypothetically + // write the pointer list where vertices are stored. + // With vertex attributes we do not have the luxury to modify VBO bindings on the GPU. + // The best we can do is to just modify the vertexOffset in an indirect draw call, + // but that's not always flexible enough, and enforces a very specific engine design to work. + // We can even modify the attribute layout per slice here, since we can just cast the pointer + // to something else if we want. + restrict Position positions = registers.references.buffers[slice]; + + // Load the vertex based on VertexIndex instead of an attribute. Fully flexible. + // Only downside is that we do not get format conversion for free like we do with normal vertex attributes. + vec2 pos = positions.positions[gl_VertexIndex] * 2.5; + + // Place the quad meshes on screen and center it. + pos += 3.0 * (vec2(slice % 8, slice / 8) - 3.5); + + // Normal projection. + gl_Position = registers.view_projection * vec4(pos, 0.0, 1.0); + + // Color the vertex. Use a combination of a wave and checkerboard, completely arbitrary. 
+ int index_x = gl_VertexIndex % 16; + int index_y = gl_VertexIndex / 16; + + float r = 0.5 + 0.3 * sin(float(index_x)); + float g = 0.5 + 0.3 * sin(float(index_y)); + + int checkerboard = (index_x ^ index_y) & 1; + r *= float(checkerboard) * 0.8 + 0.2; + g *= float(checkerboard) * 0.8 + 0.2; + + out_color = vec4(r, g, 0.15, 1.0); +} diff --git a/spirv_common.hpp b/spirv_common.hpp index 1c8a7253..7864f0fd 100644 --- a/spirv_common.hpp +++ b/spirv_common.hpp @@ -591,6 +591,7 @@ struct SPIRType : IVariant uint32_t pointer_depth = 0; bool pointer = false; bool forward_pointer = false; + bool was_forward_referenced = false; spv::StorageClass storage = spv::StorageClassGeneric; diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 4687742b..37e9b9a1 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -1980,7 +1980,8 @@ void CompilerMSL::mark_as_packable(SPIRType &type) return; } - if (type.basetype == SPIRType::Struct) + // Handle possible recursion when a struct contains a pointer to its own type nested somewhere. + if (type.basetype == SPIRType::Struct && !has_extended_decoration(type.self, SPIRVCrossDecorationBufferBlockRepacked)) { set_extended_decoration(type.self, SPIRVCrossDecorationBufferBlockRepacked); @@ -4039,6 +4040,10 @@ uint32_t CompilerMSL::ensure_correct_input_type(uint32_t type_id, uint32_t locat void CompilerMSL::mark_struct_members_packed(const SPIRType &type) { + // Handle possible recursion when a struct contains a pointer to its own type nested somewhere. + if (has_extended_decoration(type.self, SPIRVCrossDecorationPhysicalTypePacked)) + return; + set_extended_decoration(type.self, SPIRVCrossDecorationPhysicalTypePacked); // Problem case! Struct needs to be placed at an awkward alignment. @@ -4065,8 +4070,9 @@ void CompilerMSL::mark_scalar_layout_structs(const SPIRType &type) uint32_t mbr_cnt = uint32_t(type.member_types.size()); for (uint32_t i = 0; i < mbr_cnt; i++) { + // Handle possible recursion when a struct contains a pointer to its own type nested somewhere. auto &mbr_type = get(type.member_types[i]); - if (mbr_type.basetype == SPIRType::Struct) + if (mbr_type.basetype == SPIRType::Struct && !(mbr_type.pointer && mbr_type.storage == StorageClassPhysicalStorageBuffer)) { auto *struct_type = &mbr_type; while (!struct_type->array.empty()) @@ -4278,8 +4284,10 @@ void CompilerMSL::ensure_member_packing_rules_msl(SPIRType &ib_type, uint32_t in // This case will be nightmare-ish to deal with. This could possibly happen if struct alignment does not quite // match up with what we want. Scalar block layout comes to mind here where we might have to work around the rule // that struct alignment == max alignment of all members and struct size depends on this alignment. + // Can't repack structs, but can repack pointers to structs. auto &mbr_type = get(ib_type.member_types[index]); - if (mbr_type.basetype == SPIRType::Struct) + bool is_buff_ptr = mbr_type.pointer && mbr_type.storage == StorageClassPhysicalStorageBuffer; + if (mbr_type.basetype == SPIRType::Struct && !is_buff_ptr) SPIRV_CROSS_THROW("Cannot perform any repacking for structs when it is used as a member of another struct."); // Perform remapping here. @@ -4306,7 +4314,9 @@ void CompilerMSL::ensure_member_packing_rules_msl(SPIRType &ib_type, uint32_t in for (uint32_t dim = 0; dim < dimensions; dim++) array_stride /= max(to_array_size_literal(mbr_type, dim), 1u); - uint32_t elems_per_stride = array_stride / (mbr_type.width / 8); + // Pointers are 8 bytes + uint32_t mbr_width_in_bytes = is_buff_ptr ? 
8 : (mbr_type.width / 8);
+        uint32_t elems_per_stride = array_stride / mbr_width_in_bytes;
 
         if (elems_per_stride == 3)
             SPIRV_CROSS_THROW("Cannot use ArrayStride of 3 elements in remapping scenarios.");
@@ -4315,7 +4325,9 @@ void CompilerMSL::ensure_member_packing_rules_msl(SPIRType &ib_type, uint32_t in
 
         auto physical_type = mbr_type;
         physical_type.vecsize = elems_per_stride;
-        physical_type.parent_type = 0;
+        if (!is_buff_ptr)
+            physical_type.parent_type = 0;
+
         uint32_t type_id = ir.increase_bound_by(1);
         set<SPIRType>(type_id, physical_type);
         set_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypeID, type_id);
@@ -6868,7 +6880,9 @@ void CompilerMSL::emit_specialization_constants_and_structs()
         auto &type = id.get<SPIRType>();
         TypeID type_id = type.self;
 
-        bool is_struct = (type.basetype == SPIRType::Struct) && type.array.empty() && !type.pointer;
+        // Forward references can change the declaration dependency order.
+        // Ensure that change is considered when emitting MSL structs, which must be emitted in dependency order.
+        bool is_struct = (type.basetype == SPIRType::Struct) && type.array.empty() && (!type.pointer || type.was_forward_referenced);
         bool is_block =
             has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
@@ -11400,6 +11414,7 @@ string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bo
         break;
 
     case StorageClassStorageBuffer:
+    case StorageClassPhysicalStorageBuffer:
     {
         // For arguments from variable pointers, we use the write count deduction, so
         // we should not assume any constness here. Only for global SSBOs.
@@ -13528,7 +13543,7 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
         const char *restrict_kw;
 
         auto type_address_space = get_type_address_space(type, id);
-        auto type_decl = type_to_glsl(get<SPIRType>(type.parent_type), id);
+        const auto* p_parent_type = &get<SPIRType>(type.parent_type);
 
         // Work around C pointer qualifier rules. If glsl_type is a pointer type as well
         // we'll need to emit the address space to the right.
@@ -13536,9 +13551,16 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
         // Prefer emitting thread T *foo over T thread* foo since it's more readable,
         // but we'll have to emit thread T * thread * T constant bar; for example.
         if (type_is_pointer_to_pointer(type))
-            type_name = join(type_decl, " ", type_address_space, " ");
+            type_name = join(type_to_glsl(*p_parent_type, id), " ", type_address_space, " ");
         else
-            type_name = join(type_address_space, " ", type_decl);
+        {
+            // Since this is not a pointer-to-pointer, ensure we've dug down to the base type.
+            // Some situations chain pointers even though they are not formally pointers-of-pointers.
+            while(type_is_pointer(*p_parent_type))
+                p_parent_type = &get<SPIRType>(p_parent_type->parent_type);
+
+            type_name = join(type_address_space, " ", type_to_glsl(*p_parent_type, id));
+        }
 
         switch (type.basetype)
         {
@@ -15007,6 +15029,17 @@ uint32_t CompilerMSL::get_declared_struct_size_msl(const SPIRType &struct_type,
 // Returns the byte size of a struct member.
 uint32_t CompilerMSL::get_declared_type_size_msl(const SPIRType &type, bool is_packed, bool row_major) const
 {
+    // Pointers take 8 bytes each
+    if (type.pointer && type.storage == StorageClassPhysicalStorageBuffer)
+    {
+        uint32_t type_size = 8 * (type.vecsize == 3 ?
4 : type.vecsize);
+        size_t dim_cnt = type.array.size();
+        for (uint32_t dim_idx = 0; dim_idx < dim_cnt; dim_idx++)
+            type_size *= to_array_size_literal(type, dim_idx);
+
+        return type_size;
+    }
+
     switch (type.basetype)
     {
     case SPIRType::Unknown:
@@ -15066,6 +15099,10 @@ uint32_t CompilerMSL::get_declared_input_size_msl(const SPIRType &type, uint32_t
 // Returns the byte alignment of a type.
 uint32_t CompilerMSL::get_declared_type_alignment_msl(const SPIRType &type, bool is_packed, bool row_major) const
 {
+    // Pointers align on multiples of 8 bytes
+    if (type.pointer && type.storage == StorageClassPhysicalStorageBuffer)
+        return 8 * (type.vecsize == 3 ? 4 : type.vecsize);
+
     switch (type.basetype)
     {
     case SPIRType::Unknown:
diff --git a/spirv_parser.cpp b/spirv_parser.cpp
index 262aa70f..e508f655 100644
--- a/spirv_parser.cpp
+++ b/spirv_parser.cpp
@@ -659,6 +659,16 @@ void Parser::parse(const Instruction &instruction)
     {
         uint32_t id = ops[0];
 
+        // The type may have previously been declared as an OpTypeForwardPointer.
+        // If so, we want to retain that knowledge, in case content needs to be emitted
+        // relative to when the forward pointer was declared, not when the pointer is
+        // declared. SPIR-V allows composites to contain a type that has only been
+        // declared as a forward pointer when the composite is defined, but in many
+        // languages, it is necessary to emit the component before the composite.
+        // Retrieve this info *before* overwriting the type info for the id below.
+        auto *fwd_ptr = maybe_get<SPIRType>(id);
+        bool was_fwd_ptr = fwd_ptr && fwd_ptr->forward_pointer;
+
         // Very rarely, we might receive a FunctionPrototype here.
         // We won't be able to compile it, but we shouldn't crash when parsing.
         // We should be able to reflect.
@@ -671,6 +681,7 @@ void Parser::parse(const Instruction &instruction)
         ptrbase.pointer = true;
         ptrbase.pointer_depth++;
         ptrbase.storage = static_cast<StorageClass>(ops[1]);
+        ptrbase.was_forward_referenced = was_fwd_ptr;
 
         if (ptrbase.storage == StorageClassAtomicCounter)
             ptrbase.basetype = SPIRType::AtomicCounter;

From 78eb5043f9a9923d5c06c2328ea198fd808fa469 Mon Sep 17 00:00:00 2001
From: Bill Hollings
Date: Fri, 24 Jun 2022 17:28:17 -0400
Subject: [PATCH 2/4] MSL: Fixes from review for SPV_KHR_physical_storage_buffer extension.

- For physical storage pointers, emit forward reference declarations in addition to definitions.
- Minor syntax corrections.
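
As a rough illustration of the first point (a hand-written sketch, not verbatim compiler output; struct names are taken from the buffer_device_address test shaders in this series): structs reachable through a physical storage buffer pointer are now forward-declared ahead of all struct definitions, so a pointer member can be emitted before its pointee's definition:

    struct Position;
    struct PositionReferences;

    struct Registers
    {
        float4x4 view_projection;
        device PositionReferences* references; // needs only the forward declaration above
    };

    struct PositionReferences
    {
        device Position* buffers[1]; // definition may be emitted later, in any order
    };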
--- ...ddress-recursive-struct-pointers.msl2.comp | 6 +++-- .../comp/buffer_device_address.msl2.comp | 13 +++++---- .../vert/buffer_device_address.msl2.vert | 13 +++++---- ...ddress-recursive-struct-pointers.msl2.comp | 6 +++-- .../comp/buffer_device_address.msl2.comp | 13 +++++---- .../vert/buffer_device_address.msl2.vert | 13 +++++---- spirv_common.hpp | 1 - spirv_msl.cpp | 27 +++++++++++++++---- spirv_parser.cpp | 11 -------- 9 files changed, 62 insertions(+), 41 deletions(-) diff --git a/reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp b/reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp index 7b4564c4..4d6330e3 100644 --- a/reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp +++ b/reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp @@ -3,7 +3,9 @@ using namespace metal; -struct t21 +struct t21; + +struct t24 { int4 m0[2]; int m1; @@ -12,7 +14,7 @@ struct t21 float2x4 m4; }; -struct t24 +struct t21 { int4 m0[2]; int m1; diff --git a/reference/opt/shaders-msl/comp/buffer_device_address.msl2.comp b/reference/opt/shaders-msl/comp/buffer_device_address.msl2.comp index ed18a406..b03300ee 100644 --- a/reference/opt/shaders-msl/comp/buffer_device_address.msl2.comp +++ b/reference/opt/shaders-msl/comp/buffer_device_address.msl2.comp @@ -3,22 +3,25 @@ using namespace metal; +struct Position; +struct PositionReferences; + struct Position { float2 positions[1]; }; -struct PositionReferences -{ - device Position* buffers[1]; -}; - struct Registers { device PositionReferences* references; float fract_time; }; +struct PositionReferences +{ + device Position* buffers[1]; +}; + constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 8u, 1u); kernel void main0(constant Registers& registers [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_NumWorkGroups [[threadgroups_per_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) diff --git a/reference/opt/shaders-msl/vert/buffer_device_address.msl2.vert b/reference/opt/shaders-msl/vert/buffer_device_address.msl2.vert index ff8b5f06..57f86ec1 100644 --- a/reference/opt/shaders-msl/vert/buffer_device_address.msl2.vert +++ b/reference/opt/shaders-msl/vert/buffer_device_address.msl2.vert @@ -3,22 +3,25 @@ using namespace metal; +struct Position; +struct PositionReferences; + struct Position { float2 positions[1]; }; -struct PositionReferences -{ - device Position* buffers[1]; -}; - struct Registers { float4x4 view_projection; device PositionReferences* references; }; +struct PositionReferences +{ + device Position* buffers[1]; +}; + struct main0_out { float4 out_color [[user(locn0)]]; diff --git a/reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp b/reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp index e18b4a5c..bbb07380 100644 --- a/reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp +++ b/reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp @@ -3,7 +3,9 @@ using namespace metal; -struct t21 +struct t21; + +struct t24 { int4 m0[2]; int m1; @@ -12,7 +14,7 @@ struct t21 float2x4 m4; }; -struct t24 +struct t21 { int4 m0[2]; int m1; diff --git a/reference/shaders-msl/comp/buffer_device_address.msl2.comp b/reference/shaders-msl/comp/buffer_device_address.msl2.comp index 523fcecd..8c1ff2d1 100644 --- 
a/reference/shaders-msl/comp/buffer_device_address.msl2.comp +++ b/reference/shaders-msl/comp/buffer_device_address.msl2.comp @@ -3,22 +3,25 @@ using namespace metal; +struct Position; +struct PositionReferences; + struct Position { float2 positions[1]; }; -struct PositionReferences -{ - device Position* buffers[1]; -}; - struct Registers { device PositionReferences* references; float fract_time; }; +struct PositionReferences +{ + device Position* buffers[1]; +}; + constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 8u, 1u); kernel void main0(constant Registers& registers [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_NumWorkGroups [[threadgroups_per_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) diff --git a/reference/shaders-msl/vert/buffer_device_address.msl2.vert b/reference/shaders-msl/vert/buffer_device_address.msl2.vert index 3c4d65a5..2fa16bcd 100644 --- a/reference/shaders-msl/vert/buffer_device_address.msl2.vert +++ b/reference/shaders-msl/vert/buffer_device_address.msl2.vert @@ -3,22 +3,25 @@ using namespace metal; +struct Position; +struct PositionReferences; + struct Position { float2 positions[1]; }; -struct PositionReferences -{ - device Position* buffers[1]; -}; - struct Registers { float4x4 view_projection; device PositionReferences* references; }; +struct PositionReferences +{ + device Position* buffers[1]; +}; + struct main0_out { float4 out_color [[user(locn0)]]; diff --git a/spirv_common.hpp b/spirv_common.hpp index 7864f0fd..1c8a7253 100644 --- a/spirv_common.hpp +++ b/spirv_common.hpp @@ -591,7 +591,6 @@ struct SPIRType : IVariant uint32_t pointer_depth = 0; bool pointer = false; bool forward_pointer = false; - bool was_forward_referenced = false; spv::StorageClass storage = spv::StorageClassGeneric; diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 37e9b9a1..04cad54a 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -6804,6 +6804,25 @@ void CompilerMSL::emit_specialization_constants_and_structs() // these types for purpose of iterating over them in ir.ids_for_type and friends. auto loop_lock = ir.create_loop_soft_lock(); + // Physical storage buffer pointers can have cyclical references, + // so emit forward declarations of them before other structs. + // Ignore type_id because we want the underlying struct type from the pointer. + ir.for_each_typed_id([&](uint32_t /* type_id */, const SPIRType &type) { + if (type.basetype == SPIRType::Struct && + type.pointer && type.storage == StorageClassPhysicalStorageBuffer && + declared_structs.count(type.self) == 0) + { + statement("struct ", to_name(type.self), ";"); + declared_structs.insert(type.self); + emitted = true; + } + }); + if (emitted) + statement(""); + + emitted = false; + declared_structs.clear(); + for (auto &id_ : ir.ids_for_constant_or_type) { auto &id = ir.ids[id_]; @@ -6880,9 +6899,7 @@ void CompilerMSL::emit_specialization_constants_and_structs() auto &type = id.get(); TypeID type_id = type.self; - // Forward references can change the declaration dependency order. - // Ensure that change is considered when emitting MSL structs, which must be emitted in dependency order. 
-        bool is_struct = (type.basetype == SPIRType::Struct) && type.array.empty() && (!type.pointer || type.was_forward_referenced);
+        bool is_struct = (type.basetype == SPIRType::Struct) && type.array.empty() && !type.pointer;
         bool is_block =
             has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
@@ -13543,7 +13560,7 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
         const char *restrict_kw;
 
         auto type_address_space = get_type_address_space(type, id);
-        const auto* p_parent_type = &get<SPIRType>(type.parent_type);
+        const auto *p_parent_type = &get<SPIRType>(type.parent_type);
 
         // Work around C pointer qualifier rules. If glsl_type is a pointer type as well
         // we'll need to emit the address space to the right.
@@ -13556,7 +13573,7 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
         {
             // Since this is not a pointer-to-pointer, ensure we've dug down to the base type.
             // Some situations chain pointers even though they are not formally pointers-of-pointers.
-            while(type_is_pointer(*p_parent_type))
+            while (type_is_pointer(*p_parent_type))
                 p_parent_type = &get<SPIRType>(p_parent_type->parent_type);
 
             type_name = join(type_address_space, " ", type_to_glsl(*p_parent_type, id));
diff --git a/spirv_parser.cpp b/spirv_parser.cpp
index e508f655..262aa70f 100644
--- a/spirv_parser.cpp
+++ b/spirv_parser.cpp
@@ -659,16 +659,6 @@ void Parser::parse(const Instruction &instruction)
     {
         uint32_t id = ops[0];
 
-        // The type may have previously been declared as an OpTypeForwardPointer.
-        // If so, we want to retain that knowledge, in case content needs to be emitted
-        // relative to when the forward pointer was declared, not when the pointer is
-        // declared. SPIR-V allows composites to contain a type that has only been
-        // declared as a forward pointer when the composite is defined, but in many
-        // languages, it is necessary to emit the component before the composite.
-        // Retrieve this info *before* overwriting the type info for the id below.
-        auto *fwd_ptr = maybe_get<SPIRType>(id);
-        bool was_fwd_ptr = fwd_ptr && fwd_ptr->forward_pointer;
-
         // Very rarely, we might receive a FunctionPrototype here.
         // We won't be able to compile it, but we shouldn't crash when parsing.
         // We should be able to reflect.
@@ -681,7 +671,6 @@ void Parser::parse(const Instruction &instruction)
         ptrbase.pointer = true;
         ptrbase.pointer_depth++;
         ptrbase.storage = static_cast<StorageClass>(ops[1]);
-        ptrbase.was_forward_referenced = was_fwd_ptr;
 
         if (ptrbase.storage == StorageClassAtomicCounter)
             ptrbase.basetype = SPIRType::AtomicCounter;

From 4185acc70de8d8af5fa633e1e5e96ae6bdca2f9e Mon Sep 17 00:00:00 2001
From: Bill Hollings
Date: Fri, 1 Jul 2022 16:10:41 -0400
Subject: [PATCH 3/4] MSL: Fixes from review for SPV_KHR_physical_storage_buffer extension.

- Assign ulongn physical type to buffer pointers in short arrays when array stride is larger than pointer size.
- Support GL_EXT_buffer_reference_uvec2 casting buffer reference pointers to and from uvec2 values.
- When packing structs, include structs inside physical buffers.
- Update mechanism for traversing pointer arrays when calculating type sizes.
- Added unit test shaders.
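
Rough sketch of the effect of the first two points (hand-written and abbreviated; the names t21, t24, u24 and SSBO follow the test shaders added below, and the exact casts are illustrative rather than verbatim output): a buffer-pointer member whose ArrayStride (16) exceeds the 8-byte pointer size gets a ulong2 physical type and is cast back to a pointer on access, while GL_EXT_buffer_reference_uvec2 conversions become as_type reinterprets:

    struct t24
    {
        ulong2 m2[2];   // buffer pointers widened to ulong2: ArrayStride (16) > pointer size (8)
        device t21* m3; // a plain pointer member keeps its pointer type
    };

    // Member access casts the packed address back to a pointer:
    int v = ((device t21*)u24.m2[0].x)->m1;

    // GL_EXT_buffer_reference_uvec2 round trip, with b a uint2 holding an address:
    device SSBO* ptr = (device SSBO*)as_type<uint64_t>(b); // uvec2 -> buffer pointer
    uint2 addr = as_type<uint2>((uint64_t)ptr);            // buffer pointer -> uvec2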
--- ...-vec-and-cast-to-and-from-uvec2.msl23.comp | 28 +++++++ ...ress-recursive-struct-pointers.msl23.comp} | 32 ++++---- ...-vec-and-cast-to-and-from-uvec2.msl23.comp | 26 +++++++ ...ress-recursive-struct-pointers.msl23.comp} | 32 ++++---- ...-vec-and-cast-to-and-from-uvec2.msl23.comp | 22 ++++++ ...ress-recursive-struct-pointers.msl23.comp} | 0 spirv_glsl.cpp | 5 ++ spirv_glsl.hpp | 1 + spirv_msl.cpp | 75 ++++++++++++++++--- spirv_msl.hpp | 2 + 10 files changed, 181 insertions(+), 42 deletions(-) create mode 100644 reference/opt/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp rename reference/opt/shaders-msl/comp/{buffer_device_address-recursive-struct-pointers.msl2.comp => buffer_device_address-recursive-struct-pointers.msl23.comp} (62%) create mode 100644 reference/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp rename reference/shaders-msl/comp/{buffer_device_address-recursive-struct-pointers.msl2.comp => buffer_device_address-recursive-struct-pointers.msl23.comp} (57%) create mode 100644 shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp rename shaders-msl/comp/{buffer_device_address-recursive-struct-pointers.msl2.comp => buffer_device_address-recursive-struct-pointers.msl23.comp} (100%) diff --git a/reference/opt/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp b/reference/opt/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp new file mode 100644 index 00000000..f2ba5416 --- /dev/null +++ b/reference/opt/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp @@ -0,0 +1,28 @@ +#include +#include + +using namespace metal; + +struct SSBO; + +struct UBO +{ + uint2 b; +}; + +struct SSBO +{ + packed_float3 a1; + float a2; +}; + +kernel void main0(constant UBO& _10 [[buffer(0)]]) +{ + ((device SSBO*)as_type(_10.b))->a1 = float3(1.0, 2.0, 3.0); + uint2 _35 = as_type((uint64_t)((device SSBO*)as_type(_10.b + uint2(32u)))); + uint2 v2 = _35; + device SSBO* _39 = ((device SSBO*)as_type(_35)); + float3 v3 = float3(_39->a1); + _39->a1 = float3(_39->a1) + float3(1.0); +} + diff --git a/reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp b/reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp similarity index 62% rename from reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp rename to reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp index 4d6330e3..d66154b5 100644 --- a/reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp +++ b/reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp @@ -9,7 +9,7 @@ struct t24 { int4 m0[2]; int m1; - device t21* m2[2]; + ulong2 m2[2]; device t21* m3; float2x4 m4; }; @@ -18,7 +18,7 @@ struct t21 { int4 m0[2]; int m1; - device t21* m2[2]; + ulong2 m2[2]; device t21* m3; float2x4 m4; }; @@ -47,33 +47,33 @@ kernel void main0(constant t24& u24 [[buffer(0)]], constant t35& u35 [[buffer(1) v8 = _75; int _82 = _75 | int(u24.m4[1u][1] - 6.0); v8 = _82; - int _92 = _82 | (u24.m2[0]->m0[0].x - 3); + int _92 = _82 | (((device t21*)u24.m2[0].x)->m0[0].x - 3); v8 = _92; - int _101 = _92 | (u24.m2[0]->m0[u35.m0[1]].x - 4); + int _101 = _92 | (((device t21*)u24.m2[0].x)->m0[u35.m0[1]].x - 4); v8 = _101; - int _109 = _101 | (u24.m2[0]->m1 - 
5); + int _109 = _101 | (((device t21*)u24.m2[0].x)->m1 - 5); v8 = _109; - int _118 = _109 | int(u24.m2[0]->m4[0u][0] - 6.0); + int _118 = _109 | int(((device t21*)u24.m2[0].x)->m4[0u][0] - 6.0); v8 = _118; - int _127 = _118 | int(u24.m2[0]->m4[1u][0] - 8.0); + int _127 = _118 | int(((device t21*)u24.m2[0].x)->m4[1u][0] - 8.0); v8 = _127; - int _136 = _127 | int(u24.m2[0]->m4[0u][1] - 7.0); + int _136 = _127 | int(((device t21*)u24.m2[0].x)->m4[0u][1] - 7.0); v8 = _136; - int _145 = _136 | int(u24.m2[0]->m4[1u][1] - 9.0); + int _145 = _136 | int(((device t21*)u24.m2[0].x)->m4[1u][1] - 9.0); v8 = _145; - int _155 = _145 | (u24.m2[u35.m0[1]]->m0[0].x - 6); + int _155 = _145 | (((device t21*)u24.m2[u35.m0[1]].x)->m0[0].x - 6); v8 = _155; - int _167 = _155 | (u24.m2[u35.m0[1]]->m0[u35.m0[1]].x - 7); + int _167 = _155 | (((device t21*)u24.m2[u35.m0[1]].x)->m0[u35.m0[1]].x - 7); v8 = _167; - int _177 = _167 | (u24.m2[u35.m0[1]]->m1 - 8); + int _177 = _167 | (((device t21*)u24.m2[u35.m0[1]].x)->m1 - 8); v8 = _177; - int _187 = _177 | int(u24.m2[u35.m0[1]]->m4[0u][0] - 9.0); + int _187 = _177 | int(((device t21*)u24.m2[u35.m0[1]].x)->m4[0u][0] - 9.0); v8 = _187; - int _198 = _187 | int(u24.m2[u35.m0[1]]->m4[1u][0] - 11.0); + int _198 = _187 | int(((device t21*)u24.m2[u35.m0[1]].x)->m4[1u][0] - 11.0); v8 = _198; - int _209 = _198 | int(u24.m2[u35.m0[1]]->m4[0u][1] - 10.0); + int _209 = _198 | int(((device t21*)u24.m2[u35.m0[1]].x)->m4[0u][1] - 10.0); v8 = _209; - int _220 = _209 | int(u24.m2[u35.m0[1]]->m4[1u][1] - 12.0); + int _220 = _209 | int(((device t21*)u24.m2[u35.m0[1]].x)->m4[1u][1] - 12.0); v8 = _220; int _228 = _220 | (u24.m3->m0[0].x - 9); v8 = _228; diff --git a/reference/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp b/reference/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp new file mode 100644 index 00000000..474d5092 --- /dev/null +++ b/reference/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp @@ -0,0 +1,26 @@ +#include +#include + +using namespace metal; + +struct SSBO; + +struct UBO +{ + uint2 b; +}; + +struct SSBO +{ + packed_float3 a1; + float a2; +}; + +kernel void main0(constant UBO& _10 [[buffer(0)]]) +{ + ((device SSBO*)as_type(_10.b))->a1 = float3(1.0, 2.0, 3.0); + uint2 v2 = as_type((uint64_t)((device SSBO*)as_type(_10.b + uint2(32u)))); + float3 v3 = float3(((device SSBO*)as_type(v2))->a1); + ((device SSBO*)as_type(v2))->a1 = v3 + float3(1.0); +} + diff --git a/reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp b/reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp similarity index 57% rename from reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp rename to reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp index bbb07380..35b7af54 100644 --- a/reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp +++ b/reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp @@ -9,7 +9,7 @@ struct t24 { int4 m0[2]; int m1; - device t21* m2[2]; + ulong2 m2[2]; device t21* m3; float2x4 m4; }; @@ -18,7 +18,7 @@ struct t21 { int4 m0[2]; int m1; - device t21* m2[2]; + ulong2 m2[2]; device t21* m3; float2x4 m4; }; @@ -40,20 +40,20 @@ kernel void main0(constant t24& u24 [[buffer(0)]], constant t35& u35 [[buffer(1) v8 |= int(u24.m4[1u][0] - 5.0); v8 |= int(u24.m4[0u][1] - 4.0); v8 |= 
int(u24.m4[1u][1] - 6.0); - v8 |= (u24.m2[0]->m0[0].x - 3); - v8 |= (u24.m2[0]->m0[u35.m0[1]].x - 4); - v8 |= (u24.m2[0]->m1 - 5); - v8 |= int(u24.m2[0]->m4[0u][0] - 6.0); - v8 |= int(u24.m2[0]->m4[1u][0] - 8.0); - v8 |= int(u24.m2[0]->m4[0u][1] - 7.0); - v8 |= int(u24.m2[0]->m4[1u][1] - 9.0); - v8 |= (u24.m2[u35.m0[1]]->m0[0].x - 6); - v8 |= (u24.m2[u35.m0[1]]->m0[u35.m0[1]].x - 7); - v8 |= (u24.m2[u35.m0[1]]->m1 - 8); - v8 |= int(u24.m2[u35.m0[1]]->m4[0u][0] - 9.0); - v8 |= int(u24.m2[u35.m0[1]]->m4[1u][0] - 11.0); - v8 |= int(u24.m2[u35.m0[1]]->m4[0u][1] - 10.0); - v8 |= int(u24.m2[u35.m0[1]]->m4[1u][1] - 12.0); + v8 |= (((device t21*)u24.m2[0].x)->m0[0].x - 3); + v8 |= (((device t21*)u24.m2[0].x)->m0[u35.m0[1]].x - 4); + v8 |= (((device t21*)u24.m2[0].x)->m1 - 5); + v8 |= int(((device t21*)u24.m2[0].x)->m4[0u][0] - 6.0); + v8 |= int(((device t21*)u24.m2[0].x)->m4[1u][0] - 8.0); + v8 |= int(((device t21*)u24.m2[0].x)->m4[0u][1] - 7.0); + v8 |= int(((device t21*)u24.m2[0].x)->m4[1u][1] - 9.0); + v8 |= (((device t21*)u24.m2[u35.m0[1]].x)->m0[0].x - 6); + v8 |= (((device t21*)u24.m2[u35.m0[1]].x)->m0[u35.m0[1]].x - 7); + v8 |= (((device t21*)u24.m2[u35.m0[1]].x)->m1 - 8); + v8 |= int(((device t21*)u24.m2[u35.m0[1]].x)->m4[0u][0] - 9.0); + v8 |= int(((device t21*)u24.m2[u35.m0[1]].x)->m4[1u][0] - 11.0); + v8 |= int(((device t21*)u24.m2[u35.m0[1]].x)->m4[0u][1] - 10.0); + v8 |= int(((device t21*)u24.m2[u35.m0[1]].x)->m4[1u][1] - 12.0); v8 |= (u24.m3->m0[0].x - 9); v8 |= (u24.m3->m0[u35.m0[1]].x - 10); v8 |= (u24.m3->m1 - 11); diff --git a/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp b/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp new file mode 100644 index 00000000..9212e04c --- /dev/null +++ b/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp @@ -0,0 +1,22 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require + +layout(buffer_reference, buffer_reference_align = 4) buffer SSBO +{ + vec3 a1; // Will be 12-byte packed + float a2; +}; + +layout(push_constant) uniform UBO +{ + uvec2 b; +}; + +void main() +{ + SSBO(b).a1 = vec3(1.0, 2.0, 3.0); // uvec2 -> buff ref and assign to packed + uvec2 v2 = uvec2(SSBO(b + 32)); // uvec2 -> buff ref -> uvec2 + vec3 v3 = SSBO(v2).a1; // uvec2 -> buff ref and assign from packed + SSBO(v2).a1 = v3 + 1.0; // uvec2 -> buff ref and assign to packed +} diff --git a/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp b/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp similarity index 100% rename from shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl2.comp rename to shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index 80d40c0d..a9094174 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -8981,6 +8981,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice if (!is_literal) mod_flags &= ~ACCESS_CHAIN_INDEX_IS_LITERAL_BIT; access_chain_internal_append_index(expr, base, type, mod_flags, access_chain_is_arrayed, index); + check_physical_type_cast(expr, type, physical_type); }; for (uint32_t i = 0; i < count; i++) @@ -9313,6 +9314,10 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice return expr; } +void CompilerGLSL::check_physical_type_cast(std::string &, const SPIRType *, uint32_t) +{ 
+} + void CompilerGLSL::prepare_access_chain_for_scalar_access(std::string &, const SPIRType &, spv::StorageClass, bool &) { } diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp index edb09fa6..dc693787 100644 --- a/spirv_glsl.hpp +++ b/spirv_glsl.hpp @@ -711,6 +711,7 @@ protected: spv::StorageClass get_expression_effective_storage_class(uint32_t ptr); virtual bool access_chain_needs_stage_io_builtin_translation(uint32_t base); + virtual void check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type); virtual void prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, spv::StorageClass storage, bool &is_packed); diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 04cad54a..f849adff 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -1967,6 +1967,13 @@ void CompilerMSL::mark_packable_structs() mark_as_packable(type); } }); + + // Physical storage buffer pointers can appear outside of the context of a variable, if the address + // is calculated from a ulong or uvec2 and cast to a pointer, so check if they need to be packed too. + ir.for_each_typed_id([&](uint32_t, SPIRType &type) { + if (type.basetype == SPIRType::Struct && type.pointer && type.storage == StorageClassPhysicalStorageBuffer) + mark_as_packable(type); + }); } // If the specified type is a struct, it and any nested structs @@ -4325,8 +4332,17 @@ void CompilerMSL::ensure_member_packing_rules_msl(SPIRType &ib_type, uint32_t in auto physical_type = mbr_type; physical_type.vecsize = elems_per_stride; - if (!is_buff_ptr) - physical_type.parent_type = 0; + physical_type.parent_type = 0; + + // If this is a physical buffer pointer, replace type with a ulongn vector. + if (is_buff_ptr) + { + physical_type.width = 64; + physical_type.basetype = to_unsigned_basetype(physical_type.width); + physical_type.pointer = false; + physical_type.pointer_depth = false; + physical_type.forward_pointer = false; + } uint32_t type_id = ir.increase_bound_by(1); set(type_id, physical_type); @@ -7709,6 +7725,23 @@ void CompilerMSL::fix_up_interpolant_access_chain(const uint32_t *ops, uint32_t set_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex, interface_index); } + +// If the physical type of a physical buffer pointer has been changed +// to a ulong or ulongn vector, add a cast back to the pointer type. +void CompilerMSL::check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type) +{ + auto *p_physical_type = maybe_get(physical_type); + if (p_physical_type && + p_physical_type->storage == StorageClassPhysicalStorageBuffer && + p_physical_type->basetype == to_unsigned_basetype(64)) + { + if (p_physical_type->vecsize > 1) + expr += ".x"; + + expr = join("((", type_to_glsl(*type), ")", expr, ")"); + } +} + // Override for MSL-specific syntax instructions void CompilerMSL::emit_instruction(const Instruction &instruction) { @@ -14362,18 +14395,31 @@ string CompilerMSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in // size (eg. short shift right becomes int), which means chaining integer ops // together may introduce size variations that SPIR-V doesn't know about. 
    if (same_size_cast && !integral_cast)
-    {
         return "as_type<" + type_to_glsl(out_type) + ">";
-    }
     else
-    {
         return type_to_glsl(out_type);
-    }
 }
 
-bool CompilerMSL::emit_complex_bitcast(uint32_t, uint32_t, uint32_t)
+bool CompilerMSL::emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0)
 {
-    return false;
+    auto &out_type = get<SPIRType>(result_type);
+    auto &in_type = expression_type(op0);
+    bool uvec2_to_ptr = (in_type.basetype == SPIRType::UInt && in_type.vecsize == 2 &&
+                         out_type.pointer && out_type.storage == StorageClassPhysicalStorageBuffer);
+    bool ptr_to_uvec2 = (in_type.pointer && in_type.storage == StorageClassPhysicalStorageBuffer &&
+                         out_type.basetype == SPIRType::UInt && out_type.vecsize == 2);
+    string expr;
+
+    // Casting between uvec2 and buffer storage pointer per GL_EXT_buffer_reference_uvec2
+    if (uvec2_to_ptr)
+        expr = join("((", type_to_glsl(out_type), ")as_type<uint64_t>(", to_unpacked_expression(op0), "))");
+    else if (ptr_to_uvec2)
+        expr = join("as_type<", type_to_glsl(out_type), ">((uint64_t)", to_unpacked_expression(op0), ")");
+    else
+        return false;
+
+    emit_op(result_type, id, expr, should_forward(op0));
+    return true;
 }
 
 // Returns an MSL string identifying the name of a SPIR-V builtin.
@@ -15050,9 +15096,18 @@ uint32_t CompilerMSL::get_declared_type_size_msl(const SPIRType &type, bool is_p
     if (type.pointer && type.storage == StorageClassPhysicalStorageBuffer)
     {
         uint32_t type_size = 8 * (type.vecsize == 3 ? 4 : type.vecsize);
+
+        // Work our way through potentially layered arrays,
+        // stopping when we hit a pointer that is not also an array.
         size_t dim_cnt = type.array.size();
-        for (uint32_t dim_idx = 0; dim_idx < dim_cnt; dim_idx++)
-            type_size *= to_array_size_literal(type, dim_idx);
+        int32_t dim_idx = dim_cnt - 1;
+        auto *p_type = &type;
+        while (!type_is_pointer(*p_type) && dim_idx >= 0)
+        {
+            type_size *= to_array_size_literal(*p_type, dim_idx);
+            p_type = &get<SPIRType>(p_type->parent_type);
+            dim_idx--;
+        }
 
         return type_size;
     }
diff --git a/spirv_msl.hpp b/spirv_msl.hpp
index b1abd12a..c0317c7a 100644
--- a/spirv_msl.hpp
+++ b/spirv_msl.hpp
@@ -986,6 +986,8 @@ protected:
     void prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, spv::StorageClass storage,
                                                 bool &is_packed) override;
     void fix_up_interpolant_access_chain(const uint32_t *ops, uint32_t length);
+    void check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type) override;
+
     bool emit_tessellation_access_chain(const uint32_t *ops, uint32_t length);
     bool emit_tessellation_io_load(uint32_t result_type, uint32_t id, uint32_t ptr);
     bool is_out_of_bounds_tessellation_level(uint32_t id_lhs);

From 064a697b18c135514d08b580793da77fb37061ef Mon Sep 17 00:00:00 2001
From: Bill Hollings
Date: Fri, 1 Jul 2022 17:09:35 -0400
Subject: [PATCH 4/4] MSL: Fix implicit conversion precision build error.

---
 spirv_msl.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/spirv_msl.cpp b/spirv_msl.cpp
index f849adff..3b3d8a2b 100644
--- a/spirv_msl.cpp
+++ b/spirv_msl.cpp
@@ -15099,8 +15099,7 @@ uint32_t CompilerMSL::get_declared_type_size_msl(const SPIRType &type, bool is_p
 
         // Work our way through potentially layered arrays,
         // stopping when we hit a pointer that is not also an array.
-        size_t dim_cnt = type.array.size();
-        int32_t dim_idx = dim_cnt - 1;
+        int32_t dim_idx = (int32_t)type.array.size() - 1;
         auto *p_type = &type;
         while (!type_is_pointer(*p_type) && dim_idx >= 0)
         {