diff --git a/reference/opt/shaders-msl/flatten/rowmajor.flatten.vert b/reference/opt/shaders-msl/flatten/rowmajor.flatten.vert
new file mode 100644
index 00000000..3e0fcdbb
--- /dev/null
+++ b/reference/opt/shaders-msl/flatten/rowmajor.flatten.vert
@@ -0,0 +1,29 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct UBO
+{
+    float4x4 uMVPR;
+    float4x4 uMVPC;
+    float2x4 uMVP;
+};
+
+struct main0_in
+{
+    float4 aVertex [[attribute(0)]];
+};
+
+struct main0_out
+{
+    float4 gl_Position [[position]];
+};
+
+vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]])
+{
+    main0_out out = {};
+    out.gl_Position = (_18.uMVPR * in.aVertex) + (in.aVertex * _18.uMVPC);
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/flatten/swizzle.flatten.vert b/reference/opt/shaders-msl/flatten/swizzle.flatten.vert
index 1accb1d7..53fc21f9 100644
--- a/reference/opt/shaders-msl/flatten/swizzle.flatten.vert
+++ b/reference/opt/shaders-msl/flatten/swizzle.flatten.vert
@@ -38,8 +38,8 @@ vertex main0_out main0(constant UBO& _22 [[buffer(0)]])
     out.gl_Position = float4(0.0);
     out.oA = _22.A;
     out.oB = float4(_22.B0, _22.B1);
-    out.oC = float4(_22.C0, _22.C1);
-    out.oD = float4(_22.D0, _22.D1);
+    out.oC = float4(_22.C0, _22.C1) + float4(_22.C1.xy, _22.C1.z, _22.C0);
+    out.oD = float4(_22.D0, _22.D1) + float4(float3(_22.D0).xy, float3(_22.D0).z, _22.D1);
     out.oE = float4(_22.E0, _22.E1, _22.E2, _22.E3);
     out.oF = float4(_22.F0, _22.F1, _22.F2);
     return out;
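Note on the swizzle reference above: `C1` is a std140 `vec3` that occupies its own 16-byte slot, so it maps to a native `float3` and can be swizzled directly, while `D0` sits tightly packed against `D1` and maps to a packed type. A minimal MSL-style sketch (hypothetical struct, for illustration only) of why the generated code wraps the packed member in a `float3(...)` constructor before swizzling:

```cpp
// Hypothetical layout; MSL's packed vector types do not support
// multi-component swizzle syntax, so the member is unpacked first.
struct UBO
{
    packed_float3 D0; // 12 bytes, no padding before D1
    float D1;
};

static float4 demo(constant UBO& u)
{
    float3 d0 = float3(u.D0);         // unpack to a native float3
    return float4(d0.xy, d0.z, u.D1); // now swizzling is legal
}
```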
diff --git a/reference/shaders-msl-no-opt/vert/functions_nested.vert b/reference/shaders-msl-no-opt/vert/functions_nested.vert
new file mode 100644
index 00000000..f0c9d135
--- /dev/null
+++ b/reference/shaders-msl-no-opt/vert/functions_nested.vert
@@ -0,0 +1,190 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct attr_desc
+{
+    int type;
+    int attribute_size;
+    int starting_offset;
+    int stride;
+    int swap_bytes;
+    int is_volatile;
+};
+
+struct VertexBuffer
+{
+    float4x4 scale_offset_mat;
+    uint vertex_base_index;
+    int4 input_attributes[16];
+};
+
+struct VertexConstantsBuffer
+{
+    float4 vc[16];
+};
+
+constant float4 _295 = {};
+
+struct main0_out
+{
+    float4 tc0 [[user(locn0)]];
+    float4 back_color [[user(locn10)]];
+    float4 gl_Position [[position]];
+};
+
+attr_desc fetch_desc(thread const int& location, constant VertexBuffer& v_227)
+{
+    int attribute_flags = v_227.input_attributes[location].w;
+    attr_desc result;
+    result.type = v_227.input_attributes[location].x;
+    result.attribute_size = v_227.input_attributes[location].y;
+    result.starting_offset = v_227.input_attributes[location].z;
+    result.stride = attribute_flags & 255;
+    result.swap_bytes = (attribute_flags >> 8) & 1;
+    result.is_volatile = (attribute_flags >> 9) & 1;
+    return result;
+}
+
+uint get_bits(thread const uint4& v, thread const int& swap)
+{
+    if (swap != 0)
+    {
+        return ((v.w | (v.z << uint(8))) | (v.y << uint(16))) | (v.x << uint(24));
+    }
+    return ((v.x | (v.y << uint(8))) | (v.z << uint(16))) | (v.w << uint(24));
+}
+
+float4 fetch_attr(thread const attr_desc& desc, thread const int& vertex_id, thread const texture2d<uint> input_stream)
+{
+    float4 result = float4(0.0, 0.0, 0.0, 1.0);
+    bool reverse_order = false;
+    int first_byte = (vertex_id * desc.stride) + desc.starting_offset;
+    for (int n = 0; n < 4; n++)
+    {
+        if (n == desc.attribute_size)
+        {
+            break;
+        }
+        uint4 tmp;
+        switch (desc.type)
+        {
+            case 0:
+            {
+                int _131 = first_byte;
+                first_byte = _131 + 1;
+                tmp.x = input_stream.read(uint2(_131, 0)).x;
+                int _138 = first_byte;
+                first_byte = _138 + 1;
+                tmp.y = input_stream.read(uint2(_138, 0)).x;
+                uint4 param = tmp;
+                int param_1 = desc.swap_bytes;
+                result[n] = float(get_bits(param, param_1));
+                break;
+            }
+            case 1:
+            {
+                int _156 = first_byte;
+                first_byte = _156 + 1;
+                tmp.x = input_stream.read(uint2(_156, 0)).x;
+                int _163 = first_byte;
+                first_byte = _163 + 1;
+                tmp.y = input_stream.read(uint2(_163, 0)).x;
+                int _170 = first_byte;
+                first_byte = _170 + 1;
+                tmp.z = input_stream.read(uint2(_170, 0)).x;
+                int _177 = first_byte;
+                first_byte = _177 + 1;
+                tmp.w = input_stream.read(uint2(_177, 0)).x;
+                uint4 param_2 = tmp;
+                int param_3 = desc.swap_bytes;
+                result[n] = as_type<float>(get_bits(param_2, param_3));
+                break;
+            }
+            case 2:
+            {
+                int _195 = first_byte;
+                first_byte = _195 + 1;
+                result[n] = float(input_stream.read(uint2(_195, 0)).x);
+                reverse_order = desc.swap_bytes != 0;
+                break;
+            }
+        }
+    }
+    float4 _209;
+    if (reverse_order)
+    {
+        _209 = result.wzyx;
+    }
+    else
+    {
+        _209 = result;
+    }
+    return _209;
+}
+
+float4 read_location(thread const int& location, constant VertexBuffer& v_227, thread uint& gl_VertexIndex, thread texture2d<uint> buff_in_2, thread texture2d<uint> buff_in_1)
+{
+    int param = location;
+    attr_desc desc = fetch_desc(param, v_227);
+    int vertex_id = gl_VertexIndex - int(v_227.vertex_base_index);
+    if (desc.is_volatile != 0)
+    {
+        attr_desc param_1 = desc;
+        int param_2 = vertex_id;
+        return fetch_attr(param_1, param_2, buff_in_2);
+    }
+    else
+    {
+        attr_desc param_3 = desc;
+        int param_4 = vertex_id;
+        return fetch_attr(param_3, param_4, buff_in_1);
+    }
+}
+
+void vs_adjust(thread float4& dst_reg0, thread float4& dst_reg1, thread float4& dst_reg7, constant VertexBuffer& v_227, thread uint& gl_VertexIndex, thread texture2d<uint> buff_in_2, thread texture2d<uint> buff_in_1, constant VertexConstantsBuffer& v_309)
+{
+    int param = 3;
+    float4 in_diff_color = read_location(param, v_227, gl_VertexIndex, buff_in_2, buff_in_1);
+    int param_1 = 0;
+    float4 in_pos = read_location(param_1, v_227, gl_VertexIndex, buff_in_2, buff_in_1);
+    int param_2 = 8;
+    float4 in_tc0 = read_location(param_2, v_227, gl_VertexIndex, buff_in_2, buff_in_1);
+    dst_reg1 = in_diff_color * v_309.vc[13];
+    float4 tmp0;
+    tmp0.x = float4(dot(float4(in_pos.xyz, 1.0), v_309.vc[4])).x;
+    tmp0.y = float4(dot(float4(in_pos.xyz, 1.0), v_309.vc[5])).y;
+    tmp0.z = float4(dot(float4(in_pos.xyz, 1.0), v_309.vc[6])).z;
+    float4 tmp1;
+    float4 _359 = float4(in_tc0.xy.x, in_tc0.xy.y, tmp1.z, tmp1.w);
+    tmp1 = _359;
+    tmp1.z = v_309.vc[15].x;
+    dst_reg7.y = float4(dot(float4(tmp1.xyz, 1.0), v_309.vc[8])).y;
+    dst_reg7.x = float4(dot(float4(tmp1.xyz, 1.0), v_309.vc[7])).x;
+    dst_reg0.y = float4(dot(float4(tmp0.xyz, 1.0), v_309.vc[1])).y;
+    dst_reg0.x = float4(dot(float4(tmp0.xyz, 1.0), v_309.vc[0])).x;
+}
+
+vertex main0_out main0(constant VertexBuffer& v_227 [[buffer(0)]], uint gl_VertexIndex [[vertex_id]], texture2d<uint> buff_in_2 [[texture(0)]], texture2d<uint> buff_in_1 [[texture(1)]], constant VertexConstantsBuffer& v_309 [[buffer(1)]])
+{
+    main0_out out = {};
+    float4 dst_reg0 = float4(0.0, 0.0, 0.0, 1.0);
+    float4 dst_reg1 = float4(0.0);
+    float4 dst_reg7 = float4(0.0);
+    float4 param = dst_reg0;
+    float4 param_1 = dst_reg1;
+    float4 param_2 = dst_reg7;
+    vs_adjust(param, param_1, param_2, v_227, gl_VertexIndex, buff_in_2, buff_in_1, v_309);
+    dst_reg0 = param;
+    dst_reg1 = param_1;
+    dst_reg7 = param_2;
+    out.gl_Position = dst_reg0;
+    out.back_color = dst_reg1;
+    out.tc0 = dst_reg7;
+    out.gl_Position *= v_227.scale_offset_mat;
+    return out;
+}
+
diff --git a/reference/shaders-msl/flatten/rowmajor.flatten.vert b/reference/shaders-msl/flatten/rowmajor.flatten.vert
new file mode 100644
index 00000000..3ea6d78b
--- /dev/null
+++ b/reference/shaders-msl/flatten/rowmajor.flatten.vert
@@ -0,0 +1,38 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct UBO
+{
+    float4x4 uMVPR;
+    float4x4 uMVPC;
+    float2x4 uMVP;
+};
+
+struct main0_in
+{
+    float4 aVertex [[attribute(0)]];
+};
+
+struct main0_out
+{
+    float4 gl_Position [[position]];
+};
+
+// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.
+float2x4 spvConvertFromRowMajor2x4(float2x4 m)
+{
+    return float2x4(float4(m[0][0], m[0][2], m[1][0], m[1][2]), float4(m[0][1], m[0][3], m[1][1], m[1][3]));
+}
+
+vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]])
+{
+    main0_out out = {};
+    float2 v = in.aVertex * spvConvertFromRowMajor2x4(_18.uMVP);
+    out.gl_Position = (_18.uMVPR * in.aVertex) + (in.aVertex * _18.uMVPC);
+    return out;
+}
+
diff --git a/reference/shaders-msl/flatten/swizzle.flatten.vert b/reference/shaders-msl/flatten/swizzle.flatten.vert
index 1accb1d7..53fc21f9 100644
--- a/reference/shaders-msl/flatten/swizzle.flatten.vert
+++ b/reference/shaders-msl/flatten/swizzle.flatten.vert
@@ -38,8 +38,8 @@ vertex main0_out main0(constant UBO& _22 [[buffer(0)]])
     out.gl_Position = float4(0.0);
     out.oA = _22.A;
     out.oB = float4(_22.B0, _22.B1);
-    out.oC = float4(_22.C0, _22.C1);
-    out.oD = float4(_22.D0, _22.D1);
+    out.oC = float4(_22.C0, _22.C1) + float4(_22.C1.xy, _22.C1.z, _22.C0);
+    out.oD = float4(_22.D0, _22.D1) + float4(float3(_22.D0).xy, float3(_22.D0).z, _22.D1);
     out.oE = float4(_22.E0, _22.E1, _22.E2, _22.E3);
     out.oF = float4(_22.F0, _22.F1, _22.F2);
     return out;
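The `spvConvertFromRowMajor2x4` helper emitted above does not call `transpose` (that only exists for square matrices); it re-gathers elements by index. A standalone sanity check of the index pattern, written in plain C++ over a simplified model that ignores std140 row padding: the two `float4` "columns" of the MSL `float2x4` are assumed to hold the four row-major rows of two floats back-to-back.

```cpp
#include <cassert>

int main()
{
    // Row-major storage of 4 rows x 2 cols: r[row][col].
    float r[4][2] = {{1, 2}, {3, 4}, {5, 6}, {7, 8}};
    // The buffer feeds the MSL float2x4 as two float4 "columns":
    // m[0] = rows 0-1 flattened, m[1] = rows 2-3 flattened.
    float m[2][4] = {{r[0][0], r[0][1], r[1][0], r[1][1]},
                     {r[2][0], r[2][1], r[3][0], r[3][1]}};
    // Exactly the pattern from the generated helper.
    float col0[4] = {m[0][0], m[0][2], m[1][0], m[1][2]};
    float col1[4] = {m[0][1], m[0][3], m[1][1], m[1][3]};
    for (int i = 0; i < 4; i++)
    {
        assert(col0[i] == r[i][0]); // first column of the logical matrix
        assert(col1[i] == r[i][1]); // second column
    }
}
```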
diff --git a/shaders-msl-no-opt/vert/functions_nested.vert b/shaders-msl-no-opt/vert/functions_nested.vert
new file mode 100644
index 00000000..2eec5ac5
--- /dev/null
+++ b/shaders-msl-no-opt/vert/functions_nested.vert
@@ -0,0 +1,132 @@
+#version 450
+#extension GL_ARB_separate_shader_objects : enable
+
+layout(std140, set = 0, binding = 0) uniform VertexBuffer
+{
+	mat4 scale_offset_mat;
+	uint vertex_base_index;
+	ivec4 input_attributes[16];
+};
+layout(set=0, binding=3) uniform usamplerBuffer buff_in_1;
+layout(set=0, binding=4) uniform usamplerBuffer buff_in_2;
+
+layout(location=10) out vec4 back_color;
+layout(location=0) out vec4 tc0;
+
+layout(std140, set=0, binding = 1) uniform VertexConstantsBuffer
+{
+	vec4 vc[16];
+};
+
+struct attr_desc
+{
+	int type;
+	int attribute_size;
+	int starting_offset;
+	int stride;
+	int swap_bytes;
+	int is_volatile;
+};
+
+uint get_bits(uvec4 v, int swap)
+{
+	if (swap != 0) return (v.w | v.z << 8 | v.y << 16 | v.x << 24);
+	return (v.x | v.y << 8 | v.z << 16 | v.w << 24);
+}
+
+vec4 fetch_attr(attr_desc desc, int vertex_id, usamplerBuffer input_stream)
+{
+	vec4 result = vec4(0.0f, 0.0f, 0.0f, 1.0f);
+	uvec4 tmp;
+	uint bits;
+	bool reverse_order = false;
+
+	int first_byte = (vertex_id * desc.stride) + desc.starting_offset;
+	for (int n = 0; n < 4; n++)
+	{
+		if (n == desc.attribute_size) break;
+
+		switch (desc.type)
+		{
+		case 0:
+			//signed normalized 16-bit
+			tmp.x = texelFetch(input_stream, first_byte++).x;
+			tmp.y = texelFetch(input_stream, first_byte++).x;
+			result[n] = get_bits(tmp, desc.swap_bytes);
+			break;
+		case 1:
+			//float
+			tmp.x = texelFetch(input_stream, first_byte++).x;
+			tmp.y = texelFetch(input_stream, first_byte++).x;
+			tmp.z = texelFetch(input_stream, first_byte++).x;
+			tmp.w = texelFetch(input_stream, first_byte++).x;
+			result[n] = uintBitsToFloat(get_bits(tmp, desc.swap_bytes));
+			break;
+		case 2:
+			//unsigned byte
+			result[n] = texelFetch(input_stream, first_byte++).x;
+			reverse_order = (desc.swap_bytes != 0);
+			break;
+		}
+	}
+
+	return (reverse_order)? result.wzyx: result;
+}
+
+attr_desc fetch_desc(int location)
+{
+	attr_desc result;
+	int attribute_flags = input_attributes[location].w;
+	result.type = input_attributes[location].x;
+	result.attribute_size = input_attributes[location].y;
+	result.starting_offset = input_attributes[location].z;
+	result.stride = attribute_flags & 0xFF;
+	result.swap_bytes = (attribute_flags >> 8) & 0x1;
+	result.is_volatile = (attribute_flags >> 9) & 0x1;
+	return result;
+}
+
+vec4 read_location(int location)
+{
+	attr_desc desc = fetch_desc(location);
+
+	int vertex_id = gl_VertexIndex - int(vertex_base_index);
+	if (desc.is_volatile != 0)
+		return fetch_attr(desc, vertex_id, buff_in_2);
+	else
+		return fetch_attr(desc, vertex_id, buff_in_1);
+}
+
+void vs_adjust(inout vec4 dst_reg0, inout vec4 dst_reg1, inout vec4 dst_reg7)
+{
+	vec4 tmp0;
+	vec4 tmp1;
+	vec4 in_diff_color= read_location(3);
+	vec4 in_pos= read_location(0);
+	vec4 in_tc0= read_location(8);
+	dst_reg1 = (in_diff_color * vc[13]);
+	tmp0.x = vec4(dot(vec4(in_pos.xyzx.xyz, 1.0), vc[4])).x;
+	tmp0.y = vec4(dot(vec4(in_pos.xyzx.xyz, 1.0), vc[5])).y;
+	tmp0.z = vec4(dot(vec4(in_pos.xyzx.xyz, 1.0), vc[6])).z;
+	tmp1.xy = in_tc0.xyxx.xy;
+	tmp1.z = vc[15].xxxx.z;
+	dst_reg7.y = vec4(dot(vec4(tmp1.xyzx.xyz, 1.0), vc[8])).y;
+	dst_reg7.x = vec4(dot(vec4(tmp1.xyzx.xyz, 1.0), vc[7])).x;
+	dst_reg0.y = vec4(dot(vec4(tmp0.xyzx.xyz, 1.0), vc[1])).y;
+	dst_reg0.x = vec4(dot(vec4(tmp0.xyzx.xyz, 1.0), vc[0])).x;
+}
+
+void main ()
+{
+	vec4 dst_reg0= vec4(0.0f, 0.0f, 0.0f, 1.0f);
+	vec4 dst_reg1= vec4(0.0, 0.0, 0.0, 0.0);
+	vec4 dst_reg7= vec4(0.0, 0.0, 0.0, 0.0);
+
+	vs_adjust(dst_reg0, dst_reg1, dst_reg7);
+
+	gl_Position = dst_reg0;
+	back_color = dst_reg1;
+	tc0 = dst_reg7;
+	gl_Position = gl_Position * scale_offset_mat;
+}
+
diff --git a/shaders-msl/flatten/rowmajor.flatten.vert b/shaders-msl/flatten/rowmajor.flatten.vert
new file mode 100644
index 00000000..88c468c8
--- /dev/null
+++ b/shaders-msl/flatten/rowmajor.flatten.vert
@@ -0,0 +1,16 @@
+#version 310 es
+
+layout(std140) uniform UBO
+{
+	layout(column_major) mat4 uMVPR;
+	layout(row_major) mat4 uMVPC;
+	layout(row_major) mat2x4 uMVP;
+};
+
+layout(location = 0) in vec4 aVertex;
+
+void main()
+{
+	vec2 v = aVertex * uMVP;
+	gl_Position = uMVPR * aVertex + uMVPC * aVertex;
+}
diff --git a/shaders-msl/flatten/swizzle.flatten.vert b/shaders-msl/flatten/swizzle.flatten.vert
index b4d9655d..e310cdf3 100644
--- a/shaders-msl/flatten/swizzle.flatten.vert
+++ b/shaders-msl/flatten/swizzle.flatten.vert
@@ -40,8 +40,8 @@ void main()
 
 	oA = A;
 	oB = vec4(B0, B1);
-	oC = vec4(C0, C1);
-	oD = vec4(D0, D1);
+	oC = vec4(C0, C1) + vec4(C1.xy, C1.z, C0);   // not packed
+	oD = vec4(D0, D1) + vec4(D0.xy, D0.z, D1);   // packed - must convert for swizzle
 	oE = vec4(E0, E1, E2, E3);
 	oF = vec4(F0, F1, F2);
 }
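The GLSL test shader above assembles 32-bit values byte-by-byte from a `usamplerBuffer` and optionally byte-swaps them. A plain C++ equivalent of its `get_bits()` logic, for clarity:

```cpp
#include <cassert>
#include <cstdint>

uint32_t get_bits(const uint8_t v[4], bool swap)
{
    if (swap) // big-endian source: v[0] is the most significant byte
        return v[3] | (v[2] << 8) | (v[1] << 16) | (uint32_t(v[0]) << 24);
    return v[0] | (v[1] << 8) | (v[2] << 16) | (uint32_t(v[3]) << 24);
}

int main()
{
    const uint8_t bytes[4] = {0x78, 0x56, 0x34, 0x12};
    assert(get_bits(bytes, false) == 0x12345678u); // little-endian read
    assert(get_bits(bytes, true) == 0x78563412u);  // byte-swapped read
}
```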
diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp
index 74120d00..be7c38cc 100644
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@@ -2237,7 +2237,7 @@ string CompilerGLSL::to_expression(uint32_t id)
 	if (e.base_expression)
 		return to_enclosed_expression(e.base_expression) + e.expression;
 	else if (e.need_transpose)
-		return convert_row_major_matrix(e.expression);
+		return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type));
 	else
 		return e.expression;
 }
@@ -4252,7 +4252,8 @@ const char *CompilerGLSL::index_to_swizzle(uint32_t index)
 }
 
 string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count,
-                                           bool index_is_literal, bool chain_only, bool *need_transpose)
+                                           bool index_is_literal, bool chain_only, bool *need_transpose,
+                                           bool *result_is_packed)
 {
 	string expr;
 	if (!chain_only)
@@ -4411,7 +4412,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count,
 		{
 			if (row_major_matrix_needs_conversion)
 			{
-				expr = convert_row_major_matrix(expr);
+				expr = convert_row_major_matrix(expr, *type);
 				row_major_matrix_needs_conversion = false;
 			}
 
@@ -4429,7 +4430,10 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count,
 		else if (type->vecsize > 1)
 		{
 			if (vector_is_packed)
+			{
 				expr = unpack_expression_type(expr, *type);
+				vector_is_packed = false;
+			}
 
 			if (index_is_literal)
 			{
@@ -4465,6 +4469,10 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count,
 	if (need_transpose)
 		*need_transpose = row_major_matrix_needs_conversion;
+
+	if (result_is_packed)
+		*result_is_packed = vector_is_packed;
+
 	return expr;
 }
 
@@ -4474,7 +4482,7 @@ std::string CompilerGLSL::to_flattened_struct_member(const SPIRType &type, uint32_t index)
 }
 
 string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
-                                  bool *out_need_transpose)
+                                  bool *out_need_transpose, bool *result_is_packed)
 {
 	if (flattened_buffer_blocks.count(base))
 	{
@@ -4484,6 +4492,8 @@ string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
 
 		if (out_need_transpose)
 			*out_need_transpose = target_type.columns > 1 && need_transpose;
+		if (result_is_packed)
+			*result_is_packed = false;
 
 		return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, need_transpose);
 	}
@@ -4493,11 +4503,13 @@ string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
 		auto &type = get<SPIRType>(get<SPIRVariable>(base).basetype);
 		if (out_need_transpose)
 			*out_need_transpose = false;
+		if (result_is_packed)
+			*result_is_packed = false;
 		return sanitize_underscores(join(to_name(type.self), "_", chain));
 	}
 	else
 	{
-		return access_chain_internal(base, indices, count, false, false, out_need_transpose);
+		return access_chain_internal(base, indices, count, false, false, out_need_transpose, result_is_packed);
 	}
 }
 
@@ -4590,7 +4602,7 @@ std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count,
 		// Cannot forward transpositions, so resolve them here.
 		if (need_transpose)
-			expr += convert_row_major_matrix(tmp);
+			expr += convert_row_major_matrix(tmp, member_type);
 		else
 			expr += tmp;
 	}
@@ -5080,7 +5092,7 @@ string CompilerGLSL::build_composite_combiner(const uint32_t *elems, uint32_t length)
 			{
 				// We'll likely end up with duplicated swizzles, e.g.
 				// foobar.xyz.xyz from patterns like
-				// OpVectorSwizzle
+				// OpVectorShuffle
 				// OpCompositeExtract x 3
 				// OpCompositeConstruct 3x + other scalar.
 				// Just modify op in-place.
@@ -5240,6 +5252,11 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		auto &e = emit_op(result_type, id, expr, forward, true);
 		e.need_transpose = need_transpose;
 		register_read(id, ptr, forward);
+
+		// Pass through whether the result is of a packed type.
+		if (has_decoration(ptr, DecorationCPacked))
+			set_decoration(id, DecorationCPacked);
+
 		break;
 	}
 
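A simplified restatement of the pass-through rule introduced here (the real logic lives inline in `emit_instruction` for OpLoad and OpAccessChain): whatever "packed" status the source ID had, the freshly minted expression ID gets, so later swizzles know whether to unpack first. Sketch using SPIRV-Cross's public decoration accessors:

```cpp
#include "spirv_cross.hpp"

// Illustrative helper, not part of the actual patch.
static void propagate_packed(spirv_cross::Compiler &compiler,
                             uint32_t src_id, uint32_t dst_id)
{
    if (compiler.has_decoration(src_id, spv::DecorationCPacked))
        compiler.set_decoration(dst_id, spv::DecorationCPacked);
    else
        compiler.unset_decoration(dst_id, spv::DecorationCPacked);
}
```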
@@ -5252,11 +5269,18 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 
 		// If the base is immutable, the access chain pointer must also be.
 		// If an expression is mutable and forwardable, we speculate that it is immutable.
-		bool need_transpose;
-		auto e = access_chain(ops[2], &ops[3], length - 3, get<SPIRType>(ops[0]), &need_transpose);
+		bool need_transpose, result_is_packed;
+		auto e = access_chain(ops[2], &ops[3], length - 3, get<SPIRType>(ops[0]), &need_transpose, &result_is_packed);
 		auto &expr = set<SPIRExpression>(ops[1], move(e), ops[0], should_forward(ops[2]));
 		expr.loaded_from = ops[2];
 		expr.need_transpose = need_transpose;
+
+		// Mark the result as being packed. Some platforms handle packed vectors differently from non-packed ones.
+		if (result_is_packed)
+			set_decoration(ops[1], DecorationCPacked);
+		else
+			unset_decoration(ops[1], DecorationCPacked);
+
 		break;
 	}
 
@@ -5635,11 +5659,13 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			shuffle = true;
 
 	string expr;
-	bool trivial_forward;
+	bool should_fwd, trivial_forward;
 
 	if (shuffle)
 	{
-		trivial_forward = !expression_is_forwarded(vec0) && !expression_is_forwarded(vec1);
+		bool allow_fwd = !backend.force_temp_use_for_two_vector_shuffles;
+		should_fwd = allow_fwd && should_forward(vec0) && should_forward(vec1);
+		trivial_forward = allow_fwd && !expression_is_forwarded(vec0) && !expression_is_forwarded(vec1);
 
 		// Constructor style and shuffling from two different vectors.
 		vector<string> args;
@@ -5654,13 +5680,19 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 	}
 	else
 	{
+		should_fwd = should_forward(vec0);
 		trivial_forward = !expression_is_forwarded(vec0);
 
 		// We only source from first vector, so can use swizzle.
+		// If the vector is packed, unpack it before applying a swizzle (needed for MSL)
 		expr += to_enclosed_expression(vec0);
+		if (has_decoration(vec0, DecorationCPacked))
+			expr = unpack_expression_type(expr, expression_type(vec0));
+
 		expr += ".";
 		for (uint32_t i = 0; i < length; i++)
 			expr += index_to_swizzle(elems[i]);
+
 		if (backend.swizzle_is_function && length > 1)
 			expr += "()";
 	}
@@ -5668,7 +5700,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 
 	// A shuffle is trivial in that it doesn't actually *do* anything.
 	// We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed.
-	emit_op(result_type, id, expr, should_forward(vec0) && should_forward(vec1), trivial_forward);
+	emit_op(result_type, id, expr, should_fwd, trivial_forward);
 
 	break;
 }
@@ -6167,8 +6199,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		register_read(ops[1], ops[2], should_forward(ops[2]));
 		break;
 
-	// OpAtomicStore unimplemented. Not sure what would use that.
-	// OpAtomicLoad seems to only be relevant for atomic counters.
+	// OpAtomicStore unimplemented. Not sure what would use that.
+	// OpAtomicLoad seems to only be relevant for atomic counters.
 
 	case OpAtomicIIncrement:
 		forced_temporaries.insert(ops[1]);
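On `force_temp_use_for_two_vector_shuffles`: a two-source OpVectorShuffle has no single-source swizzle form, so it is emitted constructor-style, and MSL now binds both sources to temporaries instead of forwarding them. A hand-written illustration (not generated output) of the resulting pattern, which keeps a complex or packed source expression from being expanded once per component:

```cpp
// shuffle(a, b, <0, 5, 2, 7>) lowered constructor-style with forced temps.
float4 shuffle_example(float4 a, float4 b)
{
    float4 _1 = a; // forced temporary for source 0
    float4 _2 = b; // forced temporary for source 1
    return float4(_1.x, _2.y, _1.z, _2.w);
}
```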
@@ -6905,7 +6937,7 @@ bool CompilerGLSL::member_is_packed_type(const SPIRType &type, uint32_t index) const
 // row_major matrix result of the expression to a column_major matrix.
 // Base implementation uses the standard library transpose() function.
 // Subclasses may override to use a different function.
-string CompilerGLSL::convert_row_major_matrix(string exp_str)
+string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType & /*exp_type*/)
 {
 	strip_enclosed_expression(exp_str);
 	return join("transpose(", exp_str, ")");
diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp
index aad1527a..67076272 100644
--- a/spirv_glsl.hpp
+++ b/spirv_glsl.hpp
@@ -290,10 +290,10 @@ protected:
 	void add_resource_name(uint32_t id);
 	void add_member_name(SPIRType &type, uint32_t name);
 
-	bool is_non_native_row_major_matrix(uint32_t id);
-	bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index);
+	virtual bool is_non_native_row_major_matrix(uint32_t id);
+	virtual bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index);
 	bool member_is_packed_type(const SPIRType &type, uint32_t index) const;
-	virtual std::string convert_row_major_matrix(std::string exp_str);
+	virtual std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type);
 
 	std::unordered_set<std::string> local_variable_names;
 	std::unordered_set<std::string> resource_names;
@@ -321,6 +321,8 @@ protected:
 		bool boolean_mix_support = true;
 		bool allow_precision_qualifiers = false;
 		bool can_swizzle_scalar = false;
+		bool force_temp_use_for_two_vector_shuffles = false;
+
 	} backend;
 
 	void emit_struct(SPIRType &type);
@@ -371,9 +373,10 @@ protected:
 	SPIRExpression &emit_op(uint32_t result_type, uint32_t result_id, const std::string &rhs, bool forward_rhs,
 	                        bool suppress_usage_tracking = false);
 	std::string access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, bool index_is_literal,
-	                                  bool chain_only = false, bool *need_transpose = nullptr);
+	                                  bool chain_only = false, bool *need_transpose = nullptr,
+	                                  bool *result_is_packed = nullptr);
 	std::string access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
-	                         bool *need_transpose = nullptr);
+	                         bool *need_transpose = nullptr, bool *result_is_packed = nullptr);
 
 	std::string flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count,
 	                                   const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,
diff --git a/spirv_msl.cpp b/spirv_msl.cpp
index 087c4150..8e8a8db7 100644
--- a/spirv_msl.cpp
+++ b/spirv_msl.cpp
@@ -57,6 +57,21 @@ string CompilerMSL::compile()
 	// Force a classic "C" locale, reverts when function returns
 	ClassicLocale classic_locale;
 
+	// Do not deal with GLES-isms like precision, older extensions and such.
+	CompilerGLSL::options.vulkan_semantics = true;
+	CompilerGLSL::options.es = false;
+	CompilerGLSL::options.version = 120;
+	backend.float_literal_suffix = false;
+	backend.uint32_t_literal_suffix = true;
+	backend.basic_int_type = "int";
+	backend.basic_uint_type = "uint";
+	backend.discard_literal = "discard_fragment()";
+	backend.swizzle_is_function = false;
+	backend.shared_is_implied = false;
+	backend.native_row_major_matrix = false;
+	backend.flexible_member_array_supported = false;
+	backend.force_temp_use_for_two_vector_shuffles = true;
+
 	replace_illegal_names();
 
 	non_stage_in_input_var_ids.clear();
@@ -88,20 +103,6 @@ string CompilerMSL::compile()
 	if (options.resolve_specialized_array_lengths)
 		resolve_specialized_array_lengths();
 
-	// Do not deal with GLES-isms like precision, older extensions and such.
-	CompilerGLSL::options.vulkan_semantics = true;
-	CompilerGLSL::options.es = false;
-	CompilerGLSL::options.version = 120;
-	backend.float_literal_suffix = false;
-	backend.uint32_t_literal_suffix = true;
-	backend.basic_int_type = "int";
-	backend.basic_uint_type = "uint";
-	backend.discard_literal = "discard_fragment()";
-	backend.swizzle_is_function = false;
-	backend.shared_is_implied = false;
-	backend.native_row_major_matrix = false;
-	backend.flexible_member_array_supported = false;
-
 	uint32_t pass_count = 0;
 	do
 	{
@@ -279,6 +280,15 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::set<uint32_t> &added_arg_ids,
 		}
 		case OpFunctionCall:
 		{
+			// First see if any of the function call args are globals
+			for (uint32_t arg_idx = 3; arg_idx < i.length; arg_idx++)
+			{
+				uint32_t arg_id = ops[arg_idx];
+				if (global_var_ids.find(arg_id) != global_var_ids.end())
+					added_arg_ids.insert(arg_id);
+			}
+
+			// Then recurse into the function itself to extract globals used internally in the function
 			uint32_t inner_func_id = ops[2];
 			std::set<uint32_t> inner_func_args;
 			extract_global_variables_from_function(inner_func_id, inner_func_args, global_var_ids,
@@ -306,12 +316,10 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::set<uint32_t> &added_arg_ids,
 			func.add_parameter(type_id, next_id, true);
 			set<SPIRVariable>(next_id, type_id, StorageClassFunction, 0, arg_id);
 
-			// Ensure both the existing and new variables have the same name, and the name is valid
-			string vld_name = ensure_valid_name(to_name(arg_id), "v");
-			set_name(arg_id, vld_name);
-			set_name(next_id, vld_name);
+			// Ensure the existing variable has a valid name and the new variable has all the same meta info
+			set_name(arg_id, ensure_valid_name(to_name(arg_id), "v"));
+			meta[next_id] = meta[arg_id];
 
-			meta[next_id].decoration.qualified_alias = meta[arg_id].decoration.qualified_alias;
 			next_id++;
 		}
 	}
@@ -811,8 +819,8 @@ string CompilerMSL::unpack_expression_type(string expr_str, const SPIRType &type)
 // Emits the file header info
 void CompilerMSL::emit_header()
 {
-	for (auto &header : pragma_lines)
-		statement(header);
+	for (auto &pragma : pragma_lines)
+		statement(pragma);
 
 	if (!pragma_lines.empty())
 		statement("");
@@ -830,7 +838,7 @@ void CompilerMSL::emit_header()
 
 void CompilerMSL::add_pragma_line(const string &line)
 {
-	pragma_lines.push_back(line);
+	pragma_lines.insert(line);
 }
 
 // Emits any needed custom function bodies.
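Switching `pragma_lines` from `std::vector` to `std::set` makes repeated `add_pragma_line()` calls idempotent: each generated helper may request the same diagnostic pragma, but only one copy is emitted. A tiny standalone demonstration of the container behavior being relied on:

```cpp
#include <iostream>
#include <set>
#include <string>

int main()
{
    std::set<std::string> pragma_lines;
    // Several helpers may request the same pragma; the set keeps one.
    pragma_lines.insert("#pragma clang diagnostic ignored \"-Wmissing-prototypes\"");
    pragma_lines.insert("#pragma clang diagnostic ignored \"-Wmissing-prototypes\"");
    std::cout << pragma_lines.size() << "\n"; // prints 1
}
```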
@@ -1041,6 +1049,64 @@ void CompilerMSL::emit_custom_functions()
 			statement("");
 			break;
 
+		case SPVFuncImplRowMajor2x3:
+			statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
+			statement("float2x3 spvConvertFromRowMajor2x3(float2x3 m)");
+			begin_scope();
+			statement("return float2x3(float3(m[0][0], m[0][2], m[1][1]), float3(m[0][1], m[1][0], m[1][2]));");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplRowMajor2x4:
+			statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
+			statement("float2x4 spvConvertFromRowMajor2x4(float2x4 m)");
+			begin_scope();
+			statement("return float2x4(float4(m[0][0], m[0][2], m[1][0], m[1][2]), float4(m[0][1], m[0][3], m[1][1], "
+			          "m[1][3]));");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplRowMajor3x2:
+			statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
+			statement("float3x2 spvConvertFromRowMajor3x2(float3x2 m)");
+			begin_scope();
+			statement("return float3x2(float2(m[0][0], m[1][1]), float2(m[0][1], m[2][0]), float2(m[1][0], m[2][1]));");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplRowMajor3x4:
+			statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
+			statement("float3x4 spvConvertFromRowMajor3x4(float3x4 m)");
+			begin_scope();
+			statement("return float3x4(float4(m[0][0], m[0][3], m[1][2], m[2][1]), float4(m[0][1], m[1][0], m[1][3], "
+			          "m[2][2]), float4(m[0][2], m[1][1], m[2][0], m[2][3]));");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplRowMajor4x2:
+			statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
+			statement("float4x2 spvConvertFromRowMajor4x2(float4x2 m)");
+			begin_scope();
+			statement("return float4x2(float2(m[0][0], m[2][0]), float2(m[0][1], m[2][1]), float2(m[1][0], m[3][0]), "
+			          "float2(m[1][1], m[3][1]));");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplRowMajor4x3:
+			statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
+			statement("float4x3 spvConvertFromRowMajor4x3(float4x3 m)");
+			begin_scope();
+			statement("return float4x3(float3(m[0][0], m[1][1], m[2][2]), float3(m[0][1], m[1][2], m[3][0]), "
+			          "float3(m[0][2], m[2][0], m[3][1]), float3(m[1][0], m[2][1], m[3][2]));");
+			end_scope();
+			statement("");
+			break;
+
 		default:
 			break;
 		}
@@ -1541,6 +1607,24 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 		emit_barrier(ops[0], ops[1], ops[2]);
 		break;
 
+	case OpVectorTimesMatrix:
+	case OpMatrixTimesVector:
+	{
+		// If the matrix needs transpose and it is square, just flip the multiply order.
+		uint32_t mtx_id = ops[opcode == OpMatrixTimesVector ? 2 : 3];
+		auto *e = maybe_get<SPIRExpression>(mtx_id);
+		auto &t = expression_type(mtx_id);
+		if (e && e->need_transpose && t.columns == t.vecsize)
+		{
+			e->need_transpose = false;
+			emit_binary_op(ops[0], ops[1], ops[3], ops[2], "*");
+			e->need_transpose = true;
+		}
+		else
+			BOP(*);
+		break;
+	}
+
 	// OpOuterProduct
 
 	default:
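The operand-flipping trick in the OpMatrixTimesVector case avoids materializing a transpose for square matrices. It rests on the standard identity (in shader terms, `transpose(M) * v == v * M`):

```latex
(M^{\top} v)_i \;=\; \sum_j M_{ji}\, v_j \;=\; (v\, M)_i
\qquad\Longrightarrow\qquad
\operatorname{transpose}(M) \cdot v \;=\; v \cdot M \quad (M \text{ square})
```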
@@ -1559,34 +1643,18 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem)
 	string bar_stmt = "threadgroup_barrier(mem_flags::";
 
 	uint32_t mem_sem = id_mem_sem ? get<SPIRConstant>(id_mem_sem).scalar() : uint32_t(MemorySemanticsMaskNone);
-	switch (mem_sem)
-	{
-	case MemorySemanticsCrossWorkgroupMemoryMask:
+
+	if (mem_sem & MemorySemanticsCrossWorkgroupMemoryMask)
 		bar_stmt += "mem_device";
-		break;
-
-	case MemorySemanticsSubgroupMemoryMask:
-	case MemorySemanticsWorkgroupMemoryMask:
-	case MemorySemanticsAtomicCounterMemoryMask:
+	else if (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask |
+	                    MemorySemanticsAtomicCounterMemoryMask))
 		bar_stmt += "mem_threadgroup";
-		break;
-
-	case MemorySemanticsImageMemoryMask:
+	else if (mem_sem & MemorySemanticsImageMemoryMask)
 		bar_stmt += "mem_texture";
-		break;
-
-	case MemorySemanticsAcquireMask:
-	case MemorySemanticsReleaseMask:
-	case MemorySemanticsAcquireReleaseMask:
-	case MemorySemanticsSequentiallyConsistentMask:
-	case MemorySemanticsUniformMemoryMask:
-	case MemorySemanticsMaskNone:
-	default:
+	else
 		bar_stmt += "mem_none";
-		break;
-	}
 
-	if (options.supports_msl_version(2))
+	if (options.is_ios() && options.supports_msl_version(2))
 	{
 		bar_stmt += ", ";
 
@@ -1919,7 +1987,7 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, uint64_t)
 
 		// Manufacture automatic sampler arg for SampledImage texture
 		auto &arg_type = get<SPIRType>(arg.type);
-		if (arg_type.basetype == SPIRType::SampledImage)
+		if (arg_type.basetype == SPIRType::SampledImage && arg_type.image.dim != DimBuffer)
 			decl += ", thread const sampler& " + to_sampler_expression(arg.id);
 
 		if (&arg != &func.arguments.back())
@@ -2213,7 +2281,7 @@ string CompilerMSL::to_func_call_arg(uint32_t id)
 	{
 		auto &var = id_v.get<SPIRVariable>();
 		auto &type = get<SPIRType>(var.basetype);
-		if (type.basetype == SPIRType::SampledImage)
+		if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer)
 			arg_str += ", " + to_sampler_expression(id);
 	}
 
@@ -2229,6 +2297,84 @@ string CompilerMSL::to_sampler_expression(uint32_t id)
 	return samp_id ? to_expression(samp_id) : to_expression(id) + sampler_name_suffix;
 }
 
+// Checks whether the ID is a row_major matrix that requires conversion before use
+bool CompilerMSL::is_non_native_row_major_matrix(uint32_t id)
+{
+	// Natively supported row-major matrices do not need to be converted.
+	if (backend.native_row_major_matrix)
+		return false;
+
+	// Non-matrix or column-major matrix types do not need to be converted.
+	if (!(meta[id].decoration.decoration_flags & (1ull << DecorationRowMajor)))
+		return false;
+
+	// Generate a function that will swap matrix elements from row-major to column-major.
+	const auto type = expression_type(id);
+	add_convert_row_major_matrix_function(type.columns, type.vecsize);
+	return true;
+}
+
+// Checks whether the member is a row_major matrix that requires conversion before use
+bool CompilerMSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index)
+{
+	// Natively supported row-major matrices do not need to be converted.
+	if (backend.native_row_major_matrix)
+		return false;
+
+	// Non-matrix or column-major matrix types do not need to be converted.
+	if (!(combined_decoration_for_member(type, index) & (1ull << DecorationRowMajor)))
+		return false;
+
+	// Generate a function that will swap matrix elements from row-major to column-major.
+	const auto mbr_type = get<SPIRType>(type.member_types[index]);
+	add_convert_row_major_matrix_function(mbr_type.columns, mbr_type.vecsize);
+	return true;
+}
+
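Why `emit_barrier` moved from a `switch` to bit tests: SPIR-V MemorySemantics is a bitmask, and real modules combine bits, which a `switch` can only match as exact values. A minimal demonstration (mask values taken from the SPIR-V spec):

```cpp
#include <cstdint>
#include <cstdio>

enum : uint32_t
{
    AcquireRelease  = 0x8,
    WorkgroupMemory = 0x100,
    CrossWorkgroup  = 0x200,
    ImageMemory     = 0x800,
};

const char *mem_flags(uint32_t sem)
{
    if (sem & CrossWorkgroup)
        return "mem_device";
    if (sem & WorkgroupMemory)
        return "mem_threadgroup";
    if (sem & ImageMemory)
        return "mem_texture";
    return "mem_none";
}

int main()
{
    // A combined mask: the old exact-match switch would have hit default.
    printf("%s\n", mem_flags(WorkgroupMemory | AcquireRelease)); // mem_threadgroup
}
```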
+// Adds a function suitable for converting a non-square row-major matrix to a column-major matrix.
+void CompilerMSL::add_convert_row_major_matrix_function(uint32_t cols, uint32_t rows)
+{
+	SPVFuncImpl spv_func;
+	if (cols == rows) // Square matrix...just use transpose() function
+		return;
+	else if (cols == 2 && rows == 3)
+		spv_func = SPVFuncImplRowMajor2x3;
+	else if (cols == 2 && rows == 4)
+		spv_func = SPVFuncImplRowMajor2x4;
+	else if (cols == 3 && rows == 2)
+		spv_func = SPVFuncImplRowMajor3x2;
+	else if (cols == 3 && rows == 4)
+		spv_func = SPVFuncImplRowMajor3x4;
+	else if (cols == 4 && rows == 2)
+		spv_func = SPVFuncImplRowMajor4x2;
+	else if (cols == 4 && rows == 3)
+		spv_func = SPVFuncImplRowMajor4x3;
+	else
+		SPIRV_CROSS_THROW("Could not convert row-major matrix.");
+
+	auto rslt = spv_function_implementations.insert(spv_func);
+	if (rslt.second)
+	{
+		add_pragma_line("#pragma clang diagnostic ignored \"-Wmissing-prototypes\"");
+		force_recompile = true;
+	}
+}
+
+// Wraps the expression string in a function call that converts the
+// row_major matrix result of the expression to a column_major matrix.
+string CompilerMSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type)
+{
+	strip_enclosed_expression(exp_str);
+
+	string func_name;
+	if (exp_type.columns == exp_type.vecsize)
+		func_name = "transpose";
+	else
+		func_name = string("spvConvertFromRowMajor") + to_string(exp_type.columns) + "x" + to_string(exp_type.vecsize);
+
+	return join(func_name, "(", exp_str, ")");
+}
+
 // Called automatically at the end of the entry point function
 void CompilerMSL::emit_fixup()
 {
@@ -2237,10 +2383,8 @@ void CompilerMSL::emit_fixup()
 	if ((execution.model == ExecutionModelVertex) && stage_out_var_id && !qual_pos_var_name.empty())
 	{
 		if (CompilerGLSL::options.vertex.fixup_clipspace)
-		{
 			statement(qual_pos_var_name, ".z = (", qual_pos_var_name, ".z + ", qual_pos_var_name,
 			          ".w) * 0.5;       // Adjust clip-space for Metal");
-		}
 
 		if (CompilerGLSL::options.vertex.flip_vert_y)
 			statement(qual_pos_var_name, ".y = -(", qual_pos_var_name, ".y);", "    // Invert Y-axis for Metal");
@@ -2599,6 +2743,7 @@ string CompilerMSL::entry_point_args(bool append_comma)
 			{
 				if (!ep_args.empty())
 					ep_args += ", ";
+
 				BuiltIn bi_type = meta[var_id].decoration.builtin_type;
 				ep_args += builtin_type_decl(bi_type) + " " + to_expression(var_id);
 				ep_args += " [[" + builtin_qualifier(bi_type) + "]]";
@@ -2682,7 +2827,10 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
 	if (constref)
 		decl += "const ";
 
-	decl += type_to_glsl(type, arg.id);
+	if (is_builtin_variable(var))
+		decl += builtin_type_decl((BuiltIn)get_decoration(arg.id, DecorationBuiltIn));
+	else
+		decl += type_to_glsl(type, arg.id);
 
 	if (is_array(type))
 		decl += "*";
@@ -3298,9 +3446,35 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, uint32_t length)
 		break;
 	}
 
-	// Keep track of the instruction return types, mapped by ID
-	if (length > 1)
-		result_types[args[1]] = args[0];
+	// If it has one, keep track of the instruction's result type, mapped by ID
+	switch (opcode)
+	{
+	case OpStore:
+	case OpCopyMemory:
+	case OpCopyMemorySized:
+	case OpImageWrite:
+	case OpAtomicStore:
+	case OpAtomicFlagClear:
+	case OpEmitStreamVertex:
+	case OpEndStreamPrimitive:
+	case OpControlBarrier:
+	case OpMemoryBarrier:
+	case OpGroupWaitEvents:
+	case OpRetainEvent:
+	case OpReleaseEvent:
+	case OpSetUserEventStatus:
+	case OpCaptureEventProfilingInfo:
+	case OpCommitReadPipe:
+	case OpCommitWritePipe:
+	case OpGroupCommitReadPipe:
+	case OpGroupCommitWritePipe:
+		break;
+
+	default:
+		if (length > 1)
+			result_types[args[1]] = args[0];
+		break;
+	}
 
 	return true;
 }
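The opcode exclusion list above exists because most SPIR-V instructions put the result type in word 1 and the result ID in word 2, but store-like instructions have no result at all, so their operands would be misread. A hypothetical mini-handler mirroring the fixed logic (opcode value per the SPIR-V spec):

```cpp
#include <cstdint>
#include <unordered_map>

void record_result_type(uint16_t opcode, const uint32_t *args, uint32_t length,
                        std::unordered_map<uint32_t, uint32_t> &result_types)
{
    const uint16_t OpStore = 62; // no result ID; operands are {pointer, object}
    switch (opcode)
    {
    case OpStore:
        break; // mapping args[1] -> args[0] here would record "object -> pointer"
    default:
        if (length > 1)
            result_types[args[1]] = args[0]; // result ID -> result type
        break;
    }
}
```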
diff --git a/spirv_msl.hpp b/spirv_msl.hpp
index a7082ae3..fbc8dadf 100644
--- a/spirv_msl.hpp
+++ b/spirv_msl.hpp
@@ -77,10 +77,26 @@ public:
 	// Options for compiling to Metal Shading Language
 	struct Options
 	{
+		typedef enum {
+			iOS,
+			macOS,
+		} Platform;
+
+		Platform platform = macOS;
 		uint32_t msl_version = make_msl_version(1, 2);
 		bool enable_point_size_builtin = true;
 		bool resolve_specialized_array_lengths = true;
 
+		bool is_ios()
+		{
+			return platform == iOS;
+		}
+
+		bool is_macos()
+		{
+			return platform == macOS;
+		}
+
 		void set_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0)
 		{
 			msl_version = make_msl_version(major, minor, patch);
@@ -122,6 +138,12 @@ public:
 		SPVFuncImplInverse2x2,
 		SPVFuncImplInverse3x3,
 		SPVFuncImplInverse4x4,
+		SPVFuncImplRowMajor2x3,
+		SPVFuncImplRowMajor2x4,
+		SPVFuncImplRowMajor3x2,
+		SPVFuncImplRowMajor3x4,
+		SPVFuncImplRowMajor4x2,
+		SPVFuncImplRowMajor4x3,
 	};
 
 	// Constructs an instance to compile the SPIR-V code into Metal Shading Language,
@@ -186,6 +208,9 @@ protected:
 	std::string to_qualifiers_glsl(uint32_t id) override;
 	void replace_illegal_names() override;
 	void declare_undefined_values() override;
+	bool is_non_native_row_major_matrix(uint32_t id) override;
+	bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) override;
+	std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type) override;
 
 	void preprocess_op_codes();
 	void localize_global_variables();
@@ -207,6 +232,7 @@ protected:
 	void emit_interface_block(uint32_t ib_var_id);
 	bool maybe_emit_input_struct_assignment(uint32_t id_lhs, uint32_t id_rhs);
 	bool maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs);
+	void add_convert_row_major_matrix_function(uint32_t cols, uint32_t rows);
 
 	std::string func_type_decl(SPIRType &type);
 	std::string entry_point_args(bool append_comma);
@@ -245,7 +271,7 @@ protected:
 	std::unordered_map<uint32_t, MSLVertexAttr *> vtx_attrs_by_location;
 	std::map<uint32_t, uint32_t> non_stage_in_input_var_ids;
 	std::unordered_map<uint32_t, uint32_t> struct_member_padding;
-	std::vector<std::string> pragma_lines;
+	std::set<std::string> pragma_lines;
 	std::vector<MSLResourceBinding *> resource_bindings;
 	MSLResourceBinding next_metal_resource_index;
 	uint32_t stage_in_var_id = 0;
diff --git a/test_shaders.py b/test_shaders.py
index b83bd052..1ca8f9af 100755
--- a/test_shaders.py
+++ b/test_shaders.py
@@ -358,11 +358,20 @@ def test_shader_msl(stats, shader, update, keep, opt):
     noopt = shader_is_noopt(shader[1])
     spirv, msl = cross_compile_msl(joined_path, is_spirv, opt and (not noopt))
     regression_check(shader, msl, update, keep, opt)
-    os.remove(spirv)
+
+    # Uncomment the following line to print the temp SPIR-V file path.
+    # This temp SPIR-V file is not deleted until after the Metal validation step below.
+    # If Metal validation fails, the temp SPIR-V file can be copied out and
+    # used as input to an invocation of spirv-cross to debug from Xcode directly.
+    # To do so, build spirv-cross using `make DEBUG=1`, then run the spirv-cross
+    # executable from Xcode using args: `--msl --entry main --output msl_path spirv_path`.
+#    print('SPIR-V shader: ' + spirv)
 
     if not force_no_external_validation:
         validate_shader_msl(shader, opt)
 
+    os.remove(spirv)
+
 def test_shader_hlsl(stats, shader, update, keep, opt):
     joined_path = os.path.join(shader[0], shader[1])
     print('Testing HLSL shader:', joined_path)
diff --git a/test_shaders.sh b/test_shaders.sh
index 0657cb5c..a3608730 100755
--- a/test_shaders.sh
+++ b/test_shaders.sh
@@ -11,6 +11,7 @@ echo "Using spirv-opt in: $(which spirv-opt)."
 ./test_shaders.py shaders --opt || exit 1
 ./test_shaders.py shaders-msl --msl || exit 1
 ./test_shaders.py shaders-msl --msl --opt || exit 1
+./test_shaders.py shaders-msl-no-opt --msl || exit 1
 ./test_shaders.py shaders-hlsl --hlsl || exit 1
 ./test_shaders.py shaders-hlsl --hlsl --opt || exit 1
 
diff --git a/update_test_shaders.sh b/update_test_shaders.sh
index 69165847..712c3eec 100755
--- a/update_test_shaders.sh
+++ b/update_test_shaders.sh
@@ -11,6 +11,7 @@ echo "Using spirv-opt in: $(which spirv-opt)."
 ./test_shaders.py shaders --update --opt || exit 1
 ./test_shaders.py shaders-msl --msl --update || exit 1
 ./test_shaders.py shaders-msl --msl --update --opt || exit 1
+./test_shaders.py shaders-msl-no-opt --msl --update || exit 1
 ./test_shaders.py shaders-hlsl --hlsl --update || exit 1
 ./test_shaders.py shaders-hlsl --hlsl --update --opt || exit 1
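A hedged usage sketch for the `Options::Platform` field added in spirv_msl.hpp above (field and method names as in the header; the surrounding setup follows the get/set-options pattern of the time and is illustrative only). Per the emit_barrier change, the device-coherence argument is only emitted for iOS with MSL 2.0:

```cpp
#include "spirv_msl.hpp"
#include <string>
#include <vector>

std::string compile_for_ios(std::vector<uint32_t> spirv_binary)
{
    spirv_cross::CompilerMSL msl(std::move(spirv_binary));
    auto opts = msl.get_options();
    opts.platform = spirv_cross::CompilerMSL::Options::iOS;
    opts.set_msl_version(2); // iOS + MSL 2.0 enables coherent barrier scopes
    msl.set_options(opts);
    return msl.compile();
}
```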