Merge pull request #373 from billhollings/master
CompilerMSL enhancements PLUS refactoring of MSL tests to avoid optimization crashes
This commit is contained in:
commit
513ba86fc8
29
reference/opt/shaders-msl/flatten/rowmajor.flatten.vert
Normal file
29
reference/opt/shaders-msl/flatten/rowmajor.flatten.vert
Normal file
@ -0,0 +1,29 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct UBO
|
||||
{
|
||||
float4x4 uMVPR;
|
||||
float4x4 uMVPC;
|
||||
float2x4 uMVP;
|
||||
};
|
||||
|
||||
struct main0_in
|
||||
{
|
||||
float4 aVertex [[attribute(0)]];
|
||||
};
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float4 gl_Position [[position]];
|
||||
};
|
||||
|
||||
vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]])
|
||||
{
|
||||
main0_out out = {};
|
||||
out.gl_Position = (_18.uMVPR * in.aVertex) + (in.aVertex * _18.uMVPC);
|
||||
return out;
|
||||
}
|
||||
|
@ -38,8 +38,8 @@ vertex main0_out main0(constant UBO& _22 [[buffer(0)]])
|
||||
out.gl_Position = float4(0.0);
|
||||
out.oA = _22.A;
|
||||
out.oB = float4(_22.B0, _22.B1);
|
||||
out.oC = float4(_22.C0, _22.C1);
|
||||
out.oD = float4(_22.D0, _22.D1);
|
||||
out.oC = float4(_22.C0, _22.C1) + float4(_22.C1.xy, _22.C1.z, _22.C0);
|
||||
out.oD = float4(_22.D0, _22.D1) + float4(float3(_22.D0).xy, float3(_22.D0).z, _22.D1);
|
||||
out.oE = float4(_22.E0, _22.E1, _22.E2, _22.E3);
|
||||
out.oF = float4(_22.F0, _22.F1, _22.F2);
|
||||
return out;
|
||||
|
190
reference/shaders-msl-no-opt/vert/functions_nested.vert
Normal file
190
reference/shaders-msl-no-opt/vert/functions_nested.vert
Normal file
@ -0,0 +1,190 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct attr_desc
|
||||
{
|
||||
int type;
|
||||
int attribute_size;
|
||||
int starting_offset;
|
||||
int stride;
|
||||
int swap_bytes;
|
||||
int is_volatile;
|
||||
};
|
||||
|
||||
struct VertexBuffer
|
||||
{
|
||||
float4x4 scale_offset_mat;
|
||||
uint vertex_base_index;
|
||||
int4 input_attributes[16];
|
||||
};
|
||||
|
||||
struct VertexConstantsBuffer
|
||||
{
|
||||
float4 vc[16];
|
||||
};
|
||||
|
||||
constant float4 _295 = {};
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float4 tc0 [[user(locn0)]];
|
||||
float4 back_color [[user(locn10)]];
|
||||
float4 gl_Position [[position]];
|
||||
};
|
||||
|
||||
attr_desc fetch_desc(thread const int& location, constant VertexBuffer& v_227)
|
||||
{
|
||||
int attribute_flags = v_227.input_attributes[location].w;
|
||||
attr_desc result;
|
||||
result.type = v_227.input_attributes[location].x;
|
||||
result.attribute_size = v_227.input_attributes[location].y;
|
||||
result.starting_offset = v_227.input_attributes[location].z;
|
||||
result.stride = attribute_flags & 255;
|
||||
result.swap_bytes = (attribute_flags >> 8) & 1;
|
||||
result.is_volatile = (attribute_flags >> 9) & 1;
|
||||
return result;
|
||||
}
|
||||
|
||||
uint get_bits(thread const uint4& v, thread const int& swap)
|
||||
{
|
||||
if (swap != 0)
|
||||
{
|
||||
return ((v.w | (v.z << uint(8))) | (v.y << uint(16))) | (v.x << uint(24));
|
||||
}
|
||||
return ((v.x | (v.y << uint(8))) | (v.z << uint(16))) | (v.w << uint(24));
|
||||
}
|
||||
|
||||
float4 fetch_attr(thread const attr_desc& desc, thread const int& vertex_id, thread const texture2d<uint> input_stream)
|
||||
{
|
||||
float4 result = float4(0.0, 0.0, 0.0, 1.0);
|
||||
bool reverse_order = false;
|
||||
int first_byte = (vertex_id * desc.stride) + desc.starting_offset;
|
||||
for (int n = 0; n < 4; n++)
|
||||
{
|
||||
if (n == desc.attribute_size)
|
||||
{
|
||||
break;
|
||||
}
|
||||
uint4 tmp;
|
||||
switch (desc.type)
|
||||
{
|
||||
case 0:
|
||||
{
|
||||
int _131 = first_byte;
|
||||
first_byte = _131 + 1;
|
||||
tmp.x = input_stream.read(uint2(_131, 0)).x;
|
||||
int _138 = first_byte;
|
||||
first_byte = _138 + 1;
|
||||
tmp.y = input_stream.read(uint2(_138, 0)).x;
|
||||
uint4 param = tmp;
|
||||
int param_1 = desc.swap_bytes;
|
||||
result[n] = float(get_bits(param, param_1));
|
||||
break;
|
||||
}
|
||||
case 1:
|
||||
{
|
||||
int _156 = first_byte;
|
||||
first_byte = _156 + 1;
|
||||
tmp.x = input_stream.read(uint2(_156, 0)).x;
|
||||
int _163 = first_byte;
|
||||
first_byte = _163 + 1;
|
||||
tmp.y = input_stream.read(uint2(_163, 0)).x;
|
||||
int _170 = first_byte;
|
||||
first_byte = _170 + 1;
|
||||
tmp.z = input_stream.read(uint2(_170, 0)).x;
|
||||
int _177 = first_byte;
|
||||
first_byte = _177 + 1;
|
||||
tmp.w = input_stream.read(uint2(_177, 0)).x;
|
||||
uint4 param_2 = tmp;
|
||||
int param_3 = desc.swap_bytes;
|
||||
result[n] = as_type<float>(get_bits(param_2, param_3));
|
||||
break;
|
||||
}
|
||||
case 2:
|
||||
{
|
||||
int _195 = first_byte;
|
||||
first_byte = _195 + 1;
|
||||
result[n] = float(input_stream.read(uint2(_195, 0)).x);
|
||||
reverse_order = desc.swap_bytes != 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
float4 _209;
|
||||
if (reverse_order)
|
||||
{
|
||||
_209 = result.wzyx;
|
||||
}
|
||||
else
|
||||
{
|
||||
_209 = result;
|
||||
}
|
||||
return _209;
|
||||
}
|
||||
|
||||
float4 read_location(thread const int& location, constant VertexBuffer& v_227, thread uint& gl_VertexIndex, thread texture2d<uint> buff_in_2, thread texture2d<uint> buff_in_1)
|
||||
{
|
||||
int param = location;
|
||||
attr_desc desc = fetch_desc(param, v_227);
|
||||
int vertex_id = gl_VertexIndex - int(v_227.vertex_base_index);
|
||||
if (desc.is_volatile != 0)
|
||||
{
|
||||
attr_desc param_1 = desc;
|
||||
int param_2 = vertex_id;
|
||||
return fetch_attr(param_1, param_2, buff_in_2);
|
||||
}
|
||||
else
|
||||
{
|
||||
attr_desc param_3 = desc;
|
||||
int param_4 = vertex_id;
|
||||
return fetch_attr(param_3, param_4, buff_in_1);
|
||||
}
|
||||
}
|
||||
|
||||
void vs_adjust(thread float4& dst_reg0, thread float4& dst_reg1, thread float4& dst_reg7, constant VertexBuffer& v_227, thread uint& gl_VertexIndex, thread texture2d<uint> buff_in_2, thread texture2d<uint> buff_in_1, constant VertexConstantsBuffer& v_309)
|
||||
{
|
||||
int param = 3;
|
||||
float4 in_diff_color = read_location(param, v_227, gl_VertexIndex, buff_in_2, buff_in_1);
|
||||
int param_1 = 0;
|
||||
float4 in_pos = read_location(param_1, v_227, gl_VertexIndex, buff_in_2, buff_in_1);
|
||||
int param_2 = 8;
|
||||
float4 in_tc0 = read_location(param_2, v_227, gl_VertexIndex, buff_in_2, buff_in_1);
|
||||
dst_reg1 = in_diff_color * v_309.vc[13];
|
||||
float4 tmp0;
|
||||
tmp0.x = float4(dot(float4(in_pos.xyz, 1.0), v_309.vc[4])).x;
|
||||
tmp0.y = float4(dot(float4(in_pos.xyz, 1.0), v_309.vc[5])).y;
|
||||
tmp0.z = float4(dot(float4(in_pos.xyz, 1.0), v_309.vc[6])).z;
|
||||
float4 tmp1;
|
||||
float4 _359 = float4(in_tc0.xy.x, in_tc0.xy.y, tmp1.z, tmp1.w);
|
||||
tmp1 = _359;
|
||||
tmp1.z = v_309.vc[15].x;
|
||||
dst_reg7.y = float4(dot(float4(tmp1.xyz, 1.0), v_309.vc[8])).y;
|
||||
dst_reg7.x = float4(dot(float4(tmp1.xyz, 1.0), v_309.vc[7])).x;
|
||||
dst_reg0.y = float4(dot(float4(tmp0.xyz, 1.0), v_309.vc[1])).y;
|
||||
dst_reg0.x = float4(dot(float4(tmp0.xyz, 1.0), v_309.vc[0])).x;
|
||||
}
|
||||
|
||||
vertex main0_out main0(constant VertexBuffer& v_227 [[buffer(0)]], uint gl_VertexIndex [[vertex_id]], texture2d<uint> buff_in_2 [[texture(0)]], texture2d<uint> buff_in_1 [[texture(1)]], constant VertexConstantsBuffer& v_309 [[buffer(1)]])
|
||||
{
|
||||
main0_out out = {};
|
||||
float4 dst_reg0 = float4(0.0, 0.0, 0.0, 1.0);
|
||||
float4 dst_reg1 = float4(0.0);
|
||||
float4 dst_reg7 = float4(0.0);
|
||||
float4 param = dst_reg0;
|
||||
float4 param_1 = dst_reg1;
|
||||
float4 param_2 = dst_reg7;
|
||||
vs_adjust(param, param_1, param_2, v_227, gl_VertexIndex, buff_in_2, buff_in_1, v_309);
|
||||
dst_reg0 = param;
|
||||
dst_reg1 = param_1;
|
||||
dst_reg7 = param_2;
|
||||
out.gl_Position = dst_reg0;
|
||||
out.back_color = dst_reg1;
|
||||
out.tc0 = dst_reg7;
|
||||
out.gl_Position *= v_227.scale_offset_mat;
|
||||
return out;
|
||||
}
|
||||
|
38
reference/shaders-msl/flatten/rowmajor.flatten.vert
Normal file
38
reference/shaders-msl/flatten/rowmajor.flatten.vert
Normal file
@ -0,0 +1,38 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct UBO
|
||||
{
|
||||
float4x4 uMVPR;
|
||||
float4x4 uMVPC;
|
||||
float2x4 uMVP;
|
||||
};
|
||||
|
||||
struct main0_in
|
||||
{
|
||||
float4 aVertex [[attribute(0)]];
|
||||
};
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float4 gl_Position [[position]];
|
||||
};
|
||||
|
||||
// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.
|
||||
float2x4 spvConvertFromRowMajor2x4(float2x4 m)
|
||||
{
|
||||
return float2x4(float4(m[0][0], m[0][2], m[1][0], m[1][2]), float4(m[0][1], m[0][3], m[1][1], m[1][3]));
|
||||
}
|
||||
|
||||
vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]])
|
||||
{
|
||||
main0_out out = {};
|
||||
float2 v = in.aVertex * spvConvertFromRowMajor2x4(_18.uMVP);
|
||||
out.gl_Position = (_18.uMVPR * in.aVertex) + (in.aVertex * _18.uMVPC);
|
||||
return out;
|
||||
}
|
||||
|
@ -38,8 +38,8 @@ vertex main0_out main0(constant UBO& _22 [[buffer(0)]])
|
||||
out.gl_Position = float4(0.0);
|
||||
out.oA = _22.A;
|
||||
out.oB = float4(_22.B0, _22.B1);
|
||||
out.oC = float4(_22.C0, _22.C1);
|
||||
out.oD = float4(_22.D0, _22.D1);
|
||||
out.oC = float4(_22.C0, _22.C1) + float4(_22.C1.xy, _22.C1.z, _22.C0);
|
||||
out.oD = float4(_22.D0, _22.D1) + float4(float3(_22.D0).xy, float3(_22.D0).z, _22.D1);
|
||||
out.oE = float4(_22.E0, _22.E1, _22.E2, _22.E3);
|
||||
out.oF = float4(_22.F0, _22.F1, _22.F2);
|
||||
return out;
|
||||
|
132
shaders-msl-no-opt/vert/functions_nested.vert
Normal file
132
shaders-msl-no-opt/vert/functions_nested.vert
Normal file
@ -0,0 +1,132 @@
|
||||
#version 450
|
||||
#extension GL_ARB_separate_shader_objects : enable
|
||||
|
||||
// Uniform block holding the per-draw vertex fetch state used by this shader.
layout(std140, set = 0, binding = 0) uniform VertexBuffer
|
||||
{
|
||||
// Multiplied into gl_Position (vector on the left) as the last statement of main().
mat4 scale_offset_mat;
|
||||
// Subtracted from gl_VertexIndex in read_location() to obtain the vertex id used for fetching.
uint vertex_base_index;
|
||||
// One packed descriptor per attribute location; unpacked into an attr_desc by fetch_desc().
ivec4 input_attributes[16];
|
||||
};
|
||||
// Texel buffers the attribute data is fetched from. read_location() uses buff_in_2 when the
// descriptor's is_volatile flag is non-zero and buff_in_1 otherwise.
layout(set=0, binding=3) uniform usamplerBuffer buff_in_1;
|
||||
layout(set=0, binding=4) uniform usamplerBuffer buff_in_2;
|
||||
|
||||
// Stage outputs: main() writes dst_reg1 to back_color and dst_reg7 to tc0.
layout(location=10) out vec4 back_color;
|
||||
layout(location=0) out vec4 tc0;
|
||||
|
||||
// Uniform block of 16 vec4 constants. vs_adjust() reads entries 0, 1, 4, 5, 6, 7, 8, 13 and 15.
layout(std140, set=0, binding = 1) uniform VertexConstantsBuffer
|
||||
{
|
||||
vec4 vc[16];
|
||||
};
|
||||
|
||||
// Unpacked description of one vertex attribute, produced by fetch_desc() and consumed by fetch_attr().
struct attr_desc
|
||||
{
|
||||
// Selects the decoding path in fetch_attr(): 0, 1 and 2 are handled, other values fall through the switch.
int type;
|
||||
// Number of components to decode; fetch_attr() stops its loop when the component index reaches this value.
int attribute_size;
|
||||
// Added to vertex_id * stride to get the first buffer index read by fetch_attr().
int starting_offset;
|
||||
// Per-vertex step, taken from the low 8 bits of the packed flags.
int stride;
|
||||
// Bit 8 of the packed flags; non-zero reverses the order in get_bits() (types 0/1) or the result swizzle (type 2).
int swap_bytes;
|
||||
// Bit 9 of the packed flags; non-zero makes read_location() fetch from buff_in_2 instead of buff_in_1.
int is_volatile;
|
||||
};
|
||||
|
||||
// Combines the four components of v into a single uint, placing them 8 bits apart.
// With swap == 0, v.x is unshifted and v.w is shifted by 24; with swap != 0 the component order is reversed.
// No masking is applied to the components, so callers presumably pass values in 0..255 -- TODO confirm.
uint get_bits(uvec4 v, int swap)
|
||||
{
|
||||
if (swap != 0) return (v.w | v.z << 8 | v.y << 16 | v.x << 24);
|
||||
return (v.x | v.y << 8 | v.z << 16 | v.w << 24);
|
||||
}
|
||||
|
||||
// Decodes up to four components of one attribute for the given vertex from input_stream.
//   desc         - attribute description (type, component count, offset, stride, swap flag).
//   vertex_id    - vertex index used to compute the starting buffer index.
//   input_stream - texel buffer read with texelFetch; only the .x channel of each texel is used.
// Returns the decoded vector. Components that are not decoded keep the defaults (0, 0, 0, 1).
vec4 fetch_attr(attr_desc desc, int vertex_id, usamplerBuffer input_stream)
|
||||
{
|
||||
vec4 result = vec4(0.0f, 0.0f, 0.0f, 1.0f);
|
||||
uvec4 tmp;
|
||||
// NOTE(review): 'bits' is declared but never referenced in this function.
uint bits;
|
||||
bool reverse_order = false;
|
||||
|
||||
// Index of the first texel for this vertex; advanced by each texelFetch below.
int first_byte = (vertex_id * desc.stride) + desc.starting_offset;
|
||||
for (int n = 0; n < 4; n++)
|
||||
{
|
||||
// Stop once attribute_size components have been decoded.
if (n == desc.attribute_size) break;
|
||||
|
||||
switch (desc.type)
|
||||
{
|
||||
case 0:
|
||||
//signed normalized 16-bit
|
||||
// Reads two texels, then converts the combined uint to float by value (no bit reinterpretation).
// NOTE(review): tmp.z and tmp.w are not assigned on this path before get_bits() reads them,
// and no sign handling or normalization is visible here despite the label above.
tmp.x = texelFetch(input_stream, first_byte++).x;
|
||||
tmp.y = texelFetch(input_stream, first_byte++).x;
|
||||
result[n] = get_bits(tmp, desc.swap_bytes);
|
||||
break;
|
||||
case 1:
|
||||
//float
|
||||
// Reads four texels and reinterprets the combined 32 bits as a float.
tmp.x = texelFetch(input_stream, first_byte++).x;
|
||||
tmp.y = texelFetch(input_stream, first_byte++).x;
|
||||
tmp.z = texelFetch(input_stream, first_byte++).x;
|
||||
tmp.w = texelFetch(input_stream, first_byte++).x;
|
||||
result[n] = uintBitsToFloat(get_bits(tmp, desc.swap_bytes));
|
||||
break;
|
||||
case 2:
|
||||
//unsigned byte
|
||||
// One texel per component; swapping is deferred to the final swizzle instead of get_bits().
result[n] = texelFetch(input_stream, first_byte++).x;
|
||||
reverse_order = (desc.swap_bytes != 0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// reverse_order is only ever set by the type-2 path above.
return (reverse_order)? result.wzyx: result;
|
||||
}
|
||||
|
||||
// Unpacks input_attributes[location] into an attr_desc.
//   .x -> type, .y -> attribute_size, .z -> starting_offset,
//   .w -> flags: bits 0-7 stride, bit 8 swap_bytes, bit 9 is_volatile.
// 'location' is used directly as the array index; no range check is performed here.
attr_desc fetch_desc(int location)
|
||||
{
|
||||
attr_desc result;
|
||||
int attribute_flags = input_attributes[location].w;
|
||||
result.type = input_attributes[location].x;
|
||||
result.attribute_size = input_attributes[location].y;
|
||||
result.starting_offset = input_attributes[location].z;
|
||||
result.stride = attribute_flags & 0xFF;
|
||||
result.swap_bytes = (attribute_flags >> 8) & 0x1;
|
||||
result.is_volatile = (attribute_flags >> 9) & 0x1;
|
||||
return result;
|
||||
}
|
||||
|
||||
// Fetches the attribute stored at 'location' for the current vertex.
// Looks up the descriptor, computes the vertex id relative to vertex_base_index, and decodes
// the attribute from buff_in_2 when the descriptor is flagged volatile, otherwise from buff_in_1.
vec4 read_location(int location)
|
||||
{
|
||||
attr_desc desc = fetch_desc(location);
|
||||
|
||||
int vertex_id = gl_VertexIndex - int(vertex_base_index);
|
||||
if (desc.is_volatile != 0)
|
||||
return fetch_attr(desc, vertex_id, buff_in_2);
|
||||
else
|
||||
return fetch_attr(desc, vertex_id, buff_in_1);
|
||||
}
|
||||
|
||||
// Computes selected components of the three in/out registers from attributes 3, 0 and 8 and the
// vc[] constants. Writes all of dst_reg1, and only the .x/.y components of dst_reg0 and dst_reg7;
// their remaining components keep the values passed in by the caller.
// This function reads globals (vc) and calls read_location(), which itself reads other globals.
void vs_adjust(inout vec4 dst_reg0, inout vec4 dst_reg1, inout vec4 dst_reg7)
|
||||
{
|
||||
// tmp0.w and tmp1.w are never written; only the .xyz parts are read below.
vec4 tmp0;
|
||||
vec4 tmp1;
|
||||
vec4 in_diff_color= read_location(3);
|
||||
vec4 in_pos= read_location(0);
|
||||
vec4 in_tc0= read_location(8);
|
||||
dst_reg1 = (in_diff_color * vc[13]);
|
||||
// tmp0.xyz = dot((in_pos.xyz, 1), vc[4..6]); the redundant swizzles are part of the input as written.
tmp0.x = vec4(dot(vec4(in_pos.xyzx.xyz, 1.0), vc[4])).x;
|
||||
tmp0.y = vec4(dot(vec4(in_pos.xyzx.xyz, 1.0), vc[5])).y;
|
||||
tmp0.z = vec4(dot(vec4(in_pos.xyzx.xyz, 1.0), vc[6])).z;
|
||||
// tmp1 = (in_tc0.x, in_tc0.y, vc[15].x, <unset>)
tmp1.xy = in_tc0.xyxx.xy;
|
||||
tmp1.z = vc[15].xxxx.z;
|
||||
dst_reg7.y = vec4(dot(vec4(tmp1.xyzx.xyz, 1.0), vc[8])).y;
|
||||
dst_reg7.x = vec4(dot(vec4(tmp1.xyzx.xyz, 1.0), vc[7])).x;
|
||||
dst_reg0.y = vec4(dot(vec4(tmp0.xyzx.xyz, 1.0), vc[1])).y;
|
||||
dst_reg0.x = vec4(dot(vec4(tmp0.xyzx.xyz, 1.0), vc[0])).x;
|
||||
}
|
||||
|
||||
// Entry point: initialises the three working registers, lets vs_adjust() fill them in, copies them
// to the stage outputs, and finally multiplies gl_Position by scale_offset_mat.
void main ()
|
||||
{
|
||||
vec4 dst_reg0= vec4(0.0f, 0.0f, 0.0f, 1.0f);
|
||||
vec4 dst_reg1= vec4(0.0, 0.0, 0.0, 0.0);
|
||||
vec4 dst_reg7= vec4(0.0, 0.0, 0.0, 0.0);
|
||||
|
||||
vs_adjust(dst_reg0, dst_reg1, dst_reg7);
|
||||
|
||||
gl_Position = dst_reg0;
|
||||
back_color = dst_reg1;
|
||||
tc0 = dst_reg7;
|
||||
// Vector-on-the-left multiply (row vector times matrix).
gl_Position = gl_Position * scale_offset_mat;
|
||||
}
|
||||
|
16
shaders-msl/flatten/rowmajor.flatten.vert
Normal file
16
shaders-msl/flatten/rowmajor.flatten.vert
Normal file
@ -0,0 +1,16 @@
|
||||
#version 310 es
|
||||
|
||||
// Uniform block mixing column_major and row_major matrices, including one non-square row_major matrix.
// NOTE(review): uMVPR is declared column_major and uMVPC row_major; if the R/C suffixes were meant
// to stand for row/column they are the other way round -- confirm before relying on the names.
layout(std140) uniform UBO
|
||||
{
|
||||
layout(column_major) mat4 uMVPR;
|
||||
layout(row_major) mat4 uMVPC;
|
||||
layout(row_major) mat2x4 uMVP;
|
||||
};
|
||||
|
||||
layout(location = 0) in vec4 aVertex;
|
||||
|
||||
// Entry point: multiplies aVertex with each of the block's matrices.
void main()
|
||||
{
|
||||
// 'v' is not used afterwards; presumably it exists only so the non-square row_major
// matrix is read by the shader -- TODO confirm.
vec2 v = aVertex * uMVP;
|
||||
gl_Position = uMVPR * aVertex + uMVPC * aVertex;
|
||||
}
|
@ -40,8 +40,8 @@ void main()
|
||||
|
||||
oA = A;
|
||||
oB = vec4(B0, B1);
|
||||
oC = vec4(C0, C1);
|
||||
oD = vec4(D0, D1);
|
||||
oC = vec4(C0, C1) + vec4(C1.xy, C1.z, C0); // not packed
|
||||
oD = vec4(D0, D1) + vec4(D0.xy, D0.z, D1); // packed - must convert for swizzle
|
||||
oE = vec4(E0, E1, E2, E3);
|
||||
oF = vec4(F0, F1, F2);
|
||||
}
|
||||
|
@ -2237,7 +2237,7 @@ string CompilerGLSL::to_expression(uint32_t id)
|
||||
if (e.base_expression)
|
||||
return to_enclosed_expression(e.base_expression) + e.expression;
|
||||
else if (e.need_transpose)
|
||||
return convert_row_major_matrix(e.expression);
|
||||
return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type));
|
||||
else
|
||||
return e.expression;
|
||||
}
|
||||
@ -4252,7 +4252,8 @@ const char *CompilerGLSL::index_to_swizzle(uint32_t index)
|
||||
}
|
||||
|
||||
string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count,
|
||||
bool index_is_literal, bool chain_only, bool *need_transpose)
|
||||
bool index_is_literal, bool chain_only, bool *need_transpose,
|
||||
bool *result_is_packed)
|
||||
{
|
||||
string expr;
|
||||
if (!chain_only)
|
||||
@ -4411,7 +4412,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
|
||||
{
|
||||
if (row_major_matrix_needs_conversion)
|
||||
{
|
||||
expr = convert_row_major_matrix(expr);
|
||||
expr = convert_row_major_matrix(expr, *type);
|
||||
row_major_matrix_needs_conversion = false;
|
||||
}
|
||||
|
||||
@ -4429,7 +4430,10 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
|
||||
else if (type->vecsize > 1)
|
||||
{
|
||||
if (vector_is_packed)
|
||||
{
|
||||
expr = unpack_expression_type(expr, *type);
|
||||
vector_is_packed = false;
|
||||
}
|
||||
|
||||
if (index_is_literal)
|
||||
{
|
||||
@ -4465,6 +4469,10 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
|
||||
|
||||
if (need_transpose)
|
||||
*need_transpose = row_major_matrix_needs_conversion;
|
||||
|
||||
if (result_is_packed)
|
||||
*result_is_packed = vector_is_packed;
|
||||
|
||||
return expr;
|
||||
}
|
||||
|
||||
@ -4474,7 +4482,7 @@ string CompilerGLSL::to_flattened_struct_member(const SPIRType &type, uint32_t i
|
||||
}
|
||||
|
||||
string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
|
||||
bool *out_need_transpose)
|
||||
bool *out_need_transpose, bool *result_is_packed)
|
||||
{
|
||||
if (flattened_buffer_blocks.count(base))
|
||||
{
|
||||
@ -4484,6 +4492,8 @@ string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32
|
||||
|
||||
if (out_need_transpose)
|
||||
*out_need_transpose = target_type.columns > 1 && need_transpose;
|
||||
if (result_is_packed)
|
||||
*result_is_packed = false;
|
||||
|
||||
return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, need_transpose);
|
||||
}
|
||||
@ -4493,11 +4503,13 @@ string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32
|
||||
auto &type = get<SPIRType>(get<SPIRVariable>(base).basetype);
|
||||
if (out_need_transpose)
|
||||
*out_need_transpose = false;
|
||||
if (result_is_packed)
|
||||
*result_is_packed = false;
|
||||
return sanitize_underscores(join(to_name(type.self), "_", chain));
|
||||
}
|
||||
else
|
||||
{
|
||||
return access_chain_internal(base, indices, count, false, false, out_need_transpose);
|
||||
return access_chain_internal(base, indices, count, false, false, out_need_transpose, result_is_packed);
|
||||
}
|
||||
}
|
||||
|
||||
@ -4590,7 +4602,7 @@ std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uin
|
||||
|
||||
// Cannot forward transpositions, so resolve them here.
|
||||
if (need_transpose)
|
||||
expr += convert_row_major_matrix(tmp);
|
||||
expr += convert_row_major_matrix(tmp, member_type);
|
||||
else
|
||||
expr += tmp;
|
||||
}
|
||||
@ -5080,7 +5092,7 @@ string CompilerGLSL::build_composite_combiner(const uint32_t *elems, uint32_t le
|
||||
{
|
||||
// We'll likely end up with duplicated swizzles, e.g.
|
||||
// foobar.xyz.xyz from patterns like
|
||||
// OpVectorSwizzle
|
||||
// OpVectorShuffle
|
||||
// OpCompositeExtract x 3
|
||||
// OpCompositeConstruct 3x + other scalar.
|
||||
// Just modify op in-place.
|
||||
@ -5240,6 +5252,11 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
||||
auto &e = emit_op(result_type, id, expr, forward, true);
|
||||
e.need_transpose = need_transpose;
|
||||
register_read(id, ptr, forward);
|
||||
|
||||
// Pass through whether the result is of a packed type.
|
||||
if (has_decoration(ptr, DecorationCPacked))
|
||||
set_decoration(id, DecorationCPacked);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
@ -5252,11 +5269,18 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
||||
|
||||
// If the base is immutable, the access chain pointer must also be.
|
||||
// If an expression is mutable and forwardable, we speculate that it is immutable.
|
||||
bool need_transpose;
|
||||
auto e = access_chain(ops[2], &ops[3], length - 3, get<SPIRType>(ops[0]), &need_transpose);
|
||||
bool need_transpose, result_is_packed;
|
||||
auto e = access_chain(ops[2], &ops[3], length - 3, get<SPIRType>(ops[0]), &need_transpose, &result_is_packed);
|
||||
auto &expr = set<SPIRExpression>(ops[1], move(e), ops[0], should_forward(ops[2]));
|
||||
expr.loaded_from = ops[2];
|
||||
expr.need_transpose = need_transpose;
|
||||
|
||||
// Mark the result as being packed. Some platforms handle packed vectors differently than non-packed.
|
||||
if (result_is_packed)
|
||||
set_decoration(ops[1], DecorationCPacked);
|
||||
else
|
||||
unset_decoration(ops[1], DecorationCPacked);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
@ -5635,11 +5659,13 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
||||
shuffle = true;
|
||||
|
||||
string expr;
|
||||
bool trivial_forward;
|
||||
bool should_fwd, trivial_forward;
|
||||
|
||||
if (shuffle)
|
||||
{
|
||||
trivial_forward = !expression_is_forwarded(vec0) && !expression_is_forwarded(vec1);
|
||||
bool allow_fwd = !backend.force_temp_use_for_two_vector_shuffles;
|
||||
should_fwd = allow_fwd && should_forward(vec0) && should_forward(vec1);
|
||||
trivial_forward = allow_fwd && !expression_is_forwarded(vec0) && !expression_is_forwarded(vec1);
|
||||
|
||||
// Constructor style and shuffling from two different vectors.
|
||||
vector<string> args;
|
||||
@ -5654,13 +5680,19 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
||||
}
|
||||
else
|
||||
{
|
||||
should_fwd = should_forward(vec0);
|
||||
trivial_forward = !expression_is_forwarded(vec0);
|
||||
|
||||
// We only source from first vector, so can use swizzle.
|
||||
// If the vector is packed, unpack it before applying a swizzle (needed for MSL)
|
||||
expr += to_enclosed_expression(vec0);
|
||||
if (has_decoration(vec0, DecorationCPacked))
|
||||
expr = unpack_expression_type(expr, expression_type(vec0));
|
||||
|
||||
expr += ".";
|
||||
for (uint32_t i = 0; i < length; i++)
|
||||
expr += index_to_swizzle(elems[i]);
|
||||
|
||||
if (backend.swizzle_is_function && length > 1)
|
||||
expr += "()";
|
||||
}
|
||||
@ -5668,7 +5700,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
||||
// A shuffle is trivial in that it doesn't actually *do* anything.
|
||||
// We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed.
|
||||
|
||||
emit_op(result_type, id, expr, should_forward(vec0) && should_forward(vec1), trivial_forward);
|
||||
emit_op(result_type, id, expr, should_fwd, trivial_forward);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -6167,8 +6199,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
||||
register_read(ops[1], ops[2], should_forward(ops[2]));
|
||||
break;
|
||||
|
||||
// OpAtomicStore unimplemented. Not sure what would use that.
|
||||
// OpAtomicLoad seems to only be relevant for atomic counters.
|
||||
// OpAtomicStore unimplemented. Not sure what would use that.
|
||||
// OpAtomicLoad seems to only be relevant for atomic counters.
|
||||
|
||||
case OpAtomicIIncrement:
|
||||
forced_temporaries.insert(ops[1]);
|
||||
@ -6905,7 +6937,7 @@ bool CompilerGLSL::member_is_packed_type(const SPIRType &type, uint32_t index) c
|
||||
// row_major matrix result of the expression to a column_major matrix.
|
||||
// Base implementation uses the standard library transpose() function.
|
||||
// Subclasses may override to use a different function.
|
||||
string CompilerGLSL::convert_row_major_matrix(string exp_str)
|
||||
string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType & /*exp_type*/)
|
||||
{
|
||||
strip_enclosed_expression(exp_str);
|
||||
return join("transpose(", exp_str, ")");
|
||||
|
@ -290,10 +290,10 @@ protected:
|
||||
void add_resource_name(uint32_t id);
|
||||
void add_member_name(SPIRType &type, uint32_t name);
|
||||
|
||||
bool is_non_native_row_major_matrix(uint32_t id);
|
||||
bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index);
|
||||
virtual bool is_non_native_row_major_matrix(uint32_t id);
|
||||
virtual bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index);
|
||||
bool member_is_packed_type(const SPIRType &type, uint32_t index) const;
|
||||
virtual std::string convert_row_major_matrix(std::string exp_str);
|
||||
virtual std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type);
|
||||
|
||||
std::unordered_set<std::string> local_variable_names;
|
||||
std::unordered_set<std::string> resource_names;
|
||||
@ -321,6 +321,8 @@ protected:
|
||||
bool boolean_mix_support = true;
|
||||
bool allow_precision_qualifiers = false;
|
||||
bool can_swizzle_scalar = false;
|
||||
bool force_temp_use_for_two_vector_shuffles = false;
|
||||
|
||||
} backend;
|
||||
|
||||
void emit_struct(SPIRType &type);
|
||||
@ -371,9 +373,10 @@ protected:
|
||||
SPIRExpression &emit_op(uint32_t result_type, uint32_t result_id, const std::string &rhs, bool forward_rhs,
|
||||
bool suppress_usage_tracking = false);
|
||||
std::string access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, bool index_is_literal,
|
||||
bool chain_only = false, bool *need_transpose = nullptr);
|
||||
bool chain_only = false, bool *need_transpose = nullptr,
|
||||
bool *result_is_packed = nullptr);
|
||||
std::string access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
|
||||
bool *need_transpose = nullptr);
|
||||
bool *need_transpose = nullptr, bool *result_is_packed = nullptr);
|
||||
|
||||
std::string flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count,
|
||||
const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,
|
||||
|
280
spirv_msl.cpp
280
spirv_msl.cpp
@ -57,6 +57,21 @@ string CompilerMSL::compile()
|
||||
// Force a classic "C" locale, reverts when function returns
|
||||
ClassicLocale classic_locale;
|
||||
|
||||
// Do not deal with GLES-isms like precision, older extensions and such.
|
||||
CompilerGLSL::options.vulkan_semantics = true;
|
||||
CompilerGLSL::options.es = false;
|
||||
CompilerGLSL::options.version = 120;
|
||||
backend.float_literal_suffix = false;
|
||||
backend.uint32_t_literal_suffix = true;
|
||||
backend.basic_int_type = "int";
|
||||
backend.basic_uint_type = "uint";
|
||||
backend.discard_literal = "discard_fragment()";
|
||||
backend.swizzle_is_function = false;
|
||||
backend.shared_is_implied = false;
|
||||
backend.native_row_major_matrix = false;
|
||||
backend.flexible_member_array_supported = false;
|
||||
backend.force_temp_use_for_two_vector_shuffles = true;
|
||||
|
||||
replace_illegal_names();
|
||||
|
||||
non_stage_in_input_var_ids.clear();
|
||||
@ -88,20 +103,6 @@ string CompilerMSL::compile()
|
||||
if (options.resolve_specialized_array_lengths)
|
||||
resolve_specialized_array_lengths();
|
||||
|
||||
// Do not deal with GLES-isms like precision, older extensions and such.
|
||||
CompilerGLSL::options.vulkan_semantics = true;
|
||||
CompilerGLSL::options.es = false;
|
||||
CompilerGLSL::options.version = 120;
|
||||
backend.float_literal_suffix = false;
|
||||
backend.uint32_t_literal_suffix = true;
|
||||
backend.basic_int_type = "int";
|
||||
backend.basic_uint_type = "uint";
|
||||
backend.discard_literal = "discard_fragment()";
|
||||
backend.swizzle_is_function = false;
|
||||
backend.shared_is_implied = false;
|
||||
backend.native_row_major_matrix = false;
|
||||
backend.flexible_member_array_supported = false;
|
||||
|
||||
uint32_t pass_count = 0;
|
||||
do
|
||||
{
|
||||
@ -279,6 +280,15 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
|
||||
}
|
||||
case OpFunctionCall:
|
||||
{
|
||||
// First see if any of the function call args are globals
|
||||
for (uint32_t arg_idx = 3; arg_idx < i.length; arg_idx++)
|
||||
{
|
||||
uint32_t arg_id = ops[arg_idx];
|
||||
if (global_var_ids.find(arg_id) != global_var_ids.end())
|
||||
added_arg_ids.insert(arg_id);
|
||||
}
|
||||
|
||||
// Then recurse into the function itself to extract globals used internally in the function
|
||||
uint32_t inner_func_id = ops[2];
|
||||
std::set<uint32_t> inner_func_args;
|
||||
extract_global_variables_from_function(inner_func_id, inner_func_args, global_var_ids,
|
||||
@ -306,12 +316,10 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
|
||||
func.add_parameter(type_id, next_id, true);
|
||||
set<SPIRVariable>(next_id, type_id, StorageClassFunction, 0, arg_id);
|
||||
|
||||
// Ensure both the existing and new variables have the same name, and the name is valid
|
||||
string vld_name = ensure_valid_name(to_name(arg_id), "v");
|
||||
set_name(arg_id, vld_name);
|
||||
set_name(next_id, vld_name);
|
||||
// Ensure the existing variable has a valid name and the new variable has all the same meta info
|
||||
set_name(arg_id, ensure_valid_name(to_name(arg_id), "v"));
|
||||
meta[next_id] = meta[arg_id];
|
||||
|
||||
meta[next_id].decoration.qualified_alias = meta[arg_id].decoration.qualified_alias;
|
||||
next_id++;
|
||||
}
|
||||
}
|
||||
@ -811,8 +819,8 @@ string CompilerMSL::unpack_expression_type(string expr_str, const SPIRType &type
|
||||
// Emits the file header info
|
||||
void CompilerMSL::emit_header()
|
||||
{
|
||||
for (auto &header : pragma_lines)
|
||||
statement(header);
|
||||
for (auto &pragma : pragma_lines)
|
||||
statement(pragma);
|
||||
|
||||
if (!pragma_lines.empty())
|
||||
statement("");
|
||||
@ -830,7 +838,7 @@ void CompilerMSL::emit_header()
|
||||
|
||||
void CompilerMSL::add_pragma_line(const string &line)
|
||||
{
|
||||
pragma_lines.push_back(line);
|
||||
pragma_lines.insert(line);
|
||||
}
|
||||
|
||||
// Emits any needed custom function bodies.
|
||||
@ -1041,6 +1049,64 @@ void CompilerMSL::emit_custom_functions()
|
||||
statement("");
|
||||
break;
|
||||
|
||||
case SPVFuncImplRowMajor2x3:
|
||||
statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
|
||||
statement("float2x3 spvConvertFromRowMajor2x3(float2x3 m)");
|
||||
begin_scope();
|
||||
statement("return float2x3(float3(m[0][0], m[0][2], m[1][1]), float3(m[0][1], m[1][0], m[1][2]));");
|
||||
end_scope();
|
||||
statement("");
|
||||
break;
|
||||
|
||||
case SPVFuncImplRowMajor2x4:
|
||||
statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
|
||||
statement("float2x4 spvConvertFromRowMajor2x4(float2x4 m)");
|
||||
begin_scope();
|
||||
statement("return float2x4(float4(m[0][0], m[0][2], m[1][0], m[1][2]), float4(m[0][1], m[0][3], m[1][1], "
|
||||
"m[1][3]));");
|
||||
end_scope();
|
||||
statement("");
|
||||
break;
|
||||
|
||||
case SPVFuncImplRowMajor3x2:
|
||||
statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
|
||||
statement("float3x2 spvConvertFromRowMajor3x2(float3x2 m)");
|
||||
begin_scope();
|
||||
statement("return float3x2(float2(m[0][0], m[1][1]), float2(m[0][1], m[2][0]), float2(m[1][0], m[2][1]));");
|
||||
end_scope();
|
||||
statement("");
|
||||
break;
|
||||
|
||||
case SPVFuncImplRowMajor3x4:
|
||||
statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
|
||||
statement("float3x4 spvConvertFromRowMajor3x4(float3x4 m)");
|
||||
begin_scope();
|
||||
statement("return float3x4(float4(m[0][0], m[0][3], m[1][2], m[2][1]), float4(m[0][1], m[1][0], m[1][3], "
|
||||
"m[2][2]), float4(m[0][2], m[1][1], m[2][0], m[2][3]));");
|
||||
end_scope();
|
||||
statement("");
|
||||
break;
|
||||
|
||||
case SPVFuncImplRowMajor4x2:
|
||||
statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
|
||||
statement("float4x2 spvConvertFromRowMajor4x2(float4x2 m)");
|
||||
begin_scope();
|
||||
statement("return float4x2(float2(m[0][0], m[2][0]), float2(m[0][1], m[2][1]), float2(m[1][0], m[3][0]), "
|
||||
"float2(m[1][1], m[3][1]));");
|
||||
end_scope();
|
||||
statement("");
|
||||
break;
|
||||
|
||||
case SPVFuncImplRowMajor4x3:
|
||||
statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
|
||||
statement("float4x3 spvConvertFromRowMajor4x3(float4x3 m)");
|
||||
begin_scope();
|
||||
statement("return float4x3(float3(m[0][0], m[1][1], m[2][2]), float3(m[0][1], m[1][2], m[3][0]), "
|
||||
"float3(m[0][2], m[2][0], m[3][1]), float3(m[1][0], m[2][1], m[3][2]));");
|
||||
end_scope();
|
||||
statement("");
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -1541,6 +1607,24 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
|
||||
emit_barrier(ops[0], ops[1], ops[2]);
|
||||
break;
|
||||
|
||||
case OpVectorTimesMatrix:
|
||||
case OpMatrixTimesVector:
|
||||
{
|
||||
// If the matrix needs transpose and it is square, just flip the multiply order.
|
||||
uint32_t mtx_id = ops[opcode == OpMatrixTimesVector ? 2 : 3];
|
||||
auto *e = maybe_get<SPIRExpression>(mtx_id);
|
||||
auto &t = expression_type(mtx_id);
|
||||
if (e && e->need_transpose && t.columns == t.vecsize)
|
||||
{
|
||||
e->need_transpose = false;
|
||||
emit_binary_op(ops[0], ops[1], ops[3], ops[2], "*");
|
||||
e->need_transpose = true;
|
||||
}
|
||||
else
|
||||
BOP(*);
|
||||
break;
|
||||
}
|
||||
|
||||
// OpOuterProduct
|
||||
|
||||
default:
|
||||
@ -1559,34 +1643,18 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin
|
||||
string bar_stmt = "threadgroup_barrier(mem_flags::";
|
||||
|
||||
uint32_t mem_sem = id_mem_sem ? get<SPIRConstant>(id_mem_sem).scalar() : uint32_t(MemorySemanticsMaskNone);
|
||||
switch (mem_sem)
|
||||
{
|
||||
case MemorySemanticsCrossWorkgroupMemoryMask:
|
||||
|
||||
if (mem_sem & MemorySemanticsCrossWorkgroupMemoryMask)
|
||||
bar_stmt += "mem_device";
|
||||
break;
|
||||
|
||||
case MemorySemanticsSubgroupMemoryMask:
|
||||
case MemorySemanticsWorkgroupMemoryMask:
|
||||
case MemorySemanticsAtomicCounterMemoryMask:
|
||||
else if (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask |
|
||||
MemorySemanticsAtomicCounterMemoryMask))
|
||||
bar_stmt += "mem_threadgroup";
|
||||
break;
|
||||
|
||||
case MemorySemanticsImageMemoryMask:
|
||||
else if (mem_sem & MemorySemanticsImageMemoryMask)
|
||||
bar_stmt += "mem_texture";
|
||||
break;
|
||||
|
||||
case MemorySemanticsAcquireMask:
|
||||
case MemorySemanticsReleaseMask:
|
||||
case MemorySemanticsAcquireReleaseMask:
|
||||
case MemorySemanticsSequentiallyConsistentMask:
|
||||
case MemorySemanticsUniformMemoryMask:
|
||||
case MemorySemanticsMaskNone:
|
||||
default:
|
||||
else
|
||||
bar_stmt += "mem_none";
|
||||
break;
|
||||
}
|
||||
|
||||
if (options.supports_msl_version(2))
|
||||
if (options.is_ios() && options.supports_msl_version(2))
|
||||
{
|
||||
bar_stmt += ", ";
|
||||
|
||||
@ -1919,7 +1987,7 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, uint64_t)
|
||||
|
||||
// Manufacture automatic sampler arg for SampledImage texture
|
||||
auto &arg_type = get<SPIRType>(arg.type);
|
||||
if (arg_type.basetype == SPIRType::SampledImage)
|
||||
if (arg_type.basetype == SPIRType::SampledImage && arg_type.image.dim != DimBuffer)
|
||||
decl += ", thread const sampler& " + to_sampler_expression(arg.id);
|
||||
|
||||
if (&arg != &func.arguments.back())
|
||||
@ -2213,7 +2281,7 @@ string CompilerMSL::to_func_call_arg(uint32_t id)
|
||||
{
|
||||
auto &var = id_v.get<SPIRVariable>();
|
||||
auto &type = get<SPIRType>(var.basetype);
|
||||
if (type.basetype == SPIRType::SampledImage)
|
||||
if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer)
|
||||
arg_str += ", " + to_sampler_expression(id);
|
||||
}
|
||||
|
||||
@ -2229,6 +2297,84 @@ string CompilerMSL::to_sampler_expression(uint32_t id)
|
||||
return samp_id ? to_expression(samp_id) : to_expression(id) + sampler_name_suffix;
|
||||
}
|
||||
|
||||
// Checks whether the ID is a row_major matrix that requires conversion before use
|
||||
bool CompilerMSL::is_non_native_row_major_matrix(uint32_t id)
|
||||
{
|
||||
// Natively supported row-major matrices do not need to be converted.
|
||||
if (backend.native_row_major_matrix)
|
||||
return false;
|
||||
|
||||
// Non-matrix or column-major matrix types do not need to be converted.
|
||||
if (!(meta[id].decoration.decoration_flags & (1ull << DecorationRowMajor)))
|
||||
return false;
|
||||
|
||||
// Generate a function that will swap matrix elements from row-major to column-major.
|
||||
const auto type = expression_type(id);
|
||||
add_convert_row_major_matrix_function(type.columns, type.vecsize);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Checks whether the member is a row_major matrix that requires conversion before use
|
||||
bool CompilerMSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index)
|
||||
{
|
||||
// Natively supported row-major matrices do not need to be converted.
|
||||
if (backend.native_row_major_matrix)
|
||||
return false;
|
||||
|
||||
// Non-matrix or column-major matrix types do not need to be converted.
|
||||
if (!(combined_decoration_for_member(type, index) & (1ull << DecorationRowMajor)))
|
||||
return false;
|
||||
|
||||
// Generate a function that will swap matrix elements from row-major to column-major.
|
||||
const auto mbr_type = get<SPIRType>(type.member_types[index]);
|
||||
add_convert_row_major_matrix_function(mbr_type.columns, mbr_type.vecsize);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Adds a function suitable for converting a non-square row-major matrix to a column-major matrix.
|
||||
void CompilerMSL::add_convert_row_major_matrix_function(uint32_t cols, uint32_t rows)
|
||||
{
|
||||
SPVFuncImpl spv_func;
|
||||
if (cols == rows) // Square matrix...just use transpose() function
|
||||
return;
|
||||
else if (cols == 2 && rows == 3)
|
||||
spv_func = SPVFuncImplRowMajor2x3;
|
||||
else if (cols == 2 && rows == 4)
|
||||
spv_func = SPVFuncImplRowMajor2x4;
|
||||
else if (cols == 3 && rows == 2)
|
||||
spv_func = SPVFuncImplRowMajor3x2;
|
||||
else if (cols == 3 && rows == 4)
|
||||
spv_func = SPVFuncImplRowMajor3x4;
|
||||
else if (cols == 4 && rows == 2)
|
||||
spv_func = SPVFuncImplRowMajor4x2;
|
||||
else if (cols == 4 && rows == 3)
|
||||
spv_func = SPVFuncImplRowMajor4x3;
|
||||
else
|
||||
SPIRV_CROSS_THROW("Could not convert row-major matrix.");
|
||||
|
||||
auto rslt = spv_function_implementations.insert(spv_func);
|
||||
if (rslt.second)
|
||||
{
|
||||
add_pragma_line("#pragma clang diagnostic ignored \"-Wmissing-prototypes\"");
|
||||
force_recompile = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Wraps the expression string in a function call that converts the
|
||||
// row_major matrix result of the expression to a column_major matrix.
|
||||
string CompilerMSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type)
|
||||
{
|
||||
strip_enclosed_expression(exp_str);
|
||||
|
||||
string func_name;
|
||||
if (exp_type.columns == exp_type.vecsize)
|
||||
func_name = "transpose";
|
||||
else
|
||||
func_name = string("spvConvertFromRowMajor") + to_string(exp_type.columns) + "x" + to_string(exp_type.vecsize);
|
||||
|
||||
return join(func_name, "(", exp_str, ")");
|
||||
}
|
||||
|
||||
// Called automatically at the end of the entry point function
|
||||
void CompilerMSL::emit_fixup()
|
||||
{
|
||||
@ -2237,10 +2383,8 @@ void CompilerMSL::emit_fixup()
|
||||
if ((execution.model == ExecutionModelVertex) && stage_out_var_id && !qual_pos_var_name.empty())
|
||||
{
|
||||
if (CompilerGLSL::options.vertex.fixup_clipspace)
|
||||
{
|
||||
statement(qual_pos_var_name, ".z = (", qual_pos_var_name, ".z + ", qual_pos_var_name,
|
||||
".w) * 0.5; // Adjust clip-space for Metal");
|
||||
}
|
||||
|
||||
if (CompilerGLSL::options.vertex.flip_vert_y)
|
||||
statement(qual_pos_var_name, ".y = -(", qual_pos_var_name, ".y);", " // Invert Y-axis for Metal");
|
||||
@ -2599,6 +2743,7 @@ string CompilerMSL::entry_point_args(bool append_comma)
|
||||
{
|
||||
if (!ep_args.empty())
|
||||
ep_args += ", ";
|
||||
|
||||
BuiltIn bi_type = meta[var_id].decoration.builtin_type;
|
||||
ep_args += builtin_type_decl(bi_type) + " " + to_expression(var_id);
|
||||
ep_args += " [[" + builtin_qualifier(bi_type) + "]]";
|
||||
@ -2682,7 +2827,10 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
|
||||
if (constref)
|
||||
decl += "const ";
|
||||
|
||||
decl += type_to_glsl(type, arg.id);
|
||||
if (is_builtin_variable(var))
|
||||
decl += builtin_type_decl((BuiltIn)get_decoration(arg.id, DecorationBuiltIn));
|
||||
else
|
||||
decl += type_to_glsl(type, arg.id);
|
||||
|
||||
if (is_array(type))
|
||||
decl += "*";
|
||||
@ -3298,9 +3446,35 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui
|
||||
break;
|
||||
}
|
||||
|
||||
// Keep track of the instruction return types, mapped by ID
|
||||
if (length > 1)
|
||||
result_types[args[1]] = args[0];
|
||||
// If it has one, keep track of the instruction's result type, mapped by ID
|
||||
switch (opcode)
|
||||
{
|
||||
case OpStore:
|
||||
case OpCopyMemory:
|
||||
case OpCopyMemorySized:
|
||||
case OpImageWrite:
|
||||
case OpAtomicStore:
|
||||
case OpAtomicFlagClear:
|
||||
case OpEmitStreamVertex:
|
||||
case OpEndStreamPrimitive:
|
||||
case OpControlBarrier:
|
||||
case OpMemoryBarrier:
|
||||
case OpGroupWaitEvents:
|
||||
case OpRetainEvent:
|
||||
case OpReleaseEvent:
|
||||
case OpSetUserEventStatus:
|
||||
case OpCaptureEventProfilingInfo:
|
||||
case OpCommitReadPipe:
|
||||
case OpCommitWritePipe:
|
||||
case OpGroupCommitReadPipe:
|
||||
case OpGroupCommitWritePipe:
|
||||
break;
|
||||
|
||||
default:
|
||||
if (length > 1)
|
||||
result_types[args[1]] = args[0];
|
||||
break;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -77,10 +77,26 @@ public:
|
||||
// Options for compiling to Metal Shading Language
|
||||
struct Options
|
||||
{
|
||||
typedef enum {
|
||||
iOS,
|
||||
macOS,
|
||||
} Platform;
|
||||
|
||||
Platform platform = macOS;
|
||||
uint32_t msl_version = make_msl_version(1, 2);
|
||||
bool enable_point_size_builtin = true;
|
||||
bool resolve_specialized_array_lengths = true;
|
||||
|
||||
bool is_ios()
|
||||
{
|
||||
return platform == iOS;
|
||||
}
|
||||
|
||||
bool is_macos()
|
||||
{
|
||||
return platform == macOS;
|
||||
}
|
||||
|
||||
void set_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0)
|
||||
{
|
||||
msl_version = make_msl_version(major, minor, patch);
|
||||
@ -122,6 +138,12 @@ public:
|
||||
SPVFuncImplInverse2x2,
|
||||
SPVFuncImplInverse3x3,
|
||||
SPVFuncImplInverse4x4,
|
||||
SPVFuncImplRowMajor2x3,
|
||||
SPVFuncImplRowMajor2x4,
|
||||
SPVFuncImplRowMajor3x2,
|
||||
SPVFuncImplRowMajor3x4,
|
||||
SPVFuncImplRowMajor4x2,
|
||||
SPVFuncImplRowMajor4x3,
|
||||
};
|
||||
|
||||
// Constructs an instance to compile the SPIR-V code into Metal Shading Language,
|
||||
@ -186,6 +208,9 @@ protected:
|
||||
std::string to_qualifiers_glsl(uint32_t id) override;
|
||||
void replace_illegal_names() override;
|
||||
void declare_undefined_values() override;
|
||||
bool is_non_native_row_major_matrix(uint32_t id) override;
|
||||
bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) override;
|
||||
std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type) override;
|
||||
|
||||
void preprocess_op_codes();
|
||||
void localize_global_variables();
|
||||
@ -207,6 +232,7 @@ protected:
|
||||
void emit_interface_block(uint32_t ib_var_id);
|
||||
bool maybe_emit_input_struct_assignment(uint32_t id_lhs, uint32_t id_rhs);
|
||||
bool maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs);
|
||||
void add_convert_row_major_matrix_function(uint32_t cols, uint32_t rows);
|
||||
|
||||
std::string func_type_decl(SPIRType &type);
|
||||
std::string entry_point_args(bool append_comma);
|
||||
@ -245,7 +271,7 @@ protected:
|
||||
std::unordered_map<uint32_t, MSLVertexAttr *> vtx_attrs_by_location;
|
||||
std::map<uint32_t, uint32_t> non_stage_in_input_var_ids;
|
||||
std::unordered_map<MSLStructMemberKey, uint32_t> struct_member_padding;
|
||||
std::vector<std::string> pragma_lines;
|
||||
std::set<std::string> pragma_lines;
|
||||
std::vector<MSLResourceBinding *> resource_bindings;
|
||||
MSLResourceBinding next_metal_resource_index;
|
||||
uint32_t stage_in_var_id = 0;
|
||||
|
@ -358,11 +358,20 @@ def test_shader_msl(stats, shader, update, keep, opt):
|
||||
noopt = shader_is_noopt(shader[1])
|
||||
spirv, msl = cross_compile_msl(joined_path, is_spirv, opt and (not noopt))
|
||||
regression_check(shader, msl, update, keep, opt)
|
||||
os.remove(spirv)
|
||||
|
||||
# Uncomment the following line to print the temp SPIR-V file path.
|
||||
# This temp SPIR-V file is not deleted until after the Metal validation step below.
|
||||
# If Metal validation fails, the temp SPIR-V file can be copied out and
|
||||
# used as input to an invocation of spirv-cross to debug from Xcode directly.
|
||||
# To do so, build spirv-cross using `make DEBUG=1`, then run the spirv-cross
|
||||
# executable from Xcode using args: `--msl --entry main --output msl_path spirv_path`.
|
||||
# print('SPIRV shader: ' + spirv)
|
||||
|
||||
if not force_no_external_validation:
|
||||
validate_shader_msl(shader, opt)
|
||||
|
||||
os.remove(spirv)
|
||||
|
||||
def test_shader_hlsl(stats, shader, update, keep, opt):
|
||||
joined_path = os.path.join(shader[0], shader[1])
|
||||
print('Testing HLSL shader:', joined_path)
|
||||
|
@ -11,6 +11,7 @@ echo "Using spirv-opt in: $(which spirv-opt)."
|
||||
./test_shaders.py shaders --opt || exit 1
|
||||
./test_shaders.py shaders-msl --msl || exit 1
|
||||
./test_shaders.py shaders-msl --msl --opt || exit 1
|
||||
./test_shaders.py shaders-msl-no-opt --msl || exit 1
|
||||
./test_shaders.py shaders-hlsl --hlsl || exit 1
|
||||
./test_shaders.py shaders-hlsl --hlsl --opt || exit 1
|
||||
|
||||
|
@ -11,6 +11,7 @@ echo "Using spirv-opt in: $(which spirv-opt)."
|
||||
./test_shaders.py shaders --update --opt || exit 1
|
||||
./test_shaders.py shaders-msl --msl --update || exit 1
|
||||
./test_shaders.py shaders-msl --msl --update --opt || exit 1
|
||||
./test_shaders.py shaders-msl-no-opt --msl --update || exit 1
|
||||
./test_shaders.py shaders-hlsl --hlsl --update || exit 1
|
||||
./test_shaders.py shaders-hlsl --hlsl --update --opt || exit 1
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user