Merge pull request #373 from billhollings/master

CompilerMSL enhancements PLUS refactoring of MSL tests to avoid optimization crashes
Hans-Kristian Arntzen 2018-01-08 09:04:03 +01:00 committed by GitHub
commit 513ba86fc8
15 changed files with 732 additions and 81 deletions

View File

@ -0,0 +1,29 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct UBO
{
float4x4 uMVPR;
float4x4 uMVPC;
float2x4 uMVP;
};
struct main0_in
{
float4 aVertex [[attribute(0)]];
};
struct main0_out
{
float4 gl_Position [[position]];
};
vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]])
{
main0_out out = {};
out.gl_Position = (_18.uMVPR * in.aVertex) + (in.aVertex * _18.uMVPC);
return out;
}

View File

@ -38,8 +38,8 @@ vertex main0_out main0(constant UBO& _22 [[buffer(0)]])
out.gl_Position = float4(0.0);
out.oA = _22.A;
out.oB = float4(_22.B0, _22.B1);
out.oC = float4(_22.C0, _22.C1);
out.oD = float4(_22.D0, _22.D1);
out.oC = float4(_22.C0, _22.C1) + float4(_22.C1.xy, _22.C1.z, _22.C0);
out.oD = float4(_22.D0, _22.D1) + float4(float3(_22.D0).xy, float3(_22.D0).z, _22.D1);
out.oE = float4(_22.E0, _22.E1, _22.E2, _22.E3);
out.oF = float4(_22.F0, _22.F1, _22.F2);
return out;

View File

@ -0,0 +1,190 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct attr_desc
{
int type;
int attribute_size;
int starting_offset;
int stride;
int swap_bytes;
int is_volatile;
};
struct VertexBuffer
{
float4x4 scale_offset_mat;
uint vertex_base_index;
int4 input_attributes[16];
};
struct VertexConstantsBuffer
{
float4 vc[16];
};
constant float4 _295 = {};
struct main0_out
{
float4 tc0 [[user(locn0)]];
float4 back_color [[user(locn10)]];
float4 gl_Position [[position]];
};
attr_desc fetch_desc(thread const int& location, constant VertexBuffer& v_227)
{
int attribute_flags = v_227.input_attributes[location].w;
attr_desc result;
result.type = v_227.input_attributes[location].x;
result.attribute_size = v_227.input_attributes[location].y;
result.starting_offset = v_227.input_attributes[location].z;
result.stride = attribute_flags & 255;
result.swap_bytes = (attribute_flags >> 8) & 1;
result.is_volatile = (attribute_flags >> 9) & 1;
return result;
}
uint get_bits(thread const uint4& v, thread const int& swap)
{
if (swap != 0)
{
return ((v.w | (v.z << uint(8))) | (v.y << uint(16))) | (v.x << uint(24));
}
return ((v.x | (v.y << uint(8))) | (v.z << uint(16))) | (v.w << uint(24));
}
float4 fetch_attr(thread const attr_desc& desc, thread const int& vertex_id, thread const texture2d<uint> input_stream)
{
float4 result = float4(0.0, 0.0, 0.0, 1.0);
bool reverse_order = false;
int first_byte = (vertex_id * desc.stride) + desc.starting_offset;
for (int n = 0; n < 4; n++)
{
if (n == desc.attribute_size)
{
break;
}
uint4 tmp;
switch (desc.type)
{
case 0:
{
int _131 = first_byte;
first_byte = _131 + 1;
tmp.x = input_stream.read(uint2(_131, 0)).x;
int _138 = first_byte;
first_byte = _138 + 1;
tmp.y = input_stream.read(uint2(_138, 0)).x;
uint4 param = tmp;
int param_1 = desc.swap_bytes;
result[n] = float(get_bits(param, param_1));
break;
}
case 1:
{
int _156 = first_byte;
first_byte = _156 + 1;
tmp.x = input_stream.read(uint2(_156, 0)).x;
int _163 = first_byte;
first_byte = _163 + 1;
tmp.y = input_stream.read(uint2(_163, 0)).x;
int _170 = first_byte;
first_byte = _170 + 1;
tmp.z = input_stream.read(uint2(_170, 0)).x;
int _177 = first_byte;
first_byte = _177 + 1;
tmp.w = input_stream.read(uint2(_177, 0)).x;
uint4 param_2 = tmp;
int param_3 = desc.swap_bytes;
result[n] = as_type<float>(get_bits(param_2, param_3));
break;
}
case 2:
{
int _195 = first_byte;
first_byte = _195 + 1;
result[n] = float(input_stream.read(uint2(_195, 0)).x);
reverse_order = desc.swap_bytes != 0;
break;
}
}
}
float4 _209;
if (reverse_order)
{
_209 = result.wzyx;
}
else
{
_209 = result;
}
return _209;
}
float4 read_location(thread const int& location, constant VertexBuffer& v_227, thread uint& gl_VertexIndex, thread texture2d<uint> buff_in_2, thread texture2d<uint> buff_in_1)
{
int param = location;
attr_desc desc = fetch_desc(param, v_227);
int vertex_id = gl_VertexIndex - int(v_227.vertex_base_index);
if (desc.is_volatile != 0)
{
attr_desc param_1 = desc;
int param_2 = vertex_id;
return fetch_attr(param_1, param_2, buff_in_2);
}
else
{
attr_desc param_3 = desc;
int param_4 = vertex_id;
return fetch_attr(param_3, param_4, buff_in_1);
}
}
void vs_adjust(thread float4& dst_reg0, thread float4& dst_reg1, thread float4& dst_reg7, constant VertexBuffer& v_227, thread uint& gl_VertexIndex, thread texture2d<uint> buff_in_2, thread texture2d<uint> buff_in_1, constant VertexConstantsBuffer& v_309)
{
int param = 3;
float4 in_diff_color = read_location(param, v_227, gl_VertexIndex, buff_in_2, buff_in_1);
int param_1 = 0;
float4 in_pos = read_location(param_1, v_227, gl_VertexIndex, buff_in_2, buff_in_1);
int param_2 = 8;
float4 in_tc0 = read_location(param_2, v_227, gl_VertexIndex, buff_in_2, buff_in_1);
dst_reg1 = in_diff_color * v_309.vc[13];
float4 tmp0;
tmp0.x = float4(dot(float4(in_pos.xyz, 1.0), v_309.vc[4])).x;
tmp0.y = float4(dot(float4(in_pos.xyz, 1.0), v_309.vc[5])).y;
tmp0.z = float4(dot(float4(in_pos.xyz, 1.0), v_309.vc[6])).z;
float4 tmp1;
float4 _359 = float4(in_tc0.xy.x, in_tc0.xy.y, tmp1.z, tmp1.w);
tmp1 = _359;
tmp1.z = v_309.vc[15].x;
dst_reg7.y = float4(dot(float4(tmp1.xyz, 1.0), v_309.vc[8])).y;
dst_reg7.x = float4(dot(float4(tmp1.xyz, 1.0), v_309.vc[7])).x;
dst_reg0.y = float4(dot(float4(tmp0.xyz, 1.0), v_309.vc[1])).y;
dst_reg0.x = float4(dot(float4(tmp0.xyz, 1.0), v_309.vc[0])).x;
}
vertex main0_out main0(constant VertexBuffer& v_227 [[buffer(0)]], uint gl_VertexIndex [[vertex_id]], texture2d<uint> buff_in_2 [[texture(0)]], texture2d<uint> buff_in_1 [[texture(1)]], constant VertexConstantsBuffer& v_309 [[buffer(1)]])
{
main0_out out = {};
float4 dst_reg0 = float4(0.0, 0.0, 0.0, 1.0);
float4 dst_reg1 = float4(0.0);
float4 dst_reg7 = float4(0.0);
float4 param = dst_reg0;
float4 param_1 = dst_reg1;
float4 param_2 = dst_reg7;
vs_adjust(param, param_1, param_2, v_227, gl_VertexIndex, buff_in_2, buff_in_1, v_309);
dst_reg0 = param;
dst_reg1 = param_1;
dst_reg7 = param_2;
out.gl_Position = dst_reg0;
out.back_color = dst_reg1;
out.tc0 = dst_reg7;
out.gl_Position *= v_227.scale_offset_mat;
return out;
}
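
The w component of each input_attributes element packs three fields that fetch_desc() unpacks above: bits 0-7 hold the stride, bit 8 the swap_bytes flag, and bit 9 the is_volatile flag. A minimal host-side encoder for that layout, as a sketch (pack_attribute_flags is hypothetical and not part of this commit):

#include <cassert>
#include <cstdint>

// Hypothetical encoder for the flags word consumed by fetch_desc():
// bits 0-7 = stride, bit 8 = swap_bytes, bit 9 = is_volatile.
static int32_t pack_attribute_flags(int stride, bool swap_bytes, bool is_volatile)
{
    return (stride & 0xFF) | (int(swap_bytes) << 8) | (int(is_volatile) << 9);
}

int main()
{
    int32_t flags = pack_attribute_flags(16, true, false);
    assert((flags & 0xFF) == 16);    // stride, as extracted by fetch_desc()
    assert(((flags >> 8) & 1) == 1); // swap_bytes
    assert(((flags >> 9) & 1) == 0); // is_volatile
}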

View File

@ -0,0 +1,38 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct UBO
{
float4x4 uMVPR;
float4x4 uMVPC;
float2x4 uMVP;
};
struct main0_in
{
float4 aVertex [[attribute(0)]];
};
struct main0_out
{
float4 gl_Position [[position]];
};
// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.
float2x4 spvConvertFromRowMajor2x4(float2x4 m)
{
return float2x4(float4(m[0][0], m[0][2], m[1][0], m[1][2]), float4(m[0][1], m[0][3], m[1][1], m[1][3]));
}
vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]])
{
main0_out out = {};
float2 v = in.aVertex * spvConvertFromRowMajor2x4(_18.uMVP);
out.gl_Position = (_18.uMVPR * in.aVertex) + (in.aVertex * _18.uMVPC);
return out;
}
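
spvConvertFromRowMajor2x4 cannot simply call transpose() because the matrix is not square; it re-reads the eight scalars, which arrive in row-major order, as a column-major float2x4. A stand-alone sketch of that index shuffle (the flat-array model below is an assumption used only for illustration):

#include <array>
#include <cassert>

// The float2x4 is modeled as two consecutive 4-element columns, so m[c][r]
// corresponds to flat index c * 4 + r.
static float elem(const std::array<float, 8> &flat, int c, int r)
{
    return flat[c * 4 + r];
}

int main()
{
    // Logical 2-column, 4-row matrix stored row-major: element (r, c) at r * 2 + c.
    std::array<float, 8> m{};
    for (int r = 0; r < 4; r++)
        for (int c = 0; c < 2; c++)
            m[r * 2 + c] = float(10 * r + c);

    // The index shuffle from spvConvertFromRowMajor2x4:
    // column 0 = (m[0][0], m[0][2], m[1][0], m[1][2])
    // column 1 = (m[0][1], m[0][3], m[1][1], m[1][3])
    std::array<float, 8> out = {
        elem(m, 0, 0), elem(m, 0, 2), elem(m, 1, 0), elem(m, 1, 2),
        elem(m, 0, 1), elem(m, 0, 3), elem(m, 1, 1), elem(m, 1, 3),
    };

    // Same data, now column-major: element (r, c) at c * 4 + r.
    for (int r = 0; r < 4; r++)
        for (int c = 0; c < 2; c++)
            assert(out[c * 4 + r] == float(10 * r + c));
}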

View File

@ -38,8 +38,8 @@ vertex main0_out main0(constant UBO& _22 [[buffer(0)]])
out.gl_Position = float4(0.0);
out.oA = _22.A;
out.oB = float4(_22.B0, _22.B1);
out.oC = float4(_22.C0, _22.C1);
out.oD = float4(_22.D0, _22.D1);
out.oC = float4(_22.C0, _22.C1) + float4(_22.C1.xy, _22.C1.z, _22.C0);
out.oD = float4(_22.D0, _22.D1) + float4(float3(_22.D0).xy, float3(_22.D0).z, _22.D1);
out.oE = float4(_22.E0, _22.E1, _22.E2, _22.E3);
out.oF = float4(_22.F0, _22.F1, _22.F2);
return out;

View File

@ -0,0 +1,132 @@
#version 450
#extension GL_ARB_separate_shader_objects : enable
layout(std140, set = 0, binding = 0) uniform VertexBuffer
{
mat4 scale_offset_mat;
uint vertex_base_index;
ivec4 input_attributes[16];
};
layout(set=0, binding=3) uniform usamplerBuffer buff_in_1;
layout(set=0, binding=4) uniform usamplerBuffer buff_in_2;
layout(location=10) out vec4 back_color;
layout(location=0) out vec4 tc0;
layout(std140, set=0, binding = 1) uniform VertexConstantsBuffer
{
vec4 vc[16];
};
struct attr_desc
{
int type;
int attribute_size;
int starting_offset;
int stride;
int swap_bytes;
int is_volatile;
};
uint get_bits(uvec4 v, int swap)
{
if (swap != 0) return (v.w | v.z << 8 | v.y << 16 | v.x << 24);
return (v.x | v.y << 8 | v.z << 16 | v.w << 24);
}
vec4 fetch_attr(attr_desc desc, int vertex_id, usamplerBuffer input_stream)
{
vec4 result = vec4(0.0f, 0.0f, 0.0f, 1.0f);
uvec4 tmp;
uint bits;
bool reverse_order = false;
int first_byte = (vertex_id * desc.stride) + desc.starting_offset;
for (int n = 0; n < 4; n++)
{
if (n == desc.attribute_size) break;
switch (desc.type)
{
case 0:
//signed normalized 16-bit
tmp.x = texelFetch(input_stream, first_byte++).x;
tmp.y = texelFetch(input_stream, first_byte++).x;
result[n] = get_bits(tmp, desc.swap_bytes);
break;
case 1:
//float
tmp.x = texelFetch(input_stream, first_byte++).x;
tmp.y = texelFetch(input_stream, first_byte++).x;
tmp.z = texelFetch(input_stream, first_byte++).x;
tmp.w = texelFetch(input_stream, first_byte++).x;
result[n] = uintBitsToFloat(get_bits(tmp, desc.swap_bytes));
break;
case 2:
//unsigned byte
result[n] = texelFetch(input_stream, first_byte++).x;
reverse_order = (desc.swap_bytes != 0);
break;
}
}
return (reverse_order)? result.wzyx: result;
}
attr_desc fetch_desc(int location)
{
attr_desc result;
int attribute_flags = input_attributes[location].w;
result.type = input_attributes[location].x;
result.attribute_size = input_attributes[location].y;
result.starting_offset = input_attributes[location].z;
result.stride = attribute_flags & 0xFF;
result.swap_bytes = (attribute_flags >> 8) & 0x1;
result.is_volatile = (attribute_flags >> 9) & 0x1;
return result;
}
vec4 read_location(int location)
{
attr_desc desc = fetch_desc(location);
int vertex_id = gl_VertexIndex - int(vertex_base_index);
if (desc.is_volatile != 0)
return fetch_attr(desc, vertex_id, buff_in_2);
else
return fetch_attr(desc, vertex_id, buff_in_1);
}
void vs_adjust(inout vec4 dst_reg0, inout vec4 dst_reg1, inout vec4 dst_reg7)
{
vec4 tmp0;
vec4 tmp1;
vec4 in_diff_color= read_location(3);
vec4 in_pos= read_location(0);
vec4 in_tc0= read_location(8);
dst_reg1 = (in_diff_color * vc[13]);
tmp0.x = vec4(dot(vec4(in_pos.xyzx.xyz, 1.0), vc[4])).x;
tmp0.y = vec4(dot(vec4(in_pos.xyzx.xyz, 1.0), vc[5])).y;
tmp0.z = vec4(dot(vec4(in_pos.xyzx.xyz, 1.0), vc[6])).z;
tmp1.xy = in_tc0.xyxx.xy;
tmp1.z = vc[15].xxxx.z;
dst_reg7.y = vec4(dot(vec4(tmp1.xyzx.xyz, 1.0), vc[8])).y;
dst_reg7.x = vec4(dot(vec4(tmp1.xyzx.xyz, 1.0), vc[7])).x;
dst_reg0.y = vec4(dot(vec4(tmp0.xyzx.xyz, 1.0), vc[1])).y;
dst_reg0.x = vec4(dot(vec4(tmp0.xyzx.xyz, 1.0), vc[0])).x;
}
void main ()
{
vec4 dst_reg0= vec4(0.0f, 0.0f, 0.0f, 1.0f);
vec4 dst_reg1= vec4(0.0, 0.0, 0.0, 0.0);
vec4 dst_reg7= vec4(0.0, 0.0, 0.0, 0.0);
vs_adjust(dst_reg0, dst_reg1, dst_reg7);
gl_Position = dst_reg0;
back_color = dst_reg1;
tc0 = dst_reg7;
gl_Position = gl_Position * scale_offset_mat;
}
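
get_bits() reassembles a 32-bit word from four bytes fetched one texel at a time, honoring the stream's byte order. A host-side mirror of the same logic with a worked value (this helper is illustrative, not part of the commit):

#include <cassert>
#include <cstdint>

// Mirror of the shader's get_bits(): assemble four bytes into a uint,
// optionally swapping the byte order.
static uint32_t get_bits(const uint8_t v[4], bool swap)
{
    if (swap)
        return v[3] | (v[2] << 8) | (v[1] << 16) | (uint32_t(v[0]) << 24);
    return v[0] | (v[1] << 8) | (v[2] << 16) | (uint32_t(v[3]) << 24);
}

int main()
{
    const uint8_t bytes[4] = { 0x12, 0x34, 0x56, 0x78 };
    assert(get_bits(bytes, false) == 0x78563412u); // bytes taken in stream order
    assert(get_bits(bytes, true) == 0x12345678u);  // byte-swapped stream
}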

View File

@ -0,0 +1,16 @@
#version 310 es
layout(std140) uniform UBO
{
layout(column_major) mat4 uMVPR;
layout(row_major) mat4 uMVPC;
layout(row_major) mat2x4 uMVP;
};
layout(location = 0) in vec4 aVertex;
void main()
{
vec2 v = aVertex * uMVP;
gl_Position = uMVPR * aVertex + uMVPC * aVertex;
}

View File

@ -40,8 +40,8 @@ void main()
oA = A;
oB = vec4(B0, B1);
oC = vec4(C0, C1);
oD = vec4(D0, D1);
oC = vec4(C0, C1) + vec4(C1.xy, C1.z, C0); // not packed
oD = vec4(D0, D1) + vec4(D0.xy, D0.z, D1); // packed - must convert for swizzle
oE = vec4(E0, E1, E2, E3);
oF = vec4(F0, F1, F2);
}

View File

@ -2237,7 +2237,7 @@ string CompilerGLSL::to_expression(uint32_t id)
if (e.base_expression)
return to_enclosed_expression(e.base_expression) + e.expression;
else if (e.need_transpose)
return convert_row_major_matrix(e.expression);
return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type));
else
return e.expression;
}
@ -4252,7 +4252,8 @@ const char *CompilerGLSL::index_to_swizzle(uint32_t index)
}
string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count,
bool index_is_literal, bool chain_only, bool *need_transpose)
bool index_is_literal, bool chain_only, bool *need_transpose,
bool *result_is_packed)
{
string expr;
if (!chain_only)
@ -4411,7 +4412,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
{
if (row_major_matrix_needs_conversion)
{
expr = convert_row_major_matrix(expr);
expr = convert_row_major_matrix(expr, *type);
row_major_matrix_needs_conversion = false;
}
@ -4429,7 +4430,10 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
else if (type->vecsize > 1)
{
if (vector_is_packed)
{
expr = unpack_expression_type(expr, *type);
vector_is_packed = false;
}
if (index_is_literal)
{
@ -4465,6 +4469,10 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
if (need_transpose)
*need_transpose = row_major_matrix_needs_conversion;
if (result_is_packed)
*result_is_packed = vector_is_packed;
return expr;
}
@ -4474,7 +4482,7 @@ string CompilerGLSL::to_flattened_struct_member(const SPIRType &type, uint32_t i
}
string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
bool *out_need_transpose)
bool *out_need_transpose, bool *result_is_packed)
{
if (flattened_buffer_blocks.count(base))
{
@ -4484,6 +4492,8 @@ string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32
if (out_need_transpose)
*out_need_transpose = target_type.columns > 1 && need_transpose;
if (result_is_packed)
*result_is_packed = false;
return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, need_transpose);
}
@ -4493,11 +4503,13 @@ string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32
auto &type = get<SPIRType>(get<SPIRVariable>(base).basetype);
if (out_need_transpose)
*out_need_transpose = false;
if (result_is_packed)
*result_is_packed = false;
return sanitize_underscores(join(to_name(type.self), "_", chain));
}
else
{
return access_chain_internal(base, indices, count, false, false, out_need_transpose);
return access_chain_internal(base, indices, count, false, false, out_need_transpose, result_is_packed);
}
}
@ -4590,7 +4602,7 @@ std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uin
// Cannot forward transpositions, so resolve them here.
if (need_transpose)
expr += convert_row_major_matrix(tmp);
expr += convert_row_major_matrix(tmp, member_type);
else
expr += tmp;
}
@ -5080,7 +5092,7 @@ string CompilerGLSL::build_composite_combiner(const uint32_t *elems, uint32_t le
{
// We'll likely end up with duplicated swizzles, e.g.
// foobar.xyz.xyz from patterns like
// OpVectorSwizzle
// OpVectorShuffle
// OpCompositeExtract x 3
// OpCompositeConstruct 3x + other scalar.
// Just modify op in-place.
@ -5240,6 +5252,11 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
auto &e = emit_op(result_type, id, expr, forward, true);
e.need_transpose = need_transpose;
register_read(id, ptr, forward);
// Pass through whether the result is of a packed type.
if (has_decoration(ptr, DecorationCPacked))
set_decoration(id, DecorationCPacked);
break;
}
@ -5252,11 +5269,18 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
// If the base is immutable, the access chain pointer must also be.
// If an expression is mutable and forwardable, we speculate that it is immutable.
bool need_transpose;
auto e = access_chain(ops[2], &ops[3], length - 3, get<SPIRType>(ops[0]), &need_transpose);
bool need_transpose, result_is_packed;
auto e = access_chain(ops[2], &ops[3], length - 3, get<SPIRType>(ops[0]), &need_transpose, &result_is_packed);
auto &expr = set<SPIRExpression>(ops[1], move(e), ops[0], should_forward(ops[2]));
expr.loaded_from = ops[2];
expr.need_transpose = need_transpose;
// Mark the result as being packed. Some platforms handle packed vectors differently from non-packed ones.
if (result_is_packed)
set_decoration(ops[1], DecorationCPacked);
else
unset_decoration(ops[1], DecorationCPacked);
break;
}
@ -5635,11 +5659,13 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
shuffle = true;
string expr;
bool trivial_forward;
bool should_fwd, trivial_forward;
if (shuffle)
{
trivial_forward = !expression_is_forwarded(vec0) && !expression_is_forwarded(vec1);
bool allow_fwd = !backend.force_temp_use_for_two_vector_shuffles;
should_fwd = allow_fwd && should_forward(vec0) && should_forward(vec1);
trivial_forward = allow_fwd && !expression_is_forwarded(vec0) && !expression_is_forwarded(vec1);
// Constructor style and shuffling from two different vectors.
vector<string> args;
@ -5654,13 +5680,19 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
}
else
{
should_fwd = should_forward(vec0);
trivial_forward = !expression_is_forwarded(vec0);
// We only source from first vector, so can use swizzle.
// If the vector is packed, unpack it before applying a swizzle (needed for MSL)
expr += to_enclosed_expression(vec0);
if (has_decoration(vec0, DecorationCPacked))
expr = unpack_expression_type(expr, expression_type(vec0));
expr += ".";
for (uint32_t i = 0; i < length; i++)
expr += index_to_swizzle(elems[i]);
if (backend.swizzle_is_function && length > 1)
expr += "()";
}
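
The unpack step exists because MSL packed vector types (e.g. packed_float3) cannot be swizzled directly; unpack_expression_type wraps the expression in a constructor of the equivalent unpacked type, which is what produces float3(_22.D0).xy in the packed-vector test output earlier in this diff. A sketch of that string transformation (unpack_expression below is a simplified stand-in, assuming a float vector type):

#include <cassert>
#include <string>

// Simplified stand-in for unpack_expression_type(): wrap a packed vector
// expression in a constructor of the unpacked type before swizzling.
static std::string unpack_expression(const std::string &expr, unsigned vecsize)
{
    return "float" + std::to_string(vecsize) + "(" + expr + ")";
}

int main()
{
    // _22.D0 is a packed_float3 member; unpack before appending the swizzle.
    assert(unpack_expression("_22.D0", 3) + ".xy" == "float3(_22.D0).xy");
}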
@ -5668,7 +5700,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
// A shuffle is trivial in that it doesn't actually *do* anything.
// We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed.
emit_op(result_type, id, expr, should_forward(vec0) && should_forward(vec1), trivial_forward);
emit_op(result_type, id, expr, should_fwd, trivial_forward);
break;
}
@ -6167,8 +6199,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
register_read(ops[1], ops[2], should_forward(ops[2]));
break;
// OpAtomicStore unimplemented. Not sure what would use that.
// OpAtomicLoad seems to only be relevant for atomic counters.
case OpAtomicIIncrement:
forced_temporaries.insert(ops[1]);
@ -6905,7 +6937,7 @@ bool CompilerGLSL::member_is_packed_type(const SPIRType &type, uint32_t index) c
// row_major matrix result of the expression to a column_major matrix.
// Base implementation uses the standard library transpose() function.
// Subclasses may override to use a different function.
string CompilerGLSL::convert_row_major_matrix(string exp_str)
string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType & /*exp_type*/)
{
strip_enclosed_expression(exp_str);
return join("transpose(", exp_str, ")");

View File

@ -290,10 +290,10 @@ protected:
void add_resource_name(uint32_t id);
void add_member_name(SPIRType &type, uint32_t name);
bool is_non_native_row_major_matrix(uint32_t id);
bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index);
virtual bool is_non_native_row_major_matrix(uint32_t id);
virtual bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index);
bool member_is_packed_type(const SPIRType &type, uint32_t index) const;
virtual std::string convert_row_major_matrix(std::string exp_str);
virtual std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type);
std::unordered_set<std::string> local_variable_names;
std::unordered_set<std::string> resource_names;
@ -321,6 +321,8 @@ protected:
bool boolean_mix_support = true;
bool allow_precision_qualifiers = false;
bool can_swizzle_scalar = false;
bool force_temp_use_for_two_vector_shuffles = false;
} backend;
void emit_struct(SPIRType &type);
@ -371,9 +373,10 @@ protected:
SPIRExpression &emit_op(uint32_t result_type, uint32_t result_id, const std::string &rhs, bool forward_rhs,
bool suppress_usage_tracking = false);
std::string access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, bool index_is_literal,
bool chain_only = false, bool *need_transpose = nullptr);
bool chain_only = false, bool *need_transpose = nullptr,
bool *result_is_packed = nullptr);
std::string access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
bool *need_transpose = nullptr);
bool *need_transpose = nullptr, bool *result_is_packed = nullptr);
std::string flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count,
const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,

View File

@ -57,6 +57,21 @@ string CompilerMSL::compile()
// Force a classic "C" locale, reverts when function returns
ClassicLocale classic_locale;
// Do not deal with GLES-isms like precision, older extensions and such.
CompilerGLSL::options.vulkan_semantics = true;
CompilerGLSL::options.es = false;
CompilerGLSL::options.version = 120;
backend.float_literal_suffix = false;
backend.uint32_t_literal_suffix = true;
backend.basic_int_type = "int";
backend.basic_uint_type = "uint";
backend.discard_literal = "discard_fragment()";
backend.swizzle_is_function = false;
backend.shared_is_implied = false;
backend.native_row_major_matrix = false;
backend.flexible_member_array_supported = false;
backend.force_temp_use_for_two_vector_shuffles = true;
replace_illegal_names();
non_stage_in_input_var_ids.clear();
@ -88,20 +103,6 @@ string CompilerMSL::compile()
if (options.resolve_specialized_array_lengths)
resolve_specialized_array_lengths();
// Do not deal with GLES-isms like precision, older extensions and such.
CompilerGLSL::options.vulkan_semantics = true;
CompilerGLSL::options.es = false;
CompilerGLSL::options.version = 120;
backend.float_literal_suffix = false;
backend.uint32_t_literal_suffix = true;
backend.basic_int_type = "int";
backend.basic_uint_type = "uint";
backend.discard_literal = "discard_fragment()";
backend.swizzle_is_function = false;
backend.shared_is_implied = false;
backend.native_row_major_matrix = false;
backend.flexible_member_array_supported = false;
uint32_t pass_count = 0;
do
{
@ -279,6 +280,15 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
}
case OpFunctionCall:
{
// First see if any of the function call args are globals
for (uint32_t arg_idx = 3; arg_idx < i.length; arg_idx++)
{
uint32_t arg_id = ops[arg_idx];
if (global_var_ids.find(arg_id) != global_var_ids.end())
added_arg_ids.insert(arg_id);
}
// Then recurse into the function itself to extract globals used internally in the function
uint32_t inner_func_id = ops[2];
std::set<uint32_t> inner_func_args;
extract_global_variables_from_function(inner_func_id, inner_func_args, global_var_ids,
@ -306,12 +316,10 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
func.add_parameter(type_id, next_id, true);
set<SPIRVariable>(next_id, type_id, StorageClassFunction, 0, arg_id);
// Ensure both the existing and new variables have the same name, and the name is valid
string vld_name = ensure_valid_name(to_name(arg_id), "v");
set_name(arg_id, vld_name);
set_name(next_id, vld_name);
// Ensure the existing variable has a valid name and the new variable has all the same meta info
set_name(arg_id, ensure_valid_name(to_name(arg_id), "v"));
meta[next_id] = meta[arg_id];
meta[next_id].decoration.qualified_alias = meta[arg_id].decoration.qualified_alias;
next_id++;
}
}
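
MSL has no true global variables, so CompilerMSL rewrites each global a function uses into an extra function parameter; the hunk above additionally catches globals passed directly as call arguments and recurses into each callee. A simplified model of that collection pass (the Function struct and ids here are purely illustrative):

#include <cassert>
#include <map>
#include <set>
#include <vector>

// A function needs every global it references directly (including globals
// passed as call arguments) plus, transitively, those of its callees.
struct Function
{
    std::set<int> referenced_ids; // ids used in the body or as call args
    std::vector<int> callees;     // ids of called functions
};

static void collect_globals(int func_id, const std::map<int, Function> &funcs,
                            const std::set<int> &global_ids, std::set<int> &out)
{
    const Function &f = funcs.at(func_id);
    for (int id : f.referenced_ids)
        if (global_ids.count(id))
            out.insert(id);
    for (int callee : f.callees)
        collect_globals(callee, funcs, global_ids, out); // recurse into callees
}

int main()
{
    std::map<int, Function> funcs;
    funcs[1] = { { 100 }, { 2 } }; // entry point uses global 100, calls function 2
    funcs[2] = { { 101 }, {} };    // callee uses global 101
    std::set<int> globals = { 100, 101 }, used;
    collect_globals(1, funcs, globals, used);
    assert(used == std::set<int>({ 100, 101 })); // both propagate to the entry point
}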
@ -811,8 +819,8 @@ string CompilerMSL::unpack_expression_type(string expr_str, const SPIRType &type
// Emits the file header info
void CompilerMSL::emit_header()
{
for (auto &header : pragma_lines)
statement(header);
for (auto &pragma : pragma_lines)
statement(pragma);
if (!pragma_lines.empty())
statement("");
@ -830,7 +838,7 @@ void CompilerMSL::emit_header()
void CompilerMSL::add_pragma_line(const string &line)
{
pragma_lines.push_back(line);
pragma_lines.insert(line);
}
// Emits any needed custom function bodies.
@ -1041,6 +1049,64 @@ void CompilerMSL::emit_custom_functions()
statement("");
break;
case SPVFuncImplRowMajor2x3:
statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
statement("float2x3 spvConvertFromRowMajor2x3(float2x3 m)");
begin_scope();
statement("return float2x3(float3(m[0][0], m[0][2], m[1][1]), float3(m[0][1], m[1][0], m[1][2]));");
end_scope();
statement("");
break;
case SPVFuncImplRowMajor2x4:
statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
statement("float2x4 spvConvertFromRowMajor2x4(float2x4 m)");
begin_scope();
statement("return float2x4(float4(m[0][0], m[0][2], m[1][0], m[1][2]), float4(m[0][1], m[0][3], m[1][1], "
"m[1][3]));");
end_scope();
statement("");
break;
case SPVFuncImplRowMajor3x2:
statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
statement("float3x2 spvConvertFromRowMajor3x2(float3x2 m)");
begin_scope();
statement("return float3x2(float2(m[0][0], m[1][1]), float2(m[0][1], m[2][0]), float2(m[1][0], m[2][1]));");
end_scope();
statement("");
break;
case SPVFuncImplRowMajor3x4:
statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
statement("float3x4 spvConvertFromRowMajor3x4(float3x4 m)");
begin_scope();
statement("return float3x4(float4(m[0][0], m[0][3], m[1][2], m[2][1]), float4(m[0][1], m[1][0], m[1][3], "
"m[2][2]), float4(m[0][2], m[1][1], m[2][0], m[2][3]));");
end_scope();
statement("");
break;
case SPVFuncImplRowMajor4x2:
statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
statement("float4x2 spvConvertFromRowMajor4x2(float4x2 m)");
begin_scope();
statement("return float4x2(float2(m[0][0], m[2][0]), float2(m[0][1], m[2][1]), float2(m[1][0], m[3][0]), "
"float2(m[1][1], m[3][1]));");
end_scope();
statement("");
break;
case SPVFuncImplRowMajor4x3:
statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
statement("float4x3 spvConvertFromRowMajor4x3(float4x3 m)");
begin_scope();
statement("return float4x3(float3(m[0][0], m[1][1], m[2][2]), float3(m[0][1], m[1][2], m[3][0]), "
"float3(m[0][2], m[2][0], m[3][1]), float3(m[1][0], m[2][1], m[3][2]));");
end_scope();
statement("");
break;
default:
break;
}
@ -1541,6 +1607,24 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
emit_barrier(ops[0], ops[1], ops[2]);
break;
case OpVectorTimesMatrix:
case OpMatrixTimesVector:
{
// If the matrix needs transpose and it is square, just flip the multiply order.
uint32_t mtx_id = ops[opcode == OpMatrixTimesVector ? 2 : 3];
auto *e = maybe_get<SPIRExpression>(mtx_id);
auto &t = expression_type(mtx_id);
if (e && e->need_transpose && t.columns == t.vecsize)
{
e->need_transpose = false;
emit_binary_op(ops[0], ops[1], ops[3], ops[2], "*");
e->need_transpose = true;
}
else
BOP(*);
break;
}
// OpOuterProduct
default:
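
The operand swap is valid because transpose(M) * v == v * M for any square matrix, so no transpose() call needs to be emitted. A minimal 2x2 check of the identity (plain arrays stand in for Metal's matrix types):

#include <array>
#include <cassert>

using Vec2 = std::array<float, 2>;
using Mat2 = std::array<Vec2, 2>; // column-major: m[col][row]

// transpose(M) * v, written out explicitly.
static Vec2 mul_transposed(const Mat2 &m, const Vec2 &v)
{
    return { m[0][0] * v[0] + m[0][1] * v[1],
             m[1][0] * v[0] + m[1][1] * v[1] };
}

// v * M: the operand-swapped form emitted instead.
static Vec2 mul_row_vector(const Vec2 &v, const Mat2 &m)
{
    return { v[0] * m[0][0] + v[1] * m[0][1],
             v[0] * m[1][0] + v[1] * m[1][1] };
}

int main()
{
    Mat2 m = { Vec2{ 1.0f, 2.0f }, Vec2{ 3.0f, 4.0f } };
    Vec2 v = { 5.0f, 6.0f };
    assert(mul_transposed(m, v) == mul_row_vector(v, m)); // both yield (17, 39)
}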
@ -1559,34 +1643,18 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin
string bar_stmt = "threadgroup_barrier(mem_flags::";
uint32_t mem_sem = id_mem_sem ? get<SPIRConstant>(id_mem_sem).scalar() : uint32_t(MemorySemanticsMaskNone);
switch (mem_sem)
{
case MemorySemanticsCrossWorkgroupMemoryMask:
if (mem_sem & MemorySemanticsCrossWorkgroupMemoryMask)
bar_stmt += "mem_device";
break;
case MemorySemanticsSubgroupMemoryMask:
case MemorySemanticsWorkgroupMemoryMask:
case MemorySemanticsAtomicCounterMemoryMask:
else if (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask |
MemorySemanticsAtomicCounterMemoryMask))
bar_stmt += "mem_threadgroup";
break;
case MemorySemanticsImageMemoryMask:
else if (mem_sem & MemorySemanticsImageMemoryMask)
bar_stmt += "mem_texture";
break;
case MemorySemanticsAcquireMask:
case MemorySemanticsReleaseMask:
case MemorySemanticsAcquireReleaseMask:
case MemorySemanticsSequentiallyConsistentMask:
case MemorySemanticsUniformMemoryMask:
case MemorySemanticsMaskNone:
default:
else
bar_stmt += "mem_none";
break;
}
if (options.supports_msl_version(2))
if (options.is_ios() && options.supports_msl_version(2))
{
bar_stmt += ", ";
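
The switch became a chain of mask tests because memory semantics form a bitmask: a value combining several masks, such as Workgroup | Image, matches no single case label. A sketch of the selection logic, using the SPIR-V mask values for illustration:

#include <cassert>
#include <cstdint>
#include <string>

// Pick a Metal mem_flags value by testing bits, so combined masks still
// select something sensible (simplified to three masks).
static std::string barrier_flag(uint32_t mem_sem)
{
    const uint32_t CrossWorkgroup = 0x200, Workgroup = 0x100, Image = 0x800;
    if (mem_sem & CrossWorkgroup)
        return "mem_device";
    if (mem_sem & Workgroup)
        return "mem_threadgroup";
    if (mem_sem & Image)
        return "mem_texture";
    return "mem_none";
}

int main()
{
    assert(barrier_flag(0x100 | 0x800) == "mem_threadgroup"); // combined mask
    assert(barrier_flag(0) == "mem_none");                    // MemorySemanticsMaskNone
}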
@ -1919,7 +1987,7 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, uint64_t)
// Manufacture automatic sampler arg for SampledImage texture
auto &arg_type = get<SPIRType>(arg.type);
if (arg_type.basetype == SPIRType::SampledImage)
if (arg_type.basetype == SPIRType::SampledImage && arg_type.image.dim != DimBuffer)
decl += ", thread const sampler& " + to_sampler_expression(arg.id);
if (&arg != &func.arguments.back())
@ -2213,7 +2281,7 @@ string CompilerMSL::to_func_call_arg(uint32_t id)
{
auto &var = id_v.get<SPIRVariable>();
auto &type = get<SPIRType>(var.basetype);
if (type.basetype == SPIRType::SampledImage)
if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer)
arg_str += ", " + to_sampler_expression(id);
}
@ -2229,6 +2297,84 @@ string CompilerMSL::to_sampler_expression(uint32_t id)
return samp_id ? to_expression(samp_id) : to_expression(id) + sampler_name_suffix;
}
// Checks whether the ID is a row_major matrix that requires conversion before use
bool CompilerMSL::is_non_native_row_major_matrix(uint32_t id)
{
// Natively supported row-major matrices do not need to be converted.
if (backend.native_row_major_matrix)
return false;
// Non-matrix or column-major matrix types do not need to be converted.
if (!(meta[id].decoration.decoration_flags & (1ull << DecorationRowMajor)))
return false;
// Generate a function that will swap matrix elements from row-major to column-major.
const auto type = expression_type(id);
add_convert_row_major_matrix_function(type.columns, type.vecsize);
return true;
}
// Checks whether the member is a row_major matrix that requires conversion before use
bool CompilerMSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index)
{
// Natively supported row-major matrices do not need to be converted.
if (backend.native_row_major_matrix)
return false;
// Non-matrix or column-major matrix types do not need to be converted.
if (!(combined_decoration_for_member(type, index) & (1ull << DecorationRowMajor)))
return false;
// Generate a function that will swap matrix elements from row-major to column-major.
const auto mbr_type = get<SPIRType>(type.member_types[index]);
add_convert_row_major_matrix_function(mbr_type.columns, mbr_type.vecsize);
return true;
}
// Adds a function suitable for converting a non-square row-major matrix to a column-major matrix.
void CompilerMSL::add_convert_row_major_matrix_function(uint32_t cols, uint32_t rows)
{
SPVFuncImpl spv_func;
if (cols == rows) // Square matrix...just use transpose() function
return;
else if (cols == 2 && rows == 3)
spv_func = SPVFuncImplRowMajor2x3;
else if (cols == 2 && rows == 4)
spv_func = SPVFuncImplRowMajor2x4;
else if (cols == 3 && rows == 2)
spv_func = SPVFuncImplRowMajor3x2;
else if (cols == 3 && rows == 4)
spv_func = SPVFuncImplRowMajor3x4;
else if (cols == 4 && rows == 2)
spv_func = SPVFuncImplRowMajor4x2;
else if (cols == 4 && rows == 3)
spv_func = SPVFuncImplRowMajor4x3;
else
SPIRV_CROSS_THROW("Could not convert row-major matrix.");
auto rslt = spv_function_implementations.insert(spv_func);
if (rslt.second)
{
add_pragma_line("#pragma clang diagnostic ignored \"-Wmissing-prototypes\"");
force_recompile = true;
}
}
// Wraps the expression string in a function call that converts the
// row_major matrix result of the expression to a column_major matrix.
string CompilerMSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type)
{
strip_enclosed_expression(exp_str);
string func_name;
if (exp_type.columns == exp_type.vecsize)
func_name = "transpose";
else
func_name = string("spvConvertFromRowMajor") + to_string(exp_type.columns) + "x" + to_string(exp_type.vecsize);
return join(func_name, "(", exp_str, ")");
}
// Called automatically at the end of the entry point function
void CompilerMSL::emit_fixup()
{
@ -2237,10 +2383,8 @@ void CompilerMSL::emit_fixup()
if ((execution.model == ExecutionModelVertex) && stage_out_var_id && !qual_pos_var_name.empty())
{
if (CompilerGLSL::options.vertex.fixup_clipspace)
{
statement(qual_pos_var_name, ".z = (", qual_pos_var_name, ".z + ", qual_pos_var_name,
".w) * 0.5; // Adjust clip-space for Metal");
}
if (CompilerGLSL::options.vertex.flip_vert_y)
statement(qual_pos_var_name, ".y = -(", qual_pos_var_name, ".y);", " // Invert Y-axis for Metal");
@ -2599,6 +2743,7 @@ string CompilerMSL::entry_point_args(bool append_comma)
{
if (!ep_args.empty())
ep_args += ", ";
BuiltIn bi_type = meta[var_id].decoration.builtin_type;
ep_args += builtin_type_decl(bi_type) + " " + to_expression(var_id);
ep_args += " [[" + builtin_qualifier(bi_type) + "]]";
@ -2682,7 +2827,10 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
if (constref)
decl += "const ";
decl += type_to_glsl(type, arg.id);
if (is_builtin_variable(var))
decl += builtin_type_decl((BuiltIn)get_decoration(arg.id, DecorationBuiltIn));
else
decl += type_to_glsl(type, arg.id);
if (is_array(type))
decl += "*";
@ -3298,9 +3446,35 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui
break;
}
// Keep track of the instruction return types, mapped by ID
if (length > 1)
result_types[args[1]] = args[0];
// If it has one, keep track of the instruction's result type, mapped by ID
switch (opcode)
{
case OpStore:
case OpCopyMemory:
case OpCopyMemorySized:
case OpImageWrite:
case OpAtomicStore:
case OpAtomicFlagClear:
case OpEmitStreamVertex:
case OpEndStreamPrimitive:
case OpControlBarrier:
case OpMemoryBarrier:
case OpGroupWaitEvents:
case OpRetainEvent:
case OpReleaseEvent:
case OpSetUserEventStatus:
case OpCaptureEventProfilingInfo:
case OpCommitReadPipe:
case OpCommitWritePipe:
case OpGroupCommitReadPipe:
case OpGroupCommitWritePipe:
break;
default:
if (length > 1)
result_types[args[1]] = args[0];
break;
}
return true;
}

View File

@ -77,10 +77,26 @@ public:
// Options for compiling to Metal Shading Language
struct Options
{
typedef enum {
iOS,
macOS,
} Platform;
Platform platform = macOS;
uint32_t msl_version = make_msl_version(1, 2);
bool enable_point_size_builtin = true;
bool resolve_specialized_array_lengths = true;
bool is_ios()
{
return platform == iOS;
}
bool is_macos()
{
return platform == macOS;
}
void set_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0)
{
msl_version = make_msl_version(major, minor, patch);
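
set_msl_version() relies on make_msl_version() packing the version into one comparable integer. Assuming the conventional major * 10000 + minor * 100 + patch encoding (an assumption about code outside this diff), version checks reduce to ordered comparisons:

#include <cassert>
#include <cstdint>

// Assumed encoding for make_msl_version: MSL 1.2 packs to 10200.
static uint32_t make_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0)
{
    return major * 10000 + minor * 100 + patch;
}

int main()
{
    assert(make_msl_version(1, 2) == 10200u);
    // supports_msl_version() can then be a simple ordered comparison.
    assert(make_msl_version(2) > make_msl_version(1, 2));
}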
@ -122,6 +138,12 @@ public:
SPVFuncImplInverse2x2,
SPVFuncImplInverse3x3,
SPVFuncImplInverse4x4,
SPVFuncImplRowMajor2x3,
SPVFuncImplRowMajor2x4,
SPVFuncImplRowMajor3x2,
SPVFuncImplRowMajor3x4,
SPVFuncImplRowMajor4x2,
SPVFuncImplRowMajor4x3,
};
// Constructs an instance to compile the SPIR-V code into Metal Shading Language,
@ -186,6 +208,9 @@ protected:
std::string to_qualifiers_glsl(uint32_t id) override;
void replace_illegal_names() override;
void declare_undefined_values() override;
bool is_non_native_row_major_matrix(uint32_t id) override;
bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) override;
std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type) override;
void preprocess_op_codes();
void localize_global_variables();
@ -207,6 +232,7 @@ protected:
void emit_interface_block(uint32_t ib_var_id);
bool maybe_emit_input_struct_assignment(uint32_t id_lhs, uint32_t id_rhs);
bool maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs);
void add_convert_row_major_matrix_function(uint32_t cols, uint32_t rows);
std::string func_type_decl(SPIRType &type);
std::string entry_point_args(bool append_comma);
@ -245,7 +271,7 @@ protected:
std::unordered_map<uint32_t, MSLVertexAttr *> vtx_attrs_by_location;
std::map<uint32_t, uint32_t> non_stage_in_input_var_ids;
std::unordered_map<MSLStructMemberKey, uint32_t> struct_member_padding;
std::vector<std::string> pragma_lines;
std::set<std::string> pragma_lines;
std::vector<MSLResourceBinding *> resource_bindings;
MSLResourceBinding next_metal_resource_index;
uint32_t stage_in_var_id = 0;

View File

@ -358,11 +358,20 @@ def test_shader_msl(stats, shader, update, keep, opt):
noopt = shader_is_noopt(shader[1])
spirv, msl = cross_compile_msl(joined_path, is_spirv, opt and (not noopt))
regression_check(shader, msl, update, keep, opt)
os.remove(spirv)
# Uncomment the following line to print the temp SPIR-V file path.
# This temp SPIR-V file is not deleted until after the Metal validation step below.
# If Metal validation fails, the temp SPIR-V file can be copied out and
# used as input to an invocation of spirv-cross to debug from Xcode directly.
# To do so, build spirv-cross using `make DEBUG=1`, then run the spirv-cross
# executable from Xcode using args: `--msl --entry main --output msl_path spirv_path`.
# print('SPIR-V shader: ' + spirv)
if not force_no_external_validation:
validate_shader_msl(shader, opt)
os.remove(spirv)
def test_shader_hlsl(stats, shader, update, keep, opt):
joined_path = os.path.join(shader[0], shader[1])
print('Testing HLSL shader:', joined_path)

View File

@ -11,6 +11,7 @@ echo "Using spirv-opt in: $(which spirv-opt)."
./test_shaders.py shaders --opt || exit 1
./test_shaders.py shaders-msl --msl || exit 1
./test_shaders.py shaders-msl --msl --opt || exit 1
./test_shaders.py shaders-msl-no-opt --msl || exit 1
./test_shaders.py shaders-hlsl --hlsl || exit 1
./test_shaders.py shaders-hlsl --hlsl --opt || exit 1

View File

@ -11,6 +11,7 @@ echo "Using spirv-opt in: $(which spirv-opt)."
./test_shaders.py shaders --update --opt || exit 1
./test_shaders.py shaders-msl --msl --update || exit 1
./test_shaders.py shaders-msl --msl --update --opt || exit 1
./test_shaders.py shaders-msl-no-opt --msl --update || exit 1
./test_shaders.py shaders-hlsl --hlsl --update || exit 1
./test_shaders.py shaders-hlsl --hlsl --update --opt || exit 1