CompilerMSL support smaller offsets for 3-row row-major matrices.

Support MSL typedefs to declare 3-row row-major matrices as 3-column matrices.
Allow those matrices to be decorated as packed.
Support transposing those matrices when used.
Modify how member alignments are calculated.
This commit is contained in:
Bill Hollings 2018-02-11 16:52:57 -05:00
parent 8b53b70a56
commit 607b0d6d42
10 changed files with 293 additions and 90 deletions

View File

@ -3,6 +3,8 @@
using namespace metal;
typedef float3x2 packed_float2x3;
struct S0
{
float2 a[1];
@ -58,8 +60,10 @@ struct SSBO1
float3x2 m3;
float2x2 m4;
float2x2 m5[9];
float2x3 m6[4][2];
packed_float2x3 m6[4][2];
char pad10[8];
float3x2 m7;
char pad11[8];
float array[1];
};
@ -96,5 +100,6 @@ kernel void main0(device SSBO1& ssbo_430 [[buffer(0)]], device SSBO0& ssbo_140 [
ssbo_430.content.m3s[5].c = ssbo_140.content.m3s[5].c;
ssbo_430.content.m3s[6].c = ssbo_140.content.m3s[6].c;
ssbo_430.content.m3s[7].c = ssbo_140.content.m3s[7].c;
ssbo_430.content.m1.a = ssbo_430.content.m3.a * ssbo_430.m6[1][1];
}

View File

@ -0,0 +1,56 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
typedef float3x4 packed_float4x3;
struct _15
{
packed_float4x3 _m0;
packed_float4x3 _m1;
};
struct _42
{
float4x4 _m0;
float4x4 _m1;
float _m2;
char pad3[12];
packed_float3 _m3;
float _m4;
packed_float3 _m5;
float _m6;
float _m7;
float _m8;
float2 _m9;
};
struct main0_in
{
float4 m_25 [[attribute(0)]];
};
struct main0_out
{
float3 m_72 [[user(locn0)]];
float4 gl_Position [[position]];
};
vertex main0_out main0(main0_in in [[stage_in]], constant _15& _17 [[buffer(0)]], constant _42& _44 [[buffer(1)]])
{
main0_out out = {};
float3 _34;
do
{
_34 = normalize(float4(in.m_25.xyz, 0.0) * _17._m1);
break;
} while (false);
float4 _70 = _44._m0 * float4(_44._m3 + (in.m_25.xyz * (_44._m6 + _44._m7)), 1.0);
out.m_72 = _34;
float4 _95 = _70;
_95.y = -_70.y;
out.gl_Position = _95;
return out;
}

View File

@ -3,6 +3,8 @@
using namespace metal;
typedef float3x2 packed_float2x3;
struct S0
{
float2 a[1];
@ -58,8 +60,10 @@ struct SSBO1
float3x2 m3;
float2x2 m4;
float2x2 m5[9];
float2x3 m6[4][2];
packed_float2x3 m6[4][2];
char pad10[8];
float3x2 m7;
char pad11[8];
float array[1];
};
@ -96,5 +100,6 @@ kernel void main0(device SSBO1& ssbo_430 [[buffer(0)]], device SSBO0& ssbo_140 [
ssbo_430.content.m3s[5].c = ssbo_140.content.m3s[5].c;
ssbo_430.content.m3s[6].c = ssbo_140.content.m3s[6].c;
ssbo_430.content.m3s[7].c = ssbo_140.content.m3s[7].c;
ssbo_430.content.m1.a = ssbo_430.content.m3.a * ssbo_430.m6[1][1];
}

View File

@ -0,0 +1,56 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
typedef float3x4 packed_float4x3;
struct _15
{
packed_float4x3 _m0;
packed_float4x3 _m1;
};
struct _42
{
float4x4 _m0;
float4x4 _m1;
float _m2;
char pad3[12];
packed_float3 _m3;
float _m4;
packed_float3 _m5;
float _m6;
float _m7;
float _m8;
float2 _m9;
};
struct main0_in
{
float4 m_25 [[attribute(0)]];
};
struct main0_out
{
float3 m_72 [[user(locn0)]];
float4 gl_Position [[position]];
};
vertex main0_out main0(main0_in in [[stage_in]], constant _15& _17 [[buffer(0)]], constant _42& _44 [[buffer(1)]])
{
main0_out out = {};
float3 _13;
do
{
_13 = normalize(float4(in.m_25.xyz, 0.0) * _17._m1);
break;
} while (false);
float4 _39 = _44._m0 * float4(_44._m3 + (in.m_25.xyz * (_44._m6 + _44._m7)), 1.0);
out.m_72 = _13;
float4 _74 = _39;
_74.y = -_39.y;
out.gl_Position = _74;
return out;
}

View File

@ -72,5 +72,6 @@ layout(binding = 0, std140) buffer SSBO0
void main()
{
ssbo_430.content = ssbo_140.content;
ssbo_430.content.m1.a = ssbo_430.m6[1][1] * ssbo_430.content.m3.a; // test packed matrix access
}

View File

@ -0,0 +1,41 @@
#version 450
layout(binding = 13, std140) uniform _1365_18812
{
layout(row_major) mat4x3 _m0;
layout(row_major) mat4x3 _m1;
} _18812;
layout(binding = 12, std140) uniform _1126_22044
{
layout(row_major) mat4 _m0;
layout(row_major) mat4 _m1;
float _m9;
vec3 _m10;
float _m11;
vec3 _m12;
float _m17;
float _m18;
float _m19;
vec2 _m20;
} _22044;
layout(location = 0) out vec3 _3976;
layout(location = 0) in vec4 _5275;
vec3 _2;
void main()
{
vec3 _23783;
do
{
_23783 = normalize(_18812._m1 * vec4(_5275.xyz, 0.0));
break;
} while (false);
vec4 _14995 = vec4(_22044._m10 + (_5275.xyz * (_22044._m17 + _22044._m18)), 1.0) * _22044._m0;
_3976 = _23783;
vec4 _6282 = _14995;
_6282.y = -_14995.y;
gl_Position = _6282;
}

View File

@ -2238,7 +2238,10 @@ string CompilerGLSL::to_expression(uint32_t id)
if (e.base_expression)
return to_enclosed_expression(e.base_expression) + e.expression;
else if (e.need_transpose)
return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type));
{
bool is_packed = has_decoration(id, DecorationCPacked);
return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type), is_packed);
}
else
{
if (force_recompile)
@ -4289,7 +4292,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
bool access_chain_is_arrayed = false;
bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base);
bool vector_is_packed = false;
bool is_packed = false;
bool pending_array_enclose = false;
bool dimension_flatten = false;
@ -4421,7 +4424,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
}
}
vector_is_packed = member_is_packed_type(*type, index);
is_packed = member_is_packed_type(*type, index);
row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index);
type = &get<SPIRType>(type->member_types[index]);
}
@ -4430,8 +4433,9 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
{
if (row_major_matrix_needs_conversion)
{
expr = convert_row_major_matrix(expr, *type);
expr = convert_row_major_matrix(expr, *type, is_packed);
row_major_matrix_needs_conversion = false;
is_packed = false;
}
expr += "[";
@ -4447,10 +4451,10 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
// Vector -> Scalar
else if (type->vecsize > 1)
{
if (vector_is_packed)
if (is_packed)
{
expr = unpack_expression_type(expr, *type);
vector_is_packed = false;
is_packed = false;
}
if (index_is_literal)
@ -4489,7 +4493,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
*need_transpose = row_major_matrix_needs_conversion;
if (result_is_packed)
*result_is_packed = vector_is_packed;
*result_is_packed = is_packed;
return expr;
}
@ -4620,7 +4624,7 @@ std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uin
// Cannot forward transpositions, so resolve them here.
if (need_transpose)
expr += convert_row_major_matrix(tmp, member_type);
expr += convert_row_major_matrix(tmp, member_type, false);
else
expr += tmp;
}
@ -7050,7 +7054,7 @@ bool CompilerGLSL::member_is_packed_type(const SPIRType &type, uint32_t index) c
// row_major matrix result of the expression to a column_major matrix.
// Base implementation uses the standard library transpose() function.
// Subclasses may override to use a different function.
string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType & /*exp_type*/)
string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType & /*exp_type*/, bool /*is_packed*/)
{
strip_enclosed_expression(exp_str);
return join("transpose(", exp_str, ")");

View File

@ -298,7 +298,7 @@ protected:
virtual bool is_non_native_row_major_matrix(uint32_t id);
virtual bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index);
bool member_is_packed_type(const SPIRType &type, uint32_t index) const;
virtual std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type);
virtual std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, bool is_packed);
std::unordered_set<std::string> local_variable_names;
std::unordered_set<std::string> resource_names;

View File

@ -744,29 +744,9 @@ void CompilerMSL::align_struct(SPIRType &ib_type)
curr_offset = 0;
for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
{
// Align current offset to the current member's default alignment.
size_t align_mask = get_declared_struct_member_alignment(ib_type, mbr_idx) - 1;
curr_offset = uint32_t((curr_offset + align_mask) & ~align_mask);
if (is_member_packable(ib_type, mbr_idx))
set_member_decoration(ib_type_id, mbr_idx, DecorationCPacked);
// Fetch the member offset as declared in the SPIRV.
uint32_t mbr_offset = get_member_decoration(ib_type_id, mbr_idx, DecorationOffset);
if (curr_offset > mbr_offset)
{
uint32_t prev_mbr_idx = mbr_idx - 1;
if (is_member_packable(ib_type, prev_mbr_idx))
set_member_decoration(ib_type_id, prev_mbr_idx, DecorationCPacked);
}
// Increment the current offset to be positioned immediately after the current member.
curr_offset = mbr_offset + uint32_t(get_declared_struct_member_size(ib_type, mbr_idx));
}
// Test the alignment of each member, and if a member is positioned farther than its
// alignment and the end of the previous member, add a dummy padding member that will
// be added before the current member when the delaration of this struct is emitted.
curr_offset = 0;
for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
{
// Align current offset to the current member's default alignment.
size_t align_mask = get_declared_struct_member_alignment(ib_type, mbr_idx) - 1;
curr_offset = uint32_t((curr_offset + align_mask) & ~align_mask);
@ -791,14 +771,48 @@ void CompilerMSL::align_struct(SPIRType &ib_type)
// variation that is smaller than the unpacked variation of that type.
bool CompilerMSL::is_member_packable(SPIRType &ib_type, uint32_t index)
{
uint32_t mbr_type_id = ib_type.member_types[index];
auto &mbr_type = get<SPIRType>(mbr_type_id);
// 3-element vectors (char3, uchar3, short3, ushort3, int3, uint3, half3, float3)
if (mbr_type.vecsize == 3 && mbr_type.columns == 1)
// We've already marked it as packable
if (has_member_decoration(ib_type.self, index, DecorationCPacked))
return true;
return false;
auto &mbr_type = get<SPIRType>(ib_type.member_types[index]);
// Only 3-element vectors or 3-row matrices need to be packed.
if (mbr_type.vecsize != 3)
return false;
// Only row-major matrices need to be packed.
if (is_matrix(mbr_type) && !has_member_decoration(ib_type.self, index, DecorationRowMajor))
return false;
uint32_t component_size = mbr_type.width / 8;
uint32_t unpacked_mbr_size = component_size * (mbr_type.vecsize + 1) * mbr_type.columns;
if (is_array(mbr_type))
{
// If member is an array, and the array stride is larger than the type needs, don't pack it.
// Take into consideration multi-dimentional arrays.
uint32_t md_elem_cnt = 1;
size_t last_elem_idx = mbr_type.array.size() - 1;
for (uint32_t i = 0; i < last_elem_idx; i++)
md_elem_cnt *= max(to_array_size_literal(mbr_type, i), 1U);
uint32_t unpacked_array_stride = unpacked_mbr_size * md_elem_cnt;
uint32_t array_stride = type_struct_member_array_stride(ib_type, index);
return unpacked_array_stride > array_stride;
}
else
{
// Pack if there is not enough space between this member and next.
// If last member, only pack if it's a row-major matrix.
if (index < ib_type.member_types.size() - 1)
{
uint32_t mbr_offset_curr = get_member_decoration(ib_type.self, index, DecorationOffset);
uint32_t mbr_offset_next = get_member_decoration(ib_type.self, index + 1, DecorationOffset);
return unpacked_mbr_size > mbr_offset_next - mbr_offset_curr;
}
else
return is_matrix(mbr_type);
}
}
// Returns a combination of type ID and member index for use as hash key
@ -835,11 +849,26 @@ void CompilerMSL::emit_header()
statement("");
statement("using namespace metal;");
statement("");
for (auto &td : typedef_lines)
statement(td);
if (!typedef_lines.empty())
statement("");
}
void CompilerMSL::add_pragma_line(const string &line)
{
pragma_lines.insert(line);
auto rslt = pragma_lines.insert(line);
if (rslt.second)
force_recompile = true;
}
void CompilerMSL::add_typedef_line(const string &line)
{
auto rslt = typedef_lines.insert(line);
if (rslt.second)
force_recompile = true;
}
// Emits any needed custom function bodies.
@ -1611,11 +1640,12 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
case OpVectorTimesMatrix:
case OpMatrixTimesVector:
{
// If the matrix needs transpose and it is square, just flip the multiply order.
// If the matrix needs transpose and it is square or packed, just flip the multiply order.
uint32_t mtx_id = ops[opcode == OpMatrixTimesVector ? 2 : 3];
auto *e = maybe_get<SPIRExpression>(mtx_id);
auto &t = expression_type(mtx_id);
if (e && e->need_transpose && t.columns == t.vecsize)
bool is_packed = has_decoration(mtx_id, DecorationCPacked);
if (e && e->need_transpose && (t.columns == t.vecsize || is_packed))
{
e->need_transpose = false;
emit_binary_op(ops[0], ops[1], ops[3], ops[2], "*");
@ -2310,8 +2340,13 @@ bool CompilerMSL::is_non_native_row_major_matrix(uint32_t id)
return false;
// Generate a function that will swap matrix elements from row-major to column-major.
const auto type = expression_type(id);
add_convert_row_major_matrix_function(type.columns, type.vecsize);
// Packed row-matrix should just use transpose() function.
if (!has_decoration(id, DecorationCPacked))
{
const auto type = expression_type(id);
add_convert_row_major_matrix_function(type.columns, type.vecsize);
}
return true;
}
@ -2323,12 +2358,17 @@ bool CompilerMSL::member_is_non_native_row_major_matrix(const SPIRType &type, ui
return false;
// Non-matrix or column-major matrix types do not need to be converted.
if (!(combined_decoration_for_member(type, index) & (1ull << DecorationRowMajor)))
if (!has_member_decoration(type.self, index, DecorationRowMajor))
return false;
// Generate a function that will swap matrix elements from row-major to column-major.
const auto mbr_type = get<SPIRType>(type.member_types[index]);
add_convert_row_major_matrix_function(mbr_type.columns, mbr_type.vecsize);
// Packed row-matrix should just use transpose() function.
if (!has_member_decoration(type.self, index, DecorationCPacked))
{
const auto mbr_type = get<SPIRType>(type.member_types[index]);
add_convert_row_major_matrix_function(mbr_type.columns, mbr_type.vecsize);
}
return true;
}
@ -2355,20 +2395,19 @@ void CompilerMSL::add_convert_row_major_matrix_function(uint32_t cols, uint32_t
auto rslt = spv_function_implementations.insert(spv_func);
if (rslt.second)
{
add_pragma_line("#pragma clang diagnostic ignored \"-Wmissing-prototypes\"");
force_recompile = true;
}
}
// Wraps the expression string in a function call that converts the
// row_major matrix result of the expression to a column_major matrix.
string CompilerMSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type)
string CompilerMSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, bool is_packed)
{
strip_enclosed_expression(exp_str);
string func_name;
if (exp_type.columns == exp_type.vecsize)
// Square and packed matrices can just use transpose
if (exp_type.columns == exp_type.vecsize || is_packed)
func_name = "transpose";
else
func_name = string("spvConvertFromRowMajor") + to_string(exp_type.columns) + "x" + to_string(exp_type.vecsize);
@ -2405,7 +2444,23 @@ void CompilerMSL::emit_struct_member(const SPIRType &type, uint32_t member_type_
statement("char pad", to_string(index), "[", to_string(pad_len), "];");
// If this member is packed, mark it as so.
string pack_pfx = member_is_packed_type(type, index) ? "packed_" : "";
string pack_pfx = "";
if (member_is_packed_type(type, index))
{
pack_pfx = "packed_";
// If we're packing a matrix, output an appropriate typedef
if (membertype.vecsize > 1 && membertype.columns > 1)
{
string base_type = membertype.width == 16 ? "half" : "float";
string td_line = "typedef ";
td_line += base_type + to_string(membertype.vecsize) + "x" + to_string(membertype.columns);
td_line += " " + pack_pfx;
td_line += base_type + to_string(membertype.columns) + "x" + to_string(membertype.vecsize);
td_line += ";";
add_typedef_line(td_line);
}
}
statement(pack_pfx, type_to_glsl(membertype), " ", qualifier, to_member_name(type, index),
member_attribute_qualifier(type, index), type_to_array_glsl(membertype), ";");
@ -3369,7 +3424,6 @@ string CompilerMSL::built_in_func_arg(BuiltIn builtin, bool prefix_comma)
// Returns the byte size of a struct member.
size_t CompilerMSL::get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const
{
auto dec_mask = get_member_decoration_mask(struct_type.self, index);
auto &type = get<SPIRType>(struct_type.member_types[index]);
switch (type.basetype)
@ -3384,10 +3438,6 @@ size_t CompilerMSL::get_declared_struct_member_size(const SPIRType &struct_type,
default:
{
size_t component_size = type.width / 8;
unsigned vecsize = type.vecsize;
unsigned columns = type.columns;
// For arrays, we can use ArrayStride to get an easy check.
// Runtime arrays will have zero size so force to min of one.
if (!type.array.empty())
@ -3396,29 +3446,15 @@ size_t CompilerMSL::get_declared_struct_member_size(const SPIRType &struct_type,
if (type.basetype == SPIRType::Struct)
return get_declared_struct_size(type);
if (columns == 1) // An unpacked 3-element vector is the same size as a 4-element vector.
{
if (!(dec_mask & (1ull << DecorationCPacked)))
{
if (vecsize == 3)
vecsize = 4;
}
}
else // For matrices, a 3-element column is the same size as a 4-element column.
{
if (dec_mask & (1ull << DecorationColMajor))
{
if (vecsize == 3)
vecsize = 4;
}
else if (dec_mask & (1ull << DecorationRowMajor))
{
if (columns == 3)
columns = 4;
}
}
uint32_t component_size = type.width / 8;
uint32_t vecsize = type.vecsize;
uint32_t columns = type.columns;
return vecsize * columns * component_size;
// An unpacked 3-element vector or matrix column is the same memory size as a 4-element.
if (vecsize == 3 && !has_member_decoration(struct_type.self, index, DecorationCPacked))
vecsize = 4;
return component_size * vecsize * columns;
}
}
}
@ -3443,16 +3479,13 @@ size_t CompilerMSL::get_declared_struct_member_alignment(const SPIRType &struct_
default:
{
// Alignment of packed type is the same as the underlying component size.
// Alignment of unpacked type is the same as the type size (or one matrix column).
// Alignment of packed type is the same as the underlying component or column size.
// Alignment of unpacked type is the same as the vector size.
// Alignment of 3-elements vector is the same as 4-elements (including packed using column).
if (member_is_packed_type(struct_type, index))
return type.width / 8;
return (type.width / 8) * (type.columns == 3 ? 4 : type.columns);
else
{
// Divide by array size and colum count. Runtime arrays will have zero size so force to min of one.
uint32_t array_size = type.array.empty() ? 1 : max(type.array.back(), 1U);
return get_declared_struct_member_size(struct_type, index) / (type.columns * array_size);
}
return (type.width / 8) * (type.vecsize == 3 ? 4 : type.vecsize);
}
}
}

View File

@ -210,7 +210,7 @@ protected:
void declare_undefined_values() override;
bool is_non_native_row_major_matrix(uint32_t id) override;
bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) override;
std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type) override;
std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, bool is_packed) override;
void preprocess_op_codes();
void localize_global_variables();
@ -264,6 +264,7 @@ protected:
bool op1_is_pointer = false, uint32_t op2 = 0);
const char *get_memory_order(uint32_t spv_mem_sem);
void add_pragma_line(const std::string &line);
void add_typedef_line(const std::string &line);
void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem);
Options options;
@ -272,6 +273,7 @@ protected:
std::map<uint32_t, uint32_t> non_stage_in_input_var_ids;
std::unordered_map<MSLStructMemberKey, uint32_t> struct_member_padding;
std::set<std::string> pragma_lines;
std::set<std::string> typedef_lines;
std::vector<MSLResourceBinding *> resource_bindings;
MSLResourceBinding next_metal_resource_index;
uint32_t stage_in_var_id = 0;