Merge pull request #448 from billhollings/master
MSL support for smaller offsets for 3-row row-major matrices (fix issue #436).
This commit is contained in:
commit
64d30836c8
@ -3,6 +3,8 @@
|
||||
|
||||
using namespace metal;
|
||||
|
||||
typedef float3x2 packed_float2x3;
|
||||
|
||||
struct S0
|
||||
{
|
||||
float2 a[1];
|
||||
@ -58,8 +60,10 @@ struct SSBO1
|
||||
float3x2 m3;
|
||||
float2x2 m4;
|
||||
float2x2 m5[9];
|
||||
float2x3 m6[4][2];
|
||||
packed_float2x3 m6[4][2];
|
||||
char pad10[8];
|
||||
float3x2 m7;
|
||||
char pad11[8];
|
||||
float array[1];
|
||||
};
|
||||
|
||||
@ -96,5 +100,6 @@ kernel void main0(device SSBO1& ssbo_430 [[buffer(0)]], device SSBO0& ssbo_140 [
|
||||
ssbo_430.content.m3s[5].c = ssbo_140.content.m3s[5].c;
|
||||
ssbo_430.content.m3s[6].c = ssbo_140.content.m3s[6].c;
|
||||
ssbo_430.content.m3s[7].c = ssbo_140.content.m3s[7].c;
|
||||
ssbo_430.content.m1.a = ssbo_430.content.m3.a * ssbo_430.m6[1][1];
|
||||
}
|
||||
|
||||
|
56
reference/opt/shaders-msl/vert/packed_matrix.vert
Normal file
56
reference/opt/shaders-msl/vert/packed_matrix.vert
Normal file
@ -0,0 +1,56 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
typedef float3x4 packed_float4x3;
|
||||
|
||||
struct _15
|
||||
{
|
||||
packed_float4x3 _m0;
|
||||
packed_float4x3 _m1;
|
||||
};
|
||||
|
||||
struct _42
|
||||
{
|
||||
float4x4 _m0;
|
||||
float4x4 _m1;
|
||||
float _m2;
|
||||
char pad3[12];
|
||||
packed_float3 _m3;
|
||||
float _m4;
|
||||
packed_float3 _m5;
|
||||
float _m6;
|
||||
float _m7;
|
||||
float _m8;
|
||||
float2 _m9;
|
||||
};
|
||||
|
||||
struct main0_in
|
||||
{
|
||||
float4 m_25 [[attribute(0)]];
|
||||
};
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float3 m_72 [[user(locn0)]];
|
||||
float4 gl_Position [[position]];
|
||||
};
|
||||
|
||||
vertex main0_out main0(main0_in in [[stage_in]], constant _15& _17 [[buffer(0)]], constant _42& _44 [[buffer(1)]])
|
||||
{
|
||||
main0_out out = {};
|
||||
float3 _34;
|
||||
do
|
||||
{
|
||||
_34 = normalize(float4(in.m_25.xyz, 0.0) * _17._m1);
|
||||
break;
|
||||
} while (false);
|
||||
float4 _70 = _44._m0 * float4(_44._m3 + (in.m_25.xyz * (_44._m6 + _44._m7)), 1.0);
|
||||
out.m_72 = _34;
|
||||
float4 _95 = _70;
|
||||
_95.y = -_70.y;
|
||||
out.gl_Position = _95;
|
||||
return out;
|
||||
}
|
||||
|
@ -3,6 +3,8 @@
|
||||
|
||||
using namespace metal;
|
||||
|
||||
typedef float3x2 packed_float2x3;
|
||||
|
||||
struct S0
|
||||
{
|
||||
float2 a[1];
|
||||
@ -58,8 +60,10 @@ struct SSBO1
|
||||
float3x2 m3;
|
||||
float2x2 m4;
|
||||
float2x2 m5[9];
|
||||
float2x3 m6[4][2];
|
||||
packed_float2x3 m6[4][2];
|
||||
char pad10[8];
|
||||
float3x2 m7;
|
||||
char pad11[8];
|
||||
float array[1];
|
||||
};
|
||||
|
||||
@ -96,5 +100,6 @@ kernel void main0(device SSBO1& ssbo_430 [[buffer(0)]], device SSBO0& ssbo_140 [
|
||||
ssbo_430.content.m3s[5].c = ssbo_140.content.m3s[5].c;
|
||||
ssbo_430.content.m3s[6].c = ssbo_140.content.m3s[6].c;
|
||||
ssbo_430.content.m3s[7].c = ssbo_140.content.m3s[7].c;
|
||||
ssbo_430.content.m1.a = ssbo_430.content.m3.a * ssbo_430.m6[1][1];
|
||||
}
|
||||
|
||||
|
56
reference/shaders-msl/vert/packed_matrix.vert
Normal file
56
reference/shaders-msl/vert/packed_matrix.vert
Normal file
@ -0,0 +1,56 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
typedef float3x4 packed_float4x3;
|
||||
|
||||
struct _15
|
||||
{
|
||||
packed_float4x3 _m0;
|
||||
packed_float4x3 _m1;
|
||||
};
|
||||
|
||||
struct _42
|
||||
{
|
||||
float4x4 _m0;
|
||||
float4x4 _m1;
|
||||
float _m2;
|
||||
char pad3[12];
|
||||
packed_float3 _m3;
|
||||
float _m4;
|
||||
packed_float3 _m5;
|
||||
float _m6;
|
||||
float _m7;
|
||||
float _m8;
|
||||
float2 _m9;
|
||||
};
|
||||
|
||||
struct main0_in
|
||||
{
|
||||
float4 m_25 [[attribute(0)]];
|
||||
};
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float3 m_72 [[user(locn0)]];
|
||||
float4 gl_Position [[position]];
|
||||
};
|
||||
|
||||
vertex main0_out main0(main0_in in [[stage_in]], constant _15& _17 [[buffer(0)]], constant _42& _44 [[buffer(1)]])
|
||||
{
|
||||
main0_out out = {};
|
||||
float3 _13;
|
||||
do
|
||||
{
|
||||
_13 = normalize(float4(in.m_25.xyz, 0.0) * _17._m1);
|
||||
break;
|
||||
} while (false);
|
||||
float4 _39 = _44._m0 * float4(_44._m3 + (in.m_25.xyz * (_44._m6 + _44._m7)), 1.0);
|
||||
out.m_72 = _13;
|
||||
float4 _74 = _39;
|
||||
_74.y = -_39.y;
|
||||
out.gl_Position = _74;
|
||||
return out;
|
||||
}
|
||||
|
@ -72,5 +72,6 @@ layout(binding = 0, std140) buffer SSBO0
|
||||
void main()
|
||||
{
|
||||
ssbo_430.content = ssbo_140.content;
|
||||
ssbo_430.content.m1.a = ssbo_430.m6[1][1] * ssbo_430.content.m3.a; // test packed matrix access
|
||||
}
|
||||
|
||||
|
41
shaders-msl/vert/packed_matrix.vert
Normal file
41
shaders-msl/vert/packed_matrix.vert
Normal file
@ -0,0 +1,41 @@
|
||||
#version 450
|
||||
|
||||
layout(binding = 13, std140) uniform _1365_18812
|
||||
{
|
||||
layout(row_major) mat4x3 _m0;
|
||||
layout(row_major) mat4x3 _m1;
|
||||
} _18812;
|
||||
|
||||
layout(binding = 12, std140) uniform _1126_22044
|
||||
{
|
||||
layout(row_major) mat4 _m0;
|
||||
layout(row_major) mat4 _m1;
|
||||
float _m9;
|
||||
vec3 _m10;
|
||||
float _m11;
|
||||
vec3 _m12;
|
||||
float _m17;
|
||||
float _m18;
|
||||
float _m19;
|
||||
vec2 _m20;
|
||||
} _22044;
|
||||
|
||||
layout(location = 0) out vec3 _3976;
|
||||
layout(location = 0) in vec4 _5275;
|
||||
|
||||
vec3 _2;
|
||||
|
||||
void main()
|
||||
{
|
||||
vec3 _23783;
|
||||
do
|
||||
{
|
||||
_23783 = normalize(_18812._m1 * vec4(_5275.xyz, 0.0));
|
||||
break;
|
||||
} while (false);
|
||||
vec4 _14995 = vec4(_22044._m10 + (_5275.xyz * (_22044._m17 + _22044._m18)), 1.0) * _22044._m0;
|
||||
_3976 = _23783;
|
||||
vec4 _6282 = _14995;
|
||||
_6282.y = -_14995.y;
|
||||
gl_Position = _6282;
|
||||
}
|
@ -2247,7 +2247,10 @@ string CompilerGLSL::to_expression(uint32_t id)
|
||||
if (e.base_expression)
|
||||
return to_enclosed_expression(e.base_expression) + e.expression;
|
||||
else if (e.need_transpose)
|
||||
return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type));
|
||||
{
|
||||
bool is_packed = has_decoration(id, DecorationCPacked);
|
||||
return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type), is_packed);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (force_recompile)
|
||||
@ -4298,7 +4301,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
|
||||
|
||||
bool access_chain_is_arrayed = false;
|
||||
bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base);
|
||||
bool vector_is_packed = false;
|
||||
bool is_packed = false;
|
||||
bool pending_array_enclose = false;
|
||||
bool dimension_flatten = false;
|
||||
|
||||
@ -4430,7 +4433,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
|
||||
}
|
||||
}
|
||||
|
||||
vector_is_packed = member_is_packed_type(*type, index);
|
||||
is_packed = member_is_packed_type(*type, index);
|
||||
row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index);
|
||||
type = &get<SPIRType>(type->member_types[index]);
|
||||
}
|
||||
@ -4439,8 +4442,9 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
|
||||
{
|
||||
if (row_major_matrix_needs_conversion)
|
||||
{
|
||||
expr = convert_row_major_matrix(expr, *type);
|
||||
expr = convert_row_major_matrix(expr, *type, is_packed);
|
||||
row_major_matrix_needs_conversion = false;
|
||||
is_packed = false;
|
||||
}
|
||||
|
||||
expr += "[";
|
||||
@ -4456,10 +4460,10 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
|
||||
// Vector -> Scalar
|
||||
else if (type->vecsize > 1)
|
||||
{
|
||||
if (vector_is_packed)
|
||||
if (is_packed)
|
||||
{
|
||||
expr = unpack_expression_type(expr, *type);
|
||||
vector_is_packed = false;
|
||||
is_packed = false;
|
||||
}
|
||||
|
||||
if (index_is_literal)
|
||||
@ -4498,7 +4502,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
|
||||
*need_transpose = row_major_matrix_needs_conversion;
|
||||
|
||||
if (result_is_packed)
|
||||
*result_is_packed = vector_is_packed;
|
||||
*result_is_packed = is_packed;
|
||||
|
||||
return expr;
|
||||
}
|
||||
@ -4629,7 +4633,7 @@ std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uin
|
||||
|
||||
// Cannot forward transpositions, so resolve them here.
|
||||
if (need_transpose)
|
||||
expr += convert_row_major_matrix(tmp, member_type);
|
||||
expr += convert_row_major_matrix(tmp, member_type, false);
|
||||
else
|
||||
expr += tmp;
|
||||
}
|
||||
@ -7080,7 +7084,7 @@ bool CompilerGLSL::member_is_packed_type(const SPIRType &type, uint32_t index) c
|
||||
// row_major matrix result of the expression to a column_major matrix.
|
||||
// Base implementation uses the standard library transpose() function.
|
||||
// Subclasses may override to use a different function.
|
||||
string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType & /*exp_type*/)
|
||||
string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType & /*exp_type*/, bool /*is_packed*/)
|
||||
{
|
||||
strip_enclosed_expression(exp_str);
|
||||
return join("transpose(", exp_str, ")");
|
||||
|
@ -299,7 +299,7 @@ protected:
|
||||
virtual bool is_non_native_row_major_matrix(uint32_t id);
|
||||
virtual bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index);
|
||||
bool member_is_packed_type(const SPIRType &type, uint32_t index) const;
|
||||
virtual std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type);
|
||||
virtual std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, bool is_packed);
|
||||
|
||||
std::unordered_set<std::string> local_variable_names;
|
||||
std::unordered_set<std::string> resource_names;
|
||||
|
187
spirv_msl.cpp
187
spirv_msl.cpp
@ -811,29 +811,9 @@ void CompilerMSL::align_struct(SPIRType &ib_type)
|
||||
curr_offset = 0;
|
||||
for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
|
||||
{
|
||||
// Align current offset to the current member's default alignment.
|
||||
size_t align_mask = get_declared_struct_member_alignment(ib_type, mbr_idx) - 1;
|
||||
curr_offset = uint32_t((curr_offset + align_mask) & ~align_mask);
|
||||
if (is_member_packable(ib_type, mbr_idx))
|
||||
set_member_decoration(ib_type_id, mbr_idx, DecorationCPacked);
|
||||
|
||||
// Fetch the member offset as declared in the SPIRV.
|
||||
uint32_t mbr_offset = get_member_decoration(ib_type_id, mbr_idx, DecorationOffset);
|
||||
if (curr_offset > mbr_offset)
|
||||
{
|
||||
uint32_t prev_mbr_idx = mbr_idx - 1;
|
||||
if (is_member_packable(ib_type, prev_mbr_idx))
|
||||
set_member_decoration(ib_type_id, prev_mbr_idx, DecorationCPacked);
|
||||
}
|
||||
|
||||
// Increment the current offset to be positioned immediately after the current member.
|
||||
curr_offset = mbr_offset + uint32_t(get_declared_struct_member_size(ib_type, mbr_idx));
|
||||
}
|
||||
|
||||
// Test the alignment of each member, and if a member is positioned farther than its
|
||||
// alignment and the end of the previous member, add a dummy padding member that will
|
||||
// be added before the current member when the delaration of this struct is emitted.
|
||||
curr_offset = 0;
|
||||
for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
|
||||
{
|
||||
// Align current offset to the current member's default alignment.
|
||||
size_t align_mask = get_declared_struct_member_alignment(ib_type, mbr_idx) - 1;
|
||||
curr_offset = uint32_t((curr_offset + align_mask) & ~align_mask);
|
||||
@ -858,14 +838,48 @@ void CompilerMSL::align_struct(SPIRType &ib_type)
|
||||
// variation that is smaller than the unpacked variation of that type.
|
||||
bool CompilerMSL::is_member_packable(SPIRType &ib_type, uint32_t index)
|
||||
{
|
||||
uint32_t mbr_type_id = ib_type.member_types[index];
|
||||
auto &mbr_type = get<SPIRType>(mbr_type_id);
|
||||
|
||||
// 3-element vectors (char3, uchar3, short3, ushort3, int3, uint3, half3, float3)
|
||||
if (mbr_type.vecsize == 3 && mbr_type.columns == 1)
|
||||
// We've already marked it as packable
|
||||
if (has_member_decoration(ib_type.self, index, DecorationCPacked))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
auto &mbr_type = get<SPIRType>(ib_type.member_types[index]);
|
||||
|
||||
// Only 3-element vectors or 3-row matrices need to be packed.
|
||||
if (mbr_type.vecsize != 3)
|
||||
return false;
|
||||
|
||||
// Only row-major matrices need to be packed.
|
||||
if (is_matrix(mbr_type) && !has_member_decoration(ib_type.self, index, DecorationRowMajor))
|
||||
return false;
|
||||
|
||||
uint32_t component_size = mbr_type.width / 8;
|
||||
uint32_t unpacked_mbr_size = component_size * (mbr_type.vecsize + 1) * mbr_type.columns;
|
||||
if (is_array(mbr_type))
|
||||
{
|
||||
// If member is an array, and the array stride is larger than the type needs, don't pack it.
|
||||
// Take into consideration multi-dimentional arrays.
|
||||
uint32_t md_elem_cnt = 1;
|
||||
size_t last_elem_idx = mbr_type.array.size() - 1;
|
||||
for (uint32_t i = 0; i < last_elem_idx; i++)
|
||||
md_elem_cnt *= max(to_array_size_literal(mbr_type, i), 1U);
|
||||
|
||||
uint32_t unpacked_array_stride = unpacked_mbr_size * md_elem_cnt;
|
||||
uint32_t array_stride = type_struct_member_array_stride(ib_type, index);
|
||||
return unpacked_array_stride > array_stride;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Pack if there is not enough space between this member and next.
|
||||
// If last member, only pack if it's a row-major matrix.
|
||||
if (index < ib_type.member_types.size() - 1)
|
||||
{
|
||||
uint32_t mbr_offset_curr = get_member_decoration(ib_type.self, index, DecorationOffset);
|
||||
uint32_t mbr_offset_next = get_member_decoration(ib_type.self, index + 1, DecorationOffset);
|
||||
return unpacked_mbr_size > mbr_offset_next - mbr_offset_curr;
|
||||
}
|
||||
else
|
||||
return is_matrix(mbr_type);
|
||||
}
|
||||
}
|
||||
|
||||
// Returns a combination of type ID and member index for use as hash key
|
||||
@ -902,11 +916,26 @@ void CompilerMSL::emit_header()
|
||||
statement("");
|
||||
statement("using namespace metal;");
|
||||
statement("");
|
||||
|
||||
for (auto &td : typedef_lines)
|
||||
statement(td);
|
||||
|
||||
if (!typedef_lines.empty())
|
||||
statement("");
|
||||
}
|
||||
|
||||
void CompilerMSL::add_pragma_line(const string &line)
|
||||
{
|
||||
pragma_lines.insert(line);
|
||||
auto rslt = pragma_lines.insert(line);
|
||||
if (rslt.second)
|
||||
force_recompile = true;
|
||||
}
|
||||
|
||||
void CompilerMSL::add_typedef_line(const string &line)
|
||||
{
|
||||
auto rslt = typedef_lines.insert(line);
|
||||
if (rslt.second)
|
||||
force_recompile = true;
|
||||
}
|
||||
|
||||
// Emits any needed custom function bodies.
|
||||
@ -1719,11 +1748,12 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
|
||||
case OpVectorTimesMatrix:
|
||||
case OpMatrixTimesVector:
|
||||
{
|
||||
// If the matrix needs transpose and it is square, just flip the multiply order.
|
||||
// If the matrix needs transpose and it is square or packed, just flip the multiply order.
|
||||
uint32_t mtx_id = ops[opcode == OpMatrixTimesVector ? 2 : 3];
|
||||
auto *e = maybe_get<SPIRExpression>(mtx_id);
|
||||
auto &t = expression_type(mtx_id);
|
||||
if (e && e->need_transpose && t.columns == t.vecsize)
|
||||
bool is_packed = has_decoration(mtx_id, DecorationCPacked);
|
||||
if (e && e->need_transpose && (t.columns == t.vecsize || is_packed))
|
||||
{
|
||||
e->need_transpose = false;
|
||||
emit_binary_op(ops[0], ops[1], ops[3], ops[2], "*");
|
||||
@ -2462,8 +2492,13 @@ bool CompilerMSL::is_non_native_row_major_matrix(uint32_t id)
|
||||
return false;
|
||||
|
||||
// Generate a function that will swap matrix elements from row-major to column-major.
|
||||
const auto type = expression_type(id);
|
||||
add_convert_row_major_matrix_function(type.columns, type.vecsize);
|
||||
// Packed row-matrix should just use transpose() function.
|
||||
if (!has_decoration(id, DecorationCPacked))
|
||||
{
|
||||
const auto type = expression_type(id);
|
||||
add_convert_row_major_matrix_function(type.columns, type.vecsize);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -2475,12 +2510,17 @@ bool CompilerMSL::member_is_non_native_row_major_matrix(const SPIRType &type, ui
|
||||
return false;
|
||||
|
||||
// Non-matrix or column-major matrix types do not need to be converted.
|
||||
if (!(combined_decoration_for_member(type, index) & (1ull << DecorationRowMajor)))
|
||||
if (!has_member_decoration(type.self, index, DecorationRowMajor))
|
||||
return false;
|
||||
|
||||
// Generate a function that will swap matrix elements from row-major to column-major.
|
||||
const auto mbr_type = get<SPIRType>(type.member_types[index]);
|
||||
add_convert_row_major_matrix_function(mbr_type.columns, mbr_type.vecsize);
|
||||
// Packed row-matrix should just use transpose() function.
|
||||
if (!has_member_decoration(type.self, index, DecorationCPacked))
|
||||
{
|
||||
const auto mbr_type = get<SPIRType>(type.member_types[index]);
|
||||
add_convert_row_major_matrix_function(mbr_type.columns, mbr_type.vecsize);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -2507,20 +2547,19 @@ void CompilerMSL::add_convert_row_major_matrix_function(uint32_t cols, uint32_t
|
||||
|
||||
auto rslt = spv_function_implementations.insert(spv_func);
|
||||
if (rslt.second)
|
||||
{
|
||||
add_pragma_line("#pragma clang diagnostic ignored \"-Wmissing-prototypes\"");
|
||||
force_recompile = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Wraps the expression string in a function call that converts the
|
||||
// row_major matrix result of the expression to a column_major matrix.
|
||||
string CompilerMSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type)
|
||||
string CompilerMSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, bool is_packed)
|
||||
{
|
||||
strip_enclosed_expression(exp_str);
|
||||
|
||||
string func_name;
|
||||
if (exp_type.columns == exp_type.vecsize)
|
||||
|
||||
// Square and packed matrices can just use transpose
|
||||
if (exp_type.columns == exp_type.vecsize || is_packed)
|
||||
func_name = "transpose";
|
||||
else
|
||||
func_name = string("spvConvertFromRowMajor") + to_string(exp_type.columns) + "x" + to_string(exp_type.vecsize);
|
||||
@ -2557,7 +2596,23 @@ void CompilerMSL::emit_struct_member(const SPIRType &type, uint32_t member_type_
|
||||
statement("char pad", to_string(index), "[", to_string(pad_len), "];");
|
||||
|
||||
// If this member is packed, mark it as so.
|
||||
string pack_pfx = member_is_packed_type(type, index) ? "packed_" : "";
|
||||
string pack_pfx = "";
|
||||
if (member_is_packed_type(type, index))
|
||||
{
|
||||
pack_pfx = "packed_";
|
||||
|
||||
// If we're packing a matrix, output an appropriate typedef
|
||||
if (membertype.vecsize > 1 && membertype.columns > 1)
|
||||
{
|
||||
string base_type = membertype.width == 16 ? "half" : "float";
|
||||
string td_line = "typedef ";
|
||||
td_line += base_type + to_string(membertype.vecsize) + "x" + to_string(membertype.columns);
|
||||
td_line += " " + pack_pfx;
|
||||
td_line += base_type + to_string(membertype.columns) + "x" + to_string(membertype.vecsize);
|
||||
td_line += ";";
|
||||
add_typedef_line(td_line);
|
||||
}
|
||||
}
|
||||
|
||||
statement(pack_pfx, type_to_glsl(membertype), " ", qualifier, to_member_name(type, index),
|
||||
member_attribute_qualifier(type, index), type_to_array_glsl(membertype), ";");
|
||||
@ -3523,7 +3578,6 @@ string CompilerMSL::built_in_func_arg(BuiltIn builtin, bool prefix_comma)
|
||||
// Returns the byte size of a struct member.
|
||||
size_t CompilerMSL::get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const
|
||||
{
|
||||
auto dec_mask = get_member_decoration_mask(struct_type.self, index);
|
||||
auto &type = get<SPIRType>(struct_type.member_types[index]);
|
||||
|
||||
switch (type.basetype)
|
||||
@ -3538,10 +3592,6 @@ size_t CompilerMSL::get_declared_struct_member_size(const SPIRType &struct_type,
|
||||
|
||||
default:
|
||||
{
|
||||
size_t component_size = type.width / 8;
|
||||
unsigned vecsize = type.vecsize;
|
||||
unsigned columns = type.columns;
|
||||
|
||||
// For arrays, we can use ArrayStride to get an easy check.
|
||||
// Runtime arrays will have zero size so force to min of one.
|
||||
if (!type.array.empty())
|
||||
@ -3550,29 +3600,15 @@ size_t CompilerMSL::get_declared_struct_member_size(const SPIRType &struct_type,
|
||||
if (type.basetype == SPIRType::Struct)
|
||||
return get_declared_struct_size(type);
|
||||
|
||||
if (columns == 1) // An unpacked 3-element vector is the same size as a 4-element vector.
|
||||
{
|
||||
if (!(dec_mask & (1ull << DecorationCPacked)))
|
||||
{
|
||||
if (vecsize == 3)
|
||||
vecsize = 4;
|
||||
}
|
||||
}
|
||||
else // For matrices, a 3-element column is the same size as a 4-element column.
|
||||
{
|
||||
if (dec_mask & (1ull << DecorationColMajor))
|
||||
{
|
||||
if (vecsize == 3)
|
||||
vecsize = 4;
|
||||
}
|
||||
else if (dec_mask & (1ull << DecorationRowMajor))
|
||||
{
|
||||
if (columns == 3)
|
||||
columns = 4;
|
||||
}
|
||||
}
|
||||
uint32_t component_size = type.width / 8;
|
||||
uint32_t vecsize = type.vecsize;
|
||||
uint32_t columns = type.columns;
|
||||
|
||||
return vecsize * columns * component_size;
|
||||
// An unpacked 3-element vector or matrix column is the same memory size as a 4-element.
|
||||
if (vecsize == 3 && !has_member_decoration(struct_type.self, index, DecorationCPacked))
|
||||
vecsize = 4;
|
||||
|
||||
return component_size * vecsize * columns;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -3597,16 +3633,13 @@ size_t CompilerMSL::get_declared_struct_member_alignment(const SPIRType &struct_
|
||||
|
||||
default:
|
||||
{
|
||||
// Alignment of packed type is the same as the underlying component size.
|
||||
// Alignment of unpacked type is the same as the type size (or one matrix column).
|
||||
// Alignment of packed type is the same as the underlying component or column size.
|
||||
// Alignment of unpacked type is the same as the vector size.
|
||||
// Alignment of 3-elements vector is the same as 4-elements (including packed using column).
|
||||
if (member_is_packed_type(struct_type, index))
|
||||
return type.width / 8;
|
||||
return (type.width / 8) * (type.columns == 3 ? 4 : type.columns);
|
||||
else
|
||||
{
|
||||
// Divide by array size and colum count. Runtime arrays will have zero size so force to min of one.
|
||||
uint32_t array_size = type.array.empty() ? 1 : max(type.array.back(), 1U);
|
||||
return get_declared_struct_member_size(struct_type, index) / (type.columns * array_size);
|
||||
}
|
||||
return (type.width / 8) * (type.vecsize == 3 ? 4 : type.vecsize);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -211,7 +211,7 @@ protected:
|
||||
void declare_constant_arrays();
|
||||
bool is_non_native_row_major_matrix(uint32_t id) override;
|
||||
bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) override;
|
||||
std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type) override;
|
||||
std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, bool is_packed) override;
|
||||
|
||||
void preprocess_op_codes();
|
||||
void localize_global_variables();
|
||||
@ -265,6 +265,7 @@ protected:
|
||||
bool op1_is_pointer = false, uint32_t op2 = 0);
|
||||
const char *get_memory_order(uint32_t spv_mem_sem);
|
||||
void add_pragma_line(const std::string &line);
|
||||
void add_typedef_line(const std::string &line);
|
||||
void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem);
|
||||
void emit_array_copy(const std::string &lhs, uint32_t rhs_id) override;
|
||||
void build_implicit_builtins();
|
||||
@ -276,6 +277,7 @@ protected:
|
||||
std::map<uint32_t, uint32_t> non_stage_in_input_var_ids;
|
||||
std::unordered_map<MSLStructMemberKey, uint32_t> struct_member_padding;
|
||||
std::set<std::string> pragma_lines;
|
||||
std::set<std::string> typedef_lines;
|
||||
std::vector<MSLResourceBinding *> resource_bindings;
|
||||
MSLResourceBinding next_metal_resource_index;
|
||||
uint32_t stage_in_var_id = 0;
|
||||
|
Loading…
Reference in New Issue
Block a user