MSL: Use more appropriate padded types.

This commit is contained in:
Hans-Kristian Arntzen 2023-11-27 16:30:24 +01:00
parent 57dbfa0400
commit 81c047c3ed
4 changed files with 67 additions and 56 deletions

View File

@@ -5,23 +5,21 @@
using namespace metal;
struct half8 { alignas(16) half4 data; half4 padding_for_std140_fix_your_shader; };
using half2x8 = half8[2];
using half3x8 = half8[3];
using half4x8 = half8[4];
struct ushort8 { alignas(16) ushort4 data; ushort4 padding_for_std140_fix_your_shader; };
struct short8 { alignas(16) short4 data; short4 padding_for_std140_fix_your_shader; };
template <typename T>
struct spvPaddedStd140 { alignas(16) T data; };
template <typename T, int n>
using spvPaddedStd140Matrix = spvPaddedStd140<T>[n];
struct Foo
{
half2x8 c23;
half3x8 c32;
half3x8 r23;
half2x8 r32;
half8 h1[6];
half8 h2[6];
half8 h3[6];
half8 h4[6];
spvPaddedStd140Matrix<half3, 2> c23;
spvPaddedStd140Matrix<half2, 3> c32;
spvPaddedStd140Matrix<half2, 3> r23;
spvPaddedStd140Matrix<half3, 2> r32;
spvPaddedStd140<half> h1[6];
spvPaddedStd140<half2> h2[6];
spvPaddedStd140<half3> h3[6];
spvPaddedStd140<half4> h4[6];
};
struct main0_out

View File

@@ -5,39 +5,37 @@
using namespace metal;
struct half8 { alignas(16) half4 data; half4 padding_for_std140_fix_your_shader; };
using half2x8 = half8[2];
using half3x8 = half8[3];
using half4x8 = half8[4];
struct ushort8 { alignas(16) ushort4 data; ushort4 padding_for_std140_fix_your_shader; };
struct short8 { alignas(16) short4 data; short4 padding_for_std140_fix_your_shader; };
template <typename T>
struct spvPaddedStd140 { alignas(16) T data; };
template <typename T, int n>
using spvPaddedStd140Matrix = spvPaddedStd140<T>[n];
struct Foo
{
half2x8 c22;
half2x8 c22arr[3];
half2x8 c23;
half2x8 c24;
half3x8 c32;
half3x8 c33;
half3x8 c34;
half4x8 c42;
half4x8 c43;
half4x8 c44;
half2x8 r22;
half2x8 r22arr[3];
half3x8 r23;
half4x8 r24;
half2x8 r32;
half3x8 r33;
half4x8 r34;
half2x8 r42;
half3x8 r43;
half4x8 r44;
half8 h1[6];
half8 h2[6];
half8 h3[6];
half8 h4[6];
spvPaddedStd140Matrix<half2, 2> c22;
spvPaddedStd140Matrix<half2, 2> c22arr[3];
spvPaddedStd140Matrix<half3, 2> c23;
spvPaddedStd140Matrix<half4, 2> c24;
spvPaddedStd140Matrix<half2, 3> c32;
spvPaddedStd140Matrix<half3, 3> c33;
spvPaddedStd140Matrix<half4, 3> c34;
spvPaddedStd140Matrix<half2, 4> c42;
spvPaddedStd140Matrix<half3, 4> c43;
spvPaddedStd140Matrix<half4, 4> c44;
spvPaddedStd140Matrix<half2, 2> r22;
spvPaddedStd140Matrix<half2, 2> r22arr[3];
spvPaddedStd140Matrix<half2, 3> r23;
spvPaddedStd140Matrix<half2, 4> r24;
spvPaddedStd140Matrix<half3, 2> r32;
spvPaddedStd140Matrix<half3, 3> r33;
spvPaddedStd140Matrix<half3, 4> r34;
spvPaddedStd140Matrix<half4, 2> r42;
spvPaddedStd140Matrix<half4, 3> r43;
spvPaddedStd140Matrix<half4, 4> r44;
spvPaddedStd140<half> h1[6];
spvPaddedStd140<half2> h2[6];
spvPaddedStd140<half3> h3[6];
spvPaddedStd140<half4> h4[6];
};
struct main0_out

View File

@@ -4781,7 +4781,7 @@ void CompilerMSL::ensure_member_packing_rules_msl(SPIRType &ib_type, uint32_t in
if (elems_per_stride == 8)
{
if (mbr_type.width == 16)
add_spv_func_and_recompile(SPVFuncImplHalfStd140);
add_spv_func_and_recompile(SPVFuncImplPaddedStd140);
else
SPIRV_CROSS_THROW("Unexpected type in std140 wide array resolve.");
}
@@ -4824,7 +4824,7 @@ void CompilerMSL::ensure_member_packing_rules_msl(SPIRType &ib_type, uint32_t in
{
if (mbr_type.basetype != SPIRType::Half)
SPIRV_CROSS_THROW("Unexpected type in std140 wide matrix stride resolve.");
add_spv_func_and_recompile(SPVFuncImplHalfStd140);
add_spv_func_and_recompile(SPVFuncImplPaddedStd140);
}
bool row_major = has_member_decoration(ib_type.self, index, DecorationRowMajor);
@@ -7361,15 +7361,12 @@ void CompilerMSL::emit_custom_functions()
}
break;
case SPVFuncImplHalfStd140:
case SPVFuncImplPaddedStd140:
// .data is used in access chain.
statement("struct half8 { alignas(16) half4 data; half4 padding_for_std140_fix_your_shader; };");
// Physical type remapping is used to load/store full matrices anyway.
statement("using half2x8 = half8[2];");
statement("using half3x8 = half8[3];");
statement("using half4x8 = half8[4];");
statement("struct ushort8 { alignas(16) ushort4 data; ushort4 padding_for_std140_fix_your_shader; };");
statement("struct short8 { alignas(16) short4 data; short4 padding_for_std140_fix_your_shader; };");
statement("template <typename T>");
statement("struct spvPaddedStd140 { alignas(16) T data; };");
statement("template <typename T, int n>");
statement("using spvPaddedStd140Matrix = spvPaddedStd140<T>[n];");
statement("");
break;
@@ -11817,6 +11814,7 @@ void CompilerMSL::emit_fixup()
string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
const string &qualifier)
{
uint32_t orig_member_type_id = member_type_id;
if (member_is_remapped_physical_type(type, index))
member_type_id = get_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID);
auto &physical_type = get<SPIRType>(member_type_id);
@@ -11928,7 +11926,24 @@ string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_
array_type = type_to_array_glsl(physical_type);
}
auto result = join(pack_pfx, type_to_glsl(*declared_type, orig_id, true), " ", qualifier,
string decl_type;
if (declared_type->vecsize > 4)
{
auto orig_type = get<SPIRType>(orig_member_type_id);
if (is_matrix(orig_type) && row_major)
swap(orig_type.vecsize, orig_type.columns);
orig_type.columns = 1;
decl_type = type_to_glsl(orig_type, orig_id, true);
if (declared_type->columns > 1)
decl_type = join("spvPaddedStd140Matrix<", decl_type, ", ", declared_type->columns, ">");
else
decl_type = join("spvPaddedStd140<", decl_type, ">");
}
else
decl_type = type_to_glsl(*declared_type, orig_id, true);
auto result = join(pack_pfx, decl_type, " ", qualifier,
to_member_name(type, index), member_attribute_qualifier(type, index), array_type, ";");
is_using_builtin_array = false;

View File

@@ -815,7 +815,7 @@ protected:
SPVFuncImplVariableDescriptor,
SPVFuncImplVariableSizedDescriptor,
SPVFuncImplVariableDescriptorArray,
SPVFuncImplHalfStd140
SPVFuncImplPaddedStd140
};
// If the underlying resource has been used for comparison then duplicate loads of that resource must be too