Deal correctly with non-forwarded packed loads.

Need to unpack the expression if we're not forwarding.
This commit is contained in:
Hans-Kristian Arntzen 2019-07-23 16:25:19 +02:00
parent 79f533b662
commit ebe109d91d
6 changed files with 208 additions and 16 deletions

View File

@ -23,7 +23,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant Block& _104 [[buffer(0
{
main0_out out = {};
out.gl_Position = in.a_position;
out.v_vtxResult = ((float(abs(float3(transpose(float3x2(_104.var[0][0][0].xy, _104.var[0][0][1].xy, _104.var[0][0][2].xy))[0][0], transpose(float3x2(_104.var[0][0][0].xy, _104.var[0][0][1].xy, _104.var[0][0][2].xy))[1][0], transpose(float3x2(_104.var[0][0][0].xy, _104.var[0][0][1].xy, _104.var[0][0][2].xy))[2][0])[0] - 2.0) < 0.0500000007450580596923828125) * float(abs(float3(transpose(float3x2(_104.var[0][0][0].xy, _104.var[0][0][1].xy, _104.var[0][0][2].xy))[0][0], transpose(float3x2(_104.var[0][0][0].xy, _104.var[0][0][1].xy, _104.var[0][0][2].xy))[1][0], transpose(float3x2(_104.var[0][0][0].xy, _104.var[0][0][1].xy, _104.var[0][0][2].xy))[2][0])[1] - 6.0) < 0.0500000007450580596923828125)) * float(abs(float3(transpose(float3x2(_104.var[0][0][0].xy, _104.var[0][0][1].xy, _104.var[0][0][2].xy))[0][0], transpose(float3x2(_104.var[0][0][0].xy, _104.var[0][0][1].xy, _104.var[0][0][2].xy))[1][0], transpose(float3x2(_104.var[0][0][0].xy, _104.var[0][0][1].xy, _104.var[0][0][2].xy))[2][0])[2] - (-6.0)) < 0.0500000007450580596923828125)) * ((float(abs(float3(transpose(float3x2(_104.var[0][0][0].xy, _104.var[0][0][1].xy, _104.var[0][0][2].xy))[0][1], transpose(float3x2(_104.var[0][0][0].xy, _104.var[0][0][1].xy, _104.var[0][0][2].xy))[1][1], transpose(float3x2(_104.var[0][0][0].xy, _104.var[0][0][1].xy, _104.var[0][0][2].xy))[2][1])[0]) < 0.0500000007450580596923828125) * float(abs(float3(transpose(float3x2(_104.var[0][0][0].xy, _104.var[0][0][1].xy, _104.var[0][0][2].xy))[0][1], transpose(float3x2(_104.var[0][0][0].xy, _104.var[0][0][1].xy, _104.var[0][0][2].xy))[1][1], transpose(float3x2(_104.var[0][0][0].xy, _104.var[0][0][1].xy, _104.var[0][0][2].xy))[2][1])[1] - 5.0) < 0.0500000007450580596923828125)) * float(abs(float3(transpose(float3x2(_104.var[0][0][0].xy, _104.var[0][0][1].xy, _104.var[0][0][2].xy))[0][1], transpose(float3x2(_104.var[0][0][0].xy, _104.var[0][0][1].xy, _104.var[0][0][2].xy))[1][1], transpose(float3x2(_104.var[0][0][0].xy, _104.var[0][0][1].xy, _104.var[0][0][2].xy))[2][1])[2] - 5.0) < 0.0500000007450580596923828125));
out.v_vtxResult = ((float(abs(float3(_104.var[0][0][0][0], _104.var[0][0][1][0], _104.var[0][0][2][0])[0] - 2.0) < 0.0500000007450580596923828125) * float(abs(float3(_104.var[0][0][0][0], _104.var[0][0][1][0], _104.var[0][0][2][0])[1] - 6.0) < 0.0500000007450580596923828125)) * float(abs(float3(_104.var[0][0][0][0], _104.var[0][0][1][0], _104.var[0][0][2][0])[2] - (-6.0)) < 0.0500000007450580596923828125)) * ((float(abs(float3(_104.var[0][0][0][1], _104.var[0][0][1][1], _104.var[0][0][2][1])[0]) < 0.0500000007450580596923828125) * float(abs(float3(_104.var[0][0][0][1], _104.var[0][0][1][1], _104.var[0][0][2][1])[1] - 5.0) < 0.0500000007450580596923828125)) * float(abs(float3(_104.var[0][0][0][1], _104.var[0][0][1][1], _104.var[0][0][2][1])[2] - 5.0) < 0.0500000007450580596923828125));
return out;
}

View File

@ -0,0 +1,19 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct SSBOScalar
{
float3 a;
float3x3 b;
float3x3 c;
};
kernel void main0(device SSBOScalar& _4 [[buffer(0)]])
{
float3x3 _20 = transpose(_4.b);
_4.b = _4.c;
_4.a = _20 * _4.a;
}

View File

@ -0,0 +1,23 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
typedef packed_float3 packed_rm_float3x3[3];
struct SSBOScalar
{
packed_float3 a;
packed_rm_float3x3 b;
packed_rm_float3x3 c;
};
kernel void main0(device SSBOScalar& _4 [[buffer(0)]])
{
float3x3 _20 = transpose(float3x3(float3(_4.b[0]), float3(_4.b[1]), float3(_4.b[2])));
_4.b[0] = float3x3(float3(_4.c[0]), float3(_4.c[1]), float3(_4.c[2]))[0];
_4.b[1] = float3x3(float3(_4.c[0]), float3(_4.c[1]), float3(_4.c[2]))[1];
_4.b[2] = float3x3(float3(_4.c[0]), float3(_4.c[1]), float3(_4.c[2]))[2];
_4.a = _20 * float3(_4.a);
}

View File

@ -0,0 +1,56 @@
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 7
; Bound: 23
; Schema: 0
OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main"
OpExecutionMode %main LocalSize 1 1 1
OpSource GLSL 450
OpSourceExtension "GL_EXT_scalar_block_layout"
OpName %main "main"
OpName %SSBOScalar "SSBOScalar"
OpMemberName %SSBOScalar 0 "a"
OpMemberName %SSBOScalar 1 "b"
OpMemberName %SSBOScalar 2 "c"
OpName %_ ""
OpMemberDecorate %SSBOScalar 0 Offset 0
OpMemberDecorate %SSBOScalar 1 RowMajor
OpMemberDecorate %SSBOScalar 1 Offset 16
OpMemberDecorate %SSBOScalar 1 MatrixStride 16
OpMemberDecorate %SSBOScalar 2 RowMajor
OpMemberDecorate %SSBOScalar 2 Offset 64
OpMemberDecorate %SSBOScalar 2 MatrixStride 16
OpDecorate %SSBOScalar BufferBlock
OpDecorate %_ DescriptorSet 0
OpDecorate %_ Binding 0
%void = OpTypeVoid
%3 = OpTypeFunction %void
%float = OpTypeFloat 32
%v3float = OpTypeVector %float 3
%mat3v3float = OpTypeMatrix %v3float 3
%SSBOScalar = OpTypeStruct %v3float %mat3v3float %mat3v3float
%_ptr_Uniform_SSBOScalar = OpTypePointer Uniform %SSBOScalar
%_ = OpVariable %_ptr_Uniform_SSBOScalar Uniform
%int = OpTypeInt 32 1
%int_0 = OpConstant %int 0
%int_1 = OpConstant %int 1
%int_2 = OpConstant %int 2
%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float
%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
%main = OpFunction %void None %3
%5 = OpLabel
%b_ptr = OpAccessChain %_ptr_Uniform_mat3v3float %_ %int_1
%c_ptr = OpAccessChain %_ptr_Uniform_mat3v3float %_ %int_2
%b = OpLoad %mat3v3float %b_ptr
%c = OpLoad %mat3v3float %c_ptr
OpStore %b_ptr %c
%19 = OpAccessChain %_ptr_Uniform_v3float %_ %int_0
%20 = OpLoad %v3float %19
%21 = OpMatrixTimesVector %v3float %b %20
%22 = OpAccessChain %_ptr_Uniform_v3float %_ %int_0
OpStore %22 %21
OpReturn
OpFunctionEnd

View File

@ -0,0 +1,56 @@
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 7
; Bound: 23
; Schema: 0
OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main"
OpExecutionMode %main LocalSize 1 1 1
OpSource GLSL 450
OpSourceExtension "GL_EXT_scalar_block_layout"
OpName %main "main"
OpName %SSBOScalar "SSBOScalar"
OpMemberName %SSBOScalar 0 "a"
OpMemberName %SSBOScalar 1 "b"
OpMemberName %SSBOScalar 2 "c"
OpName %_ ""
OpMemberDecorate %SSBOScalar 0 Offset 0
OpMemberDecorate %SSBOScalar 1 RowMajor
OpMemberDecorate %SSBOScalar 1 Offset 12
OpMemberDecorate %SSBOScalar 1 MatrixStride 12
OpMemberDecorate %SSBOScalar 2 RowMajor
OpMemberDecorate %SSBOScalar 2 Offset 48
OpMemberDecorate %SSBOScalar 2 MatrixStride 12
OpDecorate %SSBOScalar BufferBlock
OpDecorate %_ DescriptorSet 0
OpDecorate %_ Binding 0
%void = OpTypeVoid
%3 = OpTypeFunction %void
%float = OpTypeFloat 32
%v3float = OpTypeVector %float 3
%mat3v3float = OpTypeMatrix %v3float 3
%SSBOScalar = OpTypeStruct %v3float %mat3v3float %mat3v3float
%_ptr_Uniform_SSBOScalar = OpTypePointer Uniform %SSBOScalar
%_ = OpVariable %_ptr_Uniform_SSBOScalar Uniform
%int = OpTypeInt 32 1
%int_0 = OpConstant %int 0
%int_1 = OpConstant %int 1
%int_2 = OpConstant %int 2
%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float
%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
%main = OpFunction %void None %3
%5 = OpLabel
%b_ptr = OpAccessChain %_ptr_Uniform_mat3v3float %_ %int_1
%c_ptr = OpAccessChain %_ptr_Uniform_mat3v3float %_ %int_2
%b = OpLoad %mat3v3float %b_ptr
%c = OpLoad %mat3v3float %c_ptr
OpStore %b_ptr %c
%19 = OpAccessChain %_ptr_Uniform_v3float %_ %int_0
%20 = OpLoad %v3float %19
%21 = OpMatrixTimesVector %v3float %b %20
%22 = OpAccessChain %_ptr_Uniform_v3float %_ %int_0
OpStore %22 %21
OpReturn
OpFunctionEnd

View File

@ -3003,7 +3003,7 @@ string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read)
auto &e = get<SPIRExpression>(id);
if (e.base_expression)
return to_enclosed_expression(e.base_expression) + e.expression;
else if (e.need_transpose && !e.access_chain)
else if (e.need_transpose)
{
// This should not be reached for access chains, since we always deal explicitly with transpose state
// when consuming an access chain expression.
@ -6366,7 +6366,16 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
if (!chain_only)
{
// We handle transpose explicitly, so don't resolve that here.
auto *e = maybe_get<SPIRExpression>(base);
bool old_transpose = e && e->need_transpose;
if (e)
e->need_transpose = false;
expr = to_enclosed_expression(base, register_expression_read);
if (e)
e->need_transpose = old_transpose;
}
// Start traversing type hierarchy at the proper non-pointer types,
// but keep type_id referencing the original pointer for use below.
@ -7674,19 +7683,39 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
bool old_need_transpose = false;
auto *ptr_expression = maybe_get<SPIRExpression>(ptr);
if (ptr_expression && ptr_expression->need_transpose)
if (forward)
{
old_need_transpose = true;
ptr_expression->need_transpose = false;
need_transpose = true;
// If we're forwarding the load, we're also going to forward transpose state, so don't transpose while
// taking the expression.
if (ptr_expression && ptr_expression->need_transpose)
{
old_need_transpose = true;
ptr_expression->need_transpose = false;
need_transpose = true;
}
else if (is_non_native_row_major_matrix(ptr))
need_transpose = true;
}
else if (is_non_native_row_major_matrix(ptr))
need_transpose = true;
// If we are forwarding this load,
// don't register the read to access chain here, defer that to when we actually use the expression,
// using the add_implied_read_expression mechanism.
auto expr = to_dereferenced_expression(ptr, !forward);
string expr;
bool is_packed = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked);
bool is_remapped = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID);
if (forward || (!is_packed && !is_remapped))
{
// For the simple case, we do not need to deal with repacking.
expr = to_dereferenced_expression(ptr, false);
}
else
{
// If we are not forwarding the expression, we need to unpack and resolve any physical type remapping here before
// storing the expression to a temporary.
expr = to_unpacked_expression(ptr);
}
// We might need to bitcast in order to load from a builtin.
bitcast_from_builtin_load(ptr, expr, get<SPIRType>(result_type));
@ -7705,7 +7734,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
convert_non_uniform_expression(type, expr);
}
if (ptr_expression)
if (forward && ptr_expression)
ptr_expression->need_transpose = old_need_transpose;
// By default, suppress usage tracking since using same expression multiple times does not imply any extra work.
@ -7729,13 +7758,22 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
e->need_transpose = need_transpose;
register_read(id, ptr, forward);
// Pass through whether the result is of a packed type and the physical type ID.
if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked))
set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID))
if (forward)
{
set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID,
get_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID));
// Pass through whether the result is of a packed type and the physical type ID.
if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked))
set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID))
{
set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID,
get_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID));
}
}
else
{
// This might have been set on an earlier compilation iteration, force it to be unset.
unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
}
inherit_expression_dependencies(id, ptr);