GLSL: Support a workaround for loading row-major matrices.

On AMD Windows OpenGL, it has been reported that we need to load
row-major matrices via a wrapper function for them to be read correctly.
Author: Hans-Kristian Arntzen
Date:   2020-10-14 15:51:49 +02:00
Commit: e47561a28b (parent 5ae9153a78)
11 changed files with 242 additions and 11 deletions
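
In essence, the workaround routes every affected UBO load through a no-op wrapper overload, one per loaded type; the function call alone is enough to make the affected drivers honor the row_major layout. A minimal sketch, distilled from the reference outputs in the diff below:

    // Identity wrapper, emitted once per loaded type:
    mat4 SPIRV_Cross_workaround_load_row_major(mat4 wrap) { return wrap; }

    // A direct matrix load from a row-major UBO member is then rewritten from
    //     vec4 v = (_223 * _11.lightVP[_222]) * vec4(fragWorld, 1.0);
    // to
    //     vec4 v = (_223 * SPIRV_Cross_workaround_load_row_major(_11.lightVP[_222])) * vec4(fragWorld, 1.0);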

View File

@@ -12,6 +12,8 @@ layout(location = 0) out int _entryPointOutput;
 
 int _231;
 
+mat4 SPIRV_Cross_workaround_load_row_major(mat4 wrap) { return wrap; }
+
 void main()
 {
     int _228;
@@ -35,7 +37,7 @@ void main()
             _223 = mat4(vec4(1.0, 0.0, 0.0, 0.0), vec4(0.0, 1.0, 0.0, 0.0), vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0));
             break;
         } while(false);
-        vec4 _170 = (_223 * _11.lightVP[_222]) * vec4(fragWorld, 1.0);
+        vec4 _170 = (_223 * SPIRV_Cross_workaround_load_row_major(_11.lightVP[_222])) * vec4(fragWorld, 1.0);
         float _172 = _170.z;
         float _179 = _170.x;
         float _181 = _170.y;

View File

@@ -0,0 +1,46 @@
+#version 450
+
+struct RowMajor
+{
+    mat4 B;
+};
+
+struct NestedRowMajor
+{
+    RowMajor rm;
+};
+
+layout(binding = 2, std140) uniform UBO3
+{
+    layout(row_major) NestedRowMajor rm2;
+} _17;
+
+layout(binding = 1, std140) uniform UBO2
+{
+    layout(row_major) RowMajor rm;
+} _35;
+
+layout(binding = 0, std140) uniform UBO
+{
+    layout(row_major) mat4 A;
+    mat4 C;
+} _42;
+
+layout(binding = 3, std140) uniform UBONoWorkaround
+{
+    mat4 D;
+} _56;
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) in vec4 Clip;
+
+NestedRowMajor SPIRV_Cross_workaround_load_row_major(NestedRowMajor wrap) { return wrap; }
+mat4 SPIRV_Cross_workaround_load_row_major(mat4 wrap) { return wrap; }
+
+void main()
+{
+    FragColor = (((SPIRV_Cross_workaround_load_row_major(_17.rm2).rm.B * SPIRV_Cross_workaround_load_row_major(_35.rm.B)) * SPIRV_Cross_workaround_load_row_major(_42.A)) * SPIRV_Cross_workaround_load_row_major(_42.C)) * Clip;
+    FragColor += (_56.D * Clip);
+    FragColor += (_42.A[1] * Clip);
+}
+

View File

@@ -11,8 +11,10 @@ uniform Buffer _13;
 
 attribute vec4 Position;
 
+mat4 SPIRV_Cross_workaround_load_row_major(mat4 wrap) { return wrap; }
+
 void main()
 {
-    gl_Position = (((_13.M * (Position * _13.MVPRowMajor)) + (_13.M * (_13.MVPColMajor * Position))) + (_13.M * (_13.MVPRowMajor * Position))) + (_13.M * (Position * _13.MVPColMajor));
+    gl_Position = (((SPIRV_Cross_workaround_load_row_major(_13.M) * (Position * _13.MVPRowMajor)) + (SPIRV_Cross_workaround_load_row_major(_13.M) * (SPIRV_Cross_workaround_load_row_major(_13.MVPColMajor) * Position))) + (SPIRV_Cross_workaround_load_row_major(_13.M) * (_13.MVPRowMajor * Position))) + (SPIRV_Cross_workaround_load_row_major(_13.M) * (Position * SPIRV_Cross_workaround_load_row_major(_13.MVPColMajor)));
 }

View File

@@ -8,9 +8,11 @@ layout(binding = 0, std140) uniform Block
 layout(location = 0) in vec4 a_position;
 layout(location = 0) out mediump float v_vtxResult;
 
+mat2x3 SPIRV_Cross_workaround_load_row_major(mat2x3 wrap) { return wrap; }
+
 void main()
 {
     gl_Position = a_position;
-    v_vtxResult = ((float(abs(_104.var[0][0][0].x - 2.0) < 0.0500000007450580596923828125) * float(abs(_104.var[0][0][0].y - 6.0) < 0.0500000007450580596923828125)) * float(abs(_104.var[0][0][0].z - (-6.0)) < 0.0500000007450580596923828125)) * ((float(abs(_104.var[0][0][1].x) < 0.0500000007450580596923828125) * float(abs(_104.var[0][0][1].y - 5.0) < 0.0500000007450580596923828125)) * float(abs(_104.var[0][0][1].z - 5.0) < 0.0500000007450580596923828125));
+    v_vtxResult = ((float(abs(SPIRV_Cross_workaround_load_row_major(_104.var[0][0])[0].x - 2.0) < 0.0500000007450580596923828125) * float(abs(SPIRV_Cross_workaround_load_row_major(_104.var[0][0])[0].y - 6.0) < 0.0500000007450580596923828125)) * float(abs(SPIRV_Cross_workaround_load_row_major(_104.var[0][0])[0].z - (-6.0)) < 0.0500000007450580596923828125)) * ((float(abs(SPIRV_Cross_workaround_load_row_major(_104.var[0][0])[1].x) < 0.0500000007450580596923828125) * float(abs(SPIRV_Cross_workaround_load_row_major(_104.var[0][0])[1].y - 5.0) < 0.0500000007450580596923828125)) * float(abs(SPIRV_Cross_workaround_load_row_major(_104.var[0][0])[1].z - 5.0) < 0.0500000007450580596923828125));
 }

View File

@@ -10,6 +10,8 @@ layout(binding = 0, std140) uniform Foo
 layout(location = 0) in vec3 fragWorld;
 layout(location = 0) out int _entryPointOutput;
 
+mat4 SPIRV_Cross_workaround_load_row_major(mat4 wrap) { return wrap; }
+
 mat4 GetClip2TexMatrix()
 {
     if (_11.test == 0)
@@ -23,7 +25,7 @@ int GetCascade(vec3 fragWorldPosition)
 {
     for (uint cascadeIndex = 0u; cascadeIndex < _11.shadowCascadesNum; cascadeIndex++)
     {
-        mat4 worldToShadowMap = GetClip2TexMatrix() * _11.lightVP[cascadeIndex];
+        mat4 worldToShadowMap = GetClip2TexMatrix() * SPIRV_Cross_workaround_load_row_major(_11.lightVP[cascadeIndex]);
         vec4 fragShadowMapPos = worldToShadowMap * vec4(fragWorldPosition, 1.0);
         if ((((fragShadowMapPos.z >= 0.0) && (fragShadowMapPos.z <= 1.0)) && (max(fragShadowMapPos.x, fragShadowMapPos.y) <= 1.0)) && (min(fragShadowMapPos.x, fragShadowMapPos.y) >= 0.0))
         {

View File

@@ -0,0 +1,48 @@
+#version 450
+
+struct RowMajor
+{
+    mat4 B;
+};
+
+struct NestedRowMajor
+{
+    RowMajor rm;
+};
+
+layout(binding = 2, std140) uniform UBO3
+{
+    layout(row_major) NestedRowMajor rm2;
+} _17;
+
+layout(binding = 1, std140) uniform UBO2
+{
+    layout(row_major) RowMajor rm;
+} _35;
+
+layout(binding = 0, std140) uniform UBO
+{
+    layout(row_major) mat4 A;
+    mat4 C;
+} _42;
+
+layout(binding = 3, std140) uniform UBONoWorkaround
+{
+    mat4 D;
+} _56;
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) in vec4 Clip;
+
+NestedRowMajor SPIRV_Cross_workaround_load_row_major(NestedRowMajor wrap) { return wrap; }
+mat4 SPIRV_Cross_workaround_load_row_major(mat4 wrap) { return wrap; }
+
+void main()
+{
+    NestedRowMajor rm2_loaded;
+    rm2_loaded.rm.B = SPIRV_Cross_workaround_load_row_major(_17.rm2).rm.B;
+    FragColor = (((rm2_loaded.rm.B * SPIRV_Cross_workaround_load_row_major(_35.rm.B)) * SPIRV_Cross_workaround_load_row_major(_42.A)) * SPIRV_Cross_workaround_load_row_major(_42.C)) * Clip;
+    FragColor += (_56.D * Clip);
+    FragColor += (_42.A[1] * Clip);
+}
+

View File

@@ -11,12 +11,14 @@ uniform Buffer _13;
 
 attribute vec4 Position;
 
+mat4 SPIRV_Cross_workaround_load_row_major(mat4 wrap) { return wrap; }
+
 void main()
 {
-    vec4 c0 = _13.M * (Position * _13.MVPRowMajor);
-    vec4 c1 = _13.M * (_13.MVPColMajor * Position);
-    vec4 c2 = _13.M * (_13.MVPRowMajor * Position);
-    vec4 c3 = _13.M * (Position * _13.MVPColMajor);
+    vec4 c0 = SPIRV_Cross_workaround_load_row_major(_13.M) * (Position * _13.MVPRowMajor);
+    vec4 c1 = SPIRV_Cross_workaround_load_row_major(_13.M) * (SPIRV_Cross_workaround_load_row_major(_13.MVPColMajor) * Position);
+    vec4 c2 = SPIRV_Cross_workaround_load_row_major(_13.M) * (_13.MVPRowMajor * Position);
+    vec4 c3 = SPIRV_Cross_workaround_load_row_major(_13.M) * (Position * SPIRV_Cross_workaround_load_row_major(_13.MVPColMajor));
     gl_Position = ((c0 + c1) + c2) + c3;
 }
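
Note that the reversed operand order for `MVPRowMajor` in these legacy (pre-layout-qualifier) targets is unrelated to the new wrapper: it relies on the GLSL identity `v * M == transpose(M) * v`, which lets SPIRV-Cross emulate a row-major matrix whose storage is transposed. A small illustrative sketch (the helper name is hypothetical):

    // In GLSL, a vector on the left of `*` is treated as a row vector, so
    //     v * M == transpose(M) * v
    // A matrix stored transposed (row-major emulation) can therefore be
    // applied by flipping the operand order instead of calling transpose():
    vec4 apply_row_major(mat4 stored_transposed, vec4 v)
    {
        return v * stored_transposed;
    }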

View File

@@ -8,6 +8,8 @@ layout(binding = 0, std140) uniform Block
 layout(location = 0) in vec4 a_position;
 layout(location = 0) out mediump float v_vtxResult;
 
+mat2x3 SPIRV_Cross_workaround_load_row_major(mat2x3 wrap) { return wrap; }
+
 mediump float compare_float(float a, float b)
 {
     return float(abs(a - b) < 0.0500000007450580596923828125);
@@ -37,7 +39,7 @@ void main()
 {
     gl_Position = a_position;
     mediump float result = 1.0;
-    mat2x3 param = _104.var[0][0];
+    mat2x3 param = SPIRV_Cross_workaround_load_row_major(_104.var[0][0]);
     mat2x3 param_1 = mat2x3(vec3(2.0, 6.0, -6.0), vec3(0.0, 5.0, 5.0));
     result *= compare_mat2x3(param, param_1);
     v_vtxResult = result;

View File

@@ -0,0 +1,44 @@
+#version 450
+
+struct RowMajor
+{
+    mat4 B;
+};
+
+struct NestedRowMajor
+{
+    RowMajor rm;
+};
+
+layout(set = 0, binding = 0, row_major) uniform UBO
+{
+    mat4 A;
+    layout(column_major) mat4 C; // This should also be worked around.
+};
+
+layout(set = 0, binding = 1, row_major) uniform UBO2
+{
+    RowMajor rm;
+};
+
+layout(set = 0, binding = 2, row_major) uniform UBO3
+{
+    NestedRowMajor rm2;
+};
+
+layout(set = 0, binding = 3) uniform UBONoWorkaround
+{
+    mat4 D;
+};
+
+layout(location = 0) in vec4 Clip;
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+    NestedRowMajor rm2_loaded = rm2;
+    FragColor = rm2_loaded.rm.B * rm.B * A * C * Clip;
+    FragColor += D * Clip;
+    FragColor += A[1] * Clip;
+}
+
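
Tracing one statement of this test through the reference outputs above makes the rewrite concrete; all names are taken from the diff:

    // Input (this file):
    NestedRowMajor rm2_loaded = rm2;

    // Expected output in the forced-temporary variant shown earlier:
    NestedRowMajor rm2_loaded;
    rm2_loaded.rm.B = SPIRV_Cross_workaround_load_row_major(_17.rm2).rm.B;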

View File

@@ -511,6 +511,7 @@ string CompilerGLSL::compile()
 	{
 		// only NV_gpu_shader5 supports divergent indexing on OpenGL, and it does so without extra qualifiers
 		backend.nonuniform_qualifier = "";
+		backend.needs_row_major_load_workaround = true;
 	}
 	backend.force_gl_in_out_block = true;
 	backend.supports_extensions = true;
@@ -3798,6 +3799,17 @@ void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
 			statement("");
 		}
 	}
+
+	if (!workaround_ubo_load_overload_types.empty())
+	{
+		for (auto &type_id : workaround_ubo_load_overload_types)
+		{
+			auto &type = get<SPIRType>(type_id);
+			statement(type_to_glsl(type), " SPIRV_Cross_workaround_load_row_major(", type_to_glsl(type),
+			          " wrap) { return wrap; }");
+		}
+		statement("");
+	}
 }
 
 // Returns a string representation of the ID, usable as a function arg.
@@ -9496,11 +9508,15 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		if (forward && ptr_expression)
 			ptr_expression->need_transpose = old_need_transpose;
 
+		bool flattened = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0;
+
+		if (backend.needs_row_major_load_workaround && !is_non_native_row_major_matrix(ptr) && !flattened)
+			rewrite_load_for_wrapped_row_major(expr, result_type, ptr);
+
 		// By default, suppress usage tracking since using same expression multiple times does not imply any extra work.
 		// However, if we try to load a complex, composite object from a flattened buffer,
 		// we should avoid emitting the same code over and over and lower the result to a temporary.
-		bool usage_tracking = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0 &&
-		                      (type.basetype == SPIRType::Struct || (type.columns > 1));
+		bool usage_tracking = flattened && (type.basetype == SPIRType::Struct || (type.columns > 1));
 
 		SPIRExpression *e = nullptr;
 		if (!forward && expression_is_non_value_type_array(ptr))
@@ -15087,3 +15103,63 @@ CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result()
 	weights[KHR_shader_subgroup_basic] = big_num;
 	weights[KHR_shader_subgroup_vote] = big_num;
 }
+
+void CompilerGLSL::request_workaround_wrapper_overload(TypeID id)
+{
+	// Must be ordered to maintain deterministic output, so vector is appropriate.
+	if (find(begin(workaround_ubo_load_overload_types), end(workaround_ubo_load_overload_types), id) ==
+	    end(workaround_ubo_load_overload_types))
+	{
+		force_recompile();
+		workaround_ubo_load_overload_types.push_back(id);
+	}
+}
+
+void CompilerGLSL::rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr)
+{
+	// Loading row-major matrices from UBOs on older AMD Windows OpenGL drivers is problematic.
+	// To load these types correctly, we must first wrap them in a dummy function whose only purpose is to
+	// ensure that the row_major decoration is actually respected.
+	auto *var = maybe_get_backing_variable(ptr);
+	if (!var)
+		return;
+
+	auto &backing_type = get<SPIRType>(var->basetype);
+	bool is_ubo = backing_type.basetype == SPIRType::Struct &&
+	              backing_type.storage == StorageClassUniform &&
+	              has_decoration(backing_type.self, DecorationBlock);
+	if (!is_ubo)
+		return;
+
+	auto *type = &get<SPIRType>(loaded_type);
+	bool rewrite = false;
+
+	if (is_matrix(*type))
+	{
+		// To avoid adding a lot of unnecessary meta tracking to forward the row_major state,
+		// we simply look at the base struct itself. It is exceptionally rare to mix and match row-major/col-major state.
+		// If there is any row-major action going on, we apply the workaround.
+		// It is harmless to apply the workaround to column-major matrices, so this is still a valid solution.
+		// If an access chain occurred, the workaround is not required, so loads of vectors or scalars do not need it.
+		type = &backing_type;
+	}
+
+	if (type->basetype == SPIRType::Struct)
+	{
+		// If we're loading a struct where any member is a row-major matrix, apply the workaround.
+		for (uint32_t i = 0; i < uint32_t(type->member_types.size()); i++)
+		{
+			if (combined_decoration_for_member(*type, i).get(DecorationRowMajor))
+			{
+				rewrite = true;
+				break;
+			}
+		}
+	}
+
+	if (rewrite)
+	{
+		request_workaround_wrapper_overload(loaded_type);
+		expr = join("SPIRV_Cross_workaround_load_row_major(", expr, ")");
+	}
+}
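
Taken together, the effect of rewrite_load_for_wrapped_row_major on generated GLSL can be summarized as follows (a sketch distilled from the test shaders above; _42 and _56 are the uniform instance names from the diff):

    // Block with any row_major member: whole-matrix loads are wrapped,
    // including the column-major member C (harmless, and avoids tracking
    // per-member row-major state):
    FragColor = SPIRV_Cross_workaround_load_row_major(_42.A) * Clip;
    FragColor += SPIRV_Cross_workaround_load_row_major(_42.C) * Clip;

    // Access chains that resolve to a vector or scalar skip the workaround:
    FragColor += _42.A[1] * Clip;

    // Blocks with no row-major members are left untouched:
    FragColor += _56.D * Clip;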

View File

@@ -560,6 +560,7 @@ protected:
 		bool support_small_type_sampling_result = false;
 		bool support_case_fallthrough = true;
 		bool use_array_constructor = false;
+		bool needs_row_major_load_workaround = false;
 	} backend;
 
 	void emit_struct(SPIRType &type);
@@ -784,6 +785,10 @@ protected:
 	// Currently used by NMin/Max/Clamp implementations.
 	std::unordered_map<uint32_t, uint32_t> extra_sub_expressions;
 
+	SmallVector<TypeID> workaround_ubo_load_overload_types;
+	void request_workaround_wrapper_overload(TypeID id);
+	void rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr);
+
 	uint32_t statement_count = 0;
 
 	inline bool is_legacy() const