From e47561a28b388361b71f71b3c33dae2da9505f28 Mon Sep 17 00:00:00 2001
From: Hans-Kristian Arntzen <post@arntzen-software.no>
Date: Wed, 14 Oct 2020 15:51:49 +0200
Subject: [PATCH] GLSL: Support a workaround for loading row-major matrices.

On AMD Windows OpenGL, it has been reported that we need to load
matrices via a wrapper function.
---
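Note: the workaround simply emits one identity wrapper overload per loaded
matrix/struct type and routes affected UBO loads through it. A minimal
sketch of the emitted pattern, borrowing declarations from the test shaders
below:

    #version 450

    layout(binding = 0, std140) uniform UBO
    {
        layout(row_major) mat4 A;
    } _42;

    layout(location = 0) in vec4 Clip;
    layout(location = 0) out vec4 FragColor;

    // Identity wrapper; its only purpose is to make the driver resolve the
    // row_major layout before the matrix is consumed.
    mat4 SPIRV_Cross_workaround_load_row_major(mat4 wrap) { return wrap; }

    void main()
    {
        // Emitted instead of plain `_42.A * Clip`, which the affected
        // drivers can mis-compile for row-major matrices.
        FragColor = SPIRV_Cross_workaround_load_row_major(_42.A) * Clip;
    }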
 ...op-body-dominator-continue-access.asm.frag |  4 +-
 .../frag/ubo-load-row-major-workaround.frag   | 46 +++++++++++
 .../shaders/legacy/vert/transpose.legacy.vert |  4 +-
 .../vert/read-from-row-major-array.vert       |  4 +-
 ...op-body-dominator-continue-access.asm.frag |  4 +-
 .../frag/ubo-load-row-major-workaround.frag   | 48 +++++++++++
 .../shaders/legacy/vert/transpose.legacy.vert | 10 ++-
 .../vert/read-from-row-major-array.vert       |  4 +-
 .../frag/ubo-load-row-major-workaround.frag   | 44 ++++++++++
 spirv_glsl.cpp                                | 80 ++++++++++++++++++-
 spirv_glsl.hpp                                |  5 ++
 11 files changed, 242 insertions(+), 11 deletions(-)
 create mode 100644 reference/opt/shaders/frag/ubo-load-row-major-workaround.frag
 create mode 100644 reference/shaders/frag/ubo-load-row-major-workaround.frag
 create mode 100644 shaders/frag/ubo-load-row-major-workaround.frag

diff --git a/reference/opt/shaders/asm/frag/loop-body-dominator-continue-access.asm.frag b/reference/opt/shaders/asm/frag/loop-body-dominator-continue-access.asm.frag
index df32f1a4..4df83d48 100644
--- a/reference/opt/shaders/asm/frag/loop-body-dominator-continue-access.asm.frag
+++ b/reference/opt/shaders/asm/frag/loop-body-dominator-continue-access.asm.frag
@@ -12,6 +12,8 @@ layout(location = 0) out int _entryPointOutput;
 
 int _231;
 
+mat4 SPIRV_Cross_workaround_load_row_major(mat4 wrap) { return wrap; }
+
 void main()
 {
     int _228;
@@ -35,7 +37,7 @@ void main()
             _223 = mat4(vec4(1.0, 0.0, 0.0, 0.0), vec4(0.0, 1.0, 0.0, 0.0), vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0));
             break;
         } while(false);
-        vec4 _170 = (_223 * _11.lightVP[_222]) * vec4(fragWorld, 1.0);
+        vec4 _170 = (_223 * SPIRV_Cross_workaround_load_row_major(_11.lightVP[_222])) * vec4(fragWorld, 1.0);
         float _172 = _170.z;
         float _179 = _170.x;
         float _181 = _170.y;
diff --git a/reference/opt/shaders/frag/ubo-load-row-major-workaround.frag b/reference/opt/shaders/frag/ubo-load-row-major-workaround.frag
new file mode 100644
index 00000000..6ed3f788
--- /dev/null
+++ b/reference/opt/shaders/frag/ubo-load-row-major-workaround.frag
@@ -0,0 +1,46 @@
+#version 450
+
+struct RowMajor
+{
+    mat4 B;
+};
+
+struct NestedRowMajor
+{
+    RowMajor rm;
+};
+
+layout(binding = 2, std140) uniform UBO3
+{
+    layout(row_major) NestedRowMajor rm2;
+} _17;
+
+layout(binding = 1, std140) uniform UBO2
+{
+    layout(row_major) RowMajor rm;
+} _35;
+
+layout(binding = 0, std140) uniform UBO
+{
+    layout(row_major) mat4 A;
+    mat4 C;
+} _42;
+
+layout(binding = 3, std140) uniform UBONoWorkaround
+{
+    mat4 D;
+} _56;
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) in vec4 Clip;
+
+NestedRowMajor SPIRV_Cross_workaround_load_row_major(NestedRowMajor wrap) { return wrap; }
+mat4 SPIRV_Cross_workaround_load_row_major(mat4 wrap) { return wrap; }
+
+void main()
+{
+    FragColor = (((SPIRV_Cross_workaround_load_row_major(_17.rm2).rm.B * SPIRV_Cross_workaround_load_row_major(_35.rm.B)) * SPIRV_Cross_workaround_load_row_major(_42.A)) * SPIRV_Cross_workaround_load_row_major(_42.C)) * Clip;
+    FragColor += (_56.D * Clip);
+    FragColor += (_42.A[1] * Clip);
+}
+
diff --git a/reference/opt/shaders/legacy/vert/transpose.legacy.vert b/reference/opt/shaders/legacy/vert/transpose.legacy.vert
index 0d30c0e2..d405ba70 100644
--- a/reference/opt/shaders/legacy/vert/transpose.legacy.vert
+++ b/reference/opt/shaders/legacy/vert/transpose.legacy.vert
@@ -11,8 +11,10 @@ uniform Buffer _13;
 
 attribute vec4 Position;
 
+mat4 SPIRV_Cross_workaround_load_row_major(mat4 wrap) { return wrap; }
+
 void main()
 {
-    gl_Position = (((_13.M * (Position * _13.MVPRowMajor)) + (_13.M * (_13.MVPColMajor * Position))) + (_13.M * (_13.MVPRowMajor * Position))) + (_13.M * (Position * _13.MVPColMajor));
+    gl_Position = (((SPIRV_Cross_workaround_load_row_major(_13.M) * (Position * _13.MVPRowMajor)) + (SPIRV_Cross_workaround_load_row_major(_13.M) * (SPIRV_Cross_workaround_load_row_major(_13.MVPColMajor) * Position))) + (SPIRV_Cross_workaround_load_row_major(_13.M) * (_13.MVPRowMajor * Position))) + (SPIRV_Cross_workaround_load_row_major(_13.M) * (Position * SPIRV_Cross_workaround_load_row_major(_13.MVPColMajor)));
 }
 
diff --git a/reference/opt/shaders/vert/read-from-row-major-array.vert b/reference/opt/shaders/vert/read-from-row-major-array.vert
index 25fc9495..6c31cae9 100644
--- a/reference/opt/shaders/vert/read-from-row-major-array.vert
+++ b/reference/opt/shaders/vert/read-from-row-major-array.vert
@@ -8,9 +8,11 @@ layout(binding = 0, std140) uniform Block
 layout(location = 0) in vec4 a_position;
 layout(location = 0) out mediump float v_vtxResult;
 
+mat2x3 SPIRV_Cross_workaround_load_row_major(mat2x3 wrap) { return wrap; }
+
 void main()
 {
     gl_Position = a_position;
-    v_vtxResult = ((float(abs(_104.var[0][0][0].x - 2.0) < 0.0500000007450580596923828125) * float(abs(_104.var[0][0][0].y - 6.0) < 0.0500000007450580596923828125)) * float(abs(_104.var[0][0][0].z - (-6.0)) < 0.0500000007450580596923828125)) * ((float(abs(_104.var[0][0][1].x) < 0.0500000007450580596923828125) * float(abs(_104.var[0][0][1].y - 5.0) < 0.0500000007450580596923828125)) * float(abs(_104.var[0][0][1].z - 5.0) < 0.0500000007450580596923828125));
+    v_vtxResult = ((float(abs(SPIRV_Cross_workaround_load_row_major(_104.var[0][0])[0].x - 2.0) < 0.0500000007450580596923828125) * float(abs(SPIRV_Cross_workaround_load_row_major(_104.var[0][0])[0].y - 6.0) < 0.0500000007450580596923828125)) * float(abs(SPIRV_Cross_workaround_load_row_major(_104.var[0][0])[0].z - (-6.0)) < 0.0500000007450580596923828125)) * ((float(abs(SPIRV_Cross_workaround_load_row_major(_104.var[0][0])[1].x) < 0.0500000007450580596923828125) * float(abs(SPIRV_Cross_workaround_load_row_major(_104.var[0][0])[1].y - 5.0) < 0.0500000007450580596923828125)) * float(abs(SPIRV_Cross_workaround_load_row_major(_104.var[0][0])[1].z - 5.0) < 0.0500000007450580596923828125));
 }
 
diff --git a/reference/shaders/asm/frag/loop-body-dominator-continue-access.asm.frag b/reference/shaders/asm/frag/loop-body-dominator-continue-access.asm.frag
index e1edccff..4302113d 100644
--- a/reference/shaders/asm/frag/loop-body-dominator-continue-access.asm.frag
+++ b/reference/shaders/asm/frag/loop-body-dominator-continue-access.asm.frag
@@ -10,6 +10,8 @@ layout(binding = 0, std140) uniform Foo
 layout(location = 0) in vec3 fragWorld;
 layout(location = 0) out int _entryPointOutput;
 
+mat4 SPIRV_Cross_workaround_load_row_major(mat4 wrap) { return wrap; }
+
 mat4 GetClip2TexMatrix()
 {
     if (_11.test == 0)
@@ -23,7 +25,7 @@ int GetCascade(vec3 fragWorldPosition)
 {
     for (uint cascadeIndex = 0u; cascadeIndex < _11.shadowCascadesNum; cascadeIndex++)
     {
-        mat4 worldToShadowMap = GetClip2TexMatrix() * _11.lightVP[cascadeIndex];
+        mat4 worldToShadowMap = GetClip2TexMatrix() * SPIRV_Cross_workaround_load_row_major(_11.lightVP[cascadeIndex]);
         vec4 fragShadowMapPos = worldToShadowMap * vec4(fragWorldPosition, 1.0);
         if ((((fragShadowMapPos.z >= 0.0) && (fragShadowMapPos.z <= 1.0)) && (max(fragShadowMapPos.x, fragShadowMapPos.y) <= 1.0)) && (min(fragShadowMapPos.x, fragShadowMapPos.y) >= 0.0))
         {
diff --git a/reference/shaders/frag/ubo-load-row-major-workaround.frag b/reference/shaders/frag/ubo-load-row-major-workaround.frag
new file mode 100644
index 00000000..95b0eda7
--- /dev/null
+++ b/reference/shaders/frag/ubo-load-row-major-workaround.frag
@@ -0,0 +1,48 @@
+#version 450
+
+struct RowMajor
+{
+    mat4 B;
+};
+
+struct NestedRowMajor
+{
+    RowMajor rm;
+};
+
+layout(binding = 2, std140) uniform UBO3
+{
+    layout(row_major) NestedRowMajor rm2;
+} _17;
+
+layout(binding = 1, std140) uniform UBO2
+{
+    layout(row_major) RowMajor rm;
+} _35;
+
+layout(binding = 0, std140) uniform UBO
+{
+    layout(row_major) mat4 A;
+    mat4 C;
+} _42;
+
+layout(binding = 3, std140) uniform UBONoWorkaround
+{
+    mat4 D;
+} _56;
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) in vec4 Clip;
+
+NestedRowMajor SPIRV_Cross_workaround_load_row_major(NestedRowMajor wrap) { return wrap; }
+mat4 SPIRV_Cross_workaround_load_row_major(mat4 wrap) { return wrap; }
+
+void main()
+{
+    NestedRowMajor rm2_loaded;
+    rm2_loaded.rm.B = SPIRV_Cross_workaround_load_row_major(_17.rm2).rm.B;
+    FragColor = (((rm2_loaded.rm.B * SPIRV_Cross_workaround_load_row_major(_35.rm.B)) * SPIRV_Cross_workaround_load_row_major(_42.A)) * SPIRV_Cross_workaround_load_row_major(_42.C)) * Clip;
+    FragColor += (_56.D * Clip);
+    FragColor += (_42.A[1] * Clip);
+}
+
diff --git a/reference/shaders/legacy/vert/transpose.legacy.vert b/reference/shaders/legacy/vert/transpose.legacy.vert
index c73d1a11..2683d172 100644
--- a/reference/shaders/legacy/vert/transpose.legacy.vert
+++ b/reference/shaders/legacy/vert/transpose.legacy.vert
@@ -11,12 +11,14 @@ uniform Buffer _13;
 
 attribute vec4 Position;
 
+mat4 SPIRV_Cross_workaround_load_row_major(mat4 wrap) { return wrap; }
+
 void main()
 {
-    vec4 c0 = _13.M * (Position * _13.MVPRowMajor);
-    vec4 c1 = _13.M * (_13.MVPColMajor * Position);
-    vec4 c2 = _13.M * (_13.MVPRowMajor * Position);
-    vec4 c3 = _13.M * (Position * _13.MVPColMajor);
+    vec4 c0 = SPIRV_Cross_workaround_load_row_major(_13.M) * (Position * _13.MVPRowMajor);
+    vec4 c1 = SPIRV_Cross_workaround_load_row_major(_13.M) * (SPIRV_Cross_workaround_load_row_major(_13.MVPColMajor) * Position);
+    vec4 c2 = SPIRV_Cross_workaround_load_row_major(_13.M) * (_13.MVPRowMajor * Position);
+    vec4 c3 = SPIRV_Cross_workaround_load_row_major(_13.M) * (Position * SPIRV_Cross_workaround_load_row_major(_13.MVPColMajor));
     gl_Position = ((c0 + c1) + c2) + c3;
 }
 
diff --git a/reference/shaders/vert/read-from-row-major-array.vert b/reference/shaders/vert/read-from-row-major-array.vert
index 1c950f3f..3b77687f 100644
--- a/reference/shaders/vert/read-from-row-major-array.vert
+++ b/reference/shaders/vert/read-from-row-major-array.vert
@@ -8,6 +8,8 @@ layout(binding = 0, std140) uniform Block
 layout(location = 0) in vec4 a_position;
 layout(location = 0) out mediump float v_vtxResult;
 
+mat2x3 SPIRV_Cross_workaround_load_row_major(mat2x3 wrap) { return wrap; }
+
 mediump float compare_float(float a, float b)
 {
     return float(abs(a - b) < 0.0500000007450580596923828125);
@@ -37,7 +39,7 @@ void main()
 {
     gl_Position = a_position;
     mediump float result = 1.0;
-    mat2x3 param = _104.var[0][0];
+    mat2x3 param = SPIRV_Cross_workaround_load_row_major(_104.var[0][0]);
     mat2x3 param_1 = mat2x3(vec3(2.0, 6.0, -6.0), vec3(0.0, 5.0, 5.0));
     result *= compare_mat2x3(param, param_1);
     v_vtxResult = result;
diff --git a/shaders/frag/ubo-load-row-major-workaround.frag b/shaders/frag/ubo-load-row-major-workaround.frag
new file mode 100644
index 00000000..03205ee8
--- /dev/null
+++ b/shaders/frag/ubo-load-row-major-workaround.frag
@@ -0,0 +1,44 @@
+#version 450
+
+struct RowMajor
+{
+    mat4 B;
+};
+
+struct NestedRowMajor
+{
+    RowMajor rm;
+};
+
+layout(set = 0, binding = 0, row_major) uniform UBO
+{
+    mat4 A;
+    layout(column_major) mat4 C; // This should also be worked around.
+};
+
+
+layout(set = 0, binding = 1, row_major) uniform UBO2
+{
+    RowMajor rm;
+};
+
+layout(set = 0, binding = 2, row_major) uniform UBO3
+{
+    NestedRowMajor rm2;
+};
+
+layout(set = 0, binding = 3) uniform UBONoWorkaround
+{
+    mat4 D;
+};
+
+layout(location = 0) in vec4 Clip;
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+    NestedRowMajor rm2_loaded = rm2;
+    FragColor = rm2_loaded.rm.B * rm.B * A * C * Clip;
+    FragColor += D * Clip;
+    FragColor += A[1] * Clip;
+}
diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp
index e5db12f3..aa006a19 100644
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@@ -511,6 +511,7 @@ string CompilerGLSL::compile()
 	{
 		// only NV_gpu_shader5 supports divergent indexing on OpenGL, and it does so without extra qualifiers
 		backend.nonuniform_qualifier = "";
+		backend.needs_row_major_load_workaround = true;
 	}
 	backend.force_gl_in_out_block = true;
 	backend.supports_extensions = true;
@@ -3798,6 +3799,17 @@ void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
 			statement("");
 		}
 	}
+
+	if (!workaround_ubo_load_overload_types.empty())
+	{
+		for (auto &type_id : workaround_ubo_load_overload_types)
+		{
+			auto &type = get<SPIRType>(type_id);
+			statement(type_to_glsl(type), " SPIRV_Cross_workaround_load_row_major(", type_to_glsl(type),
+			          " wrap) { return wrap; }");
+		}
+		statement("");
+	}
 }
 
 // Returns a string representation of the ID, usable as a function arg.
@@ -9496,11 +9508,15 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		if (forward && ptr_expression)
 			ptr_expression->need_transpose = old_need_transpose;
 
+		bool flattened = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0;
+
+		if (backend.needs_row_major_load_workaround && !is_non_native_row_major_matrix(ptr) && !flattened)
+			rewrite_load_for_wrapped_row_major(expr, result_type, ptr);
+
 		// By default, suppress usage tracking since using same expression multiple times does not imply any extra work.
 		// However, if we try to load a complex, composite object from a flattened buffer,
 		// we should avoid emitting the same code over and over and lower the result to a temporary.
-		bool usage_tracking = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0 &&
-		                      (type.basetype == SPIRType::Struct || (type.columns > 1));
+		bool usage_tracking = flattened && (type.basetype == SPIRType::Struct || (type.columns > 1));
 
 		SPIRExpression *e = nullptr;
 		if (!forward && expression_is_non_value_type_array(ptr))
@@ -15087,3 +15103,63 @@ CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result()
 	weights[KHR_shader_subgroup_basic] = big_num;
 	weights[KHR_shader_subgroup_vote] = big_num;
 }
+
+void CompilerGLSL::request_workaround_wrapper_overload(TypeID id)
+{
+	// Must be ordered to maintain deterministic output, so vector is appropriate.
+	if (find(begin(workaround_ubo_load_overload_types), end(workaround_ubo_load_overload_types), id) ==
+	    end(workaround_ubo_load_overload_types))
+	{
+		force_recompile();
+		workaround_ubo_load_overload_types.push_back(id);
+	}
+}
+
+void CompilerGLSL::rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr)
+{
+	// Loading row-major matrices from UBOs on older AMD Windows OpenGL drivers is problematic.
+	// To load these types correctly, we must first wrap them in a dummy function whose only purpose is to
+	// ensure the row_major decoration is actually respected.
+	auto *var = maybe_get_backing_variable(ptr);
+	if (!var)
+		return;
+
+	auto &backing_type = get<SPIRType>(var->basetype);
+	bool is_ubo = backing_type.basetype == SPIRType::Struct &&
+	              backing_type.storage == StorageClassUniform &&
+	              has_decoration(backing_type.self, DecorationBlock);
+	if (!is_ubo)
+		return;
+
+	auto *type = &get<SPIRType>(loaded_type);
+	bool rewrite = false;
+
+	if (is_matrix(*type))
+	{
+		// To avoid adding a lot of unnecessary meta tracking to forward the row_major state,
+		// we will simply look at the base struct itself. It is exceptionally rare to mix and match
+		// row-major/col-major state. If there is any row-major action going on, we apply the workaround.
+		// It is harmless to apply the workaround to column-major matrices, so this is still a valid solution.
+		// If an access chain occurred, the workaround is not required, so loading vectors or scalars needs no workaround.
+		type = &backing_type;
+	}
+
+	if (type->basetype == SPIRType::Struct)
+	{
+		// If we're loading a struct where any member is a row-major matrix, apply the workaround.
+		for (uint32_t i = 0; i < uint32_t(type->member_types.size()); i++)
+		{
+			if (combined_decoration_for_member(*type, i).get(DecorationRowMajor))
+			{
+				rewrite = true;
+				break;
+			}
+		}
+	}
+
+	if (rewrite)
+	{
+		request_workaround_wrapper_overload(loaded_type);
+		expr = join("SPIRV_Cross_workaround_load_row_major(", expr, ")");
+	}
+}
diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp
index d8125456..d52f786e 100644
--- a/spirv_glsl.hpp
+++ b/spirv_glsl.hpp
@@ -560,6 +560,7 @@ protected:
 		bool support_small_type_sampling_result = false;
 		bool support_case_fallthrough = true;
 		bool use_array_constructor = false;
+		bool needs_row_major_load_workaround = false;
 	} backend;
 
 	void emit_struct(SPIRType &type);
@@ -784,6 +785,10 @@ protected:
 	// Currently used by NMin/Max/Clamp implementations.
 	std::unordered_map<uint32_t, uint32_t> extra_sub_expressions;
 
+	SmallVector<TypeID> workaround_ubo_load_overload_types;
+	void request_workaround_wrapper_overload(TypeID id);
+	void rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr);
+
 	uint32_t statement_count = 0;
 
 	inline bool is_legacy() const
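
Note: the rewrite is deliberately narrow, and the new test also covers loads
that must stay untouched. A sketch of the contrast, using the declarations
from the reference output above:

    // Wrapped: _42.A is a row_major UBO matrix, so the whole-matrix load
    // goes through the identity overload.
    FragColor = SPIRV_Cross_workaround_load_row_major(_42.A) * Clip;

    // Not wrapped: UBONoWorkaround has no row-major members.
    FragColor += (_56.D * Clip);

    // Not wrapped: the access chain yields a single column (a vector),
    // and vector/scalar loads do not need the workaround.
    FragColor += (_42.A[1] * Clip);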