HLSL: add implicit mat*mat truncations

Goes with PR #1161, and completes the space for mul() implicit truncations. Note that the v*v and scalar cases are already handled by existing code.
2024-11-09 12:00:05 +00:00 · 2017-11-27 09:42:53 -07:00 · 2017-11-27 09:42:53 -07:00 · cee29b04d6
commit cee29b04d6
parent 792a94809b
3 changed files with 619 additions and 345 deletions
--- a/Test/baseResults/hlsl.mul-truncate.frag.out
+++ b/Test/baseResults/hlsl.mul-truncate.frag.out
--- a/Test/hlsl.mul-truncate.frag
+++ b/Test/hlsl.mul-truncate.frag
@ -6,6 +6,7 @@ cbuffer Matrix
    float4x4  m44;
    float4x3  m43;
    float3x4  m34;
+    float3x3  m33;
    float2x4  m24;
    float4x2  m42;
    float4    v4;
@ -27,11 +28,11 @@ float4 main() : SV_Target0
    float4 r20 = mul(m44, v3); // float4 = float4x4 * float3;  // clamp mat to float4x3;
    float4 r21 = mul(m43, v4); // truncate vector to vec3

-    // // m*m
-    // float2x3 r30 = mul(m24, m33);  // float2x3 = float2x4 * float3x3;
-    // float3x4 r31 = mul(m33, m24);  // float3x4 = float3x3 * float2x4;
-    // float3x2 r32 = mul(m33, m42);  // float3x2 = float3x3 * float4x2;
-    // float4x3 r33 = mul(m42, m33);  // float4x3 = float4x2 * float3x3;
+    // m*m
+    float2x3 r30 = mul(m24, m33);  // float2x3 = float2x4 * float3x3;
+    float3x4 r31 = mul(m33, m24);  // float3x4 = float3x3 * float2x4;
+    float3x2 r32 = mul(m33, m42);  // float3x2 = float3x3 * float4x2;
+    float4x3 r33 = mul(m42, m33);  // float4x3 = float4x2 * float3x3;

-    return r10 + r11 + r20 + r21 + r00 + r01; // + r30[0].x + r31[0] + r32[0].x + transpose(r33)[0];
+    return r10 + r11 + r20 + r21 + r00 + r01 + r30[0].x + r31[0] + r32[0].x + transpose(r33)[0];
 }
--- a/hlsl/hlslParseHelper.cpp
+++ b/hlsl/hlslParseHelper.cpp
@ -5178,7 +5178,7 @@ void HlslParseContext::pushFrontArguments(TIntermTyped* front, TIntermTyped*& ar
 //
 // HLSL allows mismatched dimensions on vec*mat, mat*vec, vec*vec, and mat*mat.  This is a
 // situation not well suited to resolution in intrinsic selection, but we can do so here, since we
-// can look at both arguments insert explicit shape changes here, if required.
+// can look at both arguments and insert explicit shape changes if required.
 //
 void HlslParseContext::addGenMulArgumentConversion(const TSourceLoc& loc, TFunction& call, TIntermTyped*& args)
 {
@ -5224,12 +5224,23 @@ void HlslParseContext::addGenMulArgumentConversion(const TSourceLoc& loc, TFunct
            arg1 = addConstructor(loc, arg1, truncType);
        }
    } else if (arg0->isMatrix() && arg1->isMatrix()) {
-        // mat * mat
+        // mat * mat: we truncate the matrix with the larger inner dimension so that it matches the smaller one.
+        // Remember, HLSL Mrc = GLSL/SPIRV Mcr.
+        if (arg0->getMatrixRows() > arg1->getMatrixCols()) {
+            const TType truncType(arg0->getBasicType(), arg0->getQualifier().storage, arg0->getQualifier().precision,
+                                  0, arg0->getMatrixCols(), arg1->getMatrixCols());
+            arg0 = addConstructor(loc, arg0, truncType);
+        } else if (arg0->getMatrixRows() < arg1->getMatrixCols()) {
+            const TType truncType(arg1->getBasicType(), arg1->getQualifier().storage, arg1->getQualifier().precision,
+                                  0, arg0->getMatrixRows(), arg1->getMatrixRows());
+            arg1 = addConstructor(loc, arg1, truncType);
+        }
    } else {
-        // It's something with scalars: we'll just leave it alone.
+        // It's something with scalars: we'll just leave it alone.  Function selection will handle it
+        // downstream.
    }

-    // Put arguments back.
+    // Put arguments back.  (They might be unchanged, in which case this is harmless).
    argAggregate->getSequence()[0] = arg0;
    argAggregate->getSequence()[1] = arg1;