HLSL: Include shape-changing conversions in overloaded signature selection.

This also enables vecN -> vec1 shape conversions everywhere shape conversions are performed. For signature selection, a shape change is treated as worse than any other difference when deciding which of two conversions is better.
2024-09-19 12:19:53 +00:00 · 2016-08-25 15:57:56 -06:00 · 2016-08-25 15:57:56 -06:00 · e3f2c8f98a
commit e3f2c8f98a
parent 90dd70f752
6 changed files with 1504 additions and 1026 deletions
--- a/Test/baseResults/hlsl.intrinsics.negative.frag.out
+++ b/Test/baseResults/hlsl.intrinsics.negative.frag.out
@ -1,12 +1,6 @@
 hlsl.intrinsics.negative.frag
 ERROR: 0:8: 'cross' : no matching overloaded function found 
 ERROR: 0:9: 'D3DCOLORtoUBYTE4' : no matching overloaded function found 
 ERROR: 0:10: 'determinant' : no matching overloaded function found 
 ERROR: 0:12: 'f32tof16' : unimplemented intrinsic: handle natively 
 ERROR: 0:23: 'length' : no matching overloaded function found 
 ERROR: 0:25: 'normalize' : no matching overloaded function found 
 ERROR: 0:26: 'reflect' : no matching overloaded function found 
 ERROR: 0:27: 'refract' : no matching overloaded function found 
 ERROR: 0:28: 'refract' : no matching overloaded function found 
 ERROR: 0:30: 'transpose' : no matching overloaded function found 
 ERROR: 0:39: 'GetRenderTargetSamplePosition' : no matching overloaded function found 
@ -23,7 +17,6 @@ ERROR: 0:67: 'determinant' : no matching overloaded function found
 ERROR: 0:68: 'f32tof16' : unimplemented intrinsic: handle natively 
 ERROR: 0:73: 'transpose' : no matching overloaded function found 
 ERROR: 0:81: 'CheckAccessFullyMapped' : no matching overloaded function found 
 ERROR: 0:83: 'cross' : no matching overloaded function found 
 ERROR: 0:84: 'determinant' : no matching overloaded function found 
 ERROR: 0:85: 'f32tof16' : unimplemented intrinsic: handle natively 
 ERROR: 0:90: 'transpose' : no matching overloaded function found 
@ -66,7 +59,7 @@ ERROR: 0:133: 'normalize' : no matching overloaded function found
 ERROR: 0:133: 'reflect' : no matching overloaded function found 
 ERROR: 0:133: 'refract' : no matching overloaded function found 
 ERROR: 0:133: 'reversebits' : no matching overloaded function found 
-ERROR: 67 compilation errors.  No code generated.
+ERROR: 60 compilation errors.  No code generated.
 Shader version: 450
@ -91,10 +84,14 @@ ERROR: node is still EOpNull!
 0:7      bitCount (global uint)
 0:7        Convert float to uint (temp uint)
 0:7          'inF0' (in float)
-0:8      Constant:
+0:8      cross-product (global 3-component vector of float)
-0:8        0.000000
+0:8        Construct vec3 (in 3-component vector of float)
-0:9      Constant:
+0:8          'inF0' (in float)
-0:9        0.000000
+0:8        Construct vec3 (in 3-component vector of float)
 0:8          'inF1' (in float)
 0:9      Function Call: D3DCOLORtoUBYTE4(vf4; (global 4-component vector of int)
 0:9        Construct vec4 (in 4-component vector of float)
 0:9          'inF0' (in float)
 0:10      Constant:
 0:10        0.000000
 0:12      ERROR: Bad unary op
@ -107,8 +104,9 @@ ERROR: node is still EOpNull!
 0:14      findLSB (global uint)
 0:14        Convert float to uint (temp uint)
 0:14          'inF0' (in float)
-0:23      Constant:
+0:23      length (global float)
-0:23        0.000000
+0:23        Construct vec2 (in 2-component vector of float)
 0:23          'inF0' (in float)
 0:24      Function Call: msad4(u1;vu2;vu4; (global 4-component vector of uint)
 0:24        Convert float to uint (temp uint)
 0:24          'inF0' (in float)
@ -120,12 +118,20 @@ ERROR: node is still EOpNull!
 0:24          0 (const uint)
 0:24          0 (const uint)
 0:24          0 (const uint)
-0:25      Constant:
+0:25      normalize (global 2-component vector of float)
-0:25        0.000000
+0:25        Construct vec2 (in 2-component vector of float)
-0:26      Constant:
+0:25          'inF0' (in float)
-0:26        0.000000
+0:26      reflect (global 2-component vector of float)
-0:27      Constant:
+0:26        Construct vec2 (in 2-component vector of float)
-0:27        0.000000
+0:26          'inF0' (in float)
 0:26        Construct vec2 (in 2-component vector of float)
 0:26          'inF1' (in float)
 0:27      refract (global 2-component vector of float)
 0:27        Construct vec2 (in 2-component vector of float)
 0:27          'inF0' (in float)
 0:27        Construct vec2 (in 2-component vector of float)
 0:27          'inF1' (in float)
 0:27        'inF2' (in float)
 0:28      Constant:
 0:28        0.000000
 0:29      bitFieldReverse (global uint)
@ -239,8 +245,11 @@ ERROR: node is still EOpNull!
 0:82      bitCount (global 4-component vector of uint)
 0:82        Convert float to uint (temp 4-component vector of uint)
 0:82          'inF0' (in 4-component vector of float)
-0:83      Constant:
+0:83      cross-product (global 3-component vector of float)
-0:83        0.000000
+0:83        Construct vec3 (in 3-component vector of float)
 0:83          'inF0' (in 4-component vector of float)
 0:83        Construct vec3 (in 3-component vector of float)
 0:83          'inF1' (in 4-component vector of float)
 0:84      Constant:
 0:84        0.000000
 0:85      ERROR: Bad unary op
@ -423,10 +432,14 @@ ERROR: node is still EOpNull!
 0:7      bitCount (global uint)
 0:7        Convert float to uint (temp uint)
 0:7          'inF0' (in float)
-0:8      Constant:
+0:8      cross-product (global 3-component vector of float)
-0:8        0.000000
+0:8        Construct vec3 (in 3-component vector of float)
-0:9      Constant:
+0:8          'inF0' (in float)
-0:9        0.000000
+0:8        Construct vec3 (in 3-component vector of float)
 0:8          'inF1' (in float)
 0:9      Function Call: D3DCOLORtoUBYTE4(vf4; (global 4-component vector of int)
 0:9        Construct vec4 (in 4-component vector of float)
 0:9          'inF0' (in float)
 0:10      Constant:
 0:10        0.000000
 0:12      ERROR: Bad unary op
@ -439,8 +452,9 @@ ERROR: node is still EOpNull!
 0:14      findLSB (global uint)
 0:14        Convert float to uint (temp uint)
 0:14          'inF0' (in float)
-0:23      Constant:
+0:23      length (global float)
-0:23        0.000000
+0:23        Construct vec2 (in 2-component vector of float)
 0:23          'inF0' (in float)
 0:24      Function Call: msad4(u1;vu2;vu4; (global 4-component vector of uint)
 0:24        Convert float to uint (temp uint)
 0:24          'inF0' (in float)
@ -452,12 +466,20 @@ ERROR: node is still EOpNull!
 0:24          0 (const uint)
 0:24          0 (const uint)
 0:24          0 (const uint)
-0:25      Constant:
+0:25      normalize (global 2-component vector of float)
-0:25        0.000000
+0:25        Construct vec2 (in 2-component vector of float)
-0:26      Constant:
+0:25          'inF0' (in float)
-0:26        0.000000
+0:26      reflect (global 2-component vector of float)
-0:27      Constant:
+0:26        Construct vec2 (in 2-component vector of float)
-0:27        0.000000
+0:26          'inF0' (in float)
 0:26        Construct vec2 (in 2-component vector of float)
 0:26          'inF1' (in float)
 0:27      refract (global 2-component vector of float)
 0:27        Construct vec2 (in 2-component vector of float)
 0:27          'inF0' (in float)
 0:27        Construct vec2 (in 2-component vector of float)
 0:27          'inF1' (in float)
 0:27        'inF2' (in float)
 0:28      Constant:
 0:28        0.000000
 0:29      bitFieldReverse (global uint)
@ -571,8 +593,11 @@ ERROR: node is still EOpNull!
 0:82      bitCount (global 4-component vector of uint)
 0:82        Convert float to uint (temp 4-component vector of uint)
 0:82          'inF0' (in 4-component vector of float)
-0:83      Constant:
+0:83      cross-product (global 3-component vector of float)
-0:83        0.000000
+0:83        Construct vec3 (in 3-component vector of float)
 0:83          'inF0' (in 4-component vector of float)
 0:83        Construct vec3 (in 3-component vector of float)
 0:83          'inF1' (in 4-component vector of float)
 0:84      Constant:
 0:84        0.000000
 0:85      ERROR: Bad unary op
--- a/Test/baseResults/hlsl.overload.frag.out
+++ b/Test/baseResults/hlsl.overload.frag.out
--- a/Test/hlsl.overload.frag
+++ b/Test/hlsl.overload.frag
@ -1,117 +1,142 @@
 // function selection under type conversion
-void foo1(double a, bool b)  {}
+void foo1(double a, bool b)  {}
-void foo1(double a, uint b)  {}
+void foo1(double a, uint b)  {}
-void foo1(double a, int b)   {}
+void foo1(double a, int b)   {}
-void foo1(double a, float b) {}
+void foo1(double a, float b) {}
 void foo1(double a, double b){}
 // uint -> int
-void foo2(int a, bool b)  {}
+void foo2(int a, bool b)  {}
-void foo2(int a, uint b)  {}
+void foo2(int a, uint b)  {}
-void foo2(int a, int b)   {}
+void foo2(int a, int b)   {}
-void foo2(int a, float b) {}
+void foo2(int a, float b) {}
 void foo2(int a, double b){}
 // everything can promote
-void foo3(bool b)  {}
+void foo3(bool b)  {}
-void foo4(uint b)  {}
+void foo4(uint b)  {}
-void foo5(int b)   {}
+void foo5(int b)   {}
-void foo6(float b) {}
+void foo6(float b) {}
 void foo7(double b){}
 // shorter forward chain better than longer or backward chain
-void foo8(float);
+void foo8(float)  {}
-void foo8(double);
+void foo8(double) {}
-void foo9(int);
+void foo9(int)    {}
-void foo9(uint);
+void foo9(uint)   {}
-void foo10(bool);
+void foo10(bool)  {}
-void foo10(int);
+void foo10(int)   {}
 // shape change is worse
 void foo11(float3)  {}
 void foo11(double)  {}
 void foo11(int3)    {}
 void foo11(uint)    {}
 void foo12(float1)  {}
 void foo12(double3) {}
 void foo16(uint)    {}
 void foo16(uint2)   {}
 // shape change
 void foo13(float3)  {}
 void foo14(int1)     {}
 void foo15(bool1)   {}
 float4 PixelShaderFunction(float4 input) : COLOR0
 {
-    bool b;
+    bool b;
-    double d;
+    double d;
-    uint u;
+    uint u;
-    int i;
+    int i;
-    float f;
+    float f;
    foo1(d, b);
    foo1(d, d);
    foo1(d, u);
    foo1(d, i);
    foo1(d, f);
    foo1(f, b);
    foo1(f, d);
    foo1(f, u);
    foo1(f, i);
    foo1(f, f);
    foo1(u, b);
    foo1(u, d);
    foo1(u, u);
    foo1(u, i);
    foo1(u, f);
    foo1(i, b);
    foo1(i, d);
    foo1(i, u);
    foo1(i, i);
    foo1(i, f);
    foo2(u, b);
    foo2(u, d);
    foo2(u, u);
    foo2(u, i);
    foo2(u, f);
    foo2(i, b);
    foo2(i, d);
    foo2(i, u);
    foo2(i, i);
    foo2(i, f);
    foo3(b);
    foo3(d);
    foo3(u);
    foo3(i);
    foo3(f);
    foo4(b);
    foo4(d);
    foo4(u);
    foo4(i);
    foo4(f);
    foo5(b);
    foo5(d);
    foo5(u);
    foo5(i);
    foo5(f);
    foo6(b);
    foo6(d);
    foo6(u);
    foo6(i);
    foo6(f);
    foo7(b);
    foo7(d);
    foo7(u);
    foo7(i);
    foo7(f);
    foo8(b);
    foo8(u);
    foo8(i);
    foo9(b);
    foo9(f);
    foo9(d);
    foo10(u);
    foo10(f);
    foo10(d);
    foo11(b);
    foo11(f);
    foo12(float3(f));
    foo16(int2(i,i));
    foo13(f);
    foo14(int4(i));
    foo15(b);
    foo15(bool3(b));
    foo1(d, b);
    foo1(d, d);
    foo1(d, u);
    foo1(d, i);
    foo1(d, f);
    foo1(f, b);
    foo1(f, d);
    foo1(f, u);
    foo1(f, i);
    foo1(f, f);
    foo1(u, b);
    foo1(u, d);
    foo1(u, u);
    foo1(u, i);
    foo1(u, f);
    foo1(i, b);
    foo1(i, d);
    foo1(i, u);
    foo1(i, i);
    foo1(i, f);
    foo2(u, b);
    foo2(u, d);
    foo2(u, u);
    foo2(u, i);
    foo2(u, f);
    foo2(i, b);
    foo2(i, d);
    foo2(i, u);
    foo2(i, i);
    foo2(i, f);
    foo3(b);
    foo3(d);
    foo3(u);
    foo3(i);
    foo3(f);
    foo4(b);
    foo4(d);
    foo4(u);
    foo4(i);
    foo4(f);
    foo5(b);
    foo5(d);
    foo5(u);
    foo5(i);
    foo5(f);
    foo6(b);
    foo6(d);
    foo6(u);
    foo6(i);
    foo6(f);
    foo7(b);
    foo7(d);
    foo7(u);
    foo7(i);
    foo7(f);
    foo8(b);
    foo8(u);
    foo8(i);
    foo9(b);
    foo9(f);
    foo9(d);
    foo10(u);
    foo10(f);
    foo10(d);
    return input;
 }
--- a/glslang/Include/revision.h
+++ b/glslang/Include/revision.h
@ -2,5 +2,5 @@
 // For the version, it uses the latest git tag followed by the number of commits.
 // For the date, it uses the current date (when the script is run).
-#define GLSLANG_REVISION "Overload400-PrecQual.1434"
+#define GLSLANG_REVISION "Overload400-PrecQual.1438"
 #define GLSLANG_DATE "25-Aug-2016"
--- a/glslang/MachineIndependent/Intermediate.cpp
+++ b/glslang/MachineIndependent/Intermediate.cpp
@ -702,6 +702,7 @@ TIntermTyped* TIntermediate::addShapeConversion(TOperator op, const TType& type,
    case EOpGreaterThan:
    case EOpLessThanEqual:
    case EOpGreaterThanEqual:
    case EOpFunctionCall:
        break;
    default:
        return node;
@ -715,9 +716,11 @@ TIntermTyped* TIntermediate::addShapeConversion(TOperator op, const TType& type,
    // The new node that handles the conversion
    TOperator constructorOp = mapTypeToConstructorOp(type);
-    // scalar -> smeared -> vector
+    // scalar -> smeared -> vector, or
-    if (type.isVector() && node->getType().isScalar())
+    // bigger vector -> smaller vector or scalar
-        return setAggregateOperator(node, constructorOp, type, node->getLoc());
+    if ((type.isVector() && node->getType().isScalar()) ||
        (node->getVectorSize() > type.getVectorSize() && type.isVector()))
        return setAggregateOperator(makeAggregate(node), constructorOp, type, node->getLoc());
    return node;
 }
@ -731,6 +734,7 @@ bool TIntermediate::canImplicitlyPromote(TBasicType from, TBasicType to, TOperat
    if (profile == EEsProfile || version == 110)
        return false;
    // TODO: Move more policies into language-specific handlers.
    // Some languages allow more general (or potentially, more specific) conversions under some conditions.
    if (source == EShSourceHlsl) {
        const bool fromConvertable = (from == EbtFloat || from == EbtDouble || from == EbtInt || from == EbtUint || from == EbtBool);
--- a/hlsl/hlslParseHelper.cpp
+++ b/hlsl/hlslParseHelper.cpp
@ -2001,6 +2001,7 @@ void HlslParseContext::addInputArgumentConversions(const TFunction& function, TI
                // In-qualified arguments just need an extra node added above the argument to
                // convert to the correct type.
                arg = intermediate.addConversion(EOpFunctionCall, *function[i].type, arg);
                arg = intermediate.addShapeConversion(EOpFunctionCall, *function[i].type, arg);
                if (arg) {
                    if (function.getParamCount() == 1)
                        arguments = arg;
@ -3565,9 +3566,25 @@ const TFunction* HlslParseContext::findFunction(const TSourceLoc& loc, const TFu
    auto convertible = [this](const TType& from, const TType& to) {
        if (from == to)
            return true;
-        if (from.isArray() || to.isArray() || ! from.sameElementShape(to))
+
        // no aggregate conversions
        if (from.isArray()  || to.isArray() || 
            from.isStruct() || to.isStruct())
            return false;
-        return intermediate.canImplicitlyPromote(from.getBasicType(), to.getBasicType(), EOpFunctionCall);
+
        // basic types have to be convertible
        if (! intermediate.canImplicitlyPromote(from.getBasicType(), to.getBasicType(), EOpFunctionCall))
            return false;
        // shapes have to be convertible
        if ((from.isScalar() && to.isScalar()) ||
            (from.isScalar() && to.isVector()) ||
            (from.isVector() && to.isVector() && from.getVectorSize() >= to.getVectorSize()))
            return true;
        // TODO: what are the matrix rules? they go here
        return false;
    };
    // Is 'to2' a better conversion than 'to1'?
@ -3580,33 +3597,41 @@ const TFunction* HlslParseContext::findFunction(const TSourceLoc& loc, const TFu
        if (from == to1)
            return false;
-        // float -> double is better than any other float conversion
+        // shape changes are always worse
-        if (from.getBasicType() == EbtFloat) {
+        if (from.isScalar() || from.isVector()) {
-            if (to2.getBasicType() == EbtDouble && to1.getBasicType() != EbtDouble)
+            if (from.getVectorSize() == to2.getVectorSize() &&
                from.getVectorSize() != to1.getVectorSize())
                return true;
            if (from.getVectorSize() == to1.getVectorSize() &&
                from.getVectorSize() != to2.getVectorSize())
                return false;
        }
-        // int -> uint is better than any other int conversion
+        // Might or might not be changing shape, which means basic type might
-        if (from.getBasicType() == EbtInt) {
+        // or might not match, so within that, the question is how big a
-            if (to2.getBasicType() == EbtUint && to1.getBasicType() != EbtUint)
+        // basic-type conversion is being done.
-                return true;
+        //
-        }
+        // Use a hierarchy of domains, translated to order of magnitude
        // in a linearized view:
        //   - floating-point vs. integer
        //     - 32 vs. 64 bit (or width in general)
        //       - bool vs. non bool
        //         - signed vs. not signed
        auto linearize = [](const TBasicType& basicType) {
            switch (basicType) {
            case EbtBool:     return 1;
            case EbtInt:      return 10;
            case EbtUint:     return 11;
            case EbtInt64:    return 20;
            case EbtUint64:   return 21;
            case EbtFloat:    return 100;
            case EbtDouble:   return 110;
            default:          return 0;
            }
        };
-        // TODO: these should be replaced by a more generic "shorter chain is better than longer chain" rule
+        return std::abs(linearize(to2.getBasicType()) - linearize(from.getBasicType())) <
-
+               std::abs(linearize(to1.getBasicType()) - linearize(from.getBasicType()));
        // -> float is better than -> double
        if (to2.getBasicType() == EbtFloat && to1.getBasicType() == EbtDouble)
            return true;
        // -> int is better than -> bool
        if ((to2.getBasicType() == EbtInt || to2.getBasicType() == EbtUint) &&  to1.getBasicType() == EbtBool)
            return true;
        // -> uint is better than -> int
        if (to2.getBasicType() == EbtUint &&  to1.getBasicType() == EbtInt)
            return true;
        return false;
    };
    // for ambiguity reporting