Various optimizations to stroke tessellation shaders

Bug: skia:10419 Change-Id: I1544113b6ea2327674a48a0430146a859a547723 Reviewed-on: https://skia-review.googlesource.com/c/skia/+/322796 Commit-Queue: Chris Dalton <csmartdalton@google.com> Reviewed-by: Michael Ludwig <michaelludwig@google.com>
2020-10-12 19:35:50 -06:00 · 2020-10-12 19:35:50 -06:00 · f58db3c94d
commit f58db3c94d
parent 7205080055
4 changed files with 136 additions and 118 deletions
--- a/src/gpu/tessellate/GrStrokeTessellateShader.cpp
+++ b/src/gpu/tessellate/GrStrokeTessellateShader.cpp
@ -18,8 +18,11 @@ constexpr static float kLinearizationIntolerance =
 class GrStrokeTessellateShader::Impl : public GrGLSLGeometryProcessor {
 public:
-    const char* getTessControlArgsUniformName(const GrGLSLUniformHandler& uniformHandler) const {
+    const char* getTessArgs1UniformName(const GrGLSLUniformHandler& uniformHandler) const {
-        return uniformHandler.getUniformCStr(fTessControlArgsUniform);
+        return uniformHandler.getUniformCStr(fTessArgs1Uniform);
    }
    const char* getTessArgs2UniformName(const GrGLSLUniformHandler& uniformHandler) const {
        return uniformHandler.getUniformCStr(fTessArgs2Uniform);
    }
    const char* getTranslateUniformName(const GrGLSLUniformHandler& uniformHandler) const {
        return uniformHandler.getUniformCStr(fTranslateUniform);
@ -31,19 +34,23 @@ public:
 private:
    void onEmitCode(EmitArgs& args, GrGPArgs* gpArgs) override {
        const auto& shader = args.fGP.cast<GrStrokeTessellateShader>();
        auto* uniHandler = args.fUniformHandler;
        args.fVaryingHandler->emitAttributes(shader);
-        auto* uniHandler = args.fUniformHandler;
+        // uNumSegmentsInJoin, uCubicConstantPow2, uNumRadialSegmentsPerRadian, uMiterLimitInvPow2.
-        fTessControlArgsUniform = uniHandler->addUniform(nullptr,
+        fTessArgs1Uniform = uniHandler->addUniform(nullptr, kTessControl_GrShaderFlag,
-                                                         kTessControl_GrShaderFlag |
+                                                   kFloat4_GrSLType, "tessArgs1", nullptr);
-                                                         kTessEvaluation_GrShaderFlag,
+        // uRadialTolerancePow2, uStrokeRadius.
-                                                         kFloat4_GrSLType, "tessControlArgs",
+        fTessArgs2Uniform = uniHandler->addUniform(nullptr, kTessControl_GrShaderFlag |
-                                                         nullptr);
+                                                            kTessEvaluation_GrShaderFlag,
                                                   kFloat2_GrSLType,
                                                   "tessArgs2", nullptr);
        if (!shader.viewMatrix().isIdentity()) {
            fTranslateUniform = uniHandler->addUniform(nullptr, kTessEvaluation_GrShaderFlag,
                                                       kFloat2_GrSLType, "translate", nullptr);
            fAffineMatrixUniform = uniHandler->addUniform(nullptr, kTessEvaluation_GrShaderFlag,
-                                                          kFloat2x2_GrSLType, "affineMatrix",
+                                                          kFloat4_GrSLType, "affineMatrix",
                                                          nullptr);
        }
        const char* colorUniformName;
@ -78,12 +85,14 @@ private:
        // form the original input points or else the seams might crack.
        float2 tan0 = (P[1] == P[0]) ? P[2] - P[0] : P[1] - P[0];
        float2 tan1 = (P[3] == P[2]) ? P[3] - P[1] : P[3] - P[2];
        if (tan1 == float2(0)) {
            // [p0, p3, p3, p3] is a reserved pattern that means this patch is a join only.
            P[1] = P[2] = P[3] = P[0];  // Colocate all the curve's points.
            // This will disable the (co-located) curve sections by making their tangents equal.
            tan1 = tan0;
        }
        if (tan0 == float2(0)) {
            // [p0, p0, p0, p3] is a reserved pattern that means this patch is a cusp point.
            P[3] = P[0];  // Colocate all the points on the cusp.
@ -116,7 +125,8 @@ private:
        float a = cross(A, B);
        float b = cross(A, C);
        float c = cross(B, C);
-        float discr = b*b - 4*a*c;
+        float b_over_2 = b*.5;
        float discr_over_4 = b_over_2*b_over_2 - a*c;
        float2x2 innerTangents = float2x2(0);
        if (float3(a,b,c) == float3(0)) {
@ -131,11 +141,11 @@ private:
            //
            float3 coeffs = tan0 * float3x2(A,B,C);
            a = coeffs.x;
-            b = coeffs.y*2;
+            b_over_2 = coeffs.y;
            c = coeffs.z;
-            discr = max(b*b - 4*a*c, 0);
+            discr_over_4 = max(b_over_2*b_over_2 - a*c, 0);
            innerTangents = float2x2(-tan0, -tan0);
-        } else if (discr <= 0) {
+        } else if (discr_over_4 <= 0) {
            // The curve does not inflect. This means it might rotate more than 180 degrees instead.
            // Craft a quadratic whose roots are the points were rotation == 180 deg and 0. (These
            // are the points where the tangent is parallel to tan0.)
@ -148,18 +158,19 @@ private:
            // NOTE: When P0 == P1 then C != tan0, C == 0 and these roots will be undefined. But
            // that's ok because when P0 == P1 the curve cannot rotate more than 180 degrees anyway.
            a = b;
-            b = c*2;
+            b_over_2 = c;
            c = 0;
-            discr = b*b;
+            discr_over_4 = b_over_2*b_over_2;
            innerTangents[0] = -C;
        }
-        // Solve our quadratic equation for the chop points.
+        // Solve our quadratic equation for the chop points. This is inspired by the quadratic
-        float x = sqrt(discr);
+        // formula from Numerical Recipes in C.
-        if (b < 0) {
+        float q = sqrt(discr_over_4);
-            x = -x;
+        if (b_over_2 > 0) {
            q = -q;
        }
-        float q = -.5 * (b + x);
+        q -= b_over_2;
        float2 chopT = float2((a != 0) ? q/a : 0,
                              (q != 0) ? c/q : 0);
@ -194,37 +205,29 @@ private:
            innerTangents = float2x2(tan0, tan0);
        }
-        // Chop the curve at chopT[0].
+        // Chop the curve at chopT[0] and chopT[1].
-        float2 ab = mix(P[0], P[1], chopT[0]);
+        float4 ab = mix(P[0].xyxy, P[1].xyxy, chopT.sstt);
-        float2 bc = mix(P[1], P[2], chopT[0]);
+        float4 bc = mix(P[1].xyxy, P[2].xyxy, chopT.sstt);
-        float2 cd = mix(P[2], P[3], chopT[0]);
+        float4 cd = mix(P[2].xyxy, P[3].xyxy, chopT.sstt);
-        float2 abc = mix(ab, bc, chopT[0]);
+        float4 abc = mix(ab, bc, chopT.sstt);
-        float2 bcd = mix(bc, cd, chopT[0]);
+        float4 bcd = mix(bc, cd, chopT.sstt);
-        float2 abcd = mix(abc, bcd, chopT[0]);
+        float4 abcd = mix(abc, bcd, chopT.sstt);
-
+        float4 middle = mix(abc, bcd, chopT.ttss);
        // Chop the curve at chopT[1].
        float2 xy = mix(P[0], P[1], chopT[1]);
        float2 yz = mix(P[1], P[2], chopT[1]);
        float2 zw = mix(P[2], P[3], chopT[1]);
        float2 xyz = mix(xy, yz, chopT[1]);
        float2 yzw = mix(yz, zw, chopT[1]);
        float2 xyzw = mix(xyz, yzw, chopT[1]);
        // Find tangents at the chop points if an inner tangent wasn't specified.
        if (innerTangents[0] == float2(0)) {
-            innerTangents[0] = bcd - abc;
+            innerTangents[0] = bcd.xy - abc.xy;
        }
        if (innerTangents[1] == float2(0)) {
-            innerTangents[1] = yzw - xyz;
+            innerTangents[1] = bcd.zw - abc.zw;
        }
        // Package arguments for the tessellation control stage.
-        vsPts01 = float4(P[0], ab);
+        vsPts01 = float4(P[0], ab.xy);
-        vsPts23 = float4(abc, abcd);
+        vsPts23 = float4(abc.xy, abcd.xy);
-        vsPts45 = float4(mix(abcd, bcd, (chopT[1] - chopT[0]) / (1 - chopT[0])),
+        vsPts45 = middle;
-                         mix(xyz, xyzw, (chopT[1] != 0) ? chopT[0] / chopT[1] : 0));
+        vsPts67 = float4(abcd.zw, bcd.zw);
-        vsPts67 = float4(xyzw, yzw);
+        vsPts89 = float4(cd.zw, P[3]);
        vsPts89 = float4(zw, P[3]);
        vsTans01 = float4(tan0, innerTangents[0]);
        vsTans23 = float4(innerTangents[1], tan1);
        vsPrevJoinTangent = (prevJoinTangent == float2(0)) ? tan0 : prevJoinTangent;
@ -250,21 +253,30 @@ private:
                numSegmentsInJoin = 0;  // Use the rotation to calculate the number of segments.
                break;
        }
-        pdman.set4f(fTessControlArgsUniform,
+        float intolerance = kLinearizationIntolerance * shader.fMatrixScale;
-                shader.fStroke.getWidth() * .5,  // uStrokeRadius.
+        float cubicConstant = GrWangsFormula::cubic_constant(intolerance);
-                numSegmentsInJoin,  // uNumSegmentsInJoin
+        float strokeRadius = shader.fStroke.getWidth() * .5;
-                kLinearizationIntolerance * shader.fMatrixScale,  // uIntolerance in path space.
+        float radialIntolerance = 1 / (strokeRadius * intolerance);
-                1/(shader.fStroke.getMiter()*shader.fStroke.getMiter()));  // uMiterLimitPowMinus2.
+        float miterLimit = shader.fStroke.getMiter();
        pdman.set4f(fTessArgs1Uniform,
            numSegmentsInJoin,  // uNumSegmentsInJoin
            cubicConstant * cubicConstant,  // uCubicConstantPow2 in path space.
            .5f / acosf(std::max(1 - radialIntolerance, -1.f)),  // uNumRadialSegmentsPerRadian
            1 / (miterLimit * miterLimit));  // uMiterLimitInvPow2.
        pdman.set2f(fTessArgs2Uniform,
                    radialIntolerance * radialIntolerance,  // uRadialTolerancePow2.
                    strokeRadius);  // uStrokeRadius.
        const SkMatrix& m = shader.viewMatrix();
        if (!m.isIdentity()) {
            pdman.set2f(fTranslateUniform, m.getTranslateX(), m.getTranslateY());
-            float affineMatrix[4] = {m.getScaleX(), m.getSkewY(), m.getSkewX(), m.getScaleY()};
+            pdman.set4f(fAffineMatrixUniform, m.getScaleX(), m.getSkewY(), m.getSkewX(),
-            pdman.setMatrix2f(fAffineMatrixUniform, affineMatrix);
+                        m.getScaleY());
        }
        pdman.set4fv(fColorUniform, 1, shader.fColor.vec());
    }
-    GrGLSLUniformHandler::UniformHandle fTessControlArgsUniform;
+    GrGLSLUniformHandler::UniformHandle fTessArgs1Uniform;
    GrGLSLUniformHandler::UniformHandle fTessArgs2Uniform;
    GrGLSLUniformHandler::UniformHandle fTranslateUniform;
    GrGLSLUniformHandler::UniformHandle fAffineMatrixUniform;
    GrGLSLUniformHandler::UniformHandle fColorUniform;
@ -284,12 +296,16 @@ SkString GrStrokeTessellateShader::getTessControlShaderGLSL(
    code.appendf("const float kMaxTessellationSegments = %i;\n",
                 shaderCaps.maxTessellationSegments());
-    const char* tessControlArgsName = impl->getTessControlArgsUniformName(uniformHandler);
+    const char* tessArgs1Name = impl->getTessArgs1UniformName(uniformHandler);
-    code.appendf("uniform vec4 %s;\n", tessControlArgsName);
+    code.appendf("uniform vec4 %s;\n", tessArgs1Name);
-    code.appendf("#define uStrokeRadius %s.x\n", tessControlArgsName);
+    code.appendf("#define uNumSegmentsInJoin %s.x\n", tessArgs1Name);
-    code.appendf("#define uNumSegmentsInJoin %s.y\n", tessControlArgsName);
+    code.appendf("#define uCubicConstantPow2 %s.y\n", tessArgs1Name);
-    code.appendf("#define uIntolerance %s.z\n", tessControlArgsName);
+    code.appendf("#define uNumRadialSegmentsPerRadian %s.z\n", tessArgs1Name);
-    code.appendf("#define uMiterLimitPowMinus2 %s.w\n", tessControlArgsName);
+    code.appendf("#define uMiterLimitInvPow2 %s.w\n", tessArgs1Name);
    const char* tessArgs2Name = impl->getTessArgs2UniformName(uniformHandler);
    code.appendf("uniform vec2 %s;\n", tessArgs2Name);
    code.appendf("#define uRadialTolerancePow2 %s.x\n", tessArgs2Name);
    code.append(R"(
    in vec4 vsPts01[];
@ -322,6 +338,10 @@ SkString GrStrokeTessellateShader::getTessControlShaderGLSL(
        return determinant(mat2(a,b));
    }
    float length_pow2(vec2 v) {
        return dot(v, v);
    }
    void main() {
        // Unpack the input arguments from the vertex shader.
        mat4x2 P;
@ -348,15 +368,14 @@ SkString GrStrokeTessellateShader::getTessControlShaderGLSL(
        // section of the curve into. (See GrWangsFormula::cubic() for more documentation on this
        // formula.) The final tessellated strip will be a composition of these parametric segments
        // as well as radial segments.
-        float numParametricSegments = sqrt(
+        float w = length_pow2(max(abs(P[2] - P[1]*2.0 + P[0]),
-                .75*uIntolerance * length(max(abs(P[2] - P[1]*2.0 + P[0]),
+                                  abs(P[3] - P[2]*2.0 + P[1]))) * uCubicConstantPow2;
-                                              abs(P[3] - P[2]*2.0 + P[1]))));
+        float numParametricSegments = ceil(inversesqrt(inversesqrt(max(w, 1e-2))));
        if (P[0] == P[1] && P[2] == P[3]) {
            // This is how the patch builder articulates lineTos but Wang's formula returns
            // >>1 segment in this scenario. Assign 1 parametric segment.
            numParametricSegments = 1;
        }
        numParametricSegments = max(ceil(numParametricSegments), 1);
        // Determine the curve's start angle.
        float angle0 = atan2(tangents[0]);
@ -382,28 +401,27 @@ SkString GrStrokeTessellateShader::getTessControlShaderGLSL(
        // Calculate the number of evenly spaced radial segments to chop this section of the curve
        // into. Radial segments divide the curve's rotation into even steps. The final tessellated
        // strip will be a composition of both parametric and radial segments.
-        float radialTolerance = 1 / (uStrokeRadius * uIntolerance);
+        float numRadialSegments = abs(rotation) * uNumRadialSegmentsPerRadian;
        float numRadialSegments = abs(rotation) / (2 * acos(max(1 - radialTolerance, -1)));
        numRadialSegments = max(ceil(numRadialSegments), 1);
        if (gl_InvocationID == 0) {
            // Set up joins.
            numParametricSegments = 1;  // Joins don't have parametric segments.
            numRadialSegments = (uNumSegmentsInJoin == 0) ? numRadialSegments : uNumSegmentsInJoin;
-            float innerStrokeRadius = uStrokeRadius;
+            float innerStrokeRadiusMultiplier = 1;
            if (uNumSegmentsInJoin == 2) {
                // Miter join: extend the middle radius to either the miter point or the bevel edge.
                float x = fma(cosTheta, .5, .5);
-                innerStrokeRadius *= (x >= uMiterLimitPowMinus2) ? inversesqrt(x) : sqrt(x);
+                innerStrokeRadiusMultiplier = (x >= uMiterLimitInvPow2) ? inversesqrt(x) : sqrt(x);
            }
            vec2 strokeOutsetClamp = vec2(-1, 1);
-            if (distance(tan0norm, tan1norm) > radialTolerance) {
+            if (length_pow2(tan1norm - tan0norm) > uRadialTolerancePow2) {
                // Clamp the join to the exterior side of its junction. We only do this if the join
                // angle is large enough to guarantee there won't be cracks on the interior side of
                // the junction.
                strokeOutsetClamp = (rotation > 0) ? vec2(-1,0) : vec2(0,1);
            }
-            tcsJoinArgs = vec3(innerStrokeRadius, strokeOutsetClamp);
+            tcsJoinArgs = vec3(innerStrokeRadiusMultiplier, strokeOutsetClamp);
        }
        // The first and last edges are shared by both the parametric and radial sets of edges, so
@ -482,16 +500,16 @@ SkString GrStrokeTessellateShader::getTessEvaluationShaderGLSL(
    code.appendf("const float kPI = 3.141592653589793238;\n");
-    const char* tessControlArgsName = impl->getTessControlArgsUniformName(uniformHandler);
+    const char* tessArgs2Name = impl->getTessArgs2UniformName(uniformHandler);
-    code.appendf("uniform vec4 %s;\n", tessControlArgsName);
+    code.appendf("uniform vec2 %s;\n", tessArgs2Name);
-    code.appendf("#define uStrokeRadius %s.x\n", tessControlArgsName);
+    code.appendf("#define uStrokeRadius %s.y\n", tessArgs2Name);
    if (!this->viewMatrix().isIdentity()) {
        const char* translateName = impl->getTranslateUniformName(uniformHandler);
        code.appendf("uniform vec2 %s;\n", translateName);
        code.appendf("#define uTranslate %s\n", translateName);
        const char* affineMatrixName = impl->getAffineMatrixUniformName(uniformHandler);
-        code.appendf("uniform mat2x2 %s;\n", affineMatrixName);
+        code.appendf("uniform vec4 %s;\n", affineMatrixName);
        code.appendf("#define uAffineMatrix %s\n", affineMatrixName);
    }
@ -526,7 +544,7 @@ SkString GrStrokeTessellateShader::getTessEvaluationShaderGLSL(
            P = mat4x2(tcsPts01[0], tcsPt2Tan0[0].xy, tcsPts01[1].xy);
            tan0 = tcsPt2Tan0[0].zw;
            tessellationArgs = tcsTessArgs[0].yzw;
-            strokeRadius = (localEdgeID == 1) ? tcsJoinArgs.x : strokeRadius;
+            strokeRadius *= (localEdgeID == 1) ? tcsJoinArgs.x : 1;
            strokeOutsetClamp = tcsJoinArgs.yz;
        } else if ((localEdgeID -= tcsTessArgs[0].x) < tcsTessArgs[1].x) {
            // Our edge belongs to the first curve section.
@ -549,24 +567,25 @@ SkString GrStrokeTessellateShader::getTessEvaluationShaderGLSL(
        float angle0 = tessellationArgs.y;
        float radsPerSegment = tessellationArgs.z;
-        // Find a tangent matrix C' in power basis form. (This gives the derivative scaled by 1/3.)
+        // Start by finding the cubic's power basis coefficients. These define a tangent direction
-        //
+        // (scaled by a uniform 1/3) as:
-        //                                           |T^2|
+        //                                                 |T^2|
-        //     dx,dy (divided by 3) = tangent = C_ * |  T|
+        //     Tangent_Direction(T) = dx,dy = |A  2B  C| * |T  |
-        //                                           |  1|
+        //                                    |.   .  .|   |1  |
-        mat3x2 C_ = P * mat3x4(-1,  3, -3,  1,
+        vec2 C = P[1] - P[0];
-                                2, -4,  2,  0,
+        vec2 D = P[2] - P[1];
-                               -1,  1,  0,  0);
+        vec2 E = P[3] - P[0];
        vec2 B = D - C;
        vec2 A = fma(vec2(-3), D, E);
-        // Find the matrix C_s that produces an (arbitrarily scaled) tangent vector from a
+        // Now find the coefficients that give a tangent direction from a parametric edge ID:
        // parametric edge ID:
        //
-        //           |parametricEdgeId^2|
+        //                                                                 |parametricEdgeID^2|
-        //     C_s * |  parametricEdgeId| = tangent * s
+        //     Tangent_Direction(parametricEdgeID) = dx,dy = |A  B_  C_| * |parametricEdgeID  |
-        //           |                 1|
+        //                                                   |.   .   .|   |1                 |
        //
-        mat3x2 C_s = mat3x2(C_[0], C_[1] * numParametricSegments,
+        vec2 B_ = B * (numParametricSegments * 2);
-                            C_[2] * (numParametricSegments * numParametricSegments));
+        vec2 C_ = C * (numParametricSegments * numParametricSegments);
        // Run an O(log N) search to determine the highest parametric edge that is located on or
        // before the localEdgeID. A local edge ID is determined by the sum of complete parametric
@ -583,8 +602,8 @@ SkString GrStrokeTessellateShader::getTessEvaluationShaderGLSL(
            // Test the parametric edge at lastParametricEdgeID + 2^exp.
            float testParametricID = lastParametricEdgeID + (1 << exp);
            if (testParametricID <= maxParametricEdgeID) {
-                vec2 testTan = fma(vec2(testParametricID), C_s[0], C_s[1]);
+                vec2 testTan = fma(vec2(testParametricID), A, B_);
-                testTan = fma(vec2(testParametricID), testTan, C_s[2]);
+                testTan = fma(vec2(testParametricID), testTan, C_);
                float cosRotation = dot(normalize(testTan), tan0norm);
                float maxRotation = fma(testParametricID, negAbsRadsPerSegment, maxRotation0);
                maxRotation = min(maxRotation, kPI);
@ -605,35 +624,34 @@ SkString GrStrokeTessellateShader::getTessEvaluationShaderGLSL(
        // Now that we've identified the highest parametric edge on or before the localEdgeID, the
        // highest radial edge is easy:
        float lastRadialEdgeID = localEdgeID - lastParametricEdgeID;
        float radialAngle = fma(lastRadialEdgeID, radsPerSegment, angle0);
-        // Find the T value of the edge at lastRadialEdgeID. This is the point whose tangent angle
+        // Find the tangent vector on the edge at lastRadialEdgeID.
-        // is equal to radialAngle, or whose tangent vector is orthogonal to "norm".
+        float radialAngle = fma(lastRadialEdgeID, radsPerSegment, angle0);
        vec2 tangent = vec2(cos(radialAngle), sin(radialAngle));
        vec2 norm = vec2(-tangent.y, tangent.x);
-        // Find the T value where the cubic's tangent is orthogonal to norm:
+        // Find the T value where the cubic's tangent is orthogonal to norm. This is a quadratic:
        //
-        //                                          |T^2|
+        //     dot(norm, Tangent_Direction(T)) == 0
        //   dot(tangent, norm) == 0:   norm * C_ * |  T| == 0
        //                                          |  1|
        //
-        // The coeffs for the quadratic equation we need to solve are therefore: norm * C_.
+        //                         |T^2|
-        vec3 coeffs = norm * C_;
+        //     norm * |A  2B  C| * |T  | == 0
-        float a=coeffs.x, b=coeffs.y, c=coeffs.z;
+        //            |.   .  .|   |1  |
-
+        //
-        // Quadratic formula from Numerical Recipes in C.
+        vec3 coeffs = norm * mat3x2(A,B,C);
-        float x = sqrt(max(b*b - 4*a*c, 0));
+        float a=coeffs.x, b_over_2=coeffs.y, c=coeffs.z;
-        if (b < 0) {
+        float discr_over_4 = max(b_over_2*b_over_2 - a*c, 0);
-            x = -x;
+        float q = sqrt(discr_over_4);
        if (b_over_2 > 0) {
            q = -q;
        }
-        float q = -.5 * (b + x);
+        q -= b_over_2;
        // Roots are q/a and c/q. Since each curve section does not inflect or rotate more than 180
        // degrees, there can only be one tangent orthogonal to "norm" inside 0..1. Pick the root
-        // nearest 0.5.
+        // nearest .5.
-        float _5qa = .5*q*a;
+        float _5qa = -.5*q*a;
-        vec2 root = (abs(q*q - _5qa) < abs(a*c - _5qa)) ? vec2(q,a) : vec2(c,q);
+        vec2 root = (abs(fma(q,q,_5qa)) < abs(fma(a,c,_5qa))) ? vec2(q,a) : vec2(c,q);
        float radialT = (root.t != 0) ? root.s / root.t : 0;
        radialT = clamp(radialT, 0, 1);
@ -688,7 +706,7 @@ SkString GrStrokeTessellateShader::getTessEvaluationShaderGLSL(
    // Transform after tessellation. Stroke widths and normals are defined in (pre-transform) local
    // path space.
    if (!this->viewMatrix().isIdentity()) {
-        code.append("vertexPos = uAffineMatrix * vertexPos + uTranslate;");
+        code.append("vertexPos = mat2(uAffineMatrix) * vertexPos + uTranslate;");
    }
    code.append(R"(
--- a/src/gpu/tessellate/GrTessellationPathRenderer.cpp
+++ b/src/gpu/tessellate/GrTessellationPathRenderer.cpp
@ -81,7 +81,7 @@ void GrTessellationPathRenderer::initAtlasFlags(GrRecordingContext* rContext) {
    //     GrWangsFormula::worst_case_cubic(kLinearizationIntolerance, w, kMaxAtlasPathHeight^2 / w)
    //              == maxTessellationSegments
    //
-    float k = GrWangsFormula::cubic_k(kLinearizationIntolerance);
+    float k = GrWangsFormula::cubic_constant(kLinearizationIntolerance);
    float h = kMaxAtlasPathHeight;
    float s = caps.shaderCaps()->maxTessellationSegments();
    // Quadratic formula from Numerical Recipes in C:
--- a/src/gpu/tessellate/GrWangsFormula.h
+++ b/src/gpu/tessellate/GrWangsFormula.h
@ -28,7 +28,7 @@ SK_ALWAYS_INLINE static int ceil_log2_sqrt_sqrt(float x) {
 }
 // Constant term for the quatratic formula.
-constexpr float quadratic_k(float intolerance) {
+constexpr float quadratic_constant(float intolerance) {
    return .25f * intolerance;
 }
@ -43,7 +43,7 @@ SK_ALWAYS_INLINE static float quadratic_pow4(float intolerance, const SkPoint pt
    Sk2f v = p0 + p1*-2 + p2;
    v = vectorXform(v);
    Sk2f vv = v*v;
-    float k = quadratic_k(intolerance);
+    float k = quadratic_constant(intolerance);
    return k*k * (vv[0] + vv[1]);
 }
@ -62,7 +62,7 @@ SK_ALWAYS_INLINE static int quadratic_log2(float intolerance, const SkPoint pts[
 }
 // Constant term for the cubic formula.
-constexpr float cubic_k(float intolerance) {
+constexpr float cubic_constant(float intolerance) {
    return .75f * intolerance;
 }
@ -78,7 +78,7 @@ SK_ALWAYS_INLINE static float cubic_pow4(float intolerance, const SkPoint pts[],
    v = vectorXform(v);
    Sk4f vv = v*v;
    vv = Sk4f::Max(vv, SkNx_shuffle<2,3,0,1>(vv));
-    float k = cubic_k(intolerance);
+    float k = cubic_constant(intolerance);
    return k*k * (vv[0] + vv[1]);
 }
@ -100,7 +100,7 @@ SK_ALWAYS_INLINE static int cubic_log2(float intolerance, const SkPoint pts[],
 // would ever need to be divided into. This is simply a special case of the cubic formula where we
 // maximize its value by placing control points on specific corners of the bounding box.
 SK_ALWAYS_INLINE static float worst_case_cubic(float intolerance, float devWidth, float devHeight) {
-    float k = cubic_k(intolerance);
+    float k = cubic_constant(intolerance);
    return SkScalarSqrt(2*k * SkVector::Length(devWidth, devHeight));
 }
@ -108,7 +108,7 @@ SK_ALWAYS_INLINE static float worst_case_cubic(float intolerance, float devWidth
 // size would ever need to be divided into.
 SK_ALWAYS_INLINE static int worst_case_cubic_log2(float intolerance, float devWidth,
                                                  float devHeight) {
-    float k = cubic_k(intolerance);
+    float k = cubic_constant(intolerance);
    return ceil_log2_sqrt_sqrt(4*k*k * (devWidth * devWidth + devHeight * devHeight));
 }
--- a/tests/WangsFormulaTest.cpp
+++ b/tests/WangsFormulaTest.cpp
@ -30,7 +30,7 @@ static float wangs_formula_quadratic_reference_impl(float intolerance, const SkP
    Sk2f p0 = Sk2f::Load(pts);
    Sk2f p1 = Sk2f::Load(pts + 1);
    Sk2f p2 = Sk2f::Load(pts + 2);
-    float k = GrWangsFormula::quadratic_k(intolerance);
+    float k = GrWangsFormula::quadratic_constant(intolerance);
    return SkScalarSqrt(k * length(p0 - p1*2 + p2));
 }
@ -39,7 +39,7 @@ static float wangs_formula_cubic_reference_impl(float intolerance, const SkPoint
    Sk2f p1 = Sk2f::Load(pts + 1);
    Sk2f p2 = Sk2f::Load(pts + 2);
    Sk2f p3 = Sk2f::Load(pts + 3);
-    float k = GrWangsFormula::cubic_k(intolerance);
+    float k = GrWangsFormula::cubic_constant(intolerance);
    return SkScalarSqrt(k * length(Sk2f::Max((p0 - p1*2 + p2).abs(),
                                             (p1 - p2*2 + p3).abs())));
 }