Standardize on skvx aliases, plus clean-up

This adds aliases like skvx::float2, float4, etc. to SkVx.h and goes through existing usages of SkVx to standardize on those aliases, or refer to the full name directly. In particular, this lets us clean up the equivalent aliases in src/gpu/tessellate, src/gpu/graphite/VectorTypes, and src/gpu/ganesh/GrVx. Where possible, I switched to using skvx::Foo directly and leveraged auto to make it less redundant. Headers always used the full type except for PatchWriter.h and Rect.h because of the number of their usages. In those two headers, the alias is scoped to private so it can't leak. This is prep to migrate older code that is still using SkNx and its aliases like Sk4f to SkVx as well. Change-Id: I9dd104e83cf17c2b88995a047cfd2e2b0fe6fac2 Reviewed-on: https://skia-review.googlesource.com/c/skia/+/541058 Reviewed-by: Brian Osman <brianosman@google.com> Commit-Queue: Michael Ludwig <michaelludwig@google.com>
2022-05-16 14:58:37 -04:00 · 2022-05-16 14:58:37 -04:00 · 5c08e3c357
commit 5c08e3c357
parent 8d9d9fa93a
46 changed files with 508 additions and 763 deletions
--- a/gn/gpu.gni
+++ b/gn/gpu.gni
@ -226,7 +226,6 @@ skia_gpu_sources = [
  "$_src/gpu/ganesh/GrUtil.h",
  "$_src/gpu/ganesh/GrVertexChunkArray.cpp",
  "$_src/gpu/ganesh/GrVertexChunkArray.h",
-  "$_src/gpu/ganesh/GrVx.h",
  "$_src/gpu/ganesh/GrWaitRenderTask.cpp",
  "$_src/gpu/ganesh/GrWaitRenderTask.h",
  "$_src/gpu/ganesh/GrWindowRectangles.h",
--- a/gn/graphite.gni
+++ b/gn/graphite.gni
@ -113,7 +113,6 @@ skia_graphite_sources = [
  "$_src/geom/Shape.h",
  "$_src/geom/Transform.cpp",
  "$_src/geom/Transform_graphite.h",
-  "$_src/geom/VectorTypes.h",
  "$_src/render/CoverBoundsRenderStep.cpp",
  "$_src/render/CoverBoundsRenderStep.h",
  "$_src/render/MiddleOutFanRenderStep.cpp",
--- a/gn/tests.gni
+++ b/gn/tests.gni
@ -111,7 +111,6 @@ tests_sources = [
  "$_tests/GrSurfaceTest.cpp",
  "$_tests/GrTextBlobTest.cpp",
  "$_tests/GrTextureMipMapInvalidationTest.cpp",
-  "$_tests/GrVxTest.cpp",
  "$_tests/GradientTest.cpp",
  "$_tests/HSVRoundTripTest.cpp",
  "$_tests/HashTest.cpp",
--- a/include/private/SkVx.h
+++ b/include/private/SkVx.h
@ -790,73 +790,60 @@ SIN Vec<N,uint16_t> mull(const Vec<N,uint8_t>& x,
 }
 #endif

-// Allow floating point contraction. e.g., allow a*x + y to be compiled to a single FMA even though
-// it introduces LSB differences on platforms that don't have an FMA instruction.
-#if defined(__clang__)
-#pragma STDC FP_CONTRACT ON
-#endif
-
-// Approximates the inverse cosine of x within 0.96 degrees using the rational polynomial:
-//
-//     acos(x) ~= (bx^3 + ax) / (dx^4 + cx^2 + 1) + pi/2
-//
-// See: https://stackoverflow.com/a/36387954
-//
-// For a proof of max error, see the "SkVx_approx_acos" unit test.
-//
-// NOTE: This function deviates immediately from pi and 0 outside -1 and 1. (The derivatives are
-// infinite at -1 and 1). So the input must still be clamped between -1 and 1.
-#define SKVX_APPROX_ACOS_MAX_ERROR SkDegreesToRadians(.96f)
-SIN Vec<N,float> approx_acos(Vec<N,float> x) {
-    constexpr static float a = -0.939115566365855f;
-    constexpr static float b =  0.9217841528914573f;
-    constexpr static float c = -1.2845906244690837f;
-    constexpr static float d =  0.295624144969963174f;
-    constexpr static float pi_over_2 = 1.5707963267948966f;
-    auto xx = x*x;
-    auto numer = b*xx + a;
-    auto denom = xx*(d*xx + c) + 1;
-    return x * (numer/denom) + pi_over_2;
+SINT T dot(const Vec<N, T>& a, const Vec<N, T>& b) {
+    auto ab = a*b;
+    if constexpr (N == 2) {
+        return ab[0] + ab[1];
+    } else if constexpr (N == 4) {
+        return ab[0] + ab[1] + ab[2] + ab[3];
+    } else {
+        T sum = ab[0];
+        for (int i = 1; i < N; ++i) {
+            sum += ab[i];
+        }
+        return sum;
+    }
 }

-#if defined(__clang__)
-#pragma STDC FP_CONTRACT DEFAULT
-#endif
+SI float cross(const Vec<2, float>& a, const Vec<2, float>& b) {
+    auto x = a * shuffle<1,0>(b);
+    return x[0] - x[1];
+}

 // De-interleaving load of 4 vectors.
 //
 // WARNING: These are really only supported well on NEON. Consider restructuring your data before
 // resorting to these methods.
 SIT void strided_load4(const T* v,
-                       skvx::Vec<1,T>& a,
-                       skvx::Vec<1,T>& b,
-                       skvx::Vec<1,T>& c,
-                       skvx::Vec<1,T>& d) {
+                       Vec<1,T>& a,
+                       Vec<1,T>& b,
+                       Vec<1,T>& c,
+                       Vec<1,T>& d) {
    a.val = v[0];
    b.val = v[1];
    c.val = v[2];
    d.val = v[3];
 }
 SINT void strided_load4(const T* v,
-                        skvx::Vec<N,T>& a,
-                        skvx::Vec<N,T>& b,
-                        skvx::Vec<N,T>& c,
-                        skvx::Vec<N,T>& d) {
+                        Vec<N,T>& a,
+                        Vec<N,T>& b,
+                        Vec<N,T>& c,
+                        Vec<N,T>& d) {
    strided_load4(v, a.lo, b.lo, c.lo, d.lo);
    strided_load4(v + 4*(N/2), a.hi, b.hi, c.hi, d.hi);
 }
 #if SKVX_USE_SIMD && defined(__ARM_NEON)
 #define IMPL_LOAD4_TRANSPOSED(N, T, VLD) \
 SI void strided_load4(const T* v, \
-                      skvx::Vec<N,T>& a, \
-                      skvx::Vec<N,T>& b, \
-                      skvx::Vec<N,T>& c, \
-                      skvx::Vec<N,T>& d) { \
+                      Vec<N,T>& a, \
+                      Vec<N,T>& b, \
+                      Vec<N,T>& c, \
+                      Vec<N,T>& d) { \
    auto mat = VLD(v); \
-    a = skvx::bit_pun<skvx::Vec<N,T>>(mat.val[0]); \
-    b = skvx::bit_pun<skvx::Vec<N,T>>(mat.val[1]); \
-    c = skvx::bit_pun<skvx::Vec<N,T>>(mat.val[2]); \
-    d = skvx::bit_pun<skvx::Vec<N,T>>(mat.val[3]); \
+    a = bit_pun<Vec<N,T>>(mat.val[0]); \
+    b = bit_pun<Vec<N,T>>(mat.val[1]); \
+    c = bit_pun<Vec<N,T>>(mat.val[2]); \
+    d = bit_pun<Vec<N,T>>(mat.val[3]); \
 }
 IMPL_LOAD4_TRANSPOSED(2, uint32_t, vld4_u32)
 IMPL_LOAD4_TRANSPOSED(4, uint16_t, vld4_u16)
@ -881,7 +868,6 @@ SI void strided_load4(const float* v,
                      Vec<4,float>& b,
                      Vec<4,float>& c,
                      Vec<4,float>& d) {
-    using skvx::bit_pun;
    __m128 a_ = _mm_loadu_ps(v);
    __m128 b_ = _mm_loadu_ps(v+4);
    __m128 c_ = _mm_loadu_ps(v+8);
@ -898,20 +884,20 @@ SI void strided_load4(const float* v,
 //
 // WARNING: These are really only supported well on NEON. Consider restructuring your data before
 // resorting to these methods.
-SIT void strided_load2(const T* v, skvx::Vec<1,T>& a, skvx::Vec<1,T>& b) {
+SIT void strided_load2(const T* v, Vec<1,T>& a, Vec<1,T>& b) {
    a.val = v[0];
    b.val = v[1];
 }
-SINT void strided_load2(const T* v, skvx::Vec<N,T>& a, skvx::Vec<N,T>& b) {
+SINT void strided_load2(const T* v, Vec<N,T>& a, Vec<N,T>& b) {
    strided_load2(v, a.lo, b.lo);
    strided_load2(v + 2*(N/2), a.hi, b.hi);
 }
 #if SKVX_USE_SIMD && defined(__ARM_NEON)
 #define IMPL_LOAD2_TRANSPOSED(N, T, VLD) \
-SI void strided_load2(const T* v, skvx::Vec<N,T>& a, skvx::Vec<N,T>& b) { \
+SI void strided_load2(const T* v, Vec<N,T>& a, Vec<N,T>& b) { \
    auto mat = VLD(v); \
-    a = skvx::bit_pun<skvx::Vec<N,T>>(mat.val[0]); \
-    b = skvx::bit_pun<skvx::Vec<N,T>>(mat.val[1]); \
+    a = bit_pun<Vec<N,T>>(mat.val[0]); \
+    b = bit_pun<Vec<N,T>>(mat.val[1]); \
 }
 IMPL_LOAD2_TRANSPOSED(2, uint32_t, vld2_u32)
 IMPL_LOAD2_TRANSPOSED(4, uint16_t, vld2_u16)
@ -930,6 +916,37 @@ IMPL_LOAD2_TRANSPOSED(4, float, vld2q_f32)
 #undef IMPL_LOAD2_TRANSPOSED
 #endif

+// Define commonly used aliases
+using float2  = Vec< 2, float>;
+using float4  = Vec< 4, float>;
+using float8  = Vec< 8, float>;
+
+using double2 = Vec< 2, double>;
+using double4 = Vec< 4, double>;
+using double8 = Vec< 8, double>;
+
+using byte2   = Vec< 2, uint8_t>;
+using byte4   = Vec< 4, uint8_t>;
+using byte8   = Vec< 8, uint8_t>;
+using byte16  = Vec<16, uint8_t>;
+
+using int2    = Vec< 2, int32_t>;
+using int4    = Vec< 4, int32_t>;
+using int8    = Vec< 8, int32_t>;
+
+using uint2   = Vec< 2, uint32_t>;
+using uint4   = Vec< 4, uint32_t>;
+using uint8   = Vec< 8, uint32_t>;
+
+using long2   = Vec< 2, int64_t>;
+using long4   = Vec< 4, int64_t>;
+using long8   = Vec< 8, int64_t>;
+
+// Use with from_half and to_half to convert between floatX, and use these for storage.
+using half2   = Vec< 2, uint16_t>;
+using half4   = Vec< 4, uint16_t>;
+using half8   = Vec< 8, uint16_t>;
+
 }  // namespace skvx

 #undef SINTU
--- a/public.bzl
+++ b/public.bzl
@ -1012,7 +1012,6 @@ BASE_SRCS_ALL = [
    "src/gpu/ganesh/GrUtil.h",
    "src/gpu/ganesh/GrVertexChunkArray.cpp",
    "src/gpu/ganesh/GrVertexChunkArray.h",
-    "src/gpu/ganesh/GrVx.h",
    "src/gpu/ganesh/GrWaitRenderTask.cpp",
    "src/gpu/ganesh/GrWaitRenderTask.h",
    "src/gpu/ganesh/GrWindowRectangles.h",
--- a/src/core/BUILD.bazel
+++ b/src/core/BUILD.bazel
@ -5729,10 +5729,10 @@ generated_cc_atom(
        "//include/core:SkSpan_hdr",
        "//include/core:SkTileMode_hdr",
        "//include/private:SkColorData_hdr",
+        "//include/private:SkVx_hdr",
        "//src/gpu:Blend_hdr",
        "//src/gpu/graphite:TextureProxy_hdr",
        "//src/gpu/graphite:UniformManager_hdr",
-        "//src/gpu/graphite/geom:VectorTypes_hdr",
    ],
 )

--- a/src/core/SkM44.cpp
+++ b/src/core/SkM44.cpp
@ -13,23 +13,20 @@
 #include "src/core/SkMatrixPriv.h"
 #include "src/core/SkPathPriv.h"

-using sk4f = skvx::Vec<4, float>;
-using sk2f = skvx::Vec<2, float>;
-
 bool SkM44::operator==(const SkM44& other) const {
    if (this == &other) {
        return true;
    }

-    sk4f a0 = sk4f::Load(fMat +  0);
-    sk4f a1 = sk4f::Load(fMat +  4);
-    sk4f a2 = sk4f::Load(fMat +  8);
-    sk4f a3 = sk4f::Load(fMat + 12);
+    auto a0 = skvx::float4::Load(fMat +  0);
+    auto a1 = skvx::float4::Load(fMat +  4);
+    auto a2 = skvx::float4::Load(fMat +  8);
+    auto a3 = skvx::float4::Load(fMat + 12);

-    sk4f b0 = sk4f::Load(other.fMat +  0);
-    sk4f b1 = sk4f::Load(other.fMat +  4);
-    sk4f b2 = sk4f::Load(other.fMat +  8);
-    sk4f b3 = sk4f::Load(other.fMat + 12);
+    auto b0 = skvx::float4::Load(other.fMat +  0);
+    auto b1 = skvx::float4::Load(other.fMat +  4);
+    auto b2 = skvx::float4::Load(other.fMat +  8);
+    auto b3 = skvx::float4::Load(other.fMat + 12);

    auto eq = (a0 == b0) & (a1 == b1) & (a2 == b2) & (a3 == b3);
    return (eq[0] & eq[1] & eq[2] & eq[3]) == ~0;
@ -47,19 +44,19 @@ void SkM44::getRowMajor(SkScalar v[]) const {
 }

 SkM44& SkM44::setConcat(const SkM44& a, const SkM44& b) {
-    sk4f c0 = sk4f::Load(a.fMat +  0);
-    sk4f c1 = sk4f::Load(a.fMat +  4);
-    sk4f c2 = sk4f::Load(a.fMat +  8);
-    sk4f c3 = sk4f::Load(a.fMat + 12);
+    auto c0 = skvx::float4::Load(a.fMat +  0);
+    auto c1 = skvx::float4::Load(a.fMat +  4);
+    auto c2 = skvx::float4::Load(a.fMat +  8);
+    auto c3 = skvx::float4::Load(a.fMat + 12);

-    auto compute = [&](sk4f r) {
+    auto compute = [&](skvx::float4 r) {
        return c0*r[0] + (c1*r[1] + (c2*r[2] + c3*r[3]));
    };

-    sk4f m0 = compute(sk4f::Load(b.fMat +  0));
-    sk4f m1 = compute(sk4f::Load(b.fMat +  4));
-    sk4f m2 = compute(sk4f::Load(b.fMat +  8));
-    sk4f m3 = compute(sk4f::Load(b.fMat + 12));
+    auto m0 = compute(skvx::float4::Load(b.fMat +  0));
+    auto m1 = compute(skvx::float4::Load(b.fMat +  4));
+    auto m2 = compute(skvx::float4::Load(b.fMat +  8));
+    auto m3 = compute(skvx::float4::Load(b.fMat + 12));

    m0.store(fMat +  0);
    m1.store(fMat +  4);
@ -69,17 +66,17 @@ SkM44& SkM44::setConcat(const SkM44& a, const SkM44& b) {
 }

 SkM44& SkM44::preConcat(const SkMatrix& b) {
-    sk4f c0 = sk4f::Load(fMat +  0);
-    sk4f c1 = sk4f::Load(fMat +  4);
-    sk4f c3 = sk4f::Load(fMat + 12);
+    auto c0 = skvx::float4::Load(fMat +  0);
+    auto c1 = skvx::float4::Load(fMat +  4);
+    auto c3 = skvx::float4::Load(fMat + 12);

    auto compute = [&](float r0, float r1, float r3) {
        return (c0*r0 + (c1*r1 + c3*r3));
    };

-    sk4f m0 = compute(b[0], b[3], b[6]);
-    sk4f m1 = compute(b[1], b[4], b[7]);
-    sk4f m3 = compute(b[2], b[5], b[8]);
+    auto m0 = compute(b[0], b[3], b[6]);
+    auto m1 = compute(b[1], b[4], b[7]);
+    auto m3 = compute(b[2], b[5], b[8]);

    m0.store(fMat +  0);
    m1.store(fMat +  4);
@ -88,10 +85,10 @@ SkM44& SkM44::preConcat(const SkMatrix& b) {
 }

 SkM44& SkM44::preTranslate(SkScalar x, SkScalar y, SkScalar z) {
-    sk4f c0 = sk4f::Load(fMat +  0);
-    sk4f c1 = sk4f::Load(fMat +  4);
-    sk4f c2 = sk4f::Load(fMat +  8);
-    sk4f c3 = sk4f::Load(fMat + 12);
+    auto c0 = skvx::float4::Load(fMat +  0);
+    auto c1 = skvx::float4::Load(fMat +  4);
+    auto c2 = skvx::float4::Load(fMat +  8);
+    auto c3 = skvx::float4::Load(fMat + 12);

    // only need to update the last column
    (c0*x + (c1*y + (c2*z + c3))).store(fMat + 12);
@ -99,17 +96,17 @@ SkM44& SkM44::preTranslate(SkScalar x, SkScalar y, SkScalar z) {
 }

 SkM44& SkM44::postTranslate(SkScalar x, SkScalar y, SkScalar z) {
-    sk4f t = { x, y, z, 0 };
-    (t * fMat[ 3] + sk4f::Load(fMat +  0)).store(fMat +  0);
-    (t * fMat[ 7] + sk4f::Load(fMat +  4)).store(fMat +  4);
-    (t * fMat[11] + sk4f::Load(fMat +  8)).store(fMat +  8);
-    (t * fMat[15] + sk4f::Load(fMat + 12)).store(fMat + 12);
+    skvx::float4 t = { x, y, z, 0 };
+    (t * fMat[ 3] + skvx::float4::Load(fMat +  0)).store(fMat +  0);
+    (t * fMat[ 7] + skvx::float4::Load(fMat +  4)).store(fMat +  4);
+    (t * fMat[11] + skvx::float4::Load(fMat +  8)).store(fMat +  8);
+    (t * fMat[15] + skvx::float4::Load(fMat + 12)).store(fMat + 12);
    return *this;
 }

 SkM44& SkM44::preScale(SkScalar x, SkScalar y) {
-    sk4f c0 = sk4f::Load(fMat +  0);
-    sk4f c1 = sk4f::Load(fMat +  4);
+    auto c0 = skvx::float4::Load(fMat +  0);
+    auto c1 = skvx::float4::Load(fMat +  4);

    (c0 * x).store(fMat + 0);
    (c1 * y).store(fMat + 4);
@ -117,9 +114,9 @@ SkM44& SkM44::preScale(SkScalar x, SkScalar y) {
 }

 SkM44& SkM44::preScale(SkScalar x, SkScalar y, SkScalar z) {
-    sk4f c0 = sk4f::Load(fMat +  0);
-    sk4f c1 = sk4f::Load(fMat +  4);
-    sk4f c2 = sk4f::Load(fMat +  8);
+    auto c0 = skvx::float4::Load(fMat +  0);
+    auto c1 = skvx::float4::Load(fMat +  4);
+    auto c2 = skvx::float4::Load(fMat +  8);

    (c0 * x).store(fMat + 0);
    (c1 * y).store(fMat + 4);
@ -128,10 +125,10 @@ SkM44& SkM44::preScale(SkScalar x, SkScalar y, SkScalar z) {
 }

 SkV4 SkM44::map(float x, float y, float z, float w) const {
-    sk4f c0 = sk4f::Load(fMat +  0);
-    sk4f c1 = sk4f::Load(fMat +  4);
-    sk4f c2 = sk4f::Load(fMat +  8);
-    sk4f c3 = sk4f::Load(fMat + 12);
+    auto c0 = skvx::float4::Load(fMat +  0);
+    auto c1 = skvx::float4::Load(fMat +  4);
+    auto c2 = skvx::float4::Load(fMat +  8);
+    auto c3 = skvx::float4::Load(fMat + 12);

    SkV4 v;
    (c0*x + (c1*y + (c2*z + c3*w))).store(&v.x);
@ -139,19 +136,19 @@ SkV4 SkM44::map(float x, float y, float z, float w) const {
 }

 static SkRect map_rect_affine(const SkRect& src, const float mat[16]) {
-    // When multiplied against vectors of the form <x,y,x,y>, 'flip' allows a single min(sk4f, sk4f)
+    // When multiplied against vectors of the form <x,y,x,y>, 'flip' allows a single min()
    // to compute both the min and "negated" max between the xy coordinates. Once finished, another
    // multiplication produces the original max.
-    const sk4f flip{1.f, 1.f, -1.f, -1.f};
+    const skvx::float4 flip{1.f, 1.f, -1.f, -1.f};

    // Since z = 0 and it's assumed ther's no perspective, only load the upper 2x2 and (tx,ty) in c3
-    sk4f c0 = skvx::shuffle<0,1,0,1>(sk2f::Load(mat + 0)) * flip;
-    sk4f c1 = skvx::shuffle<0,1,0,1>(sk2f::Load(mat + 4)) * flip;
-    sk4f c3 = skvx::shuffle<0,1,0,1>(sk2f::Load(mat + 12));
+    auto c0 = skvx::shuffle<0,1,0,1>(skvx::float2::Load(mat + 0)) * flip;
+    auto c1 = skvx::shuffle<0,1,0,1>(skvx::float2::Load(mat + 4)) * flip;
+    auto c3 = skvx::shuffle<0,1,0,1>(skvx::float2::Load(mat + 12));

    // Compute the min and max of the four transformed corners pre-translation; then translate once
    // at the end.
-    sk4f minMax = c3 + flip * min(min(c0 * src.fLeft  + c1 * src.fTop,
+    auto minMax = c3 + flip * min(min(c0 * src.fLeft  + c1 * src.fTop,
                                      c0 * src.fRight + c1 * src.fTop),
                                  min(c0 * src.fLeft  + c1 * src.fBottom,
                                      c0 * src.fRight + c1 * src.fBottom));
@ -165,36 +162,36 @@ static SkRect map_rect_affine(const SkRect& src, const float mat[16]) {
 static SkRect map_rect_perspective(const SkRect& src, const float mat[16]) {
    // Like map_rect_affine, z = 0 so we can skip the 3rd column, but we do need to compute w's
    // for each corner of the src rect.
-    sk4f c0 = sk4f::Load(mat + 0);
-    sk4f c1 = sk4f::Load(mat + 4);
-    sk4f c3 = sk4f::Load(mat + 12);
+    auto c0 = skvx::float4::Load(mat + 0);
+    auto c1 = skvx::float4::Load(mat + 4);
+    auto c3 = skvx::float4::Load(mat + 12);

    // Unlike map_rect_affine, we do not defer the 4th column since we may need to homogeneous
    // coordinates to clip against the w=0 plane
-    sk4f tl = c0 * src.fLeft  + c1 * src.fTop    + c3;
-    sk4f tr = c0 * src.fRight + c1 * src.fTop    + c3;
-    sk4f bl = c0 * src.fLeft  + c1 * src.fBottom + c3;
-    sk4f br = c0 * src.fRight + c1 * src.fBottom + c3;
+    auto tl = c0 * src.fLeft  + c1 * src.fTop    + c3;
+    auto tr = c0 * src.fRight + c1 * src.fTop    + c3;
+    auto bl = c0 * src.fLeft  + c1 * src.fBottom + c3;
+    auto br = c0 * src.fRight + c1 * src.fBottom + c3;

    // After clipping to w>0 and projecting to 2d, 'project' employs the same negation trick to
    // compute min and max at the same time.
-    const sk4f flip{1.f, 1.f, -1.f, -1.f};
-    auto project = [&flip](const sk4f& p0, const sk4f& p1, const sk4f& p2) {
+    const skvx::float4 flip{1.f, 1.f, -1.f, -1.f};
+    auto project = [&flip](const skvx::float4& p0, const skvx::float4& p1, const skvx::float4& p2) {
        float w0 = p0[3];
        if (w0 >= SkPathPriv::kW0PlaneDistance) {
            // Unclipped, just divide by w
            return flip * skvx::shuffle<0,1,0,1>(p0) / w0;
        } else {
-            auto clip = [&](const sk4f& p) {
+            auto clip = [&](const skvx::float4& p) {
                float w = p[3];
                if (w >= SkPathPriv::kW0PlaneDistance) {
                    float t = (SkPathPriv::kW0PlaneDistance - w0) / (w - w0);
-                    sk2f c = (t * skvx::shuffle<0,1>(p) + (1.f - t) * skvx::shuffle<0,1>(p0)) /
+                    auto c = (t * skvx::shuffle<0,1>(p) + (1.f - t) * skvx::shuffle<0,1>(p0)) /
                                  SkPathPriv::kW0PlaneDistance;

                    return flip * skvx::shuffle<0,1,0,1>(c);
                } else {
-                    return sk4f(SK_ScalarInfinity);
+                    return skvx::float4(SK_ScalarInfinity);
                }
            };
            // Clip both edges leaving p0, and return the min/max of the two clipped points
@ -206,7 +203,7 @@ static SkRect map_rect_perspective(const SkRect& src, const float mat[16]) {

    // Project all 4 corners, and pass in their adjacent vertices for clipping if it has w < 0,
    // then accumulate the min and max xy's.
-    sk4f minMax = flip * min(min(project(tl, tr, bl), project(tr, br, tl)),
+    auto minMax = flip * min(min(project(tl, tr, bl), project(tr, br, tl)),
                             min(project(br, bl, tr), project(bl, tl, br)));

    SkRect r;
@ -231,10 +228,10 @@ void SkM44::normalizePerspective() {
    // and therefore faster (e.g. clients can forward-difference calculations).
    if (fMat[15] != 1 && fMat[15] != 0 && fMat[3] == 0 && fMat[7] == 0 && fMat[11] == 0) {
        double inv = 1.0 / fMat[15];
-        (sk4f::Load(fMat +  0) * inv).store(fMat +  0);
-        (sk4f::Load(fMat +  4) * inv).store(fMat +  4);
-        (sk4f::Load(fMat +  8) * inv).store(fMat +  8);
-        (sk4f::Load(fMat + 12) * inv).store(fMat + 12);
+        (skvx::float4::Load(fMat +  0) * inv).store(fMat +  0);
+        (skvx::float4::Load(fMat +  4) * inv).store(fMat +  4);
+        (skvx::float4::Load(fMat +  8) * inv).store(fMat +  8);
+        (skvx::float4::Load(fMat + 12) * inv).store(fMat + 12);
        fMat[15] = 1.0f;
    }
 }
--- a/src/core/SkPipelineData.h
+++ b/src/core/SkPipelineData.h
@ -18,10 +18,10 @@
 #include "src/core/SkEnumBitMask.h"

 #ifdef SK_GRAPHITE_ENABLED
+#include "include/private/SkVx.h"
 #include "src/gpu/Blend.h"
 #include "src/gpu/graphite/TextureProxy.h"
 #include "src/gpu/graphite/UniformManager.h"
-#include "src/gpu/graphite/geom/VectorTypes.h"
 #endif

 class SkArenaAlloc;
@ -172,7 +172,7 @@ public:
    void write(const float* floats, int count) { fUniformManager.write(floats, count); }
    void write(float f) { fUniformManager.write(&f, 1); }
    void write(int i) { fUniformManager.write(i); }
-    void write(skgpu::graphite::float2 v) { fUniformManager.write(v); }
+    void write(skvx::float2 v) { fUniformManager.write(v); }

    bool hasUniforms() const { return fUniformManager.size(); }

--- a/src/gpu/ganesh/BUILD.bazel
+++ b/src/gpu/ganesh/BUILD.bazel
@ -3152,16 +3152,6 @@ generated_cc_atom(
    ],
 )

-generated_cc_atom(
-    name = "GrVx_hdr",
-    hdrs = ["GrVx.h"],
-    visibility = ["//:__subpackages__"],
-    deps = [
-        "//include/core:SkTypes_hdr",
-        "//include/private:SkVx_hdr",
-    ],
-)
-
 generated_cc_atom(
    name = "GrWaitRenderTask_hdr",
    hdrs = ["GrWaitRenderTask.h"],
--- a/src/gpu/ganesh/GrVx.h
+++ b/src/gpu/ganesh/GrVx.h
@ -1,43 +0,0 @@
-/*
- * Copyright 2020 Google LLC.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#ifndef GrVx_DEFINED
-#define GrVx_DEFINED
-
-#include "include/core/SkTypes.h"
-#include "include/private/SkVx.h"
-
-// grvx is Ganesh's addendum to skvx, Skia's SIMD library. Here we introduce functions that are
-// approximate and/or have LSB differences from platform to platform (e.g., by using hardware FMAs
-// when available). When a function is approximate, its error range is well documented and tested.
-namespace grvx {
-
-// Use familiar type names and functions from SkSL and GLSL.
-template<int N> using vec = skvx::Vec<N, float>;
-using float2 = vec<2>;
-using float4 = vec<4>;
-
-template<int N> using ivec = skvx::Vec<N, int32_t>;
-using int2 = ivec<2>;
-using int4 = ivec<4>;
-
-template<int N> using uvec = skvx::Vec<N, uint32_t>;
-using uint2 = uvec<2>;
-using uint4 = uvec<4>;
-
-static SK_ALWAYS_INLINE float dot(float2 a, float2 b) {
-    float2 ab = a*b;
-    return ab[0] + ab[1];
-}
-
-static SK_ALWAYS_INLINE float cross(float2 a, float2 b) {
-    float2 x = a*skvx::shuffle<1,0>(b);
-    return x[0] - x[1];
-}
-};  // namespace grvx
-
-#endif
--- a/src/gpu/ganesh/geometry/BUILD.bazel
+++ b/src/gpu/ganesh/geometry/BUILD.bazel
@ -69,7 +69,6 @@ generated_cc_atom(
        "//src/core:SkGeometry_hdr",
        "//src/core:SkPathPriv_hdr",
        "//src/gpu:BufferWriter_hdr",
-        "//src/gpu/ganesh:GrVx_hdr",
    ],
 )

--- a/src/gpu/ganesh/geometry/GrPathUtils.h
+++ b/src/gpu/ganesh/geometry/GrPathUtils.h
@ -13,7 +13,6 @@
 #include "src/core/SkGeometry.h"
 #include "src/core/SkPathPriv.h"
 #include "src/gpu/BufferWriter.h"
-#include "src/gpu/ganesh/GrVx.h"

 class SkMatrix;

--- a/src/gpu/ganesh/geometry/GrQuadUtils.cpp
+++ b/src/gpu/ganesh/geometry/GrQuadUtils.cpp
@ -13,8 +13,8 @@
 #include "src/core/SkPathPriv.h"
 #include "src/gpu/ganesh/geometry/GrQuad.h"

-using V4f = skvx::Vec<4, float>;
-using M4f = skvx::Vec<4, int32_t>;
+using float4 = skvx::float4;
+using mask4  = skvx::int4; // aliased to 'mask' to emphasize that it will hold boolean SIMD masks.

 #define AI SK_ALWAYS_INLINE

@ -37,14 +37,14 @@ static AI skvx::Vec<4, T> next_ccw(const skvx::Vec<4, T>& v) {
    return skvx::shuffle<1, 3, 0, 2>(v);
 }

-static AI V4f next_diag(const V4f& v) {
+static AI float4 next_diag(const float4& v) {
    // Same as next_ccw(next_ccw(v)), or next_cw(next_cw(v)), e.g. two rotations either direction.
    return skvx::shuffle<3, 2, 1, 0>(v);
 }

 // Replaces zero-length 'bad' edge vectors with the reversed opposite edge vector.
 // e3 may be null if only 2D edges need to be corrected for.
-static AI void correct_bad_edges(const M4f& bad, V4f* e1, V4f* e2, V4f* e3) {
+static AI void correct_bad_edges(const mask4& bad, float4* e1, float4* e2, float4* e3) {
    if (any(bad)) {
        // Want opposite edges, L B T R -> R T B L but with flipped sign to preserve winding
        *e1 = if_then_else(bad, -next_diag(*e1), *e1);
@ -56,7 +56,7 @@ static AI void correct_bad_edges(const M4f& bad, V4f* e1, V4f* e2, V4f* e3) {
 }

 // Replace 'bad' coordinates by rotating CCW to get the next point. c3 may be null for 2D points.
-static AI void correct_bad_coords(const M4f& bad, V4f* c1, V4f* c2, V4f* c3) {
+static AI void correct_bad_coords(const mask4& bad, float4* c1, float4* c2, float4* c3) {
    if (any(bad)) {
        *c1 = if_then_else(bad, next_ccw(*c1), *c1);
        *c2 = if_then_else(bad, next_ccw(*c2), *c2);
@ -238,8 +238,8 @@ static bool is_simple_rect(const GrQuad& quad) {
 // Calculates barycentric coordinates for each point in (testX, testY) in the triangle formed by
 // (x0,y0) - (x1,y1) - (x2, y2) and stores them in u, v, w.
 static bool barycentric_coords(float x0, float y0, float x1, float y1, float x2, float y2,
-                               const V4f& testX, const V4f& testY,
-                               V4f* u, V4f* v, V4f* w) {
+                               const float4& testX, const float4& testY,
+                               float4* u, float4* v, float4* w) {
    // The 32-bit calculations can have catastrophic cancellation if the device-space coordinates
    // are really big, and this code needs to handle that because we evaluate barycentric coords
    // pre-cropping to the render target bounds. This preserves some precision by shrinking the
@ -295,11 +295,11 @@ static bool barycentric_coords(float x0, float y0, float x1, float y1, float x2,
        invDenom = sk_ieee_float_divide(1.f, invDenom);
    }

-    V4f v2x = (scaleX * testX) - x0;
-    V4f v2y = (scaleY * testY) - y0;
+    float4 v2x = (scaleX * testX) - x0;
+    float4 v2y = (scaleY * testY) - y0;

-    V4f dot02 = v0x * v2x + v0y * v2y;
-    V4f dot12 = v1x * v2x + v1y * v2y;
+    float4 dot02 = v0x * v2x + v0y * v2y;
+    float4 dot12 = v1x * v2x + v1y * v2y;

    // These are relative to the vertices, so there's no need to undo the scale factor
    *u = (dot11 * dot02 - dot01 * dot12) * invDenom;
@ -309,45 +309,45 @@ static bool barycentric_coords(float x0, float y0, float x1, float y1, float x2,
    return true;
 }

-static M4f inside_triangle(const V4f& u, const V4f& v, const V4f& w) {
+static mask4 inside_triangle(const float4& u, const float4& v, const float4& w) {
    return ((u >= 0.f) & (u <= 1.f)) & ((v >= 0.f) & (v <= 1.f)) & ((w >= 0.f) & (w <= 1.f));
 }

 ///////////////////////////////////////////////////////////////////////////////////////////////////

 SkRect GrQuad::projectedBounds() const {
-    V4f xs = this->x4f();
-    V4f ys = this->y4f();
-    V4f ws = this->w4f();
-    M4f clipW = ws < SkPathPriv::kW0PlaneDistance;
+    float4 xs = this->x4f();
+    float4 ys = this->y4f();
+    float4 ws = this->w4f();
+    mask4 clipW = ws < SkPathPriv::kW0PlaneDistance;
    if (any(clipW)) {
-        V4f x2d = xs / ws;
-        V4f y2d = ys / ws;
+        float4 x2d = xs / ws;
+        float4 y2d = ys / ws;
        // Bounds of just the projected points in front of w = epsilon
        SkRect frontBounds = {
-            min(if_then_else(clipW, V4f(SK_ScalarInfinity), x2d)),
-            min(if_then_else(clipW, V4f(SK_ScalarInfinity), y2d)),
-            max(if_then_else(clipW, V4f(SK_ScalarNegativeInfinity), x2d)),
-            max(if_then_else(clipW, V4f(SK_ScalarNegativeInfinity), y2d))
+            min(if_then_else(clipW, float4(SK_ScalarInfinity), x2d)),
+            min(if_then_else(clipW, float4(SK_ScalarInfinity), y2d)),
+            max(if_then_else(clipW, float4(SK_ScalarNegativeInfinity), x2d)),
+            max(if_then_else(clipW, float4(SK_ScalarNegativeInfinity), y2d))
        };
        // Calculate clipped coordinates by following CCW edges, only keeping points where the w
        // actually changes sign between the vertices.
-        V4f t = (SkPathPriv::kW0PlaneDistance - ws) / (next_ccw(ws) - ws);
+        float4 t = (SkPathPriv::kW0PlaneDistance - ws) / (next_ccw(ws) - ws);
        x2d = (t * next_ccw(xs) + (1.f - t) * xs) / SkPathPriv::kW0PlaneDistance;
        y2d = (t * next_ccw(ys) + (1.f - t) * ys) / SkPathPriv::kW0PlaneDistance;
        // True if (w < e) xor (ccw(w) < e), i.e. crosses the w = epsilon plane
        clipW = clipW ^ (next_ccw(ws) < SkPathPriv::kW0PlaneDistance);
        return {
-            min(if_then_else(clipW, x2d, V4f(frontBounds.fLeft))),
-            min(if_then_else(clipW, y2d, V4f(frontBounds.fTop))),
-            max(if_then_else(clipW, x2d, V4f(frontBounds.fRight))),
-            max(if_then_else(clipW, y2d, V4f(frontBounds.fBottom)))
+            min(if_then_else(clipW, x2d, float4(frontBounds.fLeft))),
+            min(if_then_else(clipW, y2d, float4(frontBounds.fTop))),
+            max(if_then_else(clipW, x2d, float4(frontBounds.fRight))),
+            max(if_then_else(clipW, y2d, float4(frontBounds.fBottom)))
        };
    } else {
        // Nothing is behind the viewer, so the projection is straight forward and valid
        ws = 1.f / ws;
-        V4f x2d = xs * ws;
-        V4f y2d = ys * ws;
+        float4 x2d = xs * ws;
+        float4 y2d = ys * ws;
        return {min(x2d), min(y2d), max(x2d), max(y2d)};
    }
 }
@ -399,7 +399,7 @@ int ClipToW0(DrawQuad* quad, DrawQuad* extraVertices) {
        return 1;
    }

-    M4f validW = quad->fDevice.w4f() >= SkPathPriv::kW0PlaneDistance;
+    mask4 validW = quad->fDevice.w4f() >= SkPathPriv::kW0PlaneDistance;
    if (all(validW)) {
        // Nothing to clip, can proceed normally drawing just 'quad'
        return 1;
@ -425,7 +425,7 @@ int ClipToW0(DrawQuad* quad, DrawQuad* extraVertices) {
    SkASSERT(clipCount >= 1 && clipCount <= 3);

    // FIXME de-duplicate from the projectedBounds() calculations.
-    V4f t = (SkPathPriv::kW0PlaneDistance - v.fW) / (next_ccw(v.fW) - v.fW);
+    float4 t = (SkPathPriv::kW0PlaneDistance - v.fW) / (next_ccw(v.fW) - v.fW);

    Vertices clip;
    clip.fX = (t * next_ccw(v.fX) + (1.f - t) * v.fX);
@ -436,8 +436,8 @@ int ClipToW0(DrawQuad* quad, DrawQuad* extraVertices) {
    clip.fV = (t * next_ccw(v.fV) + (1.f - t) * v.fV);
    clip.fR = (t * next_ccw(v.fR) + (1.f - t) * v.fR);

-    M4f ccwValid = next_ccw(v.fW) >= SkPathPriv::kW0PlaneDistance;
-    M4f cwValid  = next_cw(v.fW)  >= SkPathPriv::kW0PlaneDistance;
+    mask4 ccwValid = next_ccw(v.fW) >= SkPathPriv::kW0PlaneDistance;
+    mask4 cwValid  = next_cw(v.fW)  >= SkPathPriv::kW0PlaneDistance;

    if (clipCount != 1) {
        // Simplest case, replace behind-w0 points with their clipped points by following CCW edge
@ -583,16 +583,16 @@ bool CropToRect(const SkRect& cropRect, GrAA cropAA, DrawQuad* quad, bool comput
        return false;
    }

-    V4f devX = quad->fDevice.x4f();
-    V4f devY = quad->fDevice.y4f();
+    float4 devX = quad->fDevice.x4f();
+    float4 devY = quad->fDevice.y4f();

-    V4f clipX = {cropRect.fLeft, cropRect.fLeft, cropRect.fRight, cropRect.fRight};
-    V4f clipY = {cropRect.fTop, cropRect.fBottom, cropRect.fTop, cropRect.fBottom};
+    float4 clipX = {cropRect.fLeft, cropRect.fLeft, cropRect.fRight, cropRect.fRight};
+    float4 clipY = {cropRect.fTop, cropRect.fBottom, cropRect.fTop, cropRect.fBottom};

    // Calculate barycentric coordinates for the 4 rect corners in the 2 triangles that the quad
    // is tessellated into when drawn.
-    V4f u1, v1, w1;
-    V4f u2, v2, w2;
+    float4 u1, v1, w1;
+    float4 u2, v2, w2;
    if (!barycentric_coords(devX[0], devY[0], devX[1], devY[1], devX[2], devY[2], clipX, clipY,
                            &u1, &v1, &w1) ||
        !barycentric_coords(devX[1], devY[1], devX[3], devY[3], devX[2], devY[2], clipX, clipY,
@ -602,8 +602,8 @@ bool CropToRect(const SkRect& cropRect, GrAA cropAA, DrawQuad* quad, bool comput
    }

    // clipDevRect is completely inside this quad if each corner is in at least one of two triangles
-    M4f inTri1 = inside_triangle(u1, v1, w1);
-    M4f inTri2 = inside_triangle(u2, v2, w2);
+    mask4 inTri1 = inside_triangle(u1, v1, w1);
+    mask4 inTri2 = inside_triangle(u2, v2, w2);
    if (all(inTri1 | inTri2)) {
        // We can crop to exactly the clipDevRect.
        // FIXME (michaelludwig) - there are other ways to have determined quad covering the clip
@ -663,7 +663,7 @@ void TessellationHelper::EdgeVectors::reset(const skvx::Vec<4, float>& xs,
                                            GrQuad::Type quadType) {
    // Calculate all projected edge vector values for this quad.
    if (quadType == GrQuad::Type::kPerspective) {
-        V4f iw = 1.f / ws;
+        float4 iw = 1.f / ws;
        fX2D = xs * iw;
        fY2D = ys * iw;
    } else {
@ -694,14 +694,14 @@ void TessellationHelper::EdgeVectors::reset(const skvx::Vec<4, float>& xs,
 //** EdgeEquations implementation

 void TessellationHelper::EdgeEquations::reset(const EdgeVectors& edgeVectors) {
-    V4f dx = edgeVectors.fDX;
-    V4f dy = edgeVectors.fDY;
+    float4 dx = edgeVectors.fDX;
+    float4 dy = edgeVectors.fDY;
    // Correct for bad edges by copying adjacent edge information into the bad component
    correct_bad_edges(edgeVectors.fInvLengths >= kInvDistTolerance, &dx, &dy, nullptr);

-    V4f c = dx*edgeVectors.fY2D - dy*edgeVectors.fX2D;
+    float4 c = dx*edgeVectors.fY2D - dy*edgeVectors.fX2D;
    // Make sure normals point into the shape
-    V4f test = dy * next_cw(edgeVectors.fX2D) + (-dx * next_cw(edgeVectors.fY2D) + c);
+    float4 test = dy * next_cw(edgeVectors.fX2D) + (-dx * next_cw(edgeVectors.fY2D) + c);
    if (any(test < -kDistTolerance)) {
        fA = -dy;
        fB = dx;
@ -713,12 +713,13 @@ void TessellationHelper::EdgeEquations::reset(const EdgeVectors& edgeVectors) {
    }
 }

-V4f TessellationHelper::EdgeEquations::estimateCoverage(const V4f& x2d, const V4f& y2d) const {
+float4 TessellationHelper::EdgeEquations::estimateCoverage(const float4& x2d,
+                                                           const float4& y2d) const {
    // Calculate distance of the 4 inset points (px, py) to the 4 edges
-    V4f d0 = fA[0]*x2d + (fB[0]*y2d + fC[0]);
-    V4f d1 = fA[1]*x2d + (fB[1]*y2d + fC[1]);
-    V4f d2 = fA[2]*x2d + (fB[2]*y2d + fC[2]);
-    V4f d3 = fA[3]*x2d + (fB[3]*y2d + fC[3]);
+    float4 d0 = fA[0]*x2d + (fB[0]*y2d + fC[0]);
+    float4 d1 = fA[1]*x2d + (fB[1]*y2d + fC[1]);
+    float4 d2 = fA[2]*x2d + (fB[2]*y2d + fC[2]);
+    float4 d3 = fA[3]*x2d + (fB[3]*y2d + fC[3]);

    // For each point, pretend that there's a rectangle that touches e0 and e3 on the horizontal
    // axis, so its width is "approximately" d0 + d3, and it touches e1 and e2 on the vertical axis
@ -727,30 +728,30 @@ V4f TessellationHelper::EdgeEquations::estimateCoverage(const V4f& x2d, const V4
    // accurate calculation of its area clipped to an aligned pixel. For arbitrary quads it is not
    // mathematically accurate but qualitatively provides a stable value proportional to the size of
    // the shape.
-    V4f w = max(0.f, min(1.f, d0 + d3));
-    V4f h = max(0.f, min(1.f, d1 + d2));
+    float4 w = max(0.f, min(1.f, d0 + d3));
+    float4 h = max(0.f, min(1.f, d1 + d2));
    return w * h;
 }

-bool TessellationHelper::EdgeEquations::isSubpixel(const V4f& x2d, const V4f& y2d) const {
+bool TessellationHelper::EdgeEquations::isSubpixel(const float4& x2d, const float4& y2d) const {
    // Compute the minimum distances from vertices to opposite edges. If all 4 minimum distances
    // are less than 1px, then the inset geometry would be a point or line and quad rendering
    // will switch to hairline mode.
-    V4f d = min(x2d * skvx::shuffle<1,2,1,2>(fA) + y2d * skvx::shuffle<1,2,1,2>(fB)
-                        + skvx::shuffle<1,2,1,2>(fC),
-                x2d * skvx::shuffle<3,3,0,0>(fA) + y2d * skvx::shuffle<3,3,0,0>(fB)
-                        + skvx::shuffle<3,3,0,0>(fC));
+    float4 d = min(x2d * skvx::shuffle<1,2,1,2>(fA) + y2d * skvx::shuffle<1,2,1,2>(fB)
+                           + skvx::shuffle<1,2,1,2>(fC),
+                   x2d * skvx::shuffle<3,3,0,0>(fA) + y2d * skvx::shuffle<3,3,0,0>(fB)
+                           + skvx::shuffle<3,3,0,0>(fC));
    return all(d < 1.f);
 }

-int TessellationHelper::EdgeEquations::computeDegenerateQuad(const V4f& signedEdgeDistances,
-                                                             V4f* x2d, V4f* y2d,
-                                                             M4f* aaMask) const {
+int TessellationHelper::EdgeEquations::computeDegenerateQuad(const float4& signedEdgeDistances,
+                                                             float4* x2d, float4* y2d,
+                                                             mask4* aaMask) const {
    // If the original points form a line in the 2D projection then give up on antialiasing.
    for (int i = 0; i < 4; ++i) {
-        V4f d = (*x2d)*fA[i] + (*y2d)*fB[i] + fC[i];
+        float4 d = (*x2d)*fA[i] + (*y2d)*fB[i] + fC[i];
        if (all(abs(d) < kDistTolerance)) {
-            *aaMask = M4f(0);
+            *aaMask = mask4(0);
            return 4;
        }
    }
@ -758,25 +759,25 @@ int TessellationHelper::EdgeEquations::computeDegenerateQuad(const V4f& signedEd
    *aaMask = signedEdgeDistances != 0.f;

    // Move the edge by the signed edge adjustment.
-    V4f oc = fC + signedEdgeDistances;
+    float4 oc = fC + signedEdgeDistances;

    // There are 6 points that we care about to determine the final shape of the polygon, which
    // are the intersections between (e0,e2), (e1,e0), (e2,e3), (e3,e1) (corresponding to the
    // 4 corners), and (e1, e2), (e0, e3) (representing the intersections of opposite edges).
-    V4f denom = fA * next_cw(fB) - fB * next_cw(fA);
-    V4f px = (fB * next_cw(oc) - oc * next_cw(fB)) / denom;
-    V4f py = (oc * next_cw(fA) - fA * next_cw(oc)) / denom;
+    float4 denom = fA * next_cw(fB) - fB * next_cw(fA);
+    float4 px = (fB * next_cw(oc) - oc * next_cw(fB)) / denom;
+    float4 py = (oc * next_cw(fA) - fA * next_cw(oc)) / denom;
    correct_bad_coords(abs(denom) < kTolerance, &px, &py, nullptr);

    // Calculate the signed distances from these 4 corners to the other two edges that did not
    // define the intersection. So p(0) is compared to e3,e1, p(1) to e3,e2 , p(2) to e0,e1, and
    // p(3) to e0,e2
-    V4f dists1 = px * skvx::shuffle<3, 3, 0, 0>(fA) +
-                 py * skvx::shuffle<3, 3, 0, 0>(fB) +
-                 skvx::shuffle<3, 3, 0, 0>(oc);
-    V4f dists2 = px * skvx::shuffle<1, 2, 1, 2>(fA) +
-                 py * skvx::shuffle<1, 2, 1, 2>(fB) +
-                 skvx::shuffle<1, 2, 1, 2>(oc);
+    float4 dists1 = px * skvx::shuffle<3, 3, 0, 0>(fA) +
+                    py * skvx::shuffle<3, 3, 0, 0>(fB) +
+                    skvx::shuffle<3, 3, 0, 0>(oc);
+    float4 dists2 = px * skvx::shuffle<1, 2, 1, 2>(fA) +
+                    py * skvx::shuffle<1, 2, 1, 2>(fB) +
+                    skvx::shuffle<1, 2, 1, 2>(oc);

    // If all the distances are >= 0, the 4 corners form a valid quadrilateral, so use them as
    // the 4 points. If any point is on the wrong side of both edges, the interior has collapsed
@ -784,10 +785,10 @@ int TessellationHelper::EdgeEquations::computeDegenerateQuad(const V4f& signedEd
    // wrong side of 1 edge, one edge has crossed over another and we use a line to represent it.
    // Otherwise, use a triangle that replaces the bad points with the intersections of
    // (e1, e2) or (e0, e3) as needed.
-    M4f d1v0 = dists1 < kDistTolerance;
-    M4f d2v0 = dists2 < kDistTolerance;
-    M4f d1And2 = d1v0 & d2v0;
-    M4f d1Or2 = d1v0 | d2v0;
+    mask4 d1v0 = dists1 < kDistTolerance;
+    mask4 d2v0 = dists2 < kDistTolerance;
+    mask4 d1And2 = d1v0 & d2v0;
+    mask4 d1Or2 = d1v0 | d2v0;

    if (!any(d1Or2)) {
        // Every dists1 and dists2 >= kTolerance so it's not degenerate, use all 4 corners as-is
@ -815,12 +816,12 @@ int TessellationHelper::EdgeEquations::computeDegenerateQuad(const V4f& signedEd
            // If edges 0 and 3 crossed then one must have AA but we moved both 2D points on the
            // edge so we need moveTo() to be able to move both 3D points along the shared edge. So
            // ensure both have AA.
-            *aaMask = *aaMask | M4f({1, 0, 0, 1});
+            *aaMask = *aaMask | mask4({1, 0, 0, 1});
        } else {
            // Edges 1 and 2 have crossed over, so make the line from average of (p0,p1) and (p2,p3)
            *x2d = 0.5f * (skvx::shuffle<0, 0, 2, 2>(px) + skvx::shuffle<1, 1, 3, 3>(px));
            *y2d = 0.5f * (skvx::shuffle<0, 0, 2, 2>(py) + skvx::shuffle<1, 1, 3, 3>(py));
-            *aaMask = *aaMask | M4f({0, 1, 1, 0});
+            *aaMask = *aaMask | mask4({0, 1, 1, 0});
        }
        return 2;
    } else {
@ -841,8 +842,8 @@ int TessellationHelper::EdgeEquations::computeDegenerateQuad(const V4f& signedEd
        V2f ey = (skvx::shuffle<0, 1>(oc) * skvx::shuffle<3, 2>(fA) -
                  skvx::shuffle<0, 1>(fA) * skvx::shuffle<3, 2>(oc)) / eDenom;

-        V4f avgX = 0.5f * (skvx::shuffle<0, 1, 0, 2>(px) + skvx::shuffle<2, 3, 1, 3>(px));
-        V4f avgY = 0.5f * (skvx::shuffle<0, 1, 0, 2>(py) + skvx::shuffle<2, 3, 1, 3>(py));
+        float4 avgX = 0.5f * (skvx::shuffle<0, 1, 0, 2>(px) + skvx::shuffle<2, 3, 1, 3>(px));
+        float4 avgY = 0.5f * (skvx::shuffle<0, 1, 0, 2>(py) + skvx::shuffle<2, 3, 1, 3>(py));
        for (int i = 0; i < 4; ++i) {
            // Note that we would not have taken this branch if any point failed both of its edges
            // tests. That is, it can't be the case that d1v0[i] and d2v0[i] are both true.
@ -914,14 +915,14 @@ void TessellationHelper::OutsetRequest::reset(const EdgeVectors& edgeVectors, Gr
            // (or cos(theta) for the other edge).

            // cos(pi - theta) = -cos(theta)
-            V4f halfTanTheta = -edgeVectors.fCosTheta * edgeVectors.fInvSinTheta;
-            V4f edgeAdjust = edgeDistances * (halfTanTheta + next_ccw(halfTanTheta)) +
-                             next_ccw(edgeDistances) * next_ccw(edgeVectors.fInvSinTheta) +
-                             next_cw(edgeDistances) * edgeVectors.fInvSinTheta;
+            float4 halfTanTheta = -edgeVectors.fCosTheta * edgeVectors.fInvSinTheta;
+            float4 edgeAdjust = edgeDistances * (halfTanTheta + next_ccw(halfTanTheta)) +
+                                next_ccw(edgeDistances) * next_ccw(edgeVectors.fInvSinTheta) +
+                                next_cw(edgeDistances) * edgeVectors.fInvSinTheta;

            // If either outsetting (plus edgeAdjust) or insetting (minus edgeAdjust) make
            // the edge lengths negative, then it's degenerate.
-            V4f threshold = 0.1f - (1.f / edgeVectors.fInvLengths);
+            float4 threshold = 0.1f - (1.f / edgeVectors.fInvLengths);
            fOutsetDegenerate = any(edgeAdjust < threshold);
            fInsetDegenerate = any(edgeAdjust > -threshold);
        }
@ -969,7 +970,7 @@ void TessellationHelper::Vertices::asGrQuads(GrQuad* deviceOut, GrQuad::Type dev
 }

 void TessellationHelper::Vertices::moveAlong(const EdgeVectors& edgeVectors,
-                                             const V4f& signedEdgeDistances) {
+                                             const float4& signedEdgeDistances) {
    // This shouldn't be called if fInvSinTheta is close to infinity (cosTheta close to 1).
    // FIXME (michaelludwig) - Temporarily allow NaNs on debug builds here, for crbug:224618's GM
    // Once W clipping is implemented, shouldn't see NaNs unless it's actually time to fail.
@ -981,8 +982,8 @@ void TessellationHelper::Vertices::moveAlong(const EdgeVectors& edgeVectors,
    // inwards and the cw-rotated edge points outwards, hence the minus-sign.
    // The edge distances are rotated compared to the corner outsets and (dx, dy), since if
    // the edge is "on" both its corners need to be moved along their other edge vectors.
-    V4f signedOutsets = -edgeVectors.fInvSinTheta * next_cw(signedEdgeDistances);
-    V4f signedOutsetsCW = edgeVectors.fInvSinTheta * signedEdgeDistances;
+    float4 signedOutsets = -edgeVectors.fInvSinTheta * next_cw(signedEdgeDistances);
+    float4 signedOutsetsCW = edgeVectors.fInvSinTheta * signedEdgeDistances;

    // x = x + outset * mask * next_cw(xdiff) - outset * next_cw(mask) * xdiff
    fX += signedOutsetsCW * next_cw(edgeVectors.fDX) + signedOutsets * edgeVectors.fDX;
@ -991,30 +992,30 @@ void TessellationHelper::Vertices::moveAlong(const EdgeVectors& edgeVectors,
        // We want to extend the texture coords by the same proportion as the positions.
        signedOutsets *= edgeVectors.fInvLengths;
        signedOutsetsCW *= next_cw(edgeVectors.fInvLengths);
-        V4f du = next_ccw(fU) - fU;
-        V4f dv = next_ccw(fV) - fV;
+        float4 du = next_ccw(fU) - fU;
+        float4 dv = next_ccw(fV) - fV;
        fU += signedOutsetsCW * next_cw(du) + signedOutsets * du;
        fV += signedOutsetsCW * next_cw(dv) + signedOutsets * dv;
        if (fUVRCount == 3) {
-            V4f dr = next_ccw(fR) - fR;
+            float4 dr = next_ccw(fR) - fR;
            fR += signedOutsetsCW * next_cw(dr) + signedOutsets * dr;
        }
    }
 }

-void TessellationHelper::Vertices::moveTo(const V4f& x2d, const V4f& y2d, const M4f& mask) {
+void TessellationHelper::Vertices::moveTo(const float4& x2d, const float4& y2d, const mask4& mask) {
    // Left to right, in device space, for each point
-    V4f e1x = skvx::shuffle<2, 3, 2, 3>(fX) - skvx::shuffle<0, 1, 0, 1>(fX);
-    V4f e1y = skvx::shuffle<2, 3, 2, 3>(fY) - skvx::shuffle<0, 1, 0, 1>(fY);
-    V4f e1w = skvx::shuffle<2, 3, 2, 3>(fW) - skvx::shuffle<0, 1, 0, 1>(fW);
-    M4f e1Bad = e1x*e1x + e1y*e1y < kDist2Tolerance;
+    float4 e1x = skvx::shuffle<2, 3, 2, 3>(fX) - skvx::shuffle<0, 1, 0, 1>(fX);
+    float4 e1y = skvx::shuffle<2, 3, 2, 3>(fY) - skvx::shuffle<0, 1, 0, 1>(fY);
+    float4 e1w = skvx::shuffle<2, 3, 2, 3>(fW) - skvx::shuffle<0, 1, 0, 1>(fW);
+    mask4 e1Bad = e1x*e1x + e1y*e1y < kDist2Tolerance;
    correct_bad_edges(e1Bad, &e1x, &e1y, &e1w);

    // // Top to bottom, in device space, for each point
-    V4f e2x = skvx::shuffle<1, 1, 3, 3>(fX) - skvx::shuffle<0, 0, 2, 2>(fX);
-    V4f e2y = skvx::shuffle<1, 1, 3, 3>(fY) - skvx::shuffle<0, 0, 2, 2>(fY);
-    V4f e2w = skvx::shuffle<1, 1, 3, 3>(fW) - skvx::shuffle<0, 0, 2, 2>(fW);
-    M4f e2Bad = e2x*e2x + e2y*e2y < kDist2Tolerance;
+    float4 e2x = skvx::shuffle<1, 1, 3, 3>(fX) - skvx::shuffle<0, 0, 2, 2>(fX);
+    float4 e2y = skvx::shuffle<1, 1, 3, 3>(fY) - skvx::shuffle<0, 0, 2, 2>(fY);
+    float4 e2w = skvx::shuffle<1, 1, 3, 3>(fW) - skvx::shuffle<0, 0, 2, 2>(fW);
+    mask4 e2Bad = e2x*e2x + e2y*e2y < kDist2Tolerance;
    correct_bad_edges(e2Bad, &e2x, &e2y, &e2w);

    // Can only move along e1 and e2 to reach the new 2D point, so we have
@ -1022,15 +1023,15 @@ void TessellationHelper::Vertices::moveTo(const V4f& x2d, const V4f& y2d, const
    // y2d = (y + a*e1y + b*e2y) / (w + a*e1w + b*e2w) for some a, b
    // This can be rewritten to a*c1x + b*c2x + c3x = 0; a * c1y + b*c2y + c3y = 0, where
    // the cNx and cNy coefficients are:
-    V4f c1x = e1w * x2d - e1x;
-    V4f c1y = e1w * y2d - e1y;
-    V4f c2x = e2w * x2d - e2x;
-    V4f c2y = e2w * y2d - e2y;
-    V4f c3x = fW * x2d - fX;
-    V4f c3y = fW * y2d - fY;
+    float4 c1x = e1w * x2d - e1x;
+    float4 c1y = e1w * y2d - e1y;
+    float4 c2x = e2w * x2d - e2x;
+    float4 c2y = e2w * y2d - e2y;
+    float4 c3x = fW * x2d - fX;
+    float4 c3y = fW * y2d - fY;

    // Solve for a and b
-    V4f a, b, denom;
+    float4 a, b, denom;
    if (all(mask)) {
        // When every edge is outset/inset, each corner can use both edge vectors
        denom = c1x * c2y - c2x * c1y;
@ -1038,15 +1039,15 @@ void TessellationHelper::Vertices::moveTo(const V4f& x2d, const V4f& y2d, const
        b = (c3x * c1y - c1x * c3y) / denom;
    } else {
        // Force a or b to be 0 if that edge cannot be used due to non-AA
-        M4f aMask = skvx::shuffle<0, 0, 3, 3>(mask);
-        M4f bMask = skvx::shuffle<2, 1, 2, 1>(mask);
+        mask4 aMask = skvx::shuffle<0, 0, 3, 3>(mask);
+        mask4 bMask = skvx::shuffle<2, 1, 2, 1>(mask);

        // When aMask[i]&bMask[i], then a[i], b[i], denom[i] match the kAll case.
        // When aMask[i]&!bMask[i], then b[i] = 0, a[i] = -c3x/c1x or -c3y/c1y, using better denom
        // When !aMask[i]&bMask[i], then a[i] = 0, b[i] = -c3x/c2x or -c3y/c2y, ""
        // When !aMask[i]&!bMask[i], then both a[i] = 0 and b[i] = 0
-        M4f useC1x = abs(c1x) > abs(c1y);
-        M4f useC2x = abs(c2x) > abs(c2y);
+        mask4 useC1x = abs(c1x) > abs(c1y);
+        mask4 useC2x = abs(c2x) > abs(c2y);

        denom = if_then_else(aMask,
                        if_then_else(bMask,
@ -1054,18 +1055,18 @@ void TessellationHelper::Vertices::moveTo(const V4f& x2d, const V4f& y2d, const
                                if_then_else(useC1x, c1x, c1y)),  /* A & !B  */
                        if_then_else(bMask,
                                if_then_else(useC2x, c2x, c2y),   /* !A & B  */
-                                V4f(1.f)));                       /* !A & !B */
+                                float4(1.f)));                    /* !A & !B */

        a = if_then_else(aMask,
                    if_then_else(bMask,
                            c2x * c3y - c3x * c2y,                /* A & B   */
                            if_then_else(useC1x, -c3x, -c3y)),    /* A & !B  */
-                    V4f(0.f)) / denom;                            /* !A      */
+                    float4(0.f)) / denom;                         /* !A      */
        b = if_then_else(bMask,
                    if_then_else(aMask,
                            c3x * c1y - c1x * c3y,                /* A & B   */
                            if_then_else(useC2x, -c3x, -c3y)),    /* !A & B  */
-                    V4f(0.f)) / denom;                            /* !B      */
+                    float4(0.f)) / denom;                         /* !B      */
    }

    fX += a * e1x + b * e2x;
@ -1078,7 +1079,7 @@ void TessellationHelper::Vertices::moveTo(const V4f& x2d, const V4f& y2d, const
    // computed screen space position but moves the 3D point off of the original quad. So far, this
    // seems to be a reasonable compromise.
    if (any(fW < 0.f)) {
-        V4f scale = if_then_else(fW < 0.f, V4f(-1.f), V4f(1.f));
+        float4 scale = if_then_else(fW < 0.f, float4(-1.f), float4(1.f));
        fX *= scale;
        fY *= scale;
        fW *= scale;
@ -1088,14 +1089,14 @@ void TessellationHelper::Vertices::moveTo(const V4f& x2d, const V4f& y2d, const

    if (fUVRCount > 0) {
        // Calculate R here so it can be corrected with U and V in case it's needed later
-        V4f e1u = skvx::shuffle<2, 3, 2, 3>(fU) - skvx::shuffle<0, 1, 0, 1>(fU);
-        V4f e1v = skvx::shuffle<2, 3, 2, 3>(fV) - skvx::shuffle<0, 1, 0, 1>(fV);
-        V4f e1r = skvx::shuffle<2, 3, 2, 3>(fR) - skvx::shuffle<0, 1, 0, 1>(fR);
+        float4 e1u = skvx::shuffle<2, 3, 2, 3>(fU) - skvx::shuffle<0, 1, 0, 1>(fU);
+        float4 e1v = skvx::shuffle<2, 3, 2, 3>(fV) - skvx::shuffle<0, 1, 0, 1>(fV);
+        float4 e1r = skvx::shuffle<2, 3, 2, 3>(fR) - skvx::shuffle<0, 1, 0, 1>(fR);
        correct_bad_edges(e1Bad, &e1u, &e1v, &e1r);

-        V4f e2u = skvx::shuffle<1, 1, 3, 3>(fU) - skvx::shuffle<0, 0, 2, 2>(fU);
-        V4f e2v = skvx::shuffle<1, 1, 3, 3>(fV) - skvx::shuffle<0, 0, 2, 2>(fV);
-        V4f e2r = skvx::shuffle<1, 1, 3, 3>(fR) - skvx::shuffle<0, 0, 2, 2>(fR);
+        float4 e2u = skvx::shuffle<1, 1, 3, 3>(fU) - skvx::shuffle<0, 0, 2, 2>(fU);
+        float4 e2v = skvx::shuffle<1, 1, 3, 3>(fV) - skvx::shuffle<0, 0, 2, 2>(fV);
+        float4 e2r = skvx::shuffle<1, 1, 3, 3>(fR) - skvx::shuffle<0, 0, 2, 2>(fR);
        correct_bad_edges(e2Bad, &e2u, &e2v, &e2r);

        fU += a * e1u + b * e2u;
@ -1127,8 +1128,8 @@ void TessellationHelper::reset(const GrQuad& deviceQuad, const GrQuad* localQuad
    fVerticesValid = true;
 }

-V4f TessellationHelper::inset(const skvx::Vec<4, float>& edgeDistances,
-                              GrQuad* deviceInset, GrQuad* localInset) {
+float4 TessellationHelper::inset(const skvx::Vec<4, float>& edgeDistances,
+                                 GrQuad* deviceInset, GrQuad* localInset) {
    SkASSERT(fVerticesValid);

    Vertices inset = fOriginal;
@ -1243,9 +1244,9 @@ int TessellationHelper::adjustDegenerateVertices(const skvx::Vec<4, float>& sign
        // For rectilinear, degenerate quads, can use moveAlong if the edge distances are adjusted
        // to not cross over each other.
        SkASSERT(all(signedEdgeDistances <= 0.f)); // Only way rectilinear can degenerate is insets
-        V4f halfLengths = -0.5f / next_cw(fEdgeVectors.fInvLengths); // Negate to inset
-        M4f crossedEdges = halfLengths > signedEdgeDistances;
-        V4f safeInsets = if_then_else(crossedEdges, halfLengths, signedEdgeDistances);
+        float4 halfLengths = -0.5f / next_cw(fEdgeVectors.fInvLengths); // Negate to inset
+        mask4 crossedEdges = halfLengths > signedEdgeDistances;
+        float4 safeInsets = if_then_else(crossedEdges, halfLengths, signedEdgeDistances);
        vertices->moveAlong(fEdgeVectors, safeInsets);

        // A degenerate rectilinear quad is either a point (both w and h crossed), or a line
@ -1253,10 +1254,10 @@ int TessellationHelper::adjustDegenerateVertices(const skvx::Vec<4, float>& sign
    } else {
        // Degenerate non-rectangular shape, must go through slowest path (which automatically
        // handles perspective).
-        V4f x2d = fEdgeVectors.fX2D;
-        V4f y2d = fEdgeVectors.fY2D;
+        float4 x2d = fEdgeVectors.fX2D;
+        float4 y2d = fEdgeVectors.fY2D;

-        M4f aaMask;
+        mask4 aaMask;
        int vertexCount = this->getEdgeEquations().computeDegenerateQuad(signedEdgeDistances,
                                                                         &x2d, &y2d, &aaMask);
        vertices->moveTo(x2d, y2d, aaMask);
--- a/src/gpu/ganesh/geometry/GrQuadUtils.h
+++ b/src/gpu/ganesh/geometry/GrQuadUtils.h
@ -46,7 +46,7 @@ namespace GrQuadUtils {
     */
    bool CropToRect(const SkRect& cropRect, GrAA cropAA, DrawQuad* quad, bool computeLocal=true);

-    inline void Outset(const skvx::Vec<4, float>& edgeDistances, GrQuad* quad);
+    inline void Outset(const skvx::float4& edgeDistances, GrQuad* quad);

    bool WillUseHairline(const GrQuad& quad, GrAAType aaType, GrQuadAAFlags edgeFlags);

@ -70,27 +70,27 @@ namespace GrQuadUtils {
        //
        // Note: the edge distances are in device pixel units, so after rendering the new quad
        // edge's shortest distance to the original quad's edge would be equal to provided edge dist
-        skvx::Vec<4, float> inset(const skvx::Vec<4, float>& edgeDistances,
-                                  GrQuad* deviceInset, GrQuad* localInset);
+        skvx::float4 inset(const skvx::float4& edgeDistances,
+                           GrQuad* deviceInset, GrQuad* localInset);

        // Calculates a new quadrilateral that outsets the original edges by the given distances.
        // Other than moving edges outwards, this function is equivalent to inset(). If the exact
        // same edge distances are provided, certain internal computations can be reused across
        // consecutive calls to inset() and outset() (in any order).
-        void outset(const skvx::Vec<4, float>& edgeDistances,
+        void outset(const skvx::float4& edgeDistances,
                    GrQuad* deviceOutset, GrQuad* localOutset);

        // Compute the edge equations of the original device space quad passed to 'reset()'. The
        // coefficients are stored per-edge in 'a', 'b', and 'c', such that ax + by + c = 0, and
        // a positive distance indicates the interior of the quad. Edges are ordered L, B, T, R,
        // matching edge distances passed to inset() and outset().
-        void getEdgeEquations(skvx::Vec<4, float>* a,
-                              skvx::Vec<4, float>* b,
-                              skvx::Vec<4, float>* c);
+        void getEdgeEquations(skvx::float4* a,
+                              skvx::float4* b,
+                              skvx::float4* c);

        // Compute the edge lengths of the original device space quad passed to 'reset()'. The
        // edge lengths are ordered LBTR to match distances passed to inset() and outset().
-        skvx::Vec<4, float> getEdgeLengths();
+        skvx::float4 getEdgeLengths();

        // Determine if the original device space quad has vertices closer than 1px to its opposing
        // edges, without going through the full work of computing the insets (assuming that the
@ -105,36 +105,36 @@ namespace GrQuadUtils {
        struct EdgeVectors {
            // Projected corners (x/w and y/w); these are the 2D coordinates that determine the
            // actual edge direction vectors, dx, dy, and invLengths
-            skvx::Vec<4, float> fX2D, fY2D;
+            skvx::float4 fX2D, fY2D;
            // Normalized edge vectors of the device space quad, ordered L, B, T, R
            // (i.e. next_ccw(x) - x).
-            skvx::Vec<4, float> fDX, fDY;
+            skvx::float4 fDX, fDY;
            // Reciprocal of edge length of the device space quad, i.e. 1 / sqrt(dx*dx + dy*dy)
-            skvx::Vec<4, float> fInvLengths;
+            skvx::float4 fInvLengths;
            // Theta represents the angle formed by the two edges connected at each corner.
-            skvx::Vec<4, float> fCosTheta;
-            skvx::Vec<4, float> fInvSinTheta; // 1 / sin(theta)
+            skvx::float4 fCosTheta;
+            skvx::float4 fInvSinTheta; // 1 / sin(theta)

-            void reset(const skvx::Vec<4, float>& xs, const skvx::Vec<4, float>& ys,
-                       const skvx::Vec<4, float>& ws, GrQuad::Type quadType);
+            void reset(const skvx::float4& xs, const skvx::float4& ys,
+                       const skvx::float4& ws, GrQuad::Type quadType);
        };

        struct EdgeEquations {
            // a * x + b * y + c = 0; positive distance is inside the quad; ordered LBTR.
-            skvx::Vec<4, float> fA, fB, fC;
+            skvx::float4 fA, fB, fC;

            void reset(const EdgeVectors& edgeVectors);

-            skvx::Vec<4, float> estimateCoverage(const skvx::Vec<4, float>& x2d,
-                                                 const skvx::Vec<4, float>& y2d) const;
+            skvx::float4 estimateCoverage(const skvx::float4& x2d,
+                                          const skvx::float4& y2d) const;

-            bool isSubpixel(const skvx::Vec<4, float>& x2d, const skvx::Vec<4, float>& y2d) const;
+            bool isSubpixel(const skvx::float4& x2d, const skvx::float4& y2d) const;

            // Outsets or insets 'x2d' and 'y2d' in place. To be used when the interior is very
            // small, edges are near parallel, or edges are very short/zero-length. Returns number
            // of effective vertices in the degenerate quad.
-            int computeDegenerateQuad(const skvx::Vec<4, float>& signedEdgeDistances,
-                                      skvx::Vec<4, float>* x2d, skvx::Vec<4, float>* y2d,
+            int computeDegenerateQuad(const skvx::float4& signedEdgeDistances,
+                                      skvx::float4* x2d, skvx::float4* y2d,
                                      skvx::Vec<4, int32_t>* aaMask) const;
        };

@ -142,7 +142,7 @@ namespace GrQuadUtils {
            // Positive edge distances to move each edge of the quad. These distances represent the
            // shortest (perpendicular) distance between the original edge and the inset or outset
            // edge. If the distance is 0, then the edge will not move.
-            skvx::Vec<4, float> fEdgeDistances;
+            skvx::float4 fEdgeDistances;
            // True if the new corners cannot be calculated by simply adding scaled edge vectors.
            // The quad may be degenerate because of the original geometry (near colinear edges), or
            // be because of the requested edge distances (collapse of inset, etc.)
@ -150,15 +150,15 @@ namespace GrQuadUtils {
            bool fOutsetDegenerate;

            void reset(const EdgeVectors& edgeVectors, GrQuad::Type quadType,
-                       const skvx::Vec<4, float>& edgeDistances);
+                       const skvx::float4& edgeDistances);
        };

        struct Vertices {
            // X, Y, and W coordinates in device space. If not perspective, w should be set to 1.f
-            skvx::Vec<4, float> fX, fY, fW;
+            skvx::float4 fX, fY, fW;
            // U, V, and R coordinates representing local quad.
            // Ignored depending on uvrCount (0, 1, 2).
-            skvx::Vec<4, float> fU, fV, fR;
+            skvx::float4 fU, fV, fR;
            int fUVRCount;

            void reset(const GrQuad& deviceQuad, const GrQuad* localQuad);
@ -171,14 +171,14 @@ namespace GrQuadUtils {
            // original lines. This should only be called if the 'edgeVectors' fInvSinTheta data is
            // numerically sound.
            void moveAlong(const EdgeVectors& edgeVectors,
-                           const skvx::Vec<4, float>& signedEdgeDistances);
+                           const skvx::float4& signedEdgeDistances);

            // Update the device coordinates by deriving (x,y,w) that project to (x2d, y2d), with
            // optional local coordinates updated to match the new vertices. It is assumed that
            // 'mask' was respected when determining (x2d, y2d), but it is used to ensure that only
            // unmasked unprojected edge vectors are used when computing device and local coords.
-            void moveTo(const skvx::Vec<4, float>& x2d,
-                        const skvx::Vec<4, float>& y2d,
+            void moveTo(const skvx::float4& x2d,
+                        const skvx::float4& y2d,
                        const skvx::Vec<4, int32_t>& mask);
        };

@ -201,15 +201,15 @@ namespace GrQuadUtils {

        // The requested edge distances must be positive so that they can be reused between inset
        // and outset calls.
-        const OutsetRequest& getOutsetRequest(const skvx::Vec<4, float>& edgeDistances);
+        const OutsetRequest& getOutsetRequest(const skvx::float4& edgeDistances);
        const EdgeEquations& getEdgeEquations();

        // Outsets or insets 'vertices' by the given perpendicular 'signedEdgeDistances' (inset or
        // outset is determined implicitly by the sign of the distances).
-        void adjustVertices(const skvx::Vec<4, float>& signedEdgeDistances, Vertices* vertices);
+        void adjustVertices(const skvx::float4& signedEdgeDistances, Vertices* vertices);
        // Like adjustVertices() but handles empty edges, collapsed quads, numerical issues, and
        // returns the number of effective vertices in the adjusted shape.
-        int adjustDegenerateVertices(const skvx::Vec<4, float>& signedEdgeDistances,
+        int adjustDegenerateVertices(const skvx::float4& signedEdgeDistances,
                                     Vertices* vertices);

        friend int ClipToW0(DrawQuad*, DrawQuad*); // To reuse Vertices struct
@ -217,7 +217,7 @@ namespace GrQuadUtils {

 }; // namespace GrQuadUtils

-void GrQuadUtils::Outset(const skvx::Vec<4, float>& edgeDistances, GrQuad* quad) {
+void GrQuadUtils::Outset(const skvx::float4& edgeDistances, GrQuad* quad) {
    TessellationHelper outsetter;
    outsetter.reset(*quad, nullptr);
    outsetter.outset(edgeDistances, quad, nullptr);
--- a/src/gpu/ganesh/ops/AtlasPathRenderer.cpp
+++ b/src/gpu/ganesh/ops/AtlasPathRenderer.cpp
@ -11,7 +11,6 @@
 #include "src/core/SkIPoint16.h"
 #include "src/gpu/ganesh/GrClip.h"
 #include "src/gpu/ganesh/GrDirectContextPriv.h"
-#include "src/gpu/ganesh/GrVx.h"
 #include "src/gpu/ganesh/effects/GrModulateAtlasCoverageEffect.h"
 #include "src/gpu/ganesh/geometry/GrStyledShape.h"
 #include "src/gpu/ganesh/ops/AtlasRenderTask.h"
@ -20,15 +19,13 @@
 #include "src/gpu/ganesh/tessellate/GrTessellationShader.h"
 #include "src/gpu/ganesh/v1/SurfaceDrawContext_v1.h"

-using grvx::float2;
-using grvx::int2;
-
 namespace {

 // Returns the rect [topLeftFloor, botRightCeil], which is the rect [r] rounded out to integer
 // boundaries.
-std::tuple<float2,float2> round_out(const SkRect& r) {
-    return {skvx::floor(float2::Load(&r.fLeft)), skvx::ceil(float2::Load(&r.fRight))};
+std::pair<skvx::float2, skvx::float2> round_out(const SkRect& r) {
+    return {floor(skvx::float2::Load(&r.fLeft)),
+            ceil(skvx::float2::Load(&r.fRight))};
 }

 // Returns whether the given proxyOwner uses the atlasProxy.
@ -46,17 +43,17 @@ template<typename T> bool refs_atlas(const T* proxyOwner, const GrSurfaceProxy*
 }

 bool is_visible(const SkRect& pathDevBounds, const SkIRect& clipBounds) {
-    float2 pathTopLeft = float2::Load(&pathDevBounds.fLeft);
-    float2 pathBotRight = float2::Load(&pathDevBounds.fRight);
+    auto pathTopLeft = skvx::float2::Load(&pathDevBounds.fLeft);
+    auto pathBotRight = skvx::float2::Load(&pathDevBounds.fRight);
    // Empty paths are never visible. Phrase this as a NOT of positive logic so we also return false
    // in the case of NaN.
-    if (!skvx::all(pathTopLeft < pathBotRight)) {
+    if (!all(pathTopLeft < pathBotRight)) {
        return false;
    }
-    float2 clipTopLeft = skvx::cast<float>(int2::Load(&clipBounds.fLeft));
-    float2 clipBotRight = skvx::cast<float>(int2::Load(&clipBounds.fRight));
+    auto clipTopLeft = skvx::cast<float>(skvx::int2::Load(&clipBounds.fLeft));
+    auto clipBotRight = skvx::cast<float>(skvx::int2::Load(&clipBounds.fRight));
    static_assert(sizeof(clipBounds) == sizeof(clipTopLeft) + sizeof(clipBotRight));
-    return skvx::all(pathTopLeft < clipBotRight) && skvx::all(pathBotRight > clipTopLeft);
+    return all(pathTopLeft < clipBotRight) && all(pathBotRight > clipTopLeft);
 }

 #ifdef SK_DEBUG
@ -145,9 +142,9 @@ bool AtlasPathRenderer::pathFitsInAtlas(const SkRect& pathDevBounds,
            ? kAtlasMaxPathHeightWithMSAAFallback * kAtlasMaxPathHeightWithMSAAFallback
            : kAtlasMaxPathHeight * kAtlasMaxPathHeight;
    auto [topLeftFloor, botRightCeil] = round_out(pathDevBounds);
-    float2 size = botRightCeil - topLeftFloor;
+    auto size = botRightCeil - topLeftFloor;
    return // Ensure the path's largest dimension fits in the atlas.
-           skvx::all(size <= fAtlasMaxPathWidth) &&
+           all(size <= fAtlasMaxPathWidth) &&
           // Since we will transpose tall skinny paths, limiting to atlasMaxPathHeight^2 pixels
           // guarantees heightInAtlas <= atlasMaxPathHeight, while also allowing paths that are
           // very wide and short.
@ -155,7 +152,6 @@ bool AtlasPathRenderer::pathFitsInAtlas(const SkRect& pathDevBounds,
 }

 void AtlasPathRenderer::AtlasPathKey::set(const SkMatrix& m, const SkPath& path) {
-    using grvx::float2;
    fPathGenID = path.getGenerationID();
    fAffineMatrix[0] = m.getScaleX();
    fAffineMatrix[1] = m.getSkewX();
@ -181,8 +177,8 @@ bool AtlasPathRenderer::addPathToAtlas(GrRecordingContext* rContext,
    // is_visible() should have guaranteed the path's bounds were representable as ints, since clip
    // bounds within the max render target size are nowhere near INT_MAX.
    auto [topLeftFloor, botRightCeil] = round_out(pathDevBounds);
-    SkASSERT(skvx::all(skvx::cast<float>(int2::Load(&devIBounds->fLeft)) == topLeftFloor));
-    SkASSERT(skvx::all(skvx::cast<float>(int2::Load(&devIBounds->fRight)) == botRightCeil));
+    SkASSERT(all(skvx::cast<float>(skvx::int2::Load(&devIBounds->fLeft)) == topLeftFloor));
+    SkASSERT(all(skvx::cast<float>(skvx::int2::Load(&devIBounds->fRight)) == botRightCeil));
 #endif

    int widthInAtlas = devIBounds->width();
--- a/src/gpu/ganesh/ops/BUILD.bazel
+++ b/src/gpu/ganesh/ops/BUILD.bazel
@ -174,7 +174,6 @@ generated_cc_atom(
        "//src/core:SkIPoint16_hdr",
        "//src/gpu/ganesh:GrClip_hdr",
        "//src/gpu/ganesh:GrDirectContextPriv_hdr",
-        "//src/gpu/ganesh:GrVx_hdr",
        "//src/gpu/ganesh/effects:GrModulateAtlasCoverageEffect_hdr",
        "//src/gpu/ganesh/geometry:GrStyledShape_hdr",
        "//src/gpu/ganesh/tessellate:GrTessellationShader_hdr",
@ -498,6 +497,7 @@ generated_cc_atom(
        ":GrMeshDrawOp_hdr",
        ":GrSimpleMeshDrawOpHelper_hdr",
        "//include/gpu:GrRecordingContext_hdr",
+        "//include/private:SkVx_hdr",
        "//src/core:SkRRectPriv_hdr",
        "//src/gpu:BufferWriter_hdr",
        "//src/gpu:KeyBuilder_hdr",
@ -510,7 +510,6 @@ generated_cc_atom(
        "//src/gpu/ganesh:GrProgramInfo_hdr",
        "//src/gpu/ganesh:GrRecordingContextPriv_hdr",
        "//src/gpu/ganesh:GrResourceProvider_hdr",
-        "//src/gpu/ganesh:GrVx_hdr",
        "//src/gpu/ganesh/geometry:GrShape_hdr",
        "//src/gpu/ganesh/glsl:GrGLSLFragmentShaderBuilder_hdr",
        "//src/gpu/ganesh/glsl:GrGLSLVarying_hdr",
@ -1191,12 +1190,10 @@ generated_cc_atom(
        ":PathTessellateOp_hdr",
        ":StrokeTessellateOp_hdr",
        ":TessellationPathRenderer_hdr",
-        "//include/private:SkVx_hdr",
        "//src/core:SkPathPriv_hdr",
        "//src/gpu/ganesh:GrClip_hdr",
        "//src/gpu/ganesh:GrMemoryPool_hdr",
        "//src/gpu/ganesh:GrRecordingContextPriv_hdr",
-        "//src/gpu/ganesh:GrVx_hdr",
        "//src/gpu/ganesh/effects:GrDisableColorXP_hdr",
        "//src/gpu/ganesh/geometry:GrStyledShape_hdr",
        "//src/gpu/ganesh/v1:SurfaceDrawContext_v1_hdr",
--- a/src/gpu/ganesh/ops/FillRRectOp.cpp
+++ b/src/gpu/ganesh/ops/FillRRectOp.cpp
@ -8,6 +8,7 @@
 #include "src/gpu/ganesh/ops/FillRRectOp.h"

 #include "include/gpu/GrRecordingContext.h"
+#include "include/private/SkVx.h"
 #include "src/core/SkRRectPriv.h"
 #include "src/gpu/BufferWriter.h"
 #include "src/gpu/KeyBuilder.h"
@ -19,7 +20,6 @@
 #include "src/gpu/ganesh/GrProgramInfo.h"
 #include "src/gpu/ganesh/GrRecordingContextPriv.h"
 #include "src/gpu/ganesh/GrResourceProvider.h"
-#include "src/gpu/ganesh/GrVx.h"
 #include "src/gpu/ganesh/geometry/GrShape.h"
 #include "src/gpu/ganesh/glsl/GrGLSLFragmentShaderBuilder.h"
 #include "src/gpu/ganesh/glsl/GrGLSLVarying.h"
@ -303,9 +303,9 @@ GrDrawOp::ClipResult FillRRectOpImpl::clipToShape(skgpu::v1::SurfaceDrawContext*

        if (fHeadInstance->fLocalCoords.fType == LocalCoords::Type::kRect) {
            // Update the local rect.
-            auto rect = skvx::bit_pun<grvx::float4>(fHeadInstance->fRRect.rect());
-            auto local = skvx::bit_pun<grvx::float4>(fHeadInstance->fLocalCoords.fRect);
-            auto isect = skvx::bit_pun<grvx::float4>(isectRRect.rect());
+            auto rect = skvx::bit_pun<skvx::float4>(fHeadInstance->fRRect.rect());
+            auto local = skvx::bit_pun<skvx::float4>(fHeadInstance->fLocalCoords.fRect);
+            auto isect = skvx::bit_pun<skvx::float4>(isectRRect.rect());
            auto rectToLocalSize = (local - skvx::shuffle<2,3,0,1>(local)) /
                                   (rect - skvx::shuffle<2,3,0,1>(rect));
            fHeadInstance->fLocalCoords.fRect =
@ -561,7 +561,7 @@ void FillRRectOpImpl::onPrepareDraws(GrMeshDrawTarget* target) {
            m.postConcat(i->fViewMatrix);

            // Convert the radii to [-1, -1, +1, +1] space and write their attribs.
-            grvx::float4 radiiX, radiiY;
+            skvx::float4 radiiX, radiiY;
            skvx::strided_load2(&SkRRectPriv::GetRadiiArray(i->fRRect)->fX, radiiX, radiiY);
            radiiX *= 2 / (r - l);
            radiiY *= 2 / (b - t);
--- a/src/gpu/ganesh/ops/TessellationPathRenderer.cpp
+++ b/src/gpu/ganesh/ops/TessellationPathRenderer.cpp
@ -7,12 +7,10 @@

 #include "src/gpu/ganesh/ops/TessellationPathRenderer.h"

-#include "include/private/SkVx.h"
 #include "src/core/SkPathPriv.h"
 #include "src/gpu/ganesh/GrClip.h"
 #include "src/gpu/ganesh/GrMemoryPool.h"
 #include "src/gpu/ganesh/GrRecordingContextPriv.h"
-#include "src/gpu/ganesh/GrVx.h"
 #include "src/gpu/ganesh/effects/GrDisableColorXP.h"
 #include "src/gpu/ganesh/geometry/GrStyledShape.h"
 #include "src/gpu/ganesh/ops/PathInnerTriangulateOp.h"
--- a/src/gpu/ganesh/tessellate/BUILD.bazel
+++ b/src/gpu/ganesh/tessellate/BUILD.bazel
@ -39,7 +39,6 @@ generated_cc_atom(
    deps = [
        ":GrTessellationShader_hdr",
        "//include/core:SkStrokeRec_hdr",
-        "//src/gpu/ganesh:GrVx_hdr",
        "//src/gpu/ganesh/glsl:GrGLSLVarying_hdr",
        "//src/gpu/tessellate:Tessellation_hdr",
    ],
--- a/src/gpu/ganesh/tessellate/GrStrokeTessellationShader.h
+++ b/src/gpu/ganesh/tessellate/GrStrokeTessellationShader.h
@ -11,7 +11,6 @@
 #include "src/gpu/ganesh/tessellate/GrTessellationShader.h"

 #include "include/core/SkStrokeRec.h"
-#include "src/gpu/ganesh/GrVx.h"
 #include "src/gpu/ganesh/glsl/GrGLSLVarying.h"
 #include "src/gpu/tessellate/Tessellation.h"

--- a/src/gpu/ganesh/tessellate/PathTessellator.cpp
+++ b/src/gpu/ganesh/tessellate/PathTessellator.cpp
@ -171,10 +171,10 @@ void PathCurveTessellator::prepareWithTriangles(
            SkDEBUGCODE(int breadcrumbCount = 0;)
            for (const auto* tri = extraTriangles->head(); tri; tri = tri->fNext) {
                SkDEBUGCODE(++breadcrumbCount;)
-                auto p0 = float2::Load(tri->fPts);
-                auto p1 = float2::Load(tri->fPts + 1);
-                auto p2 = float2::Load(tri->fPts + 2);
-                if (skvx::any((p0 == p1) & (p1 == p2))) {
+                auto p0 = skvx::float2::Load(tri->fPts);
+                auto p1 = skvx::float2::Load(tri->fPts + 1);
+                auto p2 = skvx::float2::Load(tri->fPts + 2);
+                if (any((p0 == p1) & (p1 == p2))) {
                    // Cull completely horizontal or vertical triangles. GrTriangulator can't always
                    // get these breadcrumb edges right when they run parallel to the sweep
                    // direction because their winding is undefined by its current definition.
--- a/src/gpu/graphite/BUILD.bazel
+++ b/src/gpu/graphite/BUILD.bazel
@ -771,8 +771,8 @@ generated_cc_atom(
        "//include/core:SkSpan_hdr",
        "//include/private:SkColorData_hdr",
        "//include/private:SkTDArray_hdr",
+        "//include/private:SkVx_hdr",
        "//src/core:SkSLTypeShared_hdr",
-        "//src/gpu/graphite/geom:VectorTypes_hdr",
    ],
 )

--- a/src/gpu/graphite/ClipStack.cpp
+++ b/src/gpu/graphite/ClipStack.cpp
@ -95,7 +95,7 @@ bool ClipStack::TransformedShape::intersects(const TransformedShape& o) const {
                // clipped to infinity, so pessimistically assume that they could intersect.
                return true;
            }
-            if (bounds.contains(Rect::Point(float2::Load(localQuad + i) / localQuad[i].w))) {
+            if (bounds.contains(Rect::Point(skvx::float2::Load(localQuad + i) / localQuad[i].w))) {
                // If any corner of 'o's bounds are contained then it intersects our bounds
                return true;
            }
@ -157,7 +157,7 @@ bool ClipStack::TransformedShape::contains(const TransformedShape& o) const {
                // to infinity, so it's extremely unlikely that this contains O.
                return false;
            }
-            if (!fShape.conservativeContains(float2::Load(localQuad + i) / localQuad[i].w)) {
+            if (!fShape.conservativeContains(skvx::float2::Load(localQuad + i) / localQuad[i].w)) {
                return false;
            }
        }
--- a/src/gpu/graphite/UniformManager.cpp
+++ b/src/gpu/graphite/UniformManager.cpp
@ -610,7 +610,7 @@ void UniformManager::write(int i) {
    this->write(kType, 1, &i);
 }

-void UniformManager::write(float2 v) {
+void UniformManager::write(skvx::float2 v) {
    static const SkSLType kType = SkSLType::kFloat2;
    SkDEBUGCODE(this->checkExpected(kType, 1);)
    this->write(kType, 1, &v);
--- a/src/gpu/graphite/UniformManager.h
+++ b/src/gpu/graphite/UniformManager.h
@ -12,8 +12,8 @@
 #include "include/core/SkSpan.h"
 #include "include/private/SkColorData.h"
 #include "include/private/SkTDArray.h"
+#include "include/private/SkVx.h"
 #include "src/core/SkSLTypeShared.h"
-#include "src/gpu/graphite/geom/VectorTypes.h"

 class SkM44;
 struct SkPoint;
@ -57,7 +57,7 @@ public:
    void write(const float*, int count);
    void write(float f) { this->write(&f, 1); }
    void write(int);
-    void write(float2);
+    void write(skvx::float2);

 private:
    SkSLType getUniformTypeForLayout(SkSLType type);
--- a/src/gpu/graphite/geom/BUILD.bazel
+++ b/src/gpu/graphite/geom/BUILD.bazel
@ -40,8 +40,8 @@ generated_cc_atom(
    hdrs = ["Rect.h"],
    visibility = ["//:__subpackages__"],
    deps = [
-        ":VectorTypes_hdr",
        "//include/core:SkRect_hdr",
+        "//include/private:SkVx_hdr",
    ],
 )

@ -84,17 +84,6 @@ generated_cc_atom(
    deps = [
        ":Rect_hdr",
        ":Transform_graphite_hdr",
-        ":VectorTypes_hdr",
        "//src/core:SkMatrixPriv_hdr",
    ],
 )
-
-generated_cc_atom(
-    name = "VectorTypes_hdr",
-    hdrs = ["VectorTypes.h"],
-    visibility = ["//:__subpackages__"],
-    deps = [
-        "//include/gpu/graphite:GraphiteTypes_hdr",
-        "//include/private:SkVx_hdr",
-    ],
-)
--- a/src/gpu/graphite/geom/IntersectionTree.cpp
+++ b/src/gpu/graphite/geom/IntersectionTree.cpp
@ -89,12 +89,12 @@ public:
        // fNumRects without failing.
        static_assert(kMaxRectsInList % 4 == 0);
        SkASSERT(fNumRects <= kMaxRectsInList);
-        float4 comp = Rect::ComplementRect(rect).fVals;
+        auto comp = Rect::ComplementRect(rect).fVals;
        for (int i = 0; i < fNumRects; i += 4) {
-            float4 l = float4::Load(fLefts + i);
-            float4 t = float4::Load(fTops + i);
-            float4 nr = float4::Load(fNegRights + i);
-            float4 nb = float4::Load(fNegBots + i);
+            auto l = skvx::float4::Load(fLefts + i);
+            auto t = skvx::float4::Load(fTops + i);
+            auto nr = skvx::float4::Load(fNegRights + i);
+            auto nb = skvx::float4::Load(fNegBots + i);
            if (any((l < comp[0]) &
                    (t < comp[1]) &
                    (nr < comp[2]) &
@ -128,7 +128,7 @@ private:
    }

    Rect loadRect(int i) const {
-        return Rect::FromVals(float4(fLefts[i], fTops[i], fNegRights[i], fNegBots[i]));
+        return Rect::FromVals({fLefts[i], fTops[i], fNegRights[i], fNegBots[i]});
    }

    // Splits this node with a new LeafNode, then returns a TreeNode that reuses our "this" pointer
@ -143,7 +143,7 @@ private:
        //     fSplittableBounds == [maxLeft, maxTop, -minRight, -minBot] == [r, b, -l, -t]
        //
        // Represents the region of splits that guarantee a strict subdivision of our rect list.
-        float2 splittableSize = fSplittableBounds.xy() + fSplittableBounds.zw();  // == [r-l, b-t]
+        auto splittableSize = fSplittableBounds.xy() + fSplittableBounds.zw();  // == [r-l, b-t]
        SkASSERT(max(splittableSize) >= 0);
        SplitType splitType = (splittableSize.x() > splittableSize.y()) ? SplitType::kX
                                                                        : SplitType::kY;
@ -191,13 +191,13 @@ private:
    }

    int fNumRects;
-    float4 fSplittableBounds;  // [maxLeft, maxTop, -minRight, -minBot]
-    float4 fRectValsSum;  // [sum(left), sum(top), -sum(right), -sum(bot)]
-    alignas(float4) float fLefts[kMaxRectsInList];
-    alignas(float4) float fTops[kMaxRectsInList];
-    alignas(float4) float fNegRights[kMaxRectsInList];
-    alignas(float4) float fNegBots[kMaxRectsInList];
-    static_assert((kMaxRectsInList * sizeof(float)) % sizeof(float4) == 0);
+    skvx::float4 fSplittableBounds;  // [maxLeft, maxTop, -minRight, -minBot]
+    skvx::float4 fRectValsSum;  // [sum(left), sum(top), -sum(right), -sum(bot)]
+    alignas(Rect) float fLefts[kMaxRectsInList];
+    alignas(Rect) float fTops[kMaxRectsInList];
+    alignas(Rect) float fNegRights[kMaxRectsInList];
+    alignas(Rect) float fNegBots[kMaxRectsInList];
+    static_assert((kMaxRectsInList * sizeof(float)) % sizeof(Rect) == 0);
 };

 IntersectionTree::IntersectionTree()
--- a/src/gpu/graphite/geom/IntersectionTree.h
+++ b/src/gpu/graphite/geom/IntersectionTree.h
@ -53,7 +53,7 @@ private:
    class LeafNode;

    constexpr static int kTreeNodeSize = 16 + sizeof(Node*) * 2;
-    constexpr static int kLeafNodeSize = 16 + (2 + 64) * sizeof(float4);
+    constexpr static int kLeafNodeSize = 16 + (2 + 64) * sizeof(Rect);
    constexpr static int kPadSize = 256;  // For footers and alignment.
    SkArenaAlloc fArena{kLeafNodeSize + kTreeNodeSize + kPadSize*2};
    Node* fRoot;
--- a/src/gpu/graphite/geom/Rect.h
+++ b/src/gpu/graphite/geom/Rect.h
@ -9,7 +9,7 @@
 #define skgpu_graphite_geom_Rect_DEFINED

 #include "include/core/SkRect.h"
-#include "src/gpu/graphite/geom/VectorTypes.h"
+#include "include/private/SkVx.h"

 namespace skgpu::graphite {

@ -25,6 +25,8 @@ namespace skgpu::graphite {
 * intended result. It is the caller's responsibility to check isEmptyOrNegative() if needed.
 */
 class Rect {
+    using float2 = skvx::float2;
+    using float4 = skvx::float4;
 public:
    AI Rect() = default;
    AI Rect(float l, float t, float r, float b) : fVals(NegateBotRight({l,t,r,b})) {}
@ -139,6 +141,7 @@ public:

 private:
    AI static float4 NegateBotRight(float4 vals) {  // Returns [vals.xy, -vals.zw].
+        using uint4 = skvx::uint4;
        return skvx::bit_pun<float4>(skvx::bit_pun<uint4>(vals) ^ uint4(0, 0, 1u << 31, 1u << 31));
    }

--- a/src/gpu/graphite/geom/Shape.cpp
+++ b/src/gpu/graphite/geom/Shape.cpp
@ -37,7 +37,7 @@ bool Shape::conservativeContains(const Rect& rect) const {
    SkUNREACHABLE;
 }

-bool Shape::conservativeContains(float2 point) const {
+bool Shape::conservativeContains(skvx::float2 point) const {
    switch (fType) {
        case Type::kEmpty: return false;
        case Type::kLine:  return false;
--- a/src/gpu/graphite/geom/Shape.h
+++ b/src/gpu/graphite/geom/Shape.h
@ -32,16 +32,16 @@ public:
    inline static constexpr int kTypeCount = static_cast<int>(Type::kPath) + 1;

    Shape() {}
-    Shape(const Shape& shape)            { *this = shape; }
+    Shape(const Shape& shape)               { *this = shape; }
    Shape(Shape&&) = delete;

-    Shape(SkPoint p0, SkPoint p1)        { this->setLine(p0, p1); }
-    Shape(SkV2 p0, SkV2 p1)              { this->setLine(p0, p1); }
-    Shape(float2 p0, float2 p1)          { this->setLine(p0, p1); }
-    explicit Shape(const Rect& rect)     { this->setRect(rect);   }
-    explicit Shape(const SkRect& rect)   { this->setRect(rect);   }
-    explicit Shape(const SkRRect& rrect) { this->setRRect(rrect); }
-    explicit Shape(const SkPath& path)   { this->setPath(path);   }
+    Shape(SkPoint p0, SkPoint p1)           { this->setLine(p0, p1); }
+    Shape(SkV2 p0, SkV2 p1)                 { this->setLine(p0, p1); }
+    Shape(skvx::float2 p0, skvx::float2 p1) { this->setLine(p0, p1); }
+    explicit Shape(const Rect& rect)        { this->setRect(rect);   }
+    explicit Shape(const SkRect& rect)      { this->setRect(rect);   }
+    explicit Shape(const SkRRect& rrect)    { this->setRRect(rrect); }
+    explicit Shape(const SkPath& path)      { this->setPath(path);   }

    ~Shape() { this->reset(); }

@ -84,7 +84,7 @@ public:
    // True if the given bounding box is completely inside the shape, if it's conservatively treated
    // as a filled, closed shape.
    bool conservativeContains(const Rect& rect) const;
-    bool conservativeContains(float2 point) const;
+    bool conservativeContains(skvx::float2 point) const;

    // True if the underlying geometry represents a closed shape, without the need for an
    // implicit close.
@ -102,8 +102,8 @@ public:

    // Access the actual geometric description of the shape. May only access the appropriate type
    // based on what was last set.
-    float2         p0()    const { SkASSERT(this->isLine());  return fRect.topLeft();  }
-    float2         p1()    const { SkASSERT(this->isLine());  return fRect.botRight(); }
+    skvx::float2   p0()    const { SkASSERT(this->isLine());  return fRect.topLeft();  }
+    skvx::float2   p1()    const { SkASSERT(this->isLine());  return fRect.botRight(); }
    const Rect&    rect()  const { SkASSERT(this->isRect());  return fRect;            }
    const SkRRect& rrect() const { SkASSERT(this->isRRect()); return fRRect;           }
    const SkPath&  path()  const { SkASSERT(this->isPath());  return fPath;            }
@ -114,12 +114,12 @@ public:
    //
    // These reset inversion to the default for the geometric type.
    void setLine(SkPoint p0, SkPoint p1) {
-        this->setLine(float2{p0.fX, p0.fY}, float2{p1.fX, p1.fY});
+        this->setLine(skvx::float2{p0.fX, p0.fY}, skvx::float2{p1.fX, p1.fY});
    }
    void setLine(SkV2 p0, SkV2 p1) {
-        this->setLine(float2{p0.x, p0.y}, float2{p1.x, p1.y});
+        this->setLine(skvx::float2{p0.x, p0.y}, skvx::float2{p1.x, p1.y});
    }
-    void setLine(float2 p0, float2 p1) {
+    void setLine(skvx::float2 p0, skvx::float2 p1) {
        this->setType(Type::kLine);
        fRect = Rect(p0, p1);
        fInverted = false;
--- a/src/gpu/graphite/geom/Transform.cpp
+++ b/src/gpu/graphite/geom/Transform.cpp
@ -9,7 +9,6 @@

 #include "src/core/SkMatrixPriv.h"
 #include "src/gpu/graphite/geom/Rect.h"
-#include "src/gpu/graphite/geom/VectorTypes.h"

 namespace skgpu::graphite {

@ -24,13 +23,13 @@ Rect map_rect(const SkM44& m, const Rect& r) {

 void map_points(const SkM44& m, const SkV4* in, SkV4* out, int count) {
    // TODO: These maybe should go into SkM44, since bulk point mapping seems generally useful
-    float4 c0 = float4::Load(SkMatrixPriv::M44ColMajor(m) + 0);
-    float4 c1 = float4::Load(SkMatrixPriv::M44ColMajor(m) + 4);
-    float4 c2 = float4::Load(SkMatrixPriv::M44ColMajor(m) + 8);
-    float4 c3 = float4::Load(SkMatrixPriv::M44ColMajor(m) + 12);
+    auto c0 = skvx::float4::Load(SkMatrixPriv::M44ColMajor(m) + 0);
+    auto c1 = skvx::float4::Load(SkMatrixPriv::M44ColMajor(m) + 4);
+    auto c2 = skvx::float4::Load(SkMatrixPriv::M44ColMajor(m) + 8);
+    auto c3 = skvx::float4::Load(SkMatrixPriv::M44ColMajor(m) + 12);

    for (int i = 0; i < count; ++i) {
-        float4 p = (c0 * in[i].x) + (c1 * in[i].y) + (c2 * in[i].z) + (c3 * in[i].w);
+        auto p = (c0 * in[i].x) + (c1 * in[i].y) + (c2 * in[i].z) + (c3 * in[i].w);
        p.store(out + i);
    }
 }
@ -113,13 +112,13 @@ void Transform::mapPoints(const Rect& localRect, SkV4 deviceOut[4]) const {

 void Transform::mapPoints(const SkV2* localIn, SkV4* deviceOut, int count) const {
    // TODO: These maybe should go into SkM44, since bulk point mapping seems generally useful
-    float4 c0 = float4::Load(SkMatrixPriv::M44ColMajor(fM) + 0);
-    float4 c1 = float4::Load(SkMatrixPriv::M44ColMajor(fM) + 4);
+    auto c0 = skvx::float4::Load(SkMatrixPriv::M44ColMajor(fM) + 0);
+    auto c1 = skvx::float4::Load(SkMatrixPriv::M44ColMajor(fM) + 4);
    // skip c2 since localIn's z is assumed to be 0
-    float4 c3 = float4::Load(SkMatrixPriv::M44ColMajor(fM) + 12);
+    auto c3 = skvx::float4::Load(SkMatrixPriv::M44ColMajor(fM) + 12);

    for (int i = 0; i < count; ++i) {
-        float4 p = c0 * localIn[i].x + c1 * localIn[i].y /* + c2*0.f */ + c3 /* *1.f */;
+        auto p = c0 * localIn[i].x + c1 * localIn[i].y /* + c2*0.f */ + c3 /* *1.f */;
        p.store(deviceOut + i);
    }
 }
--- a/src/gpu/graphite/geom/VectorTypes.h
+++ b/src/gpu/graphite/geom/VectorTypes.h
@ -1,32 +0,0 @@
-/*
- * Copyright 2021 Google LLC
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#ifndef skgpu_graphite_geom_VectorTypes_DEFINED
-#define skgpu_graphite_geom_VectorTypes_DEFINED
-
-#include "include/gpu/graphite/GraphiteTypes.h"
-#include "include/private/SkVx.h"
-
-namespace skgpu::graphite {
-
-// Use familiar type names from SkSL.
-template<int N> using vec = skvx::Vec<N, float>;
-using float2 = vec<2>;
-using float4 = vec<4>;
-
-template<int N> using ivec = skvx::Vec<N, int32_t>;
-using int2 = ivec<2>;
-using int4 = ivec<4>;
-
-template<int N> using uvec = skvx::Vec<N, uint32_t>;
-using uint2 = uvec<2>;
-using uint4 = uvec<4>;
-
-};  // namespace skgpu::graphite
-
-#endif // skgpu_graphite_geom_VectorTypes_DEFINED
-
--- a/src/gpu/graphite/render/TessellateWedgesRenderStep.cpp
+++ b/src/gpu/graphite/render/TessellateWedgesRenderStep.cpp
@ -120,8 +120,8 @@ void TessellateWedgesRenderStep::writeVertices(DrawWriter* dw, const DrawGeometr
    MidpointContourParser parser{path};
    while (parser.parseNextContour()) {
        writer.updateFanPointAttrib(m.mapPoint(parser.currentMidpoint()));
-        float2 lastPoint = {0, 0};
-        float2 startPoint = {0, 0};
+        skvx::float2 lastPoint = {0, 0};
+        skvx::float2 startPoint = {0, 0};
        for (auto [verb, pts, w] : parser.currentContour()) {
            switch (verb) {
                case SkPathVerb::kMove: {
--- a/src/gpu/tessellate/AffineMatrix.h
+++ b/src/gpu/tessellate/AffineMatrix.h
@ -26,40 +26,41 @@ public:
    AffineMatrix& operator=(const SkMatrix& m) {
        SkASSERT(!m.hasPerspective());
        // Duplicate the matrix in float4.lo and float4.hi so we can map two points at once.
-        fScale = float2(m.getScaleX(), m.getScaleY()).xyxy();
-        fSkew = float2(m.getSkewX(), m.getSkewY()).xyxy();
-        fTrans = float2(m.getTranslateX(), m.getTranslateY()).xyxy();
+        fScale = skvx::float2(m.getScaleX(), m.getScaleY()).xyxy();
+        fSkew = skvx::float2(m.getSkewX(), m.getSkewY()).xyxy();
+        fTrans = skvx::float2(m.getTranslateX(), m.getTranslateY()).xyxy();
        return *this;
    }

-    SK_ALWAYS_INLINE float4 map2Points(float4 p0p1) const {
+    SK_ALWAYS_INLINE skvx::float4 map2Points(skvx::float4 p0p1) const {
        return fScale * p0p1 + (fSkew * p0p1.yxwz() + fTrans);
    }

-    SK_ALWAYS_INLINE float4 map2Points(const SkPoint pts[2]) const {
-        return this->map2Points(float4::Load(pts));
+    SK_ALWAYS_INLINE skvx::float4 map2Points(const SkPoint pts[2]) const {
+        return this->map2Points(skvx::float4::Load(pts));
    }

-    SK_ALWAYS_INLINE float4 map2Points(SkPoint p0, SkPoint p1) const {
-        return this->map2Points(float4(skvx::bit_pun<float2>(p0), skvx::bit_pun<float2>(p1)));
+    SK_ALWAYS_INLINE skvx::float4 map2Points(SkPoint p0, SkPoint p1) const {
+        return this->map2Points(skvx::float4(skvx::bit_pun<skvx::float2>(p0),
+                                             skvx::bit_pun<skvx::float2>(p1)));
    }

-    SK_ALWAYS_INLINE float2 mapPoint(float2 p) const {
+    SK_ALWAYS_INLINE skvx::float2 mapPoint(skvx::float2 p) const {
        return fScale.lo * p + (fSkew.lo * p.yx() + fTrans.lo);
    }

-    SK_ALWAYS_INLINE float2 map1Point(const SkPoint pt[1]) const {
-        return this->mapPoint(float2::Load(pt));
+    SK_ALWAYS_INLINE skvx::float2 map1Point(const SkPoint pt[1]) const {
+        return this->mapPoint(skvx::float2::Load(pt));
    }

    SK_ALWAYS_INLINE SkPoint mapPoint(SkPoint p) const {
-        return skvx::bit_pun<SkPoint>(this->mapPoint(skvx::bit_pun<float2>(p)));
+        return skvx::bit_pun<SkPoint>(this->mapPoint(skvx::bit_pun<skvx::float2>(p)));
    }

 private:
-    float4 fScale;
-    float4 fSkew;
-    float4 fTrans;
+    skvx::float4 fScale;
+    skvx::float4 fSkew;
+    skvx::float4 fTrans;
 };

 }  // namespace skgpu::tess
--- a/src/gpu/tessellate/BUILD.bazel
+++ b/src/gpu/tessellate/BUILD.bazel
@ -102,6 +102,7 @@ generated_cc_atom(
        "//include/core:SkPoint_hdr",
        "//include/core:SkString_hdr",
        "//include/private:SkFloatingPoint_hdr",
+        "//include/private:SkVx_hdr",
    ],
 )

--- a/src/gpu/tessellate/CullTest.h
+++ b/src/gpu/tessellate/CullTest.h
@ -47,7 +47,7 @@ public:
        // devPt = [x, y, -x, -y] in device space.
        auto devPt = fMatX*p.fX + fMatY*p.fY;
        // i.e., l < x && t < y && r > x && b > y.
-        return skvx::all(fCullBounds < devPt);
+        return all(fCullBounds < devPt);
    }

    // Returns whether any region of the bounding box of M * p0..2 will be in the viewport.
@ -62,13 +62,13 @@ public:
        // At this point: valN = {xN, yN, -xN, -yN} in device space.

        // Find the device-space bounding box of p0..2.
-        val0 = skvx::max(val0, val1);
-        val0 = skvx::max(val0, val2);
+        val0 = max(val0, val1);
+        val0 = max(val0, val2);
        // At this point: val0 = [r, b, -l, -t] of the device-space bounding box of p0..2.

        // Does fCullBounds intersect the device-space bounding box of p0..2?
        // i.e., l0 < r1 && t0 < b1 && r0 > l1 && b0 > t1.
-        return skvx::all(fCullBounds < val0);
+        return all(fCullBounds < val0);
    }

    // Returns whether any region of the bounding box of M * p0..3 will be in the viewport.
@ -85,21 +85,21 @@ public:
        // At this point: valN = {xN, yN, -xN, -yN} in device space.

        // Find the device-space bounding box of p0..3.
-        val0 = skvx::max(val0, val1);
-        val2 = skvx::max(val2, val3);
-        val0 = skvx::max(val0, val2);
+        val0 = max(val0, val1);
+        val2 = max(val2, val3);
+        val0 = max(val0, val2);
        // At this point: val0 = [r, b, -l, -t] of the device-space bounding box of p0..3.

        // Does fCullBounds intersect the device-space bounding box of p0..3?
        // i.e., l0 < r1 && t0 < b1 && r0 > l1 && b0 > t1.
-        return skvx::all(fCullBounds < val0);
+        return all(fCullBounds < val0);
    }

 private:
    // [fMatX, fMatY] maps path coordinates to the float4 [x, y, -x, -y] in device space.
-    float4 fMatX;
-    float4 fMatY;
-    float4 fCullBounds;  // [l, t, -r, -b]
+    skvx::float4 fMatX;
+    skvx::float4 fMatY;
+    skvx::float4 fCullBounds;  // [l, t, -r, -b]
 };

 }  // namespace skgpu::tess
--- a/src/gpu/tessellate/PatchWriter.h
+++ b/src/gpu/tessellate/PatchWriter.h
@ -245,6 +245,9 @@ class PatchWriter {
    using InnerTriangulator = std::conditional_t<kAddTrianglesWhenChopping,
            MiddleOutPolygonTriangulator, NullTriangulator>;

+    using float2 = skvx::float2;
+    using float4 = skvx::float4;
+
    static_assert(!kTrackJoinControlPoints || req_attrib<PatchAttribs::kJoinControlPoint>::value,
                  "Deferred patches and auto-updating joins requires kJoinControlPoint attrib");
 public:
--- a/src/gpu/tessellate/Tessellation.cpp
+++ b/src/gpu/tessellate/Tessellation.cpp
@ -20,6 +20,9 @@ namespace skgpu::tess {

 namespace {

+using float2 = skvx::float2;
+using float4 = skvx::float4;
+
 // This value only protects us against getting stuck in infinite recursion due to fp32 precision
 // issues. Mathematically, every curve should reduce to manageable visible sections in O(log N)
 // chops, where N is the magnitude of its control points.
--- a/src/gpu/tessellate/Tessellation.h
+++ b/src/gpu/tessellate/Tessellation.h
@ -18,36 +18,6 @@ class SkMatrix;
 class SkPath;
 struct SkRect;

-namespace skgpu {
-
-// Use familiar type names from SkSL.
-template<int N> using vec = skvx::Vec<N, float>;
-using float2 = vec<2>;
-using float4 = vec<4>;
-
-template<int N> using ivec = skvx::Vec<N, int32_t>;
-using int2 = ivec<2>;
-using int4 = ivec<4>;
-
-template<int N> using uvec = skvx::Vec<N, uint32_t>;
-using uint2 = uvec<2>;
-using uint4 = uvec<4>;
-
-#define AI SK_MAYBE_UNUSED SK_ALWAYS_INLINE
-
-AI float dot(float2 a, float2 b) {
-    float2 ab = a*b;
-    return ab.x() + ab.y();
-}
-
-AI float cross(float2 a, float2 b) {
-    float2 x = a * b.yx();
-    return x[0] - x[1];
-}
-
-#undef AI
-}  // namespace skgpu
-
 namespace skgpu::tess {

 // Don't allow linearized segments to be off by more than 1/4th of a pixel from the true curve.
--- a/src/gpu/tessellate/WangsFormula.h
+++ b/src/gpu/tessellate/WangsFormula.h
@ -12,6 +12,7 @@
 #include "include/core/SkPoint.h"
 #include "include/core/SkString.h"
 #include "include/private/SkFloatingPoint.h"
+#include "include/private/SkVx.h"
 #include "src/gpu/tessellate/Tessellation.h"

 #define AI SK_MAYBE_UNUSED SK_ALWAYS_INLINE
@ -82,18 +83,18 @@ public:
        }
        return *this;
    }
-    AI float2 operator()(float2 vector) const {
+    AI skvx::float2 operator()(skvx::float2 vector) const {
        switch (fType) {
            case Type::kIdentity:
                return vector;
            case Type::kScale:
                return fScaleXY * vector;
            case Type::kAffine:
-                return fScaleXSkewY * float2(vector[0]) + fSkewXScaleY * vector[1];
+                return fScaleXSkewY * skvx::float2(vector[0]) + fSkewXScaleY * vector[1];
        }
        SkUNREACHABLE;
    }
-    AI float4 operator()(float4 vectors) const {
+    AI skvx::float4 operator()(skvx::float4 vectors) const {
        switch (fType) {
            case Type::kIdentity:
                return vectors;
@ -106,28 +107,28 @@ public:
    }
 private:
    enum class Type { kIdentity, kScale, kAffine } fType;
-    union { float2 fScaleXY, fScaleXSkewY; };
-    float2 fSkewXScaleY;
-    float4 fScaleXYXY;
-    float4 fSkewXYXY;
+    union { skvx::float2 fScaleXY, fScaleXSkewY; };
+    skvx::float2 fSkewXScaleY;
+    skvx::float4 fScaleXYXY;
+    skvx::float4 fSkewXYXY;
 };

 // Returns Wang's formula, raised to the 4th power, specialized for a quadratic curve.
 AI float quadratic_p4(float precision,
-                      float2 p0, float2 p1, float2 p2,
+                      skvx::float2 p0, skvx::float2 p1, skvx::float2 p2,
                      const VectorXform& vectorXform = VectorXform()) {
-    float2 v = -2*p1 + p0 + p2;
+    skvx::float2 v = -2*p1 + p0 + p2;
    v = vectorXform(v);
-    float2 vv = v*v;
+    skvx::float2 vv = v*v;
    return (vv[0] + vv[1]) * length_term_p2<2>(precision);
 }
 AI float quadratic_p4(float precision,
                      const SkPoint pts[],
                      const VectorXform& vectorXform = VectorXform()) {
    return quadratic_p4(precision,
-                        skvx::bit_pun<float2>(pts[0]),
-                        skvx::bit_pun<float2>(pts[1]),
-                        skvx::bit_pun<float2>(pts[2]),
+                        skvx::bit_pun<skvx::float2>(pts[0]),
+                        skvx::bit_pun<skvx::float2>(pts[1]),
+                        skvx::bit_pun<skvx::float2>(pts[2]),
                        vectorXform);
 }

@ -149,24 +150,24 @@ AI int quadratic_log2(float precision,

 // Returns Wang's formula, raised to the 4th power, specialized for a cubic curve.
 AI float cubic_p4(float precision,
-                  float2 p0, float2 p1, float2 p2, float2 p3,
+                  skvx::float2 p0, skvx::float2 p1, skvx::float2 p2, skvx::float2 p3,
                  const VectorXform& vectorXform = VectorXform()) {
-    float4 p01{p0, p1};
-    float4 p12{p1, p2};
-    float4 p23{p2, p3};
-    float4 v = -2*p12 + p01 + p23;
+    skvx::float4 p01{p0, p1};
+    skvx::float4 p12{p1, p2};
+    skvx::float4 p23{p2, p3};
+    skvx::float4 v = -2*p12 + p01 + p23;
    v = vectorXform(v);
-    float4 vv = v*v;
+    skvx::float4 vv = v*v;
    return std::max(vv[0] + vv[1], vv[2] + vv[3]) * length_term_p2<3>(precision);
 }
 AI float cubic_p4(float precision,
                  const SkPoint pts[],
                  const VectorXform& vectorXform = VectorXform()) {
    return cubic_p4(precision,
-                    skvx::bit_pun<float2>(pts[0]),
-                    skvx::bit_pun<float2>(pts[1]),
-                    skvx::bit_pun<float2>(pts[2]),
-                    skvx::bit_pun<float2>(pts[3]),
+                    skvx::bit_pun<skvx::float2>(pts[0]),
+                    skvx::bit_pun<skvx::float2>(pts[1]),
+                    skvx::bit_pun<skvx::float2>(pts[2]),
+                    skvx::bit_pun<skvx::float2>(pts[3]),
                    vectorXform);
 }

@ -215,7 +216,7 @@ AI int worst_case_cubic_log2(float precision, float devWidth, float devHeight) {
 //   J. Zheng, T. Sederberg. "Estimating Tessellation Parameter Intervals for
 //   Rational Curves and Surfaces." ACM Transactions on Graphics 19(1). 2000.
 AI float conic_p2(float precision,
-                  float2 p0, float2 p1, float2 p2,
+                  skvx::float2 p0, skvx::float2 p1, skvx::float2 p2,
                  float w,
                  const VectorXform& vectorXform = VectorXform()) {
    p0 = vectorXform(p0);
@ -223,7 +224,7 @@ AI float conic_p2(float precision,
    p2 = vectorXform(p2);

    // Compute center of bounding box in projected space
-    const float2 C = 0.5f * (skvx::min(skvx::min(p0, p1), p2) + skvx::max(skvx::max(p0, p1), p2));
+    const skvx::float2 C = 0.5f * (min(min(p0, p1), p2) + max(max(p0, p1), p2));

    // Translate by -C. This improves translation-invariance of the formula,
    // see Sec. 3.3 of cited paper
@ -234,8 +235,9 @@ AI float conic_p2(float precision,
    // Compute max length
    const float max_len = sqrtf(std::max(dot(p0, p0), std::max(dot(p1, p1), dot(p2, p2))));

+
    // Compute forward differences
-    const float2 dp = -2*w*p1 + p0 + p2;
+    const skvx::float2 dp = -2*w*p1 + p0 + p2;
    const float dw = fabsf(-2 * w + 2);

    // Compute numerator and denominator for parametric step size of linearization. Here, the
@ -254,9 +256,9 @@ AI float conic_p2(float precision,
                  float w,
                  const VectorXform& vectorXform = VectorXform()) {
    return conic_p2(precision,
-                    skvx::bit_pun<float2>(pts[0]),
-                    skvx::bit_pun<float2>(pts[1]),
-                    skvx::bit_pun<float2>(pts[2]),
+                    skvx::bit_pun<skvx::float2>(pts[0]),
+                    skvx::bit_pun<skvx::float2>(pts[1]),
+                    skvx::bit_pun<skvx::float2>(pts[2]),
                    w,
                    vectorXform);
 }
--- a/tests/BUILD.bazel
+++ b/tests/BUILD.bazel
@ -368,7 +368,6 @@ GPU_TESTS = [
    "GrSurfaceTest.cpp",
    "GrTextBlobTest.cpp",
    "GrTextureMipMapInvalidationTest.cpp",
-    "GrVxTest.cpp",
    "ReadWritePixelsGpuTest.cpp",
    "SkSLDSLErrorLineNumbers.cpp",
    "SkSLDSLOnlyTest.cpp",
@ -2731,18 +2730,6 @@ generated_cc_atom(
    ],
 )

-generated_cc_atom(
-    name = "GrVxTest_src",
-    srcs = ["GrVxTest.cpp"],
-    visibility = ["//:__subpackages__"],
-    deps = [
-        ":Test_hdr",
-        "//include/utils:SkRandom_hdr",
-        "//src/core:SkGeometry_hdr",
-        "//src/gpu/ganesh:GrVx_hdr",
-    ],
-)
-
 generated_cc_atom(
    name = "GradientTest_src",
    srcs = ["GradientTest.cpp"],
@ -6090,6 +6077,7 @@ generated_cc_atom(
    deps = [
        ":Test_hdr",
        "//include/private:SkVx_hdr",
+        "//include/utils:SkRandom_hdr",
    ],
 )

--- a/tests/GrVxTest.cpp
+++ b/tests/GrVxTest.cpp
@ -1,41 +0,0 @@
-/*
- * Copyright 2020 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#include "include/utils/SkRandom.h"
-#include "src/core/SkGeometry.h"
-#include "src/gpu/ganesh/GrVx.h"
-#include "tests/Test.h"
-#include <limits>
-#include <numeric>
-
-using namespace grvx;
-using skvx::bit_pun;
-
-DEF_TEST(grvx_cross_dot, r) {
-    REPORTER_ASSERT(r, grvx::cross({0,1}, {0,1}) == 0);
-    REPORTER_ASSERT(r, grvx::cross({1,0}, {1,0}) == 0);
-    REPORTER_ASSERT(r, grvx::cross({1,1}, {1,1}) == 0);
-    REPORTER_ASSERT(r, grvx::cross({1,1}, {1,-1}) == -2);
-    REPORTER_ASSERT(r, grvx::cross({1,1}, {-1,1}) == 2);
-
-    REPORTER_ASSERT(r, grvx::dot({0,1}, {1,0}) == 0);
-    REPORTER_ASSERT(r, grvx::dot({1,0}, {0,1}) == 0);
-    REPORTER_ASSERT(r, grvx::dot({1,1}, {1,-1}) == 0);
-    REPORTER_ASSERT(r, grvx::dot({1,1}, {1,1}) == 2);
-    REPORTER_ASSERT(r, grvx::dot({1,1}, {-1,-1}) == -2);
-
-    SkRandom rand;
-    for (int i = 0; i < 100; ++i) {
-        float a=rand.nextRangeF(-1,1), b=rand.nextRangeF(-1,1), c=rand.nextRangeF(-1,1),
-              d=rand.nextRangeF(-1,1);
-        constexpr static float kTolerance = 1.f / (1 << 20);
-        REPORTER_ASSERT(r, SkScalarNearlyEqual(
-                grvx::cross({a,b}, {c,d}), SkPoint::CrossProduct({a,b}, {c,d}), kTolerance));
-        REPORTER_ASSERT(r, SkScalarNearlyEqual(
-                grvx::dot({a,b}, {c,d}), SkPoint::DotProduct({a,b}, {c,d}), kTolerance));
-    }
-}
--- a/tests/SkVxTest.cpp
+++ b/tests/SkVxTest.cpp
@ -6,33 +6,11 @@
 */

 #include "include/private/SkVx.h"
+#include "include/utils/SkRandom.h"
 #include "tests/Test.h"
 #include <numeric>

-using float2 = skvx::Vec<2,float>;
-using float4 = skvx::Vec<4,float>;
-using float8 = skvx::Vec<8,float>;
-
-using double2 = skvx::Vec<2,double>;
-using double4 = skvx::Vec<4,double>;
-using double8 = skvx::Vec<8,double>;
-
-using byte2  = skvx::Vec< 2,uint8_t>;
-using byte4  = skvx::Vec< 4,uint8_t>;
-using byte8  = skvx::Vec< 8,uint8_t>;
-using byte16 = skvx::Vec<16,uint8_t>;
-
-using int2 = skvx::Vec<2,int32_t>;
-using int4 = skvx::Vec<4,int32_t>;
-using int8 = skvx::Vec<8,int32_t>;
-
-using uint2 = skvx::Vec<2,uint32_t>;
-using uint4 = skvx::Vec<4,uint32_t>;
-using uint8 = skvx::Vec<8,uint32_t>;
-
-using long2 = skvx::Vec<2,int64_t>;
-using long4 = skvx::Vec<4,int64_t>;
-using long8 = skvx::Vec<8,int64_t>;
+namespace skvx {

 DEF_TEST(SkVx, r) {
    static_assert(sizeof(float2) ==  8, "");
@ -95,7 +73,7 @@ DEF_TEST(SkVx, r) {
    REPORTER_ASSERT(r, all( sqrt(float4{2,3,4,5}) < float4{2,2,3,3}));
    REPORTER_ASSERT(r, all( sqrt(float2{2,3}) < float2{2,2}));

-    REPORTER_ASSERT(r, all(skvx::cast<int>(float4{-1.5f,0.5f,1.0f,1.5f}) == int4{-1,0,1,1}));
+    REPORTER_ASSERT(r, all(cast<int>(float4{-1.5f,0.5f,1.0f,1.5f}) == int4{-1,0,1,1}));

    float buf[] = {1,2,3,4,5,6};
    REPORTER_ASSERT(r, all(float4::Load(buf) == float4{1,2,3,4}));
@ -109,10 +87,10 @@ DEF_TEST(SkVx, r) {
    REPORTER_ASSERT(r, all(float4::Load(buf+0) == float4{2,3,4,5}));
    REPORTER_ASSERT(r, all(float4::Load(buf+2) == float4{4,5,5,6}));

-    REPORTER_ASSERT(r, all(skvx::shuffle<2,1,0,3>        (float4{1,2,3,4}) == float4{3,2,1,4}));
-    REPORTER_ASSERT(r, all(skvx::shuffle<2,1>            (float4{1,2,3,4}) == float2{3,2}));
-    REPORTER_ASSERT(r, all(skvx::shuffle<3,3,3,3>        (float4{1,2,3,4}) == float4{4,4,4,4}));
-    REPORTER_ASSERT(r, all(skvx::shuffle<2,1,2,1,2,1,2,1>(float4{1,2,3,4})
+    REPORTER_ASSERT(r, all(shuffle<2,1,0,3>        (float4{1,2,3,4}) == float4{3,2,1,4}));
+    REPORTER_ASSERT(r, all(shuffle<2,1>            (float4{1,2,3,4}) == float2{3,2}));
+    REPORTER_ASSERT(r, all(shuffle<3,3,3,3>        (float4{1,2,3,4}) == float4{4,4,4,4}));
+    REPORTER_ASSERT(r, all(shuffle<2,1,2,1,2,1,2,1>(float4{1,2,3,4})
                           == float8{3,2,3,2,3,2,3,2}));

    // Test that mixed types can be used where they make sense.  Mostly about ergonomics.
@ -130,14 +108,12 @@ DEF_TEST(SkVx, r) {
        uint8_t want = (uint8_t)( 255*(x/255.0 * y/255.0) + 0.5 );

        {
-            uint8_t got = skvx::div255(skvx::Vec<8, uint16_t>(x) *
-                                       skvx::Vec<8, uint16_t>(y) )[0];
+            uint8_t got = div255(Vec<8, uint16_t>(x) * Vec<8, uint16_t>(y) )[0];
            REPORTER_ASSERT(r, got == want);
        }

        {
-            uint8_t got = skvx::approx_scale(skvx::Vec<8,uint8_t>(x),
-                                             skvx::Vec<8,uint8_t>(y))[0];
+            uint8_t got = approx_scale(Vec<8,uint8_t>(x), Vec<8,uint8_t>(y))[0];

            REPORTER_ASSERT(r, got == want-1 ||
                               got == want   ||
@ -163,10 +139,10 @@ DEF_TEST(SkVx, r) {
        // Intentionally not testing -0, as we don't care if it's 0x0000 or 0x8000.
        float8 fs = {+0.0f,+0.5f,+1.0f,+2.0f,
                     -4.0f,-0.5f,-1.0f,-2.0f};
-        skvx::Vec<8,uint16_t> hs = {0x0000,0x3800,0x3c00,0x4000,
-                                    0xc400,0xb800,0xbc00,0xc000};
-        REPORTER_ASSERT(r, all(skvx::  to_half(fs) == hs));
-        REPORTER_ASSERT(r, all(skvx::from_half(hs) == fs));
+        Vec<8,uint16_t> hs = {0x0000,0x3800,0x3c00,0x4000,
+                              0xc400,0xb800,0xbc00,0xc000};
+        REPORTER_ASSERT(r, all(  to_half(fs) == hs));
+        REPORTER_ASSERT(r, all(from_half(hs) == fs));
    }
 }

@ -186,7 +162,7 @@ DEF_TEST(SkVx_xy, r) {
    REPORTER_ASSERT(r, all(f == float2(8,6)));
    f = f.yx();
    REPORTER_ASSERT(r, all(f == float2(6,8)));
-    REPORTER_ASSERT(r, skvx::bit_pun<SkPoint>(f) == SkPoint::Make(6,8));
+    REPORTER_ASSERT(r, bit_pun<SkPoint>(f) == SkPoint::Make(6,8));
    SkPoint p;
    f.store(&p);
    REPORTER_ASSERT(r, p == SkPoint::Make(6,8));
@ -194,11 +170,11 @@ DEF_TEST(SkVx_xy, r) {
    REPORTER_ASSERT(r, p == SkPoint::Make(8,6));
    REPORTER_ASSERT(r, all(f.xyxy() == float4(6,8,6,8)));
    REPORTER_ASSERT(r, all(f.xyxy() == float4(f,f)));
-    REPORTER_ASSERT(r, all(skvx::join(f,f) == f.xyxy()));
-    REPORTER_ASSERT(r, all(skvx::join(f.yx(),f) == float4(f.y(),f.x(),f)));
-    REPORTER_ASSERT(r, all(skvx::join(f.yx(),f) == float4(f.yx(),f.x(),f.y())));
-    REPORTER_ASSERT(r, all(skvx::join(f,f.yx()) == float4(f.x(),f.y(),f.yx())));
-    REPORTER_ASSERT(r, all(skvx::join(f.yx(),f.yx()) == float4(f.yx(),f.yx())));
+    REPORTER_ASSERT(r, all(join(f,f) == f.xyxy()));
+    REPORTER_ASSERT(r, all(join(f.yx(),f) == float4(f.y(),f.x(),f)));
+    REPORTER_ASSERT(r, all(join(f.yx(),f) == float4(f.yx(),f.x(),f.y())));
+    REPORTER_ASSERT(r, all(join(f,f.yx()) == float4(f.x(),f.y(),f.yx())));
+    REPORTER_ASSERT(r, all(join(f.yx(),f.yx()) == float4(f.yx(),f.yx())));
 }

 DEF_TEST(SkVx_xyzw, r) {
@ -225,11 +201,11 @@ DEF_TEST(SkVx_xyzw, r) {
    REPORTER_ASSERT(r, f.z() == 2);
    f[3] = 3;
    REPORTER_ASSERT(r, f.w() == 3);
-    REPORTER_ASSERT(r, skvx::all(f.xy() == float2(0,1)));
-    REPORTER_ASSERT(r, skvx::all(f.zw() == float2{2,3}));
+    REPORTER_ASSERT(r, all(f.xy() == float2(0,1)));
+    REPORTER_ASSERT(r, all(f.zw() == float2{2,3}));
    REPORTER_ASSERT(r, all(f == float4(0,1,2,3)));
-    REPORTER_ASSERT(r, all(f.yxwz().lo == skvx::shuffle<1,0>(f)));
-    REPORTER_ASSERT(r, all(f.yxwz().hi == skvx::shuffle<3,2>(f)));
+    REPORTER_ASSERT(r, all(f.yxwz().lo == shuffle<1,0>(f)));
+    REPORTER_ASSERT(r, all(f.yxwz().hi == shuffle<3,2>(f)));
    REPORTER_ASSERT(r, all(f.zwxy().lo.lo == f.z()));
    REPORTER_ASSERT(r, all(f.zwxy().lo.hi == f.w()));
    REPORTER_ASSERT(r, all(f.zwxy().hi.lo == f.x()));
@ -239,127 +215,60 @@ DEF_TEST(SkVx_xyzw, r) {
    REPORTER_ASSERT(r, f.yxwz().hi.lo.val == f.w());
    REPORTER_ASSERT(r, f.yxwz().hi.hi.val == f.z());

-    REPORTER_ASSERT(r, all(skvx::naive_if_then_else(int2(0,~0),
-                                                    skvx::shuffle<3,2>(float4(0,1,2,3)),
-                                                    float4(4,5,6,7).xy()) == float2(4,2)));
-    REPORTER_ASSERT(r, all(skvx::if_then_else(int2(0,~0),
-                                              skvx::shuffle<3,2>(float4(0,1,2,3)),
+    REPORTER_ASSERT(r, all(naive_if_then_else(int2(0,~0),
+                                              shuffle<3,2>(float4(0,1,2,3)),
                                              float4(4,5,6,7).xy()) == float2(4,2)));
-    REPORTER_ASSERT(r, all(skvx::naive_if_then_else(int2(0,~0).xyxy(),
-                                                    float4(0,1,2,3).zwxy(),
-                                                    float4(4,5,6,7)) == float4(4,3,6,1)));
-    REPORTER_ASSERT(r, all(skvx::if_then_else(int2(0,~0).xyxy(),
+    REPORTER_ASSERT(r, all(if_then_else(int2(0,~0),
+                                        shuffle<3,2>(float4(0,1,2,3)),
+                                        float4(4,5,6,7).xy()) == float2(4,2)));
+    REPORTER_ASSERT(r, all(naive_if_then_else(int2(0,~0).xyxy(),
                                              float4(0,1,2,3).zwxy(),
                                              float4(4,5,6,7)) == float4(4,3,6,1)));
+    REPORTER_ASSERT(r, all(if_then_else(int2(0,~0).xyxy(),
+                                        float4(0,1,2,3).zwxy(),
+                                        float4(4,5,6,7)) == float4(4,3,6,1)));

-    REPORTER_ASSERT(r, all(skvx::pin(float4(0,1,2,3).yxwz(),
-                                     float2(1).xyxy(),
-                                     float2(2).xyxy()) == float4(1,1,2,2)));
+    REPORTER_ASSERT(r, all(pin(float4(0,1,2,3).yxwz(),
+                               float2(1).xyxy(),
+                               float2(2).xyxy()) == float4(1,1,2,2)));
 }

-static bool check_approx_acos(skiatest::Reporter* r, float x, float approx_acos_x) {
-    float acosf_x = acosf(x);
-    float error = acosf_x - approx_acos_x;
-    if (!(fabsf(error) <= SKVX_APPROX_ACOS_MAX_ERROR)) {
-        ERRORF(r, "Larger-than-expected error from skvx::approx_acos\n"
-                  "  x=              %f\n"
-                  "  approx_acos_x=  %f  (%f degrees\n"
-                  "  acosf_x=        %f  (%f degrees\n"
-                  "  error=          %f  (%f degrees)\n"
-                  "  tolerance=      %f  (%f degrees)\n\n",
-                  x, approx_acos_x, SkRadiansToDegrees(approx_acos_x), acosf_x,
-                  SkRadiansToDegrees(acosf_x), error, SkRadiansToDegrees(error),
-                  SKVX_APPROX_ACOS_MAX_ERROR, SkRadiansToDegrees(SKVX_APPROX_ACOS_MAX_ERROR));
-        return false;
-    }
-    return true;
-}
+DEF_TEST(SkVx_cross_dot, r) {
+    REPORTER_ASSERT(r, cross({0,1}, {0,1}) == 0);
+    REPORTER_ASSERT(r, cross({1,0}, {1,0}) == 0);
+    REPORTER_ASSERT(r, cross({1,1}, {1,1}) == 0);
+    REPORTER_ASSERT(r, cross({1,1}, {1,-1}) == -2);
+    REPORTER_ASSERT(r, cross({1,1}, {-1,1}) == 2);

-DEF_TEST(SkVx_approx_acos, r) {
-    float4 boundaries = skvx::approx_acos(float4{-1, 0, 1, 0});
-    check_approx_acos(r, -1, boundaries[0]);
-    check_approx_acos(r, 0, boundaries[1]);
-    check_approx_acos(r, +1, boundaries[2]);
+    REPORTER_ASSERT(r, dot(int2{0,1}, int2{1,0}) == 0);
+    REPORTER_ASSERT(r, dot(int2{1,0}, int2{0,1}) == 0);
+    REPORTER_ASSERT(r, dot(int2{1,1}, int2{1,-1}) == 0);
+    REPORTER_ASSERT(r, dot(int2{1,1}, int2{1,1}) == 2);
+    REPORTER_ASSERT(r, dot(int2{1,1}, int2{-1,-1}) == -2);

-    // Select a distribution of starting points around which to begin testing approx_acos. These
-    // fall roughly around the known minimum and maximum errors. No need to include -1, 0, or 1
-    // since those were just tested above. (Those are tricky because 0 is an inflection and the
-    // derivative is infinite at 1 and -1.)
-    float8 x = {-.99f, -.8f, -.4f, -.2f, .2f, .4f, .8f, .99f};
-
-    // Converge at the various local minima and maxima of "approx_acos(x) - cosf(x)" and verify that
-    // approx_acos is always within "kTolerance" degrees of the expected answer.
-    float8 err_;
-    for (int iter = 0; iter < 10; ++iter) {
-        // Run our approximate inverse cosine approximation.
-        auto approx_acos_x = skvx::approx_acos(x);
-
-        // Find d/dx(error)
-        //    = d/dx(approx_acos(x) - acos(x))
-        //    = (f'g - fg')/gg + 1/sqrt(1 - x^2), [where f = bx^3 + ax, g = dx^4 + cx^2 + 1]
-        float8 xx = x*x;
-        float8 a = -0.939115566365855f;
-        float8 b =  0.9217841528914573f;
-        float8 c = -1.2845906244690837f;
-        float8 d =  0.295624144969963174f;
-        float8 f = (b*xx + a)*x;
-        float8 f_ = 3*b*xx + a;
-        float8 g = (d*xx + c)*xx + 1;
-        float8 g_ = (4*d*xx + 2*c)*x;
-        float8 gg = g*g;
-        float8 q = skvx::sqrt(1 - xx);
-        err_ = (f_*g - f*g_)/gg + 1/q;
-
-        // Find d^2/dx^2(error)
-        //    = ((f''g - fg'')g^2 - (f'g - fg')2gg') / g^4 + x(1 - x^2)^(-3/2)
-        //    = ((f''g - fg'')g - (f'g - fg')2g') / g^3 + x(1 - x^2)^(-3/2)
-        float8 f__ = 6*b*x;
-        float8 g__ = 12*d*xx + 2*c;
-        float8 err__ = ((f__*g - f*g__)*g - (f_*g - f*g_)*2*g_) / (gg*g) + x/((1 - xx)*q);
-
-#if 0
-        SkDebugf("\n\niter %i\n", iter);
-#endif
-        // Ensure each lane's approximation is within maximum error.
-        for (int j = 0; j < 8; ++j) {
-#if 0
-            SkDebugf("x=%f  err=%f  err'=%f  err''=%f\n",
-                     x[j], SkRadiansToDegrees(skvx::approx_acos_x[j] - acosf(x[j])),
-                     SkRadiansToDegrees(err_[j]), SkRadiansToDegrees(err__[j]));
-#endif
-            if (!check_approx_acos(r, x[j], approx_acos_x[j])) {
-                return;
-            }
-        }
-
-        // Use Newton's method to update the x values to locations closer to their local minimum or
-        // maximum. (This is where d/dx(error) == 0.)
-        x -= err_/err__;
-        x = skvx::pin<8,float>(x, -.99f, .99f);
-    }
-
-    // Ensure each lane converged to a local minimum or maximum.
-    for (int j = 0; j < 8; ++j) {
-        REPORTER_ASSERT(r, SkScalarNearlyZero(err_[j]));
-    }
-
-    // Make sure we found all the actual known locations of local min/max error.
-    for (float knownRoot : {-0.983536f, -0.867381f, -0.410923f, 0.410923f, 0.867381f, 0.983536f}) {
-        REPORTER_ASSERT(r, skvx::any(skvx::abs(x - knownRoot) < SK_ScalarNearlyZero));
+    SkRandom rand;
+    for (int i = 0; i < 100; ++i) {
+        float a=rand.nextRangeF(-1,1), b=rand.nextRangeF(-1,1), c=rand.nextRangeF(-1,1),
+              d=rand.nextRangeF(-1,1);
+        constexpr static float kTolerance = 1.f / (1 << 20);
+        REPORTER_ASSERT(r, SkScalarNearlyEqual(
+                cross({a,b}, {c,d}), SkPoint::CrossProduct({a,b}, {c,d}), kTolerance));
+        REPORTER_ASSERT(r, SkScalarNearlyEqual(
+                dot(float2{a,b}, float2{c,d}), SkPoint::DotProduct({a,b}, {c,d}), kTolerance));
    }
 }

 template<int N, typename T> void check_strided_loads(skiatest::Reporter* r) {
-    using Vec = skvx::Vec<N,T>;
+    using Vec = Vec<N,T>;
    T values[N*4];
    std::iota(values, values + N*4, 0);
    Vec a, b, c, d;
-    skvx::strided_load2(values, a, b);
+    strided_load2(values, a, b);
    for (int i = 0; i < N; ++i) {
        REPORTER_ASSERT(r, a[i] == values[i*2]);
        REPORTER_ASSERT(r, b[i] == values[i*2 + 1]);
    }
-    skvx::strided_load4(values, a, b, c, d);
+    strided_load4(values, a, b, c, d);
    for (int i = 0; i < N; ++i) {
        REPORTER_ASSERT(r, a[i] == values[i*4]);
        REPORTER_ASSERT(r, b[i] == values[i*4 + 1]);
@ -398,11 +307,12 @@ DEF_TEST(SkVM_ScaledDividerU32, r) {

    auto test = [&](uint32_t denom) {
         // half == 1, so the max to check is kMax-1
-        skvx::ScaledDividerU32 d(denom);
-        uint32_t maxCheck = static_cast<uint32_t>(floor((double)(kMax - d.half()) / denom + 0.5));
+        ScaledDividerU32 d(denom);
+        uint32_t maxCheck = static_cast<uint32_t>(
+                std::floor((double)(kMax - d.half()) / denom + 0.5));
        REPORTER_ASSERT(r, errorBounds(d.divide((kMax))[0], maxCheck));
        for (uint32_t i = 0; i < kMax - d.half(); i += 65535) {
-            uint32_t expected = static_cast<uint32_t>(floor((double)i / denom + 0.5));
+            uint32_t expected = static_cast<uint32_t>(std::floor((double)i / denom + 0.5));
            auto actual = d.divide(i + d.half());
            if (!errorBounds(actual[0], expected)) {
                SkDebugf("i: %u expected: %u actual: %u\n", i, expected, actual[0]);
@ -423,3 +333,5 @@ DEF_TEST(SkVM_ScaledDividerU32, r) {
    test(15'485'863);
    test(512'927'377);
 }
+
+}  // namespace skvx
--- a/tests/graphite/IntersectionTreeTest.cpp
+++ b/tests/graphite/IntersectionTreeTest.cpp
@ -62,8 +62,7 @@ DEF_GRAPHITE_TEST(skgpu_IntersectionTree, reporter) {
    {
        SimpleIntersectionTree simpleTree;
        IntersectionTree tree;
-        CHECK(tree.add(Rect(float2(-std::numeric_limits<float>::infinity()),
-                                   float2(std::numeric_limits<float>::infinity()))));
+        CHECK(tree.add(Rect::Infinite()));
        CHECK(!tree.add(Rect::WH(1,1)));
        CHECK(!tree.add(Rect::WH(1,std::numeric_limits<float>::infinity())));
        CHECK(tree.add(Rect::WH(0, 0)));
--- a/tests/graphite/RectTest.cpp
+++ b/tests/graphite/RectTest.cpp
@ -13,6 +13,9 @@ namespace skgpu::graphite {
 #define CHECK(A) REPORTER_ASSERT(reporter, A)

 DEF_GRAPHITE_TEST(skgpu_Rect, reporter) {
+    using float2 = skvx::float2;
+    using float4 = skvx::float4;
+
    const SkRect skRect = SkRect::MakeLTRB(1,-3,4,0);
    const Rect rect = skRect;
    CHECK(rect == rect);