From a59925fb1dfaddff6add72929bfa882fc72d5cc3 Mon Sep 17 00:00:00 2001 From: Brian Salomon Date: Thu, 16 Sep 2021 09:25:47 -0400 Subject: [PATCH] Revert "Revert "New approach to GrProcessor uniforms."" This reverts commit ae59426ea6e9b351d9d52f2a9c12d05023351994. Bug: skia:12182 Change-Id: I591a0a89ffad1a3d5d867dd247ceeec71b6041a4 Reviewed-on: https://skia-review.googlesource.com/c/skia/+/449516 Reviewed-by: Michael Ludwig Commit-Queue: Brian Salomon --- gn/gpu.gni | 2 + include/private/GrTypesPriv.h | 63 +++- src/gpu/GrFragmentProcessor.h | 7 +- src/gpu/GrGeometryProcessor.h | 10 +- src/gpu/GrOpFlushState.cpp | 1 + src/gpu/GrProcessor.h | 130 +++++++- src/gpu/GrProgramInfo.cpp | 19 ++ src/gpu/GrProgramInfo.h | 3 + src/gpu/GrSPIRVUniformHandler.cpp | 38 ++- src/gpu/GrSPIRVUniformHandler.h | 14 +- src/gpu/GrUniformAggregator.cpp | 63 ++++ src/gpu/GrUniformAggregator.h | 106 +++++++ src/gpu/GrUniformDataManager.cpp | 285 +++++++++++++++++- src/gpu/GrUniformDataManager.h | 46 ++- src/gpu/GrXferProcessor.h | 6 + src/gpu/d3d/GrD3DPipelineState.cpp | 5 +- src/gpu/d3d/GrD3DPipelineState.h | 2 + src/gpu/d3d/GrD3DPipelineStateBuilder.cpp | 3 + src/gpu/d3d/GrD3DPipelineStateDataManager.cpp | 8 +- src/gpu/d3d/GrD3DPipelineStateDataManager.h | 6 +- src/gpu/dawn/GrDawnProgramBuilder.cpp | 9 +- src/gpu/dawn/GrDawnProgramBuilder.h | 6 +- src/gpu/dawn/GrDawnProgramDataManager.cpp | 8 +- src/gpu/dawn/GrDawnProgramDataManager.h | 5 +- src/gpu/effects/GrPorterDuffXferProcessor.cpp | 23 +- src/gpu/effects/GrRRectEffect.cpp | 251 ++++++++------- src/gpu/gl/GrGLProgram.cpp | 17 +- src/gpu/gl/GrGLProgram.h | 5 + src/gpu/gl/GrGLProgramDataManager.cpp | 188 +++++++++++- src/gpu/gl/GrGLProgramDataManager.h | 39 ++- src/gpu/gl/GrGLUniformHandler.cpp | 12 +- src/gpu/gl/GrGLUniformHandler.h | 4 +- src/gpu/gl/builders/GrGLProgramBuilder.cpp | 6 +- src/gpu/gl/builders/GrGLProgramBuilder.h | 3 +- src/gpu/glsl/GrGLSLProgramBuilder.cpp | 14 +- src/gpu/glsl/GrGLSLProgramBuilder.h | 3 + src/gpu/glsl/GrGLSLUniformHandler.h | 5 +- src/gpu/mtl/GrMtlPipelineState.h | 2 + src/gpu/mtl/GrMtlPipelineState.mm | 4 +- src/gpu/mtl/GrMtlPipelineStateBuilder.mm | 3 + src/gpu/mtl/GrMtlPipelineStateDataManager.h | 3 +- src/gpu/mtl/GrMtlPipelineStateDataManager.mm | 8 +- src/gpu/mtl/GrMtlUniformHandler.h | 16 +- src/gpu/mtl/GrMtlUniformHandler.mm | 48 ++- src/gpu/ops/GrOvalOpFactory.cpp | 96 +++--- src/gpu/vk/GrVkPipelineState.cpp | 5 +- src/gpu/vk/GrVkPipelineState.h | 1 + src/gpu/vk/GrVkPipelineStateBuilder.cpp | 3 + src/gpu/vk/GrVkPipelineStateDataManager.cpp | 21 +- src/gpu/vk/GrVkPipelineStateDataManager.h | 7 +- src/gpu/vk/GrVkUniformHandler.cpp | 81 ++++- src/gpu/vk/GrVkUniformHandler.h | 16 +- 52 files changed, 1463 insertions(+), 266 deletions(-) create mode 100644 src/gpu/GrUniformAggregator.cpp create mode 100644 src/gpu/GrUniformAggregator.h diff --git a/gn/gpu.gni b/gn/gpu.gni index 152e69ccee..a14760e340 100644 --- a/gn/gpu.gni +++ b/gn/gpu.gni @@ -229,6 +229,8 @@ skia_gpu_sources = [ "$_src/gpu/GrTracing.h", "$_src/gpu/GrTransferFromRenderTask.cpp", "$_src/gpu/GrTransferFromRenderTask.h", + "$_src/gpu/GrUniformAggregator.cpp", + "$_src/gpu/GrUniformAggregator.h", "$_src/gpu/GrUniformDataManager.cpp", "$_src/gpu/GrUniformDataManager.h", "$_src/gpu/GrUserStencilSettings.h", diff --git a/include/private/GrTypesPriv.h b/include/private/GrTypesPriv.h index 30bf996a44..0af6cc451e 100644 --- a/include/private/GrTypesPriv.h +++ b/include/private/GrTypesPriv.h @@ -284,7 +284,7 @@ static inline GrQuadAAFlags SkToGrQuadAAFlags(unsigned 
flags) { /** * Types of shader-language-specific boxed variables we can create. */ -enum GrSLType { +enum GrSLType : unsigned { kVoid_GrSLType, kBool_GrSLType, kBool2_GrSLType, @@ -560,6 +560,67 @@ static constexpr int GrSLTypeVecLength(GrSLType type) { SkUNREACHABLE; } +/** + * Get dimensions of matrix types or {0, 0} if not a matrix. Note scalars and vectors are not + * considered matrices by this function. + */ +static constexpr SkISize GrSLTypeMatrixDims(GrSLType type) { + switch (type) { + case kFloat2x2_GrSLType: return {2, 2}; + case kFloat3x3_GrSLType: return {3, 3}; + case kFloat4x4_GrSLType: return {4, 4}; + case kHalf2x2_GrSLType: return {2, 2}; + case kHalf3x3_GrSLType: return {3, 3}; + case kHalf4x4_GrSLType: return {4, 4}; + + case kVoid_GrSLType: + case kFloat_GrSLType: + case kHalf_GrSLType: + case kBool_GrSLType: + case kByte_GrSLType: + case kUByte_GrSLType: + case kShort_GrSLType: + case kUShort_GrSLType: + case kInt_GrSLType: + case kUint_GrSLType: + case kFloat2_GrSLType: + case kHalf2_GrSLType: + case kBool2_GrSLType: + case kByte2_GrSLType: + case kUByte2_GrSLType: + case kShort2_GrSLType: + case kUShort2_GrSLType: + case kInt2_GrSLType: + case kUint2_GrSLType: + case kFloat3_GrSLType: + case kHalf3_GrSLType: + case kBool3_GrSLType: + case kByte3_GrSLType: + case kUByte3_GrSLType: + case kShort3_GrSLType: + case kUShort3_GrSLType: + case kInt3_GrSLType: + case kUint3_GrSLType: + case kFloat4_GrSLType: + case kHalf4_GrSLType: + case kBool4_GrSLType: + case kByte4_GrSLType: + case kUByte4_GrSLType: + case kShort4_GrSLType: + case kUShort4_GrSLType: + case kInt4_GrSLType: + case kUint4_GrSLType: + case kTexture2DSampler_GrSLType: + case kTextureExternalSampler_GrSLType: + case kTexture2DRectSampler_GrSLType: + case kTexture2D_GrSLType: + case kSampler_GrSLType: + case kInput_GrSLType: + return {0, 0}; + } + SkUNREACHABLE; +} + static inline GrSLType GrSLCombinedSamplerTypeForTextureType(GrTextureType type) { switch (type) { case GrTextureType::k2D: diff --git a/src/gpu/GrFragmentProcessor.h b/src/gpu/GrFragmentProcessor.h index 89dbb839f0..b11cb255a7 100644 --- a/src/gpu/GrFragmentProcessor.h +++ b/src/gpu/GrFragmentProcessor.h @@ -11,6 +11,7 @@ #include "include/private/SkSLSampleUsage.h" #include "include/private/SkSLString.h" #include "src/gpu/GrProcessor.h" +#include "src/gpu/GrUniformAggregator.h" #include "src/gpu/glsl/GrGLSLUniformHandler.h" #include @@ -372,7 +373,7 @@ protected: } explicit GrFragmentProcessor(const GrFragmentProcessor& src) - : INHERITED(src.classID()), fFlags(src.fFlags) { + : INHERITED(src), fFlags(src.fFlags) { this->cloneAndRegisterAllChildProcessors(src); } @@ -506,6 +507,7 @@ public: stages. @param fragBuilder Interface used to emit code in the shaders. + @param uniforms Used to get names of uniforms added by GrProcessor::uniforms(). @param uniformHandler Interface used for accessing information about our uniforms @param caps The capabilities of the GPU which will render this FP @param fp The processor that generated this program stage. 
@@ -523,6 +525,7 @@ public: */ struct EmitArgs { EmitArgs(GrGLSLFPFragmentBuilder* fragBuilder, + GrUniformAggregator::ProcessorUniforms uniforms, GrGLSLUniformHandler* uniformHandler, const GrShaderCaps* caps, const GrFragmentProcessor& fp, @@ -530,6 +533,7 @@ public: const char* destColor, const char* sampleCoord) : fFragBuilder(fragBuilder) + , fUniforms(std::move(uniforms)) , fUniformHandler(uniformHandler) , fShaderCaps(caps) , fFp(fp) @@ -537,6 +541,7 @@ public: , fDestColor(destColor) , fSampleCoord(sampleCoord) {} GrGLSLFPFragmentBuilder* fFragBuilder; + GrUniformAggregator::ProcessorUniforms fUniforms; GrGLSLUniformHandler* fUniformHandler; const GrShaderCaps* fShaderCaps; const GrFragmentProcessor& fFp; diff --git a/src/gpu/GrGeometryProcessor.h b/src/gpu/GrGeometryProcessor.h index 31667b4b1a..19644163a3 100644 --- a/src/gpu/GrGeometryProcessor.h +++ b/src/gpu/GrGeometryProcessor.h @@ -14,6 +14,7 @@ #include "src/gpu/GrShaderCaps.h" #include "src/gpu/GrShaderVar.h" #include "src/gpu/GrSwizzle.h" +#include "src/gpu/GrUniformAggregator.h" #include "src/gpu/glsl/GrGLSLProgramDataManager.h" #include "src/gpu/glsl/GrGLSLUniformHandler.h" #include "src/gpu/glsl/GrGLSLVarying.h" @@ -209,8 +210,8 @@ public: for (int i = 0; i < attrCount; ++i) { const Attribute& attr = attrs[i]; b->appendComment(attr.isInitialized() ? attr.name() : "unusedAttr"); - b->addBits(8, attr.isInitialized() ? attr.cpuType() : 0xff, "attrType"); - b->addBits(8, attr.isInitialized() ? attr.gpuType() : 0xff, "attrGpuType"); + b->addBits(8, attr.isInitialized() ? (int)attr.cpuType() : 0xff, "attrType"); + b->addBits(8, attr.isInitialized() ? (int)attr.gpuType() : 0xff, "attrGpuType"); } }; b->add32(fVertexAttributes.fRawCount, "numVertexAttributes"); @@ -261,6 +262,8 @@ protected: inline static const TextureSampler& IthTextureSampler(int i); private: + GrGeometryProcessor(const GrGeometryProcessor&) = delete; + virtual const TextureSampler& onTextureSampler(int) const { return IthTextureSampler(0); } GrShaderFlags fShaders = kVertex_GrShaderFlag | kFragment_GrShaderFlag; @@ -292,6 +295,7 @@ public: EmitArgs(GrGLSLVertexBuilder* vertBuilder, GrGLSLFPFragmentBuilder* fragBuilder, GrGLSLVaryingHandler* varyingHandler, + GrUniformAggregator::ProcessorUniforms uniforms, GrGLSLUniformHandler* uniformHandler, const GrShaderCaps* caps, const GrGeometryProcessor& geomProc, @@ -301,6 +305,7 @@ public: : fVertBuilder(vertBuilder) , fFragBuilder(fragBuilder) , fVaryingHandler(varyingHandler) + , fUniforms(std::move(uniforms)) , fUniformHandler(uniformHandler) , fShaderCaps(caps) , fGeomProc(geomProc) @@ -310,6 +315,7 @@ public: GrGLSLVertexBuilder* fVertBuilder; GrGLSLFPFragmentBuilder* fFragBuilder; GrGLSLVaryingHandler* fVaryingHandler; + GrUniformAggregator::ProcessorUniforms fUniforms; GrGLSLUniformHandler* fUniformHandler; const GrShaderCaps* fShaderCaps; const GrGeometryProcessor& fGeomProc; diff --git a/src/gpu/GrOpFlushState.cpp b/src/gpu/GrOpFlushState.cpp index ee1a7eb43f..2cc34123b4 100644 --- a/src/gpu/GrOpFlushState.cpp +++ b/src/gpu/GrOpFlushState.cpp @@ -12,6 +12,7 @@ #include "src/gpu/GrDataUtils.h" #include "src/gpu/GrDirectContextPriv.h" #include "src/gpu/GrDrawOpAtlas.h" +#include "src/gpu/GrGeometryProcessor.h" #include "src/gpu/GrGpu.h" #include "src/gpu/GrImageInfo.h" #include "src/gpu/GrProgramInfo.h" diff --git a/src/gpu/GrProcessor.h b/src/gpu/GrProcessor.h index 104537bd4d..f71f2b274f 100644 --- a/src/gpu/GrProcessor.h +++ b/src/gpu/GrProcessor.h @@ -9,6 +9,7 @@ #define GrProcessor_DEFINED #include 
"include/core/SkMath.h" +#include "include/core/SkSpan.h" #include "include/core/SkString.h" #include "src/gpu/GrColor.h" #include "src/gpu/GrGpuBuffer.h" @@ -144,12 +145,139 @@ public: ClassID classID() const { return fClassID; } + /** + * Describes a uniform. Uniforms consist of: + * type: The type of the values in the shader + * count: Number of elements of 'type' in the array or GrShaderVar::kNonArray if not an + * array. + * offset: byte offset of the data within the GrProcessor class (no relation to uniform + * buffer offset). + * ctype: specifies the way the data at the 'offset' is represented. See CType enum + * comments. + * visibility: specifies in which shader stage(s) the uniform is declared. + */ + class Uniform { + public: + enum class CType : unsigned { + // Any float/half, vector of floats/half, or matrices of floats/halfs are a tightly + // packed array of floats. Similarly, any bool/shorts/ints are a tightly packed array + // of int32_t. + kDefault, + // Can be used with kFloat3x3 or kHalf3x3 + kSkMatrix, + + kLast = kSkMatrix + }; + static constexpr int kCTypeCount = static_cast(CType::kLast) + 1; + + constexpr Uniform() + : fType (static_cast(kVoid_GrSLType)) + , fCount (static_cast(GrShaderVar::kNonArray)) + , fVisibility(static_cast(GrShaderFlags::kNone_GrShaderFlags)) + , fCType (static_cast(CType::kDefault)) + , fOffset (0) {} + + constexpr Uniform(GrSLType type, + ptrdiff_t offset, + GrShaderFlags visibility = kFragment_GrShaderFlag, + CType ctype = CType::kDefault) + : Uniform(type, GrShaderVar::kNonArray, offset, visibility, ctype) {} + + constexpr Uniform(GrSLType type, + int arrayCount, + size_t offset, + GrShaderFlags visibility = kFragment_GrShaderFlag, + CType ctype = CType::kDefault) + : fType (static_cast(type )) + , fCount (static_cast(arrayCount)) + , fVisibility(static_cast(visibility)) + , fCType (static_cast(ctype )) + , fOffset (static_cast(offset )) { + SkASSERT(CTypeCompatibleWithType(ctype, type)); + + SkASSERT(this->type() == type ); + SkASSERT(this->count() == arrayCount); + SkASSERT(this->offset() == offset ); + SkASSERT(this->visibility() == visibility); + SkASSERT(this->ctype() == ctype ); + } + + constexpr Uniform(const Uniform&) = default; + + Uniform& operator=(const Uniform&) = default; + + constexpr bool isInitialized() const { return this->type() != kVoid_GrSLType; } + + constexpr GrSLType type () const { return static_cast (fType); } + constexpr int count () const { return static_cast (fCount); } + constexpr CType ctype () const { return static_cast (fCType); } + constexpr size_t offset () const { return static_cast(fOffset); } + constexpr GrShaderFlags visibility() const { + return static_cast(fVisibility); + } + + static constexpr bool CTypeCompatibleWithType(CType, GrSLType); + + private: + unsigned fType : 6; + unsigned fCount : 8; + unsigned fVisibility : 4; + unsigned fCType : 1; + unsigned fOffset : 32 - (6 + 8 + 4 + 1); + + static_assert(kGrSLTypeCount <= (1 << 6)); + static_assert(kCTypeCount <= (1 << 1)); + }; + + /** Returns the array of uniforms inserted into the program by this processor. 
*/ + SkSpan<const Uniform> uniforms() const { return fUniforms; } + + template <typename T> const T* uniformData(size_t index) const { + SkASSERT(fUniforms[index].isInitialized()); + return SkTAddOffset<const T>(this, fUniforms[index].offset()); + } + protected: GrProcessor(ClassID classID) : fClassID(classID) {} - GrProcessor(const GrProcessor&) = delete; + GrProcessor(const GrProcessor&) = default; GrProcessor& operator=(const GrProcessor&) = delete; + /** + * Specifies the uniforms used by this processor. Should be called when the processor is made + * (i.e., in the constructor or factory function). Any uniforms with type void are ignored. This + * allows a processor to have a contiguous array of data member uniforms where some are + * conditionally initialized. + */ + void setUniforms(SkSpan<const Uniform> uniforms) { fUniforms = uniforms; } + const ClassID fClassID; + +private: + SkSpan<const Uniform> fUniforms; }; +constexpr bool GrProcessor::Uniform::CTypeCompatibleWithType(CType ctype, GrSLType type) { + switch (ctype) { + case CType::kDefault: + return true; + case CType::kSkMatrix: + return type == kHalf3x3_GrSLType || type == kFloat3x3_GrSLType; + } + SkUNREACHABLE; +} + +/** + * GCC, and sometimes (though less often) clang, warn if offsetof or __builtin_offsetof is used + * on non-standard-layout classes. This is because the compiler is not required to support it + * there (it is conditionally supported by C++17). clang, GCC, and MSVC all do support it, + * however. + */ +#if defined(__GNUC__) || defined(__clang__) +# define GR_BEGIN_UNIFORM_DEFINITIONS _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Winvalid-offsetof\"") +# define GR_END_UNIFORM_DEFINITIONS _Pragma("GCC diagnostic pop") +#else +# define GR_BEGIN_UNIFORM_DEFINITIONS +# define GR_END_UNIFORM_DEFINITIONS +#endif + #endif diff --git a/src/gpu/GrProgramInfo.cpp b/src/gpu/GrProgramInfo.cpp index 091d1133ee..de394aab20 100644 --- a/src/gpu/GrProgramInfo.cpp +++ b/src/gpu/GrProgramInfo.cpp @@ -20,6 +20,25 @@ GrStencilSettings GrProgramInfo::nonGLStencilSettings() const { return stencil; } +static void visit_fp_tree(const GrFragmentProcessor& fp, + const std::function<void(const GrProcessor&)>& f) { + f(fp); + for (int i = 0; i < fp.numChildProcessors(); ++i) { + if (const GrFragmentProcessor* child = fp.childProcessor(i)) { + visit_fp_tree(*child, f); + } + } +} + +void GrProgramInfo::visitProcessors(const std::function<void(const GrProcessor&)>& f) const { + f(*fGeomProc); + + for (int i = 0; i < fPipeline->numFragmentProcessors(); ++i) { + visit_fp_tree(fPipeline->getFragmentProcessor(i), f); + } + f(fPipeline->getXferProcessor()); +} + #ifdef SK_DEBUG #include "src/gpu/GrTexture.h" diff --git a/src/gpu/GrProgramInfo.h b/src/gpu/GrProgramInfo.h index 9f1f602b56..29d97d5545 100644 --- a/src/gpu/GrProgramInfo.h +++ b/src/gpu/GrProgramInfo.h @@ -93,6 +93,9 @@ public: // to call the visitor on its own primProc proxies. void visitFPProxies(const GrVisitProxyFunc& func) const { fPipeline->visitProxies(func); } + /** Visits the GP, then each root FP in a pre-order traversal, and finally the XP.
*/ + void visitProcessors(const std::function<void(const GrProcessor&)>&) const; + #ifdef SK_DEBUG void validate(bool flushTime) const; void checkAllInstantiated() const; diff --git a/src/gpu/GrSPIRVUniformHandler.cpp b/src/gpu/GrSPIRVUniformHandler.cpp index f073496c71..90505e6e3a 100644 --- a/src/gpu/GrSPIRVUniformHandler.cpp +++ b/src/gpu/GrSPIRVUniformHandler.cpp @@ -293,7 +293,37 @@ GrSwizzle GrSPIRVUniformHandler::samplerSwizzle(GrGLSLUniformHandler::SamplerHan return fSamplerSwizzles[handle.toIndex()]; } -void GrSPIRVUniformHandler::appendUniformDecls(GrShaderFlags visibility, SkString* out) const { +GrUniformDataManager::ProgramUniforms GrSPIRVUniformHandler::getNewProgramUniforms( + const GrUniformAggregator& aggregator) { + GrUniformDataManager::ProgramUniforms result; + result.reserve(aggregator.numProcessors()); + for (int p = 0; p < aggregator.numProcessors(); ++p) { + GrUniformDataManager::ProcessorUniforms uniforms; + auto records = aggregator.processorRecords(p); + uniforms.reserve(records.size()); + for (const GrUniformAggregator::Record& record : records) { + const GrProcessor::Uniform& u = record.uniform(); + uint32_t offset = get_ubo_offset(&fCurrentUBOOffset, u.type(), u.count()); + uniforms.push_back({record.indexInProcessor, u.type(), u.count(), offset}); + + // Add to fNewUniforms so that these get declared. + SPIRVUniformInfo& info = fNewUniforms.push_back(); + GrShaderVar var(record.name, u.type(), u.count()); + SkString qualifier = SkStringPrintf("offset = %d", offset); + var.addLayoutQualifier(qualifier.c_str()); + info.fUBOOffset = offset; + info.fVariable = var; + info.fVisibility = u.visibility(); + info.fOwner = nullptr; + } + result.push_back(std::move(uniforms)); + } + return result; +} + +void GrSPIRVUniformHandler::appendUniformDecls(const GrUniformAggregator&, + GrShaderFlags visibility, + SkString* out) const { auto textures = fTextures.items().begin(); for (const SPIRVUniformInfo& sampler : fSamplers.items()) { if (sampler.fVisibility & visibility) { @@ -311,6 +341,12 @@ void GrSPIRVUniformHandler::appendUniformDecls(GrShaderFlags visibility, SkStrin uniformsString.append(";\n"); } } + for (const UniformInfo& uniform : fNewUniforms.items()) { + if (uniform.fVisibility & visibility) { + uniform.fVariable.appendDecl(fProgramBuilder->shaderCaps(), &uniformsString); + uniformsString.append(";\n"); + } + } if (!uniformsString.isEmpty()) { out->appendf("layout (set = %d, binding = %d) uniform UniformBuffer\n{\n", kUniformDescriptorSet, kUniformBinding); diff --git a/src/gpu/GrSPIRVUniformHandler.h b/src/gpu/GrSPIRVUniformHandler.h index 5cb1aaa911..44475c5e1e 100644 --- a/src/gpu/GrSPIRVUniformHandler.h +++ b/src/gpu/GrSPIRVUniformHandler.h @@ -9,6 +9,7 @@ #define GrSPIRVUniformHandler_DEFINED #include "src/core/SkTBlockList.h" +#include "src/gpu/GrUniformDataManager.h" #include "src/gpu/glsl/GrGLSLUniformHandler.h" /* @@ -46,6 +47,14 @@ public: return fUniforms.item(idx); } + /** + * Call after all legacy-style uniforms have been added to assign offsets to new-style uniforms + * and create the data structure needed to transfer new-style uniforms to GrUniformDataManager. + * This must be called before appendUniformDecls() in order to ensure new-style uniforms get + * declared. It must be called only once.
+ */ + GrUniformDataManager::ProgramUniforms getNewProgramUniforms(const GrUniformAggregator&); + private: explicit GrSPIRVUniformHandler(GrGLSLProgramBuilder* program); @@ -53,7 +62,9 @@ private: const char* name, const GrShaderCaps*) override; const char* samplerVariable(SamplerHandle handle) const override; GrSwizzle samplerSwizzle(SamplerHandle handle) const override; - void appendUniformDecls(GrShaderFlags visibility, SkString*) const override; + void appendUniformDecls(const GrUniformAggregator&, + GrShaderFlags visibility, + SkString*) const override; UniformHandle internalAddUniformArray(const GrFragmentProcessor* owner, uint32_t visibility, GrSLType type, @@ -63,6 +74,7 @@ const char** outName) override; UniformInfoArray fUniforms; + UniformInfoArray fNewUniforms; UniformInfoArray fSamplers; UniformInfoArray fTextures; SkTArray<GrSwizzle> fSamplerSwizzles; diff --git a/src/gpu/GrUniformAggregator.cpp b/src/gpu/GrUniformAggregator.cpp new file mode 100644 index 0000000000..8e2d6bf971 --- /dev/null +++ b/src/gpu/GrUniformAggregator.cpp @@ -0,0 +1,63 @@ +/* + * Copyright 2021 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "src/gpu/GrUniformAggregator.h" + +using ProcessorUniforms = GrUniformAggregator::ProcessorUniforms; + +ProcessorUniforms GrUniformAggregator::addUniforms(const GrProcessor& p, + const SkString& mangleSuffix) { + Processor processor{ + &p, + fUniforms.size(), + fUniforms.size(), + }; + for (size_t i = 0; i < p.uniforms().size(); ++i) { + if (!p.uniforms()[i].isInitialized()) { + continue; + } + // We give every uniform an initial name so it can always be validly declared. When code is + // emitted, the processor can give it a more meaningful name. The actual name doesn't + // matter, other than for readability. + SkString unusedName = SkStringPrintf("default_%zu%s", i, mangleSuffix.c_str()); + fUniforms.push_back(Record{std::move(unusedName), &p, i}); + ++processor.end; + } + fProcessors.push_back(processor); + return ProcessorUniforms(p, mangleSuffix, this); +} + +////////////////////////////////////////////////////////////////////////////// + +ProcessorUniforms::ProcessorUniforms(const GrProcessor& p, + const SkString& mangleSuffix, + GrUniformAggregator* aggregator) + : fAgg(aggregator), fMangleSuffix(mangleSuffix) { + for (size_t i = 0; i < fAgg->fProcessors.size(); ++i) { + if (fAgg->fProcessors[i].processor == &p) { + fBegin = fAgg->fProcessors[i].begin; + fEnd = fAgg->fProcessors[i].end; + return; + } + } } + +const char* ProcessorUniforms::getUniformName(size_t index, const char* newBaseName) const { + for (size_t i = fBegin; i < fEnd; ++i) { + if (fAgg->fUniforms[i].indexInProcessor == index) { + GrUniformAggregator::Record& r = fAgg->fUniforms[i]; + if (newBaseName) { + SkString mangledName = SkStringPrintf("%s%s", newBaseName, fMangleSuffix.c_str()); + r.name = mangledName; + } + return r.name.c_str(); + } else if (fAgg->fUniforms[i].indexInProcessor > index) { + break; + } + } + return nullptr; +} diff --git a/src/gpu/GrUniformAggregator.h b/src/gpu/GrUniformAggregator.h new file mode 100644 index 0000000000..af4f7fd8b7 --- /dev/null +++ b/src/gpu/GrUniformAggregator.h @@ -0,0 +1,106 @@ +/* + * Copyright 2021 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file.
+ */ + +#ifndef GrUniformAggregator_DEFINED +#define GrUniformAggregator_DEFINED + +#include "include/core/SkString.h" +#include "include/private/SkChecksum.h" +#include "src/gpu/GrProcessor.h" + +#include <vector> + +/** Collects the uniforms from various processors comprising the shaders of a pipeline/program. */ +class GrUniformAggregator { +public: + class ProcessorUniforms { + public: + ProcessorUniforms(ProcessorUniforms&&) = default; + ProcessorUniforms& operator=(ProcessorUniforms&&) = default; + + /** + * Finds a uniform name by index. The uniform initially has a generic name. It can + * optionally be given a descriptive name via the newBaseName param. However, the caller + * must use the returned name because even if a name is passed, the final uniform name will + * be mangled to be unique. + */ + const char* getUniformName(size_t index, const char* newBaseName = nullptr) const; + + private: + ProcessorUniforms(const GrProcessor& p, + const SkString& mangleSuffix, + GrUniformAggregator* aggregator); + + ProcessorUniforms(const ProcessorUniforms&) = delete; + ProcessorUniforms& operator=(const ProcessorUniforms&) = delete; + + GrUniformAggregator* fAgg; + + SkString fMangleSuffix; + + size_t fBegin = 0; + size_t fEnd = 0; + + friend class GrUniformAggregator; + }; + + struct Record { + SkString name; + const GrProcessor* processor = nullptr; + size_t indexInProcessor = -1; + + const GrProcessor::Uniform& uniform() const { + return processor->uniforms()[indexInProcessor]; + } + }; + + GrUniformAggregator() = default; + + /** + * Aggregates the uniforms for a processor. This must be called for all processors in a + * program and must be called in this order: GP, FP0-T, FP1-T, ... XP. FPi-T is a pre-order + * traversal of the ith FP in the GrPipeline. + */ + ProcessorUniforms addUniforms(const GrProcessor&, const SkString& mangleSuffix); + + /** + * Iterable range of all uniform Records across all processors added. + */ + SkSpan<const Record> records() const { + return SkSpan<const Record>(fUniforms.data(), fUniforms.size()); + } + + /** + * Iterable range of Records for a given processor index. + */ + SkSpan<const Record> processorRecords(size_t processorIndex) const { + SkASSERT(processorIndex < fProcessors.size()); + size_t size = fProcessors[processorIndex].end - fProcessors[processorIndex].begin; + return SkSpan<const Record>(fUniforms.data() + fProcessors[processorIndex].begin, size); + } + + int uniformCount() const { return fUniforms.size(); } + + /** + * The number of processors whose uniforms have been added to the aggregator, including + * processors that had no valid uniforms. + */ + int numProcessors() const { return fProcessors.size(); } + +private: + struct Processor { + const GrProcessor* processor; + size_t begin; // index of first uniform owned by processor in fUniforms. + size_t end; // index of last uniform + 1 owned by processor in fUniforms.
+ }; + std::vector<Processor> fProcessors; + using Records = std::vector<Record>; + + Records fUniforms; +}; + +#endif diff --git a/src/gpu/GrUniformDataManager.cpp b/src/gpu/GrUniformDataManager.cpp index 69842d0852..859ff1dbda 100644 --- a/src/gpu/GrUniformDataManager.cpp +++ b/src/gpu/GrUniformDataManager.cpp @@ -7,18 +7,295 @@ #include "src/gpu/GrUniformDataManager.h" +#include "src/gpu/GrProgramInfo.h" #include "src/gpu/GrShaderVar.h" // ensure that these types are the sizes the uniform data is expecting static_assert(sizeof(int32_t) == 4); static_assert(sizeof(float) == 4); -GrUniformDataManager::GrUniformDataManager(uint32_t uniformCount, uint32_t uniformSize) - : fUniformSize(uniformSize) , fUniformsDirty(false) { +////////////////////////////////////////////////////////////////////////////// + +GrUniformDataManager::UniformManager::UniformManager(ProgramUniforms uniforms, Layout layout) + : fUniforms(std::move(uniforms)), fLayout(layout) {} + +template <typename BaseType> static constexpr size_t tight_vec_size(int vecLength) { + return sizeof(BaseType) * vecLength; +} + +/** + * From Section 7.6.2.2 "Standard Uniform Block Layout": + * 1. If the member is a scalar consuming N basic machine units, the base alignment is N. + * 2. If the member is a two- or four-component vector with components consuming N basic machine + * units, the base alignment is 2N or 4N, respectively. + * 3. If the member is a three-component vector with components consuming N + * basic machine units, the base alignment is 4N. + * 4. If the member is an array of scalars or vectors, the base alignment and array + * stride are set to match the base alignment of a single array element, according + * to rules (1), (2), and (3), and rounded up to the base alignment of a vec4. The + * array may have padding at the end; the base offset of the member following + * the array is rounded up to the next multiple of the base alignment. + * 5. If the member is a column-major matrix with C columns and R rows, the + * matrix is stored identically to an array of C column vectors with R components each, + * according to rule (4). + * 6. If the member is an array of S column-major matrices with C columns and + * R rows, the matrix is stored identically to a row of S × C column vectors + * with R components each, according to rule (4). + * 7. If the member is a row-major matrix with C columns and R rows, the matrix + * is stored identically to an array of R row vectors with C components each, + * according to rule (4). + * 8. If the member is an array of S row-major matrices with C columns and R + * rows, the matrix is stored identically to a row of S × R row vectors with C + * components each, according to rule (4). + * 9. If the member is a structure, the base alignment of the structure is N, where + * N is the largest base alignment value of any of its members, and rounded + * up to the base alignment of a vec4. The individual members of this substructure are then + * assigned offsets by applying this set of rules recursively, + * where the base offset of the first member of the sub-structure is equal to the + * aligned offset of the structure. The structure may have padding at the end; + * the base offset of the member following the sub-structure is rounded up to + * the next multiple of the base alignment of the structure. + * 10. If the member is an array of S structures, the S elements of the array are laid + * out in order, according to rule (9).
+ */ +template <typename BaseType, int RowsOrVecLength = 1, int Cols = 1> +struct Rules140 { + /** + * For an array of scalars or vectors this returns the stride between array elements. For + * matrices or arrays of matrices this returns the stride between columns of the matrix. Note + * that for single (non-array) scalars or vectors we don't require a stride. + */ + static constexpr size_t Stride(int count) { + SkASSERT(count >= 1 || count == GrShaderVar::kNonArray); + static_assert(RowsOrVecLength >= 1 && RowsOrVecLength <= 4); + static_assert(Cols >= 1 && Cols <= 4); + if (Cols != 1) { + // This is a matrix or array of matrices. We return the stride between columns. + SkASSERT(RowsOrVecLength > 1); + return Rules140<BaseType, RowsOrVecLength, 1>::Stride(1); + } + if (count == 0) { + // Stride doesn't matter for a non-array. + return 0; + } + + // Rule 4. + + // Alignment of vec4 by Rule 2. + constexpr size_t kVec4Alignment = tight_vec_size<float>(4); + // Get alignment of a single vector of BaseType by Rule 1, 2, or 3. + int n = RowsOrVecLength == 3 ? 4 : RowsOrVecLength; + size_t kElementAlignment = tight_vec_size<BaseType>(n); + // Round kElementAlignment up to a multiple of kVec4Alignment. + size_t m = (kElementAlignment + kVec4Alignment - 1)/kVec4Alignment; + return m*kVec4Alignment; + } +}; + +/** + * When using the std430 storage layout, shader storage blocks will be laid out in buffer storage + * identically to uniform and shader storage blocks using the std140 layout, except that the base + * alignment and stride of arrays of scalars and vectors in rule 4 and of structures in rule 9 are + * not rounded up to a multiple of the base alignment of a vec4. + */ +template <typename BaseType, int RowsOrVecLength = 1, int Cols = 1> +struct Rules430 { + static constexpr size_t Stride(int count) { + SkASSERT(count >= 1 || count == GrShaderVar::kNonArray); + static_assert(RowsOrVecLength >= 1 && RowsOrVecLength <= 4); + static_assert(Cols >= 1 && Cols <= 4); + + if (Cols != 1) { + // This is a matrix or array of matrices. We return the stride between columns. + SkASSERT(RowsOrVecLength > 1); + return Rules430<BaseType, RowsOrVecLength, 1>::Stride(1); + } + if (count == 0) { + // Stride doesn't matter for a non-array. + return 0; + } + // Rule 4 without the round up to a multiple of align-of vec4. + return tight_vec_size<BaseType>(RowsOrVecLength == 3 ? 4 : RowsOrVecLength); + } +}; + +// The strides used here were derived from the rules we've imposed on ourselves in +// GrMtlPipelineStateDataManager. Everything is tight except 3-component vectors, which have the +// stride of their 4-component equivalents. +template <typename BaseType, int RowsOrVecLength = 1, int Cols = 1> +struct RulesMetal { + static constexpr size_t Stride(int count) { + SkASSERT(count >= 1 || count == GrShaderVar::kNonArray); + static_assert(RowsOrVecLength >= 1 && RowsOrVecLength <= 4); + static_assert(Cols >= 1 && Cols <= 4); + if (Cols != 1) { + // This is a matrix or array of matrices. We return the stride between columns. + SkASSERT(RowsOrVecLength > 1); + return RulesMetal<BaseType, RowsOrVecLength, 1>::Stride(1); + } + if (count == 0) { + // Stride doesn't matter for a non-array. + return 0; + } + return tight_vec_size<BaseType>(RowsOrVecLength == 3 ? 4 : RowsOrVecLength); + } +}; + +template