Add resource provider flag to avoid client-side buffers

On some platforms, a newly created buffer was liable to be CPU-backed.
This would break code that expected a real VBO (e.g. instanced rendering).
This change adds an optional flag to GrResourceProvider that requires
a buffer to be created in GPU memory.

It also moves the CPU-backing logic into Gr land so that real VBOs can
still be cached properly on platforms that prefer client-side buffers.
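
As a rough illustration, a caller that needs a real VBO would now pass the new
flag (a minimal sketch; the call site, 'rp', 'vertexData', and 'vertexDataSize'
are hypothetical, while the flag and the createBuffer() signature are the ones
added in this change):

  // Hypothetical call site: 'rp' is a GrResourceProvider*; 'vertexData' and
  // 'vertexDataSize' are caller-owned. Without kRequireGpuMemory_Flag, this
  // call could return a CPU-backed GrBuffer on platforms whose caps prefer
  // client-side dynamic buffers.
  GrBuffer* vbo = rp->createBuffer(vertexDataSize, kVertex_GrBufferType,
                                   kDynamic_GrAccessPattern,
                                   GrResourceProvider::kNoPendingIO_Flag |
                                   GrResourceProvider::kRequireGpuMemory_Flag,
                                   vertexData);
  SkASSERT(!vbo || !vbo->isCPUBacked());  // guaranteed not to be client-side

Callers that can tolerate a client-side buffer simply omit
kRequireGpuMemory_Flag and let the caps decide; an explicitly client-side
buffer comes from GrBuffer::CreateCPUBacked().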

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2143333002

Review-Url: https://codereview.chromium.org/2143333002
Author: csmartdalton (committed by Commit bot)
Date:   2016-07-13 10:16:32 -07:00
Commit: 485a12003a (parent 6c3ada96ab)
22 changed files with 212 additions and 161 deletions

@ -78,6 +78,7 @@
'<(skia_src_path)/gpu/GrBlend.cpp',
'<(skia_src_path)/gpu/GrBlurUtils.cpp',
'<(skia_src_path)/gpu/GrBlurUtils.h',
'<(skia_src_path)/gpu/GrBuffer.cpp',
'<(skia_src_path)/gpu/GrBufferAllocPool.cpp',
'<(skia_src_path)/gpu/GrBufferAllocPool.h',
'<(skia_src_path)/gpu/GrCaps.cpp',

@ -15,30 +15,26 @@ class GrGpu;
class GrBuffer : public GrGpuResource {
public:
/**
* Computes a scratch key for a buffer with a "dynamic" access pattern. (Buffers with "static"
* and "stream" access patterns are disqualified by nature from being cached and reused.)
* Creates a client-side buffer.
*/
static void ComputeScratchKeyForDynamicBuffer(size_t size, GrBufferType intendedType,
GrScratchKey* key) {
static const GrScratchKey::ResourceType kType = GrScratchKey::GenerateResourceType();
GrScratchKey::Builder builder(key, kType, 1 + (sizeof(size_t) + 3) / 4);
// TODO: There's not always reason to cache a buffer by type. In some (all?) APIs it's just
// a chunk of memory we can use/reuse for any type of data. We really only need to
// differentiate between the "read" types (e.g. kGpuToCpu_BufferType) and "draw" types.
builder[0] = intendedType;
builder[1] = (uint32_t)size;
if (sizeof(size_t) > 4) {
builder[2] = (uint32_t)((uint64_t)size >> 32);
}
}
static SK_WARN_UNUSED_RESULT GrBuffer* CreateCPUBacked(GrGpu*, size_t sizeInBytes, GrBufferType,
const void* data = nullptr);
/**
* Computes a scratch key for a GPU-side buffer with a "dynamic" access pattern. (Buffers with
* "static" and "stream" patterns are disqualified by nature from being cached and reused.)
*/
static void ComputeScratchKeyForDynamicVBO(size_t size, GrBufferType, GrScratchKey*);
GrAccessPattern accessPattern() const { return fAccessPattern; }
size_t sizeInBytes() const { return fSizeInBytes; }
/**
* Returns true if the buffer is a wrapper around a CPU array. If true it
* indicates that map will always succeed and will be free.
*/
bool isCPUBacked() const { return fCPUBacked; }
bool isCPUBacked() const { return SkToBool(fCPUData); }
size_t baseOffset() const { return reinterpret_cast<size_t>(fCPUData); }
/**
* Maps the buffer to be written by the CPU.
@ -103,40 +99,37 @@ public:
*/
bool updateData(const void* src, size_t srcSizeInBytes) {
SkASSERT(!this->isMapped());
SkASSERT(srcSizeInBytes <= fGpuMemorySize);
SkASSERT(srcSizeInBytes <= fSizeInBytes);
return this->onUpdateData(src, srcSizeInBytes);
}
protected:
GrBuffer(GrGpu* gpu, size_t gpuMemorySize, GrBufferType intendedType,
GrAccessPattern accessPattern, bool cpuBacked)
: INHERITED(gpu),
fMapPtr(nullptr),
fGpuMemorySize(gpuMemorySize), // TODO: Zero for cpu backed buffers?
fAccessPattern(accessPattern),
fCPUBacked(cpuBacked),
fIntendedType(intendedType) {
~GrBuffer() override {
sk_free(fCPUData);
}
void computeScratchKey(GrScratchKey* key) const override {
if (!fCPUBacked && SkIsPow2(fGpuMemorySize) && kDynamic_GrAccessPattern == fAccessPattern) {
ComputeScratchKeyForDynamicBuffer(fGpuMemorySize, fIntendedType, key);
}
}
protected:
GrBuffer(GrGpu*, size_t sizeInBytes, GrBufferType, GrAccessPattern);
void* fMapPtr;
private:
size_t onGpuMemorySize() const override { return fGpuMemorySize; }
/**
* Internal constructor to make a CPU-backed buffer.
*/
GrBuffer(GrGpu*, size_t sizeInBytes, GrBufferType, void* cpuData);
virtual void onMap() = 0;
virtual void onUnmap() = 0;
virtual bool onUpdateData(const void* src, size_t srcSizeInBytes) = 0;
virtual void onMap() { SkASSERT(this->isCPUBacked()); fMapPtr = fCPUData; }
virtual void onUnmap() { SkASSERT(this->isCPUBacked()); }
virtual bool onUpdateData(const void* src, size_t srcSizeInBytes);
size_t fGpuMemorySize;
size_t onGpuMemorySize() const override { return fSizeInBytes; } // TODO: zero for cpu backed?
void computeScratchKey(GrScratchKey* key) const override;
size_t fSizeInBytes;
GrAccessPattern fAccessPattern;
bool fCPUBacked;
void* fCPUData;
GrBufferType fIntendedType;
typedef GrGpuResource INHERITED;
};

@ -160,6 +160,7 @@ public:
bool sampleLocationsSupport() const { return fSampleLocationsSupport; }
bool multisampleDisableSupport() const { return fMultisampleDisableSupport; }
bool usesMixedSamples() const { return fUsesMixedSamples; }
bool preferClientSideDynamicBuffers() const { return fPreferClientSideDynamicBuffers; }
bool useDrawInsteadOfClear() const { return fUseDrawInsteadOfClear; }
bool useDrawInsteadOfPartialRenderTargetWrite() const {
@ -297,6 +298,7 @@ protected:
bool fSampleLocationsSupport : 1;
bool fMultisampleDisableSupport : 1;
bool fUsesMixedSamples : 1;
bool fPreferClientSideDynamicBuffers : 1;
bool fSupportsInstancedDraws : 1;
bool fFullClearIsFree : 1;
bool fMustClearUploadedBufferData : 1;

src/gpu/GrBuffer.cpp (new file, 72 lines)

@ -0,0 +1,72 @@
/*
* Copyright 2016 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "GrBuffer.h"
#include "GrGpu.h"
#include "GrCaps.h"
GrBuffer* GrBuffer::CreateCPUBacked(GrGpu* gpu, size_t sizeInBytes, GrBufferType intendedType,
const void* data) {
SkASSERT(GrBufferTypeIsVertexOrIndex(intendedType));
void* cpuData;
if (gpu->caps()->mustClearUploadedBufferData()) {
cpuData = sk_calloc_throw(sizeInBytes);
} else {
cpuData = sk_malloc_flags(sizeInBytes, SK_MALLOC_THROW);
}
if (data) {
memcpy(cpuData, data, sizeInBytes);
}
return new GrBuffer(gpu, sizeInBytes, intendedType, cpuData);
}
GrBuffer::GrBuffer(GrGpu* gpu, size_t sizeInBytes, GrBufferType type, void* cpuData)
: INHERITED(gpu),
fMapPtr(nullptr),
fSizeInBytes(sizeInBytes),
fAccessPattern(kDynamic_GrAccessPattern),
fCPUData(cpuData),
fIntendedType(type) {
this->registerWithCache(SkBudgeted::kNo);
}
GrBuffer::GrBuffer(GrGpu* gpu, size_t sizeInBytes, GrBufferType type, GrAccessPattern pattern)
: INHERITED(gpu),
fMapPtr(nullptr),
fSizeInBytes(sizeInBytes),
fAccessPattern(pattern),
fCPUData(nullptr),
fIntendedType(type) {
// Subclass registers with cache.
}
void GrBuffer::ComputeScratchKeyForDynamicVBO(size_t size, GrBufferType intendedType,
GrScratchKey* key) {
static const GrScratchKey::ResourceType kType = GrScratchKey::GenerateResourceType();
GrScratchKey::Builder builder(key, kType, 1 + (sizeof(size_t) + 3) / 4);
// TODO: There's not always reason to cache a buffer by type. In some (all?) APIs it's just
// a chunk of memory we can use/reuse for any type of data. We really only need to
// differentiate between the "read" types (e.g. kGpuToCpu_BufferType) and "draw" types.
builder[0] = intendedType;
builder[1] = (uint32_t)size;
if (sizeof(size_t) > 4) {
builder[2] = (uint32_t)((uint64_t)size >> 32);
}
}
bool GrBuffer::onUpdateData(const void* src, size_t srcSizeInBytes) {
SkASSERT(this->isCPUBacked());
memcpy(fCPUData, src, srcSizeInBytes);
return true;
}
void GrBuffer::computeScratchKey(GrScratchKey* key) const {
if (!this->isCPUBacked() && SkIsPow2(fSizeInBytes) &&
kDynamic_GrAccessPattern == fAccessPattern) {
ComputeScratchKeyForDynamicVBO(fSizeInBytes, fIntendedType, key);
}
}

@ -98,6 +98,7 @@ GrCaps::GrCaps(const GrContextOptions& options) {
fSampleLocationsSupport = false;
fMultisampleDisableSupport = false;
fUsesMixedSamples = false;
fPreferClientSideDynamicBuffers = false;
fSupportsInstancedDraws = false;
fFullClearIsFree = false;
fMustClearUploadedBufferData = false;
@ -176,6 +177,7 @@ SkString GrCaps::dump() const {
r.appendf("Sample Locations Support : %s\n", gNY[fSampleLocationsSupport]);
r.appendf("Multisample disable support : %s\n", gNY[fMultisampleDisableSupport]);
r.appendf("Uses Mixed Samples : %s\n", gNY[fUsesMixedSamples]);
r.appendf("Prefer client-side dynamic buffers : %s\n", gNY[fPreferClientSideDynamicBuffers]);
r.appendf("Supports instanced draws : %s\n", gNY[fSupportsInstancedDraws]);
r.appendf("Full screen clear is free : %s\n", gNY[fFullClearIsFree]);
r.appendf("Must clear buffer memory : %s\n", gNY[fMustClearUploadedBufferData]);

@ -137,7 +137,7 @@ public:
GrRenderTarget* wrapBackendTextureAsRenderTarget(const GrBackendTextureDesc&);
/**
* Creates a buffer.
* Creates a buffer in GPU memory. For a client-side buffer use GrBuffer::CreateCPUBacked.
*
* @param size size of buffer to create.
* @param intendedType hint to the graphics subsystem about what the buffer will be used for.

@ -100,13 +100,19 @@ GrBuffer* GrResourceProvider::createBuffer(size_t size, GrBufferType intendedTyp
if (kDynamic_GrAccessPattern != accessPattern) {
return this->gpu()->createBuffer(size, intendedType, accessPattern, data);
}
if (!(flags & kRequireGpuMemory_Flag) &&
this->gpu()->caps()->preferClientSideDynamicBuffers() &&
GrBufferTypeIsVertexOrIndex(intendedType) &&
kDynamic_GrAccessPattern == accessPattern) {
return GrBuffer::CreateCPUBacked(this->gpu(), size, intendedType, data);
}
// bin by pow2 with a reasonable min
static const uint32_t MIN_SIZE = 1 << 12;
size_t allocSize = SkTMax(MIN_SIZE, GrNextPow2(SkToUInt(size)));
GrScratchKey key;
GrBuffer::ComputeScratchKeyForDynamicBuffer(allocSize, intendedType, &key);
GrBuffer::ComputeScratchKeyForDynamicVBO(allocSize, intendedType, &key);
uint32_t scratchFlags = 0;
if (flags & kNoPendingIO_Flag) {
scratchFlags = GrResourceCache::kRequireNoPendingIO_ScratchFlag;
@ -124,6 +130,7 @@ GrBuffer* GrResourceProvider::createBuffer(size_t size, GrBufferType intendedTyp
if (data) {
buffer->updateData(data, size);
}
SkASSERT(!buffer->isCPUBacked()); // We should only cache real VBOs.
return buffer;
}

@ -100,7 +100,12 @@ public:
* will occur out of order WRT the operations being flushed.
* Make this automatic: https://bug.skia.org/4156
*/
kNoPendingIO_Flag = kNoPendingIO_ScratchTextureFlag,
kNoPendingIO_Flag = 0x1,
/** Normally the caps may indicate a preference for client-side buffers. Set this flag when
* creating a buffer to guarantee it resides in GPU memory.
*/
kRequireGpuMemory_Flag = 0x2,
};
/**

@ -30,12 +30,8 @@
GrGLBuffer* GrGLBuffer::Create(GrGLGpu* gpu, size_t size, GrBufferType intendedType,
GrAccessPattern accessPattern, const void* data) {
bool cpuBacked = gpu->glCaps().useNonVBOVertexAndIndexDynamicData() &&
GrBufferTypeIsVertexOrIndex(intendedType) &&
kDynamic_GrAccessPattern == accessPattern;
SkAutoTUnref<GrGLBuffer> buffer(new GrGLBuffer(gpu, size, intendedType, accessPattern,
cpuBacked, data));
if (!cpuBacked && 0 == buffer->bufferID()) {
SkAutoTUnref<GrGLBuffer> buffer(new GrGLBuffer(gpu, size, intendedType, accessPattern, data));
if (0 == buffer->bufferID()) {
return nullptr;
}
return buffer.release();
@ -89,42 +85,27 @@ inline static GrGLenum gr_to_gl_access_pattern(GrBufferType bufferType,
}
GrGLBuffer::GrGLBuffer(GrGLGpu* gpu, size_t size, GrBufferType intendedType,
GrAccessPattern accessPattern, bool cpuBacked, const void* data)
: INHERITED(gpu, size, intendedType, accessPattern, cpuBacked),
fCPUData(nullptr),
GrAccessPattern accessPattern, const void* data)
: INHERITED(gpu, size, intendedType, accessPattern),
fIntendedType(intendedType),
fBufferID(0),
fSizeInBytes(size),
fUsage(gr_to_gl_access_pattern(intendedType, accessPattern)),
fGLSizeInBytes(0),
fHasAttachedToTexture(false) {
if (this->isCPUBacked()) {
// Core profile uses vertex array objects, which disallow client side arrays.
SkASSERT(!gpu->glCaps().isCoreProfile());
if (gpu->caps()->mustClearUploadedBufferData()) {
fCPUData = sk_calloc_throw(fSizeInBytes);
GL_CALL(GenBuffers(1, &fBufferID));
if (fBufferID) {
GrGLenum target = gpu->bindBuffer(fIntendedType, this);
CLEAR_ERROR_BEFORE_ALLOC(gpu->glInterface());
// make sure driver can allocate memory for this buffer
GL_ALLOC_CALL(gpu->glInterface(), BufferData(target,
(GrGLsizeiptr) size,
data,
fUsage));
if (CHECK_ALLOC_ERROR(gpu->glInterface()) != GR_GL_NO_ERROR) {
GL_CALL(DeleteBuffers(1, &fBufferID));
fBufferID = 0;
} else {
fCPUData = sk_malloc_flags(fSizeInBytes, SK_MALLOC_THROW);
}
if (data) {
memcpy(fCPUData, data, fSizeInBytes);
}
} else {
GL_CALL(GenBuffers(1, &fBufferID));
if (fBufferID) {
GrGLenum target = gpu->bindBuffer(fIntendedType, this);
CLEAR_ERROR_BEFORE_ALLOC(gpu->glInterface());
// make sure driver can allocate memory for this buffer
GL_ALLOC_CALL(gpu->glInterface(), BufferData(target,
(GrGLsizeiptr) fSizeInBytes,
data,
fUsage));
if (CHECK_ALLOC_ERROR(gpu->glInterface()) != GR_GL_NO_ERROR) {
GL_CALL(DeleteBuffers(1, &fBufferID));
fBufferID = 0;
} else {
fGLSizeInBytes = fSizeInBytes;
}
fGLSizeInBytes = size;
}
}
VALIDATE();
@ -144,11 +125,7 @@ void GrGLBuffer::onRelease() {
if (!this->wasDestroyed()) {
VALIDATE();
// make sure we've not been abandoned or already released
if (fCPUData) {
SkASSERT(!fBufferID);
sk_free(fCPUData);
fCPUData = nullptr;
} else if (fBufferID) {
if (fBufferID) {
GL_CALL(DeleteBuffers(1, &fBufferID));
fBufferID = 0;
fGLSizeInBytes = 0;
@ -165,8 +142,6 @@ void GrGLBuffer::onAbandon() {
fBufferID = 0;
fGLSizeInBytes = 0;
fMapPtr = nullptr;
sk_free(fCPUData);
fCPUData = nullptr;
VALIDATE();
INHERITED::onAbandon();
}
@ -179,12 +154,6 @@ void GrGLBuffer::onMap() {
VALIDATE();
SkASSERT(!this->isMapped());
if (0 == fBufferID) {
fMapPtr = fCPUData;
VALIDATE();
return;
}
// TODO: Make this a function parameter.
bool readOnly = (kXferGpuToCpu_GrBufferType == fIntendedType);
@ -195,8 +164,8 @@ void GrGLBuffer::onMap() {
case GrGLCaps::kMapBuffer_MapBufferType: {
GrGLenum target = this->glGpu()->bindBuffer(fIntendedType, this);
// Let driver know it can discard the old data
if (GR_GL_USE_BUFFER_DATA_NULL_HINT || fGLSizeInBytes != fSizeInBytes) {
GL_CALL(BufferData(target, fSizeInBytes, nullptr, fUsage));
if (GR_GL_USE_BUFFER_DATA_NULL_HINT || fGLSizeInBytes != this->sizeInBytes()) {
GL_CALL(BufferData(target, this->sizeInBytes(), nullptr, fUsage));
}
GL_CALL_RET(fMapPtr, MapBuffer(target, readOnly ? GR_GL_READ_ONLY : GR_GL_WRITE_ONLY));
break;
@ -204,30 +173,30 @@ void GrGLBuffer::onMap() {
case GrGLCaps::kMapBufferRange_MapBufferType: {
GrGLenum target = this->glGpu()->bindBuffer(fIntendedType, this);
// Make sure the GL buffer size agrees with fDesc before mapping.
if (fGLSizeInBytes != fSizeInBytes) {
GL_CALL(BufferData(target, fSizeInBytes, nullptr, fUsage));
if (fGLSizeInBytes != this->sizeInBytes()) {
GL_CALL(BufferData(target, this->sizeInBytes(), nullptr, fUsage));
}
GrGLbitfield writeAccess = GR_GL_MAP_WRITE_BIT;
if (kXferCpuToGpu_GrBufferType != fIntendedType) {
// TODO: Make this a function parameter.
writeAccess |= GR_GL_MAP_INVALIDATE_BUFFER_BIT;
}
GL_CALL_RET(fMapPtr, MapBufferRange(target, 0, fSizeInBytes,
GL_CALL_RET(fMapPtr, MapBufferRange(target, 0, this->sizeInBytes(),
readOnly ? GR_GL_MAP_READ_BIT : writeAccess));
break;
}
case GrGLCaps::kChromium_MapBufferType: {
GrGLenum target = this->glGpu()->bindBuffer(fIntendedType, this);
// Make sure the GL buffer size agrees with fDesc before mapping.
if (fGLSizeInBytes != fSizeInBytes) {
GL_CALL(BufferData(target, fSizeInBytes, nullptr, fUsage));
if (fGLSizeInBytes != this->sizeInBytes()) {
GL_CALL(BufferData(target, this->sizeInBytes(), nullptr, fUsage));
}
GL_CALL_RET(fMapPtr, MapBufferSubData(target, 0, fSizeInBytes,
GL_CALL_RET(fMapPtr, MapBufferSubData(target, 0, this->sizeInBytes(),
readOnly ? GR_GL_READ_ONLY : GR_GL_WRITE_ONLY));
break;
}
}
fGLSizeInBytes = fSizeInBytes;
fGLSizeInBytes = this->sizeInBytes();
VALIDATE();
}
@ -268,19 +237,15 @@ bool GrGLBuffer::onUpdateData(const void* src, size_t srcSizeInBytes) {
SkASSERT(!this->isMapped());
VALIDATE();
if (srcSizeInBytes > fSizeInBytes) {
if (srcSizeInBytes > this->sizeInBytes()) {
return false;
}
if (0 == fBufferID) {
memcpy(fCPUData, src, srcSizeInBytes);
return true;
}
SkASSERT(srcSizeInBytes <= fSizeInBytes);
SkASSERT(srcSizeInBytes <= this->sizeInBytes());
// bindbuffer handles dirty context
GrGLenum target = this->glGpu()->bindBuffer(fIntendedType, this);
#if GR_GL_USE_BUFFER_DATA_NULL_HINT
if (fSizeInBytes == srcSizeInBytes) {
if (this->sizeInBytes() == srcSizeInBytes) {
GL_CALL(BufferData(target, (GrGLsizeiptr) srcSizeInBytes, src, fUsage));
} else {
// Before we call glBufferSubData we give the driver a hint using
@ -290,10 +255,10 @@ bool GrGLBuffer::onUpdateData(const void* src, size_t srcSizeInBytes) {
// assign a different allocation for the new contents to avoid
// flushing the gpu past draws consuming the old contents.
// TODO I think we actually want to try calling bufferData here
GL_CALL(BufferData(target, fSizeInBytes, nullptr, fUsage));
GL_CALL(BufferData(target, this->sizeInBytes(), nullptr, fUsage));
GL_CALL(BufferSubData(target, 0, (GrGLsizeiptr) srcSizeInBytes, src));
}
fGLSizeInBytes = fSizeInBytes;
fGLSizeInBytes = this->sizeInBytes();
#else
// Note that we're cheating on the size here. Currently no methods
// allow a partial update that preserves contents of non-updated
@ -316,11 +281,8 @@ void GrGLBuffer::setMemoryBacking(SkTraceMemoryDump* traceMemoryDump,
#ifdef SK_DEBUG
void GrGLBuffer::validate() const {
// The following assert isn't valid when the buffer has been abandoned:
// SkASSERT((0 == fDesc.fID) == (fCPUData));
SkASSERT(0 != fBufferID || 0 == fGLSizeInBytes);
SkASSERT(nullptr == fMapPtr || fCPUData || fGLSizeInBytes <= fSizeInBytes);
SkASSERT(nullptr == fCPUData || nullptr == fMapPtr || fCPUData == fMapPtr);
SkASSERT(nullptr == fMapPtr || fGLSizeInBytes <= this->sizeInBytes());
}
#endif

@ -25,7 +25,6 @@ public:
}
GrGLuint bufferID() const { return fBufferID; }
size_t baseOffset() const { return reinterpret_cast<size_t>(fCPUData); }
/**
* Returns the actual size of the underlying GL buffer object. In certain cases we may make this
@ -37,8 +36,7 @@ public:
bool hasAttachedToTexture() const { return fHasAttachedToTexture; }
protected:
GrGLBuffer(GrGLGpu*, size_t size, GrBufferType intendedType, GrAccessPattern, bool cpuBacked,
const void* data);
GrGLBuffer(GrGLGpu*, size_t size, GrBufferType intendedType, GrAccessPattern, const void* data);
void onAbandon() override;
void onRelease() override;
@ -57,10 +55,8 @@ private:
void validate() const;
#endif
void* fCPUData;
GrBufferType fIntendedType;
GrGLuint fBufferID;
size_t fSizeInBytes;
GrGLenum fUsage;
size_t fGLSizeInBytes;
bool fHasAttachedToTexture;

@ -41,7 +41,6 @@ GrGLCaps::GrGLCaps(const GrContextOptions& contextOptions,
fMultiDrawIndirectSupport = false;
fBaseInstanceSupport = false;
fCanDrawIndirectToFloat = false;
fUseNonVBOVertexAndIndexDynamicData = false;
fIsCoreProfile = false;
fBindFragDataLocationSupport = false;
fRectangleTextureSupport = false;
@ -132,18 +131,6 @@ void GrGLCaps::init(const GrContextOptions& contextOptions,
fImagingSupport = kGL_GrGLStandard == standard &&
ctxInfo.hasExtension("GL_ARB_imaging");
// SGX and Mali GPUs that are based on a tiled-deferred architecture that have trouble with
// frequently changing VBOs. We've measured a performance increase using non-VBO vertex
// data for dynamic content on these GPUs. Perhaps we should read the renderer string and
// limit this decision to specific GPU families rather than basing it on the vendor alone.
if (!GR_GL_MUST_USE_VBO &&
!fIsCoreProfile &&
(kARM_GrGLVendor == ctxInfo.vendor() ||
kImagination_GrGLVendor == ctxInfo.vendor() ||
kQualcomm_GrGLVendor == ctxInfo.vendor())) {
fUseNonVBOVertexAndIndexDynamicData = true;
}
// A driver bug on the nexus 6 causes incorrect dst copies when invalidate is called beforehand.
// Thus we are blacklisting this extension for now on Adreno4xx devices.
if (kAdreno4xx_GrGLRenderer != ctxInfo.renderer() &&
@ -344,6 +331,18 @@ void GrGLCaps::init(const GrContextOptions& contextOptions,
}
}
// SGX and Mali GPUs that are based on a tiled-deferred architecture that have trouble with
// frequently changing VBOs. We've measured a performance increase using non-VBO vertex
// data for dynamic content on these GPUs. Perhaps we should read the renderer string and
// limit this decision to specific GPU families rather than basing it on the vendor alone.
if (!GR_GL_MUST_USE_VBO &&
!fIsCoreProfile &&
(kARM_GrGLVendor == ctxInfo.vendor() ||
kImagination_GrGLVendor == ctxInfo.vendor() ||
kQualcomm_GrGLVendor == ctxInfo.vendor())) {
fPreferClientSideDynamicBuffers = true;
}
// fUsesMixedSamples must be set before calling initFSAASupport.
this->initFSAASupport(ctxInfo, gli);
this->initBlendEqationSupport(ctxInfo);
@ -1122,8 +1121,6 @@ SkString GrGLCaps::dump() const {
r.appendf("Multi draw indirect support: %s\n", (fMultiDrawIndirectSupport ? "YES" : "NO"));
r.appendf("Base instance support: %s\n", (fBaseInstanceSupport ? "YES" : "NO"));
r.appendf("Can draw indirect to float: %s\n", (fCanDrawIndirectToFloat ? "YES" : "NO"));
r.appendf("Use non-VBO for dynamic data: %s\n",
(fUseNonVBOVertexAndIndexDynamicData ? "YES" : "NO"));
r.appendf("RGBA 8888 pixel ops are slow: %s\n", (fRGBA8888PixelsOpsAreSlow ? "YES" : "NO"));
r.appendf("Partial FBO read is slow: %s\n", (fPartialFBOReadIsSlow ? "YES" : "NO"));
r.appendf("Bind uniform location support: %s\n", (fBindUniformLocationSupport ? "YES" : "NO"));

@ -2053,9 +2053,7 @@ bool GrGLGpu::flushGLState(const GrPipeline& pipeline, const GrPrimitiveProcesso
void GrGLGpu::setupGeometry(const GrPrimitiveProcessor& primProc,
const GrNonInstancedMesh& mesh,
size_t* indexOffsetInBytes) {
const GrGLBuffer* vbuf;
vbuf = static_cast<const GrGLBuffer*>(mesh.vertexBuffer());
const GrBuffer* vbuf = mesh.vertexBuffer();
SkASSERT(vbuf);
SkASSERT(!vbuf->isMapped());
@ -2064,8 +2062,7 @@ void GrGLGpu::setupGeometry(const GrPrimitiveProcessor& primProc,
SkASSERT(indexOffsetInBytes);
*indexOffsetInBytes = 0;
const GrGLBuffer* ibuf = static_cast<const GrGLBuffer*>(mesh.indexBuffer());
const GrBuffer* ibuf = mesh.indexBuffer();
SkASSERT(ibuf);
SkASSERT(!ibuf->isMapped());
*indexOffsetInBytes += ibuf->baseOffset();
@ -2102,7 +2099,7 @@ void GrGLGpu::setupGeometry(const GrPrimitiveProcessor& primProc,
}
}
GrGLenum GrGLGpu::bindBuffer(GrBufferType type, const GrGLBuffer* buffer) {
GrGLenum GrGLGpu::bindBuffer(GrBufferType type, const GrBuffer* buffer) {
this->handleDirtyContext();
// Index buffer state is tied to the vertex array.
@ -2114,10 +2111,15 @@ GrGLenum GrGLGpu::bindBuffer(GrBufferType type, const GrGLBuffer* buffer) {
auto& bufferState = fHWBufferState[type];
if (buffer->getUniqueID() != bufferState.fBoundBufferUniqueID) {
if (!buffer->isCPUBacked() || !bufferState.fBufferZeroKnownBound) {
GL_CALL(BindBuffer(bufferState.fGLTarget, buffer->bufferID()));
bufferState.fBufferZeroKnownBound = buffer->isCPUBacked();
if (buffer->isCPUBacked()) {
if (!bufferState.fBufferZeroKnownBound) {
GL_CALL(BindBuffer(bufferState.fGLTarget, 0));
}
} else {
const GrGLBuffer* glBuffer = static_cast<const GrGLBuffer*>(buffer);
GL_CALL(BindBuffer(bufferState.fGLTarget, glBuffer->bufferID()));
}
bufferState.fBufferZeroKnownBound = buffer->isCPUBacked();
bufferState.fBoundBufferUniqueID = buffer->getUniqueID();
}
@ -4568,7 +4570,7 @@ void GrGLGpu::resetShaderCacheForTesting() const {
///////////////////////////////////////////////////////////////////////////////
GrGLAttribArrayState* GrGLGpu::HWVertexArrayState::bindInternalVertexArray(GrGLGpu* gpu,
const GrGLBuffer* ibuf) {
const GrBuffer* ibuf) {
GrGLAttribArrayState* attribState;
if (gpu->glCaps().isCoreProfile()) {

@ -92,7 +92,7 @@ public:
// returns the GL target the buffer was bound to.
// When 'type' is kIndex_GrBufferType, this function will also implicitly bind the default VAO.
// If the caller wishes to bind an index buffer to a specific VAO, it can call glBind directly.
GrGLenum bindBuffer(GrBufferType type, const GrGLBuffer*);
GrGLenum bindBuffer(GrBufferType type, const GrBuffer*);
// Called by GrGLBuffer after its buffer object has been destroyed.
void notifyBufferReleased(const GrGLBuffer*);
@ -467,7 +467,7 @@ private:
*
* The returned GrGLAttribArrayState should be used to set vertex attribute arrays.
*/
GrGLAttribArrayState* bindInternalVertexArray(GrGLGpu*, const GrGLBuffer* ibuff = nullptr);
GrGLAttribArrayState* bindInternalVertexArray(GrGLGpu*, const GrBuffer* ibuff = nullptr);
private:
GrGLuint fBoundVertexArrayID;

@ -39,7 +39,7 @@ GR_STATIC_ASSERT(8 == kUint_GrVertexAttribType);
void GrGLAttribArrayState::set(GrGLGpu* gpu,
int index,
const GrGLBuffer* vertexBuffer,
const GrBuffer* vertexBuffer,
GrVertexAttribType type,
GrGLsizei stride,
GrGLvoid* offset) {
@ -112,10 +112,16 @@ GrGLAttribArrayState* GrGLVertexArray::bind(GrGLGpu* gpu) {
return &fAttribArrays;
}
GrGLAttribArrayState* GrGLVertexArray::bindWithIndexBuffer(GrGLGpu* gpu, const GrGLBuffer* ibuff) {
GrGLAttribArrayState* GrGLVertexArray::bindWithIndexBuffer(GrGLGpu* gpu, const GrBuffer* ibuff) {
GrGLAttribArrayState* state = this->bind(gpu);
if (state && fIndexBufferUniqueID != ibuff->getUniqueID()) {
GR_GL_CALL(gpu->glInterface(), BindBuffer(GR_GL_ELEMENT_ARRAY_BUFFER, ibuff->bufferID()));
if (ibuff->isCPUBacked()) {
GR_GL_CALL(gpu->glInterface(), BindBuffer(GR_GL_ELEMENT_ARRAY_BUFFER, 0));
} else {
const GrGLBuffer* glBuffer = static_cast<const GrGLBuffer*>(ibuff);
GR_GL_CALL(gpu->glInterface(), BindBuffer(GR_GL_ELEMENT_ARRAY_BUFFER,
glBuffer->bufferID()));
}
fIndexBufferUniqueID = ibuff->getUniqueID();
}
return state;

@ -13,7 +13,7 @@
#include "gl/GrGLTypes.h"
#include "SkTArray.h"
class GrGLBuffer;
class GrBuffer;
class GrGLGpu;
/**
@ -40,7 +40,7 @@ public:
*/
void set(GrGLGpu*,
int attribIndex,
const GrGLBuffer* vertexBuffer,
const GrBuffer* vertexBuffer,
GrVertexAttribType type,
GrGLsizei stride,
GrGLvoid* offset);
@ -103,7 +103,7 @@ public:
* This is a version of the above function that also binds an index buffer to the vertex
* array object.
*/
GrGLAttribArrayState* bindWithIndexBuffer(GrGLGpu* gpu, const GrGLBuffer* indexBuffer);
GrGLAttribArrayState* bindWithIndexBuffer(GrGLGpu* gpu, const GrBuffer* indexBuffer);
GrGLuint arrayID() const { return fID; }

@ -96,12 +96,12 @@ void GLInstancedRendering::onBeginFlush(GrResourceProvider* rp) {
this->glGpu()->bindVertexArray(fVertexArrayID);
// Attach our index buffer to the vertex array.
SkASSERT(!this->indexBuffer()->isCPUBacked());
GL_CALL(BindBuffer(GR_GL_ELEMENT_ARRAY_BUFFER,
static_cast<const GrGLBuffer*>(this->indexBuffer())->bufferID()));
// Set up the non-instanced attribs.
this->glGpu()->bindBuffer(kVertex_GrBufferType,
static_cast<const GrGLBuffer*>(this->vertexBuffer()));
this->glGpu()->bindBuffer(kVertex_GrBufferType, this->vertexBuffer());
GL_CALL(EnableVertexAttribArray((int)Attrib::kShapeCoords));
GL_CALL(VertexAttribPointer((int)Attrib::kShapeCoords, 2, GR_GL_FLOAT, GR_GL_FALSE,
sizeof(ShapeVertex), (void*) offsetof(ShapeVertex, fX)));
@ -114,18 +114,21 @@ void GLInstancedRendering::onBeginFlush(GrResourceProvider* rp) {
// Create and map instance and draw-indirect buffers.
SkASSERT(!fInstanceBuffer);
fInstanceBuffer.reset(static_cast<GrGLBuffer*>(
fInstanceBuffer.reset(
rp->createBuffer(sizeof(Instance) * numGLInstances, kVertex_GrBufferType,
kDynamic_GrAccessPattern, GrResourceProvider::kNoPendingIO_Flag)));
kDynamic_GrAccessPattern,
GrResourceProvider::kNoPendingIO_Flag |
GrResourceProvider::kRequireGpuMemory_Flag));
if (!fInstanceBuffer) {
return;
}
SkASSERT(!fDrawIndirectBuffer);
fDrawIndirectBuffer.reset(static_cast<GrGLBuffer*>(
fDrawIndirectBuffer.reset(
rp->createBuffer(sizeof(GrGLDrawElementsIndirectCommand) * numGLDrawCmds,
kDrawIndirect_GrBufferType, kDynamic_GrAccessPattern,
GrResourceProvider::kNoPendingIO_Flag)));
GrResourceProvider::kNoPendingIO_Flag |
GrResourceProvider::kRequireGpuMemory_Flag));
if (!fDrawIndirectBuffer) {
return;
}

@ -44,8 +44,8 @@ private:
};
GrGLuint fVertexArrayID;
SkAutoTUnref<GrGLBuffer> fInstanceBuffer;
SkAutoTUnref<GrGLBuffer> fDrawIndirectBuffer;
SkAutoTUnref<GrBuffer> fInstanceBuffer;
SkAutoTUnref<GrBuffer> fDrawIndirectBuffer;
SkAutoSTMalloc<1024, GLDrawCmdInfo> fGLDrawCmdsInfo;
uint32_t fInstanceAttribsBufferUniqueId;
int fInstanceAttribsBaseInstance;

@ -452,7 +452,8 @@ void InstancedRendering::beginFlush(GrResourceProvider* rp) {
if (!fParams.empty()) {
fParamsBuffer.reset(rp->createBuffer(fParams.count() * sizeof(ParamsTexel),
kTexel_GrBufferType, kDynamic_GrAccessPattern,
GrResourceProvider::kNoPendingIO_Flag,
GrResourceProvider::kNoPendingIO_Flag |
GrResourceProvider::kRequireGpuMemory_Flag,
fParams.begin()));
if (!fParamsBuffer) {
return;

@ -295,6 +295,7 @@ void GrVkGpuCommandBuffer::bindGeometry(const GrPrimitiveProcessor& primProc,
// When a command buffer is submitted to a queue, there is an implicit memory barrier that
// occurs for all host writes. Additionally, BufferMemoryBarriers are not allowed inside of
// an active RenderPass.
SkASSERT(!mesh.vertexBuffer()->isCPUBacked());
GrVkVertexBuffer* vbuf;
vbuf = (GrVkVertexBuffer*)mesh.vertexBuffer();
SkASSERT(vbuf);
@ -303,6 +304,7 @@ void GrVkGpuCommandBuffer::bindGeometry(const GrPrimitiveProcessor& primProc,
fCommandBuffer->bindVertexBuffer(fGpu, vbuf);
if (mesh.isIndexed()) {
SkASSERT(!mesh.indexBuffer()->isCPUBacked());
GrVkIndexBuffer* ibuf = (GrVkIndexBuffer*)mesh.indexBuffer();
SkASSERT(ibuf);
SkASSERT(!ibuf->isMapped());

@ -11,7 +11,7 @@
GrVkIndexBuffer::GrVkIndexBuffer(GrVkGpu* gpu, const GrVkBuffer::Desc& desc,
const GrVkBuffer::Resource* bufferResource)
: INHERITED(gpu, desc.fSizeInBytes, kIndex_GrBufferType,
desc.fDynamic ? kDynamic_GrAccessPattern : kStatic_GrAccessPattern, false)
desc.fDynamic ? kDynamic_GrAccessPattern : kStatic_GrAccessPattern)
, GrVkBuffer(desc, bufferResource) {
this->registerWithCache(SkBudgeted::kYes);
}

@ -34,7 +34,7 @@ GrVkTransferBuffer::GrVkTransferBuffer(GrVkGpu* gpu, const GrVkBuffer::Desc& des
: INHERITED(gpu, desc.fSizeInBytes,
kCopyRead_Type == desc.fType ?
kXferCpuToGpu_GrBufferType : kXferGpuToCpu_GrBufferType,
kStream_GrAccessPattern, false)
kStream_GrAccessPattern)
, GrVkBuffer(desc, bufferResource) {
this->registerWithCache(SkBudgeted::kYes);
}

@ -11,7 +11,7 @@
GrVkVertexBuffer::GrVkVertexBuffer(GrVkGpu* gpu, const GrVkBuffer::Desc& desc,
const GrVkBuffer::Resource* bufferResource)
: INHERITED(gpu, desc.fSizeInBytes, kVertex_GrBufferType,
desc.fDynamic ? kDynamic_GrAccessPattern : kStatic_GrAccessPattern, false)
desc.fDynamic ? kDynamic_GrAccessPattern : kStatic_GrAccessPattern)
, GrVkBuffer(desc, bufferResource) {
this->registerWithCache(SkBudgeted::kYes);
}