Revert "Disable instanced rendering on Skylake"

This reverts commit 240ea4c03a.

Reason for revert: Too many regressions from not having CCPR

Original change's description:
> Disable instanced rendering on Skylake
>
> We previously had a glFlush() workaround for instanced rendering on
> Skylake. However, the non-instanced approach is often faster than
> instanced + glFlush(). This CL just disables instanced rendering
> altogether on Skylake instead. The chip is old enough now that this
> seems like a reasonable solution.
>
> Bug: skia:8566
> Change-Id: Ib82a519d8186b463b72b20203fb69d078e757aa7
> Reviewed-on: https://skia-review.googlesource.com/c/172470
> Reviewed-by: Brian Salomon <bsalomon@google.com>
> Commit-Queue: Chris Dalton <csmartdalton@google.com>

TBR=bsalomon@google.com,csmartdalton@google.com

# Not skipping CQ checks because original CL landed > 1 day ago.

Bug: skia:8566
Change-Id: I947cdd0026b7fc31a4f75f5f416299e27dd6f56e
Reviewed-on: https://skia-review.googlesource.com/c/173128
Reviewed-by: Chris Dalton <csmartdalton@google.com>
Commit-Queue: Chris Dalton <csmartdalton@google.com>
2018-11-28 00:05:35 +00:00 · 2018-11-28 00:05:35 +00:00 · af3dd43a80
commit af3dd43a80
parent a23de1d5c8
4 changed files with 33 additions and 4 deletions
--- a/src/gpu/gl/GrGLCaps.cpp
+++ b/src/gpu/gl/GrGLCaps.cpp
@ -61,6 +61,7 @@ GrGLCaps::GrGLCaps(const GrContextOptions& contextOptions,
    fDisallowTexSubImageForUnormConfigTexturesEverBoundToFBO = false;
    fUseDrawInsteadOfAllRenderTargetWrites = false;
    fRequiresCullFaceEnableDisableWhenDrawingLinesAfterNonLines = false;
+    fRequiresFlushBetweenNonAndInstancedDraws = false;
    fDetachStencilFromMSAABuffersBeforeReadPixels = false;
    fClampMaxTextureLevelToOne = false;
    fProgramBinarySupport = false;
@ -2468,13 +2469,10 @@ void GrGLCaps::applyDriverCorrectnessWorkarounds(const GrGLContextInfo& ctxInfo,
        fRequiresCullFaceEnableDisableWhenDrawingLinesAfterNonLines = true;
    }

-    // Intel Skylake instanced draws get corrupted if we mix them with normal ones. Adding a flush
-    // in between seems to resolve this, but it also tends to cause perf regressions. So we just
-    // disable instancing altogether on Skylake.
    if (kIntelSkylake_GrGLRenderer == ctxInfo.renderer() ||
        (kANGLE_GrGLRenderer == ctxInfo.renderer() &&
         GrGLANGLERenderer::kSkylake == ctxInfo.angleRenderer())) {
-        fInstanceAttribSupport = false;
+        fRequiresFlushBetweenNonAndInstancedDraws = true;
    }

    // This was reproduced on a Pixel 1, but the unit test + config + options that exercise it are
@ -2762,6 +2760,7 @@ void GrGLCaps::onApplyOptionsOverrides(const GrContextOptions& options) {
        SkASSERT(!fDisallowTexSubImageForUnormConfigTexturesEverBoundToFBO);
        SkASSERT(!fUseDrawInsteadOfAllRenderTargetWrites);
        SkASSERT(!fRequiresCullFaceEnableDisableWhenDrawingLinesAfterNonLines);
+        SkASSERT(!fRequiresFlushBetweenNonAndInstancedDraws);
        SkASSERT(!fDetachStencilFromMSAABuffersBeforeReadPixels);
    }
    if (GrContextOptions::Enable::kNo == options.fUseDrawInsteadOfGLClear) {
--- a/src/gpu/gl/GrGLCaps.h
+++ b/src/gpu/gl/GrGLCaps.h
@ -385,6 +385,12 @@ public:
        return fRequiresCullFaceEnableDisableWhenDrawingLinesAfterNonLines;
    }

+    // Intel Skylake instanced draws get corrupted if we mix them with normal ones. Adding a flush
+    // in between seems to resolve this.
+    bool requiresFlushBetweenNonAndInstancedDraws() const {
+        return fRequiresFlushBetweenNonAndInstancedDraws;
+    }
+
    // Some Adreno drivers refuse to ReadPixels from an MSAA buffer that has stencil attached.
    bool detachStencilFromMSAABuffersBeforeReadPixels() const {
        return fDetachStencilFromMSAABuffersBeforeReadPixels;
@ -529,6 +535,7 @@ private:
    bool fDisallowTexSubImageForUnormConfigTexturesEverBoundToFBO : 1;
    bool fUseDrawInsteadOfAllRenderTargetWrites : 1;
    bool fRequiresCullFaceEnableDisableWhenDrawingLinesAfterNonLines : 1;
+    bool fRequiresFlushBetweenNonAndInstancedDraws : 1;
    bool fDetachStencilFromMSAABuffersBeforeReadPixels : 1;
    bool fClampMaxTextureLevelToOne : 1;
    int fMaxInstancesPerDrawWithoutCrashing;
--- a/src/gpu/gl/GrGLGpu.cpp
+++ b/src/gpu/gl/GrGLGpu.cpp
@ -593,6 +593,9 @@ void GrGLGpu::onResetContext(uint32_t resetBits) {
        fHWVertexArrayState.invalidate();
        fHWBufferState[kVertex_GrBufferType].invalidate();
        fHWBufferState[kIndex_GrBufferType].invalidate();
+        if (this->glCaps().requiresFlushBetweenNonAndInstancedDraws()) {
+            fRequiresFlushBeforeNextInstancedDraw = true;
+        }
    }

    if (resetBits & kRenderTarget_GrGLBackendState) {
@ -2347,6 +2350,9 @@ void GrGLGpu::flushRenderTargetNoColorWrites(GrGLRenderTarget* target) {
            }
        }
 #endif
+        if (this->glCaps().requiresFlushBetweenNonAndInstancedDraws()) {
+            fRequiresFlushBeforeNextInstancedDraw = false;
+        }
        fHWBoundRenderTargetUniqueID = rtID;
        this->flushViewport(target->getViewport());
    }
@ -2489,6 +2495,9 @@ void GrGLGpu::sendMeshToGpu(GrPrimitiveType primitiveType, const GrBuffer* verte
        this->setupGeometry(nullptr, vertexBuffer, 0, nullptr, 0, GrPrimitiveRestart::kNo);
        GL_CALL(DrawArrays(glPrimType, baseVertex, vertexCount));
    }
+    if (this->glCaps().requiresFlushBetweenNonAndInstancedDraws()) {
+        fRequiresFlushBeforeNextInstancedDraw = true;
+    }
    fStats.incNumDraws();
 }

@ -2508,6 +2517,9 @@ void GrGLGpu::sendIndexedMeshToGpu(GrPrimitiveType primitiveType, const GrBuffer
    } else {
        GL_CALL(DrawElements(glPrimType, indexCount, GR_GL_UNSIGNED_SHORT, indices));
    }
+    if (this->glCaps().requiresFlushBetweenNonAndInstancedDraws()) {
+        fRequiresFlushBeforeNextInstancedDraw = true;
+    }
    fStats.incNumDraws();
 }

@ -2515,6 +2527,11 @@ void GrGLGpu::sendInstancedMeshToGpu(GrPrimitiveType primitiveType, const GrBuff
                                     int vertexCount, int baseVertex,
                                     const GrBuffer* instanceBuffer, int instanceCount,
                                     int baseInstance) {
+    if (fRequiresFlushBeforeNextInstancedDraw) {
+        SkASSERT(this->glCaps().requiresFlushBetweenNonAndInstancedDraws());
+        GL_CALL(Flush());
+        fRequiresFlushBeforeNextInstancedDraw = false;
+    }
    GrGLenum glPrimType = gr_primitive_type_to_gl_mode(primitiveType);
    int maxInstances = this->glCaps().maxInstancesPerDrawWithoutCrashing(instanceCount);
    for (int i = 0; i < instanceCount; i += maxInstances) {
@ -2532,6 +2549,11 @@ void GrGLGpu::sendIndexedInstancedMeshToGpu(GrPrimitiveType primitiveType,
                                            int baseVertex, const GrBuffer* instanceBuffer,
                                            int instanceCount, int baseInstance,
                                            GrPrimitiveRestart enablePrimitiveRestart) {
+    if (fRequiresFlushBeforeNextInstancedDraw) {
+        SkASSERT(this->glCaps().requiresFlushBetweenNonAndInstancedDraws());
+        GL_CALL(Flush());
+        fRequiresFlushBeforeNextInstancedDraw = false;
+    }
    const GrGLenum glPrimType = gr_primitive_type_to_gl_mode(primitiveType);
    GrGLvoid* indices = reinterpret_cast<void*>(indexBuffer->baseOffset() +
                                                sizeof(uint16_t) * baseIndex);
--- a/src/gpu/gl/GrGLGpu.h
+++ b/src/gpu/gl/GrGLGpu.h
@ -630,6 +630,7 @@ private:

    float fHWMinSampleShading;
    GrPrimitiveType fLastPrimitiveType;
+    bool fRequiresFlushBeforeNextInstancedDraw = false;

    class SamplerObjectCache;
    std::unique_ptr<SamplerObjectCache> fSamplerObjectCache;