From ef3913bcbff265ff86116ae4f3dd2768dc42ccca Mon Sep 17 00:00:00 2001 From: "senorblanco@chromium.org" Date: Thu, 19 May 2011 17:11:07 +0000 Subject: [PATCH] Batch up draws into triangle fans as large as possible when drawing convex edge AA polys, so we minimize state changes and GL calls. This requires querying GL for the maximum number of fragment uniforms. It also makes the shader generator produce custom shaders for the number of relevant edges. This gives a ~5X speedup on the "Shapes" SampleApp. Review URL: http://codereview.appspot.com/4536070/ git-svn-id: http://skia.googlecode.com/svn/trunk@1380 2bbb7eff-a529-9590-31e7-b0007b416f81 --- gpu/include/GrDrawTarget.h | 42 ++++++++++++++++++--- gpu/include/GrGLDefines.h | 2 + gpu/src/GrDrawTarget.cpp | 8 ++-- gpu/src/GrGLProgram.cpp | 44 ++++++++++++++-------- gpu/src/GrGLProgram.h | 2 +- gpu/src/GrGpuGL.cpp | 17 +++++++++ gpu/src/GrGpuGL.h | 4 ++ gpu/src/GrGpuGLShaders.cpp | 19 +++++----- gpu/src/GrTesselatedPathRenderer.cpp | 55 +++++++++++----------------- src/gpu/GrPrintf_skia.cpp | 2 +- 10 files changed, 128 insertions(+), 67 deletions(-) diff --git a/gpu/include/GrDrawTarget.h b/gpu/include/GrDrawTarget.h index 985cca7c1b..e89a2734f0 100644 --- a/gpu/include/GrDrawTarget.h +++ b/gpu/include/GrDrawTarget.h @@ -54,6 +54,17 @@ public: kMaxTexCoords = kNumStages }; + + /** + * The absolute maximum number of edges that may be specified for + * a single draw call when performing edge antialiasing. This is used for + * the size of several static buffers, so implementations of getMaxEdges() + * (below) should clamp to this value. + */ + enum { + kMaxEdges = 32 + }; + /** * Bitfield used to indicate which stages are in use. 
*/ @@ -78,9 +89,6 @@ public: kNoColorWrites_StateBit = 0x08, // 0) { return false; } @@ -535,8 +535,10 @@ bool GrDrawTarget::canDisableBlend() const { } /////////////////////////////////////////////////////////////////////////////// -void GrDrawTarget::setEdgeAAData(const float edges[18]) { - memcpy(fCurrDrawState.fEdgeAAEdges, edges, sizeof(fCurrDrawState.fEdgeAAEdges)); +void GrDrawTarget::setEdgeAAData(const Edge* edges, int numEdges) { + GrAssert(numEdges <= kMaxEdges); + memcpy(fCurrDrawState.fEdgeAAEdges, edges, numEdges * sizeof(Edge)); + fCurrDrawState.fEdgeAANumEdges = numEdges; } diff --git a/gpu/src/GrGLProgram.cpp b/gpu/src/GrGLProgram.cpp index 0f82d2a40c..9a9e3c2f91 100644 --- a/gpu/src/GrGLProgram.cpp +++ b/gpu/src/GrGLProgram.cpp @@ -361,11 +361,6 @@ bool GrGLProgram::genProgram(GrGLProgram::CachedData* programData) const { } } - if (fProgramDesc.fUsesEdgeAA) { - segments.fFSUnis.append("uniform vec3 " EDGES_UNI_NAME "[6];\n"); - programData->fUniLocations.fEdgesUni = kUseUniform; - } - if (fProgramDesc.fEmitsPointSize){ segments.fVSCode.append("\tgl_PointSize = 1.0;\n"); } @@ -457,17 +452,36 @@ bool GrGLProgram::genProgram(GrGLProgram::CachedData* programData) const { // we will want to compute coverage for some blend when there is no // color (when dual source blending is enabled). 
But for now we have this if if (!wroteFragColorZero) { - if (fProgramDesc.fUsesEdgeAA) { - // FIXME: put the a's in a loop + if (fProgramDesc.fEdgeAANumEdges > 0) { + segments.fFSUnis.append("uniform vec3 " EDGES_UNI_NAME "["); + segments.fFSUnis.appendS32(fProgramDesc.fEdgeAANumEdges); + segments.fFSUnis.append("];\n"); + programData->fUniLocations.fEdgesUni = kUseUniform; + int count = fProgramDesc.fEdgeAANumEdges; segments.fFSCode.append( - "\tvec3 pos = vec3(gl_FragCoord.xy, 1);\n" - "\tfloat a0 = clamp(dot(uEdges[0], pos), 0.0, 1.0);\n" - "\tfloat a1 = clamp(dot(uEdges[1], pos), 0.0, 1.0);\n" - "\tfloat a2 = clamp(dot(uEdges[2], pos), 0.0, 1.0);\n" - "\tfloat a3 = clamp(dot(uEdges[3], pos), 0.0, 1.0);\n" - "\tfloat a4 = clamp(dot(uEdges[4], pos), 0.0, 1.0);\n" - "\tfloat a5 = clamp(dot(uEdges[5], pos), 0.0, 1.0);\n" - "\tfloat edgeAlpha = min(min(a0 * a1, a2 * a3), a4 * a5);\n"); + "\tvec3 pos = vec3(gl_FragCoord.xy, 1);\n"); + for (int i = 0; i < count; i++) { + segments.fFSCode.append("\tfloat a"); + segments.fFSCode.appendS32(i); + segments.fFSCode.append(" = clamp(dot(" EDGES_UNI_NAME "["); + segments.fFSCode.appendS32(i); + segments.fFSCode.append("], pos), 0.0, 1.0);\n"); + } + segments.fFSCode.append("\tfloat edgeAlpha = "); + for (int i = 0; i < count - 1; i++) { + segments.fFSCode.append("min(a"); + segments.fFSCode.appendS32(i); + segments.fFSCode.append(" * a"); + segments.fFSCode.appendS32(i + 1); + segments.fFSCode.append(", "); + } + segments.fFSCode.append("a"); + segments.fFSCode.appendS32(count - 1); + segments.fFSCode.append(" * a0"); + for (int i = 0; i < count - 1; i++) { + segments.fFSCode.append(")"); + } + segments.fFSCode.append(";\n"); inCoverage = "edgeAlpha"; coverageIsScalar = true; } diff --git a/gpu/src/GrGLProgram.h b/gpu/src/GrGLProgram.h index e02d15b617..d4a640626f 100644 --- a/gpu/src/GrGLProgram.h +++ b/gpu/src/GrGLProgram.h @@ -96,7 +96,7 @@ private: int fFirstCoverageStage; bool fEmitsPointSize; - bool fUsesEdgeAA; + int 
fEdgeAANumEdges; SkXfermode::Mode fColorFilterXfermode; diff --git a/gpu/src/GrGpuGL.cpp b/gpu/src/GrGpuGL.cpp index e8c7afb0ed..5a2d2bd042 100644 --- a/gpu/src/GrGpuGL.cpp +++ b/gpu/src/GrGpuGL.cpp @@ -16,6 +16,7 @@ #include "GrGpuGL.h" #include "GrMemory.h" +#include "GrTypes.h" static const GrGLuint GR_MAX_GLUINT = ~0; static const GrGLint GR_INVAL_GLINT = ~0; @@ -201,6 +202,16 @@ GrGpuGL::GrGpuGL() { GR_GL_GetIntegerv(GR_GL_MAX_TEXTURE_UNITS, &maxTextureUnits); GrAssert(maxTextureUnits > kNumStages); } + if (GR_GL_SUPPORT_ES2) { + GR_GL_GetIntegerv(GR_GL_MAX_FRAGMENT_UNIFORM_VECTORS, + &fMaxFragmentUniformVectors); + } else if (GR_GL_SUPPORT_DESKTOP) { + GrGLint max; + GR_GL_GetIntegerv(GR_GL_MAX_FRAGMENT_UNIFORM_COMPONENTS, &max); + fMaxFragmentUniformVectors = max / 4; + } else { + fMaxFragmentUniformVectors = 16; + } //////////////////////////////////////////////////////////////////////////// // Check for supported features. @@ -2064,3 +2075,9 @@ void GrGpuGL::setBuffers(bool indexed, } } } + +int GrGpuGL::getMaxEdges() const { + // FIXME: This is a pessimistic estimate based on how many other things + // want to add uniforms. This should be centralized somewhere. + return GR_CT_MIN(fMaxFragmentUniformVectors - 8, kMaxEdges); +} diff --git a/gpu/src/GrGpuGL.h b/gpu/src/GrGpuGL.h index da955cfc56..d48d69eccb 100644 --- a/gpu/src/GrGpuGL.h +++ b/gpu/src/GrGpuGL.h @@ -107,6 +107,7 @@ protected: virtual void flushScissor(const GrIRect* rect); void clearStencil(uint32_t value, uint32_t mask); virtual void clearStencilClip(const GrIRect& rect); + virtual int getMaxEdges() const; // binds texture unit in GL void setTextureUnit(int unitIdx); @@ -189,6 +190,9 @@ private: // Do we have stencil wrap ops. bool fHasStencilWrap; + // The maximum number of fragment uniform vectors (GLES has min. 16). 
+ int fMaxFragmentUniformVectors; + // ES requires an extension to support RGBA8 in RenderBufferStorage bool fRGBA8Renderbuffer; diff --git a/gpu/src/GrGpuGLShaders.cpp b/gpu/src/GrGpuGLShaders.cpp index 08845a9258..bbf9719cf9 100644 --- a/gpu/src/GrGpuGLShaders.cpp +++ b/gpu/src/GrGpuGLShaders.cpp @@ -193,7 +193,7 @@ void GrGpuGLShaders::ProgramUnitTest() { idx = (int)(random.nextF() * (kNumStages+1)); pdesc.fFirstCoverageStage = idx; - pdesc.fUsesEdgeAA = (random.nextF() > .5f); + pdesc.fEdgeAANumEdges = (random.nextF() * (getMaxEdges() + 1)); for (int s = 0; s < kNumStages; ++s) { // enable the stage? @@ -442,16 +442,17 @@ void GrGpuGLShaders::flushTexelSize(int s) { void GrGpuGLShaders::flushEdgeAAData() { const int& uni = fProgramData->fUniLocations.fEdgesUni; if (GrGLProgram::kUnusedUniform != uni) { - float edges[18]; - memcpy(edges, fCurrDrawState.fEdgeAAEdges, sizeof(edges)); + int count = fCurrDrawState.fEdgeAANumEdges; + Edge edges[kMaxEdges]; // Flip the edges in Y float height = fCurrDrawState.fRenderTarget->height(); - for (int i = 0; i < 6; ++i) { - float b = edges[i * 3 + 1]; - edges[i * 3 + 1] = -b; - edges[i * 3 + 2] += b * height; + for (int i = 0; i < count; ++i) { + edges[i] = fCurrDrawState.fEdgeAAEdges[i]; + float b = edges[i].fY; + edges[i].fY = -b; + edges[i].fZ += b * height; } - GR_GL(Uniform3fv(uni, 6, edges)); + GR_GL(Uniform3fv(uni, count, &edges[0].fX)); } } @@ -701,7 +702,7 @@ void GrGpuGLShaders::buildProgram(GrPrimitiveType type) { desc.fColorType = GrGLProgram::ProgramDesc::kAttribute_ColorType; } - desc.fUsesEdgeAA = fCurrDrawState.fFlagBits & kEdgeAA_StateBit; + desc.fEdgeAANumEdges = fCurrDrawState.fEdgeAANumEdges; for (int s = 0; s < kNumStages; ++s) { GrGLProgram::ProgramDesc::StageDesc& stage = desc.fStages[s]; diff --git a/gpu/src/GrTesselatedPathRenderer.cpp b/gpu/src/GrTesselatedPathRenderer.cpp index da6da5cad8..8a33012872 100644 --- a/gpu/src/GrTesselatedPathRenderer.cpp +++ b/gpu/src/GrTesselatedPathRenderer.cpp @@ 
-85,19 +85,7 @@ static unsigned fill_type_to_glu_winding_rule(GrPathFill fill) { GrTesselatedPathRenderer::GrTesselatedPathRenderer() { } -class Edge { - public: - Edge() {} - Edge(float x, float y, float z) : fX(x), fY(y), fZ(z) {} - GrPoint intersect(const Edge& other) { - return GrPoint::Make( - (fY * other.fZ - other.fY * fZ) / (fX * other.fY - other.fX * fY), - (fX * other.fZ - other.fX * fZ) / (other.fX * fY - fX * other.fY)); - } - float fX, fY, fZ; -}; - -typedef GrTDArray<Edge> EdgeArray; +typedef GrTDArray<GrDrawTarget::Edge> EdgeArray; bool isCCW(const GrPoint* pts) { @@ -121,15 +109,15 @@ static size_t computeEdgesAndOffsetVertices(const GrMatrix& matrix, GrVec tangent = GrVec::Make(p.fY - q.fY, q.fX - p.fX); float scale = sign / tangent.length(); float cross2 = p.fX * q.fY - q.fX * p.fY; - Edge edge(tangent.fX * scale, + GrDrawTarget::Edge edge(tangent.fX * scale, tangent.fY * scale, cross2 * scale + 0.5f); *edges->append() = edge; p = q; } - Edge prev_edge = *edges->back(); + GrDrawTarget::Edge prev_edge = *edges->back(); for (size_t i = 0; i < edges->count(); ++i) { - Edge edge = edges->at(i); + GrDrawTarget::Edge edge = edges->at(i); vertices[i] = prev_edge.intersect(edge); inverse.mapPoints(&vertices[i], 1); prev_edge = edge; @@ -262,29 +250,30 @@ FINISHED: if (subpathCnt == 1 && !inverted && path.isConvex()) { if (target->isAntialiasState()) { - target->enableState(GrDrawTarget::kEdgeAA_StateBit); EdgeArray edges; GrMatrix inverse, matrix = target->getViewMatrix(); target->getViewInverse(&inverse); count = computeEdgesAndOffsetVertices(matrix, inverse, base, count, &edges); - GrPoint triangle[3]; - triangle[0] = base[0]; - Edge triangleEdges[6]; - triangleEdges[0] = *edges.back(); - triangleEdges[1] = edges[0]; - for (size_t i = 1; i < count - 1; i++) { - triangle[1] = base[i]; - triangle[2] = base[i + 1]; - triangleEdges[2] = edges[i - 1]; - triangleEdges[3] = edges[i]; - triangleEdges[4] = edges[i]; - triangleEdges[5] = edges[i + 1]; -
target->setVertexSourceToArray(layout, triangle, 3); - target->setEdgeAAData(&triangleEdges[0].fX); - target->drawNonIndexed(kTriangles_PrimitiveType, 0, 3); + int maxEdges = target->getMaxEdges(); + if (count <= maxEdges) { + // All edges fit; upload all edges and draw all verts as a fan + target->setVertexSourceToArray(layout, base, count); + target->setEdgeAAData(&edges[0], count); + target->drawNonIndexed(kTriangleFan_PrimitiveType, 0, count); + } else { + // Upload "maxEdges" edges and verts at a time, and draw as + // separate fans + for (size_t i = 0; i < count - 2; i += maxEdges - 2) { + edges[i] = edges[0]; + base[i] = base[0]; + int size = GR_CT_MIN(count - i, maxEdges); + target->setVertexSourceToArray(layout, &base[i], size); + target->setEdgeAAData(&edges[i], size); + target->drawNonIndexed(kTriangleFan_PrimitiveType, 0, size); + } } - target->disableState(GrDrawTarget::kEdgeAA_StateBit); + target->setEdgeAAData(NULL, 0); } else { target->setVertexSourceToArray(layout, base, count); target->drawNonIndexed(kTriangleFan_PrimitiveType, 0, count); diff --git a/src/gpu/GrPrintf_skia.cpp b/src/gpu/GrPrintf_skia.cpp index fa8b6a7647..6da8822cc4 100644 --- a/src/gpu/GrPrintf_skia.cpp +++ b/src/gpu/GrPrintf_skia.cpp @@ -23,7 +23,7 @@ #include "SkTypes.h" void GrPrintf(const char format[], ...) { - const size_t MAX_BUFFER_SIZE = 512; + const size_t MAX_BUFFER_SIZE = 2048; char buffer[MAX_BUFFER_SIZE + 1]; va_list args;