Batch up draws into triangle fans as large as possible when drawing convex

edge-antialiased (edge AA) polygons, so we minimize state changes and GL calls.
This requires querying GL for the maximum number of fragment uniforms. It also
makes the shader generator produce custom shaders for the number of relevant
edges. This gives a ~5X speedup on the "Shapes" SampleApp.

Review URL:  http://codereview.appspot.com/4536070/



git-svn-id: http://skia.googlecode.com/svn/trunk@1380 2bbb7eff-a529-9590-31e7-b0007b416f81
This commit is contained in:
senorblanco@chromium.org 2011-05-19 17:11:07 +00:00
parent 27661181d7
commit ef3913bcbf
10 changed files with 128 additions and 67 deletions

View File

@ -54,6 +54,17 @@ public:
kMaxTexCoords = kNumStages
};
/**
* The absolute maximum number of edges that may be specified for
* a single draw call when performing edge antialiasing. This is used for
* the size of several static buffers, so implementations of getMaxEdges()
* (below) should clamp to this value.
*/
enum {
kMaxEdges = 32
};
/**
* Bitfield used to indicate which stages are in use.
*/
@ -78,9 +89,6 @@ public:
kNoColorWrites_StateBit = 0x08, //<! If set it disables writing colors.
// Useful while performing stencil
// ops.
kEdgeAA_StateBit = 0x10, //<! Perform edge anti-aliasing.
// Requires the edges to be passed in
// setEdgeAAData().
// subclass may use additional bits internally
kDummyStateBit,
@ -128,6 +136,20 @@ public:
fCurrDrawState.fStencilSettings.setDisabled();
}
/**
 * A line in implicit form, fX * x + fY * y + fZ = 0, used to describe one
 * boundary edge when performing edge antialiasing.
 */
class Edge {
public:
    /** Leaves fX, fY and fZ uninitialized. */
    Edge() {}
    Edge(float x, float y, float z) : fX(x), fY(y), fZ(z) {}

    /**
     * Returns the point where this edge's line crosses other's line.
     *
     * The two lines must not be parallel: for parallel lines both
     * denominators below are zero and the result is not a finite point.
     * The method does not modify either edge, so it is const and may be
     * called on const Edges.
     */
    GrPoint intersect(const Edge& other) const {
        return GrPoint::Make(
            (fY * other.fZ - other.fY * fZ) /
                (fX * other.fY - other.fX * fY),
            (fX * other.fZ - other.fX * fZ) /
                (other.fX * fY - fX * other.fY));
    }

    // Coefficients of x, y and the constant term, in that order.
    float fX, fY, fZ;
};
protected:
struct DrState {
@ -164,7 +186,8 @@ protected:
GrStencilSettings fStencilSettings;
GrMatrix fViewMatrix;
float fEdgeAAEdges[18];
Edge fEdgeAAEdges[kMaxEdges];
int fEdgeAANumEdges;
// Compares the raw bytes of the two structs, so every byte counts: any
// padding, and all kMaxEdges entries of fEdgeAAEdges, including those at
// or beyond fEdgeAANumEdges that setEdgeAAData() did not write.
bool operator ==(const DrState& s) const {
return 0 == memcmp(this, &s, sizeof(DrState));
}
@ -536,7 +559,7 @@ public:
* @param edges     array of numEdges edges, each holding the coefficients
*                  of an edge equation in Ax + By + C form
* @param numEdges  number of entries in edges; must not exceed kMaxEdges
*/
void setEdgeAAData(const float edges[18]);
void setEdgeAAData(const Edge* edges, int numEdges);
private:
static const int TEX_COORD_BIT_CNT = kNumStages*kMaxTexCoords;
@ -804,6 +827,15 @@ public:
*/
virtual void clear(const GrIRect* rect, GrColor color) = 0;
/**
* Returns the maximum number of edges that may be specified in a single
* draw call when performing edge antialiasing. This is usually limited
* by the number of fragment uniforms which may be uploaded. Must be a
* minimum of six, since a triangle's vertices each belong to two boundary
* edges which may be distinct.
*/
virtual int getMaxEdges() const { return 6; }
///////////////////////////////////////////////////////////////////////////
class AutoStateRestore : ::GrNoncopyable {

View File

@ -318,6 +318,8 @@
#define GR_GL_ACTIVE_ATTRIBUTE_MAX_LENGTH 0x8B8A
#define GR_GL_SHADING_LANGUAGE_VERSION 0x8B8C
#define GR_GL_CURRENT_PROGRAM 0x8B8D
#define GR_GL_MAX_FRAGMENT_UNIFORM_COMPONENTS 0x8B49
#define GR_GL_MAX_VERTEX_UNIFORM_COMPONENTS 0x8B4A
/* StencilFunction */
#define GR_GL_NEVER 0x0200

View File

@ -483,7 +483,7 @@ void GrDrawTarget::setIndexSourceToBuffer(const GrIndexBuffer* buffer) {
bool GrDrawTarget::canDisableBlend() const {
// If we're using edge antialiasing, we can't force blend off.
if (fCurrDrawState.fFlagBits & kEdgeAA_StateBit) {
if (fCurrDrawState.fEdgeAANumEdges > 0) {
return false;
}
@ -535,8 +535,10 @@ bool GrDrawTarget::canDisableBlend() const {
}
///////////////////////////////////////////////////////////////////////////////
void GrDrawTarget::setEdgeAAData(const float edges[18]) {
memcpy(fCurrDrawState.fEdgeAAEdges, edges, sizeof(fCurrDrawState.fEdgeAAEdges));
/**
 * Copies numEdges edges into the current draw state and records the count.
 *
 * @param edges     array of at least numEdges edges; may be NULL only when
 *                  numEdges is 0
 * @param numEdges  number of edges to copy, in [0, kMaxEdges]. A count of 0
 *                  leaves no edges set.
 */
void GrDrawTarget::setEdgeAAData(const Edge* edges, int numEdges) {
    GrAssert(numEdges >= 0 && numEdges <= kMaxEdges);
    GrAssert(NULL != edges || 0 == numEdges);

    // Enforce the bounds here as well, in case GrAssert is compiled out: an
    // oversized count would write past fEdgeAAEdges, and a negative one
    // would turn into a huge size_t in the memcpy length below.
    if (NULL == edges || numEdges < 0) {
        numEdges = 0;
    } else if (numEdges > kMaxEdges) {
        numEdges = kMaxEdges;
    }

    // memcpy with a null source is undefined even for zero bytes, and
    // callers pass (NULL, 0) to clear the edge data, so skip the empty copy.
    if (numEdges > 0) {
        memcpy(fCurrDrawState.fEdgeAAEdges, edges, numEdges * sizeof(Edge));
    }
    fCurrDrawState.fEdgeAANumEdges = numEdges;
}

View File

@ -361,11 +361,6 @@ bool GrGLProgram::genProgram(GrGLProgram::CachedData* programData) const {
}
}
if (fProgramDesc.fUsesEdgeAA) {
segments.fFSUnis.append("uniform vec3 " EDGES_UNI_NAME "[6];\n");
programData->fUniLocations.fEdgesUni = kUseUniform;
}
if (fProgramDesc.fEmitsPointSize){
segments.fVSCode.append("\tgl_PointSize = 1.0;\n");
}
@ -457,17 +452,36 @@ bool GrGLProgram::genProgram(GrGLProgram::CachedData* programData) const {
// we will want to compute coverage for some blend when there is no
// color (when dual source blending is enabled). But for now we have this if
if (!wroteFragColorZero) {
if (fProgramDesc.fUsesEdgeAA) {
// FIXME: put the a's in a loop
if (fProgramDesc.fEdgeAANumEdges > 0) {
segments.fFSUnis.append("uniform vec3 " EDGES_UNI_NAME "[");
segments.fFSUnis.appendS32(fProgramDesc.fEdgeAANumEdges);
segments.fFSUnis.append("];\n");
programData->fUniLocations.fEdgesUni = kUseUniform;
int count = fProgramDesc.fEdgeAANumEdges;
segments.fFSCode.append(
"\tvec3 pos = vec3(gl_FragCoord.xy, 1);\n"
"\tfloat a0 = clamp(dot(uEdges[0], pos), 0.0, 1.0);\n"
"\tfloat a1 = clamp(dot(uEdges[1], pos), 0.0, 1.0);\n"
"\tfloat a2 = clamp(dot(uEdges[2], pos), 0.0, 1.0);\n"
"\tfloat a3 = clamp(dot(uEdges[3], pos), 0.0, 1.0);\n"
"\tfloat a4 = clamp(dot(uEdges[4], pos), 0.0, 1.0);\n"
"\tfloat a5 = clamp(dot(uEdges[5], pos), 0.0, 1.0);\n"
"\tfloat edgeAlpha = min(min(a0 * a1, a2 * a3), a4 * a5);\n");
"\tvec3 pos = vec3(gl_FragCoord.xy, 1);\n");
for (int i = 0; i < count; i++) {
segments.fFSCode.append("\tfloat a");
segments.fFSCode.appendS32(i);
segments.fFSCode.append(" = clamp(dot(" EDGES_UNI_NAME "[");
segments.fFSCode.appendS32(i);
segments.fFSCode.append("], pos), 0.0, 1.0);\n");
}
segments.fFSCode.append("\tfloat edgeAlpha = ");
for (int i = 0; i < count - 1; i++) {
segments.fFSCode.append("min(a");
segments.fFSCode.appendS32(i);
segments.fFSCode.append(" * a");
segments.fFSCode.appendS32(i + 1);
segments.fFSCode.append(", ");
}
segments.fFSCode.append("a");
segments.fFSCode.appendS32(count - 1);
segments.fFSCode.append(" * a0");
for (int i = 0; i < count - 1; i++) {
segments.fFSCode.append(")");
}
segments.fFSCode.append(";\n");
inCoverage = "edgeAlpha";
coverageIsScalar = true;
}

View File

@ -96,7 +96,7 @@ private:
int fFirstCoverageStage;
bool fEmitsPointSize;
bool fUsesEdgeAA;
int fEdgeAANumEdges;
SkXfermode::Mode fColorFilterXfermode;

View File

@ -16,6 +16,7 @@
#include "GrGpuGL.h"
#include "GrMemory.h"
#include "GrTypes.h"
static const GrGLuint GR_MAX_GLUINT = ~0;
static const GrGLint GR_INVAL_GLINT = ~0;
@ -201,6 +202,16 @@ GrGpuGL::GrGpuGL() {
GR_GL_GetIntegerv(GR_GL_MAX_TEXTURE_UNITS, &maxTextureUnits);
GrAssert(maxTextureUnits > kNumStages);
}
if (GR_GL_SUPPORT_ES2) {
GR_GL_GetIntegerv(GR_GL_MAX_FRAGMENT_UNIFORM_VECTORS,
&fMaxFragmentUniformVectors);
} else if (GR_GL_SUPPORT_DESKTOP) {
GrGLint max;
GR_GL_GetIntegerv(GR_GL_MAX_FRAGMENT_UNIFORM_COMPONENTS, &max);
fMaxFragmentUniformVectors = max / 4;
} else {
fMaxFragmentUniformVectors = 16;
}
////////////////////////////////////////////////////////////////////////////
// Check for supported features.
@ -2064,3 +2075,9 @@ void GrGpuGL::setBuffers(bool indexed,
}
}
}
int GrGpuGL::getMaxEdges() const {
// FIXME: This is a pessimistic estimate based on how many other things
// want to add uniforms. This should be centralized somewhere.
return GR_CT_MIN(fMaxFragmentUniformVectors - 8, kMaxEdges);
}

View File

@ -107,6 +107,7 @@ protected:
virtual void flushScissor(const GrIRect* rect);
void clearStencil(uint32_t value, uint32_t mask);
virtual void clearStencilClip(const GrIRect& rect);
virtual int getMaxEdges() const;
// binds texture unit in GL
void setTextureUnit(int unitIdx);
@ -189,6 +190,9 @@ private:
// Do we have stencil wrap ops.
bool fHasStencilWrap;
// The maximum number of fragment uniform vectors (GLES has min. 16).
int fMaxFragmentUniformVectors;
// ES requires an extension to support RGBA8 in RenderBufferStorage
bool fRGBA8Renderbuffer;

View File

@ -193,7 +193,7 @@ void GrGpuGLShaders::ProgramUnitTest() {
idx = (int)(random.nextF() * (kNumStages+1));
pdesc.fFirstCoverageStage = idx;
pdesc.fUsesEdgeAA = (random.nextF() > .5f);
pdesc.fEdgeAANumEdges = (random.nextF() * (getMaxEdges() + 1));
for (int s = 0; s < kNumStages; ++s) {
// enable the stage?
@ -442,16 +442,17 @@ void GrGpuGLShaders::flushTexelSize(int s) {
void GrGpuGLShaders::flushEdgeAAData() {
const int& uni = fProgramData->fUniLocations.fEdgesUni;
if (GrGLProgram::kUnusedUniform != uni) {
float edges[18];
memcpy(edges, fCurrDrawState.fEdgeAAEdges, sizeof(edges));
int count = fCurrDrawState.fEdgeAANumEdges;
Edge edges[kMaxEdges];
// Flip the edges in Y
float height = fCurrDrawState.fRenderTarget->height();
for (int i = 0; i < 6; ++i) {
float b = edges[i * 3 + 1];
edges[i * 3 + 1] = -b;
edges[i * 3 + 2] += b * height;
for (int i = 0; i < count; ++i) {
edges[i] = fCurrDrawState.fEdgeAAEdges[i];
float b = edges[i].fY;
edges[i].fY = -b;
edges[i].fZ += b * height;
}
GR_GL(Uniform3fv(uni, 6, edges));
GR_GL(Uniform3fv(uni, count, &edges[0].fX));
}
}
@ -701,7 +702,7 @@ void GrGpuGLShaders::buildProgram(GrPrimitiveType type) {
desc.fColorType = GrGLProgram::ProgramDesc::kAttribute_ColorType;
}
desc.fUsesEdgeAA = fCurrDrawState.fFlagBits & kEdgeAA_StateBit;
desc.fEdgeAANumEdges = fCurrDrawState.fEdgeAANumEdges;
for (int s = 0; s < kNumStages; ++s) {
GrGLProgram::ProgramDesc::StageDesc& stage = desc.fStages[s];

View File

@ -85,19 +85,7 @@ static unsigned fill_type_to_glu_winding_rule(GrPathFill fill) {
GrTesselatedPathRenderer::GrTesselatedPathRenderer() {
}
class Edge {
public:
Edge() {}
Edge(float x, float y, float z) : fX(x), fY(y), fZ(z) {}
GrPoint intersect(const Edge& other) {
return GrPoint::Make(
(fY * other.fZ - other.fY * fZ) / (fX * other.fY - other.fX * fY),
(fX * other.fZ - other.fX * fZ) / (other.fX * fY - fX * other.fY));
}
float fX, fY, fZ;
};
typedef GrTDArray<Edge> EdgeArray;
typedef GrTDArray<GrDrawTarget::Edge> EdgeArray;
bool isCCW(const GrPoint* pts)
{
@ -121,15 +109,15 @@ static size_t computeEdgesAndOffsetVertices(const GrMatrix& matrix,
GrVec tangent = GrVec::Make(p.fY - q.fY, q.fX - p.fX);
float scale = sign / tangent.length();
float cross2 = p.fX * q.fY - q.fX * p.fY;
Edge edge(tangent.fX * scale,
GrDrawTarget::Edge edge(tangent.fX * scale,
tangent.fY * scale,
cross2 * scale + 0.5f);
*edges->append() = edge;
p = q;
}
Edge prev_edge = *edges->back();
GrDrawTarget::Edge prev_edge = *edges->back();
for (size_t i = 0; i < edges->count(); ++i) {
Edge edge = edges->at(i);
GrDrawTarget::Edge edge = edges->at(i);
vertices[i] = prev_edge.intersect(edge);
inverse.mapPoints(&vertices[i], 1);
prev_edge = edge;
@ -262,29 +250,30 @@ FINISHED:
if (subpathCnt == 1 && !inverted && path.isConvex()) {
if (target->isAntialiasState()) {
target->enableState(GrDrawTarget::kEdgeAA_StateBit);
EdgeArray edges;
GrMatrix inverse, matrix = target->getViewMatrix();
target->getViewInverse(&inverse);
count = computeEdgesAndOffsetVertices(matrix, inverse, base, count, &edges);
GrPoint triangle[3];
triangle[0] = base[0];
Edge triangleEdges[6];
triangleEdges[0] = *edges.back();
triangleEdges[1] = edges[0];
for (size_t i = 1; i < count - 1; i++) {
triangle[1] = base[i];
triangle[2] = base[i + 1];
triangleEdges[2] = edges[i - 1];
triangleEdges[3] = edges[i];
triangleEdges[4] = edges[i];
triangleEdges[5] = edges[i + 1];
target->setVertexSourceToArray(layout, triangle, 3);
target->setEdgeAAData(&triangleEdges[0].fX);
target->drawNonIndexed(kTriangles_PrimitiveType, 0, 3);
int maxEdges = target->getMaxEdges();
if (count <= maxEdges) {
// All edges fit; upload all edges and draw all verts as a fan
target->setVertexSourceToArray(layout, base, count);
target->setEdgeAAData(&edges[0], count);
target->drawNonIndexed(kTriangleFan_PrimitiveType, 0, count);
} else {
// Upload "maxEdges" edges and verts at a time, and draw as
// separate fans
for (size_t i = 0; i < count - 2; i += maxEdges - 2) {
edges[i] = edges[0];
base[i] = base[0];
int size = GR_CT_MIN(count - i, maxEdges);
target->setVertexSourceToArray(layout, &base[i], size);
target->setEdgeAAData(&edges[i], size);
target->drawNonIndexed(kTriangleFan_PrimitiveType, 0, size);
}
}
target->disableState(GrDrawTarget::kEdgeAA_StateBit);
target->setEdgeAAData(NULL, 0);
} else {
target->setVertexSourceToArray(layout, base, count);
target->drawNonIndexed(kTriangleFan_PrimitiveType, 0, count);

View File

@ -23,7 +23,7 @@
#include "SkTypes.h"
void GrPrintf(const char format[], ...) {
const size_t MAX_BUFFER_SIZE = 512;
const size_t MAX_BUFFER_SIZE = 2048;
char buffer[MAX_BUFFER_SIZE + 1];
va_list args;