Merge pull request #741 from takahito-tejima/bspline

optimize PatchTableFactory::Create performance

The primary hesitation here is that this change alters the approximation for BSpline end caps, however the consensus is that BSpline end caps were already the most extreme approximation, so it makes sense to sacrifice quality for speed.

Reviewed by David, Barry, George and myself.
This commit is contained in:
Jeremy Cowles 2015-09-20 22:51:30 -07:00
commit fb4f3f6a62
5 changed files with 644 additions and 238 deletions

View File

@ -51,14 +51,61 @@ namespace {
EndCapBSplineBasisPatchFactory::EndCapBSplineBasisPatchFactory(
TopologyRefiner const & refiner) :
_refiner(&refiner), _numVertices(0), _numPatches(0) {
// Sanity check: the mesh must be adaptively refined
assert(not refiner.IsUniform());
// Reserve the patch point stencils. Ideally topology refiner
// would have an API to return how many endcap patches will be required.
// Instead we conservatively estimate by the number of patches at the
// finest level.
int numMaxLevelFaces = refiner.GetLevel(refiner.GetMaxLevel()).GetNumFaces();
_vertexStencils.reserve(numMaxLevelFaces*16);
_varyingStencils.reserve(numMaxLevelFaces*16);
}
ConstIndexArray
EndCapBSplineBasisPatchFactory::GetPatchPoints(
Vtr::internal::Level const * level, Index faceIndex,
PatchTableFactory::PatchFaceTag const * /*levelPatchTags*/,
Vtr::internal::Level const * level, Index thisFace,
PatchTableFactory::PatchFaceTag const *levelPatchTags,
int levelVertOffset) {
Vtr::ConstIndexArray facePoints = level->getFaceVertices(thisFace);
PatchTableFactory::PatchFaceTag patchTag = levelPatchTags[thisFace];
// if it's boundary, fallback to use GregoryBasis
if (patchTag._boundaryCount > 0) {
return getPatchPointsFromGregoryBasis(
level, thisFace, facePoints, levelVertOffset);
}
// there's a short-cut when the face contains only 1 extraordinary vertex.
// (we can achieve this by isolating 2 levels)
// look for the extraordinary vertex
int irregular = -1;
for (int i = 0; i < 4; ++i) {
int valence = level->getVertexFaces(facePoints[i]).size();
if (valence != 4) {
if (irregular != -1) {
// more than one extraoridinary vertices.
// fallback to use GregoryBasis
return getPatchPointsFromGregoryBasis(
level, thisFace, facePoints, levelVertOffset);
}
irregular = i;
}
}
// faster B-spline endcap generation
return getPatchPoints(level, thisFace, irregular, facePoints,
levelVertOffset);
}
ConstIndexArray
EndCapBSplineBasisPatchFactory::getPatchPointsFromGregoryBasis(
Vtr::internal::Level const * level, Index thisFace,
ConstIndexArray facePoints, int levelVertOffset) {
// XXX: For now, always create new 16 indices for each patch.
// we'll optimize later to share all regular control points with
// other patches as well as to try to make extra ordinary verts watertight.
@ -68,66 +115,386 @@ EndCapBSplineBasisPatchFactory::GetPatchPoints(
_patchPoints.push_back(_numVertices + offset);
++_numVertices;
}
GregoryBasis::ProtoBasis basis(*level, thisFace, levelVertOffset, -1);
// XXX: temporary hack. we should traverse topology and find existing
// vertices if available
//
// Reorder gregory basis stencils into regular bezier
GregoryBasis::ProtoBasis basis(*level, faceIndex, levelVertOffset, -1);
std::vector<GregoryBasis::Point> bezierCP;
bezierCP.reserve(16);
GregoryBasis::Point const *bezierCP[16];
bezierCP.push_back(basis.P[0]);
bezierCP.push_back(basis.Ep[0]);
bezierCP.push_back(basis.Em[1]);
bezierCP.push_back(basis.P[1]);
bezierCP[0] = &basis.P[0];
bezierCP[1] = &basis.Ep[0];
bezierCP[2] = &basis.Em[1];
bezierCP[3] = &basis.P[1];
bezierCP.push_back(basis.Em[0]);
bezierCP.push_back(basis.Fp[0]); // arbitrary
bezierCP.push_back(basis.Fp[1]); // arbitrary
bezierCP.push_back(basis.Ep[1]);
bezierCP[4] = &basis.Em[0];
bezierCP[5] = &basis.Fp[0]; // arbitrary
bezierCP[6] = &basis.Fp[1]; // arbitrary
bezierCP[7] = &basis.Ep[1];
bezierCP.push_back(basis.Ep[3]);
bezierCP.push_back(basis.Fp[3]); // arbitrary
bezierCP.push_back(basis.Fp[2]); // arbitrary
bezierCP.push_back(basis.Em[2]);
bezierCP[8] = &basis.Ep[3];
bezierCP[9] = &basis.Fp[3]; // arbitrary
bezierCP[10] = &basis.Fp[2]; // arbitrary
bezierCP[11] = &basis.Em[2];
bezierCP.push_back(basis.P[3]);
bezierCP.push_back(basis.Em[3]);
bezierCP.push_back(basis.Ep[2]);
bezierCP.push_back(basis.P[2]);
bezierCP[12] = &basis.P[3];
bezierCP[13] = &basis.Em[3];
bezierCP[14] = &basis.Ep[2];
bezierCP[15] = &basis.P[2];
// all stencils should have the same capacity.
int stencilCapacity = basis.P[0].GetCapacity();
// Apply basis conversion from bezier to b-spline
float Q[4][4] = {{ 6, -7, 2, 0},
{ 0, 2, -1, 0},
{ 0, -1, 2, 0},
{ 0, 2, -7, 6} };
std::vector<GregoryBasis::Point> H(16);
Vtr::internal::StackBuffer<GregoryBasis::Point, 16> H(16);
for (int i = 0; i < 4; ++i) {
for (int j = 0; j < 4; ++j) {
for (int k = 0; k < 4; ++k) {
if (isWeightNonZero(Q[i][k])) H[i*4+j] += bezierCP[j+k*4] * Q[i][k];
H[i*4+j].Clear(stencilCapacity);
for (int k = 0; k < 4; ++k) {
if (isWeightNonZero(Q[i][k])) {
H[i*4+j].AddWithWeight(*bezierCP[j+k*4], Q[i][k]);
}
}
}
}
for (int i = 0; i < 4; ++i) {
for (int j = 0; j < 4; ++j) {
GregoryBasis::Point p;
GregoryBasis::Point p(stencilCapacity);
for (int k = 0; k < 4; ++k) {
if (isWeightNonZero(Q[j][k])) p += H[i*4+k] * Q[j][k];
if (isWeightNonZero(Q[j][k])) {
p.AddWithWeight(H[i*4+k], Q[j][k]);
}
}
_vertexStencils.push_back(p);
}
}
int varyingIndices[] = { 0, 0, 1, 1,
0, 0, 1, 1,
3, 3, 2, 2,
3, 3, 2, 2,};
for (int i = 0; i < 16; ++i) {
_varyingStencils.push_back(basis.V[varyingIndices[i]]);
GregoryBasis::Point p(1);
p.AddWithWeight(facePoints[varyingIndices[i]] + levelVertOffset, 1.0f);
_varyingStencils.push_back(p);
}
++_numPatches;
return ConstIndexArray(&_patchPoints[(_numPatches-1)*16], 16);
}
void
EndCapBSplineBasisPatchFactory::computeLimitStencils(
Vtr::internal::Level const *level,
ConstIndexArray facePoints, int vid,
GregoryBasis::Point *P, GregoryBasis::Point *Ep, GregoryBasis::Point *Em)
{
int maxvalence = level->getMaxValence();
Vtr::internal::StackBuffer<Index, 40> manifoldRing;
manifoldRing.SetSize(maxvalence*2);
int ringSize =
level->gatherQuadRegularRingAroundVertex(
facePoints[vid], manifoldRing, /*fvarChannel*/-1);
// note: this function has not yet supported boundary.
assert((ringSize & 1) == 0);
int valence = ringSize/2;
int stencilCapacity = ringSize + 1;
Index start = -1, prev = -1;
{
int ip = (vid+1)%4, im = (vid+3)%4;
for (int i = 0; i < valence; ++i) {
if (manifoldRing[i*2] == facePoints[ip])
start = i;
if (manifoldRing[i*2] == facePoints[im])
prev = i;
}
}
assert(start > -1 && prev > -1);
GregoryBasis::Point e0, e1;
e0.Clear(stencilCapacity);
e1.Clear(stencilCapacity);
float t = 2.0f * float(M_PI) / float(valence);
float ef = 1.0f / (valence * (cosf(t) + 5.0f +
sqrtf((cosf(t) + 9) * (cosf(t) + 1)))/16.0f);
for (int i = 0; i < valence; ++i) {
Index ip = (i+1)%valence;
Index idx_neighbor = (manifoldRing[2*i + 0]),
idx_diagonal = (manifoldRing[2*i + 1]),
idx_neighbor_p = (manifoldRing[2*ip + 0]);
float d = float(valence)+5.0f;
GregoryBasis::Point f(4);
f.AddWithWeight(facePoints[vid], float(valence)/d);
f.AddWithWeight(idx_neighbor_p, 2.0f/d);
f.AddWithWeight(idx_neighbor, 2.0f/d);
f.AddWithWeight(idx_diagonal, 1.0f/d);
P->AddWithWeight(f, 1.0f/float(valence));
float c0 = 0.5*cosf((float(2*M_PI) * float(i)/float(valence)))
+ 0.5*cosf((float(2*M_PI) * float(ip)/float(valence)));
float c1 = 0.5*sinf((float(2*M_PI) * float(i)/float(valence)))
+ 0.5*sinf((float(2*M_PI) * float(ip)/float(valence)));
e0.AddWithWeight(f, c0*ef);
e1.AddWithWeight(f, c1*ef);
}
*Ep = *P;
Ep->AddWithWeight(e0, cosf((float(2*M_PI) * float(start)/float(valence))));
Ep->AddWithWeight(e1, sinf((float(2*M_PI) * float(start)/float(valence))));
*Em = *P;
Em->AddWithWeight(e0, cosf((float(2*M_PI) * float(prev)/float(valence))));
Em->AddWithWeight(e1, sinf((float(2*M_PI) * float(prev)/float(valence))));
}
ConstIndexArray
EndCapBSplineBasisPatchFactory::getPatchPoints(
Vtr::internal::Level const *level, Index thisFace,
Index extraOrdinaryIndex, ConstIndexArray facePoints,
int levelVertOffset) {
// Fast B-spline endcap construction.
//
// This function assumes the patch is not on boundary
// and it contains only 1 extraordinary vertex.
// The location of the extraoridnary vertex can be one of
// 0-ring quad corner.
//
// B-Spline control point gathering indice
//
// [5] (4)---(15)--(14) 0 : extraoridnary vertex
// | | |
// | | | 1,2,3,9,10,11,12,13 :
// (6)----0-----3-----13 B-Spline control points, gathered by
// | | | | traversing topology
// | | | |
// (7)----1-----2-----12 (5) :
// | | | | Fitted patch point (from limit position)
// | | | |
// (8)----9-----10----11 (4),(6),(7),(8),(14),(15) :
// Fitted patch points
// (from limit tangents and bezier CP)
//
static int const rotation[4][16] = {
/*= 0 ring =*/ /* ================ 1 ring ================== */
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 ,14, 15},
{ 1, 2, 3, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 4, 5, 6},
{ 2, 3, 0, 1, 10, 11, 12, 13, 14, 15, 4, 5, 6, 7, 8, 9},
{ 3, 0, 1, 2, 13, 14, 15, 4, 5, 6, 7, 8, 9, 10, 11, 12}};
int maxvalence = level->getMaxValence();
int stencilCapacity = 2*maxvalence + 16;
GregoryBasis::Point P(stencilCapacity), Em(stencilCapacity), Ep(stencilCapacity);
computeLimitStencils(level, facePoints, extraOrdinaryIndex, &P, &Em, &Ep);
P.OffsetIndices(levelVertOffset);
Em.OffsetIndices(levelVertOffset);
Ep.OffsetIndices(levelVertOffset);
// returning patch indices (a mix of cage vertices and patch points)
int patchPoints[16];
// first, we traverse the topology to gather 15 vertices. This process is
// similar to Vtr::Level::gatherQuadRegularInteriorPatchPoints
int pointIndex = 0;
int vid = extraOrdinaryIndex;
// 0-ring
patchPoints[pointIndex++] = facePoints[0] + levelVertOffset;
patchPoints[pointIndex++] = facePoints[1] + levelVertOffset;
patchPoints[pointIndex++] = facePoints[2] + levelVertOffset;
patchPoints[pointIndex++] = facePoints[3] + levelVertOffset;
// 1-ring
ConstIndexArray thisFaceVerts = level->getFaceVertices(thisFace);
for (int i = 0; i < 4; ++i) {
Index v = thisFaceVerts[i];
ConstIndexArray vFaces = level->getVertexFaces(v);
ConstLocalIndexArray vInFaces = level->getVertexFaceLocalIndices(v);
if (i != vid) {
// regular corner
int thisFaceInVFaces = vFaces.FindIndexIn4Tuple(thisFace);
int intFaceInVFaces = (thisFaceInVFaces + 2) & 0x3;
Index intFace = vFaces[intFaceInVFaces];
int vInIntFace = vInFaces[intFaceInVFaces];
ConstIndexArray facePoints = level->getFaceVertices(intFace);
patchPoints[pointIndex++] =
facePoints[(vInIntFace + 1)&3] + levelVertOffset;
patchPoints[pointIndex++] =
facePoints[(vInIntFace + 2)&3] + levelVertOffset;
patchPoints[pointIndex++] =
facePoints[(vInIntFace + 3)&3] + levelVertOffset;
} else {
// irregular corner
int thisFaceInVFaces = vFaces.FindIndex(thisFace);
int valence = vFaces.size();
{
// first
int intFaceInVFaces = (thisFaceInVFaces + 1) % valence;
Index intFace = vFaces[intFaceInVFaces];
int vInIntFace = vInFaces[intFaceInVFaces];
ConstIndexArray facePoints = level->getFaceVertices(intFace);
patchPoints[pointIndex++] =
facePoints[(vInIntFace+3)&3] + levelVertOffset;
}
{
// middle: (n-vertices) needs a limit stencil. skip for now
pointIndex++;
}
{
// end
int intFaceInVFaces = (thisFaceInVFaces + (valence-1)) %valence;
Index intFace = vFaces[intFaceInVFaces];
int vInIntFace = vInFaces[intFaceInVFaces];
ConstIndexArray facePoints = level->getFaceVertices(intFace);
patchPoints[pointIndex++] =
facePoints[(vInIntFace+1)&3] + levelVertOffset;
}
}
}
// stencils for patch points
GregoryBasis::Point X5(stencilCapacity),
X6(stencilCapacity),
X7(stencilCapacity),
X8(stencilCapacity),
X4(stencilCapacity),
X15(stencilCapacity),
X14(stencilCapacity);
// limit tangent : Em
// X6 = 1/3 * ( 36Em - 16P0 - 8P1 - 2P2 - 4P3 - P6 - 2P7)
// X7 = 1/3 * (-18Em + 8P0 + 4P1 + P2 + 2P3 + 2P6 + 4P7)
// X8 = X6 + (P8-P6)
X6.AddWithWeight(Em, 36.0f/3.0f);
X6.AddWithWeight(patchPoints[rotation[vid][0]], -16.0f/3.0f);
X6.AddWithWeight(patchPoints[rotation[vid][1]], -8.0f/3.0f);
X6.AddWithWeight(patchPoints[rotation[vid][2]], -2.0f/3.0f);
X6.AddWithWeight(patchPoints[rotation[vid][3]], -4.0f/3.0f);
X6.AddWithWeight(patchPoints[rotation[vid][6]], -1.0f/3.0f);
X6.AddWithWeight(patchPoints[rotation[vid][7]], -2.0f/3.0f);
X7.AddWithWeight(Em, -18.0f/3.0f);
X7.AddWithWeight(patchPoints[rotation[vid][0]], 8.0f/3.0f);
X7.AddWithWeight(patchPoints[rotation[vid][1]], 4.0f/3.0f);
X7.AddWithWeight(patchPoints[rotation[vid][2]], 1.0f/3.0f);
X7.AddWithWeight(patchPoints[rotation[vid][3]], 2.0f/3.0f);
X7.AddWithWeight(patchPoints[rotation[vid][6]], 2.0f/3.0f);
X7.AddWithWeight(patchPoints[rotation[vid][7]], 4.0f/3.0f);
X8 = X6;
X8.AddWithWeight(patchPoints[rotation[vid][8]], 1.0f);
X8.AddWithWeight(patchPoints[rotation[vid][6]], -1.0f);
// limit tangent : Ep
// X4 = 1/3 * ( 36EP - 16P0 - 4P1 - 2P15 - 2P2 - 8P3 - P4)
// X15 = 1/3 * (-18EP + 8P0 + 2P1 + 4P15 + P2 + 4P3 + 2P4)
// X14 = X4 + (P14 - P4)
X4.AddWithWeight(Ep, 36.0f/3.0f);
X4.AddWithWeight(patchPoints[rotation[vid][0]], -16.0f/3.0f);
X4.AddWithWeight(patchPoints[rotation[vid][1]], -4.0f/3.0f);
X4.AddWithWeight(patchPoints[rotation[vid][2]], -2.0f/3.0f);
X4.AddWithWeight(patchPoints[rotation[vid][3]], -8.0f/3.0f);
X4.AddWithWeight(patchPoints[rotation[vid][4]], -1.0f/3.0f);
X4.AddWithWeight(patchPoints[rotation[vid][15]], -2.0f/3.0f);
X15.AddWithWeight(Ep, -18.0f/3.0f);
X15.AddWithWeight(patchPoints[rotation[vid][0]], 8.0f/3.0f);
X15.AddWithWeight(patchPoints[rotation[vid][1]], 2.0f/3.0f);
X15.AddWithWeight(patchPoints[rotation[vid][2]], 1.0f/3.0f);
X15.AddWithWeight(patchPoints[rotation[vid][3]], 4.0f/3.0f);
X15.AddWithWeight(patchPoints[rotation[vid][4]], 2.0f/3.0f);
X15.AddWithWeight(patchPoints[rotation[vid][15]], 4.0f/3.0f);
X14 = X4;
X14.AddWithWeight(patchPoints[rotation[vid][14]], 1.0f);
X14.AddWithWeight(patchPoints[rotation[vid][4]], -1.0f);
// limit corner (16th free vert)
// X5 = 36LP - 16P0 - 4(P1 + P3 + P4 + P6) - (P2 + P7 + P15)
X5.AddWithWeight(P, 36.0f);
X5.AddWithWeight(patchPoints[rotation[vid][0]], -16.0f);
X5.AddWithWeight(patchPoints[rotation[vid][1]], -4.0f);
X5.AddWithWeight(patchPoints[rotation[vid][3]], -4.0f);
X5.AddWithWeight(X4, -4.0f);
X5.AddWithWeight(X6, -4.0f);
X5.AddWithWeight(patchPoints[rotation[vid][2]], -1.0f);
X5.AddWithWeight(X7, -1.0f);
X5.AddWithWeight(X15, -1.0f);
// [5] (4)---(15)--(14) 0 : extraoridnary vertex
// | | |
// | | | 1,2,3,9,10,11,12,13 :
// (6)----0-----3-----13 B-Spline control points, gathered by
// | | | | traversing topology
// | | | |
// (7)----1-----2-----12 (5) :
// | | | | Fitted patch point (from limit position)
// | | | |
// (8)----9-----10----11 (4),(6),(7),(8),(14),(15) :
//
// patch point stencils will be stored in this order
// (Em) 6, 7, 8, (Ep) 4, 15, 14, (P) 5
int offset = _refiner->GetNumVerticesTotal();
GregoryBasis::Point V0, V1, V3;
V0.AddWithWeight(facePoints[vid] + levelVertOffset, 1.0f);
V1.AddWithWeight(facePoints[(vid+1)&3] + levelVertOffset, 1.0f);
V3.AddWithWeight(facePoints[(vid+3)&3] + levelVertOffset, 1.0f);
// push back to stencils;
patchPoints[3* vid + 6] = (_numVertices++) + offset;
_vertexStencils.push_back(X6);
_varyingStencils.push_back(V0);
patchPoints[3*((vid+1)%4) + 4] = (_numVertices++) + offset;
_vertexStencils.push_back(X7);
_varyingStencils.push_back(V1);
patchPoints[3*((vid+1)%4) + 5] = (_numVertices++) + offset;
_vertexStencils.push_back(X8);
_varyingStencils.push_back(V1);
patchPoints[3* vid + 4] = (_numVertices++) + offset;
_vertexStencils.push_back(X4);
_varyingStencils.push_back(V0);
patchPoints[3*((vid+3)%4) + 6] = (_numVertices++) + offset;
_vertexStencils.push_back(X15);
_varyingStencils.push_back(V3);
patchPoints[3*((vid+3)%4) + 5] = (_numVertices++) + offset;
_vertexStencils.push_back(X14);
_varyingStencils.push_back(V3);
patchPoints[3*vid + 5] = (_numVertices++) + offset;
_vertexStencils.push_back(X5);
_varyingStencils.push_back(V0);
// reorder into UV row-column
static int const permuteRegular[16] =
{ 5, 6, 7, 8, 4, 0, 1, 9, 15, 3, 2, 10, 14, 13, 12, 11 };
for (int i = 0; i < 16; ++i) {
_patchPoints.push_back(patchPoints[permuteRegular[i]]);
}
++_numPatches;
return ConstIndexArray(&_patchPoints[(_numPatches-1)*16], 16);
}

View File

@ -91,6 +91,22 @@ public:
}
private:
ConstIndexArray getPatchPointsFromGregoryBasis(
Vtr::internal::Level const * level, Index thisFace,
ConstIndexArray facePoints,
int levelVertOffset);
ConstIndexArray getPatchPoints(
Vtr::internal::Level const *level, Index thisFace,
Index extraOrdinaryIndex, ConstIndexArray facePoints,
int levelVertOffset);
void computeLimitStencils(
Vtr::internal::Level const *level,
ConstIndexArray facePoints, int vid,
GregoryBasis::Point *P, GregoryBasis::Point *Ep, GregoryBasis::Point *Em);
TopologyRefiner const *_refiner;
GregoryBasis::PointsVector _vertexStencils;
GregoryBasis::PointsVector _varyingStencils;

View File

@ -47,6 +47,15 @@ EndCapGregoryBasisPatchFactory::EndCapGregoryBasisPatchFactory(
// Sanity check: the mesh must be adaptively refined
assert(not refiner.IsUniform());
// Reserve the patch point stencils. Ideally topology refiner
// would have an API to return how many endcap patches will be required.
// Instead we conservatively estimate by the number of patches at the
// finest level.
int numMaxLevelFaces = refiner.GetLevel(refiner.GetMaxLevel()).GetNumFaces();
_vertexStencils.reserve(numMaxLevelFaces*20);
_varyingStencils.reserve(numMaxLevelFaces*20);
}
//

View File

@ -36,57 +36,6 @@ namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Far {
// Builds a table of local indices pairs for each vertex of the patch.
//
// o
// N0 |
// | ....
// | .... : Gregory patch
// o ------ o ------ o ....
// N1 V | .... M3
// | .......
// | .......
// o .......
// N2
//
// [...] [N2 - N3] [...]
//
// Each value pair is composed of 2 index values in range [0-4[ pointing
// to the 2 neighbor vertices of the vertex 'V' belonging to the Gregory patch.
// Neighbor ordering is valence CCW and must match the winding of the 1-ring
// vertices.
//
static void
getQuadOffsets(Vtr::internal::Level const & level, Vtr::Index fIndex,
Vtr::Index offsets[], int fvarChannel=-1) {
Far::ConstIndexArray fPoints = (fvarChannel<0) ?
level.getFaceVertices(fIndex) :
level.getFaceFVarValues(fIndex, fvarChannel);
assert(fPoints.size()==4);
for (int i = 0; i < 4; ++i) {
Vtr::Index vIndex = fPoints[i];
Vtr::ConstIndexArray vFaces = level.getVertexFaces(vIndex),
vEdges = level.getVertexEdges(vIndex);
int thisFaceInVFaces = -1;
for (int j = 0; j < vFaces.size(); ++j) {
if (fIndex == vFaces[j]) {
thisFaceInVFaces = j;
break;
}
}
assert(thisFaceInVFaces != -1);
// we have to use the number of incident edges to modulo the local index
// because there could be 2 consecutive edges in the face belonging to
// the Gregory patch.
offsets[i*2+0] = thisFaceInVFaces;
offsets[i*2+1] = (thisFaceInVFaces + 1)%vEdges.size();
}
}
int
GregoryBasis::ProtoBasis::GetNumElements() const {
@ -153,6 +102,8 @@ GregoryBasis::ProtoBasis::ProtoBasis(
Vtr::internal::Level const & level, Index faceIndex,
int levelVertOffset, int fvarChannel) {
// XXX: This function is subject to refactor in 3.1
Vtr::ConstIndexArray facePoints = (fvarChannel<0) ?
level.getFaceVertices(faceIndex) :
level.getFaceFVarValues(faceIndex, fvarChannel);
@ -162,27 +113,45 @@ GregoryBasis::ProtoBasis::ProtoBasis(
valences[4],
zerothNeighbors[4];
Vtr::internal::StackBuffer<Index,40> manifoldRing((maxvalence+2)*2);
// XXX: a temporary hack for the performance issue
// ensure Point has a capacity for the neighborhood of
// 2 extraordinary verts + 2 regular verts
// worse case: n-valence verts at a corner of n-gon.
int stencilCapacity =
4/*0-ring*/ + 2*(2*(maxvalence-2)/*1-ring around extraordinaries*/
+ 2/*1-ring around regulars, excluding shared ones*/);
Vtr::internal::StackBuffer<Point,16> f(maxvalence);
Vtr::internal::StackBuffer<Point,64> r(maxvalence*4);
Point e0[4], e1[4];
for (int i = 0; i < 4; ++i) {
P[i].Clear(stencilCapacity);
e0[i].Clear(stencilCapacity);
e1[i].Clear(stencilCapacity);
V[i].Clear(1);
}
Point e0[4], e1[4], org[4];
Vtr::internal::StackBuffer<Index, 40> manifoldRings[4];
manifoldRings[0].SetSize(maxvalence*2);
manifoldRings[1].SetSize(maxvalence*2);
manifoldRings[2].SetSize(maxvalence*2);
manifoldRings[3].SetSize(maxvalence*2);
Vtr::internal::StackBuffer<Point, 10> f(maxvalence);
Vtr::internal::StackBuffer<Point, 40> r(maxvalence*4);
// the first phase
for (int vid=0; vid<4; ++vid) {
org[vid] = facePoints[vid];
// save for varying stencils
V[vid] = facePoints[vid];
V[vid].AddWithWeight(facePoints[vid], 1.0f);
int ringSize =
level.gatherQuadRegularRingAroundVertex(
facePoints[vid], manifoldRing, fvarChannel);
facePoints[vid], manifoldRings[vid], fvarChannel);
int valence;
if (ringSize & 1) {
// boundary vertex
manifoldRing[ringSize] = manifoldRing[ringSize-1];
manifoldRings[vid][ringSize] = manifoldRings[vid][ringSize-1];
++ringSize;
valence = -ringSize/2;
} else {
@ -196,21 +165,19 @@ GregoryBasis::ProtoBasis::ProtoBasis(
zerothNeighbor=0,
ibefore=0;
Point pos(facePoints[vid]);
for (int i=0; i<ivalence; ++i) {
Index im = (i+ivalence-1)%ivalence,
ip = (i+1)%ivalence;
Index idx_neighbor = (manifoldRing[2*i + 0]),
idx_diagonal = (manifoldRing[2*i + 1]),
idx_neighbor_p = (manifoldRing[2*ip + 0]),
idx_neighbor_m = (manifoldRing[2*im + 0]),
idx_diagonal_m = (manifoldRing[2*im + 1]);
Index idx_neighbor = (manifoldRings[vid][2*i + 0]),
idx_diagonal = (manifoldRings[vid][2*i + 1]),
idx_neighbor_p = (manifoldRings[vid][2*ip + 0]),
idx_neighbor_m = (manifoldRings[vid][2*im + 0]),
idx_diagonal_m = (manifoldRings[vid][2*im + 1]);
bool boundaryNeighbor = (level.getVertexEdges(idx_neighbor).size() >
level.getVertexFaces(idx_neighbor).size());
level.getVertexFaces(idx_neighbor).size());
if (fvarChannel>=0) {
// XXXX manuelk need logic to check for boundary in fvar
@ -232,21 +199,22 @@ GregoryBasis::ProtoBasis::ProtoBasis(
}
}
Point neighbor(idx_neighbor),
diagonal(idx_diagonal),
neighbor_p(idx_neighbor_p),
neighbor_m(idx_neighbor_m),
diagonal_m(idx_diagonal_m);
float d = float(ivalence)+5.0f;
f[i].Clear(4);
f[i].AddWithWeight(facePoints[vid], float(ivalence)/d);
f[i].AddWithWeight(idx_neighbor_p, 2.0f/d);
f[i].AddWithWeight(idx_neighbor, 2.0f/d);
f[i].AddWithWeight(idx_diagonal, 1.0f/d);
P[vid].AddWithWeight(f[i], 1.0f/float(ivalence));
f[i] = (pos*float(ivalence) + (neighbor_p+neighbor)*2.0f + diagonal) / (float(ivalence)+5.0f);
P[vid] += f[i];
r[vid*maxvalence+i] = (neighbor_p-neighbor_m)/3.0f + (diagonal-diagonal_m)/6.0f;
int rid = vid * maxvalence + i;
r[rid].Clear(4);
r[rid].AddWithWeight(idx_neighbor_p, 1.0f/3.0f);
r[rid].AddWithWeight(idx_neighbor_m, -1.0f/3.0f);
r[rid].AddWithWeight(idx_diagonal, 1.0f/6.0f);
r[rid].AddWithWeight(idx_diagonal_m, -1.0f/6.0f);
}
P[vid] /= float(ivalence);
zerothNeighbors[vid] = zerothNeighbor;
if (currentNeighbor == 1) {
boundaryEdgeNeighbors[1] = boundaryEdgeNeighbors[0];
@ -254,24 +222,27 @@ GregoryBasis::ProtoBasis::ProtoBasis(
for (int i=0; i<ivalence; ++i) {
int im = (i+ivalence-1)%ivalence;
Point e = (f[i]+f[im])*0.5f;
e0[vid] += e * csf(ivalence-3, 2*i);
e1[vid] += e * csf(ivalence-3, 2*i+1);
float c0 = 0.5f * csf(ivalence-3, 2*i);
float c1 = 0.5f * csf(ivalence-3, 2*i+1);
e0[vid].AddWithWeight(f[i ], c0);
e0[vid].AddWithWeight(f[im], c0);
e1[vid].AddWithWeight(f[i ], c1);
e1[vid].AddWithWeight(f[im], c1);
}
float ef = computeCoefficient(ivalence);
e0[vid] *= ef;
e1[vid] *= ef;
if (valence<0) {
Point b0(boundaryEdgeNeighbors[0]),
b1(boundaryEdgeNeighbors[1]);
// Boundary gregory case:
if (valence < 0) {
P[vid].Clear(stencilCapacity);
if (ivalence>2) {
P[vid] = (b0 + b1 + pos*4.0f)/6.0f;
P[vid].AddWithWeight(boundaryEdgeNeighbors[0], 1.0f/6.0f);
P[vid].AddWithWeight(boundaryEdgeNeighbors[1], 1.0f/6.0f);
P[vid].AddWithWeight(facePoints[vid], 4.0f/6.0f);
} else {
P[vid] = pos;
P[vid].AddWithWeight(facePoints[vid], 1.0f);
}
float k = float(float(ivalence) - 1.0f); //k is the number of faces
float c = cosf(float(M_PI)/k);
@ -280,10 +251,17 @@ GregoryBasis::ProtoBasis::ProtoBasis(
float alpha_0k = -((1.0f+2.0f*c)*sqrtf(1.0f+c))/((3.0f*k+c)*sqrtf(1.0f-c));
float beta_0 = s/(3.0f*k + c);
Point diagonal(manifoldRing[2*zerothNeighbor + 1]);
int idx_diagonal = manifoldRings[vid][2*zerothNeighbor + 1];
e0[vid] = (b0 - b1)/6.0f;
e1[vid] = pos*gamma + diagonal*beta_0 + (b0 + b1)*alpha_0k;
e0[vid].Clear(stencilCapacity);
e0[vid].AddWithWeight(boundaryEdgeNeighbors[0], 1.0f/6.0f);
e0[vid].AddWithWeight(boundaryEdgeNeighbors[1], -1.0f/6.0f);
e1[vid].Clear(stencilCapacity);
e1[vid].AddWithWeight(facePoints[vid], gamma);
e1[vid].AddWithWeight(idx_diagonal, beta_0);
e1[vid].AddWithWeight(boundaryEdgeNeighbors[0], alpha_0k);
e1[vid].AddWithWeight(boundaryEdgeNeighbors[1], alpha_0k);
for (int x=1; x<ivalence-1; ++x) {
@ -292,50 +270,68 @@ GregoryBasis::ProtoBasis::ProtoBasis(
float alpha = (4.0f*sinf((float(M_PI) * float(x))/k))/(3.0f*k+c),
beta = (sinf((float(M_PI) * float(x))/k) + sinf((float(M_PI) * float(x+1))/k))/(3.0f*k+c);
Index idx_neighbor = manifoldRing[2*curri + 0],
idx_diagonal = manifoldRing[2*curri + 1];
Index idx_neighbor = manifoldRings[vid][2*curri + 0],
idx_diagonal = manifoldRings[vid][2*curri + 1];
Point p_neighbor(idx_neighbor),
p_diagonal(idx_diagonal);
e1[vid] += p_neighbor*alpha + p_diagonal*beta;
e1[vid].AddWithWeight(idx_neighbor, alpha);
e1[vid].AddWithWeight(idx_diagonal, beta);
}
e1[vid] /= 3.0f;
e1[vid] *= 1.0f/3.0f;
}
}
Index quadOffsets[8];
getQuadOffsets(level, faceIndex, quadOffsets, fvarChannel);
// the second phase
for (int vid=0; vid<4; ++vid) {
int n = abs(valences[vid]),
ivalence = n;
int n = abs(valences[vid]);
int ivalence = n;
int ip = (vid+1)%4,
im = (vid+3)%4,
np = abs(valences[ip]),
nm = abs(valences[im]);
Index start = quadOffsets[vid*2+0],
prev = quadOffsets[vid*2+1],
start_m = quadOffsets[im*2],
prev_p = quadOffsets[ip*2+1];
Index start = -1, prev = -1, start_m = -1, prev_p = -1;
for (int i = 0; i < n; ++i) {
if (manifoldRings[vid][i*2] == facePoints[ip])
start = i;
if (manifoldRings[vid][i*2] == facePoints[im])
prev = i;
}
for (int i = 0; i < np; ++i) {
if (manifoldRings[ip][i*2] == facePoints[vid]) {
prev_p = i;
break;
}
}
for (int i = 0; i < nm; ++i) {
if (manifoldRings[im][i*2] == facePoints[vid]) {
start_m = i;
break;
}
}
assert(start != -1 && prev != -1 && start_m != -1 && prev_p != -1);
Point Em_ip, Ep_im;
Point Em_ip = P[ip];
Point Ep_im = P[im];
if (valences[ip]<-2) {
Index j = (np + prev_p - zerothNeighbors[ip]) % np;
Em_ip = P[ip] + e0[ip]*cosf((float(M_PI)*j)/float(np-1)) + e1[ip]*sinf((float(M_PI)*j)/float(np-1));
Em_ip.AddWithWeight(e0[ip], cosf((float(M_PI)*j)/float(np-1)));
Em_ip.AddWithWeight(e1[ip], sinf((float(M_PI)*j)/float(np-1)));
} else {
Em_ip = P[ip] + e0[ip]*csf(np-3,2*prev_p) + e1[ip]*csf(np-3,2*prev_p+1);
Em_ip.AddWithWeight(e0[ip], csf(np-3, 2*prev_p));
Em_ip.AddWithWeight(e1[ip], csf(np-3, 2*prev_p+1));
}
if (valences[im]<-2) {
Index j = (nm + start_m - zerothNeighbors[im]) % nm;
Ep_im = P[im] + e0[im]*cosf((float(M_PI)*j)/float(nm-1)) + e1[im]*sinf((float(M_PI)*j)/float(nm-1));
Ep_im.AddWithWeight(e0[im], cosf((float(M_PI)*j)/float(nm-1)));
Ep_im.AddWithWeight(e1[im], sinf((float(M_PI)*j)/float(nm-1)));
} else {
Ep_im = P[im] + e0[im]*csf(nm-3,2*start_m) + e1[im]*csf(nm-3,2*start_m+1);
Ep_im.AddWithWeight(e0[im], csf(nm-3, 2*start_m));
Ep_im.AddWithWeight(e1[im], csf(nm-3, 2*start_m+1));
}
if (valences[vid] < 0) {
@ -355,12 +351,25 @@ GregoryBasis::ProtoBasis::ProtoBasis(
float s1 = 3.0f - 2.0f*csf(n-3,2)-csf(np-3,2),
s2 = 2.0f*csf(n-3,2),
s3 = 3.0f -2.0f*cosf(2.0f*float(M_PI)/float(n)) - cosf(2.0f*float(M_PI)/float(nm));
Ep[vid] = P[vid];
Ep[vid].AddWithWeight(e0[vid], csf(n-3, 2*start));
Ep[vid].AddWithWeight(e1[vid], csf(n-3, 2*start +1));
Ep[vid] = P[vid] + e0[vid]*csf(n-3, 2*start) + e1[vid]*csf(n-3, 2*start +1);
Em[vid] = P[vid] + e0[vid]*csf(n-3, 2*prev ) + e1[vid]*csf(n-3, 2*prev + 1);
Fp[vid] = (P[vid]*csf(np-3,2) + Ep[vid]*s1 + Em_ip*s2 + rp[start])/3.0f;
Fm[vid] = (P[vid]*csf(nm-3,2) + Em[vid]*s3 + Ep_im*s2 - rp[prev])/3.0f;
Em[vid] = P[vid];
Em[vid].AddWithWeight(e0[vid], csf(n-3, 2*prev ));
Em[vid].AddWithWeight(e1[vid], csf(n-3, 2*prev + 1));
Fp[vid].Clear(stencilCapacity);
Fp[vid].AddWithWeight(P[vid], csf(np-3, 2)/3.0f);
Fp[vid].AddWithWeight(Ep[vid], s1/3.0f);
Fp[vid].AddWithWeight(Em_ip, s2/3.0f);
Fp[vid].AddWithWeight(rp[start], 1.0f/3.0f);
Fm[vid].Clear(stencilCapacity);
Fm[vid].AddWithWeight(P[vid], csf(nm-3, 2)/3.0f);
Fm[vid].AddWithWeight(Em[vid], s3/3.0f);
Fm[vid].AddWithWeight(Ep_im, s2/3.0f);
Fm[vid].AddWithWeight(rp[prev], -1.0f/3.0f);
} else if (valences[vid] < -2) {
Index jp = (ivalence + start - zerothNeighbors[vid]) % ivalence,
@ -370,24 +379,59 @@ GregoryBasis::ProtoBasis::ProtoBasis(
s2 = 2*csf(n-3,2),
s3 = 3.0f-2.0f*cosf(2.0f*float(M_PI)/n)-cosf(2.0f*float(M_PI)/nm);
Ep[vid] = P[vid] + e0[vid]*cosf((float(M_PI)*jp)/float(ivalence-1)) + e1[vid]*sinf((float(M_PI)*jp)/float(ivalence-1));
Em[vid] = P[vid] + e0[vid]*cosf((float(M_PI)*jm)/float(ivalence-1)) + e1[vid]*sinf((float(M_PI)*jm)/float(ivalence-1));
Fp[vid] = (P[vid]*csf(np-3,2) + Ep[vid]*s1 + Em_ip*s2 + rp[start])/3.0f;
Fm[vid] = (P[vid]*csf(nm-3,2) + Em[vid]*s3 + Ep_im*s2 - rp[prev])/3.0f;
Ep[vid] = P[vid];
Ep[vid].AddWithWeight(e0[vid], cosf((float(M_PI)*jp)/float(ivalence-1)));
Ep[vid].AddWithWeight(e1[vid], sinf((float(M_PI)*jp)/float(ivalence-1)));
Em[vid] = P[vid];
Em[vid].AddWithWeight(e0[vid], cosf((float(M_PI)*jm)/float(ivalence-1)));
Em[vid].AddWithWeight(e1[vid], sinf((float(M_PI)*jm)/float(ivalence-1)));
Fp[vid].Clear(stencilCapacity);
Fp[vid].AddWithWeight(P[vid], csf(np-3,2)/3.0f);
Fp[vid].AddWithWeight(Ep[vid], s1/3.0f);
Fp[vid].AddWithWeight(Em_ip, s2/3.0f);
Fp[vid].AddWithWeight(rp[start], 1.0f/3.0f);
Fm[vid].Clear(stencilCapacity);
Fm[vid].AddWithWeight(P[vid], csf(nm-3,2)/3.0f);
Fm[vid].AddWithWeight(Em[vid], s3/3.0f);
Fm[vid].AddWithWeight(Ep_im, s2/3.0f);
Fm[vid].AddWithWeight(rp[prev], -1.0f/3.0f);
if (valences[im]<0) {
s1=3-2*csf(n-3,2)-csf(np-3,2);
Fp[vid] = Fm[vid] = (P[vid]*csf(np-3,2) + Ep[vid]*s1 + Em_ip*s2 + rp[start])/3.0f;
Fp[vid].Clear(stencilCapacity);
Fp[vid].AddWithWeight(P[vid], csf(np-3,2)/3.0f);
Fp[vid].AddWithWeight(Ep[vid], s1/3.0f);
Fp[vid].AddWithWeight(Em_ip, s2/3.0f);
Fp[vid].AddWithWeight(rp[start], 1.0f/3.0f);
Fm[vid] = Fp[vid];
} else if (valences[ip]<0) {
s1 = 3.0f-2.0f*cosf(2.0f*float(M_PI)/n)-cosf(2.0f*float(M_PI)/nm);
Fm[vid] = Fp[vid] = (P[vid]*csf(nm-3,2) + Em[vid]*s1 + Ep_im*s2 - rp[prev])/3.0f;
Fm[vid].Clear(stencilCapacity);
Fm[vid].AddWithWeight(P[vid], csf(nm-3,2)/3.0f);
Fm[vid].AddWithWeight(Em[vid], s1/3.0f);
Fm[vid].AddWithWeight(Ep_im, s2/3.0f);
Fm[vid].AddWithWeight(rp[prev], -1.0f/3.0f);
Fp[vid] = Fm[vid];
}
} else if (valences[vid]==-2) {
Ep[vid].Clear(stencilCapacity);
Ep[vid].AddWithWeight(facePoints[vid], 2.0f/3.0f);
Ep[vid].AddWithWeight(facePoints[ip], 1.0f/3.0f);
Ep[vid] = (org[vid]*2.0f + org[ip])/3.0f;
Em[vid] = (org[vid]*2.0f + org[im])/3.0f;
Fp[vid] = Fm[vid] = (org[vid]*4.0f + org[((vid+2)%n)] + org[ip]*2.0f + org[im]*2.0f)/9.0f;
Em[vid].Clear(stencilCapacity);
Em[vid].AddWithWeight(facePoints[vid], 2.0f/3.0f);
Em[vid].AddWithWeight(facePoints[im], 1.0f/3.0f);
Fp[vid].Clear(stencilCapacity);
Fp[vid].AddWithWeight(facePoints[vid], 4.0f/9.0f);
Fp[vid].AddWithWeight(facePoints[((vid+2)%n)], 1.0f/9.0f);
Fp[vid].AddWithWeight(facePoints[ip], 2.0f/9.0f);
Fp[vid].AddWithWeight(facePoints[im], 2.0f/9.0f);
Fm[vid] = Fp[vid];
}
}
@ -429,16 +473,7 @@ GregoryBasis::CreateStencilTable(PointsVector const &stencils) {
float * weights = &stencilTable->_weights[0];
for (int i = 0; i < nStencils; ++i) {
GregoryBasis::Point const &src = stencils[i];
int size = src.GetSize();
memcpy(indices, src.GetIndices(), size*sizeof(Index));
memcpy(weights, src.GetWeights(), size*sizeof(float));
*sizes = size;
indices += size;
weights += size;
++sizes;
stencils[i].Copy(&sizes, &indices, &weights);
}
stencilTable->generateOffsets();

View File

@ -26,6 +26,7 @@
#define OPENSUBDIV3_FAR_GREGORY_BASIS_H
#include "../vtr/level.h"
#include "../vtr/stackBuffer.h"
#include "../far/types.h"
#include "../far/stencilTable.h"
#include <cstring>
@ -79,22 +80,15 @@ public:
//
class Point {
public:
static const int RESERVED_ENTRY_SIZE = 64;
// 40 means up to valence=10 is on stack
static const int RESERVED_STENCIL_SIZE = 40;
Point() : _size(0) {
_indices.reserve(RESERVED_ENTRY_SIZE);
_weights.reserve(RESERVED_ENTRY_SIZE);
}
Point(Vtr::Index idx, float weight = 1.0f) {
_indices.reserve(RESERVED_ENTRY_SIZE);
_weights.reserve(RESERVED_ENTRY_SIZE);
_size = 1;
_indices.push_back(idx);
_weights.push_back(weight);
Point(int stencilCapacity=RESERVED_STENCIL_SIZE) : _size(0) {
_stencils.SetSize(stencilCapacity);
}
Point(Point const & other) {
_stencils.SetSize(other._stencils.GetSize());
*this = other;
}
@ -102,96 +96,81 @@ public:
return _size;
}
Vtr::Index const * GetIndices() const {
return &_indices[0];
int GetCapacity() const {
return _stencils.GetSize();
}
float const * GetWeights() const {
return &_weights[0];
void Clear(int capacity) {
_size = 0;
if ((int)_stencils.GetSize() < capacity) {
_stencils.SetSize(capacity);
}
}
void AddWithWeight(Vtr::Index idx, float weight) {
for (int i = 0; i < _size; ++i) {
if (_stencils[i].index == idx) {
_stencils[i].weight += weight;
return;
}
}
assert(_size < (int)_stencils.GetSize());
_stencils[_size].index = idx;
_stencils[_size].weight = weight;
++_size;
}
void AddWithWeight(Point const &src, float weight) {
for (int i = 0; i < src._size; ++i) {
AddWithWeight(src._stencils[i].index,
src._stencils[i].weight * weight);
}
}
Point & operator = (Point const & other) {
Clear(other.GetCapacity());
_size = other._size;
_indices = other._indices;
_weights = other._weights;
return *this;
}
Point & operator += (Point const & other) {
for (int i=0; i<other._size; ++i) {
Vtr::Index idx = findIndex(other._indices[i]);
_weights[idx] += other._weights[i];
}
return *this;
}
Point & operator -= (Point const & other) {
for (int i=0; i<other._size; ++i) {
Vtr::Index idx = findIndex(other._indices[i]);
_weights[idx] -= other._weights[i];
assert(_size <= (int)_stencils.GetSize());
for (int i = 0; i < _size; ++i) {
_stencils[i] = other._stencils[i];
}
return *this;
}
Point & operator *= (float f) {
for (int i=0; i<_size; ++i) {
_weights[i] *= f;
_stencils[i].weight *= f;
}
return *this;
}
Point & operator /= (float f) {
return (*this)*=(1.0f/f);
}
friend Point operator * (Point const & src, float f) {
Point p( src ); return p*=f;
}
friend Point operator / (Point const & src, float f) {
Point p( src ); return p*= (1.0f/f);
}
Point operator + (Point const & other) {
Point p(*this); return p+=other;
}
Point operator - (Point const & other) {
Point p(*this); return p-=other;
}
void OffsetIndices(Vtr::Index offset) {
for (int i=0; i<_size; ++i) {
_indices[i] += offset;
_stencils[i].index += offset;
}
}
void Copy(int ** size, Vtr::Index ** indices, float ** weights) const {
memcpy(*indices, &_indices[0], _size*sizeof(Vtr::Index));
memcpy(*weights, &_weights[0], _size*sizeof(float));
for (int i = 0; i < _size; ++i) {
**indices = _stencils[i].index;
**weights = _stencils[i].weight;
++(*indices);
++(*weights);
}
**size = _size;
*indices += _size;
*weights += _size;
++(*size);
}
private:
int findIndex(Vtr::Index idx) {
for (int i=0; i<_size; ++i) {
if (_indices[i]==idx) {
return i;
}
}
_indices.push_back(idx);
_weights.push_back(0.0f);
++_size;
return _size-1;
}
int _size;
std::vector<Vtr::Index> _indices;
std::vector<float> _weights;
struct Stencil {
Vtr::Index index;
float weight;
};
Vtr::internal::StackBuffer<Stencil, RESERVED_STENCIL_SIZE> _stencils;
};
//