mirror of
https://github.com/PixarAnimationStudios/OpenSubdiv
synced 2024-11-27 05:50:05 +00:00
Merge pull request #308 from nathan-at-digitalfish/new_face_vertex_kernels
New face vertex kernels
This commit is contained in:
commit
626921d82e
@ -74,15 +74,26 @@ FarCatmarkSubdivisionTablesFactory<T,U>::Create( FarMeshFactory<T,U> * meshFacto
|
||||
|
||||
FarSubdivisionTables * result = new FarSubdivisionTables(maxlevel, FarSubdivisionTables::CATMARK);
|
||||
|
||||
// Calculate the size of the face-vertex index table
|
||||
int minCoarseFaceValence = tablesFactory.GetMinCoarseFaceValence();
|
||||
int maxCoarseFaceValence = tablesFactory.GetMaxCoarseFaceValence();
|
||||
bool coarseMeshAllQuadFaces = minCoarseFaceValence == 4 && maxCoarseFaceValence == 4;
|
||||
bool coarseMeshAllTriQuadFaces = minCoarseFaceValence >= 3 && maxCoarseFaceValence <= 4;
|
||||
|
||||
int F_IT_size = tablesFactory.GetFaceVertsValenceSum();
|
||||
if (coarseMeshAllTriQuadFaces)
|
||||
F_IT_size += tablesFactory.GetNumCoarseTriangleFaces(); // add padding for tri faces
|
||||
|
||||
// Allocate memory for the indexing tables
|
||||
result->_F_ITa.resize(tablesFactory.GetNumFaceVerticesTotal(maxlevel)*2);
|
||||
result->_F_IT.resize(tablesFactory.GetFaceVertsValenceSum());
|
||||
if (!coarseMeshAllTriQuadFaces)
|
||||
result->_F_ITa.resize(tablesFactory.GetNumFaceVerticesTotal(1) * 2);
|
||||
result->_F_IT.resize(F_IT_size);
|
||||
|
||||
result->_E_IT.resize(tablesFactory.GetNumEdgeVerticesTotal(maxlevel)*4);
|
||||
result->_E_W.resize(tablesFactory.GetNumEdgeVerticesTotal(maxlevel)*2);
|
||||
|
||||
result->_V_ITa.resize((tablesFactory.GetNumVertexVerticesTotal(maxlevel)
|
||||
- tablesFactory.GetNumVertexVerticesTotal(0))*5); // subtract corase cage vertices
|
||||
- tablesFactory.GetNumVertexVerticesTotal(0))*5); // subtract coarse cage vertices
|
||||
result->_V_IT.resize(tablesFactory.GetVertVertsValenceSum()*2);
|
||||
result->_V_W.resize(tablesFactory.GetNumVertexVerticesTotal(maxlevel)
|
||||
- tablesFactory.GetNumVertexVerticesTotal(0));
|
||||
@ -115,18 +126,44 @@ FarCatmarkSubdivisionTablesFactory<T,U>::Create( FarMeshFactory<T,U> * meshFacto
|
||||
// "For each vertex, gather all the vertices from the parent face."
|
||||
int nFaceVertices = (int)tablesFactory._faceVertsList[level].size();
|
||||
|
||||
// choose the kernel type that best fits the face topology
|
||||
int kernelType;
|
||||
if (level == 1) {
|
||||
if (coarseMeshAllQuadFaces)
|
||||
kernelType = FarKernelBatch::CATMARK_QUAD_FACE_VERTEX;
|
||||
else if (coarseMeshAllTriQuadFaces)
|
||||
kernelType = FarKernelBatch::CATMARK_TRI_QUAD_FACE_VERTEX;
|
||||
else
|
||||
kernelType = FarKernelBatch::CATMARK_FACE_VERTEX;
|
||||
} else {
|
||||
kernelType = FarKernelBatch::CATMARK_QUAD_FACE_VERTEX;
|
||||
}
|
||||
|
||||
// add a batch for face vertices
|
||||
if (nFaceVertices > 0) // in torus case, nfacevertices could be zero
|
||||
batches->push_back(FarKernelBatch( FarKernelBatch::CATMARK_FACE_VERTEX,
|
||||
level,
|
||||
0,
|
||||
0,
|
||||
nFaceVertices,
|
||||
faceTableOffset,
|
||||
vertexOffset) );
|
||||
if (nFaceVertices > 0) { // in torus case, nfacevertices could be zero
|
||||
if (kernelType == FarKernelBatch::CATMARK_FACE_VERTEX) {
|
||||
batches->push_back(FarKernelBatch( kernelType,
|
||||
level,
|
||||
0,
|
||||
0,
|
||||
nFaceVertices,
|
||||
faceTableOffset,
|
||||
vertexOffset) );
|
||||
} else {
|
||||
// quad and tri-quad kernels store the offset of the first vertex in the table offset
|
||||
batches->push_back(FarKernelBatch( kernelType,
|
||||
level,
|
||||
0,
|
||||
0,
|
||||
nFaceVertices,
|
||||
F_IT_offset,
|
||||
vertexOffset) );
|
||||
}
|
||||
}
|
||||
|
||||
vertexOffset += nFaceVertices;
|
||||
faceTableOffset += nFaceVertices;
|
||||
if (kernelType == FarKernelBatch::CATMARK_FACE_VERTEX)
|
||||
faceTableOffset += nFaceVertices;
|
||||
|
||||
for (int i=0; i < nFaceVertices; ++i) {
|
||||
|
||||
@ -138,13 +175,17 @@ FarCatmarkSubdivisionTablesFactory<T,U>::Create( FarMeshFactory<T,U> * meshFacto
|
||||
|
||||
int valence = f->GetNumVertices();
|
||||
|
||||
F_ITa[2*i+0] = F_IT_offset;
|
||||
F_ITa[2*i+1] = valence;
|
||||
if (kernelType == FarKernelBatch::CATMARK_FACE_VERTEX) {
|
||||
*F_ITa++ = F_IT_offset;
|
||||
*F_ITa++ = valence;
|
||||
}
|
||||
|
||||
for (int j=0; j<valence; ++j)
|
||||
F_IT[F_IT_offset++] = remap[f->GetVertex(j)->GetID()];
|
||||
|
||||
if (kernelType == FarKernelBatch::CATMARK_TRI_QUAD_FACE_VERTEX && valence == 3)
|
||||
F_IT[F_IT_offset++] = remap[f->GetVertex(2)->GetID()]; // repeat last index
|
||||
}
|
||||
F_ITa += nFaceVertices * 2;
|
||||
|
||||
// Edge vertices
|
||||
|
||||
|
@ -87,6 +87,12 @@ FarDispatcher::ApplyKernel(CONTROLLER *controller, CONTEXT *context, FarKernelBa
|
||||
case FarKernelBatch::CATMARK_FACE_VERTEX:
|
||||
controller->ApplyCatmarkFaceVerticesKernel(batch, context);
|
||||
break;
|
||||
case FarKernelBatch::CATMARK_QUAD_FACE_VERTEX:
|
||||
controller->ApplyCatmarkQuadFaceVerticesKernel(batch, context);
|
||||
break;
|
||||
case FarKernelBatch::CATMARK_TRI_QUAD_FACE_VERTEX:
|
||||
controller->ApplyCatmarkTriQuadFaceVerticesKernel(batch, context);
|
||||
break;
|
||||
case FarKernelBatch::CATMARK_EDGE_VERTEX:
|
||||
controller->ApplyCatmarkEdgeVerticesKernel(batch, context);
|
||||
break;
|
||||
@ -170,6 +176,12 @@ public:
|
||||
template <class CONTEXT>
|
||||
void ApplyCatmarkFaceVerticesKernel(FarKernelBatch const &batch, CONTEXT *context) const;
|
||||
|
||||
template <class CONTEXT>
|
||||
void ApplyCatmarkQuadFaceVerticesKernel(FarKernelBatch const &batch, CONTEXT *context) const;
|
||||
|
||||
template <class CONTEXT>
|
||||
void ApplyCatmarkTriQuadFaceVerticesKernel(FarKernelBatch const &batch, CONTEXT *context) const;
|
||||
|
||||
template <class CONTEXT>
|
||||
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, CONTEXT *context) const;
|
||||
|
||||
@ -268,6 +280,36 @@ FarComputeController::ApplyCatmarkFaceVerticesKernel(FarKernelBatch const &batch
|
||||
vsrc );
|
||||
}
|
||||
|
||||
template <class CONTEXT> void
|
||||
FarComputeController::ApplyCatmarkQuadFaceVerticesKernel(FarKernelBatch const &batch, CONTEXT *context) const {
|
||||
|
||||
typename CONTEXT::VertexType *vsrc = &context->GetVertices().at(0);
|
||||
|
||||
FarSubdivisionTables const * subdivision = context->GetSubdivisionTables();
|
||||
assert(subdivision);
|
||||
|
||||
subdivision->computeCatmarkQuadFacePoints( batch.GetVertexOffset(),
|
||||
batch.GetTableOffset(),
|
||||
batch.GetStart(),
|
||||
batch.GetEnd(),
|
||||
vsrc );
|
||||
}
|
||||
|
||||
template <class CONTEXT> void
|
||||
FarComputeController::ApplyCatmarkTriQuadFaceVerticesKernel(FarKernelBatch const &batch, CONTEXT *context) const {
|
||||
|
||||
typename CONTEXT::VertexType *vsrc = &context->GetVertices().at(0);
|
||||
|
||||
FarSubdivisionTables const * subdivision = context->GetSubdivisionTables();
|
||||
assert(subdivision);
|
||||
|
||||
subdivision->computeCatmarkTriQuadFacePoints( batch.GetVertexOffset(),
|
||||
batch.GetTableOffset(),
|
||||
batch.GetStart(),
|
||||
batch.GetEnd(),
|
||||
vsrc );
|
||||
}
|
||||
|
||||
template <class CONTEXT> void
|
||||
FarComputeController::ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, CONTEXT *context) const {
|
||||
|
||||
|
@ -66,6 +66,8 @@ public:
|
||||
|
||||
enum KernelType {
|
||||
CATMARK_FACE_VERTEX = 1,
|
||||
CATMARK_QUAD_FACE_VERTEX,
|
||||
CATMARK_TRI_QUAD_FACE_VERTEX,
|
||||
CATMARK_EDGE_VERTEX,
|
||||
CATMARK_VERT_VERTEX_A1,
|
||||
CATMARK_VERT_VERTEX_A2,
|
||||
|
@ -131,9 +131,9 @@ private:
|
||||
end;
|
||||
};
|
||||
|
||||
Range kernelB; // vertex batch reange (kernel B)
|
||||
Range kernelA1; // vertex batch reange (kernel A pass 1)
|
||||
Range kernelA2; // vertex batch reange (kernel A pass 2)
|
||||
Range kernelB; // vertex batch range (kernel B)
|
||||
Range kernelA1; // vertex batch range (kernel A pass 1)
|
||||
Range kernelA2; // vertex batch range (kernel A pass 2)
|
||||
};
|
||||
|
||||
inline void
|
||||
|
@ -161,21 +161,29 @@ public:
|
||||
|
||||
// Compute-kernel applied to vertices resulting from the refinement of a face.
|
||||
template <class U>
|
||||
void computeCatmarkFacePoints(int offset, int level, int start, int end, U * vsrc) const;
|
||||
void computeCatmarkFacePoints(int vertexOffset, int tableOffset, int start, int end, U * vsrc) const;
|
||||
|
||||
// Compute-kernel applied to vertices resulting from the refinement of a quad face.
|
||||
template <class U>
|
||||
void computeCatmarkQuadFacePoints(int vertexOffset, int tableOffset, int start, int end, U * vsrc) const;
|
||||
|
||||
// Compute-kernel applied to vertices resulting from the refinement of a tri or quad face.
|
||||
template <class U>
|
||||
void computeCatmarkTriQuadFacePoints(int vertexOffset, int tableOffset, int start, int end, U * vsrc) const;
|
||||
|
||||
// Compute-kernel applied to vertices resulting from the refinement of an edge.
|
||||
template <class U>
|
||||
void computeCatmarkEdgePoints(int offset, int level, int start, int end, U * vsrc) const;
|
||||
void computeCatmarkEdgePoints(int vertexOffset, int tableOffset, int start, int end, U * vsrc) const;
|
||||
|
||||
// Compute-kernel applied to vertices resulting from the refinement of a vertex
|
||||
// Kernel "A" Handles the k_Smooth and k_Dart rules
|
||||
// Kernel "A" Handles the k_Crease and k_Corner rules
|
||||
template <class U>
|
||||
void computeCatmarkVertexPointsA(int offset, bool pass, int level, int start, int end, U * vsrc) const;
|
||||
void computeCatmarkVertexPointsA(int vertexOffset, bool pass, int tableOffset, int start, int end, U * vsrc) const;
|
||||
|
||||
// Compute-kernel applied to vertices resulting from the refinement of a vertex
|
||||
// Kernel "B" Handles the k_Crease and k_Corner rules
|
||||
// Kernel "B" Handles the k_Smooth and k_Dart rules
|
||||
template <class U>
|
||||
void computeCatmarkVertexPointsB(int offset, int level, int start, int end, U * vsrc) const;
|
||||
void computeCatmarkVertexPointsB(int vertexOffset, int tableOffset, int start, int end, U * vsrc) const;
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Loop scheme
|
||||
@ -342,14 +350,15 @@ FarSubdivisionTables::computeBilinearVertexPoints( int offset, int tableOffset,
|
||||
vdst->AddVaryingWithWeight( vsrc[p], 1.0f );
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Face-vertices compute Kernel - completely re-entrant
|
||||
//
|
||||
|
||||
template <class U> void
|
||||
FarSubdivisionTables::computeCatmarkFacePoints( int offset, int tableOffset, int start, int end, U * vsrc ) const {
|
||||
FarSubdivisionTables::computeCatmarkFacePoints( int vertexOffset, int tableOffset, int start, int end, U * vsrc ) const {
|
||||
|
||||
U * vdst = vsrc + offset + start;
|
||||
U * vdst = vsrc + vertexOffset + start;
|
||||
|
||||
for (int i=start+tableOffset; i<end+tableOffset; ++i, ++vdst ) {
|
||||
|
||||
@ -366,14 +375,73 @@ FarSubdivisionTables::computeCatmarkFacePoints( int offset, int tableOffset, int
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Quad face-vertices compute Kernel - completely re-entrant
|
||||
//
|
||||
|
||||
template <class U> void
|
||||
FarSubdivisionTables::computeCatmarkQuadFacePoints( int vertexOffset, int tableOffset, int start, int end, U * vsrc ) const {
|
||||
|
||||
U * vdst = vsrc + vertexOffset + start;
|
||||
|
||||
for (int i=start; i<end; ++i, ++vdst ) {
|
||||
int fidx0 = _F_IT[tableOffset + 4 * i + 0];
|
||||
int fidx1 = _F_IT[tableOffset + 4 * i + 1];
|
||||
int fidx2 = _F_IT[tableOffset + 4 * i + 2];
|
||||
int fidx3 = _F_IT[tableOffset + 4 * i + 3];
|
||||
|
||||
vdst->Clear();
|
||||
vdst->AddWithWeight(vsrc[fidx0], 0.25f);
|
||||
vdst->AddVaryingWithWeight(vsrc[fidx0], 0.25f);
|
||||
vdst->AddWithWeight(vsrc[fidx1], 0.25f);
|
||||
vdst->AddVaryingWithWeight(vsrc[fidx1], 0.25f);
|
||||
vdst->AddWithWeight(vsrc[fidx2], 0.25f);
|
||||
vdst->AddVaryingWithWeight(vsrc[fidx2], 0.25f);
|
||||
vdst->AddWithWeight(vsrc[fidx3], 0.25f);
|
||||
vdst->AddVaryingWithWeight(vsrc[fidx3], 0.25f);
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Tri/quad face-vertices compute Kernel - completely re-entrant
|
||||
//
|
||||
|
||||
template <class U> void
|
||||
FarSubdivisionTables::computeCatmarkTriQuadFacePoints( int vertexOffset, int tableOffset, int start, int end, U * vsrc ) const {
|
||||
|
||||
U * vdst = vsrc + vertexOffset + start;
|
||||
|
||||
for (int i=start; i<end; ++i, ++vdst ) {
|
||||
int fidx0 = _F_IT[tableOffset + 4 * i + 0];
|
||||
int fidx1 = _F_IT[tableOffset + 4 * i + 1];
|
||||
int fidx2 = _F_IT[tableOffset + 4 * i + 2];
|
||||
int fidx3 = _F_IT[tableOffset + 4 * i + 3];
|
||||
|
||||
bool triangle = (fidx3 == fidx2);
|
||||
float weight = triangle ? 1.0f / 3.0f : 1.0f / 4.0f;
|
||||
|
||||
vdst->Clear();
|
||||
vdst->AddWithWeight(vsrc[fidx0], weight);
|
||||
vdst->AddVaryingWithWeight(vsrc[fidx0], weight);
|
||||
vdst->AddWithWeight(vsrc[fidx1], weight);
|
||||
vdst->AddVaryingWithWeight(vsrc[fidx1], weight);
|
||||
vdst->AddWithWeight(vsrc[fidx2], weight);
|
||||
vdst->AddVaryingWithWeight(vsrc[fidx2], weight);
|
||||
if (!triangle) {
|
||||
vdst->AddWithWeight(vsrc[fidx3], weight);
|
||||
vdst->AddVaryingWithWeight(vsrc[fidx3], weight);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Edge-vertices compute Kernel - completely re-entrant
|
||||
//
|
||||
|
||||
template <class U> void
|
||||
FarSubdivisionTables::computeCatmarkEdgePoints( int offset, int tableOffset, int start, int end, U * vsrc ) const {
|
||||
FarSubdivisionTables::computeCatmarkEdgePoints( int vertexOffset, int tableOffset, int start, int end, U * vsrc ) const {
|
||||
|
||||
U * vdst = vsrc + offset + start;
|
||||
U * vdst = vsrc + vertexOffset + start;
|
||||
|
||||
for (int i=start+tableOffset; i<end+tableOffset; ++i, ++vdst ) {
|
||||
|
||||
@ -409,16 +477,16 @@ FarSubdivisionTables::computeCatmarkEdgePoints( int offset, int tableOffset, int
|
||||
|
||||
// multi-pass kernel handling k_Crease and k_Corner rules
|
||||
template <class U> void
|
||||
FarSubdivisionTables::computeCatmarkVertexPointsA( int offset, bool pass, int tableOffset, int start, int end, U * vsrc ) const {
|
||||
FarSubdivisionTables::computeCatmarkVertexPointsA( int vertexOffset, bool pass, int tableOffset, int start, int end, U * vsrc ) const {
|
||||
|
||||
U * vdst = vsrc + offset + start;
|
||||
U * vdst = vsrc + vertexOffset + start;
|
||||
|
||||
for (int i=start+tableOffset; i<end+tableOffset; ++i, ++vdst ) {
|
||||
|
||||
if (not pass)
|
||||
vdst->Clear();
|
||||
|
||||
int n=this->_V_ITa[5*i+1], // number of vertices in the _VO_IT array (valence)
|
||||
int n=this->_V_ITa[5*i+1], // number of vertices in the _V_IT array (valence)
|
||||
p=this->_V_ITa[5*i+2], // index of the parent vertex
|
||||
eidx0=this->_V_ITa[5*i+3], // index of the first crease rule edge
|
||||
eidx1=this->_V_ITa[5*i+4]; // index of the second crease rule edge
|
||||
@ -448,16 +516,16 @@ FarSubdivisionTables::computeCatmarkVertexPointsA( int offset, bool pass, int ta
|
||||
|
||||
// multi-pass kernel handling k_Dart and k_Smooth rules
|
||||
template <class U> void
|
||||
FarSubdivisionTables::computeCatmarkVertexPointsB( int offset, int tableOffset, int start, int end, U * vsrc ) const {
|
||||
FarSubdivisionTables::computeCatmarkVertexPointsB( int vertexOffset, int tableOffset, int start, int end, U * vsrc ) const {
|
||||
|
||||
U * vdst = vsrc + offset + start;
|
||||
U * vdst = vsrc + vertexOffset + start;
|
||||
|
||||
for (int i=start+tableOffset; i<end+tableOffset; ++i, ++vdst ) {
|
||||
|
||||
vdst->Clear();
|
||||
|
||||
int h = this->_V_ITa[5*i ], // offset of the vertices in the _V0_IT array
|
||||
n = this->_V_ITa[5*i+1], // number of vertices in the _VO_IT array (valence)
|
||||
int h = this->_V_ITa[5*i ], // offset of the vertices in the _V_IT array
|
||||
n = this->_V_ITa[5*i+1], // number of vertices in the _V_IT array (valence)
|
||||
p = this->_V_ITa[5*i+2]; // index of the parent vertex
|
||||
|
||||
float weight = this->_V_W[i],
|
||||
@ -479,9 +547,9 @@ FarSubdivisionTables::computeCatmarkVertexPointsB( int offset, int tableOffset,
|
||||
//
|
||||
|
||||
template <class U> void
|
||||
FarSubdivisionTables::computeLoopEdgePoints( int offset, int tableOffset, int start, int end, U * vsrc ) const {
|
||||
FarSubdivisionTables::computeLoopEdgePoints( int vertexOffset, int tableOffset, int start, int end, U * vsrc ) const {
|
||||
|
||||
U * vdst = vsrc + offset + start;
|
||||
U * vdst = vsrc + vertexOffset + start;
|
||||
|
||||
for (int i=start+tableOffset; i<end+tableOffset; ++i, ++vdst ) {
|
||||
|
||||
@ -517,16 +585,16 @@ FarSubdivisionTables::computeLoopEdgePoints( int offset, int tableOffset, int st
|
||||
|
||||
// multi-pass kernel handling k_Crease and k_Corner rules
|
||||
template <class U> void
|
||||
FarSubdivisionTables::computeLoopVertexPointsA( int offset, bool pass, int tableOffset, int start, int end, U * vsrc ) const {
|
||||
FarSubdivisionTables::computeLoopVertexPointsA( int vertexOffset, bool pass, int tableOffset, int start, int end, U * vsrc ) const {
|
||||
|
||||
U * vdst = vsrc + offset + start;
|
||||
U * vdst = vsrc + vertexOffset + start;
|
||||
|
||||
for (int i=start+tableOffset; i<end+tableOffset; ++i, ++vdst ) {
|
||||
|
||||
if (not pass)
|
||||
vdst->Clear();
|
||||
|
||||
int n=this->_V_ITa[5*i+1], // number of vertices in the _VO_IT array (valence)
|
||||
int n=this->_V_ITa[5*i+1], // number of vertices in the _V_IT array (valence)
|
||||
p=this->_V_ITa[5*i+2], // index of the parent vertex
|
||||
eidx0=this->_V_ITa[5*i+3], // index of the first crease rule edge
|
||||
eidx1=this->_V_ITa[5*i+4]; // index of the second crease rule edge
|
||||
@ -556,16 +624,16 @@ FarSubdivisionTables::computeLoopVertexPointsA( int offset, bool pass, int table
|
||||
|
||||
// multi-pass kernel handling k_Dart and k_Smooth rules
|
||||
template <class U> void
|
||||
FarSubdivisionTables::computeLoopVertexPointsB( int offset, int tableOffset, int start, int end, U *vsrc ) const {
|
||||
FarSubdivisionTables::computeLoopVertexPointsB( int vertexOffset, int tableOffset, int start, int end, U *vsrc ) const {
|
||||
|
||||
U * vdst = vsrc + offset + start;
|
||||
U * vdst = vsrc + vertexOffset + start;
|
||||
|
||||
for (int i=start+tableOffset; i<end+tableOffset; ++i, ++vdst ) {
|
||||
|
||||
vdst->Clear();
|
||||
|
||||
int h = this->_V_ITa[5*i ], // offset of the vertices in the _V0_IT array
|
||||
n = this->_V_ITa[5*i+1], // number of vertices in the _VO_IT array (valence)
|
||||
int h = this->_V_ITa[5*i ], // offset of the vertices in the _V_IT array
|
||||
n = this->_V_ITa[5*i+1], // number of vertices in the _V_IT array (valence)
|
||||
p = this->_V_ITa[5*i+2]; // index of the parent vertex
|
||||
|
||||
float weight = this->_V_W[i],
|
||||
@ -583,12 +651,6 @@ FarSubdivisionTables::computeLoopVertexPointsB( int offset, int tableOffset, int
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
using namespace OPENSUBDIV_VERSION;
|
||||
|
||||
|
@ -94,6 +94,15 @@ protected:
|
||||
// Valence summation for face vertices
|
||||
int GetVertVertsValenceSum() const { return _vertVertsValenceSum; }
|
||||
|
||||
// Minimum valence for coarse faces
|
||||
int GetMinCoarseFaceValence() const { return _minCoarseFaceValence; }
|
||||
|
||||
// Maximum valence for coarse faces
|
||||
int GetMaxCoarseFaceValence() const { return _maxCoarseFaceValence; }
|
||||
|
||||
// Number of coarse triangle faces
|
||||
int GetNumCoarseTriangleFaces() const { return _numCoarseTriangleFaces; }
|
||||
|
||||
// Returns an integer based on the order in which the kernels are applied
|
||||
static int GetMaskRanking( unsigned char mask0, unsigned char mask1 );
|
||||
|
||||
@ -102,7 +111,7 @@ protected:
|
||||
_edgeVertIdx,
|
||||
_vertVertIdx;
|
||||
|
||||
// Mumber of indices required for the face-vert and vertex-vert
|
||||
// Number of indices required for the face-vert and vertex-vert
|
||||
// iteration tables at each level
|
||||
int _faceVertsValenceSum,
|
||||
_vertVertsValenceSum;
|
||||
@ -111,6 +120,14 @@ protected:
|
||||
std::vector<std::vector< HbrVertex<T> *> > _faceVertsList,
|
||||
_edgeVertsList,
|
||||
_vertVertsList;
|
||||
|
||||
// Minimum and maximum valence for coarse faces
|
||||
int _minCoarseFaceValence,
|
||||
_maxCoarseFaceValence;
|
||||
|
||||
// Number of coarse triangle faces
|
||||
int _numCoarseTriangleFaces;
|
||||
|
||||
private:
|
||||
|
||||
// Returns the subdivision level of a vertex
|
||||
@ -135,7 +152,10 @@ FarSubdivisionTablesFactory<T,U>::FarSubdivisionTablesFactory( HbrMesh<T> const
|
||||
_vertVertsValenceSum(0),
|
||||
_faceVertsList(maxlevel+1),
|
||||
_edgeVertsList(maxlevel+1),
|
||||
_vertVertsList(maxlevel+1)
|
||||
_vertVertsList(maxlevel+1),
|
||||
_minCoarseFaceValence(0),
|
||||
_maxCoarseFaceValence(0),
|
||||
_numCoarseTriangleFaces(0)
|
||||
{
|
||||
assert( mesh );
|
||||
|
||||
@ -170,7 +190,15 @@ FarSubdivisionTablesFactory<T,U>::FarSubdivisionTablesFactory( HbrMesh<T> const
|
||||
|
||||
if (v->GetParentFace()) {
|
||||
faceCounts[depth]++;
|
||||
_faceVertsValenceSum += v->GetParentFace()->GetNumVertices();
|
||||
int valence = v->GetParentFace()->GetNumVertices();
|
||||
_faceVertsValenceSum += valence;
|
||||
|
||||
if (depth == 1) {
|
||||
_minCoarseFaceValence = (_minCoarseFaceValence == 0 ? valence : std::min(_minCoarseFaceValence, valence));
|
||||
_maxCoarseFaceValence = (_maxCoarseFaceValence == 0 ? valence : std::max(_maxCoarseFaceValence, valence));
|
||||
if (valence == 3)
|
||||
++_numCoarseTriangleFaces;
|
||||
}
|
||||
} else if (v->GetParentEdge())
|
||||
edgeCounts[depth]++;
|
||||
else if (v->GetParentVertex()) {
|
||||
|
@ -180,6 +180,62 @@ OsdCLComputeController::ApplyCatmarkFaceVerticesKernel(
|
||||
CL_CHECK_ERROR(ciErrNum, "face kernel %d\n", ciErrNum);
|
||||
}
|
||||
|
||||
void
|
||||
OsdCLComputeController::ApplyCatmarkQuadFaceVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdCLComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
cl_int ciErrNum;
|
||||
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
|
||||
cl_kernel kernel = _currentBindState.kernelBundle->GetCatmarkQuadFaceKernel();
|
||||
|
||||
cl_mem F_IT = context->GetTable(FarSubdivisionTables::F_IT)->GetDevicePtr();
|
||||
|
||||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
|
||||
clSetKernelArg(kernel, 2, sizeof(cl_mem), &F_IT);
|
||||
clSetKernelArg(kernel, 3, sizeof(int), &_currentBindState.vertexDesc.offset);
|
||||
clSetKernelArg(kernel, 4, sizeof(int), &_currentBindState.varyingDesc.offset);
|
||||
clSetKernelArg(kernel, 5, sizeof(int), batch.GetVertexOffsetPtr());
|
||||
clSetKernelArg(kernel, 6, sizeof(int), batch.GetTableOffsetPtr());
|
||||
clSetKernelArg(kernel, 7, sizeof(int), batch.GetStartPtr());
|
||||
clSetKernelArg(kernel, 8, sizeof(int), batch.GetEndPtr());
|
||||
|
||||
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
|
||||
kernel, 1, NULL, globalWorkSize,
|
||||
NULL, 0, NULL, NULL);
|
||||
CL_CHECK_ERROR(ciErrNum, "quad face kernel %d\n", ciErrNum);
|
||||
}
|
||||
|
||||
void
|
||||
OsdCLComputeController::ApplyCatmarkTriQuadFaceVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdCLComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
cl_int ciErrNum;
|
||||
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
|
||||
cl_kernel kernel = _currentBindState.kernelBundle->GetCatmarkTriQuadFaceKernel();
|
||||
|
||||
cl_mem F_IT = context->GetTable(FarSubdivisionTables::F_IT)->GetDevicePtr();
|
||||
|
||||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
|
||||
clSetKernelArg(kernel, 2, sizeof(cl_mem), &F_IT);
|
||||
clSetKernelArg(kernel, 3, sizeof(int), &_currentBindState.vertexDesc.offset);
|
||||
clSetKernelArg(kernel, 4, sizeof(int), &_currentBindState.varyingDesc.offset);
|
||||
clSetKernelArg(kernel, 5, sizeof(int), batch.GetVertexOffsetPtr());
|
||||
clSetKernelArg(kernel, 6, sizeof(int), batch.GetTableOffsetPtr());
|
||||
clSetKernelArg(kernel, 7, sizeof(int), batch.GetStartPtr());
|
||||
clSetKernelArg(kernel, 8, sizeof(int), batch.GetEndPtr());
|
||||
|
||||
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
|
||||
kernel, 1, NULL, globalWorkSize,
|
||||
NULL, 0, NULL, NULL);
|
||||
CL_CHECK_ERROR(ciErrNum, "tri quad face kernel %d\n", ciErrNum);
|
||||
}
|
||||
|
||||
void
|
||||
OsdCLComputeController::ApplyCatmarkEdgeVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdCLComputeContext const *context) const {
|
||||
@ -239,7 +295,7 @@ OsdCLComputeController::ApplyCatmarkVertexVerticesKernelB(
|
||||
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
|
||||
kernel, 1, NULL, globalWorkSize,
|
||||
NULL, 0, NULL, NULL);
|
||||
CL_CHECK_ERROR(ciErrNum, "vertex kernel 1 %d\n", ciErrNum);
|
||||
CL_CHECK_ERROR(ciErrNum, "vertex kernel B %d\n", ciErrNum);
|
||||
}
|
||||
|
||||
void
|
||||
@ -271,7 +327,7 @@ OsdCLComputeController::ApplyCatmarkVertexVerticesKernelA1(
|
||||
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
|
||||
kernel, 1, NULL, globalWorkSize,
|
||||
NULL, 0, NULL, NULL);
|
||||
CL_CHECK_ERROR(ciErrNum, "vertex kernel 2 %d\n", ciErrNum);
|
||||
CL_CHECK_ERROR(ciErrNum, "vertex kernel A1 %d\n", ciErrNum);
|
||||
}
|
||||
|
||||
void
|
||||
@ -303,7 +359,7 @@ OsdCLComputeController::ApplyCatmarkVertexVerticesKernelA2(
|
||||
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
|
||||
kernel, 1, NULL, globalWorkSize,
|
||||
NULL, 0, NULL, NULL);
|
||||
CL_CHECK_ERROR(ciErrNum, "vertex kernel 2 %d\n", ciErrNum);
|
||||
CL_CHECK_ERROR(ciErrNum, "vertex kernel A2 %d\n", ciErrNum);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -137,6 +137,10 @@ protected:
|
||||
|
||||
void ApplyCatmarkFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkTriQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
@ -182,6 +182,86 @@ __kernel void computeFace(__global float *vertex,
|
||||
if (varying) writeVarying(varying, vid, &dstVarying);
|
||||
}
|
||||
|
||||
__kernel void computeQuadFace(__global float *vertex,
|
||||
__global float *varying,
|
||||
__global int *F_IT,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset,
|
||||
int start, int end) {
|
||||
|
||||
int i = start + get_global_id(0);
|
||||
int vid = start + get_global_id(0) + offset;
|
||||
vertex += vertexOffset;
|
||||
varying += (varying ? varyingOffset :0);
|
||||
|
||||
struct Vertex dst;
|
||||
struct Varying dstVarying;
|
||||
clearVertex(&dst);
|
||||
clearVarying(&dstVarying);
|
||||
|
||||
int fidx0 = F_IT[tableOffset + 4 * i + 0];
|
||||
int fidx1 = F_IT[tableOffset + 4 * i + 1];
|
||||
int fidx2 = F_IT[tableOffset + 4 * i + 2];
|
||||
int fidx3 = F_IT[tableOffset + 4 * i + 3];
|
||||
|
||||
addWithWeight(&dst, vertex, fidx0, 0.25f);
|
||||
addWithWeight(&dst, vertex, fidx1, 0.25f);
|
||||
addWithWeight(&dst, vertex, fidx2, 0.25f);
|
||||
addWithWeight(&dst, vertex, fidx3, 0.25f);
|
||||
|
||||
if (varying) {
|
||||
addVaryingWithWeight(&dstVarying, varying, fidx0, 0.25f);
|
||||
addVaryingWithWeight(&dstVarying, varying, fidx1, 0.25f);
|
||||
addVaryingWithWeight(&dstVarying, varying, fidx2, 0.25f);
|
||||
addVaryingWithWeight(&dstVarying, varying, fidx3, 0.25f);
|
||||
}
|
||||
|
||||
writeVertex(vertex, vid, &dst);
|
||||
if (varying) writeVarying(varying, vid, &dstVarying);
|
||||
}
|
||||
|
||||
__kernel void computeTriQuadFace(__global float *vertex,
|
||||
__global float *varying,
|
||||
__global int *F_IT,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset,
|
||||
int start, int end) {
|
||||
|
||||
int i = start + get_global_id(0);
|
||||
int vid = start + get_global_id(0) + offset;
|
||||
vertex += vertexOffset;
|
||||
varying += (varying ? varyingOffset :0);
|
||||
|
||||
struct Vertex dst;
|
||||
struct Varying dstVarying;
|
||||
clearVertex(&dst);
|
||||
clearVarying(&dstVarying);
|
||||
|
||||
int fidx0 = F_IT[tableOffset + 4 * i + 0];
|
||||
int fidx1 = F_IT[tableOffset + 4 * i + 1];
|
||||
int fidx2 = F_IT[tableOffset + 4 * i + 2];
|
||||
int fidx3 = F_IT[tableOffset + 4 * i + 3];
|
||||
bool triangle = (fidx2 == fidx3);
|
||||
float weight = triangle ? 1.0f / 3.0f : 1.0f / 4.0f;
|
||||
|
||||
addWithWeight(&dst, vertex, fidx0, weight);
|
||||
addWithWeight(&dst, vertex, fidx1, weight);
|
||||
addWithWeight(&dst, vertex, fidx2, weight);
|
||||
if (!triangle)
|
||||
addWithWeight(&dst, vertex, fidx3, weight);
|
||||
|
||||
if (varying) {
|
||||
addVaryingWithWeight(&dstVarying, varying, fidx0, weight);
|
||||
addVaryingWithWeight(&dstVarying, varying, fidx1, weight);
|
||||
addVaryingWithWeight(&dstVarying, varying, fidx2, weight);
|
||||
if (!triangle)
|
||||
addVaryingWithWeight(&dstVarying, varying, fidx3, weight);
|
||||
}
|
||||
|
||||
writeVertex(vertex, vid, &dst);
|
||||
if (varying) writeVarying(varying, vid, &dstVarying);
|
||||
}
|
||||
|
||||
__kernel void computeEdge(__global float *vertex,
|
||||
__global float *varying,
|
||||
__global int *E_IT,
|
||||
|
@ -51,6 +51,8 @@ OsdCLKernelBundle::OsdCLKernelBundle() :
|
||||
_clBilinearEdge(NULL),
|
||||
_clBilinearVertex(NULL),
|
||||
_clCatmarkFace(NULL),
|
||||
_clCatmarkQuadFace(NULL),
|
||||
_clCatmarkTriQuadFace(NULL),
|
||||
_clCatmarkEdge(NULL),
|
||||
_clCatmarkVertexA(NULL),
|
||||
_clCatmarkVertexB(NULL),
|
||||
@ -72,6 +74,10 @@ OsdCLKernelBundle::~OsdCLKernelBundle() {
|
||||
|
||||
if (_clCatmarkFace)
|
||||
clReleaseKernel(_clCatmarkFace);
|
||||
if (_clCatmarkQuadFace)
|
||||
clReleaseKernel(_clCatmarkQuadFace);
|
||||
if (_clCatmarkTriQuadFace)
|
||||
clReleaseKernel(_clCatmarkTriQuadFace);
|
||||
if (_clCatmarkEdge)
|
||||
clReleaseKernel(_clCatmarkEdge);
|
||||
if (_clCatmarkVertexA)
|
||||
@ -145,16 +151,18 @@ OsdCLKernelBundle::Compile(cl_context clContext,
|
||||
return false;
|
||||
}
|
||||
|
||||
_clBilinearEdge = buildKernel(_clProgram, "computeBilinearEdge");
|
||||
_clBilinearVertex = buildKernel(_clProgram, "computeBilinearVertex");
|
||||
_clCatmarkFace = buildKernel(_clProgram, "computeFace");
|
||||
_clCatmarkEdge = buildKernel(_clProgram, "computeEdge");
|
||||
_clCatmarkVertexA = buildKernel(_clProgram, "computeVertexA");
|
||||
_clCatmarkVertexB = buildKernel(_clProgram, "computeVertexB");
|
||||
_clLoopEdge = buildKernel(_clProgram, "computeEdge");
|
||||
_clLoopVertexA = buildKernel(_clProgram, "computeVertexA");
|
||||
_clLoopVertexB = buildKernel(_clProgram, "computeLoopVertexB");
|
||||
_clVertexEditAdd = buildKernel(_clProgram, "editVertexAdd");
|
||||
_clBilinearEdge = buildKernel(_clProgram, "computeBilinearEdge");
|
||||
_clBilinearVertex = buildKernel(_clProgram, "computeBilinearVertex");
|
||||
_clCatmarkFace = buildKernel(_clProgram, "computeFace");
|
||||
_clCatmarkQuadFace = buildKernel(_clProgram, "computeQuadFace");
|
||||
_clCatmarkTriQuadFace = buildKernel(_clProgram, "computeTriQuadFace");
|
||||
_clCatmarkEdge = buildKernel(_clProgram, "computeEdge");
|
||||
_clCatmarkVertexA = buildKernel(_clProgram, "computeVertexA");
|
||||
_clCatmarkVertexB = buildKernel(_clProgram, "computeVertexB");
|
||||
_clLoopEdge = buildKernel(_clProgram, "computeEdge");
|
||||
_clLoopVertexA = buildKernel(_clProgram, "computeVertexA");
|
||||
_clLoopVertexB = buildKernel(_clProgram, "computeLoopVertexB");
|
||||
_clVertexEditAdd = buildKernel(_clProgram, "editVertexAdd");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -44,25 +44,29 @@ public:
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc);
|
||||
|
||||
cl_kernel GetBilinearEdgeKernel() const { return _clBilinearEdge; }
|
||||
cl_kernel GetBilinearEdgeKernel() const { return _clBilinearEdge; }
|
||||
|
||||
cl_kernel GetBilinearVertexKernel() const { return _clBilinearVertex; }
|
||||
cl_kernel GetBilinearVertexKernel() const { return _clBilinearVertex; }
|
||||
|
||||
cl_kernel GetCatmarkFaceKernel() const { return _clCatmarkFace; }
|
||||
cl_kernel GetCatmarkFaceKernel() const { return _clCatmarkFace; }
|
||||
|
||||
cl_kernel GetCatmarkEdgeKernel() const { return _clCatmarkEdge; }
|
||||
cl_kernel GetCatmarkQuadFaceKernel() const { return _clCatmarkQuadFace; }
|
||||
|
||||
cl_kernel GetCatmarkVertexKernelA() const { return _clCatmarkVertexA; }
|
||||
cl_kernel GetCatmarkTriQuadFaceKernel() const { return _clCatmarkQuadFace; }
|
||||
|
||||
cl_kernel GetCatmarkVertexKernelB() const { return _clCatmarkVertexB; }
|
||||
cl_kernel GetCatmarkEdgeKernel() const { return _clCatmarkEdge; }
|
||||
|
||||
cl_kernel GetLoopEdgeKernel() const { return _clLoopEdge; }
|
||||
cl_kernel GetCatmarkVertexKernelA() const { return _clCatmarkVertexA; }
|
||||
|
||||
cl_kernel GetLoopVertexKernelA() const { return _clLoopVertexA; }
|
||||
cl_kernel GetCatmarkVertexKernelB() const { return _clCatmarkVertexB; }
|
||||
|
||||
cl_kernel GetLoopVertexKernelB() const { return _clLoopVertexB; }
|
||||
cl_kernel GetLoopEdgeKernel() const { return _clLoopEdge; }
|
||||
|
||||
cl_kernel GetVertexEditAdd() const { return _clVertexEditAdd; }
|
||||
cl_kernel GetLoopVertexKernelA() const { return _clLoopVertexA; }
|
||||
|
||||
cl_kernel GetLoopVertexKernelB() const { return _clLoopVertexB; }
|
||||
|
||||
cl_kernel GetVertexEditAdd() const { return _clVertexEditAdd; }
|
||||
|
||||
struct Match {
|
||||
/// Constructor
|
||||
@ -92,6 +96,8 @@ protected:
|
||||
cl_kernel _clBilinearEdge,
|
||||
_clBilinearVertex,
|
||||
_clCatmarkFace,
|
||||
_clCatmarkQuadFace,
|
||||
_clCatmarkTriQuadFace,
|
||||
_clCatmarkEdge,
|
||||
_clCatmarkVertexA,
|
||||
_clCatmarkVertexB,
|
||||
|
@ -90,6 +90,32 @@ OsdCpuComputeController::ApplyCatmarkFaceVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdCpuComputeController::ApplyCatmarkQuadFaceVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
OsdCpuComputeQuadFace(
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::F_IT)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdCpuComputeController::ApplyCatmarkTriQuadFaceVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
OsdCpuComputeTriQuadFace(
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::F_IT)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdCpuComputeController::ApplyCatmarkEdgeVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
|
||||
|
@ -120,6 +120,10 @@ protected:
|
||||
|
||||
void ApplyCatmarkFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkTriQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
@ -102,6 +102,86 @@ void OsdCpuComputeFace(
|
||||
}
|
||||
}
|
||||
|
||||
void OsdCpuComputeQuadFace(
|
||||
float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *F_IT, int vertexOffset, int tableOffset,
|
||||
int start, int end) {
|
||||
|
||||
float *vertexResults = (float*)alloca(vertexDesc.length * sizeof(float));
|
||||
float *varyingResults = (float*)alloca(varyingDesc.length * sizeof(float));
|
||||
|
||||
for (int i = start; i < end; i++) {
|
||||
int fidx0 = F_IT[tableOffset + 4 * i + 0];
|
||||
int fidx1 = F_IT[tableOffset + 4 * i + 1];
|
||||
int fidx2 = F_IT[tableOffset + 4 * i + 2];
|
||||
int fidx3 = F_IT[tableOffset + 4 * i + 3];
|
||||
|
||||
int dstIndex = i + vertexOffset;
|
||||
|
||||
// clear
|
||||
clear(vertexResults, vertexDesc);
|
||||
clear(varyingResults, varyingDesc);
|
||||
|
||||
// accum
|
||||
addWithWeight(vertexResults, vertex, fidx0, 0.25f, vertexDesc);
|
||||
addWithWeight(vertexResults, vertex, fidx1, 0.25f, vertexDesc);
|
||||
addWithWeight(vertexResults, vertex, fidx2, 0.25f, vertexDesc);
|
||||
addWithWeight(vertexResults, vertex, fidx3, 0.25f, vertexDesc);
|
||||
addWithWeight(varyingResults, varying, fidx0, 0.25f, varyingDesc);
|
||||
addWithWeight(varyingResults, varying, fidx1, 0.25f, varyingDesc);
|
||||
addWithWeight(varyingResults, varying, fidx2, 0.25f, varyingDesc);
|
||||
addWithWeight(varyingResults, varying, fidx3, 0.25f, varyingDesc);
|
||||
|
||||
// write results
|
||||
copy(vertex, vertexResults, dstIndex, vertexDesc);
|
||||
copy(varying, varyingResults, dstIndex, varyingDesc);
|
||||
}
|
||||
}
|
||||
|
||||
void OsdCpuComputeTriQuadFace(
|
||||
float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *F_IT, int vertexOffset, int tableOffset,
|
||||
int start, int end) {
|
||||
|
||||
float *vertexResults = (float*)alloca(vertexDesc.length * sizeof(float));
|
||||
float *varyingResults = (float*)alloca(varyingDesc.length * sizeof(float));
|
||||
|
||||
for (int i = start; i < end; i++) {
|
||||
int fidx0 = F_IT[tableOffset + 4 * i + 0];
|
||||
int fidx1 = F_IT[tableOffset + 4 * i + 1];
|
||||
int fidx2 = F_IT[tableOffset + 4 * i + 2];
|
||||
int fidx3 = F_IT[tableOffset + 4 * i + 3];
|
||||
bool triangle = (fidx2 == fidx3);
|
||||
float weight = (triangle ? 1.0f / 3.0f : 1.0f / 4.0f);
|
||||
|
||||
int dstIndex = i + vertexOffset;
|
||||
|
||||
// clear
|
||||
clear(vertexResults, vertexDesc);
|
||||
clear(varyingResults, varyingDesc);
|
||||
|
||||
// accum
|
||||
addWithWeight(vertexResults, vertex, fidx0, weight, vertexDesc);
|
||||
addWithWeight(vertexResults, vertex, fidx1, weight, vertexDesc);
|
||||
addWithWeight(vertexResults, vertex, fidx2, weight, vertexDesc);
|
||||
addWithWeight(varyingResults, varying, fidx0, weight, varyingDesc);
|
||||
addWithWeight(varyingResults, varying, fidx1, weight, varyingDesc);
|
||||
addWithWeight(varyingResults, varying, fidx2, weight, varyingDesc);
|
||||
if (!triangle) {
|
||||
addWithWeight(vertexResults, vertex, fidx3, weight, vertexDesc);
|
||||
addWithWeight(varyingResults, varying, fidx3, weight, varyingDesc);
|
||||
}
|
||||
|
||||
// write results
|
||||
copy(vertex, vertexResults, dstIndex, vertexDesc);
|
||||
copy(varying, varyingResults, dstIndex, varyingDesc);
|
||||
}
|
||||
}
|
||||
|
||||
void OsdCpuComputeEdge(
|
||||
float *vertex, float *varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
|
@ -95,6 +95,20 @@ void OsdCpuComputeFace(float * vertex, float * varying,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdCpuComputeQuadFace(float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *F_IT,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdCpuComputeTriQuadFace(float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *F_IT,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
template<int numVertexElements>
|
||||
void ComputeEdgeKernel( float *vertex,
|
||||
const int *E_IT,
|
||||
@ -151,7 +165,7 @@ void ComputeEdgeKernel( float *vertex,
|
||||
void OsdCpuComputeEdge(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *E_IT, const float *E_ITa,
|
||||
const int *E_IT, const float *E_W,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
|
@ -35,6 +35,16 @@ void OsdCudaComputeFace(float *vertex, float *varying,
|
||||
int varyingLength, int varyingStride,
|
||||
int *F_IT, int *F_ITa, int offset, int tableOffset, int start, int end);
|
||||
|
||||
void OsdCudaComputeQuadFace(float *vertex, float *varying,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
int *F_IT, int offset, int tableOffset, int start, int end);
|
||||
|
||||
void OsdCudaComputeTriQuadFace(float *vertex, float *varying,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
int *F_IT, int offset, int tableOffset, int start, int end);
|
||||
|
||||
void OsdCudaComputeEdge(float *vertex, float *varying,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
@ -171,6 +181,46 @@ OsdCudaComputeController::ApplyCatmarkFaceVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdCudaComputeController::ApplyCatmarkQuadFaceVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdCudaComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
const OsdCudaTable * F_IT = context->GetTable(FarSubdivisionTables::F_IT);
|
||||
assert(F_IT);
|
||||
|
||||
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
|
||||
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
|
||||
|
||||
OsdCudaComputeQuadFace(
|
||||
vertex, varying,
|
||||
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
|
||||
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
|
||||
static_cast<int*>(F_IT->GetCudaMemory()),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdCudaComputeController::ApplyCatmarkTriQuadFaceVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdCudaComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
const OsdCudaTable * F_IT = context->GetTable(FarSubdivisionTables::F_IT);
|
||||
assert(F_IT);
|
||||
|
||||
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
|
||||
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
|
||||
|
||||
OsdCudaComputeTriQuadFace(
|
||||
vertex, varying,
|
||||
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
|
||||
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
|
||||
static_cast<int*>(F_IT->GetCudaMemory()),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdCudaComputeController::ApplyCatmarkEdgeVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdCudaComputeContext const *context) const {
|
||||
|
@ -120,6 +120,10 @@ protected:
|
||||
|
||||
void ApplyCatmarkFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkTriQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
@ -140,6 +140,163 @@ computeFace(float *fVertex, float *fVarying,
|
||||
}
|
||||
}
|
||||
|
||||
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
|
||||
computeQuadFace(float *fVertex, float *fVaryings, int *F0_IT, int offset, int tableOffset, int start, int end)
|
||||
{
|
||||
DeviceVertex<NUM_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_VERTEX_ELEMENTS>*)fVertex;
|
||||
DeviceVertex<NUM_VARYING_ELEMENTS> *varyings = (DeviceVertex<NUM_VARYING_ELEMENTS>*)fVaryings;
|
||||
for (int i = start + threadIdx.x + blockIdx.x*blockDim.x;
|
||||
i < end;
|
||||
i += blockDim.x * gridDim.x) {
|
||||
|
||||
int fidx0 = F0_IT[tableOffset + 4 * i + 0];
|
||||
int fidx1 = F0_IT[tableOffset + 4 * i + 1];
|
||||
int fidx2 = F0_IT[tableOffset + 4 * i + 2];
|
||||
int fidx3 = F0_IT[tableOffset + 4 * i + 3];
|
||||
|
||||
DeviceVertex<NUM_VERTEX_ELEMENTS> dst;
|
||||
dst.clear();
|
||||
|
||||
if(NUM_VARYING_ELEMENTS > 0){
|
||||
DeviceVertex<NUM_VARYING_ELEMENTS> dstVarying;
|
||||
dstVarying.clear();
|
||||
|
||||
dst.addWithWeight(&vertex[fidx0], 0.25f);
|
||||
dst.addWithWeight(&vertex[fidx1], 0.25f);
|
||||
dst.addWithWeight(&vertex[fidx2], 0.25f);
|
||||
dst.addWithWeight(&vertex[fidx3], 0.25f);
|
||||
dstVarying.addWithWeight(&varyings[fidx0], 0.25f);
|
||||
dstVarying.addWithWeight(&varyings[fidx1], 0.25f);
|
||||
dstVarying.addWithWeight(&varyings[fidx2], 0.25f);
|
||||
dstVarying.addWithWeight(&varyings[fidx3], 0.25f);
|
||||
vertex[offset + i] = dst;
|
||||
varyings[offset + i] = dstVarying;
|
||||
}else{
|
||||
dst.addWithWeight(&vertex[fidx0], 0.25f);
|
||||
dst.addWithWeight(&vertex[fidx1], 0.25f);
|
||||
dst.addWithWeight(&vertex[fidx2], 0.25f);
|
||||
dst.addWithWeight(&vertex[fidx3], 0.25f);
|
||||
vertex[offset + i] = dst;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void
|
||||
computeQuadFace(float *fVertex, float *fVarying,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
int *F0_IT, int offset, int tableOffset, int start, int end)
|
||||
{
|
||||
for (int i = start +threadIdx.x + blockIdx.x*blockDim.x;
|
||||
i < end;
|
||||
i += blockDim.x * gridDim.x){
|
||||
|
||||
int fidx0 = F0_IT[tableOffset + 4 * i + 0];
|
||||
int fidx1 = F0_IT[tableOffset + 4 * i + 1];
|
||||
int fidx2 = F0_IT[tableOffset + 4 * i + 2];
|
||||
int fidx3 = F0_IT[tableOffset + 4 * i + 3];
|
||||
|
||||
// XXX: can we use local stack like alloca?
|
||||
float *dstVertex = fVertex + (i+offset)*vertexStride;
|
||||
clear(dstVertex, vertexLength);
|
||||
float *dstVarying = fVarying + (i+offset)*varyingStride;
|
||||
clear(dstVarying, varyingLength);
|
||||
|
||||
addWithWeight(dstVertex, fVertex + fidx0*vertexStride, 0.25f, vertexLength);
|
||||
addWithWeight(dstVertex, fVertex + fidx1*vertexStride, 0.25f, vertexLength);
|
||||
addWithWeight(dstVertex, fVertex + fidx2*vertexStride, 0.25f, vertexLength);
|
||||
addWithWeight(dstVertex, fVertex + fidx3*vertexStride, 0.25f, vertexLength);
|
||||
addWithWeight(dstVarying, fVarying + fidx0*varyingStride, 0.25f, varyingLength);
|
||||
addWithWeight(dstVarying, fVarying + fidx1*varyingStride, 0.25f, varyingLength);
|
||||
addWithWeight(dstVarying, fVarying + fidx2*varyingStride, 0.25f, varyingLength);
|
||||
addWithWeight(dstVarying, fVarying + fidx3*varyingStride, 0.25f, varyingLength);
|
||||
}
|
||||
}
|
||||
|
||||
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
|
||||
computeTriQuadFace(float *fVertex, float *fVaryings, int *F0_IT, int offset, int tableOffset, int start, int end)
|
||||
{
|
||||
DeviceVertex<NUM_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_VERTEX_ELEMENTS>*)fVertex;
|
||||
DeviceVertex<NUM_VARYING_ELEMENTS> *varyings = (DeviceVertex<NUM_VARYING_ELEMENTS>*)fVaryings;
|
||||
for (int i = start + threadIdx.x + blockIdx.x*blockDim.x;
|
||||
i < end;
|
||||
i += blockDim.x * gridDim.x) {
|
||||
|
||||
int fidx0 = F0_IT[tableOffset + 4 * i + 0];
|
||||
int fidx1 = F0_IT[tableOffset + 4 * i + 1];
|
||||
int fidx2 = F0_IT[tableOffset + 4 * i + 2];
|
||||
int fidx3 = F0_IT[tableOffset + 4 * i + 3];
|
||||
|
||||
bool triangle = (fidx2 == fidx3);
|
||||
float weight = triangle ? 1.0f / 3.0f : 1.0f / 4.0f;
|
||||
|
||||
DeviceVertex<NUM_VERTEX_ELEMENTS> dst;
|
||||
dst.clear();
|
||||
|
||||
if(NUM_VARYING_ELEMENTS > 0){
|
||||
DeviceVertex<NUM_VARYING_ELEMENTS> dstVarying;
|
||||
dstVarying.clear();
|
||||
|
||||
dst.addWithWeight(&vertex[fidx0], weight);
|
||||
dst.addWithWeight(&vertex[fidx1], weight);
|
||||
dst.addWithWeight(&vertex[fidx2], weight);
|
||||
dstVarying.addWithWeight(&varyings[fidx0], weight);
|
||||
dstVarying.addWithWeight(&varyings[fidx1], weight);
|
||||
dstVarying.addWithWeight(&varyings[fidx2], weight);
|
||||
if (!triangle) {
|
||||
dst.addWithWeight(&vertex[fidx3], weight);
|
||||
dstVarying.addWithWeight(&varyings[fidx3], 0.25f);
|
||||
}
|
||||
vertex[offset + i] = dst;
|
||||
varyings[offset + i] = dstVarying;
|
||||
}else{
|
||||
dst.addWithWeight(&vertex[fidx0], weight);
|
||||
dst.addWithWeight(&vertex[fidx1], weight);
|
||||
dst.addWithWeight(&vertex[fidx2], weight);
|
||||
if (!triangle)
|
||||
dst.addWithWeight(&vertex[fidx3], weight);
|
||||
vertex[offset + i] = dst;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void
|
||||
computeTriQuadFace(float *fVertex, float *fVarying,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
int *F0_IT, int offset, int tableOffset, int start, int end)
|
||||
{
|
||||
for (int i = start +threadIdx.x + blockIdx.x*blockDim.x;
|
||||
i < end;
|
||||
i += blockDim.x * gridDim.x){
|
||||
|
||||
int fidx0 = F0_IT[tableOffset + 4 * i + 0];
|
||||
int fidx1 = F0_IT[tableOffset + 4 * i + 1];
|
||||
int fidx2 = F0_IT[tableOffset + 4 * i + 2];
|
||||
int fidx3 = F0_IT[tableOffset + 4 * i + 3];
|
||||
|
||||
bool triangle = (fidx2 == fidx3);
|
||||
float weight = triangle ? 1.0f / 3.0f : 1.0f / 4.0f;
|
||||
|
||||
// XXX: can we use local stack like alloca?
|
||||
float *dstVertex = fVertex + (i+offset)*vertexStride;
|
||||
clear(dstVertex, vertexLength);
|
||||
float *dstVarying = fVarying + (i+offset)*varyingStride;
|
||||
clear(dstVarying, varyingLength);
|
||||
|
||||
addWithWeight(dstVertex, fVertex + fidx0*vertexStride, weight, vertexLength);
|
||||
addWithWeight(dstVertex, fVertex + fidx1*vertexStride, weight, vertexLength);
|
||||
addWithWeight(dstVertex, fVertex + fidx2*vertexStride, weight, vertexLength);
|
||||
addWithWeight(dstVarying, fVarying + fidx0*varyingStride, weight, varyingLength);
|
||||
addWithWeight(dstVarying, fVarying + fidx1*varyingStride, weight, varyingLength);
|
||||
addWithWeight(dstVarying, fVarying + fidx2*varyingStride, weight, varyingLength);
|
||||
if (!triangle) {
|
||||
addWithWeight(dstVertex, fVertex + fidx3*vertexStride, weight, vertexLength);
|
||||
addWithWeight(dstVarying, fVarying + fidx3*varyingStride, weight, varyingLength);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
|
||||
computeEdge(float *fVertex, float *fVaryings, int *E0_IT, float *E0_S, int offset, int tableOffset, int start, int end)
|
||||
{
|
||||
@ -639,6 +796,40 @@ void OsdCudaComputeFace(float *vertex, float *varying,
|
||||
F_IT, F_ITa, offset, tableOffset, start, end);
|
||||
}
|
||||
|
||||
void OsdCudaComputeQuadFace(float *vertex, float *varying,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
int *F_IT, int offset, int tableOffset, int start, int end)
|
||||
{
|
||||
//computeQuadFace<3, 0><<<512,32>>>(vertex, varying, F_IT, offset, start, end);
|
||||
OPT_KERNEL(0, 0, computeQuadFace, 512, 32, (vertex, varying, F_IT, offset, tableOffset, start, end));
|
||||
OPT_KERNEL(0, 3, computeQuadFace, 512, 32, (vertex, varying, F_IT, offset, tableOffset, start, end));
|
||||
OPT_KERNEL(3, 0, computeQuadFace, 512, 32, (vertex, varying, F_IT, offset, tableOffset, start, end));
|
||||
OPT_KERNEL(3, 3, computeQuadFace, 512, 32, (vertex, varying, F_IT, offset, tableOffset, start, end));
|
||||
|
||||
// fallback kernel (slow)
|
||||
computeQuadFace<<<512, 32>>>(vertex, varying,
|
||||
vertexLength, vertexStride, varyingLength, varyingStride,
|
||||
F_IT, offset, tableOffset, start, end);
|
||||
}
|
||||
|
||||
void OsdCudaComputeTriQuadFace(float *vertex, float *varying,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
int *F_IT, int offset, int tableOffset, int start, int end)
|
||||
{
|
||||
//computeTriQuadFace<3, 0><<<512,32>>>(vertex, varying, F_IT, offset, start, end);
|
||||
OPT_KERNEL(0, 0, computeTriQuadFace, 512, 32, (vertex, varying, F_IT, offset, tableOffset, start, end));
|
||||
OPT_KERNEL(0, 3, computeTriQuadFace, 512, 32, (vertex, varying, F_IT, offset, tableOffset, start, end));
|
||||
OPT_KERNEL(3, 0, computeTriQuadFace, 512, 32, (vertex, varying, F_IT, offset, tableOffset, start, end));
|
||||
OPT_KERNEL(3, 3, computeTriQuadFace, 512, 32, (vertex, varying, F_IT, offset, tableOffset, start, end));
|
||||
|
||||
// fallback kernel (slow)
|
||||
computeTriQuadFace<<<512, 32>>>(vertex, varying,
|
||||
vertexLength, vertexStride, varyingLength, varyingStride,
|
||||
F_IT, offset, tableOffset, start, end);
|
||||
}
|
||||
|
||||
|
||||
void OsdCudaComputeEdge(float *vertex, float *varying,
|
||||
int vertexLength, int vertexStride,
|
||||
|
@ -163,7 +163,29 @@ OsdD3D11ComputeController::ApplyCatmarkFaceVerticesKernel(
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
|
||||
}
|
||||
|
||||
void
|
||||
OsdD3D11ComputeController::ApplyCatmarkQuadFaceVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdD3D11ComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentBindState.kernelBundle->ApplyCatmarkQuadFaceVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(),
|
||||
batch.GetStart(), batch.GetEnd(),
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
|
||||
}
|
||||
|
||||
void
|
||||
OsdD3D11ComputeController::ApplyCatmarkTriQuadFaceVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdD3D11ComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentBindState.kernelBundle->ApplyCatmarkTriQuadFaceVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(),
|
||||
batch.GetStart(), batch.GetEnd(),
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
|
||||
}
|
||||
|
||||
void
|
||||
OsdD3D11ComputeController::ApplyCatmarkEdgeVerticesKernel(
|
||||
|
@ -136,6 +136,10 @@ protected:
|
||||
|
||||
void ApplyCatmarkFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkTriQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
@ -52,6 +52,8 @@ OsdD3D11ComputeKernelBundle::OsdD3D11ComputeKernelBundle(
|
||||
_classLinkage(0),
|
||||
_kernelCB(0),
|
||||
_kernelComputeFace(0),
|
||||
_kernelComputeQuadFace(0),
|
||||
_kernelComputeTriQuadFace(0),
|
||||
_kernelComputeEdge(0),
|
||||
_kernelComputeBilinearEdge(0),
|
||||
_kernelComputeVertex(0),
|
||||
@ -69,6 +71,8 @@ OsdD3D11ComputeKernelBundle::~OsdD3D11ComputeKernelBundle() {
|
||||
SAFE_RELEASE(_classLinkage);
|
||||
SAFE_RELEASE(_kernelCB);
|
||||
SAFE_RELEASE(_kernelComputeFace);
|
||||
SAFE_RELEASE(_kernelComputeQuadFace);
|
||||
SAFE_RELEASE(_kernelComputeTriQuadFace);
|
||||
SAFE_RELEASE(_kernelComputeEdge);
|
||||
SAFE_RELEASE(_kernelComputeBilinearEdge);
|
||||
SAFE_RELEASE(_kernelComputeVertex);
|
||||
@ -163,6 +167,12 @@ OsdD3D11ComputeKernelBundle::Compile(
|
||||
_classLinkage->GetClassInstance(
|
||||
"catmarkComputeFace", 0, &_kernelComputeFace);
|
||||
assert(_kernelComputeFace);
|
||||
_classLinkage->GetClassInstance(
|
||||
"catmarkComputeQuadFace", 0, &_kernelComputeQuadFace);
|
||||
assert(_kernelComputeQuadFace);
|
||||
_classLinkage->GetClassInstance(
|
||||
"catmarkComputeTriQuadFace", 0, &_kernelComputeTriQuadFace);
|
||||
assert(_kernelComputeTriQuadFace);
|
||||
_classLinkage->GetClassInstance(
|
||||
"catmarkComputeEdge", 0, &_kernelComputeEdge);
|
||||
assert(_kernelComputeEdge);
|
||||
@ -302,6 +312,38 @@ OsdD3D11ComputeKernelBundle::ApplyCatmarkFaceVerticesKernel(
|
||||
dispatchCompute(_kernelComputeFace, args);
|
||||
}
|
||||
|
||||
void
|
||||
OsdD3D11ComputeKernelBundle::ApplyCatmarkQuadFaceVerticesKernel(
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
int vertexBaseOffset, int varyingBaseOffset) {
|
||||
|
||||
KernelCB args;
|
||||
ZeroMemory(&args, sizeof(args));
|
||||
args.vertexOffset = vertexOffset;
|
||||
args.tableOffset = tableOffset;
|
||||
args.indexStart = start;
|
||||
args.indexEnd = end;
|
||||
args.vertexBaseOffset = vertexBaseOffset;
|
||||
args.varyingBaseOffset = varyingBaseOffset;
|
||||
dispatchCompute(_kernelComputeQuadFace, args);
|
||||
}
|
||||
|
||||
void
|
||||
OsdD3D11ComputeKernelBundle::ApplyCatmarkTriQuadFaceVerticesKernel(
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
int vertexBaseOffset, int varyingBaseOffset) {
|
||||
|
||||
KernelCB args;
|
||||
ZeroMemory(&args, sizeof(args));
|
||||
args.vertexOffset = vertexOffset;
|
||||
args.tableOffset = tableOffset;
|
||||
args.indexStart = start;
|
||||
args.indexEnd = end;
|
||||
args.vertexBaseOffset = vertexBaseOffset;
|
||||
args.varyingBaseOffset = varyingBaseOffset;
|
||||
dispatchCompute(_kernelComputeTriQuadFace, args);
|
||||
}
|
||||
|
||||
void
|
||||
OsdD3D11ComputeKernelBundle::ApplyCatmarkEdgeVerticesKernel(
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
|
@ -67,6 +67,14 @@ public:
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
int vertexBaseOffset, int varyingBaseOffset);
|
||||
|
||||
void ApplyCatmarkQuadFaceVerticesKernel(
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
int vertexBaseOffset, int varyingBaseOffset);
|
||||
|
||||
void ApplyCatmarkTriQuadFaceVerticesKernel(
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
int vertexBaseOffset, int varyingBaseOffset);
|
||||
|
||||
void ApplyCatmarkEdgeVerticesKernel(
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
int vertexBaseOffset, int varyingBaseOffset);
|
||||
@ -132,6 +140,10 @@ protected:
|
||||
|
||||
ID3D11ClassInstance * _kernelComputeFace; // general face-vertex kernel (all schemes)
|
||||
|
||||
ID3D11ClassInstance * _kernelComputeQuadFace; // quad face-vertex kernel (catmark)
|
||||
|
||||
ID3D11ClassInstance * _kernelComputeTriQuadFace; // tri-quad face-vertex kernel (catmark)
|
||||
|
||||
ID3D11ClassInstance * _kernelComputeEdge; // edge-vertex kernel (catmark + loop schemes)
|
||||
|
||||
ID3D11ClassInstance * _kernelComputeBilinearEdge; // edge-vertex kernel (bilinear scheme)
|
||||
|
@ -92,6 +92,34 @@ OsdGcdComputeController::ApplyCatmarkFaceVerticesKernel(
|
||||
_gcd_queue);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGcdComputeController::ApplyCatmarkQuadFaceVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
OsdGcdComputeQuadFace(
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::F_IT)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(),
|
||||
_gcd_queue);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGcdComputeController::ApplyCatmarkTriQuadFaceVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
OsdGcdComputeTriQuadFace(
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::F_IT)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(),
|
||||
_gcd_queue);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGcdComputeController::ApplyCatmarkEdgeVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
|
||||
|
@ -120,6 +120,10 @@ protected:
|
||||
|
||||
void ApplyCatmarkFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkTriQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
@ -79,6 +79,54 @@ void OsdGcdComputeFace(
|
||||
vertexOffset, tableOffset, start_e, end_e);
|
||||
}
|
||||
|
||||
void OsdGcdComputeQuadFace(
|
||||
float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *F_IT,
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
dispatch_queue_t gcdq) {
|
||||
|
||||
const int workSize = end-start;
|
||||
dispatch_apply(workSize/GCD_WORK_STRIDE, gcdq, ^(size_t blockIdx){
|
||||
const int start_i = start + blockIdx*GCD_WORK_STRIDE;
|
||||
const int end_i = start_i + GCD_WORK_STRIDE;
|
||||
OsdCpuComputeQuadFace(vertex, varying, vertexDesc, varyingDesc,
|
||||
F_IT,
|
||||
vertexOffset, tableOffset, start_i, end_i);
|
||||
});
|
||||
const int start_e = end - workSize%GCD_WORK_STRIDE;
|
||||
const int end_e = end;
|
||||
if (start_e < end_e)
|
||||
OsdCpuComputeQuadFace(vertex, varying, vertexDesc, varyingDesc,
|
||||
F_IT,
|
||||
vertexOffset, tableOffset, start_e, end_e);
|
||||
}
|
||||
|
||||
void OsdGcdComputeTriQuadFace(
|
||||
float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *F_IT,
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
dispatch_queue_t gcdq) {
|
||||
|
||||
const int workSize = end-start;
|
||||
dispatch_apply(workSize/GCD_WORK_STRIDE, gcdq, ^(size_t blockIdx){
|
||||
const int start_i = start + blockIdx*GCD_WORK_STRIDE;
|
||||
const int end_i = start_i + GCD_WORK_STRIDE;
|
||||
OsdCpuComputeTriQuadFace(vertex, varying, vertexDesc, varyingDesc,
|
||||
F_IT,
|
||||
vertexOffset, tableOffset, start_i, end_i);
|
||||
});
|
||||
const int start_e = end - workSize%GCD_WORK_STRIDE;
|
||||
const int end_e = end;
|
||||
if (start_e < end_e)
|
||||
OsdCpuComputeTriQuadFace(vertex, varying, vertexDesc, varyingDesc,
|
||||
F_IT,
|
||||
vertexOffset, tableOffset, start_e, end_e);
|
||||
}
|
||||
|
||||
void OsdGcdComputeEdge(
|
||||
float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
|
@ -42,6 +42,22 @@ void OsdGcdComputeFace(float * vertex, float * varying,
|
||||
int start, int end,
|
||||
dispatch_queue_t gcdq);
|
||||
|
||||
void OsdGcdComputeQuadFace(float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *F_IT,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end,
|
||||
dispatch_queue_t gcdq);
|
||||
|
||||
void OsdGcdComputeTriQuadFace(float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *F_IT,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end,
|
||||
dispatch_queue_t gcdq);
|
||||
|
||||
void OsdGcdComputeEdge(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
|
@ -139,6 +139,28 @@ OsdGLSLComputeController::ApplyCatmarkFaceVerticesKernel(
|
||||
batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdGLSLComputeController::ApplyCatmarkQuadFaceVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdGLSLComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentBindState.kernelBundle->ApplyCatmarkQuadFaceVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(),
|
||||
batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdGLSLComputeController::ApplyCatmarkTriQuadFaceVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdGLSLComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentBindState.kernelBundle->ApplyCatmarkTriQuadFaceVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(),
|
||||
batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdGLSLComputeController::ApplyCatmarkEdgeVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdGLSLComputeContext const *context) const {
|
||||
|
@ -129,6 +129,10 @@ protected:
|
||||
|
||||
void ApplyCatmarkFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkTriQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
@ -178,6 +178,63 @@ void catmarkComputeFace()
|
||||
writeVertex(vid, dst);
|
||||
}
|
||||
|
||||
// Quad face-vertices compute Kernel
|
||||
subroutine(computeKernelType)
|
||||
void catmarkComputeQuadFace()
|
||||
{
|
||||
int i = int(gl_GlobalInvocationID.x) + indexStart;
|
||||
if (i >= indexEnd) return;
|
||||
int vid = i + vertexOffset;
|
||||
int fidx0 = _F_IT[tableOffset + i * 4 + 0];
|
||||
int fidx1 = _F_IT[tableOffset + i * 4 + 1];
|
||||
int fidx2 = _F_IT[tableOffset + i * 4 + 2];
|
||||
int fidx3 = _F_IT[tableOffset + i * 4 + 3];
|
||||
|
||||
Vertex dst;
|
||||
clear(dst);
|
||||
addWithWeight(dst, readVertex(fidx0), 0.25);
|
||||
addWithWeight(dst, readVertex(fidx1), 0.25);
|
||||
addWithWeight(dst, readVertex(fidx2), 0.25);
|
||||
addWithWeight(dst, readVertex(fidx3), 0.25);
|
||||
addVaryingWithWeight(dst, readVertex(fidx0), 0.25);
|
||||
addVaryingWithWeight(dst, readVertex(fidx1), 0.25);
|
||||
addVaryingWithWeight(dst, readVertex(fidx2), 0.25);
|
||||
addVaryingWithWeight(dst, readVertex(fidx3), 0.25);
|
||||
|
||||
writeVertex(vid, dst);
|
||||
}
|
||||
|
||||
// Tri-quad face-vertices compute Kernel
|
||||
subroutine(computeKernelType)
|
||||
void catmarkComputeTriQuadFace()
|
||||
{
|
||||
int i = int(gl_GlobalInvocationID.x) + indexStart;
|
||||
if (i >= indexEnd) return;
|
||||
int vid = i + vertexOffset;
|
||||
int fidx0 = _F_IT[tableOffset + i * 4 + 0];
|
||||
int fidx1 = _F_IT[tableOffset + i * 4 + 1];
|
||||
int fidx2 = _F_IT[tableOffset + i * 4 + 2];
|
||||
int fidx3 = _F_IT[tableOffset + i * 4 + 3];
|
||||
|
||||
bool triangle = (fidx2 == fidx3);
|
||||
float weight = triangle ? 1.0f / 3.0f : 1.0f / 4.0f;
|
||||
|
||||
Vertex dst;
|
||||
clear(dst);
|
||||
addWithWeight(dst, readVertex(fidx0), weight);
|
||||
addWithWeight(dst, readVertex(fidx1), weight);
|
||||
addWithWeight(dst, readVertex(fidx2), weight);
|
||||
addVaryingWithWeight(dst, readVertex(fidx0), weight);
|
||||
addVaryingWithWeight(dst, readVertex(fidx1), weight);
|
||||
addVaryingWithWeight(dst, readVertex(fidx2), weight);
|
||||
if (!triangle) {
|
||||
addWithWeight(dst, readVertex(fidx3), weight);
|
||||
addVaryingWithWeight(dst, readVertex(fidx3), weight);
|
||||
}
|
||||
|
||||
writeVertex(vid, dst);
|
||||
}
|
||||
|
||||
// Edge-vertices compute Kernepl
|
||||
subroutine(computeKernelType)
|
||||
void catmarkComputeEdge()
|
||||
|
@ -117,6 +117,12 @@ OsdGLSLComputeKernelBundle::Compile(
|
||||
_subComputeFace = glGetSubroutineIndex(_program,
|
||||
GL_COMPUTE_SHADER,
|
||||
"catmarkComputeFace");
|
||||
_subComputeQuadFace = glGetSubroutineIndex(_program,
|
||||
GL_COMPUTE_SHADER,
|
||||
"catmarkComputeQuadFace");
|
||||
_subComputeTriQuadFace = glGetSubroutineIndex(_program,
|
||||
GL_COMPUTE_SHADER,
|
||||
"catmarkComputeTriQuadFace");
|
||||
_subComputeEdge = glGetSubroutineIndex(_program,
|
||||
GL_COMPUTE_SHADER,
|
||||
"catmarkComputeEdge");
|
||||
@ -229,6 +235,22 @@ OsdGLSLComputeKernelBundle::ApplyCatmarkFaceVerticesKernel(
|
||||
// glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGLSLComputeKernelBundle::ApplyCatmarkQuadFaceVerticesKernel(
|
||||
int vertexOffset, int tableOffset, int start, int end) {
|
||||
|
||||
glUniformSubroutinesuiv(GL_COMPUTE_SHADER, 1, &_subComputeQuadFace);
|
||||
dispatchCompute(vertexOffset, tableOffset, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGLSLComputeKernelBundle::ApplyCatmarkTriQuadFaceVerticesKernel(
|
||||
int vertexOffset, int tableOffset, int start, int end) {
|
||||
|
||||
glUniformSubroutinesuiv(GL_COMPUTE_SHADER, 1, &_subComputeTriQuadFace);
|
||||
dispatchCompute(vertexOffset, tableOffset, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGLSLComputeKernelBundle::ApplyCatmarkEdgeVerticesKernel(
|
||||
int vertexOffset, int tableOffset, int start, int end) {
|
||||
|
@ -57,6 +57,12 @@ public:
|
||||
void ApplyCatmarkFaceVerticesKernel(
|
||||
int vertexOffset, int tableOffset, int start, int end);
|
||||
|
||||
void ApplyCatmarkQuadFaceVerticesKernel(
|
||||
int vertexOffset, int tableOffset, int start, int end);
|
||||
|
||||
void ApplyCatmarkTriQuadFaceVerticesKernel(
|
||||
int vertexOffset, int tableOffset, int start, int end);
|
||||
|
||||
void ApplyCatmarkEdgeVerticesKernel(
|
||||
int vertexOffset, int tableOffset, int start, int end);
|
||||
|
||||
@ -130,6 +136,10 @@ protected:
|
||||
|
||||
GLuint _subComputeFace; // general face-vertex kernel (all schemes)
|
||||
|
||||
GLuint _subComputeQuadFace; // quad face-vertex kernel (catmark)
|
||||
|
||||
GLuint _subComputeTriQuadFace; // tri-quad face-vertex kernel (catmark)
|
||||
|
||||
GLuint _subComputeEdge; // edge-vertex kernel (catmark + loop schemes)
|
||||
|
||||
GLuint _subComputeBilinearEdge; // edge-vertex kernel (bilinear scheme)
|
||||
|
@ -207,6 +207,29 @@ OsdGLSLTransformFeedbackComputeController::ApplyCatmarkFaceVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdGLSLTransformFeedbackComputeController::ApplyCatmarkQuadFaceVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdGLSLTransformFeedbackComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentBindState.kernelBundle->ApplyCatmarkQuadFaceVerticesKernel(
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdGLSLTransformFeedbackComputeController::ApplyCatmarkTriQuadFaceVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdGLSLTransformFeedbackComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentBindState.kernelBundle->ApplyCatmarkTriQuadFaceVerticesKernel(
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
|
@ -127,6 +127,10 @@ protected:
|
||||
|
||||
void ApplyCatmarkFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkTriQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
@ -186,6 +186,57 @@ void catmarkComputeFace()
|
||||
writeVertex(dst);
|
||||
}
|
||||
|
||||
// Quad face-vertices compute Kernel
|
||||
subroutine(computeKernelType)
|
||||
void catmarkComputeQuadFace()
|
||||
{
|
||||
int i = gl_VertexID + indexStart;
|
||||
int fidx0 = texelFetch(_F0_IT, tableOffset + 4 * i + 0).x;
|
||||
int fidx1 = texelFetch(_F0_IT, tableOffset + 4 * i + 1).x;
|
||||
int fidx2 = texelFetch(_F0_IT, tableOffset + 4 * i + 2).x;
|
||||
int fidx3 = texelFetch(_F0_IT, tableOffset + 4 * i + 3).x;
|
||||
|
||||
Vertex dst;
|
||||
clear(dst);
|
||||
addWithWeight(dst, readVertex(fidx0), 0.25);
|
||||
addWithWeight(dst, readVertex(fidx1), 0.25);
|
||||
addWithWeight(dst, readVertex(fidx2), 0.25);
|
||||
addWithWeight(dst, readVertex(fidx3), 0.25);
|
||||
addVaryingWithWeight(dst, readVertex(fidx0), 0.25);
|
||||
addVaryingWithWeight(dst, readVertex(fidx1), 0.25);
|
||||
addVaryingWithWeight(dst, readVertex(fidx2), 0.25);
|
||||
addVaryingWithWeight(dst, readVertex(fidx3), 0.25);
|
||||
writeVertex(dst);
|
||||
}
|
||||
|
||||
// Tri-quad face-vertices compute Kernel
|
||||
subroutine(computeKernelType)
|
||||
void catmarkComputeTriQuadFace()
|
||||
{
|
||||
int i = gl_VertexID + indexStart;
|
||||
int fidx0 = texelFetch(_F0_IT, tableOffset + 4 * i + 0).x;
|
||||
int fidx1 = texelFetch(_F0_IT, tableOffset + 4 * i + 1).x;
|
||||
int fidx2 = texelFetch(_F0_IT, tableOffset + 4 * i + 2).x;
|
||||
int fidx3 = texelFetch(_F0_IT, tableOffset + 4 * i + 3).x;
|
||||
bool triangle = (fidx2 == fidx3);
|
||||
float weight = triangle ? 1.0f / 3.0f : 1.0f / 4.0f;
|
||||
|
||||
Vertex dst;
|
||||
clear(dst);
|
||||
addWithWeight(dst, readVertex(fidx0), weight);
|
||||
addWithWeight(dst, readVertex(fidx1), weight);
|
||||
addWithWeight(dst, readVertex(fidx2), weight);
|
||||
addVaryingWithWeight(dst, readVertex(fidx0), weight);
|
||||
addVaryingWithWeight(dst, readVertex(fidx1), weight);
|
||||
addVaryingWithWeight(dst, readVertex(fidx2), weight);
|
||||
if (!triangle) {
|
||||
addWithWeight(dst, readVertex(fidx3), weight);
|
||||
addVaryingWithWeight(dst, readVertex(fidx3), weight);
|
||||
}
|
||||
|
||||
writeVertex(dst);
|
||||
}
|
||||
|
||||
// Edge-vertices compute Kernel
|
||||
subroutine(computeKernelType)
|
||||
void catmarkComputeEdge()
|
||||
|
@ -225,6 +225,8 @@ OsdGLSLTransformFeedbackKernelBundle::Compile(
|
||||
_uniformVaryingBuffer = glGetUniformLocation(_program, "varyingData");
|
||||
|
||||
_subComputeFace = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeFace");
|
||||
_subComputeQuadFace = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeQuadFace");
|
||||
_subComputeTriQuadFace = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeTriQuadFace");
|
||||
_subComputeEdge = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeEdge");
|
||||
_subComputeBilinearEdge = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "bilinearComputeEdge");
|
||||
_subComputeVertex = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "bilinearComputeVertex");
|
||||
@ -356,6 +358,30 @@ OsdGLSLTransformFeedbackKernelBundle::ApplyBilinearVertexVerticesKernel(
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkQuadFaceVerticesKernel(
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset, int start, int end) {
|
||||
|
||||
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeQuadFace);
|
||||
transformGpuBufferData(vertexBuffer, varyingBuffer,
|
||||
vertexOffset, varyingOffset,
|
||||
offset, tableOffset, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkTriQuadFaceVerticesKernel(
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset, int start, int end) {
|
||||
|
||||
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeTriQuadFace);
|
||||
transformGpuBufferData(vertexBuffer, varyingBuffer,
|
||||
vertexOffset, varyingOffset,
|
||||
offset, tableOffset, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkFaceVerticesKernel(
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
|
@ -68,6 +68,16 @@ public:
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset, int start, int end);
|
||||
|
||||
void ApplyCatmarkQuadFaceVerticesKernel(
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset, int start, int end);
|
||||
|
||||
void ApplyCatmarkTriQuadFaceVerticesKernel(
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset, int start, int end);
|
||||
|
||||
void ApplyCatmarkEdgeVerticesKernel(
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
int vertexOffset, int varyingOffset,
|
||||
@ -181,6 +191,10 @@ protected:
|
||||
|
||||
GLuint _subComputeFace; // general face-vertex kernel (all schemes)
|
||||
|
||||
GLuint _subComputeQuadFace; // quad face-vertex kernel (catmark)
|
||||
|
||||
GLuint _subComputeTriQuadFace; // tri-quad face-vertex kernel (catmark)
|
||||
|
||||
GLuint _subComputeEdge; // edge-vertex kernel (catmark + loop schemes)
|
||||
|
||||
GLuint _subComputeBilinearEdge; // edge-vertex kernel (bilinear scheme)
|
||||
|
@ -168,6 +168,70 @@ void runKernel( uint3 ID )
|
||||
}
|
||||
};
|
||||
|
||||
// Quad face-vertices compute Kernel
|
||||
class CatmarkComputeQuadFace : IComputeKernel {
|
||||
int placeholder;
|
||||
void runKernel( uint3 ID )
|
||||
{
|
||||
int i = int(ID.x) + indexStart;
|
||||
if (i >= indexEnd) return;
|
||||
int vid = i + vertexOffset;
|
||||
|
||||
int fidx0 = _F_IT[tableOffset + 4 * i + 0];
|
||||
int fidx1 = _F_IT[tableOffset + 4 * i + 1];
|
||||
int fidx2 = _F_IT[tableOffset + 4 * i + 2];
|
||||
int fidx3 = _F_IT[tableOffset + 4 * i + 3];
|
||||
|
||||
Vertex dst;
|
||||
clear(dst);
|
||||
|
||||
addWithWeight(dst, readVertex(fidx0), 0.25f);
|
||||
addWithWeight(dst, readVertex(fidx1), 0.25f);
|
||||
addWithWeight(dst, readVertex(fidx2), 0.25f);
|
||||
addWithWeight(dst, readVertex(fidx3), 0.25f);
|
||||
addVaryingWithWeight(dst, readVertex(fidx0), 0.25f);
|
||||
addVaryingWithWeight(dst, readVertex(fidx1), 0.25f);
|
||||
addVaryingWithWeight(dst, readVertex(fidx2), 0.25f);
|
||||
addVaryingWithWeight(dst, readVertex(fidx3), 0.25f);
|
||||
|
||||
writeVertex(vid, dst);
|
||||
}
|
||||
};
|
||||
|
||||
// Tri-quad face-vertices compute Kernel
|
||||
class CatmarkComputeTriQuadFace : IComputeKernel {
|
||||
int placeholder;
|
||||
void runKernel( uint3 ID )
|
||||
{
|
||||
int i = int(ID.x) + indexStart;
|
||||
if (i >= indexEnd) return;
|
||||
int vid = i + vertexOffset;
|
||||
|
||||
int fidx0 = _F_IT[tableOffset + 4 * i + 0];
|
||||
int fidx1 = _F_IT[tableOffset + 4 * i + 1];
|
||||
int fidx2 = _F_IT[tableOffset + 4 * i + 2];
|
||||
int fidx3 = _F_IT[tableOffset + 4 * i + 3];
|
||||
bool triangle = (fidx2 == fidx3);
|
||||
float weight = (triangle ? 1.0f / 3.0f : 1.0f / 4.0f);
|
||||
|
||||
Vertex dst;
|
||||
clear(dst);
|
||||
|
||||
addWithWeight(dst, readVertex(fidx0), weight);
|
||||
addWithWeight(dst, readVertex(fidx1), weight);
|
||||
addWithWeight(dst, readVertex(fidx2), weight);
|
||||
addVaryingWithWeight(dst, readVertex(fidx0), weight);
|
||||
addVaryingWithWeight(dst, readVertex(fidx1), weight);
|
||||
addVaryingWithWeight(dst, readVertex(fidx2), weight);
|
||||
if (!triangle) {
|
||||
addWithWeight(dst, readVertex(fidx3), weight);
|
||||
addVaryingWithWeight(dst, readVertex(fidx3), weight);
|
||||
}
|
||||
|
||||
writeVertex(vid, dst);
|
||||
}
|
||||
};
|
||||
|
||||
// Edge-vertices compute Kernel
|
||||
class CatmarkComputeEdge : IComputeKernel {
|
||||
int placeholder;
|
||||
@ -391,6 +455,8 @@ void runKernel( uint3 ID )
|
||||
};
|
||||
|
||||
CatmarkComputeFace catmarkComputeFace;
|
||||
CatmarkComputeQuadFace catmarkComputeQuadFace;
|
||||
CatmarkComputeTriQuadFace catmarkComputeTriQuadFace;
|
||||
CatmarkComputeEdge catmarkComputeEdge;
|
||||
BilinearComputeEdge bilinearComputeEdge;
|
||||
BilinearComputeVertex bilinearComputeVertex;
|
||||
|
@ -94,6 +94,32 @@ OsdOmpComputeController::ApplyCatmarkFaceVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdOmpComputeController::ApplyCatmarkQuadFaceVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
OsdOmpComputeQuadFace(
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::F_IT)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdOmpComputeController::ApplyCatmarkTriQuadFaceVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
OsdOmpComputeTriQuadFace(
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::F_IT)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdOmpComputeController::ApplyCatmarkEdgeVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
|
||||
|
@ -128,6 +128,10 @@ protected:
|
||||
|
||||
void ApplyCatmarkFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkTriQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
@ -103,6 +103,98 @@ void OsdOmpComputeFace(
|
||||
}
|
||||
}
|
||||
|
||||
void OsdOmpComputeQuadFace(
|
||||
float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *F_IT, int offset, int tableOffset, int start, int end) {
|
||||
|
||||
int numThreads = omp_get_max_threads();
|
||||
float *vertexResultsArray = (float*)alloca(vertexDesc.length * sizeof(float) * numThreads);
|
||||
float *varyingResultsArray = (float*)alloca(varyingDesc.length * sizeof(float) * numThreads);
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int i = start ; i < end ; i++) {
|
||||
int fidx0 = F_IT[tableOffset + 4 * i + 0];
|
||||
int fidx1 = F_IT[tableOffset + 4 * i + 1];
|
||||
int fidx2 = F_IT[tableOffset + 4 * i + 2];
|
||||
int fidx3 = F_IT[tableOffset + 4 * i + 3];
|
||||
|
||||
int dstIndex = offset + i;
|
||||
|
||||
int threadId = omp_get_thread_num();
|
||||
float *vertexResults = vertexResultsArray +
|
||||
vertexDesc.length * threadId;
|
||||
float *varyingResults = varyingResultsArray +
|
||||
varyingDesc.length * threadId;
|
||||
|
||||
// clear
|
||||
clear(vertexResults, vertexDesc);
|
||||
clear(varyingResults, varyingDesc);
|
||||
|
||||
addWithWeight(vertexResults, vertex, fidx0, 0.25f, vertexDesc);
|
||||
addWithWeight(vertexResults, vertex, fidx1, 0.25f, vertexDesc);
|
||||
addWithWeight(vertexResults, vertex, fidx2, 0.25f, vertexDesc);
|
||||
addWithWeight(vertexResults, vertex, fidx3, 0.25f, vertexDesc);
|
||||
addWithWeight(varyingResults, varying, fidx0, 0.25f, varyingDesc);
|
||||
addWithWeight(varyingResults, varying, fidx1, 0.25f, varyingDesc);
|
||||
addWithWeight(varyingResults, varying, fidx2, 0.25f, varyingDesc);
|
||||
addWithWeight(varyingResults, varying, fidx3, 0.25f, varyingDesc);
|
||||
|
||||
// write results
|
||||
copy(vertex, vertexResults, dstIndex, vertexDesc);
|
||||
copy(varying, varyingResults, dstIndex, varyingDesc);
|
||||
}
|
||||
}
|
||||
|
||||
void OsdOmpComputeTriQuadFace(
|
||||
float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *F_IT, int offset, int tableOffset, int start, int end) {
|
||||
|
||||
int numThreads = omp_get_max_threads();
|
||||
float *vertexResultsArray = (float*)alloca(vertexDesc.length * sizeof(float) * numThreads);
|
||||
float *varyingResultsArray = (float*)alloca(varyingDesc.length * sizeof(float) * numThreads);
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int i = start ; i < end ; i++) {
|
||||
int fidx0 = F_IT[tableOffset + 4 * i + 0];
|
||||
int fidx1 = F_IT[tableOffset + 4 * i + 1];
|
||||
int fidx2 = F_IT[tableOffset + 4 * i + 2];
|
||||
int fidx3 = F_IT[tableOffset + 4 * i + 3];
|
||||
bool triangle = (fidx2 == fidx3);
|
||||
float weight = (triangle ? 1.0f / 3.0f : 1.0f / 4.0f);
|
||||
|
||||
int dstIndex = offset + i;
|
||||
|
||||
int threadId = omp_get_thread_num();
|
||||
float *vertexResults = vertexResultsArray +
|
||||
vertexDesc.length * threadId;
|
||||
float *varyingResults = varyingResultsArray +
|
||||
varyingDesc.length * threadId;
|
||||
|
||||
// clear
|
||||
clear(vertexResults, vertexDesc);
|
||||
clear(varyingResults, varyingDesc);
|
||||
|
||||
addWithWeight(vertexResults, vertex, fidx0, weight, vertexDesc);
|
||||
addWithWeight(vertexResults, vertex, fidx1, weight, vertexDesc);
|
||||
addWithWeight(vertexResults, vertex, fidx2, weight, vertexDesc);
|
||||
addWithWeight(varyingResults, varying, fidx0, weight, varyingDesc);
|
||||
addWithWeight(varyingResults, varying, fidx1, weight, varyingDesc);
|
||||
addWithWeight(varyingResults, varying, fidx2, weight, varyingDesc);
|
||||
if (!triangle) {
|
||||
addWithWeight(vertexResults, vertex, fidx3, weight, vertexDesc);
|
||||
addWithWeight(varyingResults, varying, fidx3, weight, varyingDesc);
|
||||
}
|
||||
|
||||
// write results
|
||||
copy(vertex, vertexResults, dstIndex, vertexDesc);
|
||||
copy(varying, varyingResults, dstIndex, varyingDesc);
|
||||
}
|
||||
}
|
||||
|
||||
void OsdOmpComputeEdge(
|
||||
float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
|
@ -40,6 +40,20 @@ void OsdOmpComputeFace(float * vertex, float * varying,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdOmpComputeQuadFace(float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *F_IT,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdOmpComputeTriQuadFace(float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *F_IT,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdOmpComputeEdge(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
|
@ -100,6 +100,32 @@ OsdTbbComputeController::ApplyCatmarkFaceVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdTbbComputeController::ApplyCatmarkQuadFaceVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
OsdTbbComputeQuadFace(
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::F_IT)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdTbbComputeController::ApplyCatmarkTriQuadFaceVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
OsdTbbComputeTriQuadFace(
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::F_IT)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdTbbComputeController::ApplyCatmarkEdgeVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
|
||||
|
@ -120,6 +120,10 @@ protected:
|
||||
|
||||
void ApplyCatmarkFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkTriQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
@ -142,6 +142,162 @@ void OsdTbbComputeFace(
|
||||
tbb::parallel_for(range, kernel);
|
||||
}
|
||||
|
||||
class TBBQuadFaceKernel {
|
||||
float *vertex;
|
||||
float *varying;
|
||||
OsdVertexBufferDescriptor vertexDesc;
|
||||
OsdVertexBufferDescriptor varyingDesc;
|
||||
int const *F_IT;
|
||||
int vertexOffset;
|
||||
int tableOffset;
|
||||
|
||||
public:
|
||||
void operator() (tbb::blocked_range<int> const &r) const {
|
||||
for (int i = r.begin(); i < r.end(); i++) {
|
||||
int fidx0 = F_IT[tableOffset + 4 * i + 0];
|
||||
int fidx1 = F_IT[tableOffset + 4 * i + 1];
|
||||
int fidx2 = F_IT[tableOffset + 4 * i + 2];
|
||||
int fidx3 = F_IT[tableOffset + 4 * i + 3];
|
||||
|
||||
// XXX: should use local vertex struct variable instead of
|
||||
// accumulating directly into global memory.
|
||||
int dstIndex = i + vertexOffset;
|
||||
|
||||
clear(vertex, dstIndex, vertexDesc);
|
||||
clear(varying, dstIndex, varyingDesc);
|
||||
|
||||
addWithWeight(vertex, dstIndex, fidx0, 0.25f, vertexDesc);
|
||||
addWithWeight(vertex, dstIndex, fidx1, 0.25f, vertexDesc);
|
||||
addWithWeight(vertex, dstIndex, fidx2, 0.25f, vertexDesc);
|
||||
addWithWeight(vertex, dstIndex, fidx3, 0.25f, vertexDesc);
|
||||
addWithWeight(varying, dstIndex, fidx0, 0.25f, varyingDesc);
|
||||
addWithWeight(varying, dstIndex, fidx1, 0.25f, varyingDesc);
|
||||
addWithWeight(varying, dstIndex, fidx2, 0.25f, varyingDesc);
|
||||
addWithWeight(varying, dstIndex, fidx3, 0.25f, varyingDesc);
|
||||
}
|
||||
}
|
||||
|
||||
TBBQuadFaceKernel(TBBQuadFaceKernel const &other)
|
||||
{
|
||||
this->vertex = other.vertex;
|
||||
this->varying= other.varying;
|
||||
this->vertexDesc = other.vertexDesc;
|
||||
this->varyingDesc = other.varyingDesc;
|
||||
this->F_IT = other.F_IT;
|
||||
this->vertexOffset = other.vertexOffset;
|
||||
this->tableOffset = other.tableOffset;
|
||||
}
|
||||
|
||||
TBBQuadFaceKernel(float *vertex_in,
|
||||
float *varying_in,
|
||||
OsdVertexBufferDescriptor const &vertexDesc_in,
|
||||
OsdVertexBufferDescriptor const &varyingDesc_in,
|
||||
int const *F_IT_in,
|
||||
int vertexOffset_in,
|
||||
int tableOffset_in) :
|
||||
vertex (vertex_in),
|
||||
varying(varying_in),
|
||||
vertexDesc(vertexDesc_in),
|
||||
varyingDesc(varyingDesc_in),
|
||||
F_IT (F_IT_in),
|
||||
vertexOffset(vertexOffset_in),
|
||||
tableOffset(tableOffset_in)
|
||||
{};
|
||||
};
|
||||
|
||||
void OsdTbbComputeQuadFace(
|
||||
float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
int const *F_IT, int vertexOffset, int tableOffset,
|
||||
int start, int end) {
|
||||
|
||||
TBBQuadFaceKernel kernel(vertex, varying, vertexDesc, varyingDesc, F_IT,
|
||||
vertexOffset, tableOffset);
|
||||
tbb::blocked_range<int> range(start, end, grain_size);
|
||||
tbb::parallel_for(range, kernel);
|
||||
}
|
||||
|
||||
class TBBTriQuadFaceKernel {
|
||||
float *vertex;
|
||||
float *varying;
|
||||
OsdVertexBufferDescriptor vertexDesc;
|
||||
OsdVertexBufferDescriptor varyingDesc;
|
||||
int const *F_IT;
|
||||
int vertexOffset;
|
||||
int tableOffset;
|
||||
|
||||
public:
|
||||
void operator() (tbb::blocked_range<int> const &r) const {
|
||||
for (int i = r.begin(); i < r.end(); i++) {
|
||||
int fidx0 = F_IT[tableOffset + 4 * i + 0];
|
||||
int fidx1 = F_IT[tableOffset + 4 * i + 1];
|
||||
int fidx2 = F_IT[tableOffset + 4 * i + 2];
|
||||
int fidx3 = F_IT[tableOffset + 4 * i + 3];
|
||||
bool triangle = (fidx2 == fidx3);
|
||||
float weight = (triangle ? 1.0f / 3.0f : 1.0f / 4.0f);
|
||||
|
||||
// XXX: should use local vertex struct variable instead of
|
||||
// accumulating directly into global memory.
|
||||
int dstIndex = i + vertexOffset;
|
||||
|
||||
clear(vertex, dstIndex, vertexDesc);
|
||||
clear(varying, dstIndex, varyingDesc);
|
||||
|
||||
addWithWeight(vertex, dstIndex, fidx0, weight, vertexDesc);
|
||||
addWithWeight(vertex, dstIndex, fidx1, weight, vertexDesc);
|
||||
addWithWeight(vertex, dstIndex, fidx2, weight, vertexDesc);
|
||||
addWithWeight(varying, dstIndex, fidx0, weight, varyingDesc);
|
||||
addWithWeight(varying, dstIndex, fidx1, weight, varyingDesc);
|
||||
addWithWeight(varying, dstIndex, fidx2, weight, varyingDesc);
|
||||
if (!triangle) {
|
||||
addWithWeight(vertex, dstIndex, fidx3, weight, vertexDesc);
|
||||
addWithWeight(varying, dstIndex, fidx3, weight, varyingDesc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TBBTriQuadFaceKernel(TBBTriQuadFaceKernel const &other)
|
||||
{
|
||||
this->vertex = other.vertex;
|
||||
this->varying= other.varying;
|
||||
this->vertexDesc = other.vertexDesc;
|
||||
this->varyingDesc = other.varyingDesc;
|
||||
this->F_IT = other.F_IT;
|
||||
this->vertexOffset = other.vertexOffset;
|
||||
this->tableOffset = other.tableOffset;
|
||||
}
|
||||
|
||||
TBBTriQuadFaceKernel(float *vertex_in,
|
||||
float *varying_in,
|
||||
OsdVertexBufferDescriptor const &vertexDesc_in,
|
||||
OsdVertexBufferDescriptor const &varyingDesc_in,
|
||||
int const *F_IT_in,
|
||||
int vertexOffset_in,
|
||||
int tableOffset_in) :
|
||||
vertex (vertex_in),
|
||||
varying(varying_in),
|
||||
vertexDesc(vertexDesc_in),
|
||||
varyingDesc(varyingDesc_in),
|
||||
F_IT (F_IT_in),
|
||||
vertexOffset(vertexOffset_in),
|
||||
tableOffset(tableOffset_in)
|
||||
{};
|
||||
};
|
||||
|
||||
void OsdTbbComputeTriQuadFace(
|
||||
float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
int const *F_IT, int vertexOffset, int tableOffset,
|
||||
int start, int end) {
|
||||
|
||||
TBBTriQuadFaceKernel kernel(vertex, varying, vertexDesc, varyingDesc, F_IT,
|
||||
vertexOffset, tableOffset);
|
||||
tbb::blocked_range<int> range(start, end, grain_size);
|
||||
tbb::parallel_for(range, kernel);
|
||||
}
|
||||
|
||||
class TBBEdgeKernel {
|
||||
float *vertex;
|
||||
float *varying;
|
||||
|
@ -39,6 +39,20 @@ void OsdTbbComputeFace(float * vertex, float * varying,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdTbbComputeQuadFace(float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
int const *F_IT,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdTbbComputeTriQuadFace(float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
int const *F_IT,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdTbbComputeEdge(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
|
Loading…
Reference in New Issue
Block a user