Merge pull request #308 from nathan-at-digitalfish/new_face_vertex_kernels

New face vertex kernels
This commit is contained in:
Manuel Kraemer 2014-05-30 10:56:12 -07:00
commit 626921d82e
45 changed files with 1586 additions and 77 deletions

View File

@ -74,15 +74,26 @@ FarCatmarkSubdivisionTablesFactory<T,U>::Create( FarMeshFactory<T,U> * meshFacto
FarSubdivisionTables * result = new FarSubdivisionTables(maxlevel, FarSubdivisionTables::CATMARK);
// Calculate the size of the face-vertex index table
int minCoarseFaceValence = tablesFactory.GetMinCoarseFaceValence();
int maxCoarseFaceValence = tablesFactory.GetMaxCoarseFaceValence();
bool coarseMeshAllQuadFaces = minCoarseFaceValence == 4 && maxCoarseFaceValence == 4;
bool coarseMeshAllTriQuadFaces = minCoarseFaceValence >= 3 && maxCoarseFaceValence <= 4;
int F_IT_size = tablesFactory.GetFaceVertsValenceSum();
if (coarseMeshAllTriQuadFaces)
F_IT_size += tablesFactory.GetNumCoarseTriangleFaces(); // add padding for tri faces
// Allocate memory for the indexing tables
result->_F_ITa.resize(tablesFactory.GetNumFaceVerticesTotal(maxlevel)*2);
result->_F_IT.resize(tablesFactory.GetFaceVertsValenceSum());
if (!coarseMeshAllTriQuadFaces)
result->_F_ITa.resize(tablesFactory.GetNumFaceVerticesTotal(1) * 2);
result->_F_IT.resize(F_IT_size);
result->_E_IT.resize(tablesFactory.GetNumEdgeVerticesTotal(maxlevel)*4);
result->_E_W.resize(tablesFactory.GetNumEdgeVerticesTotal(maxlevel)*2);
result->_V_ITa.resize((tablesFactory.GetNumVertexVerticesTotal(maxlevel)
- tablesFactory.GetNumVertexVerticesTotal(0))*5); // subtract corase cage vertices
- tablesFactory.GetNumVertexVerticesTotal(0))*5); // subtract coarse cage vertices
result->_V_IT.resize(tablesFactory.GetVertVertsValenceSum()*2);
result->_V_W.resize(tablesFactory.GetNumVertexVerticesTotal(maxlevel)
- tablesFactory.GetNumVertexVerticesTotal(0));
@ -115,18 +126,44 @@ FarCatmarkSubdivisionTablesFactory<T,U>::Create( FarMeshFactory<T,U> * meshFacto
// "For each vertex, gather all the vertices from the parent face."
int nFaceVertices = (int)tablesFactory._faceVertsList[level].size();
// choose the kernel type that best fits the face topology
int kernelType;
if (level == 1) {
if (coarseMeshAllQuadFaces)
kernelType = FarKernelBatch::CATMARK_QUAD_FACE_VERTEX;
else if (coarseMeshAllTriQuadFaces)
kernelType = FarKernelBatch::CATMARK_TRI_QUAD_FACE_VERTEX;
else
kernelType = FarKernelBatch::CATMARK_FACE_VERTEX;
} else {
kernelType = FarKernelBatch::CATMARK_QUAD_FACE_VERTEX;
}
// add a batch for face vertices
if (nFaceVertices > 0) // in torus case, nfacevertices could be zero
batches->push_back(FarKernelBatch( FarKernelBatch::CATMARK_FACE_VERTEX,
level,
0,
0,
nFaceVertices,
faceTableOffset,
vertexOffset) );
if (nFaceVertices > 0) { // in torus case, nfacevertices could be zero
if (kernelType == FarKernelBatch::CATMARK_FACE_VERTEX) {
batches->push_back(FarKernelBatch( kernelType,
level,
0,
0,
nFaceVertices,
faceTableOffset,
vertexOffset) );
} else {
// quad and tri-quad kernels store the offset of the first vertex in the table offset
batches->push_back(FarKernelBatch( kernelType,
level,
0,
0,
nFaceVertices,
F_IT_offset,
vertexOffset) );
}
}
vertexOffset += nFaceVertices;
faceTableOffset += nFaceVertices;
if (kernelType == FarKernelBatch::CATMARK_FACE_VERTEX)
faceTableOffset += nFaceVertices;
for (int i=0; i < nFaceVertices; ++i) {
@ -138,13 +175,17 @@ FarCatmarkSubdivisionTablesFactory<T,U>::Create( FarMeshFactory<T,U> * meshFacto
int valence = f->GetNumVertices();
F_ITa[2*i+0] = F_IT_offset;
F_ITa[2*i+1] = valence;
if (kernelType == FarKernelBatch::CATMARK_FACE_VERTEX) {
*F_ITa++ = F_IT_offset;
*F_ITa++ = valence;
}
for (int j=0; j<valence; ++j)
F_IT[F_IT_offset++] = remap[f->GetVertex(j)->GetID()];
if (kernelType == FarKernelBatch::CATMARK_TRI_QUAD_FACE_VERTEX && valence == 3)
F_IT[F_IT_offset++] = remap[f->GetVertex(2)->GetID()]; // repeat last index
}
F_ITa += nFaceVertices * 2;
// Edge vertices

View File

@ -87,6 +87,12 @@ FarDispatcher::ApplyKernel(CONTROLLER *controller, CONTEXT *context, FarKernelBa
case FarKernelBatch::CATMARK_FACE_VERTEX:
controller->ApplyCatmarkFaceVerticesKernel(batch, context);
break;
case FarKernelBatch::CATMARK_QUAD_FACE_VERTEX:
controller->ApplyCatmarkQuadFaceVerticesKernel(batch, context);
break;
case FarKernelBatch::CATMARK_TRI_QUAD_FACE_VERTEX:
controller->ApplyCatmarkTriQuadFaceVerticesKernel(batch, context);
break;
case FarKernelBatch::CATMARK_EDGE_VERTEX:
controller->ApplyCatmarkEdgeVerticesKernel(batch, context);
break;
@ -170,6 +176,12 @@ public:
template <class CONTEXT>
void ApplyCatmarkFaceVerticesKernel(FarKernelBatch const &batch, CONTEXT *context) const;
template <class CONTEXT>
void ApplyCatmarkQuadFaceVerticesKernel(FarKernelBatch const &batch, CONTEXT *context) const;
template <class CONTEXT>
void ApplyCatmarkTriQuadFaceVerticesKernel(FarKernelBatch const &batch, CONTEXT *context) const;
template <class CONTEXT>
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, CONTEXT *context) const;
@ -268,6 +280,36 @@ FarComputeController::ApplyCatmarkFaceVerticesKernel(FarKernelBatch const &batch
vsrc );
}
template <class CONTEXT> void
FarComputeController::ApplyCatmarkQuadFaceVerticesKernel(FarKernelBatch const &batch, CONTEXT *context) const {
typename CONTEXT::VertexType *vsrc = &context->GetVertices().at(0);
FarSubdivisionTables const * subdivision = context->GetSubdivisionTables();
assert(subdivision);
subdivision->computeCatmarkQuadFacePoints( batch.GetVertexOffset(),
batch.GetTableOffset(),
batch.GetStart(),
batch.GetEnd(),
vsrc );
}
template <class CONTEXT> void
FarComputeController::ApplyCatmarkTriQuadFaceVerticesKernel(FarKernelBatch const &batch, CONTEXT *context) const {
typename CONTEXT::VertexType *vsrc = &context->GetVertices().at(0);
FarSubdivisionTables const * subdivision = context->GetSubdivisionTables();
assert(subdivision);
subdivision->computeCatmarkTriQuadFacePoints( batch.GetVertexOffset(),
batch.GetTableOffset(),
batch.GetStart(),
batch.GetEnd(),
vsrc );
}
template <class CONTEXT> void
FarComputeController::ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, CONTEXT *context) const {

View File

@ -66,6 +66,8 @@ public:
enum KernelType {
CATMARK_FACE_VERTEX = 1,
CATMARK_QUAD_FACE_VERTEX,
CATMARK_TRI_QUAD_FACE_VERTEX,
CATMARK_EDGE_VERTEX,
CATMARK_VERT_VERTEX_A1,
CATMARK_VERT_VERTEX_A2,

View File

@ -131,9 +131,9 @@ private:
end;
};
Range kernelB; // vertex batch reange (kernel B)
Range kernelA1; // vertex batch reange (kernel A pass 1)
Range kernelA2; // vertex batch reange (kernel A pass 2)
Range kernelB; // vertex batch range (kernel B)
Range kernelA1; // vertex batch range (kernel A pass 1)
Range kernelA2; // vertex batch range (kernel A pass 2)
};
inline void

View File

@ -161,21 +161,29 @@ public:
// Compute-kernel applied to vertices resulting from the refinement of a face.
template <class U>
void computeCatmarkFacePoints(int offset, int level, int start, int end, U * vsrc) const;
void computeCatmarkFacePoints(int vertexOffset, int tableOffset, int start, int end, U * vsrc) const;
// Compute-kernel applied to vertices resulting from the refinement of a quad face.
template <class U>
void computeCatmarkQuadFacePoints(int vertexOffset, int tableOffset, int start, int end, U * vsrc) const;
// Compute-kernel applied to vertices resulting from the refinement of a tri or quad face.
template <class U>
void computeCatmarkTriQuadFacePoints(int vertexOffset, int tableOffset, int start, int end, U * vsrc) const;
// Compute-kernel applied to vertices resulting from the refinement of an edge.
template <class U>
void computeCatmarkEdgePoints(int offset, int level, int start, int end, U * vsrc) const;
void computeCatmarkEdgePoints(int vertexOffset, int tableOffset, int start, int end, U * vsrc) const;
// Compute-kernel applied to vertices resulting from the refinement of a vertex
// Kernel "A" Handles the k_Smooth and k_Dart rules
// Kernel "A" Handles the k_Crease and k_Corner rules
template <class U>
void computeCatmarkVertexPointsA(int offset, bool pass, int level, int start, int end, U * vsrc) const;
void computeCatmarkVertexPointsA(int vertexOffset, bool pass, int tableOffset, int start, int end, U * vsrc) const;
// Compute-kernel applied to vertices resulting from the refinement of a vertex
// Kernel "B" Handles the k_Crease and k_Corner rules
// Kernel "B" Handles the k_Smooth and k_Dart rules
template <class U>
void computeCatmarkVertexPointsB(int offset, int level, int start, int end, U * vsrc) const;
void computeCatmarkVertexPointsB(int vertexOffset, int tableOffset, int start, int end, U * vsrc) const;
// -------------------------------------------------------------------------
// Loop scheme
@ -342,14 +350,15 @@ FarSubdivisionTables::computeBilinearVertexPoints( int offset, int tableOffset,
vdst->AddVaryingWithWeight( vsrc[p], 1.0f );
}
}
//
// Face-vertices compute Kernel - completely re-entrant
//
template <class U> void
FarSubdivisionTables::computeCatmarkFacePoints( int offset, int tableOffset, int start, int end, U * vsrc ) const {
FarSubdivisionTables::computeCatmarkFacePoints( int vertexOffset, int tableOffset, int start, int end, U * vsrc ) const {
U * vdst = vsrc + offset + start;
U * vdst = vsrc + vertexOffset + start;
for (int i=start+tableOffset; i<end+tableOffset; ++i, ++vdst ) {
@ -366,14 +375,73 @@ FarSubdivisionTables::computeCatmarkFacePoints( int offset, int tableOffset, int
}
}
//
// Quad face-vertices compute Kernel - completely re-entrant
//
template <class U> void
FarSubdivisionTables::computeCatmarkQuadFacePoints( int vertexOffset, int tableOffset, int start, int end, U * vsrc ) const {
U * vdst = vsrc + vertexOffset + start;
for (int i=start; i<end; ++i, ++vdst ) {
int fidx0 = _F_IT[tableOffset + 4 * i + 0];
int fidx1 = _F_IT[tableOffset + 4 * i + 1];
int fidx2 = _F_IT[tableOffset + 4 * i + 2];
int fidx3 = _F_IT[tableOffset + 4 * i + 3];
vdst->Clear();
vdst->AddWithWeight(vsrc[fidx0], 0.25f);
vdst->AddVaryingWithWeight(vsrc[fidx0], 0.25f);
vdst->AddWithWeight(vsrc[fidx1], 0.25f);
vdst->AddVaryingWithWeight(vsrc[fidx1], 0.25f);
vdst->AddWithWeight(vsrc[fidx2], 0.25f);
vdst->AddVaryingWithWeight(vsrc[fidx2], 0.25f);
vdst->AddWithWeight(vsrc[fidx3], 0.25f);
vdst->AddVaryingWithWeight(vsrc[fidx3], 0.25f);
}
}
//
// Tri/quad face-vertices compute Kernel - completely re-entrant
//
template <class U> void
FarSubdivisionTables::computeCatmarkTriQuadFacePoints( int vertexOffset, int tableOffset, int start, int end, U * vsrc ) const {
U * vdst = vsrc + vertexOffset + start;
for (int i=start; i<end; ++i, ++vdst ) {
int fidx0 = _F_IT[tableOffset + 4 * i + 0];
int fidx1 = _F_IT[tableOffset + 4 * i + 1];
int fidx2 = _F_IT[tableOffset + 4 * i + 2];
int fidx3 = _F_IT[tableOffset + 4 * i + 3];
bool triangle = (fidx3 == fidx2);
float weight = triangle ? 1.0f / 3.0f : 1.0f / 4.0f;
vdst->Clear();
vdst->AddWithWeight(vsrc[fidx0], weight);
vdst->AddVaryingWithWeight(vsrc[fidx0], weight);
vdst->AddWithWeight(vsrc[fidx1], weight);
vdst->AddVaryingWithWeight(vsrc[fidx1], weight);
vdst->AddWithWeight(vsrc[fidx2], weight);
vdst->AddVaryingWithWeight(vsrc[fidx2], weight);
if (!triangle) {
vdst->AddWithWeight(vsrc[fidx3], weight);
vdst->AddVaryingWithWeight(vsrc[fidx3], weight);
}
}
}
//
// Edge-vertices compute Kernel - completely re-entrant
//
template <class U> void
FarSubdivisionTables::computeCatmarkEdgePoints( int offset, int tableOffset, int start, int end, U * vsrc ) const {
FarSubdivisionTables::computeCatmarkEdgePoints( int vertexOffset, int tableOffset, int start, int end, U * vsrc ) const {
U * vdst = vsrc + offset + start;
U * vdst = vsrc + vertexOffset + start;
for (int i=start+tableOffset; i<end+tableOffset; ++i, ++vdst ) {
@ -409,16 +477,16 @@ FarSubdivisionTables::computeCatmarkEdgePoints( int offset, int tableOffset, int
// multi-pass kernel handling k_Crease and k_Corner rules
template <class U> void
FarSubdivisionTables::computeCatmarkVertexPointsA( int offset, bool pass, int tableOffset, int start, int end, U * vsrc ) const {
FarSubdivisionTables::computeCatmarkVertexPointsA( int vertexOffset, bool pass, int tableOffset, int start, int end, U * vsrc ) const {
U * vdst = vsrc + offset + start;
U * vdst = vsrc + vertexOffset + start;
for (int i=start+tableOffset; i<end+tableOffset; ++i, ++vdst ) {
if (not pass)
vdst->Clear();
int n=this->_V_ITa[5*i+1], // number of vertices in the _VO_IT array (valence)
int n=this->_V_ITa[5*i+1], // number of vertices in the _V_IT array (valence)
p=this->_V_ITa[5*i+2], // index of the parent vertex
eidx0=this->_V_ITa[5*i+3], // index of the first crease rule edge
eidx1=this->_V_ITa[5*i+4]; // index of the second crease rule edge
@ -448,16 +516,16 @@ FarSubdivisionTables::computeCatmarkVertexPointsA( int offset, bool pass, int ta
// multi-pass kernel handling k_Dart and k_Smooth rules
template <class U> void
FarSubdivisionTables::computeCatmarkVertexPointsB( int offset, int tableOffset, int start, int end, U * vsrc ) const {
FarSubdivisionTables::computeCatmarkVertexPointsB( int vertexOffset, int tableOffset, int start, int end, U * vsrc ) const {
U * vdst = vsrc + offset + start;
U * vdst = vsrc + vertexOffset + start;
for (int i=start+tableOffset; i<end+tableOffset; ++i, ++vdst ) {
vdst->Clear();
int h = this->_V_ITa[5*i ], // offset of the vertices in the _V0_IT array
n = this->_V_ITa[5*i+1], // number of vertices in the _VO_IT array (valence)
int h = this->_V_ITa[5*i ], // offset of the vertices in the _V_IT array
n = this->_V_ITa[5*i+1], // number of vertices in the _V_IT array (valence)
p = this->_V_ITa[5*i+2]; // index of the parent vertex
float weight = this->_V_W[i],
@ -479,9 +547,9 @@ FarSubdivisionTables::computeCatmarkVertexPointsB( int offset, int tableOffset,
//
template <class U> void
FarSubdivisionTables::computeLoopEdgePoints( int offset, int tableOffset, int start, int end, U * vsrc ) const {
FarSubdivisionTables::computeLoopEdgePoints( int vertexOffset, int tableOffset, int start, int end, U * vsrc ) const {
U * vdst = vsrc + offset + start;
U * vdst = vsrc + vertexOffset + start;
for (int i=start+tableOffset; i<end+tableOffset; ++i, ++vdst ) {
@ -517,16 +585,16 @@ FarSubdivisionTables::computeLoopEdgePoints( int offset, int tableOffset, int st
// multi-pass kernel handling k_Crease and k_Corner rules
template <class U> void
FarSubdivisionTables::computeLoopVertexPointsA( int offset, bool pass, int tableOffset, int start, int end, U * vsrc ) const {
FarSubdivisionTables::computeLoopVertexPointsA( int vertexOffset, bool pass, int tableOffset, int start, int end, U * vsrc ) const {
U * vdst = vsrc + offset + start;
U * vdst = vsrc + vertexOffset + start;
for (int i=start+tableOffset; i<end+tableOffset; ++i, ++vdst ) {
if (not pass)
vdst->Clear();
int n=this->_V_ITa[5*i+1], // number of vertices in the _VO_IT array (valence)
int n=this->_V_ITa[5*i+1], // number of vertices in the _V_IT array (valence)
p=this->_V_ITa[5*i+2], // index of the parent vertex
eidx0=this->_V_ITa[5*i+3], // index of the first crease rule edge
eidx1=this->_V_ITa[5*i+4]; // index of the second crease rule edge
@ -556,16 +624,16 @@ FarSubdivisionTables::computeLoopVertexPointsA( int offset, bool pass, int table
// multi-pass kernel handling k_Dart and k_Smooth rules
template <class U> void
FarSubdivisionTables::computeLoopVertexPointsB( int offset, int tableOffset, int start, int end, U *vsrc ) const {
FarSubdivisionTables::computeLoopVertexPointsB( int vertexOffset, int tableOffset, int start, int end, U *vsrc ) const {
U * vdst = vsrc + offset + start;
U * vdst = vsrc + vertexOffset + start;
for (int i=start+tableOffset; i<end+tableOffset; ++i, ++vdst ) {
vdst->Clear();
int h = this->_V_ITa[5*i ], // offset of the vertices in the _V0_IT array
n = this->_V_ITa[5*i+1], // number of vertices in the _VO_IT array (valence)
int h = this->_V_ITa[5*i ], // offset of the vertices in the _V_IT array
n = this->_V_ITa[5*i+1], // number of vertices in the _V_IT array (valence)
p = this->_V_ITa[5*i+2]; // index of the parent vertex
float weight = this->_V_W[i],
@ -583,12 +651,6 @@ FarSubdivisionTables::computeLoopVertexPointsB( int offset, int tableOffset, int
}
}
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;

View File

@ -94,6 +94,15 @@ protected:
// Valence summation for face vertices
int GetVertVertsValenceSum() const { return _vertVertsValenceSum; }
// Minimum valence for coarse faces
int GetMinCoarseFaceValence() const { return _minCoarseFaceValence; }
// Maximum valence for coarse faces
int GetMaxCoarseFaceValence() const { return _maxCoarseFaceValence; }
// Number of coarse triangle faces
int GetNumCoarseTriangleFaces() const { return _numCoarseTriangleFaces; }
// Returns an integer based on the order in which the kernels are applied
static int GetMaskRanking( unsigned char mask0, unsigned char mask1 );
@ -102,7 +111,7 @@ protected:
_edgeVertIdx,
_vertVertIdx;
// Mumber of indices required for the face-vert and vertex-vert
// Number of indices required for the face-vert and vertex-vert
// iteration tables at each level
int _faceVertsValenceSum,
_vertVertsValenceSum;
@ -111,6 +120,14 @@ protected:
std::vector<std::vector< HbrVertex<T> *> > _faceVertsList,
_edgeVertsList,
_vertVertsList;
// Minimum and maximum valence for coarse faces
int _minCoarseFaceValence,
_maxCoarseFaceValence;
// Number of coarse triangle faces
int _numCoarseTriangleFaces;
private:
// Returns the subdivision level of a vertex
@ -135,7 +152,10 @@ FarSubdivisionTablesFactory<T,U>::FarSubdivisionTablesFactory( HbrMesh<T> const
_vertVertsValenceSum(0),
_faceVertsList(maxlevel+1),
_edgeVertsList(maxlevel+1),
_vertVertsList(maxlevel+1)
_vertVertsList(maxlevel+1),
_minCoarseFaceValence(0),
_maxCoarseFaceValence(0),
_numCoarseTriangleFaces(0)
{
assert( mesh );
@ -170,7 +190,15 @@ FarSubdivisionTablesFactory<T,U>::FarSubdivisionTablesFactory( HbrMesh<T> const
if (v->GetParentFace()) {
faceCounts[depth]++;
_faceVertsValenceSum += v->GetParentFace()->GetNumVertices();
int valence = v->GetParentFace()->GetNumVertices();
_faceVertsValenceSum += valence;
if (depth == 1) {
_minCoarseFaceValence = (_minCoarseFaceValence == 0 ? valence : std::min(_minCoarseFaceValence, valence));
_maxCoarseFaceValence = (_maxCoarseFaceValence == 0 ? valence : std::max(_maxCoarseFaceValence, valence));
if (valence == 3)
++_numCoarseTriangleFaces;
}
} else if (v->GetParentEdge())
edgeCounts[depth]++;
else if (v->GetParentVertex()) {

View File

@ -180,6 +180,62 @@ OsdCLComputeController::ApplyCatmarkFaceVerticesKernel(
CL_CHECK_ERROR(ciErrNum, "face kernel %d\n", ciErrNum);
}
void
OsdCLComputeController::ApplyCatmarkQuadFaceVerticesKernel(
FarKernelBatch const &batch, OsdCLComputeContext const *context) const {
assert(context);
cl_int ciErrNum;
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
cl_kernel kernel = _currentBindState.kernelBundle->GetCatmarkQuadFaceKernel();
cl_mem F_IT = context->GetTable(FarSubdivisionTables::F_IT)->GetDevicePtr();
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &F_IT);
clSetKernelArg(kernel, 3, sizeof(int), &_currentBindState.vertexDesc.offset);
clSetKernelArg(kernel, 4, sizeof(int), &_currentBindState.varyingDesc.offset);
clSetKernelArg(kernel, 5, sizeof(int), batch.GetVertexOffsetPtr());
clSetKernelArg(kernel, 6, sizeof(int), batch.GetTableOffsetPtr());
clSetKernelArg(kernel, 7, sizeof(int), batch.GetStartPtr());
clSetKernelArg(kernel, 8, sizeof(int), batch.GetEndPtr());
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
kernel, 1, NULL, globalWorkSize,
NULL, 0, NULL, NULL);
CL_CHECK_ERROR(ciErrNum, "quad face kernel %d\n", ciErrNum);
}
void
OsdCLComputeController::ApplyCatmarkTriQuadFaceVerticesKernel(
FarKernelBatch const &batch, OsdCLComputeContext const *context) const {
assert(context);
cl_int ciErrNum;
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
cl_kernel kernel = _currentBindState.kernelBundle->GetCatmarkTriQuadFaceKernel();
cl_mem F_IT = context->GetTable(FarSubdivisionTables::F_IT)->GetDevicePtr();
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &F_IT);
clSetKernelArg(kernel, 3, sizeof(int), &_currentBindState.vertexDesc.offset);
clSetKernelArg(kernel, 4, sizeof(int), &_currentBindState.varyingDesc.offset);
clSetKernelArg(kernel, 5, sizeof(int), batch.GetVertexOffsetPtr());
clSetKernelArg(kernel, 6, sizeof(int), batch.GetTableOffsetPtr());
clSetKernelArg(kernel, 7, sizeof(int), batch.GetStartPtr());
clSetKernelArg(kernel, 8, sizeof(int), batch.GetEndPtr());
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
kernel, 1, NULL, globalWorkSize,
NULL, 0, NULL, NULL);
CL_CHECK_ERROR(ciErrNum, "tri quad face kernel %d\n", ciErrNum);
}
void
OsdCLComputeController::ApplyCatmarkEdgeVerticesKernel(
FarKernelBatch const &batch, OsdCLComputeContext const *context) const {
@ -239,7 +295,7 @@ OsdCLComputeController::ApplyCatmarkVertexVerticesKernelB(
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
kernel, 1, NULL, globalWorkSize,
NULL, 0, NULL, NULL);
CL_CHECK_ERROR(ciErrNum, "vertex kernel 1 %d\n", ciErrNum);
CL_CHECK_ERROR(ciErrNum, "vertex kernel B %d\n", ciErrNum);
}
void
@ -271,7 +327,7 @@ OsdCLComputeController::ApplyCatmarkVertexVerticesKernelA1(
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
kernel, 1, NULL, globalWorkSize,
NULL, 0, NULL, NULL);
CL_CHECK_ERROR(ciErrNum, "vertex kernel 2 %d\n", ciErrNum);
CL_CHECK_ERROR(ciErrNum, "vertex kernel A1 %d\n", ciErrNum);
}
void
@ -303,7 +359,7 @@ OsdCLComputeController::ApplyCatmarkVertexVerticesKernelA2(
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
kernel, 1, NULL, globalWorkSize,
NULL, 0, NULL, NULL);
CL_CHECK_ERROR(ciErrNum, "vertex kernel 2 %d\n", ciErrNum);
CL_CHECK_ERROR(ciErrNum, "vertex kernel A2 %d\n", ciErrNum);
}
void

View File

@ -137,6 +137,10 @@ protected:
void ApplyCatmarkFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkTriQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -182,6 +182,86 @@ __kernel void computeFace(__global float *vertex,
if (varying) writeVarying(varying, vid, &dstVarying);
}
__kernel void computeQuadFace(__global float *vertex,
__global float *varying,
__global int *F_IT,
int vertexOffset, int varyingOffset,
int offset, int tableOffset,
int start, int end) {
int i = start + get_global_id(0);
int vid = start + get_global_id(0) + offset;
vertex += vertexOffset;
varying += (varying ? varyingOffset :0);
struct Vertex dst;
struct Varying dstVarying;
clearVertex(&dst);
clearVarying(&dstVarying);
int fidx0 = F_IT[tableOffset + 4 * i + 0];
int fidx1 = F_IT[tableOffset + 4 * i + 1];
int fidx2 = F_IT[tableOffset + 4 * i + 2];
int fidx3 = F_IT[tableOffset + 4 * i + 3];
addWithWeight(&dst, vertex, fidx0, 0.25f);
addWithWeight(&dst, vertex, fidx1, 0.25f);
addWithWeight(&dst, vertex, fidx2, 0.25f);
addWithWeight(&dst, vertex, fidx3, 0.25f);
if (varying) {
addVaryingWithWeight(&dstVarying, varying, fidx0, 0.25f);
addVaryingWithWeight(&dstVarying, varying, fidx1, 0.25f);
addVaryingWithWeight(&dstVarying, varying, fidx2, 0.25f);
addVaryingWithWeight(&dstVarying, varying, fidx3, 0.25f);
}
writeVertex(vertex, vid, &dst);
if (varying) writeVarying(varying, vid, &dstVarying);
}
__kernel void computeTriQuadFace(__global float *vertex,
__global float *varying,
__global int *F_IT,
int vertexOffset, int varyingOffset,
int offset, int tableOffset,
int start, int end) {
int i = start + get_global_id(0);
int vid = start + get_global_id(0) + offset;
vertex += vertexOffset;
varying += (varying ? varyingOffset :0);
struct Vertex dst;
struct Varying dstVarying;
clearVertex(&dst);
clearVarying(&dstVarying);
int fidx0 = F_IT[tableOffset + 4 * i + 0];
int fidx1 = F_IT[tableOffset + 4 * i + 1];
int fidx2 = F_IT[tableOffset + 4 * i + 2];
int fidx3 = F_IT[tableOffset + 4 * i + 3];
bool triangle = (fidx2 == fidx3);
float weight = triangle ? 1.0f / 3.0f : 1.0f / 4.0f;
addWithWeight(&dst, vertex, fidx0, weight);
addWithWeight(&dst, vertex, fidx1, weight);
addWithWeight(&dst, vertex, fidx2, weight);
if (!triangle)
addWithWeight(&dst, vertex, fidx3, weight);
if (varying) {
addVaryingWithWeight(&dstVarying, varying, fidx0, weight);
addVaryingWithWeight(&dstVarying, varying, fidx1, weight);
addVaryingWithWeight(&dstVarying, varying, fidx2, weight);
if (!triangle)
addVaryingWithWeight(&dstVarying, varying, fidx3, weight);
}
writeVertex(vertex, vid, &dst);
if (varying) writeVarying(varying, vid, &dstVarying);
}
__kernel void computeEdge(__global float *vertex,
__global float *varying,
__global int *E_IT,

View File

@ -51,6 +51,8 @@ OsdCLKernelBundle::OsdCLKernelBundle() :
_clBilinearEdge(NULL),
_clBilinearVertex(NULL),
_clCatmarkFace(NULL),
_clCatmarkQuadFace(NULL),
_clCatmarkTriQuadFace(NULL),
_clCatmarkEdge(NULL),
_clCatmarkVertexA(NULL),
_clCatmarkVertexB(NULL),
@ -72,6 +74,10 @@ OsdCLKernelBundle::~OsdCLKernelBundle() {
if (_clCatmarkFace)
clReleaseKernel(_clCatmarkFace);
if (_clCatmarkQuadFace)
clReleaseKernel(_clCatmarkQuadFace);
if (_clCatmarkTriQuadFace)
clReleaseKernel(_clCatmarkTriQuadFace);
if (_clCatmarkEdge)
clReleaseKernel(_clCatmarkEdge);
if (_clCatmarkVertexA)
@ -145,16 +151,18 @@ OsdCLKernelBundle::Compile(cl_context clContext,
return false;
}
_clBilinearEdge = buildKernel(_clProgram, "computeBilinearEdge");
_clBilinearVertex = buildKernel(_clProgram, "computeBilinearVertex");
_clCatmarkFace = buildKernel(_clProgram, "computeFace");
_clCatmarkEdge = buildKernel(_clProgram, "computeEdge");
_clCatmarkVertexA = buildKernel(_clProgram, "computeVertexA");
_clCatmarkVertexB = buildKernel(_clProgram, "computeVertexB");
_clLoopEdge = buildKernel(_clProgram, "computeEdge");
_clLoopVertexA = buildKernel(_clProgram, "computeVertexA");
_clLoopVertexB = buildKernel(_clProgram, "computeLoopVertexB");
_clVertexEditAdd = buildKernel(_clProgram, "editVertexAdd");
_clBilinearEdge = buildKernel(_clProgram, "computeBilinearEdge");
_clBilinearVertex = buildKernel(_clProgram, "computeBilinearVertex");
_clCatmarkFace = buildKernel(_clProgram, "computeFace");
_clCatmarkQuadFace = buildKernel(_clProgram, "computeQuadFace");
_clCatmarkTriQuadFace = buildKernel(_clProgram, "computeTriQuadFace");
_clCatmarkEdge = buildKernel(_clProgram, "computeEdge");
_clCatmarkVertexA = buildKernel(_clProgram, "computeVertexA");
_clCatmarkVertexB = buildKernel(_clProgram, "computeVertexB");
_clLoopEdge = buildKernel(_clProgram, "computeEdge");
_clLoopVertexA = buildKernel(_clProgram, "computeVertexA");
_clLoopVertexB = buildKernel(_clProgram, "computeLoopVertexB");
_clVertexEditAdd = buildKernel(_clProgram, "editVertexAdd");
return true;
}

View File

@ -44,25 +44,29 @@ public:
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc);
cl_kernel GetBilinearEdgeKernel() const { return _clBilinearEdge; }
cl_kernel GetBilinearEdgeKernel() const { return _clBilinearEdge; }
cl_kernel GetBilinearVertexKernel() const { return _clBilinearVertex; }
cl_kernel GetBilinearVertexKernel() const { return _clBilinearVertex; }
cl_kernel GetCatmarkFaceKernel() const { return _clCatmarkFace; }
cl_kernel GetCatmarkFaceKernel() const { return _clCatmarkFace; }
cl_kernel GetCatmarkEdgeKernel() const { return _clCatmarkEdge; }
cl_kernel GetCatmarkQuadFaceKernel() const { return _clCatmarkQuadFace; }
cl_kernel GetCatmarkVertexKernelA() const { return _clCatmarkVertexA; }
cl_kernel GetCatmarkTriQuadFaceKernel() const { return _clCatmarkQuadFace; }
cl_kernel GetCatmarkVertexKernelB() const { return _clCatmarkVertexB; }
cl_kernel GetCatmarkEdgeKernel() const { return _clCatmarkEdge; }
cl_kernel GetLoopEdgeKernel() const { return _clLoopEdge; }
cl_kernel GetCatmarkVertexKernelA() const { return _clCatmarkVertexA; }
cl_kernel GetLoopVertexKernelA() const { return _clLoopVertexA; }
cl_kernel GetCatmarkVertexKernelB() const { return _clCatmarkVertexB; }
cl_kernel GetLoopVertexKernelB() const { return _clLoopVertexB; }
cl_kernel GetLoopEdgeKernel() const { return _clLoopEdge; }
cl_kernel GetVertexEditAdd() const { return _clVertexEditAdd; }
cl_kernel GetLoopVertexKernelA() const { return _clLoopVertexA; }
cl_kernel GetLoopVertexKernelB() const { return _clLoopVertexB; }
cl_kernel GetVertexEditAdd() const { return _clVertexEditAdd; }
struct Match {
/// Constructor
@ -92,6 +96,8 @@ protected:
cl_kernel _clBilinearEdge,
_clBilinearVertex,
_clCatmarkFace,
_clCatmarkQuadFace,
_clCatmarkTriQuadFace,
_clCatmarkEdge,
_clCatmarkVertexA,
_clCatmarkVertexB,

View File

@ -90,6 +90,32 @@ OsdCpuComputeController::ApplyCatmarkFaceVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdCpuComputeController::ApplyCatmarkQuadFaceVerticesKernel(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
assert(context);
OsdCpuComputeQuadFace(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::F_IT)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdCpuComputeController::ApplyCatmarkTriQuadFaceVerticesKernel(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
assert(context);
OsdCpuComputeTriQuadFace(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::F_IT)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdCpuComputeController::ApplyCatmarkEdgeVerticesKernel(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {

View File

@ -120,6 +120,10 @@ protected:
void ApplyCatmarkFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkTriQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -102,6 +102,86 @@ void OsdCpuComputeFace(
}
}
void OsdCpuComputeQuadFace(
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *F_IT, int vertexOffset, int tableOffset,
int start, int end) {
float *vertexResults = (float*)alloca(vertexDesc.length * sizeof(float));
float *varyingResults = (float*)alloca(varyingDesc.length * sizeof(float));
for (int i = start; i < end; i++) {
int fidx0 = F_IT[tableOffset + 4 * i + 0];
int fidx1 = F_IT[tableOffset + 4 * i + 1];
int fidx2 = F_IT[tableOffset + 4 * i + 2];
int fidx3 = F_IT[tableOffset + 4 * i + 3];
int dstIndex = i + vertexOffset;
// clear
clear(vertexResults, vertexDesc);
clear(varyingResults, varyingDesc);
// accum
addWithWeight(vertexResults, vertex, fidx0, 0.25f, vertexDesc);
addWithWeight(vertexResults, vertex, fidx1, 0.25f, vertexDesc);
addWithWeight(vertexResults, vertex, fidx2, 0.25f, vertexDesc);
addWithWeight(vertexResults, vertex, fidx3, 0.25f, vertexDesc);
addWithWeight(varyingResults, varying, fidx0, 0.25f, varyingDesc);
addWithWeight(varyingResults, varying, fidx1, 0.25f, varyingDesc);
addWithWeight(varyingResults, varying, fidx2, 0.25f, varyingDesc);
addWithWeight(varyingResults, varying, fidx3, 0.25f, varyingDesc);
// write results
copy(vertex, vertexResults, dstIndex, vertexDesc);
copy(varying, varyingResults, dstIndex, varyingDesc);
}
}
void OsdCpuComputeTriQuadFace(
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *F_IT, int vertexOffset, int tableOffset,
int start, int end) {
float *vertexResults = (float*)alloca(vertexDesc.length * sizeof(float));
float *varyingResults = (float*)alloca(varyingDesc.length * sizeof(float));
for (int i = start; i < end; i++) {
int fidx0 = F_IT[tableOffset + 4 * i + 0];
int fidx1 = F_IT[tableOffset + 4 * i + 1];
int fidx2 = F_IT[tableOffset + 4 * i + 2];
int fidx3 = F_IT[tableOffset + 4 * i + 3];
bool triangle = (fidx2 == fidx3);
float weight = (triangle ? 1.0f / 3.0f : 1.0f / 4.0f);
int dstIndex = i + vertexOffset;
// clear
clear(vertexResults, vertexDesc);
clear(varyingResults, varyingDesc);
// accum
addWithWeight(vertexResults, vertex, fidx0, weight, vertexDesc);
addWithWeight(vertexResults, vertex, fidx1, weight, vertexDesc);
addWithWeight(vertexResults, vertex, fidx2, weight, vertexDesc);
addWithWeight(varyingResults, varying, fidx0, weight, varyingDesc);
addWithWeight(varyingResults, varying, fidx1, weight, varyingDesc);
addWithWeight(varyingResults, varying, fidx2, weight, varyingDesc);
if (!triangle) {
addWithWeight(vertexResults, vertex, fidx3, weight, vertexDesc);
addWithWeight(varyingResults, varying, fidx3, weight, varyingDesc);
}
// write results
copy(vertex, vertexResults, dstIndex, vertexDesc);
copy(varying, varyingResults, dstIndex, varyingDesc);
}
}
void OsdCpuComputeEdge(
float *vertex, float *varying,
OsdVertexBufferDescriptor const &vertexDesc,

View File

@ -95,6 +95,20 @@ void OsdCpuComputeFace(float * vertex, float * varying,
int vertexOffset, int tableOffset,
int start, int end);
void OsdCpuComputeQuadFace(float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *F_IT,
int vertexOffset, int tableOffset,
int start, int end);
void OsdCpuComputeTriQuadFace(float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *F_IT,
int vertexOffset, int tableOffset,
int start, int end);
template<int numVertexElements>
void ComputeEdgeKernel( float *vertex,
const int *E_IT,
@ -151,7 +165,7 @@ void ComputeEdgeKernel( float *vertex,
void OsdCpuComputeEdge(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *E_IT, const float *E_ITa,
const int *E_IT, const float *E_W,
int vertexOffset, int tableOffset,
int start, int end);

View File

@ -35,6 +35,16 @@ void OsdCudaComputeFace(float *vertex, float *varying,
int varyingLength, int varyingStride,
int *F_IT, int *F_ITa, int offset, int tableOffset, int start, int end);
void OsdCudaComputeQuadFace(float *vertex, float *varying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *F_IT, int offset, int tableOffset, int start, int end);
void OsdCudaComputeTriQuadFace(float *vertex, float *varying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *F_IT, int offset, int tableOffset, int start, int end);
void OsdCudaComputeEdge(float *vertex, float *varying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
@ -171,6 +181,46 @@ OsdCudaComputeController::ApplyCatmarkFaceVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdCudaComputeController::ApplyCatmarkQuadFaceVerticesKernel(
FarKernelBatch const &batch, OsdCudaComputeContext const *context) const {
assert(context);
const OsdCudaTable * F_IT = context->GetTable(FarSubdivisionTables::F_IT);
assert(F_IT);
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
OsdCudaComputeQuadFace(
vertex, varying,
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
static_cast<int*>(F_IT->GetCudaMemory()),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdCudaComputeController::ApplyCatmarkTriQuadFaceVerticesKernel(
FarKernelBatch const &batch, OsdCudaComputeContext const *context) const {
assert(context);
const OsdCudaTable * F_IT = context->GetTable(FarSubdivisionTables::F_IT);
assert(F_IT);
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
OsdCudaComputeTriQuadFace(
vertex, varying,
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
static_cast<int*>(F_IT->GetCudaMemory()),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdCudaComputeController::ApplyCatmarkEdgeVerticesKernel(
FarKernelBatch const &batch, OsdCudaComputeContext const *context) const {

View File

@ -120,6 +120,10 @@ protected:
void ApplyCatmarkFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkTriQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -140,6 +140,163 @@ computeFace(float *fVertex, float *fVarying,
}
}
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
computeQuadFace(float *fVertex, float *fVaryings, int *F0_IT, int offset, int tableOffset, int start, int end)
{
DeviceVertex<NUM_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_VERTEX_ELEMENTS>*)fVertex;
DeviceVertex<NUM_VARYING_ELEMENTS> *varyings = (DeviceVertex<NUM_VARYING_ELEMENTS>*)fVaryings;
for (int i = start + threadIdx.x + blockIdx.x*blockDim.x;
i < end;
i += blockDim.x * gridDim.x) {
int fidx0 = F0_IT[tableOffset + 4 * i + 0];
int fidx1 = F0_IT[tableOffset + 4 * i + 1];
int fidx2 = F0_IT[tableOffset + 4 * i + 2];
int fidx3 = F0_IT[tableOffset + 4 * i + 3];
DeviceVertex<NUM_VERTEX_ELEMENTS> dst;
dst.clear();
if(NUM_VARYING_ELEMENTS > 0){
DeviceVertex<NUM_VARYING_ELEMENTS> dstVarying;
dstVarying.clear();
dst.addWithWeight(&vertex[fidx0], 0.25f);
dst.addWithWeight(&vertex[fidx1], 0.25f);
dst.addWithWeight(&vertex[fidx2], 0.25f);
dst.addWithWeight(&vertex[fidx3], 0.25f);
dstVarying.addWithWeight(&varyings[fidx0], 0.25f);
dstVarying.addWithWeight(&varyings[fidx1], 0.25f);
dstVarying.addWithWeight(&varyings[fidx2], 0.25f);
dstVarying.addWithWeight(&varyings[fidx3], 0.25f);
vertex[offset + i] = dst;
varyings[offset + i] = dstVarying;
}else{
dst.addWithWeight(&vertex[fidx0], 0.25f);
dst.addWithWeight(&vertex[fidx1], 0.25f);
dst.addWithWeight(&vertex[fidx2], 0.25f);
dst.addWithWeight(&vertex[fidx3], 0.25f);
vertex[offset + i] = dst;
}
}
}
__global__ void
computeQuadFace(float *fVertex, float *fVarying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *F0_IT, int offset, int tableOffset, int start, int end)
{
for (int i = start +threadIdx.x + blockIdx.x*blockDim.x;
i < end;
i += blockDim.x * gridDim.x){
int fidx0 = F0_IT[tableOffset + 4 * i + 0];
int fidx1 = F0_IT[tableOffset + 4 * i + 1];
int fidx2 = F0_IT[tableOffset + 4 * i + 2];
int fidx3 = F0_IT[tableOffset + 4 * i + 3];
// XXX: can we use local stack like alloca?
float *dstVertex = fVertex + (i+offset)*vertexStride;
clear(dstVertex, vertexLength);
float *dstVarying = fVarying + (i+offset)*varyingStride;
clear(dstVarying, varyingLength);
addWithWeight(dstVertex, fVertex + fidx0*vertexStride, 0.25f, vertexLength);
addWithWeight(dstVertex, fVertex + fidx1*vertexStride, 0.25f, vertexLength);
addWithWeight(dstVertex, fVertex + fidx2*vertexStride, 0.25f, vertexLength);
addWithWeight(dstVertex, fVertex + fidx3*vertexStride, 0.25f, vertexLength);
addWithWeight(dstVarying, fVarying + fidx0*varyingStride, 0.25f, varyingLength);
addWithWeight(dstVarying, fVarying + fidx1*varyingStride, 0.25f, varyingLength);
addWithWeight(dstVarying, fVarying + fidx2*varyingStride, 0.25f, varyingLength);
addWithWeight(dstVarying, fVarying + fidx3*varyingStride, 0.25f, varyingLength);
}
}
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
computeTriQuadFace(float *fVertex, float *fVaryings, int *F0_IT, int offset, int tableOffset, int start, int end)
{
DeviceVertex<NUM_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_VERTEX_ELEMENTS>*)fVertex;
DeviceVertex<NUM_VARYING_ELEMENTS> *varyings = (DeviceVertex<NUM_VARYING_ELEMENTS>*)fVaryings;
for (int i = start + threadIdx.x + blockIdx.x*blockDim.x;
i < end;
i += blockDim.x * gridDim.x) {
int fidx0 = F0_IT[tableOffset + 4 * i + 0];
int fidx1 = F0_IT[tableOffset + 4 * i + 1];
int fidx2 = F0_IT[tableOffset + 4 * i + 2];
int fidx3 = F0_IT[tableOffset + 4 * i + 3];
bool triangle = (fidx2 == fidx3);
float weight = triangle ? 1.0f / 3.0f : 1.0f / 4.0f;
DeviceVertex<NUM_VERTEX_ELEMENTS> dst;
dst.clear();
if(NUM_VARYING_ELEMENTS > 0){
DeviceVertex<NUM_VARYING_ELEMENTS> dstVarying;
dstVarying.clear();
dst.addWithWeight(&vertex[fidx0], weight);
dst.addWithWeight(&vertex[fidx1], weight);
dst.addWithWeight(&vertex[fidx2], weight);
dstVarying.addWithWeight(&varyings[fidx0], weight);
dstVarying.addWithWeight(&varyings[fidx1], weight);
dstVarying.addWithWeight(&varyings[fidx2], weight);
if (!triangle) {
dst.addWithWeight(&vertex[fidx3], weight);
dstVarying.addWithWeight(&varyings[fidx3], 0.25f);
}
vertex[offset + i] = dst;
varyings[offset + i] = dstVarying;
}else{
dst.addWithWeight(&vertex[fidx0], weight);
dst.addWithWeight(&vertex[fidx1], weight);
dst.addWithWeight(&vertex[fidx2], weight);
if (!triangle)
dst.addWithWeight(&vertex[fidx3], weight);
vertex[offset + i] = dst;
}
}
}
__global__ void
computeTriQuadFace(float *fVertex, float *fVarying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *F0_IT, int offset, int tableOffset, int start, int end)
{
for (int i = start +threadIdx.x + blockIdx.x*blockDim.x;
i < end;
i += blockDim.x * gridDim.x){
int fidx0 = F0_IT[tableOffset + 4 * i + 0];
int fidx1 = F0_IT[tableOffset + 4 * i + 1];
int fidx2 = F0_IT[tableOffset + 4 * i + 2];
int fidx3 = F0_IT[tableOffset + 4 * i + 3];
bool triangle = (fidx2 == fidx3);
float weight = triangle ? 1.0f / 3.0f : 1.0f / 4.0f;
// XXX: can we use local stack like alloca?
float *dstVertex = fVertex + (i+offset)*vertexStride;
clear(dstVertex, vertexLength);
float *dstVarying = fVarying + (i+offset)*varyingStride;
clear(dstVarying, varyingLength);
addWithWeight(dstVertex, fVertex + fidx0*vertexStride, weight, vertexLength);
addWithWeight(dstVertex, fVertex + fidx1*vertexStride, weight, vertexLength);
addWithWeight(dstVertex, fVertex + fidx2*vertexStride, weight, vertexLength);
addWithWeight(dstVarying, fVarying + fidx0*varyingStride, weight, varyingLength);
addWithWeight(dstVarying, fVarying + fidx1*varyingStride, weight, varyingLength);
addWithWeight(dstVarying, fVarying + fidx2*varyingStride, weight, varyingLength);
if (!triangle) {
addWithWeight(dstVertex, fVertex + fidx3*vertexStride, weight, vertexLength);
addWithWeight(dstVarying, fVarying + fidx3*varyingStride, weight, varyingLength);
}
}
}
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
computeEdge(float *fVertex, float *fVaryings, int *E0_IT, float *E0_S, int offset, int tableOffset, int start, int end)
{
@ -639,6 +796,40 @@ void OsdCudaComputeFace(float *vertex, float *varying,
F_IT, F_ITa, offset, tableOffset, start, end);
}
void OsdCudaComputeQuadFace(float *vertex, float *varying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *F_IT, int offset, int tableOffset, int start, int end)
{
//computeQuadFace<3, 0><<<512,32>>>(vertex, varying, F_IT, offset, start, end);
OPT_KERNEL(0, 0, computeQuadFace, 512, 32, (vertex, varying, F_IT, offset, tableOffset, start, end));
OPT_KERNEL(0, 3, computeQuadFace, 512, 32, (vertex, varying, F_IT, offset, tableOffset, start, end));
OPT_KERNEL(3, 0, computeQuadFace, 512, 32, (vertex, varying, F_IT, offset, tableOffset, start, end));
OPT_KERNEL(3, 3, computeQuadFace, 512, 32, (vertex, varying, F_IT, offset, tableOffset, start, end));
// fallback kernel (slow)
computeQuadFace<<<512, 32>>>(vertex, varying,
vertexLength, vertexStride, varyingLength, varyingStride,
F_IT, offset, tableOffset, start, end);
}
void OsdCudaComputeTriQuadFace(float *vertex, float *varying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *F_IT, int offset, int tableOffset, int start, int end)
{
//computeTriQuadFace<3, 0><<<512,32>>>(vertex, varying, F_IT, offset, start, end);
OPT_KERNEL(0, 0, computeTriQuadFace, 512, 32, (vertex, varying, F_IT, offset, tableOffset, start, end));
OPT_KERNEL(0, 3, computeTriQuadFace, 512, 32, (vertex, varying, F_IT, offset, tableOffset, start, end));
OPT_KERNEL(3, 0, computeTriQuadFace, 512, 32, (vertex, varying, F_IT, offset, tableOffset, start, end));
OPT_KERNEL(3, 3, computeTriQuadFace, 512, 32, (vertex, varying, F_IT, offset, tableOffset, start, end));
// fallback kernel (slow)
computeTriQuadFace<<<512, 32>>>(vertex, varying,
vertexLength, vertexStride, varyingLength, varyingStride,
F_IT, offset, tableOffset, start, end);
}
void OsdCudaComputeEdge(float *vertex, float *varying,
int vertexLength, int vertexStride,

View File

@ -163,7 +163,29 @@ OsdD3D11ComputeController::ApplyCatmarkFaceVerticesKernel(
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
}
void
OsdD3D11ComputeController::ApplyCatmarkQuadFaceVerticesKernel(
FarKernelBatch const &batch, OsdD3D11ComputeContext const *context) const {
assert(context);
_currentBindState.kernelBundle->ApplyCatmarkQuadFaceVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd(),
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
}
void
OsdD3D11ComputeController::ApplyCatmarkTriQuadFaceVerticesKernel(
FarKernelBatch const &batch, OsdD3D11ComputeContext const *context) const {
assert(context);
_currentBindState.kernelBundle->ApplyCatmarkTriQuadFaceVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd(),
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
}
void
OsdD3D11ComputeController::ApplyCatmarkEdgeVerticesKernel(

View File

@ -136,6 +136,10 @@ protected:
void ApplyCatmarkFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkTriQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -52,6 +52,8 @@ OsdD3D11ComputeKernelBundle::OsdD3D11ComputeKernelBundle(
_classLinkage(0),
_kernelCB(0),
_kernelComputeFace(0),
_kernelComputeQuadFace(0),
_kernelComputeTriQuadFace(0),
_kernelComputeEdge(0),
_kernelComputeBilinearEdge(0),
_kernelComputeVertex(0),
@ -69,6 +71,8 @@ OsdD3D11ComputeKernelBundle::~OsdD3D11ComputeKernelBundle() {
SAFE_RELEASE(_classLinkage);
SAFE_RELEASE(_kernelCB);
SAFE_RELEASE(_kernelComputeFace);
SAFE_RELEASE(_kernelComputeQuadFace);
SAFE_RELEASE(_kernelComputeTriQuadFace);
SAFE_RELEASE(_kernelComputeEdge);
SAFE_RELEASE(_kernelComputeBilinearEdge);
SAFE_RELEASE(_kernelComputeVertex);
@ -163,6 +167,12 @@ OsdD3D11ComputeKernelBundle::Compile(
_classLinkage->GetClassInstance(
"catmarkComputeFace", 0, &_kernelComputeFace);
assert(_kernelComputeFace);
_classLinkage->GetClassInstance(
"catmarkComputeQuadFace", 0, &_kernelComputeQuadFace);
assert(_kernelComputeQuadFace);
_classLinkage->GetClassInstance(
"catmarkComputeTriQuadFace", 0, &_kernelComputeTriQuadFace);
assert(_kernelComputeTriQuadFace);
_classLinkage->GetClassInstance(
"catmarkComputeEdge", 0, &_kernelComputeEdge);
assert(_kernelComputeEdge);
@ -302,6 +312,38 @@ OsdD3D11ComputeKernelBundle::ApplyCatmarkFaceVerticesKernel(
dispatchCompute(_kernelComputeFace, args);
}
void
OsdD3D11ComputeKernelBundle::ApplyCatmarkQuadFaceVerticesKernel(
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset) {
KernelCB args;
ZeroMemory(&args, sizeof(args));
args.vertexOffset = vertexOffset;
args.tableOffset = tableOffset;
args.indexStart = start;
args.indexEnd = end;
args.vertexBaseOffset = vertexBaseOffset;
args.varyingBaseOffset = varyingBaseOffset;
dispatchCompute(_kernelComputeQuadFace, args);
}
void
OsdD3D11ComputeKernelBundle::ApplyCatmarkTriQuadFaceVerticesKernel(
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset) {
KernelCB args;
ZeroMemory(&args, sizeof(args));
args.vertexOffset = vertexOffset;
args.tableOffset = tableOffset;
args.indexStart = start;
args.indexEnd = end;
args.vertexBaseOffset = vertexBaseOffset;
args.varyingBaseOffset = varyingBaseOffset;
dispatchCompute(_kernelComputeTriQuadFace, args);
}
void
OsdD3D11ComputeKernelBundle::ApplyCatmarkEdgeVerticesKernel(
int vertexOffset, int tableOffset, int start, int end,

View File

@ -67,6 +67,14 @@ public:
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset);
void ApplyCatmarkQuadFaceVerticesKernel(
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset);
void ApplyCatmarkTriQuadFaceVerticesKernel(
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset);
void ApplyCatmarkEdgeVerticesKernel(
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset);
@ -132,6 +140,10 @@ protected:
ID3D11ClassInstance * _kernelComputeFace; // general face-vertex kernel (all schemes)
ID3D11ClassInstance * _kernelComputeQuadFace; // quad face-vertex kernel (catmark)
ID3D11ClassInstance * _kernelComputeTriQuadFace; // tri-quad face-vertex kernel (catmark)
ID3D11ClassInstance * _kernelComputeEdge; // edge-vertex kernel (catmark + loop schemes)
ID3D11ClassInstance * _kernelComputeBilinearEdge; // edge-vertex kernel (bilinear scheme)

View File

@ -92,6 +92,34 @@ OsdGcdComputeController::ApplyCatmarkFaceVerticesKernel(
_gcd_queue);
}
void
OsdGcdComputeController::ApplyCatmarkQuadFaceVerticesKernel(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
assert(context);
OsdGcdComputeQuadFace(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::F_IT)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(),
_gcd_queue);
}
void
OsdGcdComputeController::ApplyCatmarkTriQuadFaceVerticesKernel(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
assert(context);
OsdGcdComputeTriQuadFace(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::F_IT)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(),
_gcd_queue);
}
void
OsdGcdComputeController::ApplyCatmarkEdgeVerticesKernel(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {

View File

@ -120,6 +120,10 @@ protected:
void ApplyCatmarkFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkTriQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -79,6 +79,54 @@ void OsdGcdComputeFace(
vertexOffset, tableOffset, start_e, end_e);
}
void OsdGcdComputeQuadFace(
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *F_IT,
int vertexOffset, int tableOffset, int start, int end,
dispatch_queue_t gcdq) {
const int workSize = end-start;
dispatch_apply(workSize/GCD_WORK_STRIDE, gcdq, ^(size_t blockIdx){
const int start_i = start + blockIdx*GCD_WORK_STRIDE;
const int end_i = start_i + GCD_WORK_STRIDE;
OsdCpuComputeQuadFace(vertex, varying, vertexDesc, varyingDesc,
F_IT,
vertexOffset, tableOffset, start_i, end_i);
});
const int start_e = end - workSize%GCD_WORK_STRIDE;
const int end_e = end;
if (start_e < end_e)
OsdCpuComputeQuadFace(vertex, varying, vertexDesc, varyingDesc,
F_IT,
vertexOffset, tableOffset, start_e, end_e);
}
void OsdGcdComputeTriQuadFace(
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *F_IT,
int vertexOffset, int tableOffset, int start, int end,
dispatch_queue_t gcdq) {
const int workSize = end-start;
dispatch_apply(workSize/GCD_WORK_STRIDE, gcdq, ^(size_t blockIdx){
const int start_i = start + blockIdx*GCD_WORK_STRIDE;
const int end_i = start_i + GCD_WORK_STRIDE;
OsdCpuComputeTriQuadFace(vertex, varying, vertexDesc, varyingDesc,
F_IT,
vertexOffset, tableOffset, start_i, end_i);
});
const int start_e = end - workSize%GCD_WORK_STRIDE;
const int end_e = end;
if (start_e < end_e)
OsdCpuComputeTriQuadFace(vertex, varying, vertexDesc, varyingDesc,
F_IT,
vertexOffset, tableOffset, start_e, end_e);
}
void OsdGcdComputeEdge(
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,

View File

@ -42,6 +42,22 @@ void OsdGcdComputeFace(float * vertex, float * varying,
int start, int end,
dispatch_queue_t gcdq);
void OsdGcdComputeQuadFace(float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *F_IT,
int vertexOffset, int tableOffset,
int start, int end,
dispatch_queue_t gcdq);
void OsdGcdComputeTriQuadFace(float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *F_IT,
int vertexOffset, int tableOffset,
int start, int end,
dispatch_queue_t gcdq);
void OsdGcdComputeEdge(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,

View File

@ -139,6 +139,28 @@ OsdGLSLComputeController::ApplyCatmarkFaceVerticesKernel(
batch.GetStart(), batch.GetEnd());
}
void
OsdGLSLComputeController::ApplyCatmarkQuadFaceVerticesKernel(
FarKernelBatch const &batch, OsdGLSLComputeContext const *context) const {
assert(context);
_currentBindState.kernelBundle->ApplyCatmarkQuadFaceVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd());
}
void
OsdGLSLComputeController::ApplyCatmarkTriQuadFaceVerticesKernel(
FarKernelBatch const &batch, OsdGLSLComputeContext const *context) const {
assert(context);
_currentBindState.kernelBundle->ApplyCatmarkTriQuadFaceVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd());
}
void
OsdGLSLComputeController::ApplyCatmarkEdgeVerticesKernel(
FarKernelBatch const &batch, OsdGLSLComputeContext const *context) const {

View File

@ -129,6 +129,10 @@ protected:
void ApplyCatmarkFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkTriQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -178,6 +178,63 @@ void catmarkComputeFace()
writeVertex(vid, dst);
}
// Quad face-vertices compute Kernel
subroutine(computeKernelType)
void catmarkComputeQuadFace()
{
int i = int(gl_GlobalInvocationID.x) + indexStart;
if (i >= indexEnd) return;
int vid = i + vertexOffset;
int fidx0 = _F_IT[tableOffset + i * 4 + 0];
int fidx1 = _F_IT[tableOffset + i * 4 + 1];
int fidx2 = _F_IT[tableOffset + i * 4 + 2];
int fidx3 = _F_IT[tableOffset + i * 4 + 3];
Vertex dst;
clear(dst);
addWithWeight(dst, readVertex(fidx0), 0.25);
addWithWeight(dst, readVertex(fidx1), 0.25);
addWithWeight(dst, readVertex(fidx2), 0.25);
addWithWeight(dst, readVertex(fidx3), 0.25);
addVaryingWithWeight(dst, readVertex(fidx0), 0.25);
addVaryingWithWeight(dst, readVertex(fidx1), 0.25);
addVaryingWithWeight(dst, readVertex(fidx2), 0.25);
addVaryingWithWeight(dst, readVertex(fidx3), 0.25);
writeVertex(vid, dst);
}
// Tri-quad face-vertices compute Kernel
subroutine(computeKernelType)
void catmarkComputeTriQuadFace()
{
int i = int(gl_GlobalInvocationID.x) + indexStart;
if (i >= indexEnd) return;
int vid = i + vertexOffset;
int fidx0 = _F_IT[tableOffset + i * 4 + 0];
int fidx1 = _F_IT[tableOffset + i * 4 + 1];
int fidx2 = _F_IT[tableOffset + i * 4 + 2];
int fidx3 = _F_IT[tableOffset + i * 4 + 3];
bool triangle = (fidx2 == fidx3);
float weight = triangle ? 1.0f / 3.0f : 1.0f / 4.0f;
Vertex dst;
clear(dst);
addWithWeight(dst, readVertex(fidx0), weight);
addWithWeight(dst, readVertex(fidx1), weight);
addWithWeight(dst, readVertex(fidx2), weight);
addVaryingWithWeight(dst, readVertex(fidx0), weight);
addVaryingWithWeight(dst, readVertex(fidx1), weight);
addVaryingWithWeight(dst, readVertex(fidx2), weight);
if (!triangle) {
addWithWeight(dst, readVertex(fidx3), weight);
addVaryingWithWeight(dst, readVertex(fidx3), weight);
}
writeVertex(vid, dst);
}
// Edge-vertices compute Kernepl
subroutine(computeKernelType)
void catmarkComputeEdge()

View File

@ -117,6 +117,12 @@ OsdGLSLComputeKernelBundle::Compile(
_subComputeFace = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"catmarkComputeFace");
_subComputeQuadFace = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"catmarkComputeQuadFace");
_subComputeTriQuadFace = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"catmarkComputeTriQuadFace");
_subComputeEdge = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"catmarkComputeEdge");
@ -229,6 +235,22 @@ OsdGLSLComputeKernelBundle::ApplyCatmarkFaceVerticesKernel(
// glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT);
}
void
OsdGLSLComputeKernelBundle::ApplyCatmarkQuadFaceVerticesKernel(
int vertexOffset, int tableOffset, int start, int end) {
glUniformSubroutinesuiv(GL_COMPUTE_SHADER, 1, &_subComputeQuadFace);
dispatchCompute(vertexOffset, tableOffset, start, end);
}
void
OsdGLSLComputeKernelBundle::ApplyCatmarkTriQuadFaceVerticesKernel(
int vertexOffset, int tableOffset, int start, int end) {
glUniformSubroutinesuiv(GL_COMPUTE_SHADER, 1, &_subComputeTriQuadFace);
dispatchCompute(vertexOffset, tableOffset, start, end);
}
void
OsdGLSLComputeKernelBundle::ApplyCatmarkEdgeVerticesKernel(
int vertexOffset, int tableOffset, int start, int end) {

View File

@ -57,6 +57,12 @@ public:
void ApplyCatmarkFaceVerticesKernel(
int vertexOffset, int tableOffset, int start, int end);
void ApplyCatmarkQuadFaceVerticesKernel(
int vertexOffset, int tableOffset, int start, int end);
void ApplyCatmarkTriQuadFaceVerticesKernel(
int vertexOffset, int tableOffset, int start, int end);
void ApplyCatmarkEdgeVerticesKernel(
int vertexOffset, int tableOffset, int start, int end);
@ -130,6 +136,10 @@ protected:
GLuint _subComputeFace; // general face-vertex kernel (all schemes)
GLuint _subComputeQuadFace; // quad face-vertex kernel (catmark)
GLuint _subComputeTriQuadFace; // tri-quad face-vertex kernel (catmark)
GLuint _subComputeEdge; // edge-vertex kernel (catmark + loop schemes)
GLuint _subComputeBilinearEdge; // edge-vertex kernel (bilinear scheme)

View File

@ -207,6 +207,29 @@ OsdGLSLTransformFeedbackComputeController::ApplyCatmarkFaceVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdGLSLTransformFeedbackComputeController::ApplyCatmarkQuadFaceVerticesKernel(
FarKernelBatch const &batch, OsdGLSLTransformFeedbackComputeContext const *context) const {
assert(context);
_currentBindState.kernelBundle->ApplyCatmarkQuadFaceVerticesKernel(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdGLSLTransformFeedbackComputeController::ApplyCatmarkTriQuadFaceVerticesKernel(
FarKernelBatch const &batch, OsdGLSLTransformFeedbackComputeContext const *context) const {
assert(context);
_currentBindState.kernelBundle->ApplyCatmarkTriQuadFaceVerticesKernel(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void

View File

@ -127,6 +127,10 @@ protected:
void ApplyCatmarkFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkTriQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -186,6 +186,57 @@ void catmarkComputeFace()
writeVertex(dst);
}
// Quad face-vertices compute Kernel
subroutine(computeKernelType)
void catmarkComputeQuadFace()
{
int i = gl_VertexID + indexStart;
int fidx0 = texelFetch(_F0_IT, tableOffset + 4 * i + 0).x;
int fidx1 = texelFetch(_F0_IT, tableOffset + 4 * i + 1).x;
int fidx2 = texelFetch(_F0_IT, tableOffset + 4 * i + 2).x;
int fidx3 = texelFetch(_F0_IT, tableOffset + 4 * i + 3).x;
Vertex dst;
clear(dst);
addWithWeight(dst, readVertex(fidx0), 0.25);
addWithWeight(dst, readVertex(fidx1), 0.25);
addWithWeight(dst, readVertex(fidx2), 0.25);
addWithWeight(dst, readVertex(fidx3), 0.25);
addVaryingWithWeight(dst, readVertex(fidx0), 0.25);
addVaryingWithWeight(dst, readVertex(fidx1), 0.25);
addVaryingWithWeight(dst, readVertex(fidx2), 0.25);
addVaryingWithWeight(dst, readVertex(fidx3), 0.25);
writeVertex(dst);
}
// Tri-quad face-vertices compute Kernel
subroutine(computeKernelType)
void catmarkComputeTriQuadFace()
{
int i = gl_VertexID + indexStart;
int fidx0 = texelFetch(_F0_IT, tableOffset + 4 * i + 0).x;
int fidx1 = texelFetch(_F0_IT, tableOffset + 4 * i + 1).x;
int fidx2 = texelFetch(_F0_IT, tableOffset + 4 * i + 2).x;
int fidx3 = texelFetch(_F0_IT, tableOffset + 4 * i + 3).x;
bool triangle = (fidx2 == fidx3);
float weight = triangle ? 1.0f / 3.0f : 1.0f / 4.0f;
Vertex dst;
clear(dst);
addWithWeight(dst, readVertex(fidx0), weight);
addWithWeight(dst, readVertex(fidx1), weight);
addWithWeight(dst, readVertex(fidx2), weight);
addVaryingWithWeight(dst, readVertex(fidx0), weight);
addVaryingWithWeight(dst, readVertex(fidx1), weight);
addVaryingWithWeight(dst, readVertex(fidx2), weight);
if (!triangle) {
addWithWeight(dst, readVertex(fidx3), weight);
addVaryingWithWeight(dst, readVertex(fidx3), weight);
}
writeVertex(dst);
}
// Edge-vertices compute Kernel
subroutine(computeKernelType)
void catmarkComputeEdge()

View File

@ -225,6 +225,8 @@ OsdGLSLTransformFeedbackKernelBundle::Compile(
_uniformVaryingBuffer = glGetUniformLocation(_program, "varyingData");
_subComputeFace = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeFace");
_subComputeQuadFace = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeQuadFace");
_subComputeTriQuadFace = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeTriQuadFace");
_subComputeEdge = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeEdge");
_subComputeBilinearEdge = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "bilinearComputeEdge");
_subComputeVertex = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "bilinearComputeVertex");
@ -356,6 +358,30 @@ OsdGLSLTransformFeedbackKernelBundle::ApplyBilinearVertexVerticesKernel(
}
void
OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkQuadFaceVerticesKernel(
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end) {
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeQuadFace);
transformGpuBufferData(vertexBuffer, varyingBuffer,
vertexOffset, varyingOffset,
offset, tableOffset, start, end);
}
void
OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkTriQuadFaceVerticesKernel(
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end) {
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeTriQuadFace);
transformGpuBufferData(vertexBuffer, varyingBuffer,
vertexOffset, varyingOffset,
offset, tableOffset, start, end);
}
void
OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkFaceVerticesKernel(
GLuint vertexBuffer, GLuint varyingBuffer,

View File

@ -68,6 +68,16 @@ public:
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end);
void ApplyCatmarkQuadFaceVerticesKernel(
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end);
void ApplyCatmarkTriQuadFaceVerticesKernel(
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end);
void ApplyCatmarkEdgeVerticesKernel(
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
@ -181,6 +191,10 @@ protected:
GLuint _subComputeFace; // general face-vertex kernel (all schemes)
GLuint _subComputeQuadFace; // quad face-vertex kernel (catmark)
GLuint _subComputeTriQuadFace; // tri-quad face-vertex kernel (catmark)
GLuint _subComputeEdge; // edge-vertex kernel (catmark + loop schemes)
GLuint _subComputeBilinearEdge; // edge-vertex kernel (bilinear scheme)

View File

@ -168,6 +168,70 @@ void runKernel( uint3 ID )
}
};
// Quad face-vertices compute Kernel
class CatmarkComputeQuadFace : IComputeKernel {
int placeholder;
void runKernel( uint3 ID )
{
int i = int(ID.x) + indexStart;
if (i >= indexEnd) return;
int vid = i + vertexOffset;
int fidx0 = _F_IT[tableOffset + 4 * i + 0];
int fidx1 = _F_IT[tableOffset + 4 * i + 1];
int fidx2 = _F_IT[tableOffset + 4 * i + 2];
int fidx3 = _F_IT[tableOffset + 4 * i + 3];
Vertex dst;
clear(dst);
addWithWeight(dst, readVertex(fidx0), 0.25f);
addWithWeight(dst, readVertex(fidx1), 0.25f);
addWithWeight(dst, readVertex(fidx2), 0.25f);
addWithWeight(dst, readVertex(fidx3), 0.25f);
addVaryingWithWeight(dst, readVertex(fidx0), 0.25f);
addVaryingWithWeight(dst, readVertex(fidx1), 0.25f);
addVaryingWithWeight(dst, readVertex(fidx2), 0.25f);
addVaryingWithWeight(dst, readVertex(fidx3), 0.25f);
writeVertex(vid, dst);
}
};
// Tri-quad face-vertices compute Kernel
class CatmarkComputeTriQuadFace : IComputeKernel {
int placeholder;
void runKernel( uint3 ID )
{
int i = int(ID.x) + indexStart;
if (i >= indexEnd) return;
int vid = i + vertexOffset;
int fidx0 = _F_IT[tableOffset + 4 * i + 0];
int fidx1 = _F_IT[tableOffset + 4 * i + 1];
int fidx2 = _F_IT[tableOffset + 4 * i + 2];
int fidx3 = _F_IT[tableOffset + 4 * i + 3];
bool triangle = (fidx2 == fidx3);
float weight = (triangle ? 1.0f / 3.0f : 1.0f / 4.0f);
Vertex dst;
clear(dst);
addWithWeight(dst, readVertex(fidx0), weight);
addWithWeight(dst, readVertex(fidx1), weight);
addWithWeight(dst, readVertex(fidx2), weight);
addVaryingWithWeight(dst, readVertex(fidx0), weight);
addVaryingWithWeight(dst, readVertex(fidx1), weight);
addVaryingWithWeight(dst, readVertex(fidx2), weight);
if (!triangle) {
addWithWeight(dst, readVertex(fidx3), weight);
addVaryingWithWeight(dst, readVertex(fidx3), weight);
}
writeVertex(vid, dst);
}
};
// Edge-vertices compute Kernel
class CatmarkComputeEdge : IComputeKernel {
int placeholder;
@ -391,6 +455,8 @@ void runKernel( uint3 ID )
};
CatmarkComputeFace catmarkComputeFace;
CatmarkComputeQuadFace catmarkComputeQuadFace;
CatmarkComputeTriQuadFace catmarkComputeTriQuadFace;
CatmarkComputeEdge catmarkComputeEdge;
BilinearComputeEdge bilinearComputeEdge;
BilinearComputeVertex bilinearComputeVertex;

View File

@ -94,6 +94,32 @@ OsdOmpComputeController::ApplyCatmarkFaceVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdOmpComputeController::ApplyCatmarkQuadFaceVerticesKernel(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
assert(context);
OsdOmpComputeQuadFace(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::F_IT)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdOmpComputeController::ApplyCatmarkTriQuadFaceVerticesKernel(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
assert(context);
OsdOmpComputeTriQuadFace(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::F_IT)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdOmpComputeController::ApplyCatmarkEdgeVerticesKernel(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {

View File

@ -128,6 +128,10 @@ protected:
void ApplyCatmarkFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkTriQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -103,6 +103,98 @@ void OsdOmpComputeFace(
}
}
void OsdOmpComputeQuadFace(
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *F_IT, int offset, int tableOffset, int start, int end) {
int numThreads = omp_get_max_threads();
float *vertexResultsArray = (float*)alloca(vertexDesc.length * sizeof(float) * numThreads);
float *varyingResultsArray = (float*)alloca(varyingDesc.length * sizeof(float) * numThreads);
#pragma omp parallel for
for (int i = start ; i < end ; i++) {
int fidx0 = F_IT[tableOffset + 4 * i + 0];
int fidx1 = F_IT[tableOffset + 4 * i + 1];
int fidx2 = F_IT[tableOffset + 4 * i + 2];
int fidx3 = F_IT[tableOffset + 4 * i + 3];
int dstIndex = offset + i;
int threadId = omp_get_thread_num();
float *vertexResults = vertexResultsArray +
vertexDesc.length * threadId;
float *varyingResults = varyingResultsArray +
varyingDesc.length * threadId;
// clear
clear(vertexResults, vertexDesc);
clear(varyingResults, varyingDesc);
addWithWeight(vertexResults, vertex, fidx0, 0.25f, vertexDesc);
addWithWeight(vertexResults, vertex, fidx1, 0.25f, vertexDesc);
addWithWeight(vertexResults, vertex, fidx2, 0.25f, vertexDesc);
addWithWeight(vertexResults, vertex, fidx3, 0.25f, vertexDesc);
addWithWeight(varyingResults, varying, fidx0, 0.25f, varyingDesc);
addWithWeight(varyingResults, varying, fidx1, 0.25f, varyingDesc);
addWithWeight(varyingResults, varying, fidx2, 0.25f, varyingDesc);
addWithWeight(varyingResults, varying, fidx3, 0.25f, varyingDesc);
// write results
copy(vertex, vertexResults, dstIndex, vertexDesc);
copy(varying, varyingResults, dstIndex, varyingDesc);
}
}
void OsdOmpComputeTriQuadFace(
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *F_IT, int offset, int tableOffset, int start, int end) {
int numThreads = omp_get_max_threads();
float *vertexResultsArray = (float*)alloca(vertexDesc.length * sizeof(float) * numThreads);
float *varyingResultsArray = (float*)alloca(varyingDesc.length * sizeof(float) * numThreads);
#pragma omp parallel for
for (int i = start ; i < end ; i++) {
int fidx0 = F_IT[tableOffset + 4 * i + 0];
int fidx1 = F_IT[tableOffset + 4 * i + 1];
int fidx2 = F_IT[tableOffset + 4 * i + 2];
int fidx3 = F_IT[tableOffset + 4 * i + 3];
bool triangle = (fidx2 == fidx3);
float weight = (triangle ? 1.0f / 3.0f : 1.0f / 4.0f);
int dstIndex = offset + i;
int threadId = omp_get_thread_num();
float *vertexResults = vertexResultsArray +
vertexDesc.length * threadId;
float *varyingResults = varyingResultsArray +
varyingDesc.length * threadId;
// clear
clear(vertexResults, vertexDesc);
clear(varyingResults, varyingDesc);
addWithWeight(vertexResults, vertex, fidx0, weight, vertexDesc);
addWithWeight(vertexResults, vertex, fidx1, weight, vertexDesc);
addWithWeight(vertexResults, vertex, fidx2, weight, vertexDesc);
addWithWeight(varyingResults, varying, fidx0, weight, varyingDesc);
addWithWeight(varyingResults, varying, fidx1, weight, varyingDesc);
addWithWeight(varyingResults, varying, fidx2, weight, varyingDesc);
if (!triangle) {
addWithWeight(vertexResults, vertex, fidx3, weight, vertexDesc);
addWithWeight(varyingResults, varying, fidx3, weight, varyingDesc);
}
// write results
copy(vertex, vertexResults, dstIndex, vertexDesc);
copy(varying, varyingResults, dstIndex, varyingDesc);
}
}
void OsdOmpComputeEdge(
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,

View File

@ -40,6 +40,20 @@ void OsdOmpComputeFace(float * vertex, float * varying,
int vertexOffset, int tableOffset,
int start, int end);
void OsdOmpComputeQuadFace(float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *F_IT,
int vertexOffset, int tableOffset,
int start, int end);
void OsdOmpComputeTriQuadFace(float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *F_IT,
int vertexOffset, int tableOffset,
int start, int end);
void OsdOmpComputeEdge(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,

View File

@ -100,6 +100,32 @@ OsdTbbComputeController::ApplyCatmarkFaceVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdTbbComputeController::ApplyCatmarkQuadFaceVerticesKernel(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
assert(context);
OsdTbbComputeQuadFace(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::F_IT)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdTbbComputeController::ApplyCatmarkTriQuadFaceVerticesKernel(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
assert(context);
OsdTbbComputeTriQuadFace(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::F_IT)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdTbbComputeController::ApplyCatmarkEdgeVerticesKernel(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {

View File

@ -120,6 +120,10 @@ protected:
void ApplyCatmarkFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkTriQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -142,6 +142,162 @@ void OsdTbbComputeFace(
tbb::parallel_for(range, kernel);
}
class TBBQuadFaceKernel {
float *vertex;
float *varying;
OsdVertexBufferDescriptor vertexDesc;
OsdVertexBufferDescriptor varyingDesc;
int const *F_IT;
int vertexOffset;
int tableOffset;
public:
void operator() (tbb::blocked_range<int> const &r) const {
for (int i = r.begin(); i < r.end(); i++) {
int fidx0 = F_IT[tableOffset + 4 * i + 0];
int fidx1 = F_IT[tableOffset + 4 * i + 1];
int fidx2 = F_IT[tableOffset + 4 * i + 2];
int fidx3 = F_IT[tableOffset + 4 * i + 3];
// XXX: should use local vertex struct variable instead of
// accumulating directly into global memory.
int dstIndex = i + vertexOffset;
clear(vertex, dstIndex, vertexDesc);
clear(varying, dstIndex, varyingDesc);
addWithWeight(vertex, dstIndex, fidx0, 0.25f, vertexDesc);
addWithWeight(vertex, dstIndex, fidx1, 0.25f, vertexDesc);
addWithWeight(vertex, dstIndex, fidx2, 0.25f, vertexDesc);
addWithWeight(vertex, dstIndex, fidx3, 0.25f, vertexDesc);
addWithWeight(varying, dstIndex, fidx0, 0.25f, varyingDesc);
addWithWeight(varying, dstIndex, fidx1, 0.25f, varyingDesc);
addWithWeight(varying, dstIndex, fidx2, 0.25f, varyingDesc);
addWithWeight(varying, dstIndex, fidx3, 0.25f, varyingDesc);
}
}
TBBQuadFaceKernel(TBBQuadFaceKernel const &other)
{
this->vertex = other.vertex;
this->varying= other.varying;
this->vertexDesc = other.vertexDesc;
this->varyingDesc = other.varyingDesc;
this->F_IT = other.F_IT;
this->vertexOffset = other.vertexOffset;
this->tableOffset = other.tableOffset;
}
TBBQuadFaceKernel(float *vertex_in,
float *varying_in,
OsdVertexBufferDescriptor const &vertexDesc_in,
OsdVertexBufferDescriptor const &varyingDesc_in,
int const *F_IT_in,
int vertexOffset_in,
int tableOffset_in) :
vertex (vertex_in),
varying(varying_in),
vertexDesc(vertexDesc_in),
varyingDesc(varyingDesc_in),
F_IT (F_IT_in),
vertexOffset(vertexOffset_in),
tableOffset(tableOffset_in)
{};
};
void OsdTbbComputeQuadFace(
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
int const *F_IT, int vertexOffset, int tableOffset,
int start, int end) {
TBBQuadFaceKernel kernel(vertex, varying, vertexDesc, varyingDesc, F_IT,
vertexOffset, tableOffset);
tbb::blocked_range<int> range(start, end, grain_size);
tbb::parallel_for(range, kernel);
}
class TBBTriQuadFaceKernel {
float *vertex;
float *varying;
OsdVertexBufferDescriptor vertexDesc;
OsdVertexBufferDescriptor varyingDesc;
int const *F_IT;
int vertexOffset;
int tableOffset;
public:
void operator() (tbb::blocked_range<int> const &r) const {
for (int i = r.begin(); i < r.end(); i++) {
int fidx0 = F_IT[tableOffset + 4 * i + 0];
int fidx1 = F_IT[tableOffset + 4 * i + 1];
int fidx2 = F_IT[tableOffset + 4 * i + 2];
int fidx3 = F_IT[tableOffset + 4 * i + 3];
bool triangle = (fidx2 == fidx3);
float weight = (triangle ? 1.0f / 3.0f : 1.0f / 4.0f);
// XXX: should use local vertex struct variable instead of
// accumulating directly into global memory.
int dstIndex = i + vertexOffset;
clear(vertex, dstIndex, vertexDesc);
clear(varying, dstIndex, varyingDesc);
addWithWeight(vertex, dstIndex, fidx0, weight, vertexDesc);
addWithWeight(vertex, dstIndex, fidx1, weight, vertexDesc);
addWithWeight(vertex, dstIndex, fidx2, weight, vertexDesc);
addWithWeight(varying, dstIndex, fidx0, weight, varyingDesc);
addWithWeight(varying, dstIndex, fidx1, weight, varyingDesc);
addWithWeight(varying, dstIndex, fidx2, weight, varyingDesc);
if (!triangle) {
addWithWeight(vertex, dstIndex, fidx3, weight, vertexDesc);
addWithWeight(varying, dstIndex, fidx3, weight, varyingDesc);
}
}
}
TBBTriQuadFaceKernel(TBBTriQuadFaceKernel const &other)
{
this->vertex = other.vertex;
this->varying= other.varying;
this->vertexDesc = other.vertexDesc;
this->varyingDesc = other.varyingDesc;
this->F_IT = other.F_IT;
this->vertexOffset = other.vertexOffset;
this->tableOffset = other.tableOffset;
}
TBBTriQuadFaceKernel(float *vertex_in,
float *varying_in,
OsdVertexBufferDescriptor const &vertexDesc_in,
OsdVertexBufferDescriptor const &varyingDesc_in,
int const *F_IT_in,
int vertexOffset_in,
int tableOffset_in) :
vertex (vertex_in),
varying(varying_in),
vertexDesc(vertexDesc_in),
varyingDesc(varyingDesc_in),
F_IT (F_IT_in),
vertexOffset(vertexOffset_in),
tableOffset(tableOffset_in)
{};
};
void OsdTbbComputeTriQuadFace(
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
int const *F_IT, int vertexOffset, int tableOffset,
int start, int end) {
TBBTriQuadFaceKernel kernel(vertex, varying, vertexDesc, varyingDesc, F_IT,
vertexOffset, tableOffset);
tbb::blocked_range<int> range(start, end, grain_size);
tbb::parallel_for(range, kernel);
}
class TBBEdgeKernel {
float *vertex;
float *varying;

View File

@ -39,6 +39,20 @@ void OsdTbbComputeFace(float * vertex, float * varying,
int vertexOffset, int tableOffset,
int start, int end);
void OsdTbbComputeQuadFace(float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
int const *F_IT,
int vertexOffset, int tableOffset,
int start, int end);
void OsdTbbComputeTriQuadFace(float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
int const *F_IT,
int vertexOffset, int tableOffset,
int start, int end);
void OsdTbbComputeEdge(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,