Added the CATMARK_RESTRICTED_VERT_VERTEX_A, CATMARK_RESTRICTED_VERT_VERTEX_B1, and CATMARK_RESTRICTED_VERT_VERTEX_B2 kernels which compute vertices resulting from the refinement of a smooth or (fully) sharp vertex.

* CATMARK_RESTRICTED_VERT_VERTEX_A handles k_Crease and k_Corner rules
* CATMARK_RESTRICTED_VERT_VERTEX_B1 handles regular k_Smooth and k_Dart rules
* CATMARK_RESTRICTED_VERT_VERTEX_B2 handles irregular k_Smooth and k_Dart rules
This commit is contained in:
Nathan Litke 2014-06-10 16:31:44 -07:00
parent af3424e1da
commit b7a763853c
45 changed files with 2238 additions and 65 deletions

View File

@ -56,6 +56,10 @@ protected:
/// will reserve and append refinement tasks
///
static FarSubdivisionTables * Create( FarMeshFactory<T,U> * meshFactory, FarKernelBatchVector *batches );
// Compares vertices based on their topological configuration
// (see subdivisionTables::GetMaskRanking for more details)
static bool CompareVertices( HbrVertex<T> const *x, HbrVertex<T> const *y );
};
// This factory walks the Hbr vertices and accumulates the weights and adjacency
@ -97,9 +101,18 @@ FarCatmarkSubdivisionTablesFactory<T,U>::Create( FarMeshFactory<T,U> * meshFacto
typename HbrCatmarkSubdivision<T>::TriangleSubdivision triangleMethod =
dynamic_cast<HbrCatmarkSubdivision<T> *>(meshFactory->GetHbrMesh()->GetSubdivision())->GetTriangleSubdivisionMethod();
bool hasFractionalEdgeSharpness = tablesFactory.HasFractionalEdgeSharpness();
bool useRestrictedEdgeVertexKernel = meshFactory->IsKernelTypeSupported(FarKernelBatch::CATMARK_RESTRICTED_EDGE_VERTEX);;
bool useRestrictedEdgeVertexKernel = meshFactory->IsKernelTypeSupported(FarKernelBatch::CATMARK_RESTRICTED_EDGE_VERTEX);
useRestrictedEdgeVertexKernel &= !hasFractionalEdgeSharpness && triangleMethod != HbrCatmarkSubdivision<T>::k_New;
bool hasFractionalVertexSharpness = tablesFactory.HasFractionalVertexSharpness();
bool hasStandardVertexVertexKernels = meshFactory->IsKernelTypeSupported(FarKernelBatch::CATMARK_VERT_VERTEX_A1) &&
meshFactory->IsKernelTypeSupported(FarKernelBatch::CATMARK_VERT_VERTEX_A2) &&
meshFactory->IsKernelTypeSupported(FarKernelBatch::CATMARK_VERT_VERTEX_B);
bool useRestrictedVertexVertexKernels = meshFactory->IsKernelTypeSupported(FarKernelBatch::CATMARK_RESTRICTED_VERT_VERTEX_A) &&
meshFactory->IsKernelTypeSupported(FarKernelBatch::CATMARK_RESTRICTED_VERT_VERTEX_B1) &&
meshFactory->IsKernelTypeSupported(FarKernelBatch::CATMARK_RESTRICTED_VERT_VERTEX_B2);
useRestrictedVertexVertexKernels &= !hasFractionalVertexSharpness && !hasFractionalEdgeSharpness;
// Allocate memory for the indexing tables
result->_F_ITa.resize(F_ITa_size);
result->_F_IT.resize(F_IT_size);
@ -111,8 +124,9 @@ FarCatmarkSubdivisionTablesFactory<T,U>::Create( FarMeshFactory<T,U> * meshFacto
result->_V_ITa.resize((tablesFactory.GetNumVertexVerticesTotal(maxlevel)
- tablesFactory.GetNumVertexVerticesTotal(0))*5); // subtract coarse cage vertices
result->_V_IT.resize(tablesFactory.GetVertVertsValenceSum()*2);
result->_V_W.resize(tablesFactory.GetNumVertexVerticesTotal(maxlevel)
- tablesFactory.GetNumVertexVerticesTotal(0));
if (!useRestrictedVertexVertexKernels)
result->_V_W.resize(tablesFactory.GetNumVertexVerticesTotal(maxlevel)
- tablesFactory.GetNumVertexVerticesTotal(0));
// Prepare batch table
batches->reserve(maxlevel*5);
@ -382,30 +396,49 @@ FarCatmarkSubdivisionTablesFactory<T,U>::Create( FarMeshFactory<T,U> * meshFacto
break;
}
case HbrVertex<T>::k_Corner :
// in the case of a k_Crease / k_Corner pass combination, we
// need to set the valence to -1 to tell the "B" Kernel to
// switch to k_Corner rule (as edge indices won't be -1)
if (V_ITa[5*i+1]==0)
V_ITa[5*i+1] = -1;
if (!useRestrictedVertexVertexKernels) {
// in the case of a k_Crease / k_Corner pass combination, we
// need to set the valence to -1 to tell the "B" Kernel to
// switch to k_Corner rule (as edge indices won't be -1)
if (V_ITa[5*i+1]==0)
V_ITa[5*i+1] = -1;
} else {
// in the case of a k_Corner, repeat the vertex
V_ITa[5*i+3] = V_ITa[5*i+2];
V_ITa[5*i+4] = V_ITa[5*i+2];
}
default : break;
}
if (rank>7)
// the k_Corner and k_Crease single-pass cases apply a weight of 1.0
// but this value is inverted in the kernel
V_W[i] = 0.0;
else
V_W[i] = weights[0];
if (!useRestrictedVertexVertexKernels) {
if (rank>7)
// the k_Corner and k_Crease single-pass cases apply a weight of 1.0
// but this value is inverted in the kernel
V_W[i] = 0.0;
else
V_W[i] = weights[0];
}
batchFactory.AddVertex( i, rank );
if (!useRestrictedVertexVertexKernels)
batchFactory.AddVertex( i, rank );
else
batchFactory.AddCatmarkRestrictedVertex( i, rank, V_ITa[5*i+1] );
}
V_ITa += nVertVertices*5;
V_W += nVertVertices;
if (!useRestrictedVertexVertexKernels)
V_W += nVertVertices;
// add batches for vert vertices
if (nVertVertices > 0)
batchFactory.AppendCatmarkBatches(level, vertTableOffset, vertexOffset, batches);
if (nVertVertices > 0) {
if (!useRestrictedVertexVertexKernels) {
assert(hasStandardVertexVertexKernels);
batchFactory.AppendCatmarkBatches(level, vertTableOffset, vertexOffset, batches);
} else {
batchFactory.AppendCatmarkRestrictedBatches(level, vertTableOffset, vertexOffset, batches);
}
}
vertexOffset += nVertVertices;
vertTableOffset += nVertVertices;
}
@ -413,6 +446,25 @@ FarCatmarkSubdivisionTablesFactory<T,U>::Create( FarMeshFactory<T,U> * meshFacto
return result;
}
template <class T, class U> bool
FarCatmarkSubdivisionTablesFactory<T,U>::CompareVertices( HbrVertex<T> const * x, HbrVertex<T> const * y ) {
// Masks of the parent vertex decide for the current vertex.
HbrVertex<T> * px=x->GetParentVertex(),
* py=y->GetParentVertex();
int rankx = GetMaskRanking(px->GetMask(false), px->GetMask(true) );
int ranky = GetMaskRanking(py->GetMask(false), py->GetMask(true) );
assert( rankx!=0xFF and ranky!=0xFF );
// Arrange regular vertices before irregular vertices within the same kernel
if ((rankx <= 2 && ranky <= 2) || (rankx >= 3 && rankx <= 7 && ranky >= 3 && ranky <= 7) || (rankx >= 8 && ranky >= 8))
return px->GetValence() == 4 && py->GetValence() != 4;
else
return rankx < ranky;
}
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;

View File

@ -108,6 +108,15 @@ FarDispatcher::ApplyKernel(CONTROLLER *controller, CONTEXT *context, FarKernelBa
case FarKernelBatch::CATMARK_VERT_VERTEX_A2:
controller->ApplyCatmarkVertexVerticesKernelA2(batch, context);
break;
case FarKernelBatch::CATMARK_RESTRICTED_VERT_VERTEX_B1:
controller->ApplyCatmarkRestrictedVertexVerticesKernelB1(batch, context);
break;
case FarKernelBatch::CATMARK_RESTRICTED_VERT_VERTEX_B2:
controller->ApplyCatmarkRestrictedVertexVerticesKernelB2(batch, context);
break;
case FarKernelBatch::CATMARK_RESTRICTED_VERT_VERTEX_A:
controller->ApplyCatmarkRestrictedVertexVerticesKernelA(batch, context);
break;
case FarKernelBatch::LOOP_EDGE_VERTEX:
controller->ApplyLoopEdgeVerticesKernel(batch, context);
@ -200,6 +209,15 @@ public:
template <class CONTEXT>
void ApplyCatmarkVertexVerticesKernelA2(FarKernelBatch const &batch, CONTEXT *context) const;
template <class CONTEXT>
void ApplyCatmarkRestrictedVertexVerticesKernelB1(FarKernelBatch const &batch, CONTEXT *context) const;
template <class CONTEXT>
void ApplyCatmarkRestrictedVertexVerticesKernelB2(FarKernelBatch const &batch, CONTEXT *context) const;
template <class CONTEXT>
void ApplyCatmarkRestrictedVertexVerticesKernelA(FarKernelBatch const &batch, CONTEXT *context) const;
template <class CONTEXT>
void ApplyLoopEdgeVerticesKernel(FarKernelBatch const &batch, CONTEXT *context) const;
@ -393,6 +411,51 @@ FarComputeController::ApplyCatmarkVertexVerticesKernelA2(FarKernelBatch const &b
vsrc );
}
template <class CONTEXT> void
FarComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB1(FarKernelBatch const &batch, CONTEXT *context) const {
typename CONTEXT::VertexType *vsrc = &context->GetVertices().at(0);
FarSubdivisionTables const * subdivision = context->GetSubdivisionTables();
assert(subdivision);
subdivision->computeCatmarkRestrictedVertexPointsB1( batch.GetVertexOffset(),
batch.GetTableOffset(),
batch.GetStart(),
batch.GetEnd(),
vsrc );
}
template <class CONTEXT> void
FarComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB2(FarKernelBatch const &batch, CONTEXT *context) const {
typename CONTEXT::VertexType *vsrc = &context->GetVertices().at(0);
FarSubdivisionTables const * subdivision = context->GetSubdivisionTables();
assert(subdivision);
subdivision->computeCatmarkRestrictedVertexPointsB2( batch.GetVertexOffset(),
batch.GetTableOffset(),
batch.GetStart(),
batch.GetEnd(),
vsrc );
}
template <class CONTEXT> void
FarComputeController::ApplyCatmarkRestrictedVertexVerticesKernelA(FarKernelBatch const &batch, CONTEXT *context) const {
typename CONTEXT::VertexType *vsrc = &context->GetVertices().at(0);
FarSubdivisionTables const * subdivision = context->GetSubdivisionTables();
assert(subdivision);
subdivision->computeCatmarkRestrictedVertexPointsA( batch.GetVertexOffset(),
batch.GetTableOffset(),
batch.GetStart(),
batch.GetEnd(),
vsrc );
}
template <class CONTEXT> void
FarComputeController::ApplyLoopEdgeVerticesKernel(FarKernelBatch const &batch, CONTEXT *context) const {

View File

@ -74,6 +74,9 @@ public:
CATMARK_VERT_VERTEX_A1,
CATMARK_VERT_VERTEX_A2,
CATMARK_VERT_VERTEX_B,
CATMARK_RESTRICTED_VERT_VERTEX_A,
CATMARK_RESTRICTED_VERT_VERTEX_B1,
CATMARK_RESTRICTED_VERT_VERTEX_B2,
LOOP_EDGE_VERTEX,
LOOP_VERT_VERTEX_A1,
LOOP_VERT_VERTEX_A2,

View File

@ -46,6 +46,8 @@ public:
FarVertexKernelBatchFactory(int start, int end) {
kernelB.start = kernelA1.start = kernelA2.start = start;
kernelB.end = kernelA1.end = kernelA2.end = end;
restrictedKernelB1.start = restrictedKernelB2.start = restrictedKernelA.start = start;
restrictedKernelB1.end = restrictedKernelB2.end = restrictedKernelA.end = end;
}
@ -97,6 +99,17 @@ public:
void AddVertex( int index, int rank );
/// \brief Adds a vertex-vertex to the appropriate restricted compute batch based on "Rank" and valence.
///
/// @param index the index of the vertex
///
/// @param rank the rank of the vertex (see
/// FarSubdivisionTables::GetMaskRanking())
///
/// @param valence the valence of the vertex
///
void AddCatmarkRestrictedVertex( int index, int rank, int valence );
/// \brief Appends a FarKernelBatch to a vector of batches for Catmark subdivision
///
@ -111,6 +124,18 @@ public:
void AppendCatmarkBatches(int level, int tableOffset, int vertexOffset, FarKernelBatchVector *result);
/// \brief Appends a restricted FarKernelBatch to a vector of batches for Catmark subdivision
///
/// @param level the subdivision level of the vertices in the batch
///
/// @param tableOffset XXXX
///
/// @param vertexOffset XXXX
///
/// @param result the expanded batch vector
///
void AppendCatmarkRestrictedBatches(int level, int tableOffset, int vertexOffset, FarKernelBatchVector *result);
/// \brief Appends a FarKernelBatch to a vector of batches for Loop subdivision
///
@ -131,9 +156,12 @@ private:
end;
};
Range kernelB; // vertex batch range (kernel B)
Range kernelA1; // vertex batch range (kernel A pass 1)
Range kernelA2; // vertex batch range (kernel A pass 2)
Range kernelB; // vertex batch range (kernel B)
Range kernelA1; // vertex batch range (kernel A pass 1)
Range kernelA2; // vertex batch range (kernel A pass 2)
Range restrictedKernelB1; // vertex batch range (restricted kernel B regular)
Range restrictedKernelB2; // vertex batch range (restricted kernel B irregular)
Range restrictedKernelA; // vertex batch range (restricted kernel A)
};
inline void
@ -160,6 +188,29 @@ FarVertexKernelBatchFactory::AddVertex( int index, int rank ) {
}
}
inline void
FarVertexKernelBatchFactory::AddCatmarkRestrictedVertex( int index, int rank, int valence ) {
assert(rank <= 2 || rank >= 8);
if (rank <= 2 && valence == 4) {
if (index < restrictedKernelB1.start)
restrictedKernelB1.start=index;
if (index > restrictedKernelB1.end)
restrictedKernelB1.end=index;
} else if (rank <= 2 && valence != 4) {
if (index < restrictedKernelB2.start)
restrictedKernelB2.start=index;
if (index > restrictedKernelB2.end)
restrictedKernelB2.end=index;
} else if (rank >= 8) {
if (index < restrictedKernelA.start)
restrictedKernelA.start=index;
if (index > restrictedKernelA.end)
restrictedKernelA.end=index;
}
}
inline void
FarVertexKernelBatchFactory::AppendCatmarkBatches(int level,
int tableOffset,
@ -180,6 +231,26 @@ FarVertexKernelBatchFactory::AppendCatmarkBatches(int level,
tableOffset, vertexOffset) );
}
inline void
FarVertexKernelBatchFactory::AppendCatmarkRestrictedBatches(int level,
int tableOffset,
int vertexOffset,
FarKernelBatchVector *result) {
if (restrictedKernelB1.end >= restrictedKernelB1.start)
result->push_back(FarKernelBatch( FarKernelBatch::CATMARK_RESTRICTED_VERT_VERTEX_B1, level, 0,
restrictedKernelB1.start, restrictedKernelB1.end+1,
tableOffset, vertexOffset));
if (restrictedKernelB2.end >= restrictedKernelB2.start)
result->push_back(FarKernelBatch( FarKernelBatch::CATMARK_RESTRICTED_VERT_VERTEX_B2, level, 0,
restrictedKernelB2.start, restrictedKernelB2.end+1,
tableOffset, vertexOffset) );
if (restrictedKernelA.end >= restrictedKernelA.start)
result->push_back(FarKernelBatch( FarKernelBatch::CATMARK_RESTRICTED_VERT_VERTEX_A, level, 0,
restrictedKernelA.start, restrictedKernelA.end+1,
tableOffset, vertexOffset) );
}
inline void
FarVertexKernelBatchFactory::AppendLoopBatches(int level,
int tableOffset,

View File

@ -189,6 +189,22 @@ public:
template <class U>
void computeCatmarkVertexPointsB(int vertexOffset, int tableOffset, int start, int end, U * vsrc) const;
// Compute-kernel applied to vertices resulting from the refinement of a smooth or sharp vertex
// Kernel "A" handles the k_Crease and k_Corner rules
template <class U>
void computeCatmarkRestrictedVertexPointsA(int vertexOffset, int tableOffset, int start, int end, U * vsrc) const;
// Compute-kernel applied to vertices resulting from the refinement of a smooth or sharp vertex
// Kernel "B1" handles the regular k_Smooth and k_Dart rules
template <class U>
void computeCatmarkRestrictedVertexPointsB1(int vertexOffset, int tableOffset, int start, int end, U * vsrc) const;
// Compute-kernel applied to vertices resulting from the refinement of a smooth or sharp vertex
// Kernel "B2" handles the irregular k_Smooth and k_Dart rules
template <class U>
void computeCatmarkRestrictedVertexPointsB2(int vertexOffset, int tableOffset, int start, int end, U * vsrc) const;
// -------------------------------------------------------------------------
// Loop scheme
@ -573,6 +589,76 @@ FarSubdivisionTables::computeCatmarkVertexPointsB( int vertexOffset, int tableOf
}
}
// single-pass kernel handling k_Crease and k_Corner rules
template <class U> void
FarSubdivisionTables::computeCatmarkRestrictedVertexPointsA( int vertexOffset, int tableOffset, int start, int end, U * vsrc ) const {
U * vdst = vsrc + vertexOffset + start;
for (int i=start+tableOffset; i<end+tableOffset; ++i, ++vdst ) {
vdst->Clear();
int p=this->_V_ITa[5*i+2], // index of the parent vertex
eidx0=this->_V_ITa[5*i+3], // index of the first crease rule edge
eidx1=this->_V_ITa[5*i+4]; // index of the second crease rule edge
vdst->AddWithWeight( vsrc[p], 0.75f );
vdst->AddWithWeight( vsrc[eidx0], 0.125f );
vdst->AddWithWeight( vsrc[eidx1], 0.125f );
vdst->AddVaryingWithWeight( vsrc[p], 1.0f );
}
}
// single-pass kernel handling regular k_Smooth and k_Dart rules
template <class U> void
FarSubdivisionTables::computeCatmarkRestrictedVertexPointsB1( int vertexOffset, int tableOffset, int start, int end, U * vsrc ) const {
U * vdst = vsrc + vertexOffset + start;
for (int i=start+tableOffset; i<end+tableOffset; ++i, ++vdst ) {
vdst->Clear();
int h = this->_V_ITa[5*i ], // offset of the vertices in the _V_IT array
p = this->_V_ITa[5*i+2]; // index of the parent vertex
vdst->AddWithWeight( vsrc[p], 0.5f );
for (int j=0; j<8; ++j, ++h)
vdst->AddWithWeight( vsrc[this->_V_IT[h]], 0.0625f );
vdst->AddVaryingWithWeight( vsrc[p], 1.0f );
}
}
// single-pass kernel handling irregular k_Smooth and k_Dart rules
template <class U> void
FarSubdivisionTables::computeCatmarkRestrictedVertexPointsB2( int vertexOffset, int tableOffset, int start, int end, U * vsrc ) const {
U * vdst = vsrc + vertexOffset + start;
for (int i=start+tableOffset; i<end+tableOffset; ++i, ++vdst ) {
vdst->Clear();
int h = this->_V_ITa[5*i ], // offset of the vertices in the _V_IT array
n = this->_V_ITa[5*i+1], // number of vertices in the _V_IT array (valence)
p = this->_V_ITa[5*i+2]; // index of the parent vertex
float wp = 1.0f/(n*n),
wv = (n-2.0f)*n*wp;
vdst->AddWithWeight( vsrc[p], wv );
for (int j=0; j<n; ++j) {
vdst->AddWithWeight( vsrc[this->_V_IT[h+j*2 ]], wp );
vdst->AddWithWeight( vsrc[this->_V_IT[h+j*2+1]], wp );
}
vdst->AddVaryingWithWeight( vsrc[p], 1.0f );
}
}
//
// Edge-vertices compute Kernel - completely re-entrant
//

View File

@ -62,11 +62,13 @@ protected:
template <class X, class Y> friend class FarMeshFactory;
typedef bool (*CompareVerticesOperator)(const HbrVertex<T> *, const HbrVertex<T> *);
// This factory accumulates vertex topology data that will be shared among the
// specialized subdivision scheme factories (Bilinear / Catmark / Loop).
// It also populates the FarMeshFactory vertex remapping vector that ties the
// Hbr vertex indices to the FarVertexEdit tables.
FarSubdivisionTablesFactory( HbrMesh<T> const * mesh, int maxlevel, std::vector<int> & remapTable );
FarSubdivisionTablesFactory( HbrMesh<T> const * mesh, int maxlevel, std::vector<int> & remapTable, CompareVerticesOperator compareVertices = CompareVertices );
// Returns the number of coarse vertices found in the mesh
int GetNumCoarseVertices() const {
@ -108,6 +110,15 @@ protected:
bool HasFractionalEdgeSharpness() const { return _hasFractionalEdgeSharpness; }
bool HasFractionalVertexSharpness() const { return _hasFractionalVertexSharpness; }
// Compares vertices based on their topological configuration
// (see subdivisionTables::GetMaskRanking for more details)
static bool CompareVertices( HbrVertex<T> const *x, HbrVertex<T> const *y );
// Compare vertices operator
CompareVerticesOperator _compareVertices;
// Per-level counters and offsets for each type of vertex (face,edge,vert)
std::vector<int> _faceVertIdx,
_edgeVertIdx,
@ -130,8 +141,9 @@ protected:
// Number of coarse triangle faces
int _numCoarseTriangleFaces;
// Indicates if an edge has a fractional (non-integer) sharpness
bool _hasFractionalEdgeSharpness;
// Indicates if an edge or vertex has a fractional (non-integer) sharpness
bool _hasFractionalEdgeSharpness,
_hasFractionalVertexSharpness;
private:
@ -142,14 +154,11 @@ private:
// Sums the number of adjacent vertices required to interpolate a Vert-Vertex
static int sumVertVertexValence(HbrVertex<T> * vertex);
// Compares vertices based on their topological configuration
// (see subdivisionTables::GetMaskRanking for more details)
static bool compareVertices( HbrVertex<T> const *x, HbrVertex<T> const *y );
};
template <class T, class U>
FarSubdivisionTablesFactory<T,U>::FarSubdivisionTablesFactory( HbrMesh<T> const * mesh, int maxlevel, std::vector<int> & remapTable ) :
FarSubdivisionTablesFactory<T,U>::FarSubdivisionTablesFactory( HbrMesh<T> const * mesh, int maxlevel, std::vector<int> & remapTable, CompareVerticesOperator compareVertices ) :
_compareVertices(compareVertices),
_faceVertIdx(maxlevel+1,0),
_edgeVertIdx(maxlevel+1,0),
_vertVertIdx(maxlevel+1,0),
@ -161,7 +170,8 @@ FarSubdivisionTablesFactory<T,U>::FarSubdivisionTablesFactory( HbrMesh<T> const
_minCoarseFaceValence(0),
_maxCoarseFaceValence(0),
_numCoarseTriangleFaces(0),
_hasFractionalEdgeSharpness(false)
_hasFractionalEdgeSharpness(false),
_hasFractionalVertexSharpness(false)
{
assert( mesh );
@ -215,6 +225,9 @@ FarSubdivisionTablesFactory<T,U>::FarSubdivisionTablesFactory( HbrMesh<T> const
} else if (v->GetParentVertex()) {
vertCounts[depth]++;
_vertVertsValenceSum+=sumVertVertexValence(v);
float sharpness = v->GetParentVertex()->GetSharpness();
if (sharpness > 0.0f && sharpness < 1.0f)
_hasFractionalVertexSharpness = true;
}
}
@ -278,7 +291,7 @@ FarSubdivisionTablesFactory<T,U>::FarSubdivisionTablesFactory( HbrMesh<T> const
// mask. The masks combinations are ordered so as to minimize the compute
// kernel switching.(see subdivisionTables::GetMaskRanking for more details)
for (size_t i=1; i<_vertVertsList.size(); ++i)
std::sort( _vertVertsList[i].begin(), _vertVertsList[i].end(), compareVertices );
std::sort( _vertVertsList[i].begin(), _vertVertsList[i].end(), _compareVertices );
// These vertices still need a remapped index
@ -384,7 +397,7 @@ FarSubdivisionTablesFactory<T,U>::sumVertVertexValence(HbrVertex<T> * vertex) {
// The vertices should be sorted so as to minimize the number execution calls of
// these kernels to match the 2 pass interpolation scheme used in Hbr.
template <class T, class U> bool
FarSubdivisionTablesFactory<T,U>::compareVertices( HbrVertex<T> const * x, HbrVertex<T> const * y ) {
FarSubdivisionTablesFactory<T,U>::CompareVertices( HbrVertex<T> const * x, HbrVertex<T> const * y ) {
// Masks of the parent vertex decide for the current vertex.
HbrVertex<T> * px=x->GetParentVertex(),
@ -604,6 +617,9 @@ FarSubdivisionTablesFactory<T,U>::Splice(FarMeshVector const &meshes, FarKernelB
} else if (batch._kernelType == FarKernelBatch::CATMARK_VERT_VERTEX_A1 or
batch._kernelType == FarKernelBatch::CATMARK_VERT_VERTEX_A2 or
batch._kernelType == FarKernelBatch::CATMARK_VERT_VERTEX_B or
batch._kernelType == FarKernelBatch::CATMARK_RESTRICTED_VERT_VERTEX_A or
batch._kernelType == FarKernelBatch::CATMARK_RESTRICTED_VERT_VERTEX_B1 or
batch._kernelType == FarKernelBatch::CATMARK_RESTRICTED_VERT_VERTEX_B2 or
batch._kernelType == FarKernelBatch::LOOP_VERT_VERTEX_A1 or
batch._kernelType == FarKernelBatch::LOOP_VERT_VERTEX_A2 or
batch._kernelType == FarKernelBatch::LOOP_VERT_VERTEX_B or

View File

@ -390,6 +390,94 @@ OsdCLComputeController::ApplyCatmarkVertexVerticesKernelA2(
CL_CHECK_ERROR(ciErrNum, "vertex kernel A2 %d\n", ciErrNum);
}
void
OsdCLComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB1(
FarKernelBatch const &batch, OsdCLComputeContext const *context) const {
assert(context);
cl_int ciErrNum;
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
cl_kernel kernel = _currentBindState.kernelBundle->GetCatmarkRestrictedVertexKernelB1();
cl_mem V_ITa = context->GetTable(FarSubdivisionTables::V_ITa)->GetDevicePtr();
cl_mem V_IT = context->GetTable(FarSubdivisionTables::V_IT)->GetDevicePtr();
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &V_ITa);
clSetKernelArg(kernel, 3, sizeof(cl_mem), &V_IT);
clSetKernelArg(kernel, 4, sizeof(int), &_currentBindState.vertexDesc.offset);
clSetKernelArg(kernel, 5, sizeof(int), &_currentBindState.varyingDesc.offset);
clSetKernelArg(kernel, 6, sizeof(int), batch.GetVertexOffsetPtr());
clSetKernelArg(kernel, 7, sizeof(int), batch.GetTableOffsetPtr());
clSetKernelArg(kernel, 8, sizeof(int), batch.GetStartPtr());
clSetKernelArg(kernel, 9, sizeof(int), batch.GetEndPtr());
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
kernel, 1, NULL, globalWorkSize,
NULL, 0, NULL, NULL);
CL_CHECK_ERROR(ciErrNum, "restricted vertex kernel B1 %d\n", ciErrNum);
}
void
OsdCLComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB2(
FarKernelBatch const &batch, OsdCLComputeContext const *context) const {
assert(context);
cl_int ciErrNum;
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
cl_kernel kernel = _currentBindState.kernelBundle->GetCatmarkRestrictedVertexKernelB2();
cl_mem V_ITa = context->GetTable(FarSubdivisionTables::V_ITa)->GetDevicePtr();
cl_mem V_IT = context->GetTable(FarSubdivisionTables::V_IT)->GetDevicePtr();
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &V_ITa);
clSetKernelArg(kernel, 3, sizeof(cl_mem), &V_IT);
clSetKernelArg(kernel, 4, sizeof(int), &_currentBindState.vertexDesc.offset);
clSetKernelArg(kernel, 5, sizeof(int), &_currentBindState.varyingDesc.offset);
clSetKernelArg(kernel, 6, sizeof(int), batch.GetVertexOffsetPtr());
clSetKernelArg(kernel, 7, sizeof(int), batch.GetTableOffsetPtr());
clSetKernelArg(kernel, 8, sizeof(int), batch.GetStartPtr());
clSetKernelArg(kernel, 9, sizeof(int), batch.GetEndPtr());
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
kernel, 1, NULL, globalWorkSize,
NULL, 0, NULL, NULL);
CL_CHECK_ERROR(ciErrNum, "restricted vertex kernel B2 %d\n", ciErrNum);
}
void
OsdCLComputeController::ApplyCatmarkRestrictedVertexVerticesKernelA(
FarKernelBatch const &batch, OsdCLComputeContext const *context) const {
assert(context);
cl_int ciErrNum;
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
cl_kernel kernel = _currentBindState.kernelBundle->GetCatmarkRestrictedVertexKernelA();
cl_mem V_ITa = context->GetTable(FarSubdivisionTables::V_ITa)->GetDevicePtr();
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &V_ITa);
clSetKernelArg(kernel, 3, sizeof(int), &_currentBindState.vertexDesc.offset);
clSetKernelArg(kernel, 4, sizeof(int), &_currentBindState.varyingDesc.offset);
clSetKernelArg(kernel, 5, sizeof(int), batch.GetVertexOffsetPtr());
clSetKernelArg(kernel, 6, sizeof(int), batch.GetTableOffsetPtr());
clSetKernelArg(kernel, 7, sizeof(int), batch.GetStartPtr());
clSetKernelArg(kernel, 8, sizeof(int), batch.GetEndPtr());
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
kernel, 1, NULL, globalWorkSize,
NULL, 0, NULL, NULL);
CL_CHECK_ERROR(ciErrNum, "restricted vertex kernel A %d\n", ciErrNum);
}
void
OsdCLComputeController::ApplyLoopEdgeVerticesKernel(
FarKernelBatch const &batch, OsdCLComputeContext const *context) const {

View File

@ -151,6 +151,12 @@ protected:
void ApplyCatmarkVertexVerticesKernelA2(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedVertexVerticesKernelB1(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedVertexVerticesKernelB2(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedVertexVerticesKernelA(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyLoopEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -428,6 +428,107 @@ __kernel void computeVertexB(__global float *vertex,
}
}
__kernel void computeRestrictedVertexA(__global float *vertex,
__global float *varying,
__global int *V_ITa,
int vertexOffset, int varyingOffset,
int offset, int tableOffset,
int start, int end) {
int i = start + get_global_id(0) + tableOffset;
int vid = start + get_global_id(0) + offset;
int p = V_ITa[5*i+2];
int eidx0 = V_ITa[5*i+3];
int eidx1 = V_ITa[5*i+4];
vertex += vertexOffset;
varying += (varying ? varyingOffset :0);
struct Vertex dst;
clearVertex(&dst);
addWithWeight(&dst, vertex, p, 0.75f);
addWithWeight(&dst, vertex, eidx0, 0.125f);
addWithWeight(&dst, vertex, eidx1, 0.125f);
writeVertex(vertex, vid, &dst);
if (varying) {
struct Varying dstVarying;
clearVarying(&dstVarying);
addVaryingWithWeight(&dstVarying, varying, p, 1.0f);
writeVarying(varying, vid, &dstVarying);
}
}
__kernel void computeRestrictedVertexB1(__global float *vertex,
__global float *varying,
__global int *V_ITa,
__global int *V_IT,
int vertexOffset, int varyingOffset,
int offset, int tableOffset,
int start, int end) {
int i = start + get_global_id(0) + tableOffset;
int vid = start + get_global_id(0) + offset;
int h = V_ITa[5*i];
int p = V_ITa[5*i+2];
vertex += vertexOffset;
varying += (varying ? varyingOffset :0);
struct Vertex dst;
clearVertex(&dst);
addWithWeight(&dst, vertex, p, 0.5f);
for (int j = 0; j < 8; ++j, ++h) {
addWithWeight(&dst, vertex, V_IT[h], 0.0625f);
}
writeVertex(vertex, vid, &dst);
if (varying) {
struct Varying dstVarying;
clearVarying(&dstVarying);
addVaryingWithWeight(&dstVarying, varying, p, 1.0f);
writeVarying(varying, vid, &dstVarying);
}
}
__kernel void computeRestrictedVertexB2(__global float *vertex,
__global float *varying,
__global int *V_ITa,
__global int *V_IT,
int vertexOffset, int varyingOffset,
int offset, int tableOffset,
int start, int end) {
int i = start + get_global_id(0) + tableOffset;
int vid = start + get_global_id(0) + offset;
int h = V_ITa[5*i];
int n = V_ITa[5*i+1];
int p = V_ITa[5*i+2];
vertex += vertexOffset;
varying += (varying ? varyingOffset :0);
float wp = 1.0f/(float)(n*n);
float wv = (n-2.0f) * n * wp;
struct Vertex dst;
clearVertex(&dst);
addWithWeight(&dst, vertex, p, wv);
for (int j = 0; j < n; ++j) {
addWithWeight(&dst, vertex, V_IT[h+j*2], wp);
addWithWeight(&dst, vertex, V_IT[h+j*2+1], wp);
}
writeVertex(vertex, vid, &dst);
if (varying) {
struct Varying dstVarying;
clearVarying(&dstVarying);
addVaryingWithWeight(&dstVarying, varying, p, 1.0f);
writeVarying(varying, vid, &dstVarying);
}
}
__kernel void computeLoopVertexB(__global float *vertex,
__global float *varying,
__global int *V_ITa,

View File

@ -57,6 +57,9 @@ OsdCLKernelBundle::OsdCLKernelBundle() :
_clCatmarkRestrictedEdge(NULL),
_clCatmarkVertexA(NULL),
_clCatmarkVertexB(NULL),
_clCatmarkRestrictedVertexA(NULL),
_clCatmarkRestrictedVertexB1(NULL),
_clCatmarkRestrictedVertexB2(NULL),
_clLoopEdge(NULL),
_clLoopVertexA(NULL),
_clLoopVertexB(NULL),
@ -87,6 +90,12 @@ OsdCLKernelBundle::~OsdCLKernelBundle() {
clReleaseKernel(_clCatmarkVertexA);
if (_clCatmarkVertexB)
clReleaseKernel(_clCatmarkVertexB);
if (_clCatmarkRestrictedVertexA)
clReleaseKernel(_clCatmarkRestrictedVertexA);
if (_clCatmarkRestrictedVertexB1)
clReleaseKernel(_clCatmarkRestrictedVertexB1);
if (_clCatmarkRestrictedVertexB2)
clReleaseKernel(_clCatmarkRestrictedVertexB2);
if (_clLoopEdge)
clReleaseKernel(_clLoopEdge);
@ -163,6 +172,9 @@ OsdCLKernelBundle::Compile(cl_context clContext,
_clCatmarkRestrictedEdge = buildKernel(_clProgram, "computeRestrictedEdge");
_clCatmarkVertexA = buildKernel(_clProgram, "computeVertexA");
_clCatmarkVertexB = buildKernel(_clProgram, "computeVertexB");
_clCatmarkRestrictedVertexA = buildKernel(_clProgram, "computeRestrictedVertexA");
_clCatmarkRestrictedVertexB1 = buildKernel(_clProgram, "computeRestrictedVertexB1");
_clCatmarkRestrictedVertexB2 = buildKernel(_clProgram, "computeRestrictedVertexB2");
_clLoopEdge = buildKernel(_clProgram, "computeEdge");
_clLoopVertexA = buildKernel(_clProgram, "computeVertexA");
_clLoopVertexB = buildKernel(_clProgram, "computeLoopVertexB");

View File

@ -62,6 +62,12 @@ public:
cl_kernel GetCatmarkVertexKernelB() const { return _clCatmarkVertexB; }
cl_kernel GetCatmarkRestrictedVertexKernelA() const { return _clCatmarkRestrictedVertexA; }
cl_kernel GetCatmarkRestrictedVertexKernelB1() const { return _clCatmarkRestrictedVertexB1; }
cl_kernel GetCatmarkRestrictedVertexKernelB2() const { return _clCatmarkRestrictedVertexB2; }
cl_kernel GetLoopEdgeKernel() const { return _clLoopEdge; }
cl_kernel GetLoopVertexKernelA() const { return _clLoopVertexA; }
@ -104,6 +110,9 @@ protected:
_clCatmarkRestrictedEdge,
_clCatmarkVertexA,
_clCatmarkVertexB,
_clCatmarkRestrictedVertexA,
_clCatmarkRestrictedVertexB1,
_clCatmarkRestrictedVertexB2,
_clLoopEdge,
_clLoopVertexA,
_clLoopVertexB,

View File

@ -186,6 +186,47 @@ OsdCpuComputeController::ApplyCatmarkVertexVerticesKernelA2(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
}
void
OsdCpuComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB1(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
assert(context);
OsdCpuComputeRestrictedVertexB1(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
(const int*)context->GetTable(FarSubdivisionTables::V_IT)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdCpuComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB2(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
assert(context);
OsdCpuComputeRestrictedVertexB2(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
(const int*)context->GetTable(FarSubdivisionTables::V_IT)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdCpuComputeController::ApplyCatmarkRestrictedVertexVerticesKernelA(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
assert(context);
OsdCpuComputeRestrictedVertexA(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdCpuComputeController::ApplyLoopEdgeVerticesKernel(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {

View File

@ -134,6 +134,12 @@ protected:
void ApplyCatmarkVertexVerticesKernelA2(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedVertexVerticesKernelB1(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedVertexVerticesKernelB2(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedVertexVerticesKernelA(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyLoopEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -367,6 +367,100 @@ void OsdCpuComputeVertexB(
}
}
void OsdCpuComputeRestrictedVertexB1(
float *vertex, float *varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, const int *V_IT,
int vertexOffset, int tableOffset, int start, int end) {
float *vertexResults = (float*)alloca(vertexDesc.length * sizeof(float));
float *varyingResults = (float*)alloca(varyingDesc.length * sizeof(float));
for (int i = start + tableOffset; i < end + tableOffset; i++) {
int h = V_ITa[5*i];
int p = V_ITa[5*i+2];
int dstIndex = i + vertexOffset - tableOffset;
clear(vertexResults, vertexDesc);
clear(varyingResults, varyingDesc);
addWithWeight(vertexResults, vertex, p, 0.5f, vertexDesc);
for (int j = 0; j < 8; ++j, ++h)
addWithWeight(vertexResults, vertex, V_IT[h], 0.0625f, vertexDesc);
addWithWeight(varyingResults, varying, p, 1.0f, varyingDesc);
copy(vertex, vertexResults, dstIndex, vertexDesc);
copy(varying, varyingResults, dstIndex, varyingDesc);
}
}
void OsdCpuComputeRestrictedVertexB2(
float *vertex, float *varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, const int *V_IT,
int vertexOffset, int tableOffset, int start, int end) {
float *vertexResults = (float*)alloca(vertexDesc.length * sizeof(float));
float *varyingResults = (float*)alloca(varyingDesc.length * sizeof(float));
for (int i = start + tableOffset; i < end + tableOffset; i++) {
int h = V_ITa[5*i];
int n = V_ITa[5*i+1];
int p = V_ITa[5*i+2];
float wp = 1.0f/static_cast<float>(n*n);
float wv = (n-2.0f) * n * wp;
int dstIndex = i + vertexOffset - tableOffset;
clear(vertexResults, vertexDesc);
clear(varyingResults, varyingDesc);
addWithWeight(vertexResults, vertex, p, wv, vertexDesc);
for (int j = 0; j < n; ++j) {
addWithWeight(vertexResults, vertex, V_IT[h+j*2], wp, vertexDesc);
addWithWeight(vertexResults, vertex, V_IT[h+j*2+1], wp, vertexDesc);
}
addWithWeight(varyingResults, varying, p, 1.0f, varyingDesc);
copy(vertex, vertexResults, dstIndex, vertexDesc);
copy(varying, varyingResults, dstIndex, varyingDesc);
}
}
void OsdCpuComputeRestrictedVertexA(
float *vertex, float *varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa,
int vertexOffset, int tableOffset, int start, int end) {
float *vertexResults = (float*)alloca(vertexDesc.length * sizeof(float));
float *varyingResults = (float*)alloca(varyingDesc.length * sizeof(float));
for (int i = start + tableOffset; i < end + tableOffset; i++) {
int p = V_ITa[5*i+2];
int eidx0 = V_ITa[5*i+3];
int eidx1 = V_ITa[5*i+4];
int dstIndex = i + vertexOffset - tableOffset;
clear(vertexResults, vertexDesc);
clear(varyingResults, varyingDesc);
addWithWeight(vertexResults, vertex, p, 0.75f, vertexDesc);
addWithWeight(vertexResults, vertex, eidx0, 0.125f, vertexDesc);
addWithWeight(vertexResults, vertex, eidx1, 0.125f, vertexDesc);
addWithWeight(varyingResults, varying, p, 1.0f, varyingDesc);
copy(vertex, vertexResults, dstIndex, vertexDesc);
copy(varying, varyingResults, dstIndex, varyingDesc);
}
}
void OsdCpuComputeLoopVertexB(
float *vertex, float *varying,
OsdVertexBufferDescriptor const &vertexDesc,

View File

@ -322,6 +322,27 @@ void OsdCpuComputeVertexB(float *vertex, float * varying,
int vertexOffset, int tableOffset,
int start, int end);
void OsdCpuComputeRestrictedVertexB1(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, const int *V_IT,
int vertexOffset, int tableOffset,
int start, int end);
void OsdCpuComputeRestrictedVertexB2(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, const int *V_IT,
int vertexOffset, int tableOffset,
int start, int end);
void OsdCpuComputeRestrictedVertexA(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa,
int vertexOffset, int tableOffset,
int start, int end);
template<int numVertexElements>
void ComputeLoopVertexBKernel( float *vertex,
const int *V_ITa,

View File

@ -67,6 +67,24 @@ void OsdCudaComputeVertexB(float *vertex, float *varying,
int *V_ITa, int *V_IT, float *V_W, int offset, int tableOffset,
int start, int end);
void OsdCudaComputeRestrictedVertexA(float *vertex, float *varying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *V_ITa, int offset, int tableOffset,
int start, int end);
void OsdCudaComputeRestrictedVertexB1(float *vertex, float *varying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *V_ITa, int *V_IT, int offset, int tableOffset,
int start, int end);
void OsdCudaComputeRestrictedVertexB2(float *vertex, float *varying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *V_ITa, int *V_IT, int offset, int tableOffset,
int start, int end);
void OsdCudaComputeLoopVertexB(float *vertex, float *varying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
@ -341,6 +359,72 @@ OsdCudaComputeController::ApplyCatmarkVertexVerticesKernelA2(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
}
void
OsdCudaComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB1(
FarKernelBatch const &batch, OsdCudaComputeContext const *context) const {
assert(context);
const OsdCudaTable * V_ITa = context->GetTable(FarSubdivisionTables::V_ITa);
const OsdCudaTable * V_IT = context->GetTable(FarSubdivisionTables::V_IT);
assert(V_ITa);
assert(V_IT);
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
OsdCudaComputeRestrictedVertexB1(
vertex, varying,
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
static_cast<int*>(V_ITa->GetCudaMemory()),
static_cast<int*>(V_IT->GetCudaMemory()),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdCudaComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB2(
FarKernelBatch const &batch, OsdCudaComputeContext const *context) const {
assert(context);
const OsdCudaTable * V_ITa = context->GetTable(FarSubdivisionTables::V_ITa);
const OsdCudaTable * V_IT = context->GetTable(FarSubdivisionTables::V_IT);
assert(V_ITa);
assert(V_IT);
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
OsdCudaComputeRestrictedVertexB2(
vertex, varying,
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
static_cast<int*>(V_ITa->GetCudaMemory()),
static_cast<int*>(V_IT->GetCudaMemory()),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdCudaComputeController::ApplyCatmarkRestrictedVertexVerticesKernelA(
FarKernelBatch const &batch, OsdCudaComputeContext const *context) const {
assert(context);
const OsdCudaTable * V_ITa = context->GetTable(FarSubdivisionTables::V_ITa);
assert(V_ITa);
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
OsdCudaComputeRestrictedVertexA(
vertex, varying,
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
static_cast<int*>(V_ITa->GetCudaMemory()),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdCudaComputeController::ApplyLoopEdgeVerticesKernel(
FarKernelBatch const &batch, OsdCudaComputeContext const *context) const {

View File

@ -134,6 +134,12 @@ protected:
void ApplyCatmarkVertexVerticesKernelA2(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedVertexVerticesKernelB1(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedVertexVerticesKernelB2(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedVertexVerticesKernelA(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyLoopEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -619,6 +619,192 @@ computeVertexB(float *fVertex, float *fVarying,
}
}
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
computeRestrictedVertexA(float *fVertex, float *fVaryings, int *V0_ITa, int offset, int tableOffset, int start, int end)
{
DeviceVertex<NUM_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_VERTEX_ELEMENTS>*)fVertex;
DeviceVertex<NUM_VARYING_ELEMENTS> *varyings = (DeviceVertex<NUM_VARYING_ELEMENTS>*)fVaryings;
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
i < end+tableOffset;
i += blockDim.x * gridDim.x) {
int p = V0_ITa[5*i+2];
int eidx0 = V0_ITa[5*i+3];
int eidx1 = V0_ITa[5*i+4];
DeviceVertex<NUM_VERTEX_ELEMENTS> dst;
dst.clear();
dst.addWithWeight(&vertex[p], 0.75f);
dst.addWithWeight(&vertex[eidx0], 0.125f);
dst.addWithWeight(&vertex[eidx1], 0.125f);
vertex[i+offset-tableOffset] = dst;
if(NUM_VARYING_ELEMENTS > 0){
DeviceVertex<NUM_VARYING_ELEMENTS> dstVarying;
dstVarying.clear();
dstVarying.addWithWeight(&varyings[p], 1.0f);
varyings[i+offset-tableOffset] = dstVarying;
}
}
}
__global__ void
computeRestrictedVertexA(float *fVertex, float *fVaryings,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *V0_ITa, int offset, int tableOffset, int start, int end)
{
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
i < end + tableOffset;
i += blockDim.x * gridDim.x){
int p = V0_ITa[5*i+2];
int eidx0 = V0_ITa[5*i+3];
int eidx1 = V0_ITa[5*i+4];
float *dstVertex = fVertex + (i+offset-tableOffset)*vertexStride;
clear(dstVertex, vertexLength);
addWithWeight(dstVertex, fVertex + p*vertexStride, 0.75f, vertexLength);
addWithWeight(dstVertex, fVertex + eidx0*vertexStride, 0.125f, vertexLength);
addWithWeight(dstVertex, fVertex + eidx1*vertexStride, 0.125f, vertexLength);
if(varyingLength > 0){
float *dstVarying = fVaryings + (i+offset-tableOffset)*varyingStride;
clear(dstVarying, varyingLength);
addWithWeight(dstVarying, fVaryings + p*varyingStride, 1.0f, varyingLength);
}
}
}
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
computeRestrictedVertexB1(float *fVertex, float *fVaryings,
const int *V0_ITa, const int *V0_IT, int offset, int tableOffset, int start, int end)
{
DeviceVertex<NUM_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_VERTEX_ELEMENTS>*)fVertex;
DeviceVertex<NUM_VARYING_ELEMENTS> *varyings = (DeviceVertex<NUM_VARYING_ELEMENTS>*)fVaryings;
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
i < end + tableOffset;
i += blockDim.x * gridDim.x) {
int h = V0_ITa[5*i];
int p = V0_ITa[5*i+2];
DeviceVertex<NUM_VERTEX_ELEMENTS> dst;
dst.clear();
dst.addWithWeight(&vertex[p], 0.5f);
for (int j = 0; j < 8; ++j)
dst.addWithWeight(&vertex[V0_IT[h+j]], 0.0625f);
vertex[i+offset-tableOffset] = dst;
if(NUM_VARYING_ELEMENTS > 0){
DeviceVertex<NUM_VARYING_ELEMENTS> dstVarying;
dstVarying.clear();
dstVarying.addWithWeight(&varyings[p], 1.0f);
varyings[i+offset-tableOffset] = dstVarying;
}
}
}
__global__ void
computeRestrictedVertexB1(float *fVertex, float *fVarying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
const int *V0_ITa, const int *V0_IT, int offset, int tableOffset, int start, int end)
{
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
i < end + tableOffset;
i += blockDim.x * gridDim.x) {
int h = V0_ITa[5*i];
int p = V0_ITa[5*i+2];
float *dstVertex = fVertex + (i+offset-tableOffset)*vertexStride;
clear(dstVertex, vertexLength);
addWithWeight(dstVertex, fVertex + p*vertexStride, 0.5f, vertexLength);
for (int j = 0; j < 8; ++j)
addWithWeight(dstVertex, fVertex + V0_IT[h+j]*vertexStride, 0.0625f, vertexLength);
if (varyingLength > 0) {
float *dstVarying = fVarying + (i+offset-tableOffset)*varyingStride;
clear(dstVarying, varyingLength);
addWithWeight(dstVarying, fVarying + p*varyingStride, 1.0f, varyingLength);
}
}
}
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
computeRestrictedVertexB2(float *fVertex, float *fVaryings,
const int *V0_ITa, const int *V0_IT, int offset, int tableOffset, int start, int end)
{
DeviceVertex<NUM_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_VERTEX_ELEMENTS>*)fVertex;
DeviceVertex<NUM_VARYING_ELEMENTS> *varyings = (DeviceVertex<NUM_VARYING_ELEMENTS>*)fVaryings;
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
i < end + tableOffset;
i += blockDim.x * gridDim.x) {
int h = V0_ITa[5*i];
int n = V0_ITa[5*i+1];
int p = V0_ITa[5*i+2];
float wp = 1.0f/float(n*n);
float wv = (n-2.0f) * n * wp;
DeviceVertex<NUM_VERTEX_ELEMENTS> dst;
dst.clear();
dst.addWithWeight(&vertex[p], wv);
for (int j = 0; j < n; ++j) {
dst.addWithWeight(&vertex[V0_IT[h+j*2]], wp);
dst.addWithWeight(&vertex[V0_IT[h+j*2+1]], wp);
}
vertex[i+offset-tableOffset] = dst;
if(NUM_VARYING_ELEMENTS > 0){
DeviceVertex<NUM_VARYING_ELEMENTS> dstVarying;
dstVarying.clear();
dstVarying.addWithWeight(&varyings[p], 1.0f);
varyings[i+offset-tableOffset] = dstVarying;
}
}
}
__global__ void
computeRestrictedVertexB2(float *fVertex, float *fVarying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
const int *V0_ITa, const int *V0_IT, int offset, int tableOffset, int start, int end)
{
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
i < end + tableOffset;
i += blockDim.x * gridDim.x) {
int h = V0_ITa[5*i];
int n = V0_ITa[5*i+1];
int p = V0_ITa[5*i+2];
float wp = 1.0f/float(n*n);
float wv = (n-2.0f) * n * wp;
float *dstVertex = fVertex + (i+offset-tableOffset)*vertexStride;
clear(dstVertex, vertexLength);
addWithWeight(dstVertex, fVertex + p*vertexStride, wv, vertexLength);
for (int j = 0; j < n; ++j) {
addWithWeight(dstVertex, fVertex + V0_IT[h+j*2]*vertexStride, wp, vertexLength);
addWithWeight(dstVertex, fVertex + V0_IT[h+j*2+1]*vertexStride, wp, vertexLength);
}
if (varyingLength > 0) {
float *dstVarying = fVarying + (i+offset-tableOffset)*varyingStride;
clear(dstVarying, varyingLength);
addWithWeight(dstVarying, fVarying + p*varyingStride, 1.0f, varyingLength);
}
}
}
// --------------------------------------------------------------------------------------------
@ -960,6 +1146,54 @@ void OsdCudaComputeVertexB(float *vertex, float *varying,
V_ITa, V_IT, V_W, offset, tableOffset, start, end);
}
void OsdCudaComputeRestrictedVertexA(float *vertex, float *varying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *V_ITa, int offset, int tableOffset, int start, int end, int pass)
{
// computeRestrictedVertexA<0, 3><<<512,32>>>(vertex, varying, V_ITa, offset, start, end);
OPT_KERNEL(0, 0, computeRestrictedVertexA, 512, 32, (vertex, varying, V_ITa, offset, tableOffset, start, end));
OPT_KERNEL(0, 3, computeRestrictedVertexA, 512, 32, (vertex, varying, V_ITa, offset, tableOffset, start, end));
OPT_KERNEL(3, 0, computeRestrictedVertexA, 512, 32, (vertex, varying, V_ITa, offset, tableOffset, start, end));
OPT_KERNEL(3, 3, computeRestrictedVertexA, 512, 32, (vertex, varying, V_ITa, offset, tableOffset, start, end));
computeRestrictedVertexA<<<512, 32>>>(vertex, varying,
vertexLength, vertexStride, varyingLength, varyingStride,
V_ITa, offset, tableOffset, start, end);
}
void OsdCudaComputeRestrictedVertexB1(float *vertex, float *varying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *V_ITa, int *V_IT, int offset, int tableOffset, int start, int end)
{
// computeRestrictedVertexB1<0, 3><<<512,32>>>(vertex, varying, V_ITa, V_IT, offset, start, end);
OPT_KERNEL(0, 0, computeRestrictedVertexB1, 512, 32, (vertex, varying, V_ITa, V_IT, offset, tableOffset, start, end));
OPT_KERNEL(0, 3, computeRestrictedVertexB1, 512, 32, (vertex, varying, V_ITa, V_IT, offset, tableOffset, start, end));
OPT_KERNEL(3, 0, computeRestrictedVertexB1, 512, 32, (vertex, varying, V_ITa, V_IT, offset, tableOffset, start, end));
OPT_KERNEL(3, 3, computeRestrictedVertexB1, 512, 32, (vertex, varying, V_ITa, V_IT, offset, tableOffset, start, end));
computeRestrictedVertexB1 <<<512, 32>>>(vertex, varying,
vertexLength, vertexStride, varyingLength, varyingStride,
V_ITa, V_IT, offset, tableOffset, start, end);
}
void OsdCudaComputeRestrictedVertexB2(float *vertex, float *varying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *V_ITa, int *V_IT, int offset, int tableOffset, int start, int end)
{
// computeRestrictedVertexB2<0, 3><<<512,32>>>(vertex, varying, V_ITa, V_IT, offset, start, end);
OPT_KERNEL(0, 0, computeRestrictedVertexB2, 512, 32, (vertex, varying, V_ITa, V_IT, offset, tableOffset, start, end));
OPT_KERNEL(0, 3, computeRestrictedVertexB2, 512, 32, (vertex, varying, V_ITa, V_IT, offset, tableOffset, start, end));
OPT_KERNEL(3, 0, computeRestrictedVertexB2, 512, 32, (vertex, varying, V_ITa, V_IT, offset, tableOffset, start, end));
OPT_KERNEL(3, 3, computeRestrictedVertexB2, 512, 32, (vertex, varying, V_ITa, V_IT, offset, tableOffset, start, end));
computeRestrictedVertexB2 <<<512, 32>>>(vertex, varying,
vertexLength, vertexStride, varyingLength, varyingStride,
V_ITa, V_IT, offset, tableOffset, start, end);
}
void OsdCudaComputeLoopVertexB(float *vertex, float *varying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,

View File

@ -247,6 +247,42 @@ OsdD3D11ComputeController::ApplyCatmarkVertexVerticesKernelA2(
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
}
void
OsdD3D11ComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB1(
FarKernelBatch const &batch, OsdD3D11ComputeContext const *context) const {
assert(context);
_currentBindState.kernelBundle->ApplyCatmarkRestrictedVertexVerticesKernelB1(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd(),
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
}
void
OsdD3D11ComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB2(
FarKernelBatch const &batch, OsdD3D11ComputeContext const *context) const {
assert(context);
_currentBindState.kernelBundle->ApplyCatmarkRestrictedVertexVerticesKernelB2(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd(),
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
}
void
OsdD3D11ComputeController::ApplyCatmarkRestrictedVertexVerticesKernelA(
FarKernelBatch const &batch, OsdD3D11ComputeContext const *context) const {
assert(context);
_currentBindState.kernelBundle->ApplyCatmarkRestrictedVertexVerticesKernelA(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd(),
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
}
void
OsdD3D11ComputeController::ApplyLoopEdgeVerticesKernel(
FarKernelBatch const &batch, OsdD3D11ComputeContext const *context) const {

View File

@ -150,6 +150,12 @@ protected:
void ApplyCatmarkVertexVerticesKernelA2(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedVertexVerticesKernelB1(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedVertexVerticesKernelB2(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedVertexVerticesKernelA(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyLoopEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -60,6 +60,9 @@ OsdD3D11ComputeKernelBundle::OsdD3D11ComputeKernelBundle(
_kernelComputeVertex(0),
_kernelComputeVertexA(0),
_kernelComputeCatmarkVertexB(0),
_kernelComputeCatmarkRestrictedVertexA(0),
_kernelComputeCatmarkRestrictedVertexB1(0),
_kernelComputeCatmarkRestrictedVertexB2(0),
_kernelComputeLoopVertexB(0),
_kernelEditAdd(0) {
@ -80,6 +83,9 @@ OsdD3D11ComputeKernelBundle::~OsdD3D11ComputeKernelBundle() {
SAFE_RELEASE(_kernelComputeVertex);
SAFE_RELEASE(_kernelComputeVertexA);
SAFE_RELEASE(_kernelComputeCatmarkVertexB);
SAFE_RELEASE(_kernelComputeCatmarkRestrictedVertexA);
SAFE_RELEASE(_kernelComputeCatmarkRestrictedVertexB1);
SAFE_RELEASE(_kernelComputeCatmarkRestrictedVertexB2);
SAFE_RELEASE(_kernelComputeLoopVertexB);
SAFE_RELEASE(_kernelEditAdd);
}
@ -193,6 +199,15 @@ OsdD3D11ComputeKernelBundle::Compile(
_classLinkage->GetClassInstance(
"catmarkComputeVertexB", 0, &_kernelComputeCatmarkVertexB);
assert(_kernelComputeCatmarkVertexB);
_classLinkage->GetClassInstance(
"catmarkComputeRestrictedVertexA", 0, &_kernelComputeCatmarkRestrictedVertexA);
assert(_kernelComputeCatmarkRestrictedVertexA);
_classLinkage->GetClassInstance(
"catmarkComputeRestrictedVertexB1", 0, &_kernelComputeCatmarkRestrictedVertexB1);
assert(_kernelComputeCatmarkRestrictedVertexB1);
_classLinkage->GetClassInstance(
"catmarkComputeRestrictedVertexB2", 0, &_kernelComputeCatmarkRestrictedVertexB2);
assert(_kernelComputeCatmarkRestrictedVertexA);
_classLinkage->GetClassInstance(
"loopComputeVertexB", 0, &_kernelComputeLoopVertexB);
assert(_kernelComputeLoopVertexB);
@ -414,6 +429,54 @@ OsdD3D11ComputeKernelBundle::ApplyCatmarkVertexVerticesKernelA(
dispatchCompute(_kernelComputeVertexA, args);
}
void
OsdD3D11ComputeKernelBundle::ApplyCatmarkRestrictedVertexVerticesKernelB1(
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset) {
KernelCB args;
ZeroMemory(&args, sizeof(args));
args.vertexOffset = vertexOffset;
args.tableOffset = tableOffset;
args.indexStart = start;
args.indexEnd = end;
args.vertexBaseOffset = vertexBaseOffset;
args.varyingBaseOffset = varyingBaseOffset;
dispatchCompute(_kernelComputeCatmarkRestrictedVertexB1, args);
}
void
OsdD3D11ComputeKernelBundle::ApplyCatmarkRestrictedVertexVerticesKernelB2(
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset) {
KernelCB args;
ZeroMemory(&args, sizeof(args));
args.vertexOffset = vertexOffset;
args.tableOffset = tableOffset;
args.indexStart = start;
args.indexEnd = end;
args.vertexBaseOffset = vertexBaseOffset;
args.varyingBaseOffset = varyingBaseOffset;
dispatchCompute(_kernelComputeCatmarkRestrictedVertexB2, args);
}
void
OsdD3D11ComputeKernelBundle::ApplyCatmarkRestrictedVertexVerticesKernelA(
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset) {
KernelCB args;
ZeroMemory(&args, sizeof(args));
args.vertexOffset = vertexOffset;
args.tableOffset = tableOffset;
args.indexStart = start;
args.indexEnd = end;
args.vertexBaseOffset = vertexBaseOffset;
args.varyingBaseOffset = varyingBaseOffset;
dispatchCompute(_kernelComputeCatmarkRestrictedVertexA, args);
}
void
OsdD3D11ComputeKernelBundle::ApplyLoopEdgeVerticesKernel(
int vertexOffset, int tableOffset, int start, int end,

View File

@ -91,6 +91,18 @@ public:
int vertexOffset, int tableOffset, int start, int end, bool pass,
int vertexBaseOffset, int varyingBaseOffset);
void ApplyCatmarkRestrictedVertexVerticesKernelB1(
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset);
void ApplyCatmarkRestrictedVertexVerticesKernelB2(
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset);
void ApplyCatmarkRestrictedVertexVerticesKernelA(
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset);
void ApplyLoopEdgeVerticesKernel(
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset);
@ -160,6 +172,12 @@ protected:
ID3D11ClassInstance * _kernelComputeCatmarkVertexB; // vertex-vertex kernel B (catmark scheme)
ID3D11ClassInstance * _kernelComputeCatmarkRestrictedVertexA; // restricted vertex-vertex kernel A (catmark scheme)
ID3D11ClassInstance * _kernelComputeCatmarkRestrictedVertexB1; // restricted vertex-vertex kernel B1 (catmark scheme)
ID3D11ClassInstance * _kernelComputeCatmarkRestrictedVertexB2; // restricted vertex-vertex kernel B2 (catmark scheme)
ID3D11ClassInstance * _kernelComputeLoopVertexB; // vertex-vertex kernel B (loop scheme)
ID3D11ClassInstance * _kernelEditAdd; // hedit kernel (add)

View File

@ -195,6 +195,50 @@ OsdGcdComputeController::ApplyCatmarkVertexVerticesKernelA2(
_gcd_queue);
}
void
OsdGcdComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB1(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
assert(context);
OsdGcdComputeRestrictedVertexB1(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
(const int*)context->GetTable(FarSubdivisionTables::V_IT)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(),
_gcd_queue);
}
void
OsdGcdComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB2(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
assert(context);
OsdGcdComputeRestrictedVertexB2(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
(const int*)context->GetTable(FarSubdivisionTables::V_IT)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(),
_gcd_queue);
}
void
OsdGcdComputeController::ApplyCatmarkRestrictedVertexVerticesKernelA(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
assert(context);
OsdGcdComputeRestrictedVertexA(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(),
_gcd_queue);
}
void
OsdGcdComputeController::ApplyLoopEdgeVerticesKernel(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {

View File

@ -134,6 +134,11 @@ protected:
void ApplyCatmarkVertexVerticesKernelA2(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedVertexVerticesKernelB1(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedVertexVerticesKernelB2(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedVertexVerticesKernelA(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyLoopEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -222,6 +222,78 @@ void OsdGcdComputeVertexB(
vertexOffset, tableOffset, start_e, end_e);
}
void OsdGcdComputeRestrictedVertexA(
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa,
int vertexOffset, int tableOffset, int start, int end,
dispatch_queue_t gcdq) {
const int workSize = end-start;
dispatch_apply(workSize/GCD_WORK_STRIDE, gcdq, ^(size_t blockIdx){
const int start_i = start + blockIdx*GCD_WORK_STRIDE;
const int end_i = start_i + GCD_WORK_STRIDE;
OsdCpuComputeRestrictedVertexA(vertex, varying, vertexDesc, varyingDesc,
V_ITa,
vertexOffset, tableOffset, start_i, end_i);
});
const int start_e = end - workSize%GCD_WORK_STRIDE;
const int end_e = end;
if (start_e < end_e)
OsdCpuComputeRestrictedVertexA(vertex, varying, vertexDesc, varyingDesc,
V_ITa,
vertexOffset, tableOffset, start_e, end_e);
}
void OsdGcdComputeRestrictedVertexB1(
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, const int *V_IT,
int vertexOffset, int tableOffset, int start, int end,
dispatch_queue_t gcdq) {
const int workSize = end-start;
dispatch_apply(workSize/GCD_WORK_STRIDE, gcdq, ^(size_t blockIdx){
const int start_i = start + blockIdx*GCD_WORK_STRIDE;
const int end_i = start_i + GCD_WORK_STRIDE;
OsdCpuComputeRestrictedVertexB1(vertex, varying, vertexDesc, varyingDesc,
V_ITa, V_IT,
vertexOffset, tableOffset, start_i, end_i);
});
const int start_e = end - workSize%GCD_WORK_STRIDE;
const int end_e = end;
if (start_e < end_e)
OsdCpuComputeRestrictedVertexB1(vertex, varying, vertexDesc, varyingDesc,
V_ITa, V_IT,
vertexOffset, tableOffset, start_e, end_e);
}
void OsdGcdComputeRestrictedVertexB2(
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, const int *V_IT,
int vertexOffset, int tableOffset, int start, int end,
dispatch_queue_t gcdq) {
const int workSize = end-start;
dispatch_apply(workSize/GCD_WORK_STRIDE, gcdq, ^(size_t blockIdx){
const int start_i = start + blockIdx*GCD_WORK_STRIDE;
const int end_i = start_i + GCD_WORK_STRIDE;
OsdCpuComputeRestrictedVertexB2(vertex, varying, vertexDesc, varyingDesc,
V_ITa, V_IT,
vertexOffset, tableOffset, start_i, end_i);
});
const int start_e = end - workSize%GCD_WORK_STRIDE;
const int end_e = end;
if (start_e < end_e)
OsdCpuComputeRestrictedVertexB2(vertex, varying, vertexDesc, varyingDesc,
V_ITa, V_IT,
vertexOffset, tableOffset, start_e, end_e);
}
void OsdGcdComputeLoopVertexB(
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,

View File

@ -90,6 +90,30 @@ void OsdGcdComputeVertexB(float *vertex, float * varying,
int start, int end,
dispatch_queue_t gcdq);
void OsdGcdComputeRestrictedVertexA(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa,
int vertexOffset, int tableOffset,
int start, int end,
dispatch_queue_t gcdq);
void OsdGcdComputeRestrictedVertexB1(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, const int *V_IT,
int vertexOffset, int tableOffset,
int start, int end,
dispatch_queue_t gcdq);
void OsdGcdComputeRestrictedVertexB2(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, const int *V_IT,
int vertexOffset, int tableOffset,
int start, int end,
dispatch_queue_t gcdq);
void OsdGcdComputeLoopVertexB(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,

View File

@ -216,6 +216,39 @@ OsdGLSLComputeController::ApplyCatmarkVertexVerticesKernelA2(
batch.GetStart(), batch.GetEnd(), true);
}
void
OsdGLSLComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB1(
FarKernelBatch const &batch, OsdGLSLComputeContext const *context) const {
assert(context);
_currentBindState.kernelBundle->ApplyCatmarkRestrictedVertexVerticesKernelB1(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd());
}
void
OsdGLSLComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB2(
FarKernelBatch const &batch, OsdGLSLComputeContext const *context) const {
assert(context);
_currentBindState.kernelBundle->ApplyCatmarkRestrictedVertexVerticesKernelB2(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd());
}
void
OsdGLSLComputeController::ApplyCatmarkRestrictedVertexVerticesKernelA(
FarKernelBatch const &batch, OsdGLSLComputeContext const *context) const {
assert(context);
_currentBindState.kernelBundle->ApplyCatmarkRestrictedVertexVerticesKernelA(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd());
}
void
OsdGLSLComputeController::ApplyLoopEdgeVerticesKernel(
FarKernelBatch const &batch, OsdGLSLComputeContext const *context) const {

View File

@ -143,6 +143,12 @@ protected:
void ApplyCatmarkVertexVerticesKernelA2(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedVertexVerticesKernelB1(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedVertexVerticesKernelB2(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedVertexVerticesKernelA(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyLoopEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -416,6 +416,82 @@ void catmarkComputeVertexB()
writeVertex(vid, dst);
}
// Restricted vertex-vertices compute Kernels 'A' / k_Crease and k_Corner rules
subroutine(computeKernelType)
void catmarkComputeRestrictedVertexA()
{
int i = int(gl_GlobalInvocationID.x) + indexStart;
if (i >= indexEnd) return;
int vid = i + vertexOffset;
i += tableOffset;
int p = _V_ITa[5*i+2];
int eidx0 = _V_ITa[5*i+3];
int eidx1 = _V_ITa[5*i+4];
Vertex dst;
clear(dst);
addWithWeight(dst, readVertex(p), 0.75f);
addWithWeight(dst, readVertex(eidx0), 0.125f);
addWithWeight(dst, readVertex(eidx1), 0.125f);
addVaryingWithWeight(dst, readVertex(p), 1);
writeVertex(vid, dst);
}
// Vertex-vertices compute Kernels 'B' / regular k_Dart and k_Smooth rules
subroutine(computeKernelType)
void catmarkComputeRestrictedVertexB1()
{
int i = int(gl_GlobalInvocationID.x) + indexStart;
if (i >= indexEnd) return;
int vid = i + vertexOffset;
i += tableOffset;
int h = _V_ITa[5*i];
int p = _V_ITa[5*i+2];
Vertex dst;
clear(dst);
addWithWeight(dst, readVertex(p), 0.5f);
for(int j = 0; j < 8; ++j)
addWithWeight(dst, readVertex(_V_IT[h+j]), 0.0625f);
addVaryingWithWeight(dst, readVertex(p), 1);
writeVertex(vid, dst);
}
// Vertex-vertices compute Kernels 'B' / irregular k_Dart and k_Smooth rules
subroutine(computeKernelType)
void catmarkComputeRestrictedVertexB2()
{
int i = int(gl_GlobalInvocationID.x) + indexStart;
if (i >= indexEnd) return;
int vid = i + vertexOffset;
i += tableOffset;
int h = _V_ITa[5*i];
int n = _V_ITa[5*i+1];
int p = _V_ITa[5*i+2];
float wp = 1.0/float(n*n);
float wv = (n-2.0) * n * wp;
Vertex dst;
clear(dst);
addWithWeight(dst, readVertex(p), wv);
for(int j = 0; j < n; ++j){
addWithWeight(dst, readVertex(_V_IT[h+j*2]), wp);
addWithWeight(dst, readVertex(_V_IT[h+j*2+1]), wp);
}
addVaryingWithWeight(dst, readVertex(p), 1);
writeVertex(vid, dst);
}
// Vertex-vertices compute Kernels 'B' / k_Dart and k_Smooth rules
subroutine(computeKernelType)
void loopComputeVertexB()

View File

@ -114,36 +114,45 @@ OsdGLSLComputeKernelBundle::Compile(
glDeleteShader(shader);
_subComputeFace = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"catmarkComputeFace");
_subComputeQuadFace = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"catmarkComputeQuadFace");
_subComputeTriQuadFace = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"catmarkComputeTriQuadFace");
_subComputeEdge = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"catmarkComputeEdge");
_subComputeRestrictedEdge = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"catmarkComputeRestrictedEdge");
_subComputeBilinearEdge = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"bilinearComputeEdge");
_subComputeVertex = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"bilinearComputeVertex");
_subComputeVertexA = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"catmarkComputeVertexA");
_subComputeCatmarkVertexB = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"catmarkComputeVertexB");
_subComputeLoopVertexB = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"loopComputeVertexB");
_subComputeFace = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"catmarkComputeFace");
_subComputeQuadFace = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"catmarkComputeQuadFace");
_subComputeTriQuadFace = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"catmarkComputeTriQuadFace");
_subComputeEdge = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"catmarkComputeEdge");
_subComputeRestrictedEdge = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"catmarkComputeRestrictedEdge");
_subComputeRestrictedVertexA = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"catmarkComputeRestrictedVertexA");
_subComputeRestrictedVertexB1 = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"catmarkComputeRestrictedVertexB1");
_subComputeRestrictedVertexB2 = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"catmarkComputeRestrictedVertexB2");
_subComputeBilinearEdge = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"bilinearComputeEdge");
_subComputeVertex = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"bilinearComputeVertex");
_subComputeVertexA = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"catmarkComputeVertexA");
_subComputeCatmarkVertexB = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"catmarkComputeVertexB");
_subComputeLoopVertexB = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"loopComputeVertexB");
// set uniform locations for compute
_uniformVertexPass = glGetUniformLocation(_program, "vertexPass");
@ -287,6 +296,30 @@ OsdGLSLComputeKernelBundle::ApplyCatmarkVertexVerticesKernelA(
dispatchCompute(vertexOffset, tableOffset, start, end);
}
void
OsdGLSLComputeKernelBundle::ApplyCatmarkRestrictedVertexVerticesKernelB1(
int vertexOffset, int tableOffset, int start, int end) {
glUniformSubroutinesuiv(GL_COMPUTE_SHADER, 1, &_subComputeRestrictedVertexB1);
dispatchCompute(vertexOffset, tableOffset, start, end);
}
void
OsdGLSLComputeKernelBundle::ApplyCatmarkRestrictedVertexVerticesKernelB2(
int vertexOffset, int tableOffset, int start, int end) {
glUniformSubroutinesuiv(GL_COMPUTE_SHADER, 1, &_subComputeRestrictedVertexB2);
dispatchCompute(vertexOffset, tableOffset, start, end);
}
void
OsdGLSLComputeKernelBundle::ApplyCatmarkRestrictedVertexVerticesKernelA(
int vertexOffset, int tableOffset, int start, int end) {
glUniformSubroutinesuiv(GL_COMPUTE_SHADER, 1, &_subComputeRestrictedVertexA);
dispatchCompute(vertexOffset, tableOffset, start, end);
}
void
OsdGLSLComputeKernelBundle::ApplyLoopEdgeVerticesKernel(
int vertexOffset, int tableOffset, int start, int end) {

View File

@ -78,6 +78,15 @@ public:
void ApplyLoopEdgeVerticesKernel(
int vertexOffset, int tableOffset, int start, int end);
void ApplyCatmarkRestrictedVertexVerticesKernelB1(
int vertexOffset, int tableOffset, int start, int end);
void ApplyCatmarkRestrictedVertexVerticesKernelB2(
int vertexOffset, int tableOffset, int start, int end);
void ApplyCatmarkRestrictedVertexVerticesKernelA(
int vertexOffset, int tableOffset, int start, int end);
void ApplyLoopVertexVerticesKernelB(
int vertexOffset, int tableOffset, int start, int end);
@ -147,6 +156,12 @@ protected:
GLuint _subComputeRestrictedEdge; // restricted edge-vertex kernel (catmark scheme)
GLuint _subComputeRestrictedVertexA; // restricted vertex-vertex kernel A (catmark scheme)
GLuint _subComputeRestrictedVertexB1; // restricted vertex-vertex kernel B1 (catmark scheme)
GLuint _subComputeRestrictedVertexB2; // restricted vertex-vertex kernel B2 (catmark scheme)
GLuint _subComputeBilinearEdge; // edge-vertex kernel (bilinear scheme)
GLuint _subComputeVertex; // vertex-vertex kernel (bilinear scheme)

View File

@ -291,6 +291,42 @@ OsdGLSLTransformFeedbackComputeController::ApplyCatmarkVertexVerticesKernelA2(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
}
void
OsdGLSLTransformFeedbackComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB1(
FarKernelBatch const &batch, OsdGLSLTransformFeedbackComputeContext const *context) const {
assert(context);
_currentBindState.kernelBundle->ApplyCatmarkRestrictedVertexVerticesKernelB1(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdGLSLTransformFeedbackComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB2(
FarKernelBatch const &batch, OsdGLSLTransformFeedbackComputeContext const *context) const {
assert(context);
_currentBindState.kernelBundle->ApplyCatmarkRestrictedVertexVerticesKernelB2(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdGLSLTransformFeedbackComputeController::ApplyCatmarkRestrictedVertexVerticesKernelA(
FarKernelBatch const &batch, OsdGLSLTransformFeedbackComputeContext const *context) const {
assert(context);
_currentBindState.kernelBundle->ApplyCatmarkRestrictedVertexVerticesKernelA(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdGLSLTransformFeedbackComputeController::ApplyLoopEdgeVerticesKernel(
FarKernelBatch const &batch, OsdGLSLTransformFeedbackComputeContext const *context) const {

View File

@ -141,6 +141,12 @@ protected:
void ApplyCatmarkVertexVerticesKernelA2(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedVertexVerticesKernelB1(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedVertexVerticesKernelB2(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedVertexVerticesKernelA(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyLoopEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -433,6 +433,94 @@ void catmarkComputeVertexB()
writeVertex(dst);
}
// Restricted vertex-vertices compute Kernels 'A' / k_Crease and k_Corner rules
subroutine(computeKernelType)
void catmarkComputeRestrictedVertexA()
{
int i = gl_VertexID + indexStart + tableOffset;
int vid = gl_VertexID + indexStart + vertexOffset;
int p = texelFetch(_V0_ITa, 5*i+2).x;
int eidx0 = texelFetch(_V0_ITa, 5*i+3).x;
int eidx1 = texelFetch(_V0_ITa, 5*i+4).x;
Vertex dst;
clear(dst);
addWithWeight(dst, readVertex(p), 0.75f);
addWithWeight(dst, readVertex(eidx0), 0.125f);
addWithWeight(dst, readVertex(eidx1), 0.125f);
addVaryingWithWeight(dst, readVertex(p), 1.0f);
writeVertex(dst);
}
// Restricted vertex-vertices compute Kernels 'B' / regular k_Dart and k_Smooth rules
subroutine(computeKernelType)
void catmarkComputeRestrictedVertexB1()
{
int i = gl_VertexID + indexStart + tableOffset;
int h = texelFetch(_V0_ITa, 5*i).x;
#ifdef OPT_CATMARK_V_IT_VEC2
int h2 = h/2;
#endif
int p = texelFetch(_V0_ITa, 5*i+2).x;
Vertex dst;
clear(dst);
addWithWeight(dst, readVertex(p), 0.5f);
for(int j = 0; j < 4; ++j){
#ifdef OPT_CATMARK_V_IT_VEC2
ivec2 v0it = texelFetch(_V0_IT, h2+j).xy;
addWithWeight(dst, readVertex(v0it.x), 0.0625f);
addWithWeight(dst, readVertex(v0it.y), 0.0625f);
#else
addWithWeight(dst, readVertex(texelFetch(_V0_IT, h+j*2).x), 0.0625f);
addWithWeight(dst, readVertex(texelFetch(_V0_IT, h+j*2+1).x), 0.0625f);
#endif
}
addVaryingWithWeight(dst, readVertex(p), 1.0f);
writeVertex(dst);
}
// Restricted vertex-vertices compute Kernels 'B' / irregular k_Dart and k_Smooth rules
subroutine(computeKernelType)
void catmarkComputeRestrictedVertexB2()
{
int i = gl_VertexID + indexStart + tableOffset;
int h = texelFetch(_V0_ITa, 5*i).x;
#ifdef OPT_CATMARK_V_IT_VEC2
int h2 = h/2;
#endif
int n = texelFetch(_V0_ITa, 5*i+1).x;
int p = texelFetch(_V0_ITa, 5*i+2).x;
float wp = 1.0/float(n*n);
float wv = (n-2.0) * n * wp;
Vertex dst;
clear(dst);
addWithWeight(dst, readVertex(p), wv);
for(int j = 0; j < n; ++j){
#ifdef OPT_CATMARK_V_IT_VEC2
ivec2 v0it = texelFetch(_V0_IT, h2+j).xy;
addWithWeight(dst, readVertex(v0it.x), wp);
addWithWeight(dst, readVertex(v0it.y), wp);
#else
addWithWeight(dst, readVertex(texelFetch(_V0_IT, h+j*2).x), wp);
addWithWeight(dst, readVertex(texelFetch(_V0_IT, h+j*2+1).x), wp);
#endif
}
addVaryingWithWeight(dst, readVertex(p), 1.0f);
writeVertex(dst);
}
// Vertex-vertices compute Kernels 'B' / k_Dart and k_Smooth rules
subroutine(computeKernelType)
void loopComputeVertexB()

View File

@ -233,6 +233,9 @@ OsdGLSLTransformFeedbackKernelBundle::Compile(
_subComputeVertex = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "bilinearComputeVertex");
_subComputeVertexA = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeVertexA");
_subComputeCatmarkVertexB = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeVertexB");
_subComputeRestrictedVertexA = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeRestrictedVertexA");
_subComputeRestrictedVertexB1 = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeRestrictedVertexB1");
_subComputeRestrictedVertexB2 = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeRestrictedVertexB2");
_subComputeLoopVertexB = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "loopComputeVertexB");
_uniformVertexPass = glGetUniformLocation(_program, "vertexPass");
@ -444,6 +447,42 @@ OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkVertexVerticesKernelA(
offset, tableOffset, start, end);
}
void
OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkRestrictedVertexVerticesKernelB1(
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end) {
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeRestrictedVertexB1);
transformGpuBufferData(vertexBuffer, varyingBuffer,
vertexOffset, varyingOffset,
offset, tableOffset, start, end);
}
void
OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkRestrictedVertexVerticesKernelB2(
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end) {
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeRestrictedVertexB2);
transformGpuBufferData(vertexBuffer, varyingBuffer,
vertexOffset, varyingOffset,
offset, tableOffset, start, end);
}
void
OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkRestrictedVertexVerticesKernelA(
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end) {
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeRestrictedVertexA);
transformGpuBufferData(vertexBuffer, varyingBuffer,
vertexOffset, varyingOffset,
offset, tableOffset, start, end);
}
void
OsdGLSLTransformFeedbackKernelBundle::ApplyLoopEdgeVerticesKernel(
GLuint vertexBuffer, GLuint varyingBuffer,

View File

@ -98,6 +98,21 @@ public:
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end, bool pass);
void ApplyCatmarkRestrictedVertexVerticesKernelB1(
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end);
void ApplyCatmarkRestrictedVertexVerticesKernelB2(
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end);
void ApplyCatmarkRestrictedVertexVerticesKernelA(
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end);
void ApplyLoopEdgeVerticesKernel(
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
@ -212,6 +227,12 @@ protected:
GLuint _subComputeCatmarkVertexB;// vertex-vertex kernel B (catmark scheme)
GLuint _subComputeRestrictedVertexA; // restricted vertex-vertex kernel A (catmark scheme)
GLuint _subComputeRestrictedVertexB1; // restricted vertex-vertex kernel B1 (catmark scheme)
GLuint _subComputeRestrictedVertexB2; // restricted vertex-vertex kernel B2 (catmark scheme)
GLuint _subComputeLoopVertexB; // vertex-vertex kernel B (loop scheme)
GLuint _subEditAdd; // hedit kernel (add)

View File

@ -427,6 +427,86 @@ void runKernel( uint3 ID )
}
};
// Restricted vertex-vertices compute Kernels 'A' / k_Crease and k_Corner rules
class CatmarkComputeRestrictedVertexA : IComputeKernel {
int placeholder;
void runKernel( uint3 ID )
{
int i = int(ID.x) + indexStart;
if (i >= indexEnd) return;
int vid = i + vertexOffset;
i += tableOffset;
int p = _V_ITa[5*i+2];
int eidx0 = _V_ITa[5*i+3];
int eidx1 = _V_ITa[5*i+4];
Vertex dst;
clear(dst);
addWithWeight(dst, readVertex(p), 0.75f);
addWithWeight(dst, readVertex(eidx0), 0.125f);
addWithWeight(dst, readVertex(eidx1), 0.125f);
addVaryingWithWeight(dst, readVertex(p), 1);
writeVertex(vid, dst);
}
};
// Restricted vertex-vertices compute Kernels 'B' / regular k_Dart and k_Smooth rules
class CatmarkComputeRestrictedVertexB1 : IComputeKernel {
int placeholder;
void runKernel( uint3 ID )
{
int i = int(ID.x) + indexStart;
if (i >= indexEnd) return;
int vid = i + vertexOffset;
i += tableOffset;
int h = _V_ITa[5*i];
int p = _V_ITa[5*i+2];
Vertex dst;
clear(dst);
addWithWeight(dst, readVertex(p), 0.5f);
for(int j = 0; j < 8; ++j)
addWithWeight(dst, readVertex(_V_IT[h+j]), 0.0625f);
addVaryingWithWeight(dst, readVertex(p), 1);
writeVertex(vid, dst);
}
};
// Restricted vertex-vertices compute Kernels 'B' / irregular k_Dart and k_Smooth rules
class CatmarkComputeRestrictedVertexB2 : IComputeKernel {
int placeholder;
void runKernel( uint3 ID )
{
int i = int(ID.x) + indexStart;
if (i >= indexEnd) return;
int vid = i + vertexOffset;
i += tableOffset;
int h = _V_ITa[5*i];
int n = _V_ITa[5*i+1];
int p = _V_ITa[5*i+2];
float wp = 1.0/float(n*n);
float wv = (n-2.0) * n * wp;
Vertex dst;
clear(dst);
addWithWeight(dst, readVertex(p), wv);
for(int j = 0; j < n; ++j){
addWithWeight(dst, readVertex(_V_IT[h+j*2]), wp);
addWithWeight(dst, readVertex(_V_IT[h+j*2+1]), wp);
}
addVaryingWithWeight(dst, readVertex(p), 1);
writeVertex(vid, dst);
}
};
// Vertex-vertices compute Kernels 'B' / k_Dart and k_Smooth rules
class LoopComputeVertexB : IComputeKernel {
int placeholder;
@ -493,6 +573,9 @@ BilinearComputeEdge bilinearComputeEdge;
BilinearComputeVertex bilinearComputeVertex;
CatmarkComputeVertexA catmarkComputeVertexA;
CatmarkComputeVertexB catmarkComputeVertexB;
CatmarkComputeRestrictedVertexA catmarkComputeRestrictedVertexA;
CatmarkComputeRestrictedVertexB1 catmarkComputeRestrictedVertexB1;
CatmarkComputeRestrictedVertexB2 catmarkComputeRestrictedVertexB2;
LoopComputeVertexB loopComputeVertexB;
EditAdd editAdd;

View File

@ -190,6 +190,47 @@ OsdOmpComputeController::ApplyCatmarkVertexVerticesKernelA2(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
}
void
OsdOmpComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB1(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
assert(context);
OsdOmpComputeRestrictedVertexB1(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
(const int*)context->GetTable(FarSubdivisionTables::V_IT)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdOmpComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB2(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
assert(context);
OsdOmpComputeRestrictedVertexB2(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
(const int*)context->GetTable(FarSubdivisionTables::V_IT)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdOmpComputeController::ApplyCatmarkRestrictedVertexVerticesKernelA(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
assert(context);
OsdOmpComputeRestrictedVertexA(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdOmpComputeController::ApplyLoopEdgeVerticesKernel(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {

View File

@ -142,6 +142,12 @@ protected:
void ApplyCatmarkVertexVerticesKernelA2(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedVertexVerticesKernelB1(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedVertexVerticesKernelB2(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedVertexVerticesKernelA(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyLoopEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -386,6 +386,126 @@ void OsdOmpComputeVertexB(
}
}
void OsdOmpComputeRestrictedVertexA(
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa,
int offset, int tableOffset, int start, int end) {
int numThreads = omp_get_max_threads();
float *vertexResultsArray = (float*)alloca(vertexDesc.length * sizeof(float) * numThreads);
float *varyingResultsArray = (float*)alloca(varyingDesc.length * sizeof(float) * numThreads);
#pragma omp parallel for
for (int i = start + tableOffset; i < end + tableOffset; i++) {
int p = V_ITa[5*i+2];
int eidx0 = V_ITa[5*i+3];
int eidx1 = V_ITa[5*i+4];
int dstIndex = offset + i - tableOffset;
int threadId = omp_get_thread_num();
float *vertexResults = vertexResultsArray +
vertexDesc.length * threadId;
float *varyingResults = varyingResultsArray +
varyingDesc.length * threadId;
clear(vertexResults, vertexDesc);
clear(varyingResults, varyingDesc);
addWithWeight(vertexResults, vertex, p, 0.75f, vertexDesc);
addWithWeight(vertexResults, vertex, eidx0, 0.125f, vertexDesc);
addWithWeight(vertexResults, vertex, eidx1, 0.125f, vertexDesc);
copy(vertex, vertexResults, dstIndex, vertexDesc);
addWithWeight(varyingResults, varying, p, 1.0f, varyingDesc);
copy(varying, varyingResults, dstIndex, varyingDesc);
}
}
void OsdOmpComputeRestrictedVertexB1(
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, const int *V_IT,
int offset, int tableOffset, int start, int end) {
int numThreads = omp_get_max_threads();
float *vertexResultsArray = (float*)alloca(vertexDesc.length * sizeof(float) * numThreads);
float *varyingResultsArray = (float*)alloca(varyingDesc.length * sizeof(float) * numThreads);
#pragma omp parallel for
for (int i = start + tableOffset; i < end + tableOffset; i++) {
int h = V_ITa[5*i];
int p = V_ITa[5*i+2];
int dstIndex = offset + i - tableOffset;
int threadId = omp_get_thread_num();
float *vertexResults = vertexResultsArray +
vertexDesc.length * threadId;
float *varyingResults = varyingResultsArray +
varyingDesc.length * threadId;
clear(vertexResults, vertexDesc);
clear(varyingResults, varyingDesc);
addWithWeight(vertexResults, vertex, p, 0.5f, vertexDesc);
for (int j = 0; j < 8; ++j)
addWithWeight(vertexResults, vertex, V_IT[h+j], 0.0625f, vertexDesc);
addWithWeight(varyingResults, varying, p, 1.0f, varyingDesc);
copy(vertex, vertexResults, dstIndex, vertexDesc);
copy(varying, varyingResults, dstIndex, varyingDesc);
}
}
void OsdOmpComputeRestrictedVertexB2(
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, const int *V_IT,
int offset, int tableOffset, int start, int end) {
int numThreads = omp_get_max_threads();
float *vertexResultsArray = (float*)alloca(vertexDesc.length * sizeof(float) * numThreads);
float *varyingResultsArray = (float*)alloca(varyingDesc.length * sizeof(float) * numThreads);
#pragma omp parallel for
for (int i = start + tableOffset; i < end + tableOffset; i++) {
int h = V_ITa[5*i];
int n = V_ITa[5*i+1];
int p = V_ITa[5*i+2];
float wp = 1.0f/static_cast<float>(n*n);
float wv = (n-2.0f) * n * wp;
int dstIndex = offset + i - tableOffset;
int threadId = omp_get_thread_num();
float *vertexResults = vertexResultsArray +
vertexDesc.length * threadId;
float *varyingResults = varyingResultsArray +
varyingDesc.length * threadId;
clear(vertexResults, vertexDesc);
clear(varyingResults, varyingDesc);
addWithWeight(vertexResults, vertex, p, wv, vertexDesc);
for (int j = 0; j < n; ++j) {
addWithWeight(vertexResults, vertex, V_IT[h+j*2], wp, vertexDesc);
addWithWeight(vertexResults, vertex, V_IT[h+j*2+1], wp, vertexDesc);
}
addWithWeight(varyingResults, varying, p, 1.0f, varyingDesc);
copy(vertex, vertexResults, dstIndex, vertexDesc);
copy(varying, varyingResults, dstIndex, varyingDesc);
}
}
void OsdOmpComputeLoopVertexB(
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,

View File

@ -57,7 +57,7 @@ void OsdOmpComputeTriQuadFace(float * vertex, float * varying,
void OsdOmpComputeEdge(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *E_IT, const float *E_ITa,
const int *E_IT, const float *E_W,
int vertexOffset, int tableOffset,
int start, int end);
@ -71,7 +71,7 @@ void OsdOmpComputeRestrictedEdge(float *vertex, float * varying,
void OsdOmpComputeVertexA(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, const float *V_IT,
const int *V_ITa, const float *V_W,
int vertexOffset, int tableOffset,
int start, int end, int pass);
@ -82,6 +82,27 @@ void OsdOmpComputeVertexB(float *vertex, float * varying,
int vertexOffset, int tableOffset,
int start, int end);
void OsdOmpComputeRestrictedVertexA(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa,
int vertexOffset, int tableOffset,
int start, int end);
void OsdOmpComputeRestrictedVertexB1(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, const int *V_IT,
int vertexOffset, int tableOffset,
int start, int end);
void OsdOmpComputeRestrictedVertexB2(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, const int *V_IT,
int vertexOffset, int tableOffset,
int start, int end);
void OsdOmpComputeLoopVertexB(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,

View File

@ -196,6 +196,47 @@ OsdTbbComputeController::ApplyCatmarkVertexVerticesKernelA2(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
}
void
OsdTbbComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB1(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
assert(context);
OsdTbbComputeRestrictedVertexB1(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
(const int*)context->GetTable(FarSubdivisionTables::V_IT)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdTbbComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB2(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
assert(context);
OsdTbbComputeRestrictedVertexB2(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
(const int*)context->GetTable(FarSubdivisionTables::V_IT)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdTbbComputeController::ApplyCatmarkRestrictedVertexVerticesKernelA(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
assert(context);
OsdTbbComputeRestrictedVertexA(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdTbbComputeController::ApplyLoopEdgeVerticesKernel(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {

View File

@ -134,6 +134,12 @@ protected:
void ApplyCatmarkVertexVerticesKernelA2(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedVertexVerticesKernelB1(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedVertexVerticesKernelB2(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedVertexVerticesKernelA(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyLoopEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -658,6 +658,226 @@ void OsdTbbComputeVertexB(
tbb::parallel_for(range, kernel);
}
class TBBRestrictedVertexKernelA {
float *vertex;
float *varying;
OsdVertexBufferDescriptor vertexDesc;
OsdVertexBufferDescriptor varyingDesc;
int const *V_ITa;
int vertexOffset;
int tableOffset;
public:
void operator() (tbb::blocked_range<int> const &r) const {
for (int i = r.begin() + tableOffset; i < r.end() + tableOffset; i++) {
int p = V_ITa[5*i+2];
int eidx0 = V_ITa[5*i+3];
int eidx1 = V_ITa[5*i+4];
int dstIndex = i + vertexOffset - tableOffset;
clear(vertex, dstIndex, vertexDesc);
clear(varying, dstIndex, varyingDesc);
addWithWeight(vertex, dstIndex, p, 0.75f, vertexDesc);
addWithWeight(vertex, dstIndex, eidx0, 0.125f, vertexDesc);
addWithWeight(vertex, dstIndex, eidx1, 0.125f, vertexDesc);
addWithWeight(varying, dstIndex, p, 1.0f, varyingDesc);
}
}
TBBRestrictedVertexKernelA(TBBRestrictedVertexKernelA const &other)
{
this->vertex = other.vertex;
this->varying= other.varying;
this->vertexDesc = other.vertexDesc;
this->varyingDesc = other.varyingDesc;
this->V_ITa = other.V_ITa;
this->vertexOffset = other.vertexOffset;
this->tableOffset = other.tableOffset;
}
TBBRestrictedVertexKernelA(float *vertex_in,
float *varying_in,
OsdVertexBufferDescriptor const &vertexDesc_in,
OsdVertexBufferDescriptor const &varyingDesc_in,
int const *V_ITa_in,
int vertexOffset_in,
int tableOffset_in) :
vertex (vertex_in),
varying(varying_in),
vertexDesc(vertexDesc_in),
varyingDesc(varyingDesc_in),
V_ITa (V_ITa_in),
vertexOffset(vertexOffset_in),
tableOffset(tableOffset_in)
{};
};
void OsdTbbComputeRestrictedVertexA(
float *vertex, float *varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
int const *V_ITa, int vertexOffset, int tableOffset,
int start, int end) {
tbb::blocked_range<int> range(start, end, grain_size);
TBBRestrictedVertexKernelA kernel(vertex, varying, vertexDesc, varyingDesc,
V_ITa,
vertexOffset, tableOffset);
tbb::parallel_for(range, kernel);
}
class TBBRestrictedVertexKernelB1 {
float *vertex;
float *varying;
OsdVertexBufferDescriptor vertexDesc;
OsdVertexBufferDescriptor varyingDesc;
int const *V_ITa;
int const *V_IT;
int vertexOffset;
int tableOffset;
public:
void operator() (tbb::blocked_range<int> const &r) const {
for (int i = r.begin() + tableOffset; i < r.end() + tableOffset; i++) {
int h = V_ITa[5*i];
int p = V_ITa[5*i+2];
int dstIndex = i + vertexOffset - tableOffset;
clear(vertex, dstIndex, vertexDesc);
clear(varying, dstIndex, varyingDesc);
addWithWeight(vertex, dstIndex, p, 0.5f, vertexDesc);
for (int j = 0; j < 8; ++j)
addWithWeight(vertex, dstIndex, V_IT[h+j], 0.0625f, vertexDesc);
addWithWeight(varying, dstIndex, p, 1.0f, varyingDesc);
}
}
TBBRestrictedVertexKernelB1(TBBRestrictedVertexKernelB1 const &other)
{
this->vertex = other.vertex;
this->varying= other.varying;
this->vertexDesc = other.vertexDesc;
this->varyingDesc = other.varyingDesc;
this->V_ITa = other.V_ITa;
this->V_IT = other.V_IT;
this->vertexOffset = other.vertexOffset;
this->tableOffset = other.tableOffset;
}
TBBRestrictedVertexKernelB1(float *vertex_in,
float *varying_in,
OsdVertexBufferDescriptor const &vertexDesc_in,
OsdVertexBufferDescriptor const &varyingDesc_in,
int const *V_ITa_in,
int const *V_IT_in,
int vertexOffset_in,
int tableOffset_in) :
vertex (vertex_in),
varying(varying_in),
vertexDesc(vertexDesc_in),
varyingDesc(varyingDesc_in),
V_ITa (V_ITa_in),
V_IT (V_IT_in),
vertexOffset(vertexOffset_in),
tableOffset(tableOffset_in)
{};
};
void OsdTbbComputeRestrictedVertexB1(
float *vertex, float *varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
int const *V_ITa, int const *V_IT,
int vertexOffset, int tableOffset, int start, int end) {
tbb::blocked_range<int> range(start, end, grain_size);
TBBRestrictedVertexKernelB1 kernel(vertex, varying, vertexDesc, varyingDesc,
V_ITa, V_IT,
vertexOffset, tableOffset);
tbb::parallel_for(range, kernel);
}
class TBBRestrictedVertexKernelB2 {
float *vertex;
float *varying;
OsdVertexBufferDescriptor vertexDesc;
OsdVertexBufferDescriptor varyingDesc;
int const *V_ITa;
int const *V_IT;
int vertexOffset;
int tableOffset;
public:
void operator() (tbb::blocked_range<int> const &r) const {
for (int i = r.begin() + tableOffset; i < r.end() + tableOffset; i++) {
int h = V_ITa[5*i];
int n = V_ITa[5*i+1];
int p = V_ITa[5*i+2];
float wp = 1.0f/static_cast<float>(n*n);
float wv = (n-2.0f) * n * wp;
int dstIndex = i + vertexOffset - tableOffset;
clear(vertex, dstIndex, vertexDesc);
clear(varying, dstIndex, varyingDesc);
addWithWeight(vertex, dstIndex, p, wv, vertexDesc);
for (int j = 0; j < n; ++j) {
addWithWeight(vertex, dstIndex, V_IT[h+j*2], wp, vertexDesc);
addWithWeight(vertex, dstIndex, V_IT[h+j*2+1], wp, vertexDesc);
}
addWithWeight(varying, dstIndex, p, 1.0f, varyingDesc);
}
}
TBBRestrictedVertexKernelB2(TBBRestrictedVertexKernelB2 const &other)
{
this->vertex = other.vertex;
this->varying= other.varying;
this->vertexDesc = other.vertexDesc;
this->varyingDesc = other.varyingDesc;
this->V_ITa = other.V_ITa;
this->V_IT = other.V_IT;
this->vertexOffset = other.vertexOffset;
this->tableOffset = other.tableOffset;
}
TBBRestrictedVertexKernelB2(float *vertex_in,
float *varying_in,
OsdVertexBufferDescriptor const &vertexDesc_in,
OsdVertexBufferDescriptor const &varyingDesc_in,
int const *V_ITa_in,
int const *V_IT_in,
int vertexOffset_in,
int tableOffset_in) :
vertex (vertex_in),
varying(varying_in),
vertexDesc(vertexDesc_in),
varyingDesc(varyingDesc_in),
V_ITa (V_ITa_in),
V_IT (V_IT_in),
vertexOffset(vertexOffset_in),
tableOffset(tableOffset_in)
{};
};
void OsdTbbComputeRestrictedVertexB2(
float *vertex, float *varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
int const *V_ITa, int const *V_IT,
int vertexOffset, int tableOffset, int start, int end) {
tbb::blocked_range<int> range(start, end, grain_size);
TBBRestrictedVertexKernelB2 kernel(vertex, varying, vertexDesc, varyingDesc,
V_ITa, V_IT,
vertexOffset, tableOffset);
tbb::parallel_for(range, kernel);
}
class TBBLoopVertexKernelB {
float *vertex;
float *varying;

View File

@ -70,7 +70,7 @@ void OsdTbbComputeRestrictedEdge(float *vertex, float * varying,
void OsdTbbComputeVertexA(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
int const *V_ITa, float const *V_IT,
int const *V_ITa, float const *V_W,
int vertexOffset, int tableOffset,
int start, int end, int pass);
@ -81,6 +81,27 @@ void OsdTbbComputeVertexB(float *vertex, float * varying,
int vertexOffset, int tableOffset,
int start, int end);
void OsdTbbComputeRestrictedVertexA(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
int const *V_ITa,
int vertexOffset, int tableOffset,
int start, int end);
void OsdTbbComputeRestrictedVertexB1(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
int const *V_ITa, int const *V_IT,
int vertexOffset, int tableOffset,
int start, int end);
void OsdTbbComputeRestrictedVertexB2(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
int const *V_ITa, int const *V_IT,
int vertexOffset, int tableOffset,
int start, int end);
void OsdTbbComputeLoopVertexB(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,