Mirror of https://github.com/PixarAnimationStudios/OpenSubdiv (synced 2024-12-23 16:30:07 +00:00)
Added the CATMARK_RESTRICTED_VERT_VERTEX_A, CATMARK_RESTRICTED_VERT_VERTEX_B1, and CATMARK_RESTRICTED_VERT_VERTEX_B2 kernels, which compute vertices resulting from the refinement of a smooth or (fully) sharp vertex.

* CATMARK_RESTRICTED_VERT_VERTEX_A handles the k_Crease and k_Corner rules
* CATMARK_RESTRICTED_VERT_VERTEX_B1 handles the regular k_Smooth and k_Dart rules
* CATMARK_RESTRICTED_VERT_VERTEX_B2 handles the irregular k_Smooth and k_Dart rules
This commit is contained in: parent af3424e1da, commit b7a763853c
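For reference, the weighting rules applied by the three new kernels (taken from FarSubdivisionTables::computeCatmarkRestrictedVertexPointsA/B1/B2 in the diff below) can be summarized by the following minimal C++ sketch. The sketch is not part of the commit; the Vertex template parameter and the restrictedVertex* helper names are illustrative placeholders for any vertex class that provides AddWithWeight().

    #include <vector>

    // Kernel "A" (k_Crease / k_Corner rules): 3/4 parent vertex,
    // 1/8 for each of the two crease-rule edge vertices.
    template <class Vertex>
    void restrictedVertexA(Vertex &dst, Vertex const &parent,
                           Vertex const &e0, Vertex const &e1) {
        dst.AddWithWeight(parent, 0.75f);
        dst.AddWithWeight(e0, 0.125f);
        dst.AddWithWeight(e1, 0.125f);
    }

    // Kernel "B1" (regular k_Smooth / k_Dart rules, valence 4): 1/2 parent vertex,
    // 1/16 for each of the 8 surrounding edge- and face-vertices.
    template <class Vertex>
    void restrictedVertexB1(Vertex &dst, Vertex const &parent,
                            std::vector<Vertex> const &ring) {
        dst.AddWithWeight(parent, 0.5f);
        for (int j = 0; j < 8; ++j)
            dst.AddWithWeight(ring[j], 0.0625f);
    }

    // Kernel "B2" (irregular k_Smooth / k_Dart rules): valence-dependent weights,
    // wp = 1/n^2 for each of the 2n ring vertices and wv = (n-2)/n for the parent.
    template <class Vertex>
    void restrictedVertexB2(Vertex &dst, Vertex const &parent,
                            std::vector<Vertex> const &ring, int n) {
        float wp = 1.0f / float(n * n),
              wv = (n - 2.0f) * n * wp;
        dst.AddWithWeight(parent, wv);
        for (int j = 0; j < 2 * n; ++j)
            dst.AddWithWeight(ring[j], wp);
    }

Because the restricted kernels use fixed weights, the factory no longer needs the per-vertex V_W weight table for these vertices (see the guarded _V_W.resize in the diff below).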
@@ -56,6 +56,10 @@ protected:
/// will reserve and append refinement tasks
///
static FarSubdivisionTables * Create( FarMeshFactory<T,U> * meshFactory, FarKernelBatchVector *batches );

// Compares vertices based on their topological configuration
// (see subdivisionTables::GetMaskRanking for more details)
static bool CompareVertices( HbrVertex<T> const *x, HbrVertex<T> const *y );
};

// This factory walks the Hbr vertices and accumulates the weights and adjacency

@@ -97,9 +101,18 @@ FarCatmarkSubdivisionTablesFactory<T,U>::Create( FarMeshFactory<T,U> * meshFacto
typename HbrCatmarkSubdivision<T>::TriangleSubdivision triangleMethod =
dynamic_cast<HbrCatmarkSubdivision<T> *>(meshFactory->GetHbrMesh()->GetSubdivision())->GetTriangleSubdivisionMethod();
bool hasFractionalEdgeSharpness = tablesFactory.HasFractionalEdgeSharpness();
bool useRestrictedEdgeVertexKernel = meshFactory->IsKernelTypeSupported(FarKernelBatch::CATMARK_RESTRICTED_EDGE_VERTEX);;
bool useRestrictedEdgeVertexKernel = meshFactory->IsKernelTypeSupported(FarKernelBatch::CATMARK_RESTRICTED_EDGE_VERTEX);
useRestrictedEdgeVertexKernel &= !hasFractionalEdgeSharpness && triangleMethod != HbrCatmarkSubdivision<T>::k_New;

bool hasFractionalVertexSharpness = tablesFactory.HasFractionalVertexSharpness();
bool hasStandardVertexVertexKernels = meshFactory->IsKernelTypeSupported(FarKernelBatch::CATMARK_VERT_VERTEX_A1) &&
meshFactory->IsKernelTypeSupported(FarKernelBatch::CATMARK_VERT_VERTEX_A2) &&
meshFactory->IsKernelTypeSupported(FarKernelBatch::CATMARK_VERT_VERTEX_B);
bool useRestrictedVertexVertexKernels = meshFactory->IsKernelTypeSupported(FarKernelBatch::CATMARK_RESTRICTED_VERT_VERTEX_A) &&
meshFactory->IsKernelTypeSupported(FarKernelBatch::CATMARK_RESTRICTED_VERT_VERTEX_B1) &&
meshFactory->IsKernelTypeSupported(FarKernelBatch::CATMARK_RESTRICTED_VERT_VERTEX_B2);
useRestrictedVertexVertexKernels &= !hasFractionalVertexSharpness && !hasFractionalEdgeSharpness;

// Allocate memory for the indexing tables
result->_F_ITa.resize(F_ITa_size);
result->_F_IT.resize(F_IT_size);

@@ -111,8 +124,9 @@ FarCatmarkSubdivisionTablesFactory<T,U>::Create( FarMeshFactory<T,U> * meshFacto
result->_V_ITa.resize((tablesFactory.GetNumVertexVerticesTotal(maxlevel)
- tablesFactory.GetNumVertexVerticesTotal(0))*5); // subtract coarse cage vertices
result->_V_IT.resize(tablesFactory.GetVertVertsValenceSum()*2);
result->_V_W.resize(tablesFactory.GetNumVertexVerticesTotal(maxlevel)
- tablesFactory.GetNumVertexVerticesTotal(0));
if (!useRestrictedVertexVertexKernels)
result->_V_W.resize(tablesFactory.GetNumVertexVerticesTotal(maxlevel)
- tablesFactory.GetNumVertexVerticesTotal(0));

// Prepare batch table
batches->reserve(maxlevel*5);

@@ -382,30 +396,49 @@ FarCatmarkSubdivisionTablesFactory<T,U>::Create( FarMeshFactory<T,U> * meshFacto
break;
}
case HbrVertex<T>::k_Corner :
// in the case of a k_Crease / k_Corner pass combination, we
// need to set the valence to -1 to tell the "B" Kernel to
// switch to k_Corner rule (as edge indices won't be -1)
if (V_ITa[5*i+1]==0)
V_ITa[5*i+1] = -1;
if (!useRestrictedVertexVertexKernels) {
// in the case of a k_Crease / k_Corner pass combination, we
// need to set the valence to -1 to tell the "B" Kernel to
// switch to k_Corner rule (as edge indices won't be -1)
if (V_ITa[5*i+1]==0)
V_ITa[5*i+1] = -1;
} else {
// in the case of a k_Corner, repeat the vertex
V_ITa[5*i+3] = V_ITa[5*i+2];
V_ITa[5*i+4] = V_ITa[5*i+2];
}

default : break;
}

if (rank>7)
// the k_Corner and k_Crease single-pass cases apply a weight of 1.0
// but this value is inverted in the kernel
V_W[i] = 0.0;
else
V_W[i] = weights[0];
if (!useRestrictedVertexVertexKernels) {
if (rank>7)
// the k_Corner and k_Crease single-pass cases apply a weight of 1.0
// but this value is inverted in the kernel
V_W[i] = 0.0;
else
V_W[i] = weights[0];
}

batchFactory.AddVertex( i, rank );
if (!useRestrictedVertexVertexKernels)
batchFactory.AddVertex( i, rank );
else
batchFactory.AddCatmarkRestrictedVertex( i, rank, V_ITa[5*i+1] );
}
V_ITa += nVertVertices*5;
V_W += nVertVertices;
if (!useRestrictedVertexVertexKernels)
V_W += nVertVertices;

// add batches for vert vertices
if (nVertVertices > 0)
batchFactory.AppendCatmarkBatches(level, vertTableOffset, vertexOffset, batches);
if (nVertVertices > 0) {
if (!useRestrictedVertexVertexKernels) {
assert(hasStandardVertexVertexKernels);
batchFactory.AppendCatmarkBatches(level, vertTableOffset, vertexOffset, batches);
} else {
batchFactory.AppendCatmarkRestrictedBatches(level, vertTableOffset, vertexOffset, batches);
}
}

vertexOffset += nVertVertices;
vertTableOffset += nVertVertices;
}

@@ -413,6 +446,25 @@ FarCatmarkSubdivisionTablesFactory<T,U>::Create( FarMeshFactory<T,U> * meshFacto
return result;
}

template <class T, class U> bool
FarCatmarkSubdivisionTablesFactory<T,U>::CompareVertices( HbrVertex<T> const * x, HbrVertex<T> const * y ) {

// Masks of the parent vertex decide for the current vertex.
HbrVertex<T> * px=x->GetParentVertex(),
* py=y->GetParentVertex();

int rankx = GetMaskRanking(px->GetMask(false), px->GetMask(true) );
int ranky = GetMaskRanking(py->GetMask(false), py->GetMask(true) );

assert( rankx!=0xFF and ranky!=0xFF );

// Arrange regular vertices before irregular vertices within the same kernel
if ((rankx <= 2 && ranky <= 2) || (rankx >= 3 && rankx <= 7 && ranky >= 3 && ranky <= 7) || (rankx >= 8 && ranky >= 8))
return px->GetValence() == 4 && py->GetValence() != 4;
else
return rankx < ranky;
}

} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
@ -108,6 +108,15 @@ FarDispatcher::ApplyKernel(CONTROLLER *controller, CONTEXT *context, FarKernelBa
|
||||
case FarKernelBatch::CATMARK_VERT_VERTEX_A2:
|
||||
controller->ApplyCatmarkVertexVerticesKernelA2(batch, context);
|
||||
break;
|
||||
case FarKernelBatch::CATMARK_RESTRICTED_VERT_VERTEX_B1:
|
||||
controller->ApplyCatmarkRestrictedVertexVerticesKernelB1(batch, context);
|
||||
break;
|
||||
case FarKernelBatch::CATMARK_RESTRICTED_VERT_VERTEX_B2:
|
||||
controller->ApplyCatmarkRestrictedVertexVerticesKernelB2(batch, context);
|
||||
break;
|
||||
case FarKernelBatch::CATMARK_RESTRICTED_VERT_VERTEX_A:
|
||||
controller->ApplyCatmarkRestrictedVertexVerticesKernelA(batch, context);
|
||||
break;
|
||||
|
||||
case FarKernelBatch::LOOP_EDGE_VERTEX:
|
||||
controller->ApplyLoopEdgeVerticesKernel(batch, context);
|
||||
@ -200,6 +209,15 @@ public:
|
||||
template <class CONTEXT>
|
||||
void ApplyCatmarkVertexVerticesKernelA2(FarKernelBatch const &batch, CONTEXT *context) const;
|
||||
|
||||
template <class CONTEXT>
|
||||
void ApplyCatmarkRestrictedVertexVerticesKernelB1(FarKernelBatch const &batch, CONTEXT *context) const;
|
||||
|
||||
template <class CONTEXT>
|
||||
void ApplyCatmarkRestrictedVertexVerticesKernelB2(FarKernelBatch const &batch, CONTEXT *context) const;
|
||||
|
||||
template <class CONTEXT>
|
||||
void ApplyCatmarkRestrictedVertexVerticesKernelA(FarKernelBatch const &batch, CONTEXT *context) const;
|
||||
|
||||
template <class CONTEXT>
|
||||
void ApplyLoopEdgeVerticesKernel(FarKernelBatch const &batch, CONTEXT *context) const;
|
||||
|
||||
@ -393,6 +411,51 @@ FarComputeController::ApplyCatmarkVertexVerticesKernelA2(FarKernelBatch const &b
|
||||
vsrc );
|
||||
}
|
||||
|
||||
template <class CONTEXT> void
|
||||
FarComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB1(FarKernelBatch const &batch, CONTEXT *context) const {
|
||||
|
||||
typename CONTEXT::VertexType *vsrc = &context->GetVertices().at(0);
|
||||
|
||||
FarSubdivisionTables const * subdivision = context->GetSubdivisionTables();
|
||||
assert(subdivision);
|
||||
|
||||
subdivision->computeCatmarkRestrictedVertexPointsB1( batch.GetVertexOffset(),
|
||||
batch.GetTableOffset(),
|
||||
batch.GetStart(),
|
||||
batch.GetEnd(),
|
||||
vsrc );
|
||||
}
|
||||
|
||||
template <class CONTEXT> void
|
||||
FarComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB2(FarKernelBatch const &batch, CONTEXT *context) const {
|
||||
|
||||
typename CONTEXT::VertexType *vsrc = &context->GetVertices().at(0);
|
||||
|
||||
FarSubdivisionTables const * subdivision = context->GetSubdivisionTables();
|
||||
assert(subdivision);
|
||||
|
||||
subdivision->computeCatmarkRestrictedVertexPointsB2( batch.GetVertexOffset(),
|
||||
batch.GetTableOffset(),
|
||||
batch.GetStart(),
|
||||
batch.GetEnd(),
|
||||
vsrc );
|
||||
}
|
||||
|
||||
template <class CONTEXT> void
|
||||
FarComputeController::ApplyCatmarkRestrictedVertexVerticesKernelA(FarKernelBatch const &batch, CONTEXT *context) const {
|
||||
|
||||
typename CONTEXT::VertexType *vsrc = &context->GetVertices().at(0);
|
||||
|
||||
FarSubdivisionTables const * subdivision = context->GetSubdivisionTables();
|
||||
assert(subdivision);
|
||||
|
||||
subdivision->computeCatmarkRestrictedVertexPointsA( batch.GetVertexOffset(),
|
||||
batch.GetTableOffset(),
|
||||
batch.GetStart(),
|
||||
batch.GetEnd(),
|
||||
vsrc );
|
||||
}
|
||||
|
||||
template <class CONTEXT> void
|
||||
FarComputeController::ApplyLoopEdgeVerticesKernel(FarKernelBatch const &batch, CONTEXT *context) const {
|
||||
|
||||
|
@ -74,6 +74,9 @@ public:
|
||||
CATMARK_VERT_VERTEX_A1,
|
||||
CATMARK_VERT_VERTEX_A2,
|
||||
CATMARK_VERT_VERTEX_B,
|
||||
CATMARK_RESTRICTED_VERT_VERTEX_A,
|
||||
CATMARK_RESTRICTED_VERT_VERTEX_B1,
|
||||
CATMARK_RESTRICTED_VERT_VERTEX_B2,
|
||||
LOOP_EDGE_VERTEX,
|
||||
LOOP_VERT_VERTEX_A1,
|
||||
LOOP_VERT_VERTEX_A2,
|
||||
|
@ -46,6 +46,8 @@ public:
|
||||
FarVertexKernelBatchFactory(int start, int end) {
|
||||
kernelB.start = kernelA1.start = kernelA2.start = start;
|
||||
kernelB.end = kernelA1.end = kernelA2.end = end;
|
||||
restrictedKernelB1.start = restrictedKernelB2.start = restrictedKernelA.start = start;
|
||||
restrictedKernelB1.end = restrictedKernelB2.end = restrictedKernelA.end = end;
|
||||
}
|
||||
|
||||
|
||||
@ -97,6 +99,17 @@ public:
|
||||
void AddVertex( int index, int rank );
|
||||
|
||||
|
||||
/// \brief Adds a vertex-vertex to the appropriate restricted compute batch based on "Rank" and valence.
|
||||
///
|
||||
/// @param index the index of the vertex
|
||||
///
|
||||
/// @param rank the rank of the vertex (see
|
||||
/// FarSubdivisionTables::GetMaskRanking())
|
||||
///
|
||||
/// @param valence the valence of the vertex
|
||||
///
|
||||
void AddCatmarkRestrictedVertex( int index, int rank, int valence );
|
||||
|
||||
|
||||
/// \brief Appends a FarKernelBatch to a vector of batches for Catmark subdivision
|
||||
///
|
||||
@ -111,6 +124,18 @@ public:
|
||||
void AppendCatmarkBatches(int level, int tableOffset, int vertexOffset, FarKernelBatchVector *result);
|
||||
|
||||
|
||||
/// \brief Appends a restricted FarKernelBatch to a vector of batches for Catmark subdivision
|
||||
///
|
||||
/// @param level the subdivision level of the vertices in the batch
|
||||
///
|
||||
/// @param tableOffset XXXX
|
||||
///
|
||||
/// @param vertexOffset XXXX
|
||||
///
|
||||
/// @param result the expanded batch vector
|
||||
///
|
||||
void AppendCatmarkRestrictedBatches(int level, int tableOffset, int vertexOffset, FarKernelBatchVector *result);
|
||||
|
||||
|
||||
/// \brief Appends a FarKernelBatch to a vector of batches for Loop subdivision
|
||||
///
|
||||
@ -131,9 +156,12 @@ private:
|
||||
end;
|
||||
};
|
||||
|
||||
Range kernelB; // vertex batch range (kernel B)
|
||||
Range kernelA1; // vertex batch range (kernel A pass 1)
|
||||
Range kernelA2; // vertex batch range (kernel A pass 2)
|
||||
Range kernelB; // vertex batch range (kernel B)
|
||||
Range kernelA1; // vertex batch range (kernel A pass 1)
|
||||
Range kernelA2; // vertex batch range (kernel A pass 2)
|
||||
Range restrictedKernelB1; // vertex batch range (restricted kernel B regular)
|
||||
Range restrictedKernelB2; // vertex batch range (restricted kernel B irregular)
|
||||
Range restrictedKernelA; // vertex batch range (restricted kernel A)
|
||||
};
|
||||
|
||||
inline void
|
||||
@ -160,6 +188,29 @@ FarVertexKernelBatchFactory::AddVertex( int index, int rank ) {
|
||||
}
|
||||
}
|
||||
|
||||
inline void
|
||||
FarVertexKernelBatchFactory::AddCatmarkRestrictedVertex( int index, int rank, int valence ) {
|
||||
|
||||
assert(rank <= 2 || rank >= 8);
|
||||
|
||||
if (rank <= 2 && valence == 4) {
|
||||
if (index < restrictedKernelB1.start)
|
||||
restrictedKernelB1.start=index;
|
||||
if (index > restrictedKernelB1.end)
|
||||
restrictedKernelB1.end=index;
|
||||
} else if (rank <= 2 && valence != 4) {
|
||||
if (index < restrictedKernelB2.start)
|
||||
restrictedKernelB2.start=index;
|
||||
if (index > restrictedKernelB2.end)
|
||||
restrictedKernelB2.end=index;
|
||||
} else if (rank >= 8) {
|
||||
if (index < restrictedKernelA.start)
|
||||
restrictedKernelA.start=index;
|
||||
if (index > restrictedKernelA.end)
|
||||
restrictedKernelA.end=index;
|
||||
}
|
||||
}
|
||||
|
||||
inline void
|
||||
FarVertexKernelBatchFactory::AppendCatmarkBatches(int level,
|
||||
int tableOffset,
|
||||
@ -180,6 +231,26 @@ FarVertexKernelBatchFactory::AppendCatmarkBatches(int level,
|
||||
tableOffset, vertexOffset) );
|
||||
}
|
||||
|
||||
inline void
|
||||
FarVertexKernelBatchFactory::AppendCatmarkRestrictedBatches(int level,
|
||||
int tableOffset,
|
||||
int vertexOffset,
|
||||
FarKernelBatchVector *result) {
|
||||
|
||||
if (restrictedKernelB1.end >= restrictedKernelB1.start)
|
||||
result->push_back(FarKernelBatch( FarKernelBatch::CATMARK_RESTRICTED_VERT_VERTEX_B1, level, 0,
|
||||
restrictedKernelB1.start, restrictedKernelB1.end+1,
|
||||
tableOffset, vertexOffset));
|
||||
if (restrictedKernelB2.end >= restrictedKernelB2.start)
|
||||
result->push_back(FarKernelBatch( FarKernelBatch::CATMARK_RESTRICTED_VERT_VERTEX_B2, level, 0,
|
||||
restrictedKernelB2.start, restrictedKernelB2.end+1,
|
||||
tableOffset, vertexOffset) );
|
||||
if (restrictedKernelA.end >= restrictedKernelA.start)
|
||||
result->push_back(FarKernelBatch( FarKernelBatch::CATMARK_RESTRICTED_VERT_VERTEX_A, level, 0,
|
||||
restrictedKernelA.start, restrictedKernelA.end+1,
|
||||
tableOffset, vertexOffset) );
|
||||
}
|
||||
|
||||
inline void
|
||||
FarVertexKernelBatchFactory::AppendLoopBatches(int level,
|
||||
int tableOffset,
|
||||
|
@@ -189,6 +189,22 @@ public:
template <class U>
void computeCatmarkVertexPointsB(int vertexOffset, int tableOffset, int start, int end, U * vsrc) const;

// Compute-kernel applied to vertices resulting from the refinement of a smooth or sharp vertex
// Kernel "A" handles the k_Crease and k_Corner rules
template <class U>
void computeCatmarkRestrictedVertexPointsA(int vertexOffset, int tableOffset, int start, int end, U * vsrc) const;

// Compute-kernel applied to vertices resulting from the refinement of a smooth or sharp vertex
// Kernel "B1" handles the regular k_Smooth and k_Dart rules
template <class U>
void computeCatmarkRestrictedVertexPointsB1(int vertexOffset, int tableOffset, int start, int end, U * vsrc) const;

// Compute-kernel applied to vertices resulting from the refinement of a smooth or sharp vertex
// Kernel "B2" handles the irregular k_Smooth and k_Dart rules
template <class U>
void computeCatmarkRestrictedVertexPointsB2(int vertexOffset, int tableOffset, int start, int end, U * vsrc) const;

// -------------------------------------------------------------------------
// Loop scheme

@@ -573,6 +589,76 @@ FarSubdivisionTables::computeCatmarkVertexPointsB( int vertexOffset, int tableOf
}
}

// single-pass kernel handling k_Crease and k_Corner rules
template <class U> void
FarSubdivisionTables::computeCatmarkRestrictedVertexPointsA( int vertexOffset, int tableOffset, int start, int end, U * vsrc ) const {

U * vdst = vsrc + vertexOffset + start;

for (int i=start+tableOffset; i<end+tableOffset; ++i, ++vdst ) {

vdst->Clear();

int p=this->_V_ITa[5*i+2], // index of the parent vertex
eidx0=this->_V_ITa[5*i+3], // index of the first crease rule edge
eidx1=this->_V_ITa[5*i+4]; // index of the second crease rule edge

vdst->AddWithWeight( vsrc[p], 0.75f );
vdst->AddWithWeight( vsrc[eidx0], 0.125f );
vdst->AddWithWeight( vsrc[eidx1], 0.125f );
vdst->AddVaryingWithWeight( vsrc[p], 1.0f );
}
}

// single-pass kernel handling regular k_Smooth and k_Dart rules
template <class U> void
FarSubdivisionTables::computeCatmarkRestrictedVertexPointsB1( int vertexOffset, int tableOffset, int start, int end, U * vsrc ) const {

U * vdst = vsrc + vertexOffset + start;

for (int i=start+tableOffset; i<end+tableOffset; ++i, ++vdst ) {

vdst->Clear();

int h = this->_V_ITa[5*i ], // offset of the vertices in the _V_IT array
p = this->_V_ITa[5*i+2]; // index of the parent vertex

vdst->AddWithWeight( vsrc[p], 0.5f );

for (int j=0; j<8; ++j, ++h)
vdst->AddWithWeight( vsrc[this->_V_IT[h]], 0.0625f );

vdst->AddVaryingWithWeight( vsrc[p], 1.0f );
}
}

// single-pass kernel handling irregular k_Smooth and k_Dart rules
template <class U> void
FarSubdivisionTables::computeCatmarkRestrictedVertexPointsB2( int vertexOffset, int tableOffset, int start, int end, U * vsrc ) const {

U * vdst = vsrc + vertexOffset + start;

for (int i=start+tableOffset; i<end+tableOffset; ++i, ++vdst ) {

vdst->Clear();

int h = this->_V_ITa[5*i ], // offset of the vertices in the _V_IT array
n = this->_V_ITa[5*i+1], // number of vertices in the _V_IT array (valence)
p = this->_V_ITa[5*i+2]; // index of the parent vertex

float wp = 1.0f/(n*n),
wv = (n-2.0f)*n*wp;

vdst->AddWithWeight( vsrc[p], wv );

for (int j=0; j<n; ++j) {
vdst->AddWithWeight( vsrc[this->_V_IT[h+j*2 ]], wp );
vdst->AddWithWeight( vsrc[this->_V_IT[h+j*2+1]], wp );
}
vdst->AddVaryingWithWeight( vsrc[p], 1.0f );
}
}

//
// Edge-vertices compute Kernel - completely re-entrant
//
@ -62,11 +62,13 @@ protected:
|
||||
|
||||
template <class X, class Y> friend class FarMeshFactory;
|
||||
|
||||
typedef bool (*CompareVerticesOperator)(const HbrVertex<T> *, const HbrVertex<T> *);
|
||||
|
||||
// This factory accumulates vertex topology data that will be shared among the
|
||||
// specialized subdivision scheme factories (Bilinear / Catmark / Loop).
|
||||
// It also populates the FarMeshFactory vertex remapping vector that ties the
|
||||
// Hbr vertex indices to the FarVertexEdit tables.
|
||||
FarSubdivisionTablesFactory( HbrMesh<T> const * mesh, int maxlevel, std::vector<int> & remapTable );
|
||||
FarSubdivisionTablesFactory( HbrMesh<T> const * mesh, int maxlevel, std::vector<int> & remapTable, CompareVerticesOperator compareVertices = CompareVertices );
|
||||
|
||||
// Returns the number of coarse vertices found in the mesh
|
||||
int GetNumCoarseVertices() const {
|
||||
@ -108,6 +110,15 @@ protected:
|
||||
|
||||
bool HasFractionalEdgeSharpness() const { return _hasFractionalEdgeSharpness; }
|
||||
|
||||
bool HasFractionalVertexSharpness() const { return _hasFractionalVertexSharpness; }
|
||||
|
||||
// Compares vertices based on their topological configuration
|
||||
// (see subdivisionTables::GetMaskRanking for more details)
|
||||
static bool CompareVertices( HbrVertex<T> const *x, HbrVertex<T> const *y );
|
||||
|
||||
// Compare vertices operator
|
||||
CompareVerticesOperator _compareVertices;
|
||||
|
||||
// Per-level counters and offsets for each type of vertex (face,edge,vert)
|
||||
std::vector<int> _faceVertIdx,
|
||||
_edgeVertIdx,
|
||||
@ -130,8 +141,9 @@ protected:
|
||||
// Number of coarse triangle faces
|
||||
int _numCoarseTriangleFaces;
|
||||
|
||||
// Indicates if an edge has a fractional (non-integer) sharpness
|
||||
bool _hasFractionalEdgeSharpness;
|
||||
// Indicates if an edge or vertex has a fractional (non-integer) sharpness
|
||||
bool _hasFractionalEdgeSharpness,
|
||||
_hasFractionalVertexSharpness;
|
||||
|
||||
private:
|
||||
|
||||
@ -142,14 +154,11 @@ private:
|
||||
|
||||
// Sums the number of adjacent vertices required to interpolate a Vert-Vertex
|
||||
static int sumVertVertexValence(HbrVertex<T> * vertex);
|
||||
|
||||
// Compares vertices based on their topological configuration
|
||||
// (see subdivisionTables::GetMaskRanking for more details)
|
||||
static bool compareVertices( HbrVertex<T> const *x, HbrVertex<T> const *y );
|
||||
};
|
||||
|
||||
template <class T, class U>
|
||||
FarSubdivisionTablesFactory<T,U>::FarSubdivisionTablesFactory( HbrMesh<T> const * mesh, int maxlevel, std::vector<int> & remapTable ) :
|
||||
FarSubdivisionTablesFactory<T,U>::FarSubdivisionTablesFactory( HbrMesh<T> const * mesh, int maxlevel, std::vector<int> & remapTable, CompareVerticesOperator compareVertices ) :
|
||||
_compareVertices(compareVertices),
|
||||
_faceVertIdx(maxlevel+1,0),
|
||||
_edgeVertIdx(maxlevel+1,0),
|
||||
_vertVertIdx(maxlevel+1,0),
|
||||
@ -161,7 +170,8 @@ FarSubdivisionTablesFactory<T,U>::FarSubdivisionTablesFactory( HbrMesh<T> const
|
||||
_minCoarseFaceValence(0),
|
||||
_maxCoarseFaceValence(0),
|
||||
_numCoarseTriangleFaces(0),
|
||||
_hasFractionalEdgeSharpness(false)
|
||||
_hasFractionalEdgeSharpness(false),
|
||||
_hasFractionalVertexSharpness(false)
|
||||
{
|
||||
assert( mesh );
|
||||
|
||||
@ -215,6 +225,9 @@ FarSubdivisionTablesFactory<T,U>::FarSubdivisionTablesFactory( HbrMesh<T> const
|
||||
} else if (v->GetParentVertex()) {
|
||||
vertCounts[depth]++;
|
||||
_vertVertsValenceSum+=sumVertVertexValence(v);
|
||||
float sharpness = v->GetParentVertex()->GetSharpness();
|
||||
if (sharpness > 0.0f && sharpness < 1.0f)
|
||||
_hasFractionalVertexSharpness = true;
|
||||
}
|
||||
}
|
||||
|
||||
@ -278,7 +291,7 @@ FarSubdivisionTablesFactory<T,U>::FarSubdivisionTablesFactory( HbrMesh<T> const
|
||||
// mask. The masks combinations are ordered so as to minimize the compute
|
||||
// kernel switching.(see subdivisionTables::GetMaskRanking for more details)
|
||||
for (size_t i=1; i<_vertVertsList.size(); ++i)
|
||||
std::sort( _vertVertsList[i].begin(), _vertVertsList[i].end(), compareVertices );
|
||||
std::sort( _vertVertsList[i].begin(), _vertVertsList[i].end(), _compareVertices );
|
||||
|
||||
|
||||
// These vertices still need a remapped index
|
||||
@ -384,7 +397,7 @@ FarSubdivisionTablesFactory<T,U>::sumVertVertexValence(HbrVertex<T> * vertex) {
|
||||
// The vertices should be sorted so as to minimize the number execution calls of
|
||||
// these kernels to match the 2 pass interpolation scheme used in Hbr.
|
||||
template <class T, class U> bool
|
||||
FarSubdivisionTablesFactory<T,U>::compareVertices( HbrVertex<T> const * x, HbrVertex<T> const * y ) {
|
||||
FarSubdivisionTablesFactory<T,U>::CompareVertices( HbrVertex<T> const * x, HbrVertex<T> const * y ) {
|
||||
|
||||
// Masks of the parent vertex decide for the current vertex.
|
||||
HbrVertex<T> * px=x->GetParentVertex(),
|
||||
@ -604,6 +617,9 @@ FarSubdivisionTablesFactory<T,U>::Splice(FarMeshVector const &meshes, FarKernelB
|
||||
} else if (batch._kernelType == FarKernelBatch::CATMARK_VERT_VERTEX_A1 or
|
||||
batch._kernelType == FarKernelBatch::CATMARK_VERT_VERTEX_A2 or
|
||||
batch._kernelType == FarKernelBatch::CATMARK_VERT_VERTEX_B or
|
||||
batch._kernelType == FarKernelBatch::CATMARK_RESTRICTED_VERT_VERTEX_A or
|
||||
batch._kernelType == FarKernelBatch::CATMARK_RESTRICTED_VERT_VERTEX_B1 or
|
||||
batch._kernelType == FarKernelBatch::CATMARK_RESTRICTED_VERT_VERTEX_B2 or
|
||||
batch._kernelType == FarKernelBatch::LOOP_VERT_VERTEX_A1 or
|
||||
batch._kernelType == FarKernelBatch::LOOP_VERT_VERTEX_A2 or
|
||||
batch._kernelType == FarKernelBatch::LOOP_VERT_VERTEX_B or
|
||||
|
@ -390,6 +390,94 @@ OsdCLComputeController::ApplyCatmarkVertexVerticesKernelA2(
|
||||
CL_CHECK_ERROR(ciErrNum, "vertex kernel A2 %d\n", ciErrNum);
|
||||
}
|
||||
|
||||
void
|
||||
OsdCLComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB1(
|
||||
FarKernelBatch const &batch, OsdCLComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
cl_int ciErrNum;
|
||||
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
|
||||
cl_kernel kernel = _currentBindState.kernelBundle->GetCatmarkRestrictedVertexKernelB1();
|
||||
|
||||
cl_mem V_ITa = context->GetTable(FarSubdivisionTables::V_ITa)->GetDevicePtr();
|
||||
cl_mem V_IT = context->GetTable(FarSubdivisionTables::V_IT)->GetDevicePtr();
|
||||
|
||||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
|
||||
clSetKernelArg(kernel, 2, sizeof(cl_mem), &V_ITa);
|
||||
clSetKernelArg(kernel, 3, sizeof(cl_mem), &V_IT);
|
||||
clSetKernelArg(kernel, 4, sizeof(int), &_currentBindState.vertexDesc.offset);
|
||||
clSetKernelArg(kernel, 5, sizeof(int), &_currentBindState.varyingDesc.offset);
|
||||
clSetKernelArg(kernel, 6, sizeof(int), batch.GetVertexOffsetPtr());
|
||||
clSetKernelArg(kernel, 7, sizeof(int), batch.GetTableOffsetPtr());
|
||||
clSetKernelArg(kernel, 8, sizeof(int), batch.GetStartPtr());
|
||||
clSetKernelArg(kernel, 9, sizeof(int), batch.GetEndPtr());
|
||||
|
||||
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
|
||||
kernel, 1, NULL, globalWorkSize,
|
||||
NULL, 0, NULL, NULL);
|
||||
CL_CHECK_ERROR(ciErrNum, "restricted vertex kernel B1 %d\n", ciErrNum);
|
||||
}
|
||||
|
||||
void
|
||||
OsdCLComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB2(
|
||||
FarKernelBatch const &batch, OsdCLComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
cl_int ciErrNum;
|
||||
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
|
||||
cl_kernel kernel = _currentBindState.kernelBundle->GetCatmarkRestrictedVertexKernelB2();
|
||||
|
||||
cl_mem V_ITa = context->GetTable(FarSubdivisionTables::V_ITa)->GetDevicePtr();
|
||||
cl_mem V_IT = context->GetTable(FarSubdivisionTables::V_IT)->GetDevicePtr();
|
||||
|
||||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
|
||||
clSetKernelArg(kernel, 2, sizeof(cl_mem), &V_ITa);
|
||||
clSetKernelArg(kernel, 3, sizeof(cl_mem), &V_IT);
|
||||
clSetKernelArg(kernel, 4, sizeof(int), &_currentBindState.vertexDesc.offset);
|
||||
clSetKernelArg(kernel, 5, sizeof(int), &_currentBindState.varyingDesc.offset);
|
||||
clSetKernelArg(kernel, 6, sizeof(int), batch.GetVertexOffsetPtr());
|
||||
clSetKernelArg(kernel, 7, sizeof(int), batch.GetTableOffsetPtr());
|
||||
clSetKernelArg(kernel, 8, sizeof(int), batch.GetStartPtr());
|
||||
clSetKernelArg(kernel, 9, sizeof(int), batch.GetEndPtr());
|
||||
|
||||
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
|
||||
kernel, 1, NULL, globalWorkSize,
|
||||
NULL, 0, NULL, NULL);
|
||||
CL_CHECK_ERROR(ciErrNum, "restricted vertex kernel B2 %d\n", ciErrNum);
|
||||
}
|
||||
|
||||
void
|
||||
OsdCLComputeController::ApplyCatmarkRestrictedVertexVerticesKernelA(
|
||||
FarKernelBatch const &batch, OsdCLComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
cl_int ciErrNum;
|
||||
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
|
||||
cl_kernel kernel = _currentBindState.kernelBundle->GetCatmarkRestrictedVertexKernelA();
|
||||
|
||||
cl_mem V_ITa = context->GetTable(FarSubdivisionTables::V_ITa)->GetDevicePtr();
|
||||
|
||||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
|
||||
clSetKernelArg(kernel, 2, sizeof(cl_mem), &V_ITa);
|
||||
clSetKernelArg(kernel, 3, sizeof(int), &_currentBindState.vertexDesc.offset);
|
||||
clSetKernelArg(kernel, 4, sizeof(int), &_currentBindState.varyingDesc.offset);
|
||||
clSetKernelArg(kernel, 5, sizeof(int), batch.GetVertexOffsetPtr());
|
||||
clSetKernelArg(kernel, 6, sizeof(int), batch.GetTableOffsetPtr());
|
||||
clSetKernelArg(kernel, 7, sizeof(int), batch.GetStartPtr());
|
||||
clSetKernelArg(kernel, 8, sizeof(int), batch.GetEndPtr());
|
||||
|
||||
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
|
||||
kernel, 1, NULL, globalWorkSize,
|
||||
NULL, 0, NULL, NULL);
|
||||
CL_CHECK_ERROR(ciErrNum, "restricted vertex kernel A %d\n", ciErrNum);
|
||||
}
|
||||
|
||||
void
|
||||
OsdCLComputeController::ApplyLoopEdgeVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdCLComputeContext const *context) const {
|
||||
|
@ -151,6 +151,12 @@ protected:
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelA2(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkRestrictedVertexVerticesKernelB1(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkRestrictedVertexVerticesKernelB2(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkRestrictedVertexVerticesKernelA(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
|
||||
void ApplyLoopEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
|
@ -428,6 +428,107 @@ __kernel void computeVertexB(__global float *vertex,
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void computeRestrictedVertexA(__global float *vertex,
|
||||
__global float *varying,
|
||||
__global int *V_ITa,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset,
|
||||
int start, int end) {
|
||||
|
||||
int i = start + get_global_id(0) + tableOffset;
|
||||
int vid = start + get_global_id(0) + offset;
|
||||
int p = V_ITa[5*i+2];
|
||||
int eidx0 = V_ITa[5*i+3];
|
||||
int eidx1 = V_ITa[5*i+4];
|
||||
vertex += vertexOffset;
|
||||
varying += (varying ? varyingOffset :0);
|
||||
|
||||
struct Vertex dst;
|
||||
clearVertex(&dst);
|
||||
addWithWeight(&dst, vertex, p, 0.75f);
|
||||
addWithWeight(&dst, vertex, eidx0, 0.125f);
|
||||
addWithWeight(&dst, vertex, eidx1, 0.125f);
|
||||
writeVertex(vertex, vid, &dst);
|
||||
|
||||
if (varying) {
|
||||
struct Varying dstVarying;
|
||||
clearVarying(&dstVarying);
|
||||
addVaryingWithWeight(&dstVarying, varying, p, 1.0f);
|
||||
writeVarying(varying, vid, &dstVarying);
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void computeRestrictedVertexB1(__global float *vertex,
|
||||
__global float *varying,
|
||||
__global int *V_ITa,
|
||||
__global int *V_IT,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset,
|
||||
int start, int end) {
|
||||
|
||||
int i = start + get_global_id(0) + tableOffset;
|
||||
int vid = start + get_global_id(0) + offset;
|
||||
int h = V_ITa[5*i];
|
||||
int p = V_ITa[5*i+2];
|
||||
vertex += vertexOffset;
|
||||
varying += (varying ? varyingOffset :0);
|
||||
|
||||
struct Vertex dst;
|
||||
clearVertex(&dst);
|
||||
|
||||
addWithWeight(&dst, vertex, p, 0.5f);
|
||||
|
||||
for (int j = 0; j < 8; ++j, ++h) {
|
||||
addWithWeight(&dst, vertex, V_IT[h], 0.0625f);
|
||||
}
|
||||
writeVertex(vertex, vid, &dst);
|
||||
|
||||
if (varying) {
|
||||
struct Varying dstVarying;
|
||||
clearVarying(&dstVarying);
|
||||
addVaryingWithWeight(&dstVarying, varying, p, 1.0f);
|
||||
writeVarying(varying, vid, &dstVarying);
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void computeRestrictedVertexB2(__global float *vertex,
|
||||
__global float *varying,
|
||||
__global int *V_ITa,
|
||||
__global int *V_IT,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset,
|
||||
int start, int end) {
|
||||
|
||||
int i = start + get_global_id(0) + tableOffset;
|
||||
int vid = start + get_global_id(0) + offset;
|
||||
int h = V_ITa[5*i];
|
||||
int n = V_ITa[5*i+1];
|
||||
int p = V_ITa[5*i+2];
|
||||
vertex += vertexOffset;
|
||||
varying += (varying ? varyingOffset :0);
|
||||
|
||||
float wp = 1.0f/(float)(n*n);
|
||||
float wv = (n-2.0f) * n * wp;
|
||||
|
||||
struct Vertex dst;
|
||||
clearVertex(&dst);
|
||||
|
||||
addWithWeight(&dst, vertex, p, wv);
|
||||
|
||||
for (int j = 0; j < n; ++j) {
|
||||
addWithWeight(&dst, vertex, V_IT[h+j*2], wp);
|
||||
addWithWeight(&dst, vertex, V_IT[h+j*2+1], wp);
|
||||
}
|
||||
writeVertex(vertex, vid, &dst);
|
||||
|
||||
if (varying) {
|
||||
struct Varying dstVarying;
|
||||
clearVarying(&dstVarying);
|
||||
addVaryingWithWeight(&dstVarying, varying, p, 1.0f);
|
||||
writeVarying(varying, vid, &dstVarying);
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void computeLoopVertexB(__global float *vertex,
|
||||
__global float *varying,
|
||||
__global int *V_ITa,
|
||||
|
@ -57,6 +57,9 @@ OsdCLKernelBundle::OsdCLKernelBundle() :
|
||||
_clCatmarkRestrictedEdge(NULL),
|
||||
_clCatmarkVertexA(NULL),
|
||||
_clCatmarkVertexB(NULL),
|
||||
_clCatmarkRestrictedVertexA(NULL),
|
||||
_clCatmarkRestrictedVertexB1(NULL),
|
||||
_clCatmarkRestrictedVertexB2(NULL),
|
||||
_clLoopEdge(NULL),
|
||||
_clLoopVertexA(NULL),
|
||||
_clLoopVertexB(NULL),
|
||||
@ -87,6 +90,12 @@ OsdCLKernelBundle::~OsdCLKernelBundle() {
|
||||
clReleaseKernel(_clCatmarkVertexA);
|
||||
if (_clCatmarkVertexB)
|
||||
clReleaseKernel(_clCatmarkVertexB);
|
||||
if (_clCatmarkRestrictedVertexA)
|
||||
clReleaseKernel(_clCatmarkRestrictedVertexA);
|
||||
if (_clCatmarkRestrictedVertexB1)
|
||||
clReleaseKernel(_clCatmarkRestrictedVertexB1);
|
||||
if (_clCatmarkRestrictedVertexB2)
|
||||
clReleaseKernel(_clCatmarkRestrictedVertexB2);
|
||||
|
||||
if (_clLoopEdge)
|
||||
clReleaseKernel(_clLoopEdge);
|
||||
@ -163,6 +172,9 @@ OsdCLKernelBundle::Compile(cl_context clContext,
|
||||
_clCatmarkRestrictedEdge = buildKernel(_clProgram, "computeRestrictedEdge");
|
||||
_clCatmarkVertexA = buildKernel(_clProgram, "computeVertexA");
|
||||
_clCatmarkVertexB = buildKernel(_clProgram, "computeVertexB");
|
||||
_clCatmarkRestrictedVertexA = buildKernel(_clProgram, "computeRestrictedVertexA");
|
||||
_clCatmarkRestrictedVertexB1 = buildKernel(_clProgram, "computeRestrictedVertexB1");
|
||||
_clCatmarkRestrictedVertexB2 = buildKernel(_clProgram, "computeRestrictedVertexB2");
|
||||
_clLoopEdge = buildKernel(_clProgram, "computeEdge");
|
||||
_clLoopVertexA = buildKernel(_clProgram, "computeVertexA");
|
||||
_clLoopVertexB = buildKernel(_clProgram, "computeLoopVertexB");
|
||||
|
@ -62,6 +62,12 @@ public:
|
||||
|
||||
cl_kernel GetCatmarkVertexKernelB() const { return _clCatmarkVertexB; }
|
||||
|
||||
cl_kernel GetCatmarkRestrictedVertexKernelA() const { return _clCatmarkRestrictedVertexA; }
|
||||
|
||||
cl_kernel GetCatmarkRestrictedVertexKernelB1() const { return _clCatmarkRestrictedVertexB1; }
|
||||
|
||||
cl_kernel GetCatmarkRestrictedVertexKernelB2() const { return _clCatmarkRestrictedVertexB2; }
|
||||
|
||||
cl_kernel GetLoopEdgeKernel() const { return _clLoopEdge; }
|
||||
|
||||
cl_kernel GetLoopVertexKernelA() const { return _clLoopVertexA; }
|
||||
@ -104,6 +110,9 @@ protected:
|
||||
_clCatmarkRestrictedEdge,
|
||||
_clCatmarkVertexA,
|
||||
_clCatmarkVertexB,
|
||||
_clCatmarkRestrictedVertexA,
|
||||
_clCatmarkRestrictedVertexB1,
|
||||
_clCatmarkRestrictedVertexB2,
|
||||
_clLoopEdge,
|
||||
_clLoopVertexA,
|
||||
_clLoopVertexB,
|
||||
|
@ -186,6 +186,47 @@ OsdCpuComputeController::ApplyCatmarkVertexVerticesKernelA2(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
|
||||
}
|
||||
|
||||
void
|
||||
OsdCpuComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB1(
|
||||
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
OsdCpuComputeRestrictedVertexB1(
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_IT)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdCpuComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB2(
|
||||
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
OsdCpuComputeRestrictedVertexB2(
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_IT)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdCpuComputeController::ApplyCatmarkRestrictedVertexVerticesKernelA(
|
||||
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
OsdCpuComputeRestrictedVertexA(
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdCpuComputeController::ApplyLoopEdgeVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
|
||||
|
@ -134,6 +134,12 @@ protected:
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelA2(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkRestrictedVertexVerticesKernelB1(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkRestrictedVertexVerticesKernelB2(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkRestrictedVertexVerticesKernelA(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
|
||||
void ApplyLoopEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
|
@ -367,6 +367,100 @@ void OsdCpuComputeVertexB(
|
||||
}
|
||||
}
|
||||
|
||||
void OsdCpuComputeRestrictedVertexB1(
|
||||
float *vertex, float *varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *V_ITa, const int *V_IT,
|
||||
int vertexOffset, int tableOffset, int start, int end) {
|
||||
|
||||
float *vertexResults = (float*)alloca(vertexDesc.length * sizeof(float));
|
||||
float *varyingResults = (float*)alloca(varyingDesc.length * sizeof(float));
|
||||
|
||||
for (int i = start + tableOffset; i < end + tableOffset; i++) {
|
||||
int h = V_ITa[5*i];
|
||||
int p = V_ITa[5*i+2];
|
||||
|
||||
int dstIndex = i + vertexOffset - tableOffset;
|
||||
clear(vertexResults, vertexDesc);
|
||||
clear(varyingResults, varyingDesc);
|
||||
|
||||
addWithWeight(vertexResults, vertex, p, 0.5f, vertexDesc);
|
||||
|
||||
for (int j = 0; j < 8; ++j, ++h)
|
||||
addWithWeight(vertexResults, vertex, V_IT[h], 0.0625f, vertexDesc);
|
||||
|
||||
addWithWeight(varyingResults, varying, p, 1.0f, varyingDesc);
|
||||
|
||||
copy(vertex, vertexResults, dstIndex, vertexDesc);
|
||||
copy(varying, varyingResults, dstIndex, varyingDesc);
|
||||
}
|
||||
}
|
||||
|
||||
void OsdCpuComputeRestrictedVertexB2(
|
||||
float *vertex, float *varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *V_ITa, const int *V_IT,
|
||||
int vertexOffset, int tableOffset, int start, int end) {
|
||||
|
||||
float *vertexResults = (float*)alloca(vertexDesc.length * sizeof(float));
|
||||
float *varyingResults = (float*)alloca(varyingDesc.length * sizeof(float));
|
||||
|
||||
for (int i = start + tableOffset; i < end + tableOffset; i++) {
|
||||
int h = V_ITa[5*i];
|
||||
int n = V_ITa[5*i+1];
|
||||
int p = V_ITa[5*i+2];
|
||||
|
||||
float wp = 1.0f/static_cast<float>(n*n);
|
||||
float wv = (n-2.0f) * n * wp;
|
||||
|
||||
int dstIndex = i + vertexOffset - tableOffset;
|
||||
clear(vertexResults, vertexDesc);
|
||||
clear(varyingResults, varyingDesc);
|
||||
|
||||
addWithWeight(vertexResults, vertex, p, wv, vertexDesc);
|
||||
|
||||
for (int j = 0; j < n; ++j) {
|
||||
addWithWeight(vertexResults, vertex, V_IT[h+j*2], wp, vertexDesc);
|
||||
addWithWeight(vertexResults, vertex, V_IT[h+j*2+1], wp, vertexDesc);
|
||||
}
|
||||
addWithWeight(varyingResults, varying, p, 1.0f, varyingDesc);
|
||||
|
||||
copy(vertex, vertexResults, dstIndex, vertexDesc);
|
||||
copy(varying, varyingResults, dstIndex, varyingDesc);
|
||||
}
|
||||
}
|
||||
|
||||
void OsdCpuComputeRestrictedVertexA(
|
||||
float *vertex, float *varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *V_ITa,
|
||||
int vertexOffset, int tableOffset, int start, int end) {
|
||||
|
||||
float *vertexResults = (float*)alloca(vertexDesc.length * sizeof(float));
|
||||
float *varyingResults = (float*)alloca(varyingDesc.length * sizeof(float));
|
||||
|
||||
for (int i = start + tableOffset; i < end + tableOffset; i++) {
|
||||
int p = V_ITa[5*i+2];
|
||||
int eidx0 = V_ITa[5*i+3];
|
||||
int eidx1 = V_ITa[5*i+4];
|
||||
|
||||
int dstIndex = i + vertexOffset - tableOffset;
|
||||
clear(vertexResults, vertexDesc);
|
||||
clear(varyingResults, varyingDesc);
|
||||
|
||||
addWithWeight(vertexResults, vertex, p, 0.75f, vertexDesc);
|
||||
addWithWeight(vertexResults, vertex, eidx0, 0.125f, vertexDesc);
|
||||
addWithWeight(vertexResults, vertex, eidx1, 0.125f, vertexDesc);
|
||||
addWithWeight(varyingResults, varying, p, 1.0f, varyingDesc);
|
||||
|
||||
copy(vertex, vertexResults, dstIndex, vertexDesc);
|
||||
copy(varying, varyingResults, dstIndex, varyingDesc);
|
||||
}
|
||||
}
|
||||
|
||||
void OsdCpuComputeLoopVertexB(
|
||||
float *vertex, float *varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
|
@ -322,6 +322,27 @@ void OsdCpuComputeVertexB(float *vertex, float * varying,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdCpuComputeRestrictedVertexB1(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *V_ITa, const int *V_IT,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdCpuComputeRestrictedVertexB2(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *V_ITa, const int *V_IT,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdCpuComputeRestrictedVertexA(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *V_ITa,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
template<int numVertexElements>
|
||||
void ComputeLoopVertexBKernel( float *vertex,
|
||||
const int *V_ITa,
|
||||
|
@ -67,6 +67,24 @@ void OsdCudaComputeVertexB(float *vertex, float *varying,
|
||||
int *V_ITa, int *V_IT, float *V_W, int offset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdCudaComputeRestrictedVertexA(float *vertex, float *varying,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
int *V_ITa, int offset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdCudaComputeRestrictedVertexB1(float *vertex, float *varying,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
int *V_ITa, int *V_IT, int offset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdCudaComputeRestrictedVertexB2(float *vertex, float *varying,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
int *V_ITa, int *V_IT, int offset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdCudaComputeLoopVertexB(float *vertex, float *varying,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
@ -341,6 +359,72 @@ OsdCudaComputeController::ApplyCatmarkVertexVerticesKernelA2(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
|
||||
}
|
||||
|
||||
void
|
||||
OsdCudaComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB1(
|
||||
FarKernelBatch const &batch, OsdCudaComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
const OsdCudaTable * V_ITa = context->GetTable(FarSubdivisionTables::V_ITa);
|
||||
const OsdCudaTable * V_IT = context->GetTable(FarSubdivisionTables::V_IT);
|
||||
assert(V_ITa);
|
||||
assert(V_IT);
|
||||
|
||||
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
|
||||
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
|
||||
|
||||
OsdCudaComputeRestrictedVertexB1(
|
||||
vertex, varying,
|
||||
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
|
||||
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
|
||||
static_cast<int*>(V_ITa->GetCudaMemory()),
|
||||
static_cast<int*>(V_IT->GetCudaMemory()),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdCudaComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB2(
|
||||
FarKernelBatch const &batch, OsdCudaComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
const OsdCudaTable * V_ITa = context->GetTable(FarSubdivisionTables::V_ITa);
|
||||
const OsdCudaTable * V_IT = context->GetTable(FarSubdivisionTables::V_IT);
|
||||
assert(V_ITa);
|
||||
assert(V_IT);
|
||||
|
||||
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
|
||||
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
|
||||
|
||||
OsdCudaComputeRestrictedVertexB2(
|
||||
vertex, varying,
|
||||
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
|
||||
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
|
||||
static_cast<int*>(V_ITa->GetCudaMemory()),
|
||||
static_cast<int*>(V_IT->GetCudaMemory()),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdCudaComputeController::ApplyCatmarkRestrictedVertexVerticesKernelA(
|
||||
FarKernelBatch const &batch, OsdCudaComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
const OsdCudaTable * V_ITa = context->GetTable(FarSubdivisionTables::V_ITa);
|
||||
assert(V_ITa);
|
||||
|
||||
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
|
||||
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
|
||||
|
||||
OsdCudaComputeRestrictedVertexA(
|
||||
vertex, varying,
|
||||
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
|
||||
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
|
||||
static_cast<int*>(V_ITa->GetCudaMemory()),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdCudaComputeController::ApplyLoopEdgeVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdCudaComputeContext const *context) const {
|
||||
|
@ -134,6 +134,12 @@ protected:
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelA2(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkRestrictedVertexVerticesKernelB1(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkRestrictedVertexVerticesKernelB2(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkRestrictedVertexVerticesKernelA(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
|
||||
void ApplyLoopEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
|
@ -619,6 +619,192 @@ computeVertexB(float *fVertex, float *fVarying,
|
||||
}
|
||||
}
|
||||
|
||||
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
|
||||
computeRestrictedVertexA(float *fVertex, float *fVaryings, int *V0_ITa, int offset, int tableOffset, int start, int end)
|
||||
{
|
||||
DeviceVertex<NUM_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_VERTEX_ELEMENTS>*)fVertex;
|
||||
DeviceVertex<NUM_VARYING_ELEMENTS> *varyings = (DeviceVertex<NUM_VARYING_ELEMENTS>*)fVaryings;
|
||||
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
|
||||
i < end+tableOffset;
|
||||
i += blockDim.x * gridDim.x) {
|
||||
|
||||
int p = V0_ITa[5*i+2];
|
||||
int eidx0 = V0_ITa[5*i+3];
|
||||
int eidx1 = V0_ITa[5*i+4];
|
||||
|
||||
DeviceVertex<NUM_VERTEX_ELEMENTS> dst;
|
||||
dst.clear();
|
||||
|
||||
dst.addWithWeight(&vertex[p], 0.75f);
|
||||
dst.addWithWeight(&vertex[eidx0], 0.125f);
|
||||
dst.addWithWeight(&vertex[eidx1], 0.125f);
|
||||
vertex[i+offset-tableOffset] = dst;
|
||||
|
||||
if(NUM_VARYING_ELEMENTS > 0){
|
||||
DeviceVertex<NUM_VARYING_ELEMENTS> dstVarying;
|
||||
dstVarying.clear();
|
||||
dstVarying.addWithWeight(&varyings[p], 1.0f);
|
||||
varyings[i+offset-tableOffset] = dstVarying;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void
|
||||
computeRestrictedVertexA(float *fVertex, float *fVaryings,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
int *V0_ITa, int offset, int tableOffset, int start, int end)
|
||||
{
|
||||
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
|
||||
i < end + tableOffset;
|
||||
i += blockDim.x * gridDim.x){
|
||||
|
||||
int p = V0_ITa[5*i+2];
|
||||
int eidx0 = V0_ITa[5*i+3];
|
||||
int eidx1 = V0_ITa[5*i+4];
|
||||
|
||||
float *dstVertex = fVertex + (i+offset-tableOffset)*vertexStride;
|
||||
clear(dstVertex, vertexLength);
|
||||
|
||||
addWithWeight(dstVertex, fVertex + p*vertexStride, 0.75f, vertexLength);
|
||||
addWithWeight(dstVertex, fVertex + eidx0*vertexStride, 0.125f, vertexLength);
|
||||
addWithWeight(dstVertex, fVertex + eidx1*vertexStride, 0.125f, vertexLength);
|
||||
|
||||
if(varyingLength > 0){
|
||||
float *dstVarying = fVaryings + (i+offset-tableOffset)*varyingStride;
|
||||
clear(dstVarying, varyingLength);
|
||||
addWithWeight(dstVarying, fVaryings + p*varyingStride, 1.0f, varyingLength);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
|
||||
computeRestrictedVertexB1(float *fVertex, float *fVaryings,
|
||||
const int *V0_ITa, const int *V0_IT, int offset, int tableOffset, int start, int end)
|
||||
{
|
||||
DeviceVertex<NUM_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_VERTEX_ELEMENTS>*)fVertex;
|
||||
DeviceVertex<NUM_VARYING_ELEMENTS> *varyings = (DeviceVertex<NUM_VARYING_ELEMENTS>*)fVaryings;
|
||||
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
|
||||
i < end + tableOffset;
|
||||
i += blockDim.x * gridDim.x) {
|
||||
|
||||
int h = V0_ITa[5*i];
|
||||
int p = V0_ITa[5*i+2];
|
||||
|
||||
DeviceVertex<NUM_VERTEX_ELEMENTS> dst;
|
||||
dst.clear();
|
||||
dst.addWithWeight(&vertex[p], 0.5f);
|
||||
|
||||
for (int j = 0; j < 8; ++j)
|
||||
dst.addWithWeight(&vertex[V0_IT[h+j]], 0.0625f);
|
||||
vertex[i+offset-tableOffset] = dst;
|
||||
|
||||
if(NUM_VARYING_ELEMENTS > 0){
|
||||
DeviceVertex<NUM_VARYING_ELEMENTS> dstVarying;
|
||||
dstVarying.clear();
|
||||
dstVarying.addWithWeight(&varyings[p], 1.0f);
|
||||
varyings[i+offset-tableOffset] = dstVarying;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void
|
||||
computeRestrictedVertexB1(float *fVertex, float *fVarying,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
const int *V0_ITa, const int *V0_IT, int offset, int tableOffset, int start, int end)
|
||||
{
|
||||
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
|
||||
i < end + tableOffset;
|
||||
i += blockDim.x * gridDim.x) {
|
||||
|
||||
int h = V0_ITa[5*i];
|
||||
int p = V0_ITa[5*i+2];
|
||||
|
||||
float *dstVertex = fVertex + (i+offset-tableOffset)*vertexStride;
|
||||
clear(dstVertex, vertexLength);
|
||||
addWithWeight(dstVertex, fVertex + p*vertexStride, 0.5f, vertexLength);
|
||||
|
||||
for (int j = 0; j < 8; ++j)
|
||||
addWithWeight(dstVertex, fVertex + V0_IT[h+j]*vertexStride, 0.0625f, vertexLength);
|
||||
|
||||
if (varyingLength > 0) {
|
||||
float *dstVarying = fVarying + (i+offset-tableOffset)*varyingStride;
|
||||
clear(dstVarying, varyingLength);
|
||||
addWithWeight(dstVarying, fVarying + p*varyingStride, 1.0f, varyingLength);
|
||||
}
|
||||
}
|
||||
}

template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
computeRestrictedVertexB2(float *fVertex, float *fVaryings,
                          const int *V0_ITa, const int *V0_IT, int offset, int tableOffset, int start, int end)
{
    DeviceVertex<NUM_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_VERTEX_ELEMENTS>*)fVertex;
    DeviceVertex<NUM_VARYING_ELEMENTS> *varyings = (DeviceVertex<NUM_VARYING_ELEMENTS>*)fVaryings;
    for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
         i < end + tableOffset;
         i += blockDim.x * gridDim.x) {

        int h = V0_ITa[5*i];
        int n = V0_ITa[5*i+1];
        int p = V0_ITa[5*i+2];

        float wp = 1.0f/float(n*n);
        float wv = (n-2.0f) * n * wp;

        DeviceVertex<NUM_VERTEX_ELEMENTS> dst;
        dst.clear();
        dst.addWithWeight(&vertex[p], wv);

        for (int j = 0; j < n; ++j) {
            dst.addWithWeight(&vertex[V0_IT[h+j*2]], wp);
            dst.addWithWeight(&vertex[V0_IT[h+j*2+1]], wp);
        }
        vertex[i+offset-tableOffset] = dst;

        if(NUM_VARYING_ELEMENTS > 0){
            DeviceVertex<NUM_VARYING_ELEMENTS> dstVarying;
            dstVarying.clear();
            dstVarying.addWithWeight(&varyings[p], 1.0f);
            varyings[i+offset-tableOffset] = dstVarying;
        }
    }
}

__global__ void
computeRestrictedVertexB2(float *fVertex, float *fVarying,
                          int vertexLength, int vertexStride,
                          int varyingLength, int varyingStride,
                          const int *V0_ITa, const int *V0_IT, int offset, int tableOffset, int start, int end)
{
    for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
         i < end + tableOffset;
         i += blockDim.x * gridDim.x) {

        int h = V0_ITa[5*i];
        int n = V0_ITa[5*i+1];
        int p = V0_ITa[5*i+2];

        float wp = 1.0f/float(n*n);
        float wv = (n-2.0f) * n * wp;

        float *dstVertex = fVertex + (i+offset-tableOffset)*vertexStride;
        clear(dstVertex, vertexLength);
        addWithWeight(dstVertex, fVertex + p*vertexStride, wv, vertexLength);

        for (int j = 0; j < n; ++j) {
            addWithWeight(dstVertex, fVertex + V0_IT[h+j*2]*vertexStride, wp, vertexLength);
            addWithWeight(dstVertex, fVertex + V0_IT[h+j*2+1]*vertexStride, wp, vertexLength);
        }

        if (varyingLength > 0) {
            float *dstVarying = fVarying + (i+offset-tableOffset)*varyingStride;
            clear(dstVarying, varyingLength);
            addWithWeight(dstVarying, fVarying + p*varyingStride, 1.0f, varyingLength);
        }
    }
}
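
// Weight sanity check for the irregular smooth rule above: wv = (n-2.0f)*n*wp reduces to
// (n-2)/n, and the loop adds 2*n contributions of wp = 1/n^2, so the weights sum to
// (n-2)/n + 2n/n^2 = 1 -- the usual form of the Catmull-Clark smooth vertex-vertex mask,
// v' = ((n-2)/n) v + (1/n^2) * sum(e_i) + (1/n^2) * sum(f_i).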

// --------------------------------------------------------------------------------------------

@@ -960,6 +1146,54 @@ void OsdCudaComputeVertexB(float *vertex, float *varying,
                                   V_ITa, V_IT, V_W, offset, tableOffset, start, end);
}

void OsdCudaComputeRestrictedVertexA(float *vertex, float *varying,
                                     int vertexLength, int vertexStride,
                                     int varyingLength, int varyingStride,
                                     int *V_ITa, int offset, int tableOffset, int start, int end, int pass)
{
    // computeRestrictedVertexA<0, 3><<<512,32>>>(vertex, varying, V_ITa, offset, start, end);
    OPT_KERNEL(0, 0, computeRestrictedVertexA, 512, 32, (vertex, varying, V_ITa, offset, tableOffset, start, end));
    OPT_KERNEL(0, 3, computeRestrictedVertexA, 512, 32, (vertex, varying, V_ITa, offset, tableOffset, start, end));
    OPT_KERNEL(3, 0, computeRestrictedVertexA, 512, 32, (vertex, varying, V_ITa, offset, tableOffset, start, end));
    OPT_KERNEL(3, 3, computeRestrictedVertexA, 512, 32, (vertex, varying, V_ITa, offset, tableOffset, start, end));

    computeRestrictedVertexA<<<512, 32>>>(vertex, varying,
                                          vertexLength, vertexStride, varyingLength, varyingStride,
                                          V_ITa, offset, tableOffset, start, end);
}
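
// Dispatch note: the OPT_KERNEL invocations above presumably pick a specialized template
// instantiation of the kernel for the common (0- or 3-element) vertex and varying layouts;
// the generic stride-based <<<512, 32>>> launch that follows is the fallback for every
// other buffer descriptor.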

void OsdCudaComputeRestrictedVertexB1(float *vertex, float *varying,
                                      int vertexLength, int vertexStride,
                                      int varyingLength, int varyingStride,
                                      int *V_ITa, int *V_IT, int offset, int tableOffset, int start, int end)
{
    // computeRestrictedVertexB1<0, 3><<<512,32>>>(vertex, varying, V_ITa, V_IT, offset, start, end);
    OPT_KERNEL(0, 0, computeRestrictedVertexB1, 512, 32, (vertex, varying, V_ITa, V_IT, offset, tableOffset, start, end));
    OPT_KERNEL(0, 3, computeRestrictedVertexB1, 512, 32, (vertex, varying, V_ITa, V_IT, offset, tableOffset, start, end));
    OPT_KERNEL(3, 0, computeRestrictedVertexB1, 512, 32, (vertex, varying, V_ITa, V_IT, offset, tableOffset, start, end));
    OPT_KERNEL(3, 3, computeRestrictedVertexB1, 512, 32, (vertex, varying, V_ITa, V_IT, offset, tableOffset, start, end));

    computeRestrictedVertexB1 <<<512, 32>>>(vertex, varying,
                                            vertexLength, vertexStride, varyingLength, varyingStride,
                                            V_ITa, V_IT, offset, tableOffset, start, end);
}

void OsdCudaComputeRestrictedVertexB2(float *vertex, float *varying,
                                      int vertexLength, int vertexStride,
                                      int varyingLength, int varyingStride,
                                      int *V_ITa, int *V_IT, int offset, int tableOffset, int start, int end)
{
    // computeRestrictedVertexB2<0, 3><<<512,32>>>(vertex, varying, V_ITa, V_IT, offset, start, end);
    OPT_KERNEL(0, 0, computeRestrictedVertexB2, 512, 32, (vertex, varying, V_ITa, V_IT, offset, tableOffset, start, end));
    OPT_KERNEL(0, 3, computeRestrictedVertexB2, 512, 32, (vertex, varying, V_ITa, V_IT, offset, tableOffset, start, end));
    OPT_KERNEL(3, 0, computeRestrictedVertexB2, 512, 32, (vertex, varying, V_ITa, V_IT, offset, tableOffset, start, end));
    OPT_KERNEL(3, 3, computeRestrictedVertexB2, 512, 32, (vertex, varying, V_ITa, V_IT, offset, tableOffset, start, end));

    computeRestrictedVertexB2 <<<512, 32>>>(vertex, varying,
                                            vertexLength, vertexStride, varyingLength, varyingStride,
                                            V_ITa, V_IT, offset, tableOffset, start, end);
}

void OsdCudaComputeLoopVertexB(float *vertex, float *varying,
                               int vertexLength, int vertexStride,
                               int varyingLength, int varyingStride,

@@ -247,6 +247,42 @@ OsdD3D11ComputeController::ApplyCatmarkVertexVerticesKernelA2(
        _currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
}

void
OsdD3D11ComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB1(
    FarKernelBatch const &batch, OsdD3D11ComputeContext const *context) const {

    assert(context);

    _currentBindState.kernelBundle->ApplyCatmarkRestrictedVertexVerticesKernelB1(
        batch.GetVertexOffset(), batch.GetTableOffset(),
        batch.GetStart(), batch.GetEnd(),
        _currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
}

void
OsdD3D11ComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB2(
    FarKernelBatch const &batch, OsdD3D11ComputeContext const *context) const {

    assert(context);

    _currentBindState.kernelBundle->ApplyCatmarkRestrictedVertexVerticesKernelB2(
        batch.GetVertexOffset(), batch.GetTableOffset(),
        batch.GetStart(), batch.GetEnd(),
        _currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
}

void
OsdD3D11ComputeController::ApplyCatmarkRestrictedVertexVerticesKernelA(
    FarKernelBatch const &batch, OsdD3D11ComputeContext const *context) const {

    assert(context);

    _currentBindState.kernelBundle->ApplyCatmarkRestrictedVertexVerticesKernelA(
        batch.GetVertexOffset(), batch.GetTableOffset(),
        batch.GetStart(), batch.GetEnd(),
        _currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
}

void
OsdD3D11ComputeController::ApplyLoopEdgeVerticesKernel(
    FarKernelBatch const &batch, OsdD3D11ComputeContext const *context) const {

@@ -150,6 +150,12 @@ protected:

    void ApplyCatmarkVertexVerticesKernelA2(FarKernelBatch const &batch, ComputeContext const *context) const;

    void ApplyCatmarkRestrictedVertexVerticesKernelB1(FarKernelBatch const &batch, ComputeContext const *context) const;

    void ApplyCatmarkRestrictedVertexVerticesKernelB2(FarKernelBatch const &batch, ComputeContext const *context) const;

    void ApplyCatmarkRestrictedVertexVerticesKernelA(FarKernelBatch const &batch, ComputeContext const *context) const;


    void ApplyLoopEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;

@@ -60,6 +60,9 @@ OsdD3D11ComputeKernelBundle::OsdD3D11ComputeKernelBundle(
      _kernelComputeVertex(0),
      _kernelComputeVertexA(0),
      _kernelComputeCatmarkVertexB(0),
      _kernelComputeCatmarkRestrictedVertexA(0),
      _kernelComputeCatmarkRestrictedVertexB1(0),
      _kernelComputeCatmarkRestrictedVertexB2(0),
      _kernelComputeLoopVertexB(0),
      _kernelEditAdd(0) {

@@ -80,6 +83,9 @@ OsdD3D11ComputeKernelBundle::~OsdD3D11ComputeKernelBundle() {
    SAFE_RELEASE(_kernelComputeVertex);
    SAFE_RELEASE(_kernelComputeVertexA);
    SAFE_RELEASE(_kernelComputeCatmarkVertexB);
    SAFE_RELEASE(_kernelComputeCatmarkRestrictedVertexA);
    SAFE_RELEASE(_kernelComputeCatmarkRestrictedVertexB1);
    SAFE_RELEASE(_kernelComputeCatmarkRestrictedVertexB2);
    SAFE_RELEASE(_kernelComputeLoopVertexB);
    SAFE_RELEASE(_kernelEditAdd);
}

@@ -193,6 +199,15 @@ OsdD3D11ComputeKernelBundle::Compile(
    _classLinkage->GetClassInstance(
        "catmarkComputeVertexB", 0, &_kernelComputeCatmarkVertexB);
    assert(_kernelComputeCatmarkVertexB);
    _classLinkage->GetClassInstance(
        "catmarkComputeRestrictedVertexA", 0, &_kernelComputeCatmarkRestrictedVertexA);
    assert(_kernelComputeCatmarkRestrictedVertexA);
    _classLinkage->GetClassInstance(
        "catmarkComputeRestrictedVertexB1", 0, &_kernelComputeCatmarkRestrictedVertexB1);
    assert(_kernelComputeCatmarkRestrictedVertexB1);
    _classLinkage->GetClassInstance(
        "catmarkComputeRestrictedVertexB2", 0, &_kernelComputeCatmarkRestrictedVertexB2);
    assert(_kernelComputeCatmarkRestrictedVertexB2);
    _classLinkage->GetClassInstance(
        "loopComputeVertexB", 0, &_kernelComputeLoopVertexB);
    assert(_kernelComputeLoopVertexB);

@@ -414,6 +429,54 @@ OsdD3D11ComputeKernelBundle::ApplyCatmarkVertexVerticesKernelA(
    dispatchCompute(_kernelComputeVertexA, args);
}

void
OsdD3D11ComputeKernelBundle::ApplyCatmarkRestrictedVertexVerticesKernelB1(
    int vertexOffset, int tableOffset, int start, int end,
    int vertexBaseOffset, int varyingBaseOffset) {

    KernelCB args;
    ZeroMemory(&args, sizeof(args));
    args.vertexOffset = vertexOffset;
    args.tableOffset = tableOffset;
    args.indexStart = start;
    args.indexEnd = end;
    args.vertexBaseOffset = vertexBaseOffset;
    args.varyingBaseOffset = varyingBaseOffset;
    dispatchCompute(_kernelComputeCatmarkRestrictedVertexB1, args);
}

void
OsdD3D11ComputeKernelBundle::ApplyCatmarkRestrictedVertexVerticesKernelB2(
    int vertexOffset, int tableOffset, int start, int end,
    int vertexBaseOffset, int varyingBaseOffset) {

    KernelCB args;
    ZeroMemory(&args, sizeof(args));
    args.vertexOffset = vertexOffset;
    args.tableOffset = tableOffset;
    args.indexStart = start;
    args.indexEnd = end;
    args.vertexBaseOffset = vertexBaseOffset;
    args.varyingBaseOffset = varyingBaseOffset;
    dispatchCompute(_kernelComputeCatmarkRestrictedVertexB2, args);
}

void
OsdD3D11ComputeKernelBundle::ApplyCatmarkRestrictedVertexVerticesKernelA(
    int vertexOffset, int tableOffset, int start, int end,
    int vertexBaseOffset, int varyingBaseOffset) {

    KernelCB args;
    ZeroMemory(&args, sizeof(args));
    args.vertexOffset = vertexOffset;
    args.tableOffset = tableOffset;
    args.indexStart = start;
    args.indexEnd = end;
    args.vertexBaseOffset = vertexBaseOffset;
    args.varyingBaseOffset = varyingBaseOffset;
    dispatchCompute(_kernelComputeCatmarkRestrictedVertexA, args);
}

void
OsdD3D11ComputeKernelBundle::ApplyLoopEdgeVerticesKernel(
    int vertexOffset, int tableOffset, int start, int end,

@@ -91,6 +91,18 @@ public:
        int vertexOffset, int tableOffset, int start, int end, bool pass,
        int vertexBaseOffset, int varyingBaseOffset);

    void ApplyCatmarkRestrictedVertexVerticesKernelB1(
        int vertexOffset, int tableOffset, int start, int end,
        int vertexBaseOffset, int varyingBaseOffset);

    void ApplyCatmarkRestrictedVertexVerticesKernelB2(
        int vertexOffset, int tableOffset, int start, int end,
        int vertexBaseOffset, int varyingBaseOffset);

    void ApplyCatmarkRestrictedVertexVerticesKernelA(
        int vertexOffset, int tableOffset, int start, int end,
        int vertexBaseOffset, int varyingBaseOffset);

    void ApplyLoopEdgeVerticesKernel(
        int vertexOffset, int tableOffset, int start, int end,
        int vertexBaseOffset, int varyingBaseOffset);

@@ -160,6 +172,12 @@ protected:

    ID3D11ClassInstance * _kernelComputeCatmarkVertexB; // vertex-vertex kernel B (catmark scheme)

    ID3D11ClassInstance * _kernelComputeCatmarkRestrictedVertexA; // restricted vertex-vertex kernel A (catmark scheme)

    ID3D11ClassInstance * _kernelComputeCatmarkRestrictedVertexB1; // restricted vertex-vertex kernel B1 (catmark scheme)

    ID3D11ClassInstance * _kernelComputeCatmarkRestrictedVertexB2; // restricted vertex-vertex kernel B2 (catmark scheme)

    ID3D11ClassInstance * _kernelComputeLoopVertexB; // vertex-vertex kernel B (loop scheme)

    ID3D11ClassInstance * _kernelEditAdd; // hedit kernel (add)

@@ -195,6 +195,50 @@ OsdGcdComputeController::ApplyCatmarkVertexVerticesKernelA2(
        _gcd_queue);
}

void
OsdGcdComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB1(
    FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {

    assert(context);

    OsdGcdComputeRestrictedVertexB1(
        _currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
        _currentBindState.vertexDesc, _currentBindState.varyingDesc,
        (const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
        (const int*)context->GetTable(FarSubdivisionTables::V_IT)->GetBuffer(),
        batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(),
        _gcd_queue);
}

void
OsdGcdComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB2(
    FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {

    assert(context);

    OsdGcdComputeRestrictedVertexB2(
        _currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
        _currentBindState.vertexDesc, _currentBindState.varyingDesc,
        (const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
        (const int*)context->GetTable(FarSubdivisionTables::V_IT)->GetBuffer(),
        batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(),
        _gcd_queue);
}

void
OsdGcdComputeController::ApplyCatmarkRestrictedVertexVerticesKernelA(
    FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {

    assert(context);

    OsdGcdComputeRestrictedVertexA(
        _currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
        _currentBindState.vertexDesc, _currentBindState.varyingDesc,
        (const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
        batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(),
        _gcd_queue);
}

void
OsdGcdComputeController::ApplyLoopEdgeVerticesKernel(
    FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {

@@ -134,6 +134,11 @@ protected:

    void ApplyCatmarkVertexVerticesKernelA2(FarKernelBatch const &batch, ComputeContext const *context) const;

    void ApplyCatmarkRestrictedVertexVerticesKernelB1(FarKernelBatch const &batch, ComputeContext const *context) const;

    void ApplyCatmarkRestrictedVertexVerticesKernelB2(FarKernelBatch const &batch, ComputeContext const *context) const;

    void ApplyCatmarkRestrictedVertexVerticesKernelA(FarKernelBatch const &batch, ComputeContext const *context) const;

    void ApplyLoopEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;

@@ -222,6 +222,78 @@ void OsdGcdComputeVertexB(
                          vertexOffset, tableOffset, start_e, end_e);
}

void OsdGcdComputeRestrictedVertexA(
    float * vertex, float * varying,
    OsdVertexBufferDescriptor const &vertexDesc,
    OsdVertexBufferDescriptor const &varyingDesc,
    const int *V_ITa,
    int vertexOffset, int tableOffset, int start, int end,
    dispatch_queue_t gcdq) {

    const int workSize = end-start;
    dispatch_apply(workSize/GCD_WORK_STRIDE, gcdq, ^(size_t blockIdx){
        const int start_i = start + blockIdx*GCD_WORK_STRIDE;
        const int end_i = start_i + GCD_WORK_STRIDE;
        OsdCpuComputeRestrictedVertexA(vertex, varying, vertexDesc, varyingDesc,
                                       V_ITa,
                                       vertexOffset, tableOffset, start_i, end_i);
    });
    const int start_e = end - workSize%GCD_WORK_STRIDE;
    const int end_e = end;
    if (start_e < end_e)
        OsdCpuComputeRestrictedVertexA(vertex, varying, vertexDesc, varyingDesc,
                                       V_ITa,
                                       vertexOffset, tableOffset, start_e, end_e);
}
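
// Work-splitting note (applies to the three OsdGcdComputeRestrictedVertex* entry points
// here): dispatch_apply runs workSize/GCD_WORK_STRIDE blocks of GCD_WORK_STRIDE vertices
// concurrently on the queue, and the remainder (workSize % GCD_WORK_STRIDE) is handled
// serially afterwards. Hypothetical example: with GCD_WORK_STRIDE == 32 and a batch of
// 100 vertices, three blocks of 32 run in parallel and the last 4 run on the calling thread.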

void OsdGcdComputeRestrictedVertexB1(
    float * vertex, float * varying,
    OsdVertexBufferDescriptor const &vertexDesc,
    OsdVertexBufferDescriptor const &varyingDesc,
    const int *V_ITa, const int *V_IT,
    int vertexOffset, int tableOffset, int start, int end,
    dispatch_queue_t gcdq) {

    const int workSize = end-start;
    dispatch_apply(workSize/GCD_WORK_STRIDE, gcdq, ^(size_t blockIdx){
        const int start_i = start + blockIdx*GCD_WORK_STRIDE;
        const int end_i = start_i + GCD_WORK_STRIDE;
        OsdCpuComputeRestrictedVertexB1(vertex, varying, vertexDesc, varyingDesc,
                                        V_ITa, V_IT,
                                        vertexOffset, tableOffset, start_i, end_i);
    });
    const int start_e = end - workSize%GCD_WORK_STRIDE;
    const int end_e = end;
    if (start_e < end_e)
        OsdCpuComputeRestrictedVertexB1(vertex, varying, vertexDesc, varyingDesc,
                                        V_ITa, V_IT,
                                        vertexOffset, tableOffset, start_e, end_e);
}

void OsdGcdComputeRestrictedVertexB2(
    float * vertex, float * varying,
    OsdVertexBufferDescriptor const &vertexDesc,
    OsdVertexBufferDescriptor const &varyingDesc,
    const int *V_ITa, const int *V_IT,
    int vertexOffset, int tableOffset, int start, int end,
    dispatch_queue_t gcdq) {

    const int workSize = end-start;
    dispatch_apply(workSize/GCD_WORK_STRIDE, gcdq, ^(size_t blockIdx){
        const int start_i = start + blockIdx*GCD_WORK_STRIDE;
        const int end_i = start_i + GCD_WORK_STRIDE;
        OsdCpuComputeRestrictedVertexB2(vertex, varying, vertexDesc, varyingDesc,
                                        V_ITa, V_IT,
                                        vertexOffset, tableOffset, start_i, end_i);
    });
    const int start_e = end - workSize%GCD_WORK_STRIDE;
    const int end_e = end;
    if (start_e < end_e)
        OsdCpuComputeRestrictedVertexB2(vertex, varying, vertexDesc, varyingDesc,
                                        V_ITa, V_IT,
                                        vertexOffset, tableOffset, start_e, end_e);
}

void OsdGcdComputeLoopVertexB(
    float * vertex, float * varying,
    OsdVertexBufferDescriptor const &vertexDesc,

@@ -90,6 +90,30 @@ void OsdGcdComputeVertexB(float *vertex, float * varying,
                          int start, int end,
                          dispatch_queue_t gcdq);

void OsdGcdComputeRestrictedVertexA(float *vertex, float * varying,
                                    OsdVertexBufferDescriptor const &vertexDesc,
                                    OsdVertexBufferDescriptor const &varyingDesc,
                                    const int *V_ITa,
                                    int vertexOffset, int tableOffset,
                                    int start, int end,
                                    dispatch_queue_t gcdq);

void OsdGcdComputeRestrictedVertexB1(float *vertex, float * varying,
                                     OsdVertexBufferDescriptor const &vertexDesc,
                                     OsdVertexBufferDescriptor const &varyingDesc,
                                     const int *V_ITa, const int *V_IT,
                                     int vertexOffset, int tableOffset,
                                     int start, int end,
                                     dispatch_queue_t gcdq);

void OsdGcdComputeRestrictedVertexB2(float *vertex, float * varying,
                                     OsdVertexBufferDescriptor const &vertexDesc,
                                     OsdVertexBufferDescriptor const &varyingDesc,
                                     const int *V_ITa, const int *V_IT,
                                     int vertexOffset, int tableOffset,
                                     int start, int end,
                                     dispatch_queue_t gcdq);

void OsdGcdComputeLoopVertexB(float *vertex, float * varying,
                              OsdVertexBufferDescriptor const &vertexDesc,
                              OsdVertexBufferDescriptor const &varyingDesc,

@@ -216,6 +216,39 @@ OsdGLSLComputeController::ApplyCatmarkVertexVerticesKernelA2(
        batch.GetStart(), batch.GetEnd(), true);
}

void
OsdGLSLComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB1(
    FarKernelBatch const &batch, OsdGLSLComputeContext const *context) const {

    assert(context);

    _currentBindState.kernelBundle->ApplyCatmarkRestrictedVertexVerticesKernelB1(
        batch.GetVertexOffset(), batch.GetTableOffset(),
        batch.GetStart(), batch.GetEnd());
}

void
OsdGLSLComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB2(
    FarKernelBatch const &batch, OsdGLSLComputeContext const *context) const {

    assert(context);

    _currentBindState.kernelBundle->ApplyCatmarkRestrictedVertexVerticesKernelB2(
        batch.GetVertexOffset(), batch.GetTableOffset(),
        batch.GetStart(), batch.GetEnd());
}

void
OsdGLSLComputeController::ApplyCatmarkRestrictedVertexVerticesKernelA(
    FarKernelBatch const &batch, OsdGLSLComputeContext const *context) const {

    assert(context);

    _currentBindState.kernelBundle->ApplyCatmarkRestrictedVertexVerticesKernelA(
        batch.GetVertexOffset(), batch.GetTableOffset(),
        batch.GetStart(), batch.GetEnd());
}

void
OsdGLSLComputeController::ApplyLoopEdgeVerticesKernel(
    FarKernelBatch const &batch, OsdGLSLComputeContext const *context) const {

@@ -143,6 +143,12 @@ protected:

    void ApplyCatmarkVertexVerticesKernelA2(FarKernelBatch const &batch, ComputeContext const *context) const;

    void ApplyCatmarkRestrictedVertexVerticesKernelB1(FarKernelBatch const &batch, ComputeContext const *context) const;

    void ApplyCatmarkRestrictedVertexVerticesKernelB2(FarKernelBatch const &batch, ComputeContext const *context) const;

    void ApplyCatmarkRestrictedVertexVerticesKernelA(FarKernelBatch const &batch, ComputeContext const *context) const;


    void ApplyLoopEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;

@@ -416,6 +416,82 @@ void catmarkComputeVertexB()
    writeVertex(vid, dst);
}

// Restricted vertex-vertices compute Kernels 'A' / k_Crease and k_Corner rules
subroutine(computeKernelType)
void catmarkComputeRestrictedVertexA()
{
    int i = int(gl_GlobalInvocationID.x) + indexStart;
    if (i >= indexEnd) return;
    int vid = i + vertexOffset;
    i += tableOffset;

    int p = _V_ITa[5*i+2];
    int eidx0 = _V_ITa[5*i+3];
    int eidx1 = _V_ITa[5*i+4];

    Vertex dst;
    clear(dst);

    addWithWeight(dst, readVertex(p), 0.75f);
    addWithWeight(dst, readVertex(eidx0), 0.125f);
    addWithWeight(dst, readVertex(eidx1), 0.125f);
    addVaryingWithWeight(dst, readVertex(p), 1);

    writeVertex(vid, dst);
}
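
// Table layout note: the restricted vertex-vertex kernels read five ints per vertex from
// _V_ITa -- [5*i] an offset into _V_IT, [5*i+1] the valence n, [5*i+2] the parent vertex
// index, and [5*i+3] / [5*i+4] the two edge-vertex indices used by kernel 'A' above
// (interpretation inferred from how the fields are used here and in the B1/B2 kernels below).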

// Restricted vertex-vertices compute Kernels 'B' / regular k_Dart and k_Smooth rules
subroutine(computeKernelType)
void catmarkComputeRestrictedVertexB1()
{
    int i = int(gl_GlobalInvocationID.x) + indexStart;
    if (i >= indexEnd) return;
    int vid = i + vertexOffset;
    i += tableOffset;

    int h = _V_ITa[5*i];
    int p = _V_ITa[5*i+2];

    Vertex dst;
    clear(dst);

    addWithWeight(dst, readVertex(p), 0.5f);

    for(int j = 0; j < 8; ++j)
        addWithWeight(dst, readVertex(_V_IT[h+j]), 0.0625f);
    addVaryingWithWeight(dst, readVertex(p), 1);
    writeVertex(vid, dst);
}

// Restricted vertex-vertices compute Kernels 'B' / irregular k_Dart and k_Smooth rules
subroutine(computeKernelType)
void catmarkComputeRestrictedVertexB2()
{
    int i = int(gl_GlobalInvocationID.x) + indexStart;
    if (i >= indexEnd) return;
    int vid = i + vertexOffset;
    i += tableOffset;

    int h = _V_ITa[5*i];
    int n = _V_ITa[5*i+1];
    int p = _V_ITa[5*i+2];

    float wp = 1.0/float(n*n);
    float wv = (n-2.0) * n * wp;

    Vertex dst;
    clear(dst);

    addWithWeight(dst, readVertex(p), wv);

    for(int j = 0; j < n; ++j){
        addWithWeight(dst, readVertex(_V_IT[h+j*2]), wp);
        addWithWeight(dst, readVertex(_V_IT[h+j*2+1]), wp);
    }
    addVaryingWithWeight(dst, readVertex(p), 1);
    writeVertex(vid, dst);
}

// Vertex-vertices compute Kernels 'B' / k_Dart and k_Smooth rules
subroutine(computeKernelType)
void loopComputeVertexB()

@@ -114,36 +114,45 @@ OsdGLSLComputeKernelBundle::Compile(

    glDeleteShader(shader);

    _subComputeFace = glGetSubroutineIndex(_program,
            GL_COMPUTE_SHADER,
            "catmarkComputeFace");
    _subComputeQuadFace = glGetSubroutineIndex(_program,
            GL_COMPUTE_SHADER,
            "catmarkComputeQuadFace");
    _subComputeTriQuadFace = glGetSubroutineIndex(_program,
            GL_COMPUTE_SHADER,
            "catmarkComputeTriQuadFace");
    _subComputeEdge = glGetSubroutineIndex(_program,
            GL_COMPUTE_SHADER,
            "catmarkComputeEdge");
    _subComputeRestrictedEdge = glGetSubroutineIndex(_program,
            GL_COMPUTE_SHADER,
            "catmarkComputeRestrictedEdge");
    _subComputeBilinearEdge = glGetSubroutineIndex(_program,
            GL_COMPUTE_SHADER,
            "bilinearComputeEdge");
    _subComputeVertex = glGetSubroutineIndex(_program,
            GL_COMPUTE_SHADER,
            "bilinearComputeVertex");
    _subComputeVertexA = glGetSubroutineIndex(_program,
            GL_COMPUTE_SHADER,
            "catmarkComputeVertexA");
    _subComputeCatmarkVertexB = glGetSubroutineIndex(_program,
            GL_COMPUTE_SHADER,
            "catmarkComputeVertexB");
    _subComputeLoopVertexB = glGetSubroutineIndex(_program,
            GL_COMPUTE_SHADER,
            "loopComputeVertexB");
    _subComputeFace = glGetSubroutineIndex(_program,
            GL_COMPUTE_SHADER,
            "catmarkComputeFace");
    _subComputeQuadFace = glGetSubroutineIndex(_program,
            GL_COMPUTE_SHADER,
            "catmarkComputeQuadFace");
    _subComputeTriQuadFace = glGetSubroutineIndex(_program,
            GL_COMPUTE_SHADER,
            "catmarkComputeTriQuadFace");
    _subComputeEdge = glGetSubroutineIndex(_program,
            GL_COMPUTE_SHADER,
            "catmarkComputeEdge");
    _subComputeRestrictedEdge = glGetSubroutineIndex(_program,
            GL_COMPUTE_SHADER,
            "catmarkComputeRestrictedEdge");
    _subComputeRestrictedVertexA = glGetSubroutineIndex(_program,
            GL_COMPUTE_SHADER,
            "catmarkComputeRestrictedVertexA");
    _subComputeRestrictedVertexB1 = glGetSubroutineIndex(_program,
            GL_COMPUTE_SHADER,
            "catmarkComputeRestrictedVertexB1");
    _subComputeRestrictedVertexB2 = glGetSubroutineIndex(_program,
            GL_COMPUTE_SHADER,
            "catmarkComputeRestrictedVertexB2");
    _subComputeBilinearEdge = glGetSubroutineIndex(_program,
            GL_COMPUTE_SHADER,
            "bilinearComputeEdge");
    _subComputeVertex = glGetSubroutineIndex(_program,
            GL_COMPUTE_SHADER,
            "bilinearComputeVertex");
    _subComputeVertexA = glGetSubroutineIndex(_program,
            GL_COMPUTE_SHADER,
            "catmarkComputeVertexA");
    _subComputeCatmarkVertexB = glGetSubroutineIndex(_program,
            GL_COMPUTE_SHADER,
            "catmarkComputeVertexB");
    _subComputeLoopVertexB = glGetSubroutineIndex(_program,
            GL_COMPUTE_SHADER,
            "loopComputeVertexB");

    // set uniform locations for compute
    _uniformVertexPass = glGetUniformLocation(_program, "vertexPass");

@@ -287,6 +296,30 @@ OsdGLSLComputeKernelBundle::ApplyCatmarkVertexVerticesKernelA(
    dispatchCompute(vertexOffset, tableOffset, start, end);
}

void
OsdGLSLComputeKernelBundle::ApplyCatmarkRestrictedVertexVerticesKernelB1(
    int vertexOffset, int tableOffset, int start, int end) {

    glUniformSubroutinesuiv(GL_COMPUTE_SHADER, 1, &_subComputeRestrictedVertexB1);
    dispatchCompute(vertexOffset, tableOffset, start, end);
}

void
OsdGLSLComputeKernelBundle::ApplyCatmarkRestrictedVertexVerticesKernelB2(
    int vertexOffset, int tableOffset, int start, int end) {

    glUniformSubroutinesuiv(GL_COMPUTE_SHADER, 1, &_subComputeRestrictedVertexB2);
    dispatchCompute(vertexOffset, tableOffset, start, end);
}

void
OsdGLSLComputeKernelBundle::ApplyCatmarkRestrictedVertexVerticesKernelA(
    int vertexOffset, int tableOffset, int start, int end) {

    glUniformSubroutinesuiv(GL_COMPUTE_SHADER, 1, &_subComputeRestrictedVertexA);
    dispatchCompute(vertexOffset, tableOffset, start, end);
}

void
OsdGLSLComputeKernelBundle::ApplyLoopEdgeVerticesKernel(
    int vertexOffset, int tableOffset, int start, int end) {

@@ -78,6 +78,15 @@ public:
    void ApplyLoopEdgeVerticesKernel(
        int vertexOffset, int tableOffset, int start, int end);

    void ApplyCatmarkRestrictedVertexVerticesKernelB1(
        int vertexOffset, int tableOffset, int start, int end);

    void ApplyCatmarkRestrictedVertexVerticesKernelB2(
        int vertexOffset, int tableOffset, int start, int end);

    void ApplyCatmarkRestrictedVertexVerticesKernelA(
        int vertexOffset, int tableOffset, int start, int end);

    void ApplyLoopVertexVerticesKernelB(
        int vertexOffset, int tableOffset, int start, int end);

@@ -147,6 +156,12 @@ protected:

    GLuint _subComputeRestrictedEdge; // restricted edge-vertex kernel (catmark scheme)

    GLuint _subComputeRestrictedVertexA; // restricted vertex-vertex kernel A (catmark scheme)

    GLuint _subComputeRestrictedVertexB1; // restricted vertex-vertex kernel B1 (catmark scheme)

    GLuint _subComputeRestrictedVertexB2; // restricted vertex-vertex kernel B2 (catmark scheme)

    GLuint _subComputeBilinearEdge; // edge-vertex kernel (bilinear scheme)

    GLuint _subComputeVertex; // vertex-vertex kernel (bilinear scheme)

@@ -291,6 +291,42 @@ OsdGLSLTransformFeedbackComputeController::ApplyCatmarkVertexVerticesKernelA2(
        batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
}

void
OsdGLSLTransformFeedbackComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB1(
    FarKernelBatch const &batch, OsdGLSLTransformFeedbackComputeContext const *context) const {

    assert(context);

    _currentBindState.kernelBundle->ApplyCatmarkRestrictedVertexVerticesKernelB1(
        _currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
        _currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
        batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}

void
OsdGLSLTransformFeedbackComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB2(
    FarKernelBatch const &batch, OsdGLSLTransformFeedbackComputeContext const *context) const {

    assert(context);

    _currentBindState.kernelBundle->ApplyCatmarkRestrictedVertexVerticesKernelB2(
        _currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
        _currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
        batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}

void
OsdGLSLTransformFeedbackComputeController::ApplyCatmarkRestrictedVertexVerticesKernelA(
    FarKernelBatch const &batch, OsdGLSLTransformFeedbackComputeContext const *context) const {

    assert(context);

    _currentBindState.kernelBundle->ApplyCatmarkRestrictedVertexVerticesKernelA(
        _currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
        _currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
        batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}

void
OsdGLSLTransformFeedbackComputeController::ApplyLoopEdgeVerticesKernel(
    FarKernelBatch const &batch, OsdGLSLTransformFeedbackComputeContext const *context) const {

@@ -141,6 +141,12 @@ protected:

    void ApplyCatmarkVertexVerticesKernelA2(FarKernelBatch const &batch, ComputeContext const *context) const;

    void ApplyCatmarkRestrictedVertexVerticesKernelB1(FarKernelBatch const &batch, ComputeContext const *context) const;

    void ApplyCatmarkRestrictedVertexVerticesKernelB2(FarKernelBatch const &batch, ComputeContext const *context) const;

    void ApplyCatmarkRestrictedVertexVerticesKernelA(FarKernelBatch const &batch, ComputeContext const *context) const;


    void ApplyLoopEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;

@@ -433,6 +433,94 @@ void catmarkComputeVertexB()
    writeVertex(dst);
}

// Restricted vertex-vertices compute Kernels 'A' / k_Crease and k_Corner rules
subroutine(computeKernelType)
void catmarkComputeRestrictedVertexA()
{
    int i = gl_VertexID + indexStart + tableOffset;
    int vid = gl_VertexID + indexStart + vertexOffset;

    int p = texelFetch(_V0_ITa, 5*i+2).x;
    int eidx0 = texelFetch(_V0_ITa, 5*i+3).x;
    int eidx1 = texelFetch(_V0_ITa, 5*i+4).x;

    Vertex dst;
    clear(dst);

    addWithWeight(dst, readVertex(p), 0.75f);
    addWithWeight(dst, readVertex(eidx0), 0.125f);
    addWithWeight(dst, readVertex(eidx1), 0.125f);
    addVaryingWithWeight(dst, readVertex(p), 1.0f);

    writeVertex(dst);
}

// Restricted vertex-vertices compute Kernels 'B' / regular k_Dart and k_Smooth rules
subroutine(computeKernelType)
void catmarkComputeRestrictedVertexB1()
{
    int i = gl_VertexID + indexStart + tableOffset;

    int h = texelFetch(_V0_ITa, 5*i).x;
#ifdef OPT_CATMARK_V_IT_VEC2
    int h2 = h/2;
#endif
    int p = texelFetch(_V0_ITa, 5*i+2).x;

    Vertex dst;
    clear(dst);

    addWithWeight(dst, readVertex(p), 0.5f);

    for(int j = 0; j < 4; ++j){
#ifdef OPT_CATMARK_V_IT_VEC2
        ivec2 v0it = texelFetch(_V0_IT, h2+j).xy;
        addWithWeight(dst, readVertex(v0it.x), 0.0625f);
        addWithWeight(dst, readVertex(v0it.y), 0.0625f);
#else
        addWithWeight(dst, readVertex(texelFetch(_V0_IT, h+j*2).x), 0.0625f);
        addWithWeight(dst, readVertex(texelFetch(_V0_IT, h+j*2+1).x), 0.0625f);
#endif
    }
    addVaryingWithWeight(dst, readVertex(p), 1.0f);
    writeVertex(dst);
}
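
// Fetch-packing note: when OPT_CATMARK_V_IT_VEC2 is defined, _V0_IT appears to be bound
// as a two-component integer buffer, so each texelFetch at h2 + j returns a pair of
// neighbour indices (hence h2 = h/2); the #else branch fetches the same two indices one
// at a time from a single-component buffer.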

// Restricted vertex-vertices compute Kernels 'B' / irregular k_Dart and k_Smooth rules
subroutine(computeKernelType)
void catmarkComputeRestrictedVertexB2()
{
    int i = gl_VertexID + indexStart + tableOffset;

    int h = texelFetch(_V0_ITa, 5*i).x;
#ifdef OPT_CATMARK_V_IT_VEC2
    int h2 = h/2;
#endif
    int n = texelFetch(_V0_ITa, 5*i+1).x;
    int p = texelFetch(_V0_ITa, 5*i+2).x;

    float wp = 1.0/float(n*n);
    float wv = (n-2.0) * n * wp;

    Vertex dst;
    clear(dst);

    addWithWeight(dst, readVertex(p), wv);

    for(int j = 0; j < n; ++j){
#ifdef OPT_CATMARK_V_IT_VEC2
        ivec2 v0it = texelFetch(_V0_IT, h2+j).xy;
        addWithWeight(dst, readVertex(v0it.x), wp);
        addWithWeight(dst, readVertex(v0it.y), wp);
#else
        addWithWeight(dst, readVertex(texelFetch(_V0_IT, h+j*2).x), wp);
        addWithWeight(dst, readVertex(texelFetch(_V0_IT, h+j*2+1).x), wp);
#endif
    }
    addVaryingWithWeight(dst, readVertex(p), 1.0f);
    writeVertex(dst);
}

// Vertex-vertices compute Kernels 'B' / k_Dart and k_Smooth rules
subroutine(computeKernelType)
void loopComputeVertexB()

@@ -233,6 +233,9 @@ OsdGLSLTransformFeedbackKernelBundle::Compile(
    _subComputeVertex = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "bilinearComputeVertex");
    _subComputeVertexA = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeVertexA");
    _subComputeCatmarkVertexB = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeVertexB");
    _subComputeRestrictedVertexA = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeRestrictedVertexA");
    _subComputeRestrictedVertexB1 = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeRestrictedVertexB1");
    _subComputeRestrictedVertexB2 = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeRestrictedVertexB2");
    _subComputeLoopVertexB = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "loopComputeVertexB");

    _uniformVertexPass = glGetUniformLocation(_program, "vertexPass");

@@ -444,6 +447,42 @@ OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkVertexVerticesKernelA(
        offset, tableOffset, start, end);
}

void
OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkRestrictedVertexVerticesKernelB1(
    GLuint vertexBuffer, GLuint varyingBuffer,
    int vertexOffset, int varyingOffset,
    int offset, int tableOffset, int start, int end) {

    glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeRestrictedVertexB1);
    transformGpuBufferData(vertexBuffer, varyingBuffer,
                           vertexOffset, varyingOffset,
                           offset, tableOffset, start, end);
}

void
OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkRestrictedVertexVerticesKernelB2(
    GLuint vertexBuffer, GLuint varyingBuffer,
    int vertexOffset, int varyingOffset,
    int offset, int tableOffset, int start, int end) {

    glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeRestrictedVertexB2);
    transformGpuBufferData(vertexBuffer, varyingBuffer,
                           vertexOffset, varyingOffset,
                           offset, tableOffset, start, end);
}

void
OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkRestrictedVertexVerticesKernelA(
    GLuint vertexBuffer, GLuint varyingBuffer,
    int vertexOffset, int varyingOffset,
    int offset, int tableOffset, int start, int end) {

    glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeRestrictedVertexA);
    transformGpuBufferData(vertexBuffer, varyingBuffer,
                           vertexOffset, varyingOffset,
                           offset, tableOffset, start, end);
}

void
OsdGLSLTransformFeedbackKernelBundle::ApplyLoopEdgeVerticesKernel(
    GLuint vertexBuffer, GLuint varyingBuffer,

@@ -98,6 +98,21 @@ public:
        int vertexOffset, int varyingOffset,
        int offset, int tableOffset, int start, int end, bool pass);

    void ApplyCatmarkRestrictedVertexVerticesKernelB1(
        GLuint vertexBuffer, GLuint varyingBuffer,
        int vertexOffset, int varyingOffset,
        int offset, int tableOffset, int start, int end);

    void ApplyCatmarkRestrictedVertexVerticesKernelB2(
        GLuint vertexBuffer, GLuint varyingBuffer,
        int vertexOffset, int varyingOffset,
        int offset, int tableOffset, int start, int end);

    void ApplyCatmarkRestrictedVertexVerticesKernelA(
        GLuint vertexBuffer, GLuint varyingBuffer,
        int vertexOffset, int varyingOffset,
        int offset, int tableOffset, int start, int end);

    void ApplyLoopEdgeVerticesKernel(
        GLuint vertexBuffer, GLuint varyingBuffer,
        int vertexOffset, int varyingOffset,

@@ -212,6 +227,12 @@ protected:

    GLuint _subComputeCatmarkVertexB; // vertex-vertex kernel B (catmark scheme)

    GLuint _subComputeRestrictedVertexA; // restricted vertex-vertex kernel A (catmark scheme)

    GLuint _subComputeRestrictedVertexB1; // restricted vertex-vertex kernel B1 (catmark scheme)

    GLuint _subComputeRestrictedVertexB2; // restricted vertex-vertex kernel B2 (catmark scheme)

    GLuint _subComputeLoopVertexB; // vertex-vertex kernel B (loop scheme)

    GLuint _subEditAdd; // hedit kernel (add)

@@ -427,6 +427,86 @@ void runKernel( uint3 ID )
    }
};

// Restricted vertex-vertices compute Kernels 'A' / k_Crease and k_Corner rules
class CatmarkComputeRestrictedVertexA : IComputeKernel {
    int placeholder;
    void runKernel( uint3 ID )
    {
        int i = int(ID.x) + indexStart;
        if (i >= indexEnd) return;
        int vid = i + vertexOffset;
        i += tableOffset;

        int p = _V_ITa[5*i+2];
        int eidx0 = _V_ITa[5*i+3];
        int eidx1 = _V_ITa[5*i+4];

        Vertex dst;
        clear(dst);

        addWithWeight(dst, readVertex(p), 0.75f);
        addWithWeight(dst, readVertex(eidx0), 0.125f);
        addWithWeight(dst, readVertex(eidx1), 0.125f);
        addVaryingWithWeight(dst, readVertex(p), 1);

        writeVertex(vid, dst);
    }
};

// Restricted vertex-vertices compute Kernels 'B' / regular k_Dart and k_Smooth rules
class CatmarkComputeRestrictedVertexB1 : IComputeKernel {
    int placeholder;
    void runKernel( uint3 ID )
    {
        int i = int(ID.x) + indexStart;
        if (i >= indexEnd) return;
        int vid = i + vertexOffset;
        i += tableOffset;

        int h = _V_ITa[5*i];
        int p = _V_ITa[5*i+2];

        Vertex dst;
        clear(dst);

        addWithWeight(dst, readVertex(p), 0.5f);
        for(int j = 0; j < 8; ++j)
            addWithWeight(dst, readVertex(_V_IT[h+j]), 0.0625f);
        addVaryingWithWeight(dst, readVertex(p), 1);
        writeVertex(vid, dst);
    }
};

// Restricted vertex-vertices compute Kernels 'B' / irregular k_Dart and k_Smooth rules
class CatmarkComputeRestrictedVertexB2 : IComputeKernel {
    int placeholder;
    void runKernel( uint3 ID )
    {
        int i = int(ID.x) + indexStart;
        if (i >= indexEnd) return;
        int vid = i + vertexOffset;
        i += tableOffset;

        int h = _V_ITa[5*i];
        int n = _V_ITa[5*i+1];
        int p = _V_ITa[5*i+2];

        float wp = 1.0/float(n*n);
        float wv = (n-2.0) * n * wp;

        Vertex dst;
        clear(dst);

        addWithWeight(dst, readVertex(p), wv);
        for(int j = 0; j < n; ++j){
            addWithWeight(dst, readVertex(_V_IT[h+j*2]), wp);
            addWithWeight(dst, readVertex(_V_IT[h+j*2+1]), wp);
        }
        addVaryingWithWeight(dst, readVertex(p), 1);
        writeVertex(vid, dst);
    }
};

// Vertex-vertices compute Kernels 'B' / k_Dart and k_Smooth rules
class LoopComputeVertexB : IComputeKernel {
    int placeholder;

@@ -493,6 +573,9 @@ BilinearComputeEdge bilinearComputeEdge;
BilinearComputeVertex bilinearComputeVertex;
CatmarkComputeVertexA catmarkComputeVertexA;
CatmarkComputeVertexB catmarkComputeVertexB;
CatmarkComputeRestrictedVertexA catmarkComputeRestrictedVertexA;
CatmarkComputeRestrictedVertexB1 catmarkComputeRestrictedVertexB1;
CatmarkComputeRestrictedVertexB2 catmarkComputeRestrictedVertexB2;
LoopComputeVertexB loopComputeVertexB;
EditAdd editAdd;
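
// Instance note: these global class instances are what the C++ kernel bundle looks up by
// name through ID3D11ClassLinkage::GetClassInstance() (see the
// "catmarkComputeRestrictedVertex*" strings in the D3D11 Compile() hunk above), so the
// instance names must match those strings exactly.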

@@ -190,6 +190,47 @@ OsdOmpComputeController::ApplyCatmarkVertexVerticesKernelA2(
        batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
}

void
OsdOmpComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB1(
    FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {

    assert(context);

    OsdOmpComputeRestrictedVertexB1(
        _currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
        _currentBindState.vertexDesc, _currentBindState.varyingDesc,
        (const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
        (const int*)context->GetTable(FarSubdivisionTables::V_IT)->GetBuffer(),
        batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}

void
OsdOmpComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB2(
    FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {

    assert(context);

    OsdOmpComputeRestrictedVertexB2(
        _currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
        _currentBindState.vertexDesc, _currentBindState.varyingDesc,
        (const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
        (const int*)context->GetTable(FarSubdivisionTables::V_IT)->GetBuffer(),
        batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}

void
OsdOmpComputeController::ApplyCatmarkRestrictedVertexVerticesKernelA(
    FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {

    assert(context);

    OsdOmpComputeRestrictedVertexA(
        _currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
        _currentBindState.vertexDesc, _currentBindState.varyingDesc,
        (const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
        batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}

void
OsdOmpComputeController::ApplyLoopEdgeVerticesKernel(
    FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {

@@ -142,6 +142,12 @@ protected:

    void ApplyCatmarkVertexVerticesKernelA2(FarKernelBatch const &batch, ComputeContext const *context) const;

    void ApplyCatmarkRestrictedVertexVerticesKernelB1(FarKernelBatch const &batch, ComputeContext const *context) const;

    void ApplyCatmarkRestrictedVertexVerticesKernelB2(FarKernelBatch const &batch, ComputeContext const *context) const;

    void ApplyCatmarkRestrictedVertexVerticesKernelA(FarKernelBatch const &batch, ComputeContext const *context) const;


    void ApplyLoopEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;

@@ -386,6 +386,126 @@ void OsdOmpComputeVertexB(
    }
}

void OsdOmpComputeRestrictedVertexA(
    float * vertex, float * varying,
    OsdVertexBufferDescriptor const &vertexDesc,
    OsdVertexBufferDescriptor const &varyingDesc,
    const int *V_ITa,
    int offset, int tableOffset, int start, int end) {

    int numThreads = omp_get_max_threads();
    float *vertexResultsArray = (float*)alloca(vertexDesc.length * sizeof(float) * numThreads);
    float *varyingResultsArray = (float*)alloca(varyingDesc.length * sizeof(float) * numThreads);

#pragma omp parallel for
    for (int i = start + tableOffset; i < end + tableOffset; i++) {
        int p = V_ITa[5*i+2];
        int eidx0 = V_ITa[5*i+3];
        int eidx1 = V_ITa[5*i+4];

        int dstIndex = offset + i - tableOffset;

        int threadId = omp_get_thread_num();
        float *vertexResults = vertexResultsArray +
            vertexDesc.length * threadId;
        float *varyingResults = varyingResultsArray +
            varyingDesc.length * threadId;

        clear(vertexResults, vertexDesc);
        clear(varyingResults, varyingDesc);

        addWithWeight(vertexResults, vertex, p, 0.75f, vertexDesc);
        addWithWeight(vertexResults, vertex, eidx0, 0.125f, vertexDesc);
        addWithWeight(vertexResults, vertex, eidx1, 0.125f, vertexDesc);
        copy(vertex, vertexResults, dstIndex, vertexDesc);

        addWithWeight(varyingResults, varying, p, 1.0f, varyingDesc);
        copy(varying, varyingResults, dstIndex, varyingDesc);
    }
}
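
// Threading note: the alloca'd scratch arrays above hold one vertexDesc.length /
// varyingDesc.length slice per OpenMP thread; each loop iteration accumulates into the
// slice selected by omp_get_thread_num() before copying the result out, so parallel
// iterations never write to a shared accumulator.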

void OsdOmpComputeRestrictedVertexB1(
    float * vertex, float * varying,
    OsdVertexBufferDescriptor const &vertexDesc,
    OsdVertexBufferDescriptor const &varyingDesc,
    const int *V_ITa, const int *V_IT,
    int offset, int tableOffset, int start, int end) {

    int numThreads = omp_get_max_threads();
    float *vertexResultsArray = (float*)alloca(vertexDesc.length * sizeof(float) * numThreads);
    float *varyingResultsArray = (float*)alloca(varyingDesc.length * sizeof(float) * numThreads);

#pragma omp parallel for
    for (int i = start + tableOffset; i < end + tableOffset; i++) {
        int h = V_ITa[5*i];
        int p = V_ITa[5*i+2];

        int dstIndex = offset + i - tableOffset;

        int threadId = omp_get_thread_num();
        float *vertexResults = vertexResultsArray +
            vertexDesc.length * threadId;
        float *varyingResults = varyingResultsArray +
            varyingDesc.length * threadId;

        clear(vertexResults, vertexDesc);
        clear(varyingResults, varyingDesc);

        addWithWeight(vertexResults, vertex, p, 0.5f, vertexDesc);

        for (int j = 0; j < 8; ++j)
            addWithWeight(vertexResults, vertex, V_IT[h+j], 0.0625f, vertexDesc);
        addWithWeight(varyingResults, varying, p, 1.0f, varyingDesc);

        copy(vertex, vertexResults, dstIndex, vertexDesc);
        copy(varying, varyingResults, dstIndex, varyingDesc);
    }
}

void OsdOmpComputeRestrictedVertexB2(
    float * vertex, float * varying,
    OsdVertexBufferDescriptor const &vertexDesc,
    OsdVertexBufferDescriptor const &varyingDesc,
    const int *V_ITa, const int *V_IT,
    int offset, int tableOffset, int start, int end) {

    int numThreads = omp_get_max_threads();
    float *vertexResultsArray = (float*)alloca(vertexDesc.length * sizeof(float) * numThreads);
    float *varyingResultsArray = (float*)alloca(varyingDesc.length * sizeof(float) * numThreads);

#pragma omp parallel for
    for (int i = start + tableOffset; i < end + tableOffset; i++) {
        int h = V_ITa[5*i];
        int n = V_ITa[5*i+1];
        int p = V_ITa[5*i+2];

        float wp = 1.0f/static_cast<float>(n*n);
        float wv = (n-2.0f) * n * wp;

        int dstIndex = offset + i - tableOffset;

        int threadId = omp_get_thread_num();
        float *vertexResults = vertexResultsArray +
            vertexDesc.length * threadId;
        float *varyingResults = varyingResultsArray +
            varyingDesc.length * threadId;

        clear(vertexResults, vertexDesc);
        clear(varyingResults, varyingDesc);

        addWithWeight(vertexResults, vertex, p, wv, vertexDesc);

        for (int j = 0; j < n; ++j) {
            addWithWeight(vertexResults, vertex, V_IT[h+j*2], wp, vertexDesc);
            addWithWeight(vertexResults, vertex, V_IT[h+j*2+1], wp, vertexDesc);
        }
        addWithWeight(varyingResults, varying, p, 1.0f, varyingDesc);

        copy(vertex, vertexResults, dstIndex, vertexDesc);
        copy(varying, varyingResults, dstIndex, varyingDesc);
    }
}

void OsdOmpComputeLoopVertexB(
    float * vertex, float * varying,
    OsdVertexBufferDescriptor const &vertexDesc,

@@ -57,7 +57,7 @@ void OsdOmpComputeTriQuadFace(float * vertex, float * varying,
void OsdOmpComputeEdge(float *vertex, float * varying,
                       OsdVertexBufferDescriptor const &vertexDesc,
                       OsdVertexBufferDescriptor const &varyingDesc,
                       const int *E_IT, const float *E_ITa,
                       const int *E_IT, const float *E_W,
                       int vertexOffset, int tableOffset,
                       int start, int end);

@@ -71,7 +71,7 @@ void OsdOmpComputeRestrictedEdge(float *vertex, float * varying,
void OsdOmpComputeVertexA(float *vertex, float * varying,
                          OsdVertexBufferDescriptor const &vertexDesc,
                          OsdVertexBufferDescriptor const &varyingDesc,
                          const int *V_ITa, const float *V_IT,
                          const int *V_ITa, const float *V_W,
                          int vertexOffset, int tableOffset,
                          int start, int end, int pass);

@@ -82,6 +82,27 @@ void OsdOmpComputeVertexB(float *vertex, float * varying,
                          int vertexOffset, int tableOffset,
                          int start, int end);

void OsdOmpComputeRestrictedVertexA(float *vertex, float * varying,
                                    OsdVertexBufferDescriptor const &vertexDesc,
                                    OsdVertexBufferDescriptor const &varyingDesc,
                                    const int *V_ITa,
                                    int vertexOffset, int tableOffset,
                                    int start, int end);

void OsdOmpComputeRestrictedVertexB1(float *vertex, float * varying,
                                     OsdVertexBufferDescriptor const &vertexDesc,
                                     OsdVertexBufferDescriptor const &varyingDesc,
                                     const int *V_ITa, const int *V_IT,
                                     int vertexOffset, int tableOffset,
                                     int start, int end);

void OsdOmpComputeRestrictedVertexB2(float *vertex, float * varying,
                                     OsdVertexBufferDescriptor const &vertexDesc,
                                     OsdVertexBufferDescriptor const &varyingDesc,
                                     const int *V_ITa, const int *V_IT,
                                     int vertexOffset, int tableOffset,
                                     int start, int end);

void OsdOmpComputeLoopVertexB(float *vertex, float * varying,
                              OsdVertexBufferDescriptor const &vertexDesc,
                              OsdVertexBufferDescriptor const &varyingDesc,

@@ -196,6 +196,47 @@ OsdTbbComputeController::ApplyCatmarkVertexVerticesKernelA2(
        batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
}

void
OsdTbbComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB1(
    FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {

    assert(context);

    OsdTbbComputeRestrictedVertexB1(
        _currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
        _currentBindState.vertexDesc, _currentBindState.varyingDesc,
        (const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
        (const int*)context->GetTable(FarSubdivisionTables::V_IT)->GetBuffer(),
        batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}

void
OsdTbbComputeController::ApplyCatmarkRestrictedVertexVerticesKernelB2(
    FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {

    assert(context);

    OsdTbbComputeRestrictedVertexB2(
        _currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
        _currentBindState.vertexDesc, _currentBindState.varyingDesc,
        (const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
        (const int*)context->GetTable(FarSubdivisionTables::V_IT)->GetBuffer(),
        batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}

void
OsdTbbComputeController::ApplyCatmarkRestrictedVertexVerticesKernelA(
    FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {

    assert(context);

    OsdTbbComputeRestrictedVertexA(
        _currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
        _currentBindState.vertexDesc, _currentBindState.varyingDesc,
        (const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
        batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}

void
OsdTbbComputeController::ApplyLoopEdgeVerticesKernel(
    FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {

@@ -134,6 +134,12 @@ protected:

    void ApplyCatmarkVertexVerticesKernelA2(FarKernelBatch const &batch, ComputeContext const *context) const;

    void ApplyCatmarkRestrictedVertexVerticesKernelB1(FarKernelBatch const &batch, ComputeContext const *context) const;

    void ApplyCatmarkRestrictedVertexVerticesKernelB2(FarKernelBatch const &batch, ComputeContext const *context) const;

    void ApplyCatmarkRestrictedVertexVerticesKernelA(FarKernelBatch const &batch, ComputeContext const *context) const;


    void ApplyLoopEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
@ -658,6 +658,226 @@ void OsdTbbComputeVertexB(
|
||||
tbb::parallel_for(range, kernel);
|
||||
}
|
||||
|
||||
class TBBRestrictedVertexKernelA {
    float *vertex;
    float *varying;
    OsdVertexBufferDescriptor vertexDesc;
    OsdVertexBufferDescriptor varyingDesc;
    int const *V_ITa;
    int vertexOffset;
    int tableOffset;

public:
    void operator() (tbb::blocked_range<int> const &r) const {
        for (int i = r.begin() + tableOffset; i < r.end() + tableOffset; i++) {
            int p = V_ITa[5*i+2];
            int eidx0 = V_ITa[5*i+3];
            int eidx1 = V_ITa[5*i+4];

            int dstIndex = i + vertexOffset - tableOffset;

            clear(vertex, dstIndex, vertexDesc);
            clear(varying, dstIndex, varyingDesc);
            addWithWeight(vertex, dstIndex, p, 0.75f, vertexDesc);
            addWithWeight(vertex, dstIndex, eidx0, 0.125f, vertexDesc);
            addWithWeight(vertex, dstIndex, eidx1, 0.125f, vertexDesc);
            addWithWeight(varying, dstIndex, p, 1.0f, varyingDesc);
        }
    }

    TBBRestrictedVertexKernelA(TBBRestrictedVertexKernelA const &other)
    {
        this->vertex = other.vertex;
        this->varying = other.varying;
        this->vertexDesc = other.vertexDesc;
        this->varyingDesc = other.varyingDesc;
        this->V_ITa = other.V_ITa;
        this->vertexOffset = other.vertexOffset;
        this->tableOffset = other.tableOffset;
    }

    TBBRestrictedVertexKernelA(float *vertex_in,
                               float *varying_in,
                               OsdVertexBufferDescriptor const &vertexDesc_in,
                               OsdVertexBufferDescriptor const &varyingDesc_in,
                               int const *V_ITa_in,
                               int vertexOffset_in,
                               int tableOffset_in) :
        vertex(vertex_in),
        varying(varying_in),
        vertexDesc(vertexDesc_in),
        varyingDesc(varyingDesc_in),
        V_ITa(V_ITa_in),
        vertexOffset(vertexOffset_in),
        tableOffset(tableOffset_in)
    {};
};

void OsdTbbComputeRestrictedVertexA(
    float *vertex, float *varying,
    OsdVertexBufferDescriptor const &vertexDesc,
    OsdVertexBufferDescriptor const &varyingDesc,
    int const *V_ITa, int vertexOffset, int tableOffset,
    int start, int end) {
    tbb::blocked_range<int> range(start, end, grain_size);
    TBBRestrictedVertexKernelA kernel(vertex, varying, vertexDesc, varyingDesc,
                                      V_ITa,
                                      vertexOffset, tableOffset);
    tbb::parallel_for(range, kernel);
}
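Read off its constants, the "A" kernel above applies a fixed mask: with p the parent vertex (V_ITa[5*i+2]) and e0, e1 the two tagged edge endpoints (V_ITa[5*i+3], V_ITa[5*i+4]), each destination vertex is

    v' = \frac{3}{4}\,p + \frac{1}{8}\,(e_0 + e_1)

Because the weights are literal constants, the kernel reads only V_ITa and no per-vertex weight (V_W) table; the varying data is copied straight from p.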

class TBBRestrictedVertexKernelB1 {
    float *vertex;
    float *varying;
    OsdVertexBufferDescriptor vertexDesc;
    OsdVertexBufferDescriptor varyingDesc;
    int const *V_ITa;
    int const *V_IT;
    int vertexOffset;
    int tableOffset;

public:
    void operator() (tbb::blocked_range<int> const &r) const {
        for (int i = r.begin() + tableOffset; i < r.end() + tableOffset; i++) {
            int h = V_ITa[5*i];
            int p = V_ITa[5*i+2];

            int dstIndex = i + vertexOffset - tableOffset;
            clear(vertex, dstIndex, vertexDesc);
            clear(varying, dstIndex, varyingDesc);

            addWithWeight(vertex, dstIndex, p, 0.5f, vertexDesc);

            for (int j = 0; j < 8; ++j)
                addWithWeight(vertex, dstIndex, V_IT[h+j], 0.0625f, vertexDesc);
            addWithWeight(varying, dstIndex, p, 1.0f, varyingDesc);
        }
    }

    TBBRestrictedVertexKernelB1(TBBRestrictedVertexKernelB1 const &other)
    {
        this->vertex = other.vertex;
        this->varying = other.varying;
        this->vertexDesc = other.vertexDesc;
        this->varyingDesc = other.varyingDesc;
        this->V_ITa = other.V_ITa;
        this->V_IT = other.V_IT;
        this->vertexOffset = other.vertexOffset;
        this->tableOffset = other.tableOffset;
    }

    TBBRestrictedVertexKernelB1(float *vertex_in,
                                float *varying_in,
                                OsdVertexBufferDescriptor const &vertexDesc_in,
                                OsdVertexBufferDescriptor const &varyingDesc_in,
                                int const *V_ITa_in,
                                int const *V_IT_in,
                                int vertexOffset_in,
                                int tableOffset_in) :
        vertex(vertex_in),
        varying(varying_in),
        vertexDesc(vertexDesc_in),
        varyingDesc(varyingDesc_in),
        V_ITa(V_ITa_in),
        V_IT(V_IT_in),
        vertexOffset(vertexOffset_in),
        tableOffset(tableOffset_in)
    {};
};

void OsdTbbComputeRestrictedVertexB1(
    float *vertex, float *varying,
    OsdVertexBufferDescriptor const &vertexDesc,
    OsdVertexBufferDescriptor const &varyingDesc,
    int const *V_ITa, int const *V_IT,
    int vertexOffset, int tableOffset, int start, int end) {

    tbb::blocked_range<int> range(start, end, grain_size);
    TBBRestrictedVertexKernelB1 kernel(vertex, varying, vertexDesc, varyingDesc,
                                       V_ITa, V_IT,
                                       vertexOffset, tableOffset);
    tbb::parallel_for(range, kernel);
}
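The "B1" kernel hard-codes the regular interior case: h = V_ITa[5*i] is the offset of the vertex's one-ring in V_IT, and exactly eight ring indices are accumulated with a fixed weight, i.e.

    v' = \frac{1}{2}\,p + \frac{1}{16}\sum_{j=0}^{7} r_j

where r_j denotes V_IT[h + j]. This is the valence-4 specialization of the general smooth-vertex mask applied by the B2 kernel below, since (n - 2)/n = 1/2 and 1/n^2 = 1/16 for n = 4.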

class TBBRestrictedVertexKernelB2 {
    float *vertex;
    float *varying;
    OsdVertexBufferDescriptor vertexDesc;
    OsdVertexBufferDescriptor varyingDesc;
    int const *V_ITa;
    int const *V_IT;
    int vertexOffset;
    int tableOffset;

public:
    void operator() (tbb::blocked_range<int> const &r) const {
        for (int i = r.begin() + tableOffset; i < r.end() + tableOffset; i++) {
            int h = V_ITa[5*i];
            int n = V_ITa[5*i+1];
            int p = V_ITa[5*i+2];

            float wp = 1.0f/static_cast<float>(n*n);
            float wv = (n-2.0f) * n * wp;

            int dstIndex = i + vertexOffset - tableOffset;
            clear(vertex, dstIndex, vertexDesc);
            clear(varying, dstIndex, varyingDesc);

            addWithWeight(vertex, dstIndex, p, wv, vertexDesc);

            for (int j = 0; j < n; ++j) {
                addWithWeight(vertex, dstIndex, V_IT[h+j*2], wp, vertexDesc);
                addWithWeight(vertex, dstIndex, V_IT[h+j*2+1], wp, vertexDesc);
            }
            addWithWeight(varying, dstIndex, p, 1.0f, varyingDesc);
        }
    }

    TBBRestrictedVertexKernelB2(TBBRestrictedVertexKernelB2 const &other)
    {
        this->vertex = other.vertex;
        this->varying = other.varying;
        this->vertexDesc = other.vertexDesc;
        this->varyingDesc = other.varyingDesc;
        this->V_ITa = other.V_ITa;
        this->V_IT = other.V_IT;
        this->vertexOffset = other.vertexOffset;
        this->tableOffset = other.tableOffset;
    }

    TBBRestrictedVertexKernelB2(float *vertex_in,
                                float *varying_in,
                                OsdVertexBufferDescriptor const &vertexDesc_in,
                                OsdVertexBufferDescriptor const &varyingDesc_in,
                                int const *V_ITa_in,
                                int const *V_IT_in,
                                int vertexOffset_in,
                                int tableOffset_in) :
        vertex(vertex_in),
        varying(varying_in),
        vertexDesc(vertexDesc_in),
        varyingDesc(varyingDesc_in),
        V_ITa(V_ITa_in),
        V_IT(V_IT_in),
        vertexOffset(vertexOffset_in),
        tableOffset(tableOffset_in)
    {};
};

void OsdTbbComputeRestrictedVertexB2(
    float *vertex, float *varying,
    OsdVertexBufferDescriptor const &vertexDesc,
    OsdVertexBufferDescriptor const &varyingDesc,
    int const *V_ITa, int const *V_IT,
    int vertexOffset, int tableOffset, int start, int end) {

    tbb::blocked_range<int> range(start, end, grain_size);
    TBBRestrictedVertexKernelB2 kernel(vertex, varying, vertexDesc, varyingDesc,
                                       V_ITa, V_IT,
                                       vertexOffset, tableOffset);
    tbb::parallel_for(range, kernel);
}
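The "B2" kernel handles the remaining (irregular) smooth case with the full valence-dependent mask: n = V_ITa[5*i+1] is the valence, and the 2n ring indices stored pairwise at V_IT[h] .. V_IT[h + 2n - 1] all receive the same weight:

    w_p = \frac{1}{n^2}, \qquad w_v = \frac{n-2}{n}, \qquad v' = w_v\,p + w_p \sum_{k=0}^{2n-1} r_k

with r_k denoting V_IT[h + k]; this matches wv = (n - 2.0f) * n * wp with wp = 1/(n*n) in the loop above. As in the A and B1 kernels, the varying data is simply copied from the parent vertex.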

class TBBLoopVertexKernelB {
    float *vertex;
    float *varying;
@@ -70,7 +70,7 @@ void OsdTbbComputeRestrictedEdge(float *vertex, float * varying,
void OsdTbbComputeVertexA(float *vertex, float * varying,
                          OsdVertexBufferDescriptor const &vertexDesc,
                          OsdVertexBufferDescriptor const &varyingDesc,
-                         int const *V_ITa, float const *V_IT,
+                         int const *V_ITa, float const *V_W,
                          int vertexOffset, int tableOffset,
                          int start, int end, int pass);

@@ -81,6 +81,27 @@ void OsdTbbComputeVertexB(float *vertex, float * varying,
                          int vertexOffset, int tableOffset,
                          int start, int end);

void OsdTbbComputeRestrictedVertexA(float *vertex, float * varying,
                                    OsdVertexBufferDescriptor const &vertexDesc,
                                    OsdVertexBufferDescriptor const &varyingDesc,
                                    int const *V_ITa,
                                    int vertexOffset, int tableOffset,
                                    int start, int end);

void OsdTbbComputeRestrictedVertexB1(float *vertex, float * varying,
                                     OsdVertexBufferDescriptor const &vertexDesc,
                                     OsdVertexBufferDescriptor const &varyingDesc,
                                     int const *V_ITa, int const *V_IT,
                                     int vertexOffset, int tableOffset,
                                     int start, int end);

void OsdTbbComputeRestrictedVertexB2(float *vertex, float * varying,
                                     OsdVertexBufferDescriptor const &vertexDesc,
                                     OsdVertexBufferDescriptor const &varyingDesc,
                                     int const *V_ITa, int const *V_IT,
                                     int vertexOffset, int tableOffset,
                                     int start, int end);

void OsdTbbComputeLoopVertexB(float *vertex, float * varying,
                              OsdVertexBufferDescriptor const &vertexDesc,
                              OsdVertexBufferDescriptor const &varyingDesc,
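For completeness, here is a hedged usage sketch that drives one of the new entry points directly on raw buffers, outside the controller path. Everything in it is illustrative only: the buffer contents, the single hand-built V_ITa record, the descriptor values, and the assumptions that the declarations above are visible (e.g. via osd/tbbKernel.h), that OsdVertexBufferDescriptor is constructed as (offset, length, stride), and that TBB has been set up as the TBB compute controller normally does.

// Illustration only: exercises OsdTbbComputeRestrictedVertexA on a tiny,
// hand-built table (all values below are made-up placeholders).
#include <vector>

using namespace OpenSubdiv;

void RunRestrictedVertexAExample() {

    // vertices 0..3 are the source level; vertex 4 is the refined vertex
    std::vector<float> points(5 * 3, 0.0f);
    points[0*3 + 1] =  1.0f;   // v0 = (0, 1, 0)  -- the parent vertex
    points[1*3 + 0] =  1.0f;   // v1 = (1, 0, 0)
    points[3*3 + 0] = -1.0f;   // v3 = (-1, 0, 0)

    std::vector<float> varyings(5, 0.0f);
    varyings[0] = 2.0f;        // varying value carried by the parent vertex

    // one 5-int V_ITa record { h, n, p, eidx0, eidx1 };
    // the restricted "A" kernel reads only p, eidx0 and eidx1
    std::vector<int> V_ITa(5);
    V_ITa[0] = 0;  V_ITa[1] = 3;  V_ITa[2] = 0;  V_ITa[3] = 1;  V_ITa[4] = 3;

    OsdVertexBufferDescriptor vertexDesc(/*offset*/ 0, /*length*/ 3, /*stride*/ 3);
    OsdVertexBufferDescriptor varyingDesc(/*offset*/ 0, /*length*/ 1, /*stride*/ 1);

    OsdTbbComputeRestrictedVertexA(
        &points[0], &varyings[0],
        vertexDesc, varyingDesc,
        &V_ITa[0],
        /*vertexOffset*/ 4, /*tableOffset*/ 0,
        /*start*/ 0, /*end*/ 1);

    // points[12..14] now holds 0.75*v0 + 0.125*(v1 + v3) = (0, 0.75, 0)
    // varyings[4]   now holds a straight copy of varyings[0]
}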