mirror of
https://github.com/PixarAnimationStudios/OpenSubdiv
synced 2025-01-11 17:10:08 +00:00
Merge pull request #310 from nathan-at-digitalfish/new_edge_vertex_kernel
New edge vertex kernel
This commit is contained in:
commit
af3424e1da
@ -74,23 +74,39 @@ FarCatmarkSubdivisionTablesFactory<T,U>::Create( FarMeshFactory<T,U> * meshFacto
|
||||
|
||||
FarSubdivisionTables * result = new FarSubdivisionTables(maxlevel, FarSubdivisionTables::CATMARK);
|
||||
|
||||
// Calculate the size of the face-vertex index table
|
||||
// Calculate the size of the face-vertex indexing tables
|
||||
int minCoarseFaceValence = tablesFactory.GetMinCoarseFaceValence();
|
||||
int maxCoarseFaceValence = tablesFactory.GetMaxCoarseFaceValence();
|
||||
bool coarseMeshAllQuadFaces = minCoarseFaceValence == 4 && maxCoarseFaceValence == 4;
|
||||
bool coarseMeshAllTriQuadFaces = minCoarseFaceValence >= 3 && maxCoarseFaceValence <= 4;
|
||||
bool hasQuadFaceVertexKernel = meshFactory->IsKernelTypeSupported(FarKernelBatch::CATMARK_QUAD_FACE_VERTEX);
|
||||
bool hasTriQuadFaceVertexKernel = meshFactory->IsKernelTypeSupported(FarKernelBatch::CATMARK_TRI_QUAD_FACE_VERTEX);
|
||||
|
||||
int F_ITa_size = 0;
|
||||
if (!hasQuadFaceVertexKernel && !hasTriQuadFaceVertexKernel)
|
||||
F_ITa_size = tablesFactory.GetNumFaceVerticesTotal(maxlevel) * 2;
|
||||
else if (!coarseMeshAllTriQuadFaces || !hasTriQuadFaceVertexKernel)
|
||||
F_ITa_size = tablesFactory.GetNumFaceVerticesTotal(1) * 2;
|
||||
|
||||
int F_IT_size = tablesFactory.GetFaceVertsValenceSum();
|
||||
if (coarseMeshAllTriQuadFaces)
|
||||
if (coarseMeshAllTriQuadFaces && hasTriQuadFaceVertexKernel)
|
||||
F_IT_size += tablesFactory.GetNumCoarseTriangleFaces(); // add padding for tri faces
|
||||
|
||||
// Triangular interpolation mode :
|
||||
// see "smoothtriangle" tag introduced in prman 3.9 and HbrCatmarkSubdivision<T>
|
||||
typename HbrCatmarkSubdivision<T>::TriangleSubdivision triangleMethod =
|
||||
dynamic_cast<HbrCatmarkSubdivision<T> *>(meshFactory->GetHbrMesh()->GetSubdivision())->GetTriangleSubdivisionMethod();
|
||||
bool hasFractionalEdgeSharpness = tablesFactory.HasFractionalEdgeSharpness();
|
||||
bool useRestrictedEdgeVertexKernel = meshFactory->IsKernelTypeSupported(FarKernelBatch::CATMARK_RESTRICTED_EDGE_VERTEX);;
|
||||
useRestrictedEdgeVertexKernel &= !hasFractionalEdgeSharpness && triangleMethod != HbrCatmarkSubdivision<T>::k_New;
|
||||
|
||||
// Allocate memory for the indexing tables
|
||||
if (!coarseMeshAllTriQuadFaces)
|
||||
result->_F_ITa.resize(tablesFactory.GetNumFaceVerticesTotal(1) * 2);
|
||||
result->_F_ITa.resize(F_ITa_size);
|
||||
result->_F_IT.resize(F_IT_size);
|
||||
|
||||
result->_E_IT.resize(tablesFactory.GetNumEdgeVerticesTotal(maxlevel)*4);
|
||||
result->_E_W.resize(tablesFactory.GetNumEdgeVerticesTotal(maxlevel)*2);
|
||||
if (!useRestrictedEdgeVertexKernel)
|
||||
result->_E_W.resize(tablesFactory.GetNumEdgeVerticesTotal(maxlevel)*2);
|
||||
|
||||
result->_V_ITa.resize((tablesFactory.GetNumVertexVerticesTotal(maxlevel)
|
||||
- tablesFactory.GetNumVertexVerticesTotal(0))*5); // subtract coarse cage vertices
|
||||
@ -127,20 +143,22 @@ FarCatmarkSubdivisionTablesFactory<T,U>::Create( FarMeshFactory<T,U> * meshFacto
|
||||
int nFaceVertices = (int)tablesFactory._faceVertsList[level].size();
|
||||
|
||||
// choose the kernel type that best fits the face topology
|
||||
int kernelType;
|
||||
int kernelType = FarKernelBatch::CATMARK_FACE_VERTEX;
|
||||
if (level == 1) {
|
||||
if (coarseMeshAllQuadFaces)
|
||||
if (coarseMeshAllQuadFaces && hasQuadFaceVertexKernel)
|
||||
kernelType = FarKernelBatch::CATMARK_QUAD_FACE_VERTEX;
|
||||
else if (coarseMeshAllTriQuadFaces)
|
||||
else if (coarseMeshAllTriQuadFaces && hasTriQuadFaceVertexKernel)
|
||||
kernelType = FarKernelBatch::CATMARK_TRI_QUAD_FACE_VERTEX;
|
||||
else
|
||||
kernelType = FarKernelBatch::CATMARK_FACE_VERTEX;
|
||||
} else {
|
||||
kernelType = FarKernelBatch::CATMARK_QUAD_FACE_VERTEX;
|
||||
if (hasQuadFaceVertexKernel)
|
||||
kernelType = FarKernelBatch::CATMARK_QUAD_FACE_VERTEX;
|
||||
if (hasTriQuadFaceVertexKernel)
|
||||
kernelType = FarKernelBatch::CATMARK_TRI_QUAD_FACE_VERTEX;
|
||||
}
|
||||
|
||||
// add a batch for face vertices
|
||||
if (nFaceVertices > 0) { // in torus case, nfacevertices could be zero
|
||||
assert(meshFactory->IsKernelTypeSupported(kernelType));
|
||||
if (kernelType == FarKernelBatch::CATMARK_FACE_VERTEX) {
|
||||
batches->push_back(FarKernelBatch( kernelType,
|
||||
level,
|
||||
@ -189,19 +207,19 @@ FarCatmarkSubdivisionTablesFactory<T,U>::Create( FarMeshFactory<T,U> * meshFacto
|
||||
|
||||
// Edge vertices
|
||||
|
||||
// Triangular interpolation mode :
|
||||
// see "smoothtriangle" tag introduced in prman 3.9 and HbrCatmarkSubdivision<T>
|
||||
typename HbrCatmarkSubdivision<T>::TriangleSubdivision triangleMethod =
|
||||
dynamic_cast<HbrCatmarkSubdivision<T> *>(meshFactory->GetHbrMesh()->GetSubdivision())->GetTriangleSubdivisionMethod();
|
||||
|
||||
// "For each vertex, gather the 2 vertices from the parent edege and the
|
||||
// 2 child vertices from the faces to the left and right of that edge.
|
||||
// Adjust if edge has a crease or is on a boundary."
|
||||
int nEdgeVertices = (int)tablesFactory._edgeVertsList[level].size();
|
||||
|
||||
// add a batch for edge vertices
|
||||
kernelType = (useRestrictedEdgeVertexKernel ?
|
||||
FarKernelBatch::CATMARK_RESTRICTED_EDGE_VERTEX :
|
||||
FarKernelBatch::CATMARK_EDGE_VERTEX);
|
||||
|
||||
if (nEdgeVertices > 0)
|
||||
batches->push_back(FarKernelBatch( FarKernelBatch::CATMARK_EDGE_VERTEX,
|
||||
assert(meshFactory->IsKernelTypeSupported(kernelType));
|
||||
batches->push_back(FarKernelBatch( kernelType,
|
||||
level,
|
||||
0,
|
||||
0,
|
||||
@ -227,8 +245,20 @@ FarCatmarkSubdivisionTablesFactory<T,U>::Create( FarMeshFactory<T,U> * meshFacto
|
||||
|
||||
float faceWeight=0.5f, vertWeight=0.5f;
|
||||
|
||||
// in the case of a fractional sharpness, set the adjacent faces vertices
|
||||
if (!e->IsBoundary() && esharp <= 1.0f) {
|
||||
if (kernelType == FarKernelBatch::CATMARK_RESTRICTED_EDGE_VERTEX) {
|
||||
// in the case of a sharp edge, repeat the endpoint vertices
|
||||
if (!e->IsBoundary() && esharp == HbrHalfedge<T>::k_Smooth) {
|
||||
HbrFace<T>* rf = e->GetRightFace();
|
||||
HbrFace<T>* lf = e->GetLeftFace();
|
||||
|
||||
E_IT[4*i+2] = remap[lf->Subdivide()->GetID()];
|
||||
E_IT[4*i+3] = remap[rf->Subdivide()->GetID()];
|
||||
} else {
|
||||
E_IT[4*i+2] = E_IT[4*i+0];
|
||||
E_IT[4*i+3] = E_IT[4*i+1];
|
||||
}
|
||||
} else if (!e->IsBoundary() && esharp <= HbrHalfedge<T>::k_Sharp) {
|
||||
// in the case of a fractional sharpness, set the adjacent faces vertices
|
||||
|
||||
float leftWeight, rightWeight;
|
||||
HbrFace<T>* rf = e->GetRightFace();
|
||||
@ -250,11 +280,14 @@ FarCatmarkSubdivisionTablesFactory<T,U>::Create( FarMeshFactory<T,U> * meshFacto
|
||||
E_IT[4*i+2] = -1;
|
||||
E_IT[4*i+3] = -1;
|
||||
}
|
||||
E_W[2*i+0] = vertWeight;
|
||||
E_W[2*i+1] = faceWeight;
|
||||
if (kernelType == FarKernelBatch::CATMARK_EDGE_VERTEX) {
|
||||
E_W[2*i+0] = vertWeight;
|
||||
E_W[2*i+1] = faceWeight;
|
||||
}
|
||||
}
|
||||
E_IT += 4 * nEdgeVertices;
|
||||
E_W += 2 * nEdgeVertices;
|
||||
if (kernelType == FarKernelBatch::CATMARK_EDGE_VERTEX)
|
||||
E_W += 2 * nEdgeVertices;
|
||||
|
||||
// Vertex vertices
|
||||
|
||||
|
@ -96,6 +96,9 @@ FarDispatcher::ApplyKernel(CONTROLLER *controller, CONTEXT *context, FarKernelBa
|
||||
case FarKernelBatch::CATMARK_EDGE_VERTEX:
|
||||
controller->ApplyCatmarkEdgeVerticesKernel(batch, context);
|
||||
break;
|
||||
case FarKernelBatch::CATMARK_RESTRICTED_EDGE_VERTEX:
|
||||
controller->ApplyCatmarkRestrictedEdgeVerticesKernel(batch, context);
|
||||
break;
|
||||
case FarKernelBatch::CATMARK_VERT_VERTEX_B:
|
||||
controller->ApplyCatmarkVertexVerticesKernelB(batch, context);
|
||||
break;
|
||||
@ -185,6 +188,9 @@ public:
|
||||
template <class CONTEXT>
|
||||
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, CONTEXT *context) const;
|
||||
|
||||
template <class CONTEXT>
|
||||
void ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, CONTEXT *context) const;
|
||||
|
||||
template <class CONTEXT>
|
||||
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, CONTEXT *context) const;
|
||||
|
||||
@ -325,6 +331,21 @@ FarComputeController::ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch
|
||||
vsrc );
|
||||
}
|
||||
|
||||
template <class CONTEXT> void
|
||||
FarComputeController::ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, CONTEXT *context) const {
|
||||
|
||||
typename CONTEXT::VertexType *vsrc = &context->GetVertices().at(0);
|
||||
|
||||
FarSubdivisionTables const * subdivision = context->GetSubdivisionTables();
|
||||
assert(subdivision);
|
||||
|
||||
subdivision->computeCatmarkRestrictedEdgePoints( batch.GetVertexOffset(),
|
||||
batch.GetTableOffset(),
|
||||
batch.GetStart(),
|
||||
batch.GetEnd(),
|
||||
vsrc );
|
||||
}
|
||||
|
||||
template <class CONTEXT> void
|
||||
FarComputeController::ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, CONTEXT *context) const {
|
||||
|
||||
|
@ -65,10 +65,12 @@ class FarKernelBatch {
|
||||
public:
|
||||
|
||||
enum KernelType {
|
||||
CATMARK_FACE_VERTEX = 1,
|
||||
FIRST_KERNEL_TYPE = 1,
|
||||
CATMARK_FACE_VERTEX = FIRST_KERNEL_TYPE,
|
||||
CATMARK_QUAD_FACE_VERTEX,
|
||||
CATMARK_TRI_QUAD_FACE_VERTEX,
|
||||
CATMARK_EDGE_VERTEX,
|
||||
CATMARK_RESTRICTED_EDGE_VERTEX,
|
||||
CATMARK_VERT_VERTEX_A1,
|
||||
CATMARK_VERT_VERTEX_A2,
|
||||
CATMARK_VERT_VERTEX_B,
|
||||
@ -80,7 +82,8 @@ public:
|
||||
BILINEAR_EDGE_VERTEX,
|
||||
BILINEAR_VERT_VERTEX,
|
||||
HIERARCHICAL_EDIT,
|
||||
USER_DEFINED_KERNEL_START
|
||||
NUM_KERNEL_TYPES,
|
||||
USER_DEFINED_KERNEL_START = NUM_KERNEL_TYPES
|
||||
};
|
||||
|
||||
/// \brief Constructor.
|
||||
|
@ -93,8 +93,13 @@ public:
|
||||
/// @param patchType The type of patch to create: QUADS or TRIANGLES
|
||||
/// Note : patchType is only applicable if adaptive is false
|
||||
///
|
||||
/// @param kernelTypes A zero-terminated list of kernel types supported by the
|
||||
/// controller.
|
||||
/// Note : NULL indicates that all kernel types are supported
|
||||
///
|
||||
FarMeshFactory(HbrMesh<T> * mesh, int maxlevel, bool adaptive=false, int firstLevel=-1,
|
||||
FarPatchTables::Type patchType=FarPatchTables::QUADS);
|
||||
FarPatchTables::Type patchType=FarPatchTables::QUADS,
|
||||
const int * kernelTypes = NULL);
|
||||
|
||||
/// \brief Create a table-based mesh representation
|
||||
///
|
||||
@ -150,13 +155,24 @@ public:
|
||||
///
|
||||
int GetVertexID( HbrVertex<T> * v );
|
||||
|
||||
/// \brief Returns a the mapping between HbrVertex<T>->GetID() and Far
|
||||
/// \brief Returns the mapping between HbrVertex<T>->GetID() and Far
|
||||
/// vertices indices
|
||||
///
|
||||
/// @return the table that maps HbrMesh to FarMesh vertex indices
|
||||
///
|
||||
std::vector<int> const & GetRemappingTable( ) const { return _remapTable; }
|
||||
|
||||
/// \brief Returns true if the specified kernel type is supported by the
|
||||
/// controller
|
||||
///
|
||||
/// @return true if the kernel type is supported
|
||||
///
|
||||
bool IsKernelTypeSupported(int kernelType) const {
|
||||
assert(kernelType >= FarKernelBatch::FIRST_KERNEL_TYPE &&
|
||||
kernelType < FarKernelBatch::NUM_KERNEL_TYPES);
|
||||
return _supportedKernelTypes[kernelType];
|
||||
}
|
||||
|
||||
private:
|
||||
friend class FarBilinearSubdivisionTablesFactory<T,U>;
|
||||
friend class FarCatmarkSubdivisionTablesFactory<T,U>;
|
||||
@ -231,6 +247,8 @@ private:
|
||||
|
||||
FarPatchTables::Type _patchType;
|
||||
|
||||
bool _supportedKernelTypes[FarKernelBatch::NUM_KERNEL_TYPES];
|
||||
|
||||
// remapping table to translate vertex ID's between Hbr indices and the
|
||||
// order of the same vertices in the tables
|
||||
std::vector<int> _remapTable;
|
||||
@ -614,7 +632,8 @@ FarMeshFactory<T,U>::refineAdaptive( HbrMesh<T> * mesh, int maxIsolate ) {
|
||||
// random order, so the builder runs 2 passes over the entire vertex list to
|
||||
// gather the counters needed to generate the indexing tables.
|
||||
template <class T, class U>
|
||||
FarMeshFactory<T,U>::FarMeshFactory( HbrMesh<T> * mesh, int maxlevel, bool adaptive, int firstlevel, FarPatchTables::Type patchType ) :
|
||||
FarMeshFactory<T,U>::FarMeshFactory( HbrMesh<T> * mesh, int maxlevel, bool adaptive,
|
||||
int firstlevel, FarPatchTables::Type patchType, const int * kernelTypes ) :
|
||||
_hbrMesh(mesh),
|
||||
_adaptive(adaptive),
|
||||
_maxlevel(maxlevel),
|
||||
@ -630,6 +649,17 @@ FarMeshFactory<T,U>::FarMeshFactory( HbrMesh<T> * mesh, int maxlevel, bool adapt
|
||||
_numCoarseVertices = mesh->GetNumVertices();
|
||||
_numPtexFaces = getNumPtexFaces(mesh);
|
||||
|
||||
// Select the kernel types that are supported by the controller.
|
||||
for (int i = FarKernelBatch::FIRST_KERNEL_TYPE; i < FarKernelBatch::NUM_KERNEL_TYPES; ++i) {
|
||||
_supportedKernelTypes[i] = kernelTypes ? false : true;
|
||||
}
|
||||
|
||||
for (int i = kernelTypes ? *kernelTypes++ : 0; i; i = *kernelTypes++) {
|
||||
assert(i >= FarKernelBatch::FIRST_KERNEL_TYPE &&
|
||||
i < FarKernelBatch::NUM_KERNEL_TYPES);
|
||||
_supportedKernelTypes[i] = true;
|
||||
}
|
||||
|
||||
// Subdivide the Hbr mesh up to maxlevel.
|
||||
//
|
||||
// Note : using a placeholder vertex class 'T' can greatly speed up the
|
||||
|
@ -175,6 +175,10 @@ public:
|
||||
template <class U>
|
||||
void computeCatmarkEdgePoints(int vertexOffset, int tableOffset, int start, int end, U * vsrc) const;
|
||||
|
||||
// Compute-kernel applied to vertices resulting from the refinement of a smooth or sharp edge.
|
||||
template <class U>
|
||||
void computeCatmarkRestrictedEdgePoints(int vertexOffset, int tableOffset, int start, int end, U * vsrc) const;
|
||||
|
||||
// Compute-kernel applied to vertices resulting from the refinement of a vertex
|
||||
// Kernel "A" Handles the k_Crease and k_Corner rules
|
||||
template <class U>
|
||||
@ -471,6 +475,33 @@ FarSubdivisionTables::computeCatmarkEdgePoints( int vertexOffset, int tableOffse
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Restricted edge-vertices compute Kernel - completely re-entrant
|
||||
//
|
||||
|
||||
template <class U> void
|
||||
FarSubdivisionTables::computeCatmarkRestrictedEdgePoints( int vertexOffset, int tableOffset, int start, int end, U * vsrc ) const {
|
||||
|
||||
U * vdst = vsrc + vertexOffset + start;
|
||||
|
||||
for (int i=start+tableOffset; i<end+tableOffset; ++i, ++vdst ) {
|
||||
|
||||
vdst->Clear();
|
||||
|
||||
int eidx0 = this->_E_IT[4*i+0],
|
||||
eidx1 = this->_E_IT[4*i+1],
|
||||
eidx2 = this->_E_IT[4*i+2],
|
||||
eidx3 = this->_E_IT[4*i+3];
|
||||
|
||||
vdst->AddWithWeight( vsrc[eidx0], 0.25f );
|
||||
vdst->AddWithWeight( vsrc[eidx1], 0.25f );
|
||||
vdst->AddWithWeight( vsrc[eidx2], 0.25f );
|
||||
vdst->AddWithWeight( vsrc[eidx3], 0.25f );
|
||||
vdst->AddVaryingWithWeight( vsrc[eidx0], 0.5f );
|
||||
vdst->AddVaryingWithWeight( vsrc[eidx1], 0.5f );
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Vertex-vertices compute Kernels "A" and "B" - completely re-entrant
|
||||
//
|
||||
|
@ -106,6 +106,8 @@ protected:
|
||||
// Returns an integer based on the order in which the kernels are applied
|
||||
static int GetMaskRanking( unsigned char mask0, unsigned char mask1 );
|
||||
|
||||
bool HasFractionalEdgeSharpness() const { return _hasFractionalEdgeSharpness; }
|
||||
|
||||
// Per-level counters and offsets for each type of vertex (face,edge,vert)
|
||||
std::vector<int> _faceVertIdx,
|
||||
_edgeVertIdx,
|
||||
@ -128,6 +130,9 @@ protected:
|
||||
// Number of coarse triangle faces
|
||||
int _numCoarseTriangleFaces;
|
||||
|
||||
// Indicates if an edge has a fractional (non-integer) sharpness
|
||||
bool _hasFractionalEdgeSharpness;
|
||||
|
||||
private:
|
||||
|
||||
// Returns the subdivision level of a vertex
|
||||
@ -155,7 +160,8 @@ FarSubdivisionTablesFactory<T,U>::FarSubdivisionTablesFactory( HbrMesh<T> const
|
||||
_vertVertsList(maxlevel+1),
|
||||
_minCoarseFaceValence(0),
|
||||
_maxCoarseFaceValence(0),
|
||||
_numCoarseTriangleFaces(0)
|
||||
_numCoarseTriangleFaces(0),
|
||||
_hasFractionalEdgeSharpness(false)
|
||||
{
|
||||
assert( mesh );
|
||||
|
||||
@ -199,9 +205,14 @@ FarSubdivisionTablesFactory<T,U>::FarSubdivisionTablesFactory( HbrMesh<T> const
|
||||
if (valence == 3)
|
||||
++_numCoarseTriangleFaces;
|
||||
}
|
||||
} else if (v->GetParentEdge())
|
||||
} else if (v->GetParentEdge()) {
|
||||
edgeCounts[depth]++;
|
||||
else if (v->GetParentVertex()) {
|
||||
|
||||
// Determine if any edges have fractional sharpness.
|
||||
float sharpness = v->GetParentEdge()->GetSharpness();
|
||||
if (sharpness > HbrHalfedge<T>::k_Smooth && sharpness < HbrHalfedge<T>::k_Sharp)
|
||||
_hasFractionalEdgeSharpness = true;
|
||||
} else if (v->GetParentVertex()) {
|
||||
vertCounts[depth]++;
|
||||
_vertVertsValenceSum+=sumVertVertexValence(v);
|
||||
}
|
||||
@ -461,6 +472,10 @@ FarSubdivisionTablesFactory<T,U>::Splice(FarMeshVector const &meshes, FarKernelB
|
||||
}
|
||||
}
|
||||
|
||||
// pad E_W to align with E_IT when only some meshes use CATMARK_RESTRICTED_EDGE_VERTEX kernel
|
||||
if (total_E_W != 0)
|
||||
total_E_W = total_E_IT / 2;
|
||||
|
||||
FarSubdivisionTables *result = new FarSubdivisionTables(maxLevel, scheme);
|
||||
|
||||
result->_F_ITa.resize(total_F_ITa);
|
||||
@ -543,7 +558,10 @@ FarSubdivisionTablesFactory<T,U>::Splice(FarMeshVector const &meshes, FarKernelB
|
||||
|
||||
// copy edge tables
|
||||
E_IT = copyWithOffsetE_IT(E_IT, tables->Get_E_IT(), vertexOffsets[i]);
|
||||
E_W = copyWithOffset(E_W, tables->Get_E_W(), 0);
|
||||
if (!tables->Get_E_W().empty())
|
||||
E_W = copyWithOffset(E_W, tables->Get_E_W(), 0);
|
||||
else
|
||||
E_W += tables->Get_E_IT().size() / 2;
|
||||
|
||||
// copy vert tables
|
||||
if (scheme == FarSubdivisionTables::CATMARK or
|
||||
@ -577,6 +595,7 @@ FarSubdivisionTablesFactory<T,U>::Splice(FarMeshVector const &meshes, FarKernelB
|
||||
batch._tableOffset += F_IToffsets[i];
|
||||
|
||||
} else if (batch._kernelType == FarKernelBatch::CATMARK_EDGE_VERTEX or
|
||||
batch._kernelType == FarKernelBatch::CATMARK_RESTRICTED_EDGE_VERTEX or
|
||||
batch._kernelType == FarKernelBatch::LOOP_EDGE_VERTEX or
|
||||
batch._kernelType == FarKernelBatch::BILINEAR_EDGE_VERTEX) {
|
||||
|
||||
|
@ -266,6 +266,34 @@ OsdCLComputeController::ApplyCatmarkEdgeVerticesKernel(
|
||||
CL_CHECK_ERROR(ciErrNum, "edge kernel %d\n", ciErrNum);
|
||||
}
|
||||
|
||||
void
|
||||
OsdCLComputeController::ApplyCatmarkRestrictedEdgeVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdCLComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
cl_int ciErrNum;
|
||||
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
|
||||
cl_kernel kernel = _currentBindState.kernelBundle->GetCatmarkRestrictedEdgeKernel();
|
||||
|
||||
cl_mem E_IT = context->GetTable(FarSubdivisionTables::E_IT)->GetDevicePtr();
|
||||
|
||||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
|
||||
clSetKernelArg(kernel, 2, sizeof(cl_mem), &E_IT);
|
||||
clSetKernelArg(kernel, 3, sizeof(int), &_currentBindState.vertexDesc.offset);
|
||||
clSetKernelArg(kernel, 4, sizeof(int), &_currentBindState.varyingDesc.offset);
|
||||
clSetKernelArg(kernel, 5, sizeof(int), batch.GetVertexOffsetPtr());
|
||||
clSetKernelArg(kernel, 6, sizeof(int), batch.GetTableOffsetPtr());
|
||||
clSetKernelArg(kernel, 7, sizeof(int), batch.GetStartPtr());
|
||||
clSetKernelArg(kernel, 8, sizeof(int), batch.GetEndPtr());
|
||||
|
||||
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
|
||||
kernel, 1, NULL, globalWorkSize,
|
||||
NULL, 0, NULL, NULL);
|
||||
CL_CHECK_ERROR(ciErrNum, "restricted edge kernel %d\n", ciErrNum);
|
||||
}
|
||||
|
||||
void
|
||||
OsdCLComputeController::ApplyCatmarkVertexVerticesKernelB(
|
||||
FarKernelBatch const &batch, OsdCLComputeContext const *context) const {
|
||||
|
@ -143,6 +143,8 @@ protected:
|
||||
|
||||
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelA1(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
@ -306,6 +306,41 @@ __kernel void computeEdge(__global float *vertex,
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void computeRestrictedEdge(__global float *vertex,
|
||||
__global float *varying,
|
||||
__global int *E_IT,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset,
|
||||
int start, int end) {
|
||||
|
||||
int i = start + get_global_id(0) + tableOffset;
|
||||
int vid = start + get_global_id(0) + offset;
|
||||
int eidx0 = E_IT[4*i+0];
|
||||
int eidx1 = E_IT[4*i+1];
|
||||
int eidx2 = E_IT[4*i+2];
|
||||
int eidx3 = E_IT[4*i+3];
|
||||
vertex += vertexOffset;
|
||||
varying += (varying ? varyingOffset :0);
|
||||
|
||||
struct Vertex dst;
|
||||
struct Varying dstVarying;
|
||||
clearVertex(&dst);
|
||||
clearVarying(&dstVarying);
|
||||
|
||||
addWithWeight(&dst, vertex, eidx0, 0.25f);
|
||||
addWithWeight(&dst, vertex, eidx1, 0.25f);
|
||||
addWithWeight(&dst, vertex, eidx2, 0.25f);
|
||||
addWithWeight(&dst, vertex, eidx3, 0.25f);
|
||||
|
||||
writeVertex(vertex, vid, &dst);
|
||||
|
||||
if (varying) {
|
||||
addVaryingWithWeight(&dstVarying, varying, eidx0, 0.5f);
|
||||
addVaryingWithWeight(&dstVarying, varying, eidx1, 0.5f);
|
||||
writeVarying(varying, vid, &dstVarying);
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void computeVertexA(__global float *vertex,
|
||||
__global float *varying,
|
||||
__global int *V_ITa,
|
||||
|
@ -54,6 +54,7 @@ OsdCLKernelBundle::OsdCLKernelBundle() :
|
||||
_clCatmarkQuadFace(NULL),
|
||||
_clCatmarkTriQuadFace(NULL),
|
||||
_clCatmarkEdge(NULL),
|
||||
_clCatmarkRestrictedEdge(NULL),
|
||||
_clCatmarkVertexA(NULL),
|
||||
_clCatmarkVertexB(NULL),
|
||||
_clLoopEdge(NULL),
|
||||
@ -80,6 +81,8 @@ OsdCLKernelBundle::~OsdCLKernelBundle() {
|
||||
clReleaseKernel(_clCatmarkTriQuadFace);
|
||||
if (_clCatmarkEdge)
|
||||
clReleaseKernel(_clCatmarkEdge);
|
||||
if (_clCatmarkRestrictedEdge)
|
||||
clReleaseKernel(_clCatmarkRestrictedEdge);
|
||||
if (_clCatmarkVertexA)
|
||||
clReleaseKernel(_clCatmarkVertexA);
|
||||
if (_clCatmarkVertexB)
|
||||
@ -157,6 +160,7 @@ OsdCLKernelBundle::Compile(cl_context clContext,
|
||||
_clCatmarkQuadFace = buildKernel(_clProgram, "computeQuadFace");
|
||||
_clCatmarkTriQuadFace = buildKernel(_clProgram, "computeTriQuadFace");
|
||||
_clCatmarkEdge = buildKernel(_clProgram, "computeEdge");
|
||||
_clCatmarkRestrictedEdge = buildKernel(_clProgram, "computeRestrictedEdge");
|
||||
_clCatmarkVertexA = buildKernel(_clProgram, "computeVertexA");
|
||||
_clCatmarkVertexB = buildKernel(_clProgram, "computeVertexB");
|
||||
_clLoopEdge = buildKernel(_clProgram, "computeEdge");
|
||||
|
@ -56,6 +56,8 @@ public:
|
||||
|
||||
cl_kernel GetCatmarkEdgeKernel() const { return _clCatmarkEdge; }
|
||||
|
||||
cl_kernel GetCatmarkRestrictedEdgeKernel() const { return _clCatmarkRestrictedEdge; }
|
||||
|
||||
cl_kernel GetCatmarkVertexKernelA() const { return _clCatmarkVertexA; }
|
||||
|
||||
cl_kernel GetCatmarkVertexKernelB() const { return _clCatmarkVertexB; }
|
||||
@ -99,6 +101,7 @@ protected:
|
||||
_clCatmarkQuadFace,
|
||||
_clCatmarkTriQuadFace,
|
||||
_clCatmarkEdge,
|
||||
_clCatmarkRestrictedEdge,
|
||||
_clCatmarkVertexA,
|
||||
_clCatmarkVertexB,
|
||||
_clLoopEdge,
|
||||
|
@ -130,6 +130,19 @@ OsdCpuComputeController::ApplyCatmarkEdgeVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdCpuComputeController::ApplyCatmarkRestrictedEdgeVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
OsdCpuComputeRestrictedEdge(
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdCpuComputeController::ApplyCatmarkVertexVerticesKernelB(
|
||||
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
|
||||
|
@ -126,6 +126,8 @@ protected:
|
||||
|
||||
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelA1(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
@ -231,6 +231,39 @@ void OsdCpuComputeEdge(
|
||||
}
|
||||
}
|
||||
|
||||
void OsdCpuComputeRestrictedEdge(
|
||||
float *vertex, float *varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *E_IT, int vertexOffset, int tableOffset,
|
||||
int start, int end) {
|
||||
|
||||
float *vertexResults = (float*)alloca(vertexDesc.length * sizeof(float));
|
||||
float *varyingResults = (float*)alloca(varyingDesc.length * sizeof(float));
|
||||
|
||||
for (int i = start + tableOffset; i < end + tableOffset; i++) {
|
||||
int eidx0 = E_IT[4*i+0];
|
||||
int eidx1 = E_IT[4*i+1];
|
||||
int eidx2 = E_IT[4*i+2];
|
||||
int eidx3 = E_IT[4*i+3];
|
||||
|
||||
int dstIndex = i + vertexOffset - tableOffset;
|
||||
clear(vertexResults, vertexDesc);
|
||||
clear(varyingResults, varyingDesc);
|
||||
|
||||
addWithWeight(vertexResults, vertex, eidx0, 0.25f, vertexDesc);
|
||||
addWithWeight(vertexResults, vertex, eidx1, 0.25f, vertexDesc);
|
||||
addWithWeight(vertexResults, vertex, eidx2, 0.25f, vertexDesc);
|
||||
addWithWeight(vertexResults, vertex, eidx3, 0.25f, vertexDesc);
|
||||
|
||||
addWithWeight(varyingResults, varying, eidx0, 0.5f, varyingDesc);
|
||||
addWithWeight(varyingResults, varying, eidx1, 0.5f, varyingDesc);
|
||||
|
||||
copy(vertex, vertexResults, dstIndex, vertexDesc);
|
||||
copy(varying, varyingResults, dstIndex, varyingDesc);
|
||||
}
|
||||
}
|
||||
|
||||
void OsdCpuComputeVertexA(
|
||||
float *vertex, float *varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
|
@ -169,6 +169,13 @@ void OsdCpuComputeEdge(float *vertex, float * varying,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdCpuComputeRestrictedEdge(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *E_IT,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
template<int numVertexElements>
|
||||
void ComputeVertexAKernel( float *vertex,
|
||||
const int *V_ITa,
|
||||
|
@ -50,6 +50,11 @@ void OsdCudaComputeEdge(float *vertex, float *varying,
|
||||
int varyingLength, int varyingStride,
|
||||
int *E_IT, float *E_W, int offset, int tableOffset, int start, int end);
|
||||
|
||||
void OsdCudaComputeRestrictedEdge(float *vertex, float *varying,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
int *E_IT, int offset, int tableOffset, int start, int end);
|
||||
|
||||
void OsdCudaComputeVertexA(float *vertex, float *varying,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
@ -244,6 +249,26 @@ OsdCudaComputeController::ApplyCatmarkEdgeVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdCudaComputeController::ApplyCatmarkRestrictedEdgeVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdCudaComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
const OsdCudaTable * E_IT = context->GetTable(FarSubdivisionTables::E_IT);
|
||||
assert(E_IT);
|
||||
|
||||
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
|
||||
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
|
||||
|
||||
OsdCudaComputeRestrictedEdge(
|
||||
vertex, varying,
|
||||
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
|
||||
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
|
||||
static_cast<int*>(E_IT->GetCudaMemory()),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdCudaComputeController::ApplyCatmarkVertexVerticesKernelB(
|
||||
FarKernelBatch const &batch, OsdCudaComputeContext const *context) const {
|
||||
|
@ -126,6 +126,8 @@ protected:
|
||||
|
||||
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelA1(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
@ -379,6 +379,71 @@ computeEdge(float *fVertex, float *fVarying,
|
||||
}
|
||||
}
|
||||
|
||||
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
|
||||
computeRestrictedEdge(float *fVertex, float *fVaryings, int *E0_IT, int offset, int tableOffset, int start, int end)
|
||||
{
|
||||
DeviceVertex<NUM_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_VERTEX_ELEMENTS>*)fVertex;
|
||||
DeviceVertex<NUM_VARYING_ELEMENTS> *varyings = (DeviceVertex<NUM_VARYING_ELEMENTS>*)fVaryings;
|
||||
|
||||
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
|
||||
i < end + tableOffset;
|
||||
i+= blockDim.x * gridDim.x){
|
||||
|
||||
int eidx0 = E0_IT[4*i+0];
|
||||
int eidx1 = E0_IT[4*i+1];
|
||||
int eidx2 = E0_IT[4*i+2];
|
||||
int eidx3 = E0_IT[4*i+3];
|
||||
|
||||
DeviceVertex<NUM_VERTEX_ELEMENTS> dst;
|
||||
dst.clear();
|
||||
dst.addWithWeight(&vertex[eidx0], 0.25f);
|
||||
dst.addWithWeight(&vertex[eidx1], 0.25f);
|
||||
dst.addWithWeight(&vertex[eidx2], 0.25f);
|
||||
dst.addWithWeight(&vertex[eidx3], 0.25f);
|
||||
vertex[offset+i-tableOffset] = dst;
|
||||
|
||||
if(NUM_VARYING_ELEMENTS > 0){
|
||||
DeviceVertex<NUM_VARYING_ELEMENTS> dstVarying;
|
||||
dstVarying.clear();
|
||||
dstVarying.addWithWeight(&varyings[eidx0], 0.5f);
|
||||
dstVarying.addWithWeight(&varyings[eidx1], 0.5f);
|
||||
varyings[offset+i-tableOffset] = dstVarying;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void
|
||||
computeRestrictedEdge(float *fVertex, float *fVarying,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
int *E0_IT, int offset, int tableOffset, int start, int end)
|
||||
{
|
||||
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
|
||||
i < end + tableOffset;i+= blockDim.x * gridDim.x) {
|
||||
|
||||
int eidx0 = E0_IT[4*i+0];
|
||||
int eidx1 = E0_IT[4*i+1];
|
||||
int eidx2 = E0_IT[4*i+2];
|
||||
int eidx3 = E0_IT[4*i+3];
|
||||
|
||||
float *dstVertex = fVertex + (i+offset-tableOffset)*vertexStride;
|
||||
clear(dstVertex, vertexLength);
|
||||
|
||||
addWithWeight(dstVertex, fVertex + eidx0*vertexStride, 0.25f, vertexLength);
|
||||
addWithWeight(dstVertex, fVertex + eidx1*vertexStride, 0.25f, vertexLength);
|
||||
addWithWeight(dstVertex, fVertex + eidx2*vertexStride, 0.25f, vertexLength);
|
||||
addWithWeight(dstVertex, fVertex + eidx3*vertexStride, 0.25f, vertexLength);
|
||||
|
||||
if (varyingLength > 0){
|
||||
float *dstVarying = fVarying + (i+offset-tableOffset)*varyingStride;
|
||||
clear(dstVarying, varyingLength);
|
||||
|
||||
addWithWeight(dstVarying, fVarying + eidx0*varyingStride, 0.5f, varyingLength);
|
||||
addWithWeight(dstVarying, fVarying + eidx1*varyingStride, 0.5f, varyingLength);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
|
||||
computeVertexA(float *fVertex, float *fVaryings, int *V0_ITa, float *V0_S, int offset, int tableOffset, int start, int end, int pass)
|
||||
{
|
||||
@ -847,6 +912,22 @@ void OsdCudaComputeEdge(float *vertex, float *varying,
|
||||
E_IT, E_W, offset, tableOffset, start, end);
|
||||
}
|
||||
|
||||
void OsdCudaComputeRestrictedEdge(float *vertex, float *varying,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
int *E_IT, int offset, int tableOffset, int start, int end)
|
||||
{
|
||||
//computeRestrictedEdge<0, 3><<<512,32>>>(vertex, varying, E_IT, offset, start, end);
|
||||
OPT_KERNEL(0, 0, computeRestrictedEdge, 512, 32, (vertex, varying, E_IT, offset, tableOffset, start, end));
|
||||
OPT_KERNEL(0, 3, computeRestrictedEdge, 512, 32, (vertex, varying, E_IT, offset, tableOffset, start, end));
|
||||
OPT_KERNEL(3, 0, computeRestrictedEdge, 512, 32, (vertex, varying, E_IT, offset, tableOffset, start, end));
|
||||
OPT_KERNEL(3, 3, computeRestrictedEdge, 512, 32, (vertex, varying, E_IT, offset, tableOffset, start, end));
|
||||
|
||||
computeRestrictedEdge<<<512, 32>>>(vertex, varying,
|
||||
vertexLength, vertexStride, varyingLength, varyingStride,
|
||||
E_IT, offset, tableOffset, start, end);
|
||||
}
|
||||
|
||||
void OsdCudaComputeVertexA(float *vertex, float *varying,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
|
@ -40,8 +40,11 @@ void
|
||||
OsdD3D11ComputeTable::createBuffer(int size, const void *ptr, DXGI_FORMAT format, int numElements,
|
||||
ID3D11DeviceContext *deviceContext) {
|
||||
|
||||
if (size == 0)
|
||||
if (size == 0) {
|
||||
_buffer = NULL;
|
||||
_srv = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
ID3D11Device *device = NULL;
|
||||
deviceContext->GetDevice(&device);
|
||||
|
@ -187,6 +187,18 @@ OsdD3D11ComputeController::ApplyCatmarkTriQuadFaceVerticesKernel(
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
|
||||
}
|
||||
|
||||
void
|
||||
OsdD3D11ComputeController::ApplyCatmarkRestrictedEdgeVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdD3D11ComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentBindState.kernelBundle->ApplyCatmarkRestrictedEdgeVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(),
|
||||
batch.GetStart(), batch.GetEnd(),
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
|
||||
}
|
||||
|
||||
void
|
||||
OsdD3D11ComputeController::ApplyCatmarkEdgeVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdD3D11ComputeContext const *context) const {
|
||||
|
@ -142,6 +142,8 @@ protected:
|
||||
|
||||
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelA1(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
@ -55,6 +55,7 @@ OsdD3D11ComputeKernelBundle::OsdD3D11ComputeKernelBundle(
|
||||
_kernelComputeQuadFace(0),
|
||||
_kernelComputeTriQuadFace(0),
|
||||
_kernelComputeEdge(0),
|
||||
_kernelComputeRestrictedEdge(0),
|
||||
_kernelComputeBilinearEdge(0),
|
||||
_kernelComputeVertex(0),
|
||||
_kernelComputeVertexA(0),
|
||||
@ -74,6 +75,7 @@ OsdD3D11ComputeKernelBundle::~OsdD3D11ComputeKernelBundle() {
|
||||
SAFE_RELEASE(_kernelComputeQuadFace);
|
||||
SAFE_RELEASE(_kernelComputeTriQuadFace);
|
||||
SAFE_RELEASE(_kernelComputeEdge);
|
||||
SAFE_RELEASE(_kernelComputeRestrictedEdge);
|
||||
SAFE_RELEASE(_kernelComputeBilinearEdge);
|
||||
SAFE_RELEASE(_kernelComputeVertex);
|
||||
SAFE_RELEASE(_kernelComputeVertexA);
|
||||
@ -176,6 +178,9 @@ OsdD3D11ComputeKernelBundle::Compile(
|
||||
_classLinkage->GetClassInstance(
|
||||
"catmarkComputeEdge", 0, &_kernelComputeEdge);
|
||||
assert(_kernelComputeEdge);
|
||||
_classLinkage->GetClassInstance(
|
||||
"catmarkComputeRestrictedEdge", 0, &_kernelComputeRestrictedEdge);
|
||||
assert(_kernelComputeRestrictedEdge);
|
||||
_classLinkage->GetClassInstance(
|
||||
"bilinearComputeEdge", 0, &_kernelComputeBilinearEdge);
|
||||
assert(_kernelComputeBilinearEdge);
|
||||
@ -360,6 +365,22 @@ OsdD3D11ComputeKernelBundle::ApplyCatmarkEdgeVerticesKernel(
|
||||
dispatchCompute(_kernelComputeEdge, args);
|
||||
}
|
||||
|
||||
void
|
||||
OsdD3D11ComputeKernelBundle::ApplyCatmarkRestrictedEdgeVerticesKernel(
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
int vertexBaseOffset, int varyingBaseOffset) {
|
||||
|
||||
KernelCB args;
|
||||
ZeroMemory(&args, sizeof(args));
|
||||
args.vertexOffset = vertexOffset;
|
||||
args.tableOffset = tableOffset;
|
||||
args.indexStart = start;
|
||||
args.indexEnd = end;
|
||||
args.vertexBaseOffset = vertexBaseOffset;
|
||||
args.varyingBaseOffset = varyingBaseOffset;
|
||||
dispatchCompute(_kernelComputeRestrictedEdge, args);
|
||||
}
|
||||
|
||||
void
|
||||
OsdD3D11ComputeKernelBundle::ApplyCatmarkVertexVerticesKernelB(
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
|
@ -79,6 +79,10 @@ public:
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
int vertexBaseOffset, int varyingBaseOffset);
|
||||
|
||||
void ApplyCatmarkRestrictedEdgeVerticesKernel(
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
int vertexBaseOffset, int varyingBaseOffset);
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelB(
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
int vertexBaseOffset, int varyingBaseOffset);
|
||||
@ -140,12 +144,14 @@ protected:
|
||||
|
||||
ID3D11ClassInstance * _kernelComputeFace; // general face-vertex kernel (all schemes)
|
||||
|
||||
ID3D11ClassInstance * _kernelComputeQuadFace; // quad face-vertex kernel (catmark)
|
||||
ID3D11ClassInstance * _kernelComputeQuadFace; // quad face-vertex kernel (catmark scheme)
|
||||
|
||||
ID3D11ClassInstance * _kernelComputeTriQuadFace; // tri-quad face-vertex kernel (catmark)
|
||||
ID3D11ClassInstance * _kernelComputeTriQuadFace; // tri-quad face-vertex kernel (catmark scheme)
|
||||
|
||||
ID3D11ClassInstance * _kernelComputeEdge; // edge-vertex kernel (catmark + loop schemes)
|
||||
|
||||
ID3D11ClassInstance * _kernelComputeRestrictedEdge; // edge-vertex kernel (catmark scheme)
|
||||
|
||||
ID3D11ClassInstance * _kernelComputeBilinearEdge; // edge-vertex kernel (bilinear scheme)
|
||||
|
||||
ID3D11ClassInstance * _kernelComputeVertex; // vertex-vertex kernel (bilinear scheme)
|
||||
|
@ -135,6 +135,20 @@ OsdGcdComputeController::ApplyCatmarkEdgeVerticesKernel(
|
||||
_gcd_queue);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGcdComputeController::ApplyCatmarkRestrictedEdgeVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
OsdGcdComputeRestrictedEdge(
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(),
|
||||
_gcd_queue);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGcdComputeController::ApplyCatmarkVertexVerticesKernelB(
|
||||
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
|
||||
|
@ -126,6 +126,8 @@ protected:
|
||||
|
||||
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelA1(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
@ -151,6 +151,29 @@ void OsdGcdComputeEdge(
|
||||
vertexOffset, tableOffset, start_e, end_e);
|
||||
}
|
||||
|
||||
void OsdGcdComputeRestrictedEdge(
|
||||
float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *E_IT, int vertexOffset, int tableOffset, int start, int end,
|
||||
dispatch_queue_t gcdq) {
|
||||
|
||||
const int workSize = end-start;
|
||||
dispatch_apply(workSize/GCD_WORK_STRIDE, gcdq, ^(size_t blockIdx){
|
||||
const int start_i = start + blockIdx*GCD_WORK_STRIDE;
|
||||
const int end_i = start_i + GCD_WORK_STRIDE;
|
||||
OsdCpuComputeRestrictedEdge(vertex, varying, vertexDesc, varyingDesc,
|
||||
E_IT,
|
||||
vertexOffset, tableOffset, start_i, end_i);
|
||||
});
|
||||
const int start_e = end - workSize%GCD_WORK_STRIDE;
|
||||
const int end_e = end;
|
||||
if (start_e < end_e)
|
||||
OsdCpuComputeRestrictedEdge(vertex, varying, vertexDesc, varyingDesc,
|
||||
E_IT,
|
||||
vertexOffset, tableOffset, start_e, end_e);
|
||||
}
|
||||
|
||||
void OsdGcdComputeVertexA(
|
||||
float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
@ -281,7 +304,7 @@ void OsdGcdComputeBilinearVertex(
|
||||
void OsdGcdEditVertexAdd(
|
||||
float * vertex,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
int primVarOffset, int primVarWidth,
|
||||
int primVarOffset, int /*primVarWidth*/,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end,
|
||||
const unsigned int *editIndices, const float *editValues,
|
||||
@ -304,7 +327,7 @@ void OsdGcdEditVertexAdd(
|
||||
void OsdGcdEditVertexSet(
|
||||
float * vertex,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
int primVarOffset, int primVarWidth,
|
||||
int primVarOffset, int /*primVarWidth*/,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end,
|
||||
const unsigned int *editIndices, const float *editValues,
|
||||
|
@ -66,6 +66,14 @@ void OsdGcdComputeEdge(float *vertex, float * varying,
|
||||
int start, int end,
|
||||
dispatch_queue_t gcdq);
|
||||
|
||||
void OsdGcdComputeRestrictedEdge(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *E_IT,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end,
|
||||
dispatch_queue_t gcdq);
|
||||
|
||||
void OsdGcdComputeVertexA(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
|
@ -172,6 +172,17 @@ OsdGLSLComputeController::ApplyCatmarkEdgeVerticesKernel(
|
||||
batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdGLSLComputeController::ApplyCatmarkRestrictedEdgeVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdGLSLComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentBindState.kernelBundle->ApplyCatmarkRestrictedEdgeVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(),
|
||||
batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdGLSLComputeController::ApplyCatmarkVertexVerticesKernelB(
|
||||
FarKernelBatch const &batch, OsdGLSLComputeContext const *context) const {
|
||||
|
@ -135,6 +135,8 @@ protected:
|
||||
|
||||
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelA1(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
@ -235,7 +235,7 @@ void catmarkComputeTriQuadFace()
|
||||
writeVertex(vid, dst);
|
||||
}
|
||||
|
||||
// Edge-vertices compute Kernepl
|
||||
// Edge-vertices compute Kernel
|
||||
subroutine(computeKernelType)
|
||||
void catmarkComputeEdge()
|
||||
{
|
||||
@ -272,6 +272,34 @@ void catmarkComputeEdge()
|
||||
writeVertex(vid, dst);
|
||||
}
|
||||
|
||||
// Restricted edge-vertices compute Kernel
|
||||
subroutine(computeKernelType)
|
||||
void catmarkComputeRestrictedEdge()
|
||||
{
|
||||
int i = int(gl_GlobalInvocationID.x) + indexStart;
|
||||
if (i >= indexEnd) return;
|
||||
int vid = i + vertexOffset;
|
||||
i += tableOffset;
|
||||
|
||||
Vertex dst;
|
||||
clear(dst);
|
||||
|
||||
int eidx0 = _E_IT[4*i+0];
|
||||
int eidx1 = _E_IT[4*i+1];
|
||||
int eidx2 = _E_IT[4*i+2];
|
||||
int eidx3 = _E_IT[4*i+3];
|
||||
ivec4 eidx = ivec4(eidx0, eidx1, eidx2, eidx3);
|
||||
|
||||
addWithWeight(dst, readVertex(eidx.x), 0.25f);
|
||||
addWithWeight(dst, readVertex(eidx.y), 0.25f);
|
||||
addWithWeight(dst, readVertex(eidx.z), 0.25f);
|
||||
addWithWeight(dst, readVertex(eidx.w), 0.25f);
|
||||
addVaryingWithWeight(dst, readVertex(eidx.x), 0.5f);
|
||||
addVaryingWithWeight(dst, readVertex(eidx.y), 0.5f);
|
||||
|
||||
writeVertex(vid, dst);
|
||||
}
|
||||
|
||||
// Edge-vertices compute Kernel (bilinear scheme)
|
||||
subroutine(computeKernelType)
|
||||
void bilinearComputeEdge()
|
||||
|
@ -126,6 +126,9 @@ OsdGLSLComputeKernelBundle::Compile(
|
||||
_subComputeEdge = glGetSubroutineIndex(_program,
|
||||
GL_COMPUTE_SHADER,
|
||||
"catmarkComputeEdge");
|
||||
_subComputeRestrictedEdge = glGetSubroutineIndex(_program,
|
||||
GL_COMPUTE_SHADER,
|
||||
"catmarkComputeRestrictedEdge");
|
||||
_subComputeBilinearEdge = glGetSubroutineIndex(_program,
|
||||
GL_COMPUTE_SHADER,
|
||||
"bilinearComputeEdge");
|
||||
@ -259,6 +262,14 @@ OsdGLSLComputeKernelBundle::ApplyCatmarkEdgeVerticesKernel(
|
||||
dispatchCompute(vertexOffset, tableOffset, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGLSLComputeKernelBundle::ApplyCatmarkRestrictedEdgeVerticesKernel(
|
||||
int vertexOffset, int tableOffset, int start, int end) {
|
||||
|
||||
glUniformSubroutinesuiv(GL_COMPUTE_SHADER, 1, &_subComputeRestrictedEdge);
|
||||
dispatchCompute(vertexOffset, tableOffset, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGLSLComputeKernelBundle::ApplyCatmarkVertexVerticesKernelB(
|
||||
int vertexOffset, int tableOffset, int start, int end) {
|
||||
|
@ -66,6 +66,9 @@ public:
|
||||
void ApplyCatmarkEdgeVerticesKernel(
|
||||
int vertexOffset, int tableOffset, int start, int end);
|
||||
|
||||
void ApplyCatmarkRestrictedEdgeVerticesKernel(
|
||||
int vertexOffset, int tableOffset, int start, int end);
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelB(
|
||||
int vertexOffset, int tableOffset, int start, int end);
|
||||
|
||||
@ -136,12 +139,14 @@ protected:
|
||||
|
||||
GLuint _subComputeFace; // general face-vertex kernel (all schemes)
|
||||
|
||||
GLuint _subComputeQuadFace; // quad face-vertex kernel (catmark)
|
||||
GLuint _subComputeQuadFace; // quad face-vertex kernel (catmark scheme)
|
||||
|
||||
GLuint _subComputeTriQuadFace; // tri-quad face-vertex kernel (catmark)
|
||||
GLuint _subComputeTriQuadFace; // tri-quad face-vertex kernel (catmark scheme)
|
||||
|
||||
GLuint _subComputeEdge; // edge-vertex kernel (catmark + loop schemes)
|
||||
|
||||
GLuint _subComputeRestrictedEdge; // restricted edge-vertex kernel (catmark scheme)
|
||||
|
||||
GLuint _subComputeBilinearEdge; // edge-vertex kernel (bilinear scheme)
|
||||
|
||||
GLuint _subComputeVertex; // vertex-vertex kernel (bilinear scheme)
|
||||
|
@ -231,7 +231,6 @@ OsdGLSLTransformFeedbackComputeController::ApplyCatmarkTriQuadFaceVerticesKernel
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
OsdGLSLTransformFeedbackComputeController::ApplyCatmarkEdgeVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdGLSLTransformFeedbackComputeContext const *context) const {
|
||||
@ -244,6 +243,18 @@ OsdGLSLTransformFeedbackComputeController::ApplyCatmarkEdgeVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdGLSLTransformFeedbackComputeController::ApplyCatmarkRestrictedEdgeVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdGLSLTransformFeedbackComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentBindState.kernelBundle->ApplyCatmarkRestrictedEdgeVerticesKernel(
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdGLSLTransformFeedbackComputeController::ApplyCatmarkVertexVerticesKernelB(
|
||||
FarKernelBatch const &batch, OsdGLSLTransformFeedbackComputeContext const *context) const {
|
||||
|
@ -133,6 +133,8 @@ protected:
|
||||
|
||||
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelA1(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
@ -284,6 +284,35 @@ void catmarkComputeEdge()
|
||||
writeVertex(dst);
|
||||
}
|
||||
|
||||
// Restricted edge-vertices compute Kernel
|
||||
subroutine(computeKernelType)
|
||||
void catmarkComputeRestrictedEdge()
|
||||
{
|
||||
int i = gl_VertexID + indexStart + tableOffset;
|
||||
|
||||
Vertex dst;
|
||||
clear(dst);
|
||||
|
||||
#ifdef OPT_E0_IT_VEC4
|
||||
ivec4 eidx = texelFetch(_E0_IT, i);
|
||||
#else
|
||||
int eidx0 = texelFetch(_E0_IT, 4*i+0).x;
|
||||
int eidx1 = texelFetch(_E0_IT, 4*i+1).x;
|
||||
int eidx2 = texelFetch(_E0_IT, 4*i+2).x;
|
||||
int eidx3 = texelFetch(_E0_IT, 4*i+3).x;
|
||||
ivec4 eidx = ivec4(eidx0, eidx1, eidx2, eidx3);
|
||||
#endif
|
||||
|
||||
addWithWeight(dst, readVertex(eidx.x), 0.25f);
|
||||
addWithWeight(dst, readVertex(eidx.y), 0.25f);
|
||||
addWithWeight(dst, readVertex(eidx.z), 0.25f);
|
||||
addWithWeight(dst, readVertex(eidx.w), 0.25f);
|
||||
addVaryingWithWeight(dst, readVertex(eidx.x), 0.5f);
|
||||
addVaryingWithWeight(dst, readVertex(eidx.y), 0.5f);
|
||||
|
||||
writeVertex(dst);
|
||||
}
|
||||
|
||||
// Edge-vertices compute Kernel (bilinear scheme)
|
||||
subroutine(computeKernelType)
|
||||
void bilinearComputeEdge()
|
||||
|
@ -228,6 +228,7 @@ OsdGLSLTransformFeedbackKernelBundle::Compile(
|
||||
_subComputeQuadFace = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeQuadFace");
|
||||
_subComputeTriQuadFace = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeTriQuadFace");
|
||||
_subComputeEdge = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeEdge");
|
||||
_subComputeRestrictedEdge = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeRestrictedEdge");
|
||||
_subComputeBilinearEdge = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "bilinearComputeEdge");
|
||||
_subComputeVertex = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "bilinearComputeVertex");
|
||||
_subComputeVertexA = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeVertexA");
|
||||
@ -406,6 +407,18 @@ OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkEdgeVerticesKernel(
|
||||
offset, tableOffset, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkRestrictedEdgeVerticesKernel(
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset, int start, int end) {
|
||||
|
||||
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeRestrictedEdge);
|
||||
transformGpuBufferData(vertexBuffer, varyingBuffer,
|
||||
vertexOffset, varyingOffset,
|
||||
offset, tableOffset, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkVertexVerticesKernelB(
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
|
@ -83,6 +83,11 @@ public:
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset, int start, int end);
|
||||
|
||||
void ApplyCatmarkRestrictedEdgeVerticesKernel(
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset, int start, int end);
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelB(
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
int vertexOffset, int varyingOffset,
|
||||
@ -191,12 +196,14 @@ protected:
|
||||
|
||||
GLuint _subComputeFace; // general face-vertex kernel (all schemes)
|
||||
|
||||
GLuint _subComputeQuadFace; // quad face-vertex kernel (catmark)
|
||||
GLuint _subComputeQuadFace; // quad face-vertex kernel (catmark scheme)
|
||||
|
||||
GLuint _subComputeTriQuadFace; // tri-quad face-vertex kernel (catmark)
|
||||
GLuint _subComputeTriQuadFace; // tri-quad face-vertex kernel (catmark scheme)
|
||||
|
||||
GLuint _subComputeEdge; // edge-vertex kernel (catmark + loop schemes)
|
||||
|
||||
GLuint _subComputeRestrictedEdge; // restricted edge-vertex kernel (catmark scheme)
|
||||
|
||||
GLuint _subComputeBilinearEdge; // edge-vertex kernel (bilinear scheme)
|
||||
|
||||
GLuint _subComputeVertex; // vertex-vertex kernel (bilinear scheme)
|
||||
|
@ -271,6 +271,36 @@ void runKernel( uint3 ID )
|
||||
}
|
||||
};
|
||||
|
||||
// Restricted edge-vertices compute Kernel
|
||||
class CatmarkComputeRestrictedEdge : IComputeKernel {
|
||||
int placeholder;
|
||||
void runKernel( uint3 ID )
|
||||
{
|
||||
int i = int(ID.x) + indexStart;
|
||||
if (i >= indexEnd) return;
|
||||
int vid = i + vertexOffset;
|
||||
i += tableOffset;
|
||||
|
||||
Vertex dst;
|
||||
clear(dst);
|
||||
|
||||
int eidx0 = _E_IT[4*i+0];
|
||||
int eidx1 = _E_IT[4*i+1];
|
||||
int eidx2 = _E_IT[4*i+2];
|
||||
int eidx3 = _E_IT[4*i+3];
|
||||
int4 eidx = int4(eidx0, eidx1, eidx2, eidx3);
|
||||
|
||||
addWithWeight(dst, readVertex(eidx.x), 0.25f);
|
||||
addWithWeight(dst, readVertex(eidx.y), 0.25f);
|
||||
addWithWeight(dst, readVertex(eidx.z), 0.25f);
|
||||
addWithWeight(dst, readVertex(eidx.w), 0.25f);
|
||||
addVaryingWithWeight(dst, readVertex(eidx.x), 0.5f);
|
||||
addVaryingWithWeight(dst, readVertex(eidx.y), 0.5f);
|
||||
|
||||
writeVertex(vid, dst);
|
||||
}
|
||||
};
|
||||
|
||||
// Edge-vertices compute Kernel (bilinear scheme)
|
||||
class BilinearComputeEdge : IComputeKernel {
|
||||
int placeholder;
|
||||
@ -458,6 +488,7 @@ CatmarkComputeFace catmarkComputeFace;
|
||||
CatmarkComputeQuadFace catmarkComputeQuadFace;
|
||||
CatmarkComputeTriQuadFace catmarkComputeTriQuadFace;
|
||||
CatmarkComputeEdge catmarkComputeEdge;
|
||||
CatmarkComputeRestrictedEdge catmarkComputeRestrictedEdge;
|
||||
BilinearComputeEdge bilinearComputeEdge;
|
||||
BilinearComputeVertex bilinearComputeVertex;
|
||||
CatmarkComputeVertexA catmarkComputeVertexA;
|
||||
|
@ -134,6 +134,19 @@ OsdOmpComputeController::ApplyCatmarkEdgeVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdOmpComputeController::ApplyCatmarkRestrictedEdgeVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
OsdOmpComputeRestrictedEdge(
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdOmpComputeController::ApplyCatmarkVertexVerticesKernelB(
|
||||
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
|
||||
|
@ -134,6 +134,8 @@ protected:
|
||||
|
||||
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelA1(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
@ -243,6 +243,47 @@ void OsdOmpComputeEdge(
|
||||
}
|
||||
}
|
||||
|
||||
void OsdOmpComputeRestrictedEdge(
|
||||
float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *E_IT, int offset, int tableOffset, int start, int end) {
|
||||
|
||||
int numThreads = omp_get_max_threads();
|
||||
float *vertexResultsArray = (float*)alloca(vertexDesc.length * sizeof(float) * numThreads);
|
||||
float *varyingResultsArray = (float*)alloca(varyingDesc.length * sizeof(float) * numThreads);
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int i = start + tableOffset; i < end + tableOffset; i++) {
|
||||
int eidx0 = E_IT[4*i+0];
|
||||
int eidx1 = E_IT[4*i+1];
|
||||
int eidx2 = E_IT[4*i+2];
|
||||
int eidx3 = E_IT[4*i+3];
|
||||
|
||||
int dstIndex = offset + i - tableOffset;
|
||||
|
||||
int threadId = omp_get_thread_num();
|
||||
float *vertexResults = vertexResultsArray +
|
||||
vertexDesc.length * threadId;
|
||||
float *varyingResults = varyingResultsArray +
|
||||
varyingDesc.length * threadId;
|
||||
|
||||
// clear
|
||||
clear(vertexResults, vertexDesc);
|
||||
clear(varyingResults, varyingDesc);
|
||||
|
||||
addWithWeight(vertexResults, vertex, eidx0, 0.25f, vertexDesc);
|
||||
addWithWeight(vertexResults, vertex, eidx1, 0.25f, vertexDesc);
|
||||
addWithWeight(vertexResults, vertex, eidx2, 0.25f, vertexDesc);
|
||||
addWithWeight(vertexResults, vertex, eidx3, 0.25f, vertexDesc);
|
||||
addWithWeight(varyingResults, varying, eidx0, 0.5f, varyingDesc);
|
||||
addWithWeight(varyingResults, varying, eidx1, 0.5f, varyingDesc);
|
||||
|
||||
copy(vertex, vertexResults, dstIndex, vertexDesc);
|
||||
copy(varying, varyingResults, dstIndex, varyingDesc);
|
||||
}
|
||||
}
|
||||
|
||||
void OsdOmpComputeVertexA(
|
||||
float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
|
@ -61,6 +61,13 @@ void OsdOmpComputeEdge(float *vertex, float * varying,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdOmpComputeRestrictedEdge(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *E_IT,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdOmpComputeVertexA(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
|
@ -140,6 +140,19 @@ OsdTbbComputeController::ApplyCatmarkEdgeVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdTbbComputeController::ApplyCatmarkRestrictedEdgeVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
OsdTbbComputeRestrictedEdge(
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
OsdTbbComputeController::ApplyCatmarkVertexVerticesKernelB(
|
||||
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
|
||||
|
@ -126,6 +126,8 @@ protected:
|
||||
|
||||
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelA1(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
@ -378,7 +378,6 @@ public:
|
||||
{};
|
||||
};
|
||||
|
||||
|
||||
void OsdTbbComputeEdge(
|
||||
float *vertex, float *varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
@ -391,6 +390,76 @@ void OsdTbbComputeEdge(
|
||||
tbb::parallel_for(range, kernel);
|
||||
}
|
||||
|
||||
class TBBRestrictedEdgeKernel {
|
||||
float *vertex;
|
||||
float *varying;
|
||||
OsdVertexBufferDescriptor vertexDesc;
|
||||
OsdVertexBufferDescriptor varyingDesc;
|
||||
int const *E_IT;
|
||||
int vertexOffset;
|
||||
int tableOffset;
|
||||
|
||||
public:
|
||||
void operator() (tbb::blocked_range<int> const &r) const {
|
||||
for (int i = r.begin() + tableOffset; i < r.end() + tableOffset; i++) {
|
||||
int eidx0 = E_IT[4*i+0];
|
||||
int eidx1 = E_IT[4*i+1];
|
||||
int eidx2 = E_IT[4*i+2];
|
||||
int eidx3 = E_IT[4*i+3];
|
||||
|
||||
int dstIndex = i + vertexOffset - tableOffset;
|
||||
clear(vertex, dstIndex, vertexDesc);
|
||||
clear(varying, dstIndex, varyingDesc);
|
||||
|
||||
addWithWeight(vertex, dstIndex, eidx0, 0.25f, vertexDesc);
|
||||
addWithWeight(vertex, dstIndex, eidx1, 0.25f, vertexDesc);
|
||||
addWithWeight(vertex, dstIndex, eidx2, 0.25f, vertexDesc);
|
||||
addWithWeight(vertex, dstIndex, eidx3, 0.25f, vertexDesc);
|
||||
addWithWeight(varying, dstIndex, eidx0, 0.5f, varyingDesc);
|
||||
addWithWeight(varying, dstIndex, eidx1, 0.5f, varyingDesc);
|
||||
}
|
||||
}
|
||||
|
||||
TBBRestrictedEdgeKernel(TBBRestrictedEdgeKernel const &other)
|
||||
{
|
||||
this->vertex = other.vertex;
|
||||
this->varying= other.varying;
|
||||
this->vertexDesc = other.vertexDesc;
|
||||
this->varyingDesc = other.varyingDesc;
|
||||
this->E_IT = other.E_IT;
|
||||
this->vertexOffset = other.vertexOffset;
|
||||
this->tableOffset = other.tableOffset;
|
||||
}
|
||||
|
||||
TBBRestrictedEdgeKernel(float *vertex_in,
|
||||
float *varying_in,
|
||||
OsdVertexBufferDescriptor const &vertexDesc_in,
|
||||
OsdVertexBufferDescriptor const &varyingDesc_in,
|
||||
int const *E_IT_in,
|
||||
int vertexOffset_in,
|
||||
int tableOffset_in) :
|
||||
vertex (vertex_in),
|
||||
varying(varying_in),
|
||||
vertexDesc(vertexDesc_in),
|
||||
varyingDesc(varyingDesc_in),
|
||||
E_IT (E_IT_in),
|
||||
vertexOffset(vertexOffset_in),
|
||||
tableOffset(tableOffset_in)
|
||||
{};
|
||||
};
|
||||
|
||||
void OsdTbbComputeRestrictedEdge(
|
||||
float *vertex, float *varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
int const *E_IT, int vertexOffset, int tableOffset,
|
||||
int start, int end) {
|
||||
tbb::blocked_range<int> range(start, end, grain_size);
|
||||
TBBRestrictedEdgeKernel kernel(vertex, varying, vertexDesc, varyingDesc, E_IT,
|
||||
vertexOffset, tableOffset);
|
||||
tbb::parallel_for(range, kernel);
|
||||
}
|
||||
|
||||
class TBBVertexKernelA {
|
||||
float *vertex;
|
||||
float *varying;
|
||||
|
@ -56,10 +56,17 @@ void OsdTbbComputeTriQuadFace(float * vertex, float * varying,
|
||||
void OsdTbbComputeEdge(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
int const *E_IT, float const *E_ITa,
|
||||
int const *E_IT, float const *E_W,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdTbbComputeRestrictedEdge(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
int const *E_IT,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdTbbComputeVertexA(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
|
Loading…
Reference in New Issue
Block a user