Merge pull request #310 from nathan-at-digitalfish/new_edge_vertex_kernel

New edge vertex kernel
This commit is contained in:
Manuel Kraemer 2014-06-23 15:05:49 -07:00
commit af3424e1da
46 changed files with 811 additions and 44 deletions

View File

@ -74,23 +74,39 @@ FarCatmarkSubdivisionTablesFactory<T,U>::Create( FarMeshFactory<T,U> * meshFacto
FarSubdivisionTables * result = new FarSubdivisionTables(maxlevel, FarSubdivisionTables::CATMARK);
// Calculate the size of the face-vertex index table
// Calculate the size of the face-vertex indexing tables
int minCoarseFaceValence = tablesFactory.GetMinCoarseFaceValence();
int maxCoarseFaceValence = tablesFactory.GetMaxCoarseFaceValence();
bool coarseMeshAllQuadFaces = minCoarseFaceValence == 4 && maxCoarseFaceValence == 4;
bool coarseMeshAllTriQuadFaces = minCoarseFaceValence >= 3 && maxCoarseFaceValence <= 4;
bool hasQuadFaceVertexKernel = meshFactory->IsKernelTypeSupported(FarKernelBatch::CATMARK_QUAD_FACE_VERTEX);
bool hasTriQuadFaceVertexKernel = meshFactory->IsKernelTypeSupported(FarKernelBatch::CATMARK_TRI_QUAD_FACE_VERTEX);
int F_ITa_size = 0;
if (!hasQuadFaceVertexKernel && !hasTriQuadFaceVertexKernel)
F_ITa_size = tablesFactory.GetNumFaceVerticesTotal(maxlevel) * 2;
else if (!coarseMeshAllTriQuadFaces || !hasTriQuadFaceVertexKernel)
F_ITa_size = tablesFactory.GetNumFaceVerticesTotal(1) * 2;
int F_IT_size = tablesFactory.GetFaceVertsValenceSum();
if (coarseMeshAllTriQuadFaces)
if (coarseMeshAllTriQuadFaces && hasTriQuadFaceVertexKernel)
F_IT_size += tablesFactory.GetNumCoarseTriangleFaces(); // add padding for tri faces
// Triangular interpolation mode :
// see "smoothtriangle" tag introduced in prman 3.9 and HbrCatmarkSubdivision<T>
typename HbrCatmarkSubdivision<T>::TriangleSubdivision triangleMethod =
dynamic_cast<HbrCatmarkSubdivision<T> *>(meshFactory->GetHbrMesh()->GetSubdivision())->GetTriangleSubdivisionMethod();
bool hasFractionalEdgeSharpness = tablesFactory.HasFractionalEdgeSharpness();
bool useRestrictedEdgeVertexKernel = meshFactory->IsKernelTypeSupported(FarKernelBatch::CATMARK_RESTRICTED_EDGE_VERTEX);;
useRestrictedEdgeVertexKernel &= !hasFractionalEdgeSharpness && triangleMethod != HbrCatmarkSubdivision<T>::k_New;
// Allocate memory for the indexing tables
if (!coarseMeshAllTriQuadFaces)
result->_F_ITa.resize(tablesFactory.GetNumFaceVerticesTotal(1) * 2);
result->_F_ITa.resize(F_ITa_size);
result->_F_IT.resize(F_IT_size);
result->_E_IT.resize(tablesFactory.GetNumEdgeVerticesTotal(maxlevel)*4);
result->_E_W.resize(tablesFactory.GetNumEdgeVerticesTotal(maxlevel)*2);
if (!useRestrictedEdgeVertexKernel)
result->_E_W.resize(tablesFactory.GetNumEdgeVerticesTotal(maxlevel)*2);
result->_V_ITa.resize((tablesFactory.GetNumVertexVerticesTotal(maxlevel)
- tablesFactory.GetNumVertexVerticesTotal(0))*5); // subtract coarse cage vertices
@ -127,20 +143,22 @@ FarCatmarkSubdivisionTablesFactory<T,U>::Create( FarMeshFactory<T,U> * meshFacto
int nFaceVertices = (int)tablesFactory._faceVertsList[level].size();
// choose the kernel type that best fits the face topology
int kernelType;
int kernelType = FarKernelBatch::CATMARK_FACE_VERTEX;
if (level == 1) {
if (coarseMeshAllQuadFaces)
if (coarseMeshAllQuadFaces && hasQuadFaceVertexKernel)
kernelType = FarKernelBatch::CATMARK_QUAD_FACE_VERTEX;
else if (coarseMeshAllTriQuadFaces)
else if (coarseMeshAllTriQuadFaces && hasTriQuadFaceVertexKernel)
kernelType = FarKernelBatch::CATMARK_TRI_QUAD_FACE_VERTEX;
else
kernelType = FarKernelBatch::CATMARK_FACE_VERTEX;
} else {
kernelType = FarKernelBatch::CATMARK_QUAD_FACE_VERTEX;
if (hasQuadFaceVertexKernel)
kernelType = FarKernelBatch::CATMARK_QUAD_FACE_VERTEX;
if (hasTriQuadFaceVertexKernel)
kernelType = FarKernelBatch::CATMARK_TRI_QUAD_FACE_VERTEX;
}
// add a batch for face vertices
if (nFaceVertices > 0) { // in torus case, nfacevertices could be zero
assert(meshFactory->IsKernelTypeSupported(kernelType));
if (kernelType == FarKernelBatch::CATMARK_FACE_VERTEX) {
batches->push_back(FarKernelBatch( kernelType,
level,
@ -189,19 +207,19 @@ FarCatmarkSubdivisionTablesFactory<T,U>::Create( FarMeshFactory<T,U> * meshFacto
// Edge vertices
// Triangular interpolation mode :
// see "smoothtriangle" tag introduced in prman 3.9 and HbrCatmarkSubdivision<T>
typename HbrCatmarkSubdivision<T>::TriangleSubdivision triangleMethod =
dynamic_cast<HbrCatmarkSubdivision<T> *>(meshFactory->GetHbrMesh()->GetSubdivision())->GetTriangleSubdivisionMethod();
// "For each vertex, gather the 2 vertices from the parent edege and the
// 2 child vertices from the faces to the left and right of that edge.
// Adjust if edge has a crease or is on a boundary."
int nEdgeVertices = (int)tablesFactory._edgeVertsList[level].size();
// add a batch for edge vertices
kernelType = (useRestrictedEdgeVertexKernel ?
FarKernelBatch::CATMARK_RESTRICTED_EDGE_VERTEX :
FarKernelBatch::CATMARK_EDGE_VERTEX);
if (nEdgeVertices > 0)
batches->push_back(FarKernelBatch( FarKernelBatch::CATMARK_EDGE_VERTEX,
assert(meshFactory->IsKernelTypeSupported(kernelType));
batches->push_back(FarKernelBatch( kernelType,
level,
0,
0,
@ -227,8 +245,20 @@ FarCatmarkSubdivisionTablesFactory<T,U>::Create( FarMeshFactory<T,U> * meshFacto
float faceWeight=0.5f, vertWeight=0.5f;
// in the case of a fractional sharpness, set the adjacent faces vertices
if (!e->IsBoundary() && esharp <= 1.0f) {
if (kernelType == FarKernelBatch::CATMARK_RESTRICTED_EDGE_VERTEX) {
// in the case of a sharp edge, repeat the endpoint vertices
if (!e->IsBoundary() && esharp == HbrHalfedge<T>::k_Smooth) {
HbrFace<T>* rf = e->GetRightFace();
HbrFace<T>* lf = e->GetLeftFace();
E_IT[4*i+2] = remap[lf->Subdivide()->GetID()];
E_IT[4*i+3] = remap[rf->Subdivide()->GetID()];
} else {
E_IT[4*i+2] = E_IT[4*i+0];
E_IT[4*i+3] = E_IT[4*i+1];
}
} else if (!e->IsBoundary() && esharp <= HbrHalfedge<T>::k_Sharp) {
// in the case of a fractional sharpness, set the adjacent faces vertices
float leftWeight, rightWeight;
HbrFace<T>* rf = e->GetRightFace();
@ -250,11 +280,14 @@ FarCatmarkSubdivisionTablesFactory<T,U>::Create( FarMeshFactory<T,U> * meshFacto
E_IT[4*i+2] = -1;
E_IT[4*i+3] = -1;
}
E_W[2*i+0] = vertWeight;
E_W[2*i+1] = faceWeight;
if (kernelType == FarKernelBatch::CATMARK_EDGE_VERTEX) {
E_W[2*i+0] = vertWeight;
E_W[2*i+1] = faceWeight;
}
}
E_IT += 4 * nEdgeVertices;
E_W += 2 * nEdgeVertices;
if (kernelType == FarKernelBatch::CATMARK_EDGE_VERTEX)
E_W += 2 * nEdgeVertices;
// Vertex vertices

View File

@ -96,6 +96,9 @@ FarDispatcher::ApplyKernel(CONTROLLER *controller, CONTEXT *context, FarKernelBa
case FarKernelBatch::CATMARK_EDGE_VERTEX:
controller->ApplyCatmarkEdgeVerticesKernel(batch, context);
break;
case FarKernelBatch::CATMARK_RESTRICTED_EDGE_VERTEX:
controller->ApplyCatmarkRestrictedEdgeVerticesKernel(batch, context);
break;
case FarKernelBatch::CATMARK_VERT_VERTEX_B:
controller->ApplyCatmarkVertexVerticesKernelB(batch, context);
break;
@ -185,6 +188,9 @@ public:
template <class CONTEXT>
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, CONTEXT *context) const;
template <class CONTEXT>
void ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, CONTEXT *context) const;
template <class CONTEXT>
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, CONTEXT *context) const;
@ -325,6 +331,21 @@ FarComputeController::ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch
vsrc );
}
template <class CONTEXT> void
FarComputeController::ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, CONTEXT *context) const {
typename CONTEXT::VertexType *vsrc = &context->GetVertices().at(0);
FarSubdivisionTables const * subdivision = context->GetSubdivisionTables();
assert(subdivision);
subdivision->computeCatmarkRestrictedEdgePoints( batch.GetVertexOffset(),
batch.GetTableOffset(),
batch.GetStart(),
batch.GetEnd(),
vsrc );
}
template <class CONTEXT> void
FarComputeController::ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, CONTEXT *context) const {

View File

@ -65,10 +65,12 @@ class FarKernelBatch {
public:
enum KernelType {
CATMARK_FACE_VERTEX = 1,
FIRST_KERNEL_TYPE = 1,
CATMARK_FACE_VERTEX = FIRST_KERNEL_TYPE,
CATMARK_QUAD_FACE_VERTEX,
CATMARK_TRI_QUAD_FACE_VERTEX,
CATMARK_EDGE_VERTEX,
CATMARK_RESTRICTED_EDGE_VERTEX,
CATMARK_VERT_VERTEX_A1,
CATMARK_VERT_VERTEX_A2,
CATMARK_VERT_VERTEX_B,
@ -80,7 +82,8 @@ public:
BILINEAR_EDGE_VERTEX,
BILINEAR_VERT_VERTEX,
HIERARCHICAL_EDIT,
USER_DEFINED_KERNEL_START
NUM_KERNEL_TYPES,
USER_DEFINED_KERNEL_START = NUM_KERNEL_TYPES
};
/// \brief Constructor.

View File

@ -93,8 +93,13 @@ public:
/// @param patchType The type of patch to create: QUADS or TRIANGLES
/// Note : patchType is only applicable if adaptive is false
///
/// @param kernelTypes A zero-terminated list of kernel types supported by the
/// controller.
/// Note : NULL indicates that all kernel types are supported
///
FarMeshFactory(HbrMesh<T> * mesh, int maxlevel, bool adaptive=false, int firstLevel=-1,
FarPatchTables::Type patchType=FarPatchTables::QUADS);
FarPatchTables::Type patchType=FarPatchTables::QUADS,
const int * kernelTypes = NULL);
/// \brief Create a table-based mesh representation
///
@ -150,13 +155,24 @@ public:
///
int GetVertexID( HbrVertex<T> * v );
/// \brief Returns a the mapping between HbrVertex<T>->GetID() and Far
/// \brief Returns the mapping between HbrVertex<T>->GetID() and Far
/// vertices indices
///
/// @return the table that maps HbrMesh to FarMesh vertex indices
///
std::vector<int> const & GetRemappingTable( ) const { return _remapTable; }
/// \brief Returns true if the specified kernel type is supported by the
/// controller
///
/// @return true if the kernel type is supported
///
bool IsKernelTypeSupported(int kernelType) const {
assert(kernelType >= FarKernelBatch::FIRST_KERNEL_TYPE &&
kernelType < FarKernelBatch::NUM_KERNEL_TYPES);
return _supportedKernelTypes[kernelType];
}
private:
friend class FarBilinearSubdivisionTablesFactory<T,U>;
friend class FarCatmarkSubdivisionTablesFactory<T,U>;
@ -231,6 +247,8 @@ private:
FarPatchTables::Type _patchType;
bool _supportedKernelTypes[FarKernelBatch::NUM_KERNEL_TYPES];
// remapping table to translate vertex ID's between Hbr indices and the
// order of the same vertices in the tables
std::vector<int> _remapTable;
@ -614,7 +632,8 @@ FarMeshFactory<T,U>::refineAdaptive( HbrMesh<T> * mesh, int maxIsolate ) {
// random order, so the builder runs 2 passes over the entire vertex list to
// gather the counters needed to generate the indexing tables.
template <class T, class U>
FarMeshFactory<T,U>::FarMeshFactory( HbrMesh<T> * mesh, int maxlevel, bool adaptive, int firstlevel, FarPatchTables::Type patchType ) :
FarMeshFactory<T,U>::FarMeshFactory( HbrMesh<T> * mesh, int maxlevel, bool adaptive,
int firstlevel, FarPatchTables::Type patchType, const int * kernelTypes ) :
_hbrMesh(mesh),
_adaptive(adaptive),
_maxlevel(maxlevel),
@ -630,6 +649,17 @@ FarMeshFactory<T,U>::FarMeshFactory( HbrMesh<T> * mesh, int maxlevel, bool adapt
_numCoarseVertices = mesh->GetNumVertices();
_numPtexFaces = getNumPtexFaces(mesh);
// Select the kernel types that are supported by the controller.
for (int i = FarKernelBatch::FIRST_KERNEL_TYPE; i < FarKernelBatch::NUM_KERNEL_TYPES; ++i) {
_supportedKernelTypes[i] = kernelTypes ? false : true;
}
for (int i = kernelTypes ? *kernelTypes++ : 0; i; i = *kernelTypes++) {
assert(i >= FarKernelBatch::FIRST_KERNEL_TYPE &&
i < FarKernelBatch::NUM_KERNEL_TYPES);
_supportedKernelTypes[i] = true;
}
// Subdivide the Hbr mesh up to maxlevel.
//
// Note : using a placeholder vertex class 'T' can greatly speed up the

View File

@ -175,6 +175,10 @@ public:
template <class U>
void computeCatmarkEdgePoints(int vertexOffset, int tableOffset, int start, int end, U * vsrc) const;
// Compute-kernel applied to vertices resulting from the refinement of a smooth or sharp edge.
template <class U>
void computeCatmarkRestrictedEdgePoints(int vertexOffset, int tableOffset, int start, int end, U * vsrc) const;
// Compute-kernel applied to vertices resulting from the refinement of a vertex
// Kernel "A" Handles the k_Crease and k_Corner rules
template <class U>
@ -471,6 +475,33 @@ FarSubdivisionTables::computeCatmarkEdgePoints( int vertexOffset, int tableOffse
}
}
//
// Restricted edge-vertices compute Kernel - completely re-entrant
//
template <class U> void
FarSubdivisionTables::computeCatmarkRestrictedEdgePoints( int vertexOffset, int tableOffset, int start, int end, U * vsrc ) const {
U * vdst = vsrc + vertexOffset + start;
for (int i=start+tableOffset; i<end+tableOffset; ++i, ++vdst ) {
vdst->Clear();
int eidx0 = this->_E_IT[4*i+0],
eidx1 = this->_E_IT[4*i+1],
eidx2 = this->_E_IT[4*i+2],
eidx3 = this->_E_IT[4*i+3];
vdst->AddWithWeight( vsrc[eidx0], 0.25f );
vdst->AddWithWeight( vsrc[eidx1], 0.25f );
vdst->AddWithWeight( vsrc[eidx2], 0.25f );
vdst->AddWithWeight( vsrc[eidx3], 0.25f );
vdst->AddVaryingWithWeight( vsrc[eidx0], 0.5f );
vdst->AddVaryingWithWeight( vsrc[eidx1], 0.5f );
}
}
//
// Vertex-vertices compute Kernels "A" and "B" - completely re-entrant
//

View File

@ -106,6 +106,8 @@ protected:
// Returns an integer based on the order in which the kernels are applied
static int GetMaskRanking( unsigned char mask0, unsigned char mask1 );
bool HasFractionalEdgeSharpness() const { return _hasFractionalEdgeSharpness; }
// Per-level counters and offsets for each type of vertex (face,edge,vert)
std::vector<int> _faceVertIdx,
_edgeVertIdx,
@ -128,6 +130,9 @@ protected:
// Number of coarse triangle faces
int _numCoarseTriangleFaces;
// Indicates if an edge has a fractional (non-integer) sharpness
bool _hasFractionalEdgeSharpness;
private:
// Returns the subdivision level of a vertex
@ -155,7 +160,8 @@ FarSubdivisionTablesFactory<T,U>::FarSubdivisionTablesFactory( HbrMesh<T> const
_vertVertsList(maxlevel+1),
_minCoarseFaceValence(0),
_maxCoarseFaceValence(0),
_numCoarseTriangleFaces(0)
_numCoarseTriangleFaces(0),
_hasFractionalEdgeSharpness(false)
{
assert( mesh );
@ -199,9 +205,14 @@ FarSubdivisionTablesFactory<T,U>::FarSubdivisionTablesFactory( HbrMesh<T> const
if (valence == 3)
++_numCoarseTriangleFaces;
}
} else if (v->GetParentEdge())
} else if (v->GetParentEdge()) {
edgeCounts[depth]++;
else if (v->GetParentVertex()) {
// Determine if any edges have fractional sharpness.
float sharpness = v->GetParentEdge()->GetSharpness();
if (sharpness > HbrHalfedge<T>::k_Smooth && sharpness < HbrHalfedge<T>::k_Sharp)
_hasFractionalEdgeSharpness = true;
} else if (v->GetParentVertex()) {
vertCounts[depth]++;
_vertVertsValenceSum+=sumVertVertexValence(v);
}
@ -461,6 +472,10 @@ FarSubdivisionTablesFactory<T,U>::Splice(FarMeshVector const &meshes, FarKernelB
}
}
// pad E_W to align with E_IT when only some meshes use CATMARK_RESTRICTED_EDGE_VERTEX kernel
if (total_E_W != 0)
total_E_W = total_E_IT / 2;
FarSubdivisionTables *result = new FarSubdivisionTables(maxLevel, scheme);
result->_F_ITa.resize(total_F_ITa);
@ -543,7 +558,10 @@ FarSubdivisionTablesFactory<T,U>::Splice(FarMeshVector const &meshes, FarKernelB
// copy edge tables
E_IT = copyWithOffsetE_IT(E_IT, tables->Get_E_IT(), vertexOffsets[i]);
E_W = copyWithOffset(E_W, tables->Get_E_W(), 0);
if (!tables->Get_E_W().empty())
E_W = copyWithOffset(E_W, tables->Get_E_W(), 0);
else
E_W += tables->Get_E_IT().size() / 2;
// copy vert tables
if (scheme == FarSubdivisionTables::CATMARK or
@ -577,6 +595,7 @@ FarSubdivisionTablesFactory<T,U>::Splice(FarMeshVector const &meshes, FarKernelB
batch._tableOffset += F_IToffsets[i];
} else if (batch._kernelType == FarKernelBatch::CATMARK_EDGE_VERTEX or
batch._kernelType == FarKernelBatch::CATMARK_RESTRICTED_EDGE_VERTEX or
batch._kernelType == FarKernelBatch::LOOP_EDGE_VERTEX or
batch._kernelType == FarKernelBatch::BILINEAR_EDGE_VERTEX) {

View File

@ -266,6 +266,34 @@ OsdCLComputeController::ApplyCatmarkEdgeVerticesKernel(
CL_CHECK_ERROR(ciErrNum, "edge kernel %d\n", ciErrNum);
}
void
OsdCLComputeController::ApplyCatmarkRestrictedEdgeVerticesKernel(
FarKernelBatch const &batch, OsdCLComputeContext const *context) const {
assert(context);
cl_int ciErrNum;
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
cl_kernel kernel = _currentBindState.kernelBundle->GetCatmarkRestrictedEdgeKernel();
cl_mem E_IT = context->GetTable(FarSubdivisionTables::E_IT)->GetDevicePtr();
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &E_IT);
clSetKernelArg(kernel, 3, sizeof(int), &_currentBindState.vertexDesc.offset);
clSetKernelArg(kernel, 4, sizeof(int), &_currentBindState.varyingDesc.offset);
clSetKernelArg(kernel, 5, sizeof(int), batch.GetVertexOffsetPtr());
clSetKernelArg(kernel, 6, sizeof(int), batch.GetTableOffsetPtr());
clSetKernelArg(kernel, 7, sizeof(int), batch.GetStartPtr());
clSetKernelArg(kernel, 8, sizeof(int), batch.GetEndPtr());
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
kernel, 1, NULL, globalWorkSize,
NULL, 0, NULL, NULL);
CL_CHECK_ERROR(ciErrNum, "restricted edge kernel %d\n", ciErrNum);
}
void
OsdCLComputeController::ApplyCatmarkVertexVerticesKernelB(
FarKernelBatch const &batch, OsdCLComputeContext const *context) const {

View File

@ -143,6 +143,8 @@ protected:
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelA1(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -306,6 +306,41 @@ __kernel void computeEdge(__global float *vertex,
}
}
__kernel void computeRestrictedEdge(__global float *vertex,
__global float *varying,
__global int *E_IT,
int vertexOffset, int varyingOffset,
int offset, int tableOffset,
int start, int end) {
int i = start + get_global_id(0) + tableOffset;
int vid = start + get_global_id(0) + offset;
int eidx0 = E_IT[4*i+0];
int eidx1 = E_IT[4*i+1];
int eidx2 = E_IT[4*i+2];
int eidx3 = E_IT[4*i+3];
vertex += vertexOffset;
varying += (varying ? varyingOffset :0);
struct Vertex dst;
struct Varying dstVarying;
clearVertex(&dst);
clearVarying(&dstVarying);
addWithWeight(&dst, vertex, eidx0, 0.25f);
addWithWeight(&dst, vertex, eidx1, 0.25f);
addWithWeight(&dst, vertex, eidx2, 0.25f);
addWithWeight(&dst, vertex, eidx3, 0.25f);
writeVertex(vertex, vid, &dst);
if (varying) {
addVaryingWithWeight(&dstVarying, varying, eidx0, 0.5f);
addVaryingWithWeight(&dstVarying, varying, eidx1, 0.5f);
writeVarying(varying, vid, &dstVarying);
}
}
__kernel void computeVertexA(__global float *vertex,
__global float *varying,
__global int *V_ITa,

View File

@ -54,6 +54,7 @@ OsdCLKernelBundle::OsdCLKernelBundle() :
_clCatmarkQuadFace(NULL),
_clCatmarkTriQuadFace(NULL),
_clCatmarkEdge(NULL),
_clCatmarkRestrictedEdge(NULL),
_clCatmarkVertexA(NULL),
_clCatmarkVertexB(NULL),
_clLoopEdge(NULL),
@ -80,6 +81,8 @@ OsdCLKernelBundle::~OsdCLKernelBundle() {
clReleaseKernel(_clCatmarkTriQuadFace);
if (_clCatmarkEdge)
clReleaseKernel(_clCatmarkEdge);
if (_clCatmarkRestrictedEdge)
clReleaseKernel(_clCatmarkRestrictedEdge);
if (_clCatmarkVertexA)
clReleaseKernel(_clCatmarkVertexA);
if (_clCatmarkVertexB)
@ -157,6 +160,7 @@ OsdCLKernelBundle::Compile(cl_context clContext,
_clCatmarkQuadFace = buildKernel(_clProgram, "computeQuadFace");
_clCatmarkTriQuadFace = buildKernel(_clProgram, "computeTriQuadFace");
_clCatmarkEdge = buildKernel(_clProgram, "computeEdge");
_clCatmarkRestrictedEdge = buildKernel(_clProgram, "computeRestrictedEdge");
_clCatmarkVertexA = buildKernel(_clProgram, "computeVertexA");
_clCatmarkVertexB = buildKernel(_clProgram, "computeVertexB");
_clLoopEdge = buildKernel(_clProgram, "computeEdge");

View File

@ -56,6 +56,8 @@ public:
cl_kernel GetCatmarkEdgeKernel() const { return _clCatmarkEdge; }
cl_kernel GetCatmarkRestrictedEdgeKernel() const { return _clCatmarkRestrictedEdge; }
cl_kernel GetCatmarkVertexKernelA() const { return _clCatmarkVertexA; }
cl_kernel GetCatmarkVertexKernelB() const { return _clCatmarkVertexB; }
@ -99,6 +101,7 @@ protected:
_clCatmarkQuadFace,
_clCatmarkTriQuadFace,
_clCatmarkEdge,
_clCatmarkRestrictedEdge,
_clCatmarkVertexA,
_clCatmarkVertexB,
_clLoopEdge,

View File

@ -130,6 +130,19 @@ OsdCpuComputeController::ApplyCatmarkEdgeVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdCpuComputeController::ApplyCatmarkRestrictedEdgeVerticesKernel(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
assert(context);
OsdCpuComputeRestrictedEdge(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdCpuComputeController::ApplyCatmarkVertexVerticesKernelB(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {

View File

@ -126,6 +126,8 @@ protected:
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelA1(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -231,6 +231,39 @@ void OsdCpuComputeEdge(
}
}
void OsdCpuComputeRestrictedEdge(
float *vertex, float *varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *E_IT, int vertexOffset, int tableOffset,
int start, int end) {
float *vertexResults = (float*)alloca(vertexDesc.length * sizeof(float));
float *varyingResults = (float*)alloca(varyingDesc.length * sizeof(float));
for (int i = start + tableOffset; i < end + tableOffset; i++) {
int eidx0 = E_IT[4*i+0];
int eidx1 = E_IT[4*i+1];
int eidx2 = E_IT[4*i+2];
int eidx3 = E_IT[4*i+3];
int dstIndex = i + vertexOffset - tableOffset;
clear(vertexResults, vertexDesc);
clear(varyingResults, varyingDesc);
addWithWeight(vertexResults, vertex, eidx0, 0.25f, vertexDesc);
addWithWeight(vertexResults, vertex, eidx1, 0.25f, vertexDesc);
addWithWeight(vertexResults, vertex, eidx2, 0.25f, vertexDesc);
addWithWeight(vertexResults, vertex, eidx3, 0.25f, vertexDesc);
addWithWeight(varyingResults, varying, eidx0, 0.5f, varyingDesc);
addWithWeight(varyingResults, varying, eidx1, 0.5f, varyingDesc);
copy(vertex, vertexResults, dstIndex, vertexDesc);
copy(varying, varyingResults, dstIndex, varyingDesc);
}
}
void OsdCpuComputeVertexA(
float *vertex, float *varying,
OsdVertexBufferDescriptor const &vertexDesc,

View File

@ -169,6 +169,13 @@ void OsdCpuComputeEdge(float *vertex, float * varying,
int vertexOffset, int tableOffset,
int start, int end);
void OsdCpuComputeRestrictedEdge(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *E_IT,
int vertexOffset, int tableOffset,
int start, int end);
template<int numVertexElements>
void ComputeVertexAKernel( float *vertex,
const int *V_ITa,

View File

@ -50,6 +50,11 @@ void OsdCudaComputeEdge(float *vertex, float *varying,
int varyingLength, int varyingStride,
int *E_IT, float *E_W, int offset, int tableOffset, int start, int end);
void OsdCudaComputeRestrictedEdge(float *vertex, float *varying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *E_IT, int offset, int tableOffset, int start, int end);
void OsdCudaComputeVertexA(float *vertex, float *varying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
@ -244,6 +249,26 @@ OsdCudaComputeController::ApplyCatmarkEdgeVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdCudaComputeController::ApplyCatmarkRestrictedEdgeVerticesKernel(
FarKernelBatch const &batch, OsdCudaComputeContext const *context) const {
assert(context);
const OsdCudaTable * E_IT = context->GetTable(FarSubdivisionTables::E_IT);
assert(E_IT);
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
OsdCudaComputeRestrictedEdge(
vertex, varying,
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
static_cast<int*>(E_IT->GetCudaMemory()),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdCudaComputeController::ApplyCatmarkVertexVerticesKernelB(
FarKernelBatch const &batch, OsdCudaComputeContext const *context) const {

View File

@ -126,6 +126,8 @@ protected:
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelA1(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -379,6 +379,71 @@ computeEdge(float *fVertex, float *fVarying,
}
}
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
computeRestrictedEdge(float *fVertex, float *fVaryings, int *E0_IT, int offset, int tableOffset, int start, int end)
{
DeviceVertex<NUM_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_VERTEX_ELEMENTS>*)fVertex;
DeviceVertex<NUM_VARYING_ELEMENTS> *varyings = (DeviceVertex<NUM_VARYING_ELEMENTS>*)fVaryings;
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
i < end + tableOffset;
i+= blockDim.x * gridDim.x){
int eidx0 = E0_IT[4*i+0];
int eidx1 = E0_IT[4*i+1];
int eidx2 = E0_IT[4*i+2];
int eidx3 = E0_IT[4*i+3];
DeviceVertex<NUM_VERTEX_ELEMENTS> dst;
dst.clear();
dst.addWithWeight(&vertex[eidx0], 0.25f);
dst.addWithWeight(&vertex[eidx1], 0.25f);
dst.addWithWeight(&vertex[eidx2], 0.25f);
dst.addWithWeight(&vertex[eidx3], 0.25f);
vertex[offset+i-tableOffset] = dst;
if(NUM_VARYING_ELEMENTS > 0){
DeviceVertex<NUM_VARYING_ELEMENTS> dstVarying;
dstVarying.clear();
dstVarying.addWithWeight(&varyings[eidx0], 0.5f);
dstVarying.addWithWeight(&varyings[eidx1], 0.5f);
varyings[offset+i-tableOffset] = dstVarying;
}
}
}
__global__ void
computeRestrictedEdge(float *fVertex, float *fVarying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *E0_IT, int offset, int tableOffset, int start, int end)
{
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
i < end + tableOffset;i+= blockDim.x * gridDim.x) {
int eidx0 = E0_IT[4*i+0];
int eidx1 = E0_IT[4*i+1];
int eidx2 = E0_IT[4*i+2];
int eidx3 = E0_IT[4*i+3];
float *dstVertex = fVertex + (i+offset-tableOffset)*vertexStride;
clear(dstVertex, vertexLength);
addWithWeight(dstVertex, fVertex + eidx0*vertexStride, 0.25f, vertexLength);
addWithWeight(dstVertex, fVertex + eidx1*vertexStride, 0.25f, vertexLength);
addWithWeight(dstVertex, fVertex + eidx2*vertexStride, 0.25f, vertexLength);
addWithWeight(dstVertex, fVertex + eidx3*vertexStride, 0.25f, vertexLength);
if (varyingLength > 0){
float *dstVarying = fVarying + (i+offset-tableOffset)*varyingStride;
clear(dstVarying, varyingLength);
addWithWeight(dstVarying, fVarying + eidx0*varyingStride, 0.5f, varyingLength);
addWithWeight(dstVarying, fVarying + eidx1*varyingStride, 0.5f, varyingLength);
}
}
}
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
computeVertexA(float *fVertex, float *fVaryings, int *V0_ITa, float *V0_S, int offset, int tableOffset, int start, int end, int pass)
{
@ -847,6 +912,22 @@ void OsdCudaComputeEdge(float *vertex, float *varying,
E_IT, E_W, offset, tableOffset, start, end);
}
void OsdCudaComputeRestrictedEdge(float *vertex, float *varying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *E_IT, int offset, int tableOffset, int start, int end)
{
//computeRestrictedEdge<0, 3><<<512,32>>>(vertex, varying, E_IT, offset, start, end);
OPT_KERNEL(0, 0, computeRestrictedEdge, 512, 32, (vertex, varying, E_IT, offset, tableOffset, start, end));
OPT_KERNEL(0, 3, computeRestrictedEdge, 512, 32, (vertex, varying, E_IT, offset, tableOffset, start, end));
OPT_KERNEL(3, 0, computeRestrictedEdge, 512, 32, (vertex, varying, E_IT, offset, tableOffset, start, end));
OPT_KERNEL(3, 3, computeRestrictedEdge, 512, 32, (vertex, varying, E_IT, offset, tableOffset, start, end));
computeRestrictedEdge<<<512, 32>>>(vertex, varying,
vertexLength, vertexStride, varyingLength, varyingStride,
E_IT, offset, tableOffset, start, end);
}
void OsdCudaComputeVertexA(float *vertex, float *varying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,

View File

@ -40,8 +40,11 @@ void
OsdD3D11ComputeTable::createBuffer(int size, const void *ptr, DXGI_FORMAT format, int numElements,
ID3D11DeviceContext *deviceContext) {
if (size == 0)
if (size == 0) {
_buffer = NULL;
_srv = NULL;
return;
}
ID3D11Device *device = NULL;
deviceContext->GetDevice(&device);

View File

@ -187,6 +187,18 @@ OsdD3D11ComputeController::ApplyCatmarkTriQuadFaceVerticesKernel(
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
}
void
OsdD3D11ComputeController::ApplyCatmarkRestrictedEdgeVerticesKernel(
FarKernelBatch const &batch, OsdD3D11ComputeContext const *context) const {
assert(context);
_currentBindState.kernelBundle->ApplyCatmarkRestrictedEdgeVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd(),
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
}
void
OsdD3D11ComputeController::ApplyCatmarkEdgeVerticesKernel(
FarKernelBatch const &batch, OsdD3D11ComputeContext const *context) const {

View File

@ -142,6 +142,8 @@ protected:
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelA1(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -55,6 +55,7 @@ OsdD3D11ComputeKernelBundle::OsdD3D11ComputeKernelBundle(
_kernelComputeQuadFace(0),
_kernelComputeTriQuadFace(0),
_kernelComputeEdge(0),
_kernelComputeRestrictedEdge(0),
_kernelComputeBilinearEdge(0),
_kernelComputeVertex(0),
_kernelComputeVertexA(0),
@ -74,6 +75,7 @@ OsdD3D11ComputeKernelBundle::~OsdD3D11ComputeKernelBundle() {
SAFE_RELEASE(_kernelComputeQuadFace);
SAFE_RELEASE(_kernelComputeTriQuadFace);
SAFE_RELEASE(_kernelComputeEdge);
SAFE_RELEASE(_kernelComputeRestrictedEdge);
SAFE_RELEASE(_kernelComputeBilinearEdge);
SAFE_RELEASE(_kernelComputeVertex);
SAFE_RELEASE(_kernelComputeVertexA);
@ -176,6 +178,9 @@ OsdD3D11ComputeKernelBundle::Compile(
_classLinkage->GetClassInstance(
"catmarkComputeEdge", 0, &_kernelComputeEdge);
assert(_kernelComputeEdge);
_classLinkage->GetClassInstance(
"catmarkComputeRestrictedEdge", 0, &_kernelComputeRestrictedEdge);
assert(_kernelComputeRestrictedEdge);
_classLinkage->GetClassInstance(
"bilinearComputeEdge", 0, &_kernelComputeBilinearEdge);
assert(_kernelComputeBilinearEdge);
@ -360,6 +365,22 @@ OsdD3D11ComputeKernelBundle::ApplyCatmarkEdgeVerticesKernel(
dispatchCompute(_kernelComputeEdge, args);
}
void
OsdD3D11ComputeKernelBundle::ApplyCatmarkRestrictedEdgeVerticesKernel(
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset) {
KernelCB args;
ZeroMemory(&args, sizeof(args));
args.vertexOffset = vertexOffset;
args.tableOffset = tableOffset;
args.indexStart = start;
args.indexEnd = end;
args.vertexBaseOffset = vertexBaseOffset;
args.varyingBaseOffset = varyingBaseOffset;
dispatchCompute(_kernelComputeRestrictedEdge, args);
}
void
OsdD3D11ComputeKernelBundle::ApplyCatmarkVertexVerticesKernelB(
int vertexOffset, int tableOffset, int start, int end,

View File

@ -79,6 +79,10 @@ public:
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset);
void ApplyCatmarkRestrictedEdgeVerticesKernel(
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset);
void ApplyCatmarkVertexVerticesKernelB(
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset);
@ -140,12 +144,14 @@ protected:
ID3D11ClassInstance * _kernelComputeFace; // general face-vertex kernel (all schemes)
ID3D11ClassInstance * _kernelComputeQuadFace; // quad face-vertex kernel (catmark)
ID3D11ClassInstance * _kernelComputeQuadFace; // quad face-vertex kernel (catmark scheme)
ID3D11ClassInstance * _kernelComputeTriQuadFace; // tri-quad face-vertex kernel (catmark)
ID3D11ClassInstance * _kernelComputeTriQuadFace; // tri-quad face-vertex kernel (catmark scheme)
ID3D11ClassInstance * _kernelComputeEdge; // edge-vertex kernel (catmark + loop schemes)
ID3D11ClassInstance * _kernelComputeRestrictedEdge; // edge-vertex kernel (catmark scheme)
ID3D11ClassInstance * _kernelComputeBilinearEdge; // edge-vertex kernel (bilinear scheme)
ID3D11ClassInstance * _kernelComputeVertex; // vertex-vertex kernel (bilinear scheme)

View File

@ -135,6 +135,20 @@ OsdGcdComputeController::ApplyCatmarkEdgeVerticesKernel(
_gcd_queue);
}
void
OsdGcdComputeController::ApplyCatmarkRestrictedEdgeVerticesKernel(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
assert(context);
OsdGcdComputeRestrictedEdge(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(),
_gcd_queue);
}
void
OsdGcdComputeController::ApplyCatmarkVertexVerticesKernelB(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {

View File

@ -126,6 +126,8 @@ protected:
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelA1(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -151,6 +151,29 @@ void OsdGcdComputeEdge(
vertexOffset, tableOffset, start_e, end_e);
}
void OsdGcdComputeRestrictedEdge(
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *E_IT, int vertexOffset, int tableOffset, int start, int end,
dispatch_queue_t gcdq) {
const int workSize = end-start;
dispatch_apply(workSize/GCD_WORK_STRIDE, gcdq, ^(size_t blockIdx){
const int start_i = start + blockIdx*GCD_WORK_STRIDE;
const int end_i = start_i + GCD_WORK_STRIDE;
OsdCpuComputeRestrictedEdge(vertex, varying, vertexDesc, varyingDesc,
E_IT,
vertexOffset, tableOffset, start_i, end_i);
});
const int start_e = end - workSize%GCD_WORK_STRIDE;
const int end_e = end;
if (start_e < end_e)
OsdCpuComputeRestrictedEdge(vertex, varying, vertexDesc, varyingDesc,
E_IT,
vertexOffset, tableOffset, start_e, end_e);
}
void OsdGcdComputeVertexA(
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
@ -281,7 +304,7 @@ void OsdGcdComputeBilinearVertex(
void OsdGcdEditVertexAdd(
float * vertex,
OsdVertexBufferDescriptor const &vertexDesc,
int primVarOffset, int primVarWidth,
int primVarOffset, int /*primVarWidth*/,
int vertexOffset, int tableOffset,
int start, int end,
const unsigned int *editIndices, const float *editValues,
@ -304,7 +327,7 @@ void OsdGcdEditVertexAdd(
void OsdGcdEditVertexSet(
float * vertex,
OsdVertexBufferDescriptor const &vertexDesc,
int primVarOffset, int primVarWidth,
int primVarOffset, int /*primVarWidth*/,
int vertexOffset, int tableOffset,
int start, int end,
const unsigned int *editIndices, const float *editValues,

View File

@ -66,6 +66,14 @@ void OsdGcdComputeEdge(float *vertex, float * varying,
int start, int end,
dispatch_queue_t gcdq);
void OsdGcdComputeRestrictedEdge(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *E_IT,
int vertexOffset, int tableOffset,
int start, int end,
dispatch_queue_t gcdq);
void OsdGcdComputeVertexA(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,

View File

@ -172,6 +172,17 @@ OsdGLSLComputeController::ApplyCatmarkEdgeVerticesKernel(
batch.GetStart(), batch.GetEnd());
}
void
OsdGLSLComputeController::ApplyCatmarkRestrictedEdgeVerticesKernel(
FarKernelBatch const &batch, OsdGLSLComputeContext const *context) const {
assert(context);
_currentBindState.kernelBundle->ApplyCatmarkRestrictedEdgeVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd());
}
void
OsdGLSLComputeController::ApplyCatmarkVertexVerticesKernelB(
FarKernelBatch const &batch, OsdGLSLComputeContext const *context) const {

View File

@ -135,6 +135,8 @@ protected:
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelA1(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -235,7 +235,7 @@ void catmarkComputeTriQuadFace()
writeVertex(vid, dst);
}
// Edge-vertices compute Kernepl
// Edge-vertices compute Kernel
subroutine(computeKernelType)
void catmarkComputeEdge()
{
@ -272,6 +272,34 @@ void catmarkComputeEdge()
writeVertex(vid, dst);
}
// Restricted edge-vertices compute Kernel
subroutine(computeKernelType)
void catmarkComputeRestrictedEdge()
{
int i = int(gl_GlobalInvocationID.x) + indexStart;
if (i >= indexEnd) return;
int vid = i + vertexOffset;
i += tableOffset;
Vertex dst;
clear(dst);
int eidx0 = _E_IT[4*i+0];
int eidx1 = _E_IT[4*i+1];
int eidx2 = _E_IT[4*i+2];
int eidx3 = _E_IT[4*i+3];
ivec4 eidx = ivec4(eidx0, eidx1, eidx2, eidx3);
addWithWeight(dst, readVertex(eidx.x), 0.25f);
addWithWeight(dst, readVertex(eidx.y), 0.25f);
addWithWeight(dst, readVertex(eidx.z), 0.25f);
addWithWeight(dst, readVertex(eidx.w), 0.25f);
addVaryingWithWeight(dst, readVertex(eidx.x), 0.5f);
addVaryingWithWeight(dst, readVertex(eidx.y), 0.5f);
writeVertex(vid, dst);
}
// Edge-vertices compute Kernel (bilinear scheme)
subroutine(computeKernelType)
void bilinearComputeEdge()

View File

@ -126,6 +126,9 @@ OsdGLSLComputeKernelBundle::Compile(
_subComputeEdge = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"catmarkComputeEdge");
_subComputeRestrictedEdge = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"catmarkComputeRestrictedEdge");
_subComputeBilinearEdge = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"bilinearComputeEdge");
@ -259,6 +262,14 @@ OsdGLSLComputeKernelBundle::ApplyCatmarkEdgeVerticesKernel(
dispatchCompute(vertexOffset, tableOffset, start, end);
}
void
OsdGLSLComputeKernelBundle::ApplyCatmarkRestrictedEdgeVerticesKernel(
int vertexOffset, int tableOffset, int start, int end) {
glUniformSubroutinesuiv(GL_COMPUTE_SHADER, 1, &_subComputeRestrictedEdge);
dispatchCompute(vertexOffset, tableOffset, start, end);
}
void
OsdGLSLComputeKernelBundle::ApplyCatmarkVertexVerticesKernelB(
int vertexOffset, int tableOffset, int start, int end) {

View File

@ -66,6 +66,9 @@ public:
void ApplyCatmarkEdgeVerticesKernel(
int vertexOffset, int tableOffset, int start, int end);
void ApplyCatmarkRestrictedEdgeVerticesKernel(
int vertexOffset, int tableOffset, int start, int end);
void ApplyCatmarkVertexVerticesKernelB(
int vertexOffset, int tableOffset, int start, int end);
@ -136,12 +139,14 @@ protected:
GLuint _subComputeFace; // general face-vertex kernel (all schemes)
GLuint _subComputeQuadFace; // quad face-vertex kernel (catmark)
GLuint _subComputeQuadFace; // quad face-vertex kernel (catmark scheme)
GLuint _subComputeTriQuadFace; // tri-quad face-vertex kernel (catmark)
GLuint _subComputeTriQuadFace; // tri-quad face-vertex kernel (catmark scheme)
GLuint _subComputeEdge; // edge-vertex kernel (catmark + loop schemes)
GLuint _subComputeRestrictedEdge; // restricted edge-vertex kernel (catmark scheme)
GLuint _subComputeBilinearEdge; // edge-vertex kernel (bilinear scheme)
GLuint _subComputeVertex; // vertex-vertex kernel (bilinear scheme)

View File

@ -231,7 +231,6 @@ OsdGLSLTransformFeedbackComputeController::ApplyCatmarkTriQuadFaceVerticesKernel
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdGLSLTransformFeedbackComputeController::ApplyCatmarkEdgeVerticesKernel(
FarKernelBatch const &batch, OsdGLSLTransformFeedbackComputeContext const *context) const {
@ -244,6 +243,18 @@ OsdGLSLTransformFeedbackComputeController::ApplyCatmarkEdgeVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdGLSLTransformFeedbackComputeController::ApplyCatmarkRestrictedEdgeVerticesKernel(
FarKernelBatch const &batch, OsdGLSLTransformFeedbackComputeContext const *context) const {
assert(context);
_currentBindState.kernelBundle->ApplyCatmarkRestrictedEdgeVerticesKernel(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdGLSLTransformFeedbackComputeController::ApplyCatmarkVertexVerticesKernelB(
FarKernelBatch const &batch, OsdGLSLTransformFeedbackComputeContext const *context) const {

View File

@ -133,6 +133,8 @@ protected:
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelA1(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -284,6 +284,35 @@ void catmarkComputeEdge()
writeVertex(dst);
}
// Restricted edge-vertices compute Kernel
subroutine(computeKernelType)
void catmarkComputeRestrictedEdge()
{
int i = gl_VertexID + indexStart + tableOffset;
Vertex dst;
clear(dst);
#ifdef OPT_E0_IT_VEC4
ivec4 eidx = texelFetch(_E0_IT, i);
#else
int eidx0 = texelFetch(_E0_IT, 4*i+0).x;
int eidx1 = texelFetch(_E0_IT, 4*i+1).x;
int eidx2 = texelFetch(_E0_IT, 4*i+2).x;
int eidx3 = texelFetch(_E0_IT, 4*i+3).x;
ivec4 eidx = ivec4(eidx0, eidx1, eidx2, eidx3);
#endif
addWithWeight(dst, readVertex(eidx.x), 0.25f);
addWithWeight(dst, readVertex(eidx.y), 0.25f);
addWithWeight(dst, readVertex(eidx.z), 0.25f);
addWithWeight(dst, readVertex(eidx.w), 0.25f);
addVaryingWithWeight(dst, readVertex(eidx.x), 0.5f);
addVaryingWithWeight(dst, readVertex(eidx.y), 0.5f);
writeVertex(dst);
}
// Edge-vertices compute Kernel (bilinear scheme)
subroutine(computeKernelType)
void bilinearComputeEdge()

View File

@ -228,6 +228,7 @@ OsdGLSLTransformFeedbackKernelBundle::Compile(
_subComputeQuadFace = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeQuadFace");
_subComputeTriQuadFace = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeTriQuadFace");
_subComputeEdge = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeEdge");
_subComputeRestrictedEdge = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeRestrictedEdge");
_subComputeBilinearEdge = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "bilinearComputeEdge");
_subComputeVertex = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "bilinearComputeVertex");
_subComputeVertexA = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeVertexA");
@ -406,6 +407,18 @@ OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkEdgeVerticesKernel(
offset, tableOffset, start, end);
}
void
OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkRestrictedEdgeVerticesKernel(
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end) {
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeRestrictedEdge);
transformGpuBufferData(vertexBuffer, varyingBuffer,
vertexOffset, varyingOffset,
offset, tableOffset, start, end);
}
void
OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkVertexVerticesKernelB(
GLuint vertexBuffer, GLuint varyingBuffer,

View File

@ -83,6 +83,11 @@ public:
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end);
void ApplyCatmarkRestrictedEdgeVerticesKernel(
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end);
void ApplyCatmarkVertexVerticesKernelB(
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
@ -191,12 +196,14 @@ protected:
GLuint _subComputeFace; // general face-vertex kernel (all schemes)
GLuint _subComputeQuadFace; // quad face-vertex kernel (catmark)
GLuint _subComputeQuadFace; // quad face-vertex kernel (catmark scheme)
GLuint _subComputeTriQuadFace; // tri-quad face-vertex kernel (catmark)
GLuint _subComputeTriQuadFace; // tri-quad face-vertex kernel (catmark scheme)
GLuint _subComputeEdge; // edge-vertex kernel (catmark + loop schemes)
GLuint _subComputeRestrictedEdge; // restricted edge-vertex kernel (catmark scheme)
GLuint _subComputeBilinearEdge; // edge-vertex kernel (bilinear scheme)
GLuint _subComputeVertex; // vertex-vertex kernel (bilinear scheme)

View File

@ -271,6 +271,36 @@ void runKernel( uint3 ID )
}
};
// Restricted edge-vertices compute Kernel
class CatmarkComputeRestrictedEdge : IComputeKernel {
int placeholder;
void runKernel( uint3 ID )
{
int i = int(ID.x) + indexStart;
if (i >= indexEnd) return;
int vid = i + vertexOffset;
i += tableOffset;
Vertex dst;
clear(dst);
int eidx0 = _E_IT[4*i+0];
int eidx1 = _E_IT[4*i+1];
int eidx2 = _E_IT[4*i+2];
int eidx3 = _E_IT[4*i+3];
int4 eidx = int4(eidx0, eidx1, eidx2, eidx3);
addWithWeight(dst, readVertex(eidx.x), 0.25f);
addWithWeight(dst, readVertex(eidx.y), 0.25f);
addWithWeight(dst, readVertex(eidx.z), 0.25f);
addWithWeight(dst, readVertex(eidx.w), 0.25f);
addVaryingWithWeight(dst, readVertex(eidx.x), 0.5f);
addVaryingWithWeight(dst, readVertex(eidx.y), 0.5f);
writeVertex(vid, dst);
}
};
// Edge-vertices compute Kernel (bilinear scheme)
class BilinearComputeEdge : IComputeKernel {
int placeholder;
@ -458,6 +488,7 @@ CatmarkComputeFace catmarkComputeFace;
CatmarkComputeQuadFace catmarkComputeQuadFace;
CatmarkComputeTriQuadFace catmarkComputeTriQuadFace;
CatmarkComputeEdge catmarkComputeEdge;
CatmarkComputeRestrictedEdge catmarkComputeRestrictedEdge;
BilinearComputeEdge bilinearComputeEdge;
BilinearComputeVertex bilinearComputeVertex;
CatmarkComputeVertexA catmarkComputeVertexA;

View File

@ -134,6 +134,19 @@ OsdOmpComputeController::ApplyCatmarkEdgeVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdOmpComputeController::ApplyCatmarkRestrictedEdgeVerticesKernel(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
assert(context);
OsdOmpComputeRestrictedEdge(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdOmpComputeController::ApplyCatmarkVertexVerticesKernelB(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {

View File

@ -134,6 +134,8 @@ protected:
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelA1(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -243,6 +243,47 @@ void OsdOmpComputeEdge(
}
}
void OsdOmpComputeRestrictedEdge(
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *E_IT, int offset, int tableOffset, int start, int end) {
int numThreads = omp_get_max_threads();
float *vertexResultsArray = (float*)alloca(vertexDesc.length * sizeof(float) * numThreads);
float *varyingResultsArray = (float*)alloca(varyingDesc.length * sizeof(float) * numThreads);
#pragma omp parallel for
for (int i = start + tableOffset; i < end + tableOffset; i++) {
int eidx0 = E_IT[4*i+0];
int eidx1 = E_IT[4*i+1];
int eidx2 = E_IT[4*i+2];
int eidx3 = E_IT[4*i+3];
int dstIndex = offset + i - tableOffset;
int threadId = omp_get_thread_num();
float *vertexResults = vertexResultsArray +
vertexDesc.length * threadId;
float *varyingResults = varyingResultsArray +
varyingDesc.length * threadId;
// clear
clear(vertexResults, vertexDesc);
clear(varyingResults, varyingDesc);
addWithWeight(vertexResults, vertex, eidx0, 0.25f, vertexDesc);
addWithWeight(vertexResults, vertex, eidx1, 0.25f, vertexDesc);
addWithWeight(vertexResults, vertex, eidx2, 0.25f, vertexDesc);
addWithWeight(vertexResults, vertex, eidx3, 0.25f, vertexDesc);
addWithWeight(varyingResults, varying, eidx0, 0.5f, varyingDesc);
addWithWeight(varyingResults, varying, eidx1, 0.5f, varyingDesc);
copy(vertex, vertexResults, dstIndex, vertexDesc);
copy(varying, varyingResults, dstIndex, varyingDesc);
}
}
void OsdOmpComputeVertexA(
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,

View File

@ -61,6 +61,13 @@ void OsdOmpComputeEdge(float *vertex, float * varying,
int vertexOffset, int tableOffset,
int start, int end);
void OsdOmpComputeRestrictedEdge(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *E_IT,
int vertexOffset, int tableOffset,
int start, int end);
void OsdOmpComputeVertexA(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,

View File

@ -140,6 +140,19 @@ OsdTbbComputeController::ApplyCatmarkEdgeVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdTbbComputeController::ApplyCatmarkRestrictedEdgeVerticesKernel(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
assert(context);
OsdTbbComputeRestrictedEdge(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdTbbComputeController::ApplyCatmarkVertexVerticesKernelB(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {

View File

@ -126,6 +126,8 @@ protected:
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelA1(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -378,7 +378,6 @@ public:
{};
};
void OsdTbbComputeEdge(
float *vertex, float *varying,
OsdVertexBufferDescriptor const &vertexDesc,
@ -391,6 +390,76 @@ void OsdTbbComputeEdge(
tbb::parallel_for(range, kernel);
}
class TBBRestrictedEdgeKernel {
float *vertex;
float *varying;
OsdVertexBufferDescriptor vertexDesc;
OsdVertexBufferDescriptor varyingDesc;
int const *E_IT;
int vertexOffset;
int tableOffset;
public:
void operator() (tbb::blocked_range<int> const &r) const {
for (int i = r.begin() + tableOffset; i < r.end() + tableOffset; i++) {
int eidx0 = E_IT[4*i+0];
int eidx1 = E_IT[4*i+1];
int eidx2 = E_IT[4*i+2];
int eidx3 = E_IT[4*i+3];
int dstIndex = i + vertexOffset - tableOffset;
clear(vertex, dstIndex, vertexDesc);
clear(varying, dstIndex, varyingDesc);
addWithWeight(vertex, dstIndex, eidx0, 0.25f, vertexDesc);
addWithWeight(vertex, dstIndex, eidx1, 0.25f, vertexDesc);
addWithWeight(vertex, dstIndex, eidx2, 0.25f, vertexDesc);
addWithWeight(vertex, dstIndex, eidx3, 0.25f, vertexDesc);
addWithWeight(varying, dstIndex, eidx0, 0.5f, varyingDesc);
addWithWeight(varying, dstIndex, eidx1, 0.5f, varyingDesc);
}
}
TBBRestrictedEdgeKernel(TBBRestrictedEdgeKernel const &other)
{
this->vertex = other.vertex;
this->varying= other.varying;
this->vertexDesc = other.vertexDesc;
this->varyingDesc = other.varyingDesc;
this->E_IT = other.E_IT;
this->vertexOffset = other.vertexOffset;
this->tableOffset = other.tableOffset;
}
TBBRestrictedEdgeKernel(float *vertex_in,
float *varying_in,
OsdVertexBufferDescriptor const &vertexDesc_in,
OsdVertexBufferDescriptor const &varyingDesc_in,
int const *E_IT_in,
int vertexOffset_in,
int tableOffset_in) :
vertex (vertex_in),
varying(varying_in),
vertexDesc(vertexDesc_in),
varyingDesc(varyingDesc_in),
E_IT (E_IT_in),
vertexOffset(vertexOffset_in),
tableOffset(tableOffset_in)
{};
};
void OsdTbbComputeRestrictedEdge(
float *vertex, float *varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
int const *E_IT, int vertexOffset, int tableOffset,
int start, int end) {
tbb::blocked_range<int> range(start, end, grain_size);
TBBRestrictedEdgeKernel kernel(vertex, varying, vertexDesc, varyingDesc, E_IT,
vertexOffset, tableOffset);
tbb::parallel_for(range, kernel);
}
class TBBVertexKernelA {
float *vertex;
float *varying;

View File

@ -56,10 +56,17 @@ void OsdTbbComputeTriQuadFace(float * vertex, float * varying,
void OsdTbbComputeEdge(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
int const *E_IT, float const *E_ITa,
int const *E_IT, float const *E_W,
int vertexOffset, int tableOffset,
int start, int end);
void OsdTbbComputeRestrictedEdge(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
int const *E_IT,
int vertexOffset, int tableOffset,
int start, int end);
void OsdTbbComputeVertexA(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,