Added the CATMARK_RESTRICTED_EDGE_VERTEX kernel which computes vertices resulting from the refinement of a smooth or (fully) sharp edge.

This commit is contained in:
Nathan Litke 2014-05-27 15:25:54 -07:00
parent c4e760e722
commit 0af14f8ac6
45 changed files with 750 additions and 29 deletions

View File

@ -84,13 +84,21 @@ FarCatmarkSubdivisionTablesFactory<T,U>::Create( FarMeshFactory<T,U> * meshFacto
if (coarseMeshAllTriQuadFaces)
F_IT_size += tablesFactory.GetNumCoarseTriangleFaces(); // add padding for tri faces
// Triangular interpolation mode :
// see "smoothtriangle" tag introduced in prman 3.9 and HbrCatmarkSubdivision<T>
typename HbrCatmarkSubdivision<T>::TriangleSubdivision triangleMethod =
dynamic_cast<HbrCatmarkSubdivision<T> *>(meshFactory->GetHbrMesh()->GetSubdivision())->GetTriangleSubdivisionMethod();
bool hasFractionalEdgeSharpness = tablesFactory.HasFractionalEdgeSharpness();
bool useRestrictedEdgeVertexKernel = !hasFractionalEdgeSharpness && triangleMethod != HbrCatmarkSubdivision<T>::k_New;
// Allocate memory for the indexing tables
if (!coarseMeshAllTriQuadFaces)
result->_F_ITa.resize(tablesFactory.GetNumFaceVerticesTotal(1) * 2);
result->_F_IT.resize(F_IT_size);
result->_E_IT.resize(tablesFactory.GetNumEdgeVerticesTotal(maxlevel)*4);
result->_E_W.resize(tablesFactory.GetNumEdgeVerticesTotal(maxlevel)*2);
if (!useRestrictedEdgeVertexKernel)
result->_E_W.resize(tablesFactory.GetNumEdgeVerticesTotal(maxlevel)*2);
result->_V_ITa.resize((tablesFactory.GetNumVertexVerticesTotal(maxlevel)
- tablesFactory.GetNumVertexVerticesTotal(0))*5); // subtract coarse cage vertices
@ -189,19 +197,18 @@ FarCatmarkSubdivisionTablesFactory<T,U>::Create( FarMeshFactory<T,U> * meshFacto
// Edge vertices
// Triangular interpolation mode :
// see "smoothtriangle" tag introduced in prman 3.9 and HbrCatmarkSubdivision<T>
typename HbrCatmarkSubdivision<T>::TriangleSubdivision triangleMethod =
dynamic_cast<HbrCatmarkSubdivision<T> *>(meshFactory->GetHbrMesh()->GetSubdivision())->GetTriangleSubdivisionMethod();
// "For each vertex, gather the 2 vertices from the parent edege and the
// 2 child vertices from the faces to the left and right of that edge.
// Adjust if edge has a crease or is on a boundary."
int nEdgeVertices = (int)tablesFactory._edgeVertsList[level].size();
// add a batch for edge vertices
kernelType = (useRestrictedEdgeVertexKernel ?
FarKernelBatch::CATMARK_RESTRICTED_EDGE_VERTEX :
FarKernelBatch::CATMARK_EDGE_VERTEX);
if (nEdgeVertices > 0)
batches->push_back(FarKernelBatch( FarKernelBatch::CATMARK_EDGE_VERTEX,
batches->push_back(FarKernelBatch( kernelType,
level,
0,
0,
@ -227,8 +234,20 @@ FarCatmarkSubdivisionTablesFactory<T,U>::Create( FarMeshFactory<T,U> * meshFacto
float faceWeight=0.5f, vertWeight=0.5f;
// in the case of a fractional sharpness, set the adjacent faces vertices
if (!e->IsBoundary() && esharp <= 1.0f) {
if (kernelType == FarKernelBatch::CATMARK_RESTRICTED_EDGE_VERTEX) {
// in the case of a sharp edge, repeat the endpoint vertices
if (!e->IsBoundary() && esharp < 1.0f) {
HbrFace<T>* rf = e->GetRightFace();
HbrFace<T>* lf = e->GetLeftFace();
E_IT[4*i+2] = remap[lf->Subdivide()->GetID()];
E_IT[4*i+3] = remap[rf->Subdivide()->GetID()];
} else {
E_IT[4*i+2] = E_IT[4*i+0];
E_IT[4*i+3] = E_IT[4*i+1];
}
} else if (!e->IsBoundary() && esharp <= 1.0f) {
// in the case of a fractional sharpness, set the adjacent faces vertices
float leftWeight, rightWeight;
HbrFace<T>* rf = e->GetRightFace();
@ -250,11 +269,14 @@ FarCatmarkSubdivisionTablesFactory<T,U>::Create( FarMeshFactory<T,U> * meshFacto
E_IT[4*i+2] = -1;
E_IT[4*i+3] = -1;
}
E_W[2*i+0] = vertWeight;
E_W[2*i+1] = faceWeight;
if (kernelType == FarKernelBatch::CATMARK_EDGE_VERTEX) {
E_W[2*i+0] = vertWeight;
E_W[2*i+1] = faceWeight;
}
}
E_IT += 4 * nEdgeVertices;
E_W += 2 * nEdgeVertices;
if (kernelType == FarKernelBatch::CATMARK_EDGE_VERTEX)
E_W += 2 * nEdgeVertices;
// Vertex vertices

View File

@ -96,6 +96,9 @@ FarDispatcher::ApplyKernel(CONTROLLER *controller, CONTEXT *context, FarKernelBa
case FarKernelBatch::CATMARK_EDGE_VERTEX:
controller->ApplyCatmarkEdgeVerticesKernel(batch, context);
break;
case FarKernelBatch::CATMARK_RESTRICTED_EDGE_VERTEX:
controller->ApplyCatmarkRestrictedEdgeVerticesKernel(batch, context);
break;
case FarKernelBatch::CATMARK_VERT_VERTEX_B:
controller->ApplyCatmarkVertexVerticesKernelB(batch, context);
break;
@ -185,6 +188,9 @@ public:
template <class CONTEXT>
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, CONTEXT *context) const;
template <class CONTEXT>
void ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, CONTEXT *context) const;
template <class CONTEXT>
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, CONTEXT *context) const;
@ -325,6 +331,21 @@ FarComputeController::ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch
vsrc );
}
template <class CONTEXT> void
FarComputeController::ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, CONTEXT *context) const {
typename CONTEXT::VertexType *vsrc = &context->GetVertices().at(0);
FarSubdivisionTables const * subdivision = context->GetSubdivisionTables();
assert(subdivision);
subdivision->computeCatmarkRestrictedEdgePoints( batch.GetVertexOffset(),
batch.GetTableOffset(),
batch.GetStart(),
batch.GetEnd(),
vsrc );
}
template <class CONTEXT> void
FarComputeController::ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, CONTEXT *context) const {

View File

@ -69,6 +69,7 @@ public:
CATMARK_QUAD_FACE_VERTEX,
CATMARK_TRI_QUAD_FACE_VERTEX,
CATMARK_EDGE_VERTEX,
CATMARK_RESTRICTED_EDGE_VERTEX,
CATMARK_VERT_VERTEX_A1,
CATMARK_VERT_VERTEX_A2,
CATMARK_VERT_VERTEX_B,

View File

@ -175,6 +175,10 @@ public:
template <class U>
void computeCatmarkEdgePoints(int vertexOffset, int tableOffset, int start, int end, U * vsrc) const;
// Compute-kernel applied to vertices resulting from the refinement of a smooth or sharp edge.
template <class U>
void computeCatmarkRestrictedEdgePoints(int vertexOffset, int tableOffset, int start, int end, U * vsrc) const;
// Compute-kernel applied to vertices resulting from the refinement of a vertex
// Kernel "A" Handles the k_Crease and k_Corner rules
template <class U>
@ -471,6 +475,33 @@ FarSubdivisionTables::computeCatmarkEdgePoints( int vertexOffset, int tableOffse
}
}
//
// Restricted edge-vertices compute Kernel - completely re-entrant
//
template <class U> void
FarSubdivisionTables::computeCatmarkRestrictedEdgePoints( int vertexOffset, int tableOffset, int start, int end, U * vsrc ) const {
U * vdst = vsrc + vertexOffset + start;
for (int i=start+tableOffset; i<end+tableOffset; ++i, ++vdst ) {
vdst->Clear();
int eidx0 = this->_E_IT[4*i+0],
eidx1 = this->_E_IT[4*i+1],
eidx2 = this->_E_IT[4*i+2],
eidx3 = this->_E_IT[4*i+3];
vdst->AddWithWeight( vsrc[eidx0], 0.25f );
vdst->AddWithWeight( vsrc[eidx1], 0.25f );
vdst->AddWithWeight( vsrc[eidx2], 0.25f );
vdst->AddWithWeight( vsrc[eidx3], 0.25f );
vdst->AddVaryingWithWeight( vsrc[eidx0], 0.5f );
vdst->AddVaryingWithWeight( vsrc[eidx1], 0.5f );
}
}
//
// Vertex-vertices compute Kernels "A" and "B" - completely re-entrant
//

View File

@ -106,6 +106,8 @@ protected:
// Returns an integer based on the order in which the kernels are applied
static int GetMaskRanking( unsigned char mask0, unsigned char mask1 );
bool HasFractionalEdgeSharpness() const { return _hasFractionalEdgeSharpness; }
// Per-level counters and offsets for each type of vertex (face,edge,vert)
std::vector<int> _faceVertIdx,
_edgeVertIdx,
@ -128,6 +130,9 @@ protected:
// Number of coarse triangle faces
int _numCoarseTriangleFaces;
// Indicates if an edge has a fractional (non-integer) sharpness
bool _hasFractionalEdgeSharpness;
private:
// Returns the subdivision level of a vertex
@ -155,7 +160,8 @@ FarSubdivisionTablesFactory<T,U>::FarSubdivisionTablesFactory( HbrMesh<T> const
_vertVertsList(maxlevel+1),
_minCoarseFaceValence(0),
_maxCoarseFaceValence(0),
_numCoarseTriangleFaces(0)
_numCoarseTriangleFaces(0),
_hasFractionalEdgeSharpness(false)
{
assert( mesh );
@ -199,9 +205,12 @@ FarSubdivisionTablesFactory<T,U>::FarSubdivisionTablesFactory( HbrMesh<T> const
if (valence == 3)
++_numCoarseTriangleFaces;
}
} else if (v->GetParentEdge())
} else if (v->GetParentEdge()) {
edgeCounts[depth]++;
else if (v->GetParentVertex()) {
float sharpness = v->GetParentEdge()->GetSharpness();
if (sharpness > 0.0f && sharpness < 1.0f)
_hasFractionalEdgeSharpness = true;
} else if (v->GetParentVertex()) {
vertCounts[depth]++;
_vertVertsValenceSum+=sumVertVertexValence(v);
}
@ -461,6 +470,10 @@ FarSubdivisionTablesFactory<T,U>::Splice(FarMeshVector const &meshes, FarKernelB
}
}
// pad E_W to align with E_IT when only some meshes use CATMARK_RESTRICTED_EDGE_VERTEX kernel
if (total_E_W != 0)
total_E_W = total_E_IT / 2;
FarSubdivisionTables *result = new FarSubdivisionTables(maxLevel, scheme);
result->_F_ITa.resize(total_F_ITa);
@ -543,7 +556,10 @@ FarSubdivisionTablesFactory<T,U>::Splice(FarMeshVector const &meshes, FarKernelB
// copy edge tables
E_IT = copyWithOffsetE_IT(E_IT, tables->Get_E_IT(), vertexOffsets[i]);
E_W = copyWithOffset(E_W, tables->Get_E_W(), 0);
if (!tables->Get_E_W().empty())
E_W = copyWithOffset(E_W, tables->Get_E_W(), 0);
else
E_W += tables->Get_E_IT().size() / 2;
// copy vert tables
if (scheme == FarSubdivisionTables::CATMARK or
@ -577,6 +593,7 @@ FarSubdivisionTablesFactory<T,U>::Splice(FarMeshVector const &meshes, FarKernelB
batch._tableOffset += F_IToffsets[i];
} else if (batch._kernelType == FarKernelBatch::CATMARK_EDGE_VERTEX or
batch._kernelType == FarKernelBatch::CATMARK_RESTRICTED_EDGE_VERTEX or
batch._kernelType == FarKernelBatch::LOOP_EDGE_VERTEX or
batch._kernelType == FarKernelBatch::BILINEAR_EDGE_VERTEX) {

View File

@ -266,6 +266,34 @@ OsdCLComputeController::ApplyCatmarkEdgeVerticesKernel(
CL_CHECK_ERROR(ciErrNum, "edge kernel %d\n", ciErrNum);
}
void
OsdCLComputeController::ApplyCatmarkRestrictedEdgeVerticesKernel(
FarKernelBatch const &batch, OsdCLComputeContext const *context) const {
assert(context);
cl_int ciErrNum;
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
cl_kernel kernel = _currentBindState.kernelBundle->GetCatmarkRestrictedEdgeKernel();
cl_mem E_IT = context->GetTable(FarSubdivisionTables::E_IT)->GetDevicePtr();
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &E_IT);
clSetKernelArg(kernel, 3, sizeof(int), &_currentBindState.vertexDesc.offset);
clSetKernelArg(kernel, 4, sizeof(int), &_currentBindState.varyingDesc.offset);
clSetKernelArg(kernel, 5, sizeof(int), batch.GetVertexOffsetPtr());
clSetKernelArg(kernel, 6, sizeof(int), batch.GetTableOffsetPtr());
clSetKernelArg(kernel, 7, sizeof(int), batch.GetStartPtr());
clSetKernelArg(kernel, 8, sizeof(int), batch.GetEndPtr());
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
kernel, 1, NULL, globalWorkSize,
NULL, 0, NULL, NULL);
CL_CHECK_ERROR(ciErrNum, "restricted edge kernel %d\n", ciErrNum);
}
void
OsdCLComputeController::ApplyCatmarkVertexVerticesKernelB(
FarKernelBatch const &batch, OsdCLComputeContext const *context) const {

View File

@ -143,6 +143,8 @@ protected:
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelA1(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -306,6 +306,41 @@ __kernel void computeEdge(__global float *vertex,
}
}
__kernel void computeRestrictedEdge(__global float *vertex,
__global float *varying,
__global int *E_IT,
int vertexOffset, int varyingOffset,
int offset, int tableOffset,
int start, int end) {
int i = start + get_global_id(0) + tableOffset;
int vid = start + get_global_id(0) + offset;
int eidx0 = E_IT[4*i+0];
int eidx1 = E_IT[4*i+1];
int eidx2 = E_IT[4*i+2];
int eidx3 = E_IT[4*i+3];
vertex += vertexOffset;
varying += (varying ? varyingOffset :0);
struct Vertex dst;
struct Varying dstVarying;
clearVertex(&dst);
clearVarying(&dstVarying);
addWithWeight(&dst, vertex, eidx0, 0.25f);
addWithWeight(&dst, vertex, eidx1, 0.25f);
addWithWeight(&dst, vertex, eidx2, 0.25f);
addWithWeight(&dst, vertex, eidx3, 0.25f);
writeVertex(vertex, vid, &dst);
if (varying) {
addVaryingWithWeight(&dstVarying, varying, eidx0, 0.5f);
addVaryingWithWeight(&dstVarying, varying, eidx1, 0.5f);
writeVarying(varying, vid, &dstVarying);
}
}
__kernel void computeVertexA(__global float *vertex,
__global float *varying,
__global int *V_ITa,

View File

@ -54,6 +54,7 @@ OsdCLKernelBundle::OsdCLKernelBundle() :
_clCatmarkQuadFace(NULL),
_clCatmarkTriQuadFace(NULL),
_clCatmarkEdge(NULL),
_clCatmarkRestrictedEdge(NULL),
_clCatmarkVertexA(NULL),
_clCatmarkVertexB(NULL),
_clLoopEdge(NULL),
@ -80,6 +81,8 @@ OsdCLKernelBundle::~OsdCLKernelBundle() {
clReleaseKernel(_clCatmarkTriQuadFace);
if (_clCatmarkEdge)
clReleaseKernel(_clCatmarkEdge);
if (_clCatmarkRestrictedEdge)
clReleaseKernel(_clCatmarkRestrictedEdge);
if (_clCatmarkVertexA)
clReleaseKernel(_clCatmarkVertexA);
if (_clCatmarkVertexB)
@ -157,6 +160,7 @@ OsdCLKernelBundle::Compile(cl_context clContext,
_clCatmarkQuadFace = buildKernel(_clProgram, "computeQuadFace");
_clCatmarkTriQuadFace = buildKernel(_clProgram, "computeTriQuadFace");
_clCatmarkEdge = buildKernel(_clProgram, "computeEdge");
_clCatmarkRestrictedEdge = buildKernel(_clProgram, "computeRestrictedEdge");
_clCatmarkVertexA = buildKernel(_clProgram, "computeVertexA");
_clCatmarkVertexB = buildKernel(_clProgram, "computeVertexB");
_clLoopEdge = buildKernel(_clProgram, "computeEdge");

View File

@ -56,6 +56,8 @@ public:
cl_kernel GetCatmarkEdgeKernel() const { return _clCatmarkEdge; }
cl_kernel GetCatmarkRestrictedEdgeKernel() const { return _clCatmarkRestrictedEdge; }
cl_kernel GetCatmarkVertexKernelA() const { return _clCatmarkVertexA; }
cl_kernel GetCatmarkVertexKernelB() const { return _clCatmarkVertexB; }
@ -99,6 +101,7 @@ protected:
_clCatmarkQuadFace,
_clCatmarkTriQuadFace,
_clCatmarkEdge,
_clCatmarkRestrictedEdge,
_clCatmarkVertexA,
_clCatmarkVertexB,
_clLoopEdge,

View File

@ -130,6 +130,19 @@ OsdCpuComputeController::ApplyCatmarkEdgeVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdCpuComputeController::ApplyCatmarkRestrictedEdgeVerticesKernel(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
assert(context);
OsdCpuComputeRestrictedEdge(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdCpuComputeController::ApplyCatmarkVertexVerticesKernelB(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {

View File

@ -126,6 +126,8 @@ protected:
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelA1(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -231,6 +231,39 @@ void OsdCpuComputeEdge(
}
}
void OsdCpuComputeRestrictedEdge(
float *vertex, float *varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *E_IT, int vertexOffset, int tableOffset,
int start, int end) {
float *vertexResults = (float*)alloca(vertexDesc.length * sizeof(float));
float *varyingResults = (float*)alloca(varyingDesc.length * sizeof(float));
for (int i = start + tableOffset; i < end + tableOffset; i++) {
int eidx0 = E_IT[4*i+0];
int eidx1 = E_IT[4*i+1];
int eidx2 = E_IT[4*i+2];
int eidx3 = E_IT[4*i+3];
int dstIndex = i + vertexOffset - tableOffset;
clear(vertexResults, vertexDesc);
clear(varyingResults, varyingDesc);
addWithWeight(vertexResults, vertex, eidx0, 0.25f, vertexDesc);
addWithWeight(vertexResults, vertex, eidx1, 0.25f, vertexDesc);
addWithWeight(vertexResults, vertex, eidx2, 0.25f, vertexDesc);
addWithWeight(vertexResults, vertex, eidx3, 0.25f, vertexDesc);
addWithWeight(varyingResults, varying, eidx0, 0.5f, varyingDesc);
addWithWeight(varyingResults, varying, eidx1, 0.5f, varyingDesc);
copy(vertex, vertexResults, dstIndex, vertexDesc);
copy(varying, varyingResults, dstIndex, varyingDesc);
}
}
void OsdCpuComputeVertexA(
float *vertex, float *varying,
OsdVertexBufferDescriptor const &vertexDesc,

View File

@ -169,6 +169,13 @@ void OsdCpuComputeEdge(float *vertex, float * varying,
int vertexOffset, int tableOffset,
int start, int end);
void OsdCpuComputeRestrictedEdge(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *E_IT,
int vertexOffset, int tableOffset,
int start, int end);
template<int numVertexElements>
void ComputeVertexAKernel( float *vertex,
const int *V_ITa,

View File

@ -50,6 +50,11 @@ void OsdCudaComputeEdge(float *vertex, float *varying,
int varyingLength, int varyingStride,
int *E_IT, float *E_W, int offset, int tableOffset, int start, int end);
void OsdCudaComputeRestrictedEdge(float *vertex, float *varying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *E_IT, int offset, int tableOffset, int start, int end);
void OsdCudaComputeVertexA(float *vertex, float *varying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
@ -244,6 +249,26 @@ OsdCudaComputeController::ApplyCatmarkEdgeVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdCudaComputeController::ApplyCatmarkRestrictedEdgeVerticesKernel(
FarKernelBatch const &batch, OsdCudaComputeContext const *context) const {
assert(context);
const OsdCudaTable * E_IT = context->GetTable(FarSubdivisionTables::E_IT);
assert(E_IT);
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
OsdCudaComputeRestrictedEdge(
vertex, varying,
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
static_cast<int*>(E_IT->GetCudaMemory()),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdCudaComputeController::ApplyCatmarkVertexVerticesKernelB(
FarKernelBatch const &batch, OsdCudaComputeContext const *context) const {

View File

@ -126,6 +126,8 @@ protected:
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelA1(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -379,6 +379,71 @@ computeEdge(float *fVertex, float *fVarying,
}
}
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
computeRestrictedEdge(float *fVertex, float *fVaryings, int *E0_IT, int offset, int tableOffset, int start, int end)
{
DeviceVertex<NUM_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_VERTEX_ELEMENTS>*)fVertex;
DeviceVertex<NUM_VARYING_ELEMENTS> *varyings = (DeviceVertex<NUM_VARYING_ELEMENTS>*)fVaryings;
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
i < end + tableOffset;
i+= blockDim.x * gridDim.x){
int eidx0 = E0_IT[4*i+0];
int eidx1 = E0_IT[4*i+1];
int eidx2 = E0_IT[4*i+2];
int eidx3 = E0_IT[4*i+3];
DeviceVertex<NUM_VERTEX_ELEMENTS> dst;
dst.clear();
dst.addWithWeight(&vertex[eidx0], 0.25f);
dst.addWithWeight(&vertex[eidx1], 0.25f);
dst.addWithWeight(&vertex[eidx2], 0.25f);
dst.addWithWeight(&vertex[eidx3], 0.25f);
if(NUM_VARYING_ELEMENTS > 0){
DeviceVertex<NUM_VARYING_ELEMENTS> dstVarying;
dstVarying.clear();
dstVarying.addWithWeight(&varyings[eidx0], 0.5f);
dstVarying.addWithWeight(&varyings[eidx1], 0.5f);
varyings[offset+i-tableOffset] = dstVarying;
}
}
}
__global__ void
computeRestrictedEdge(float *fVertex, float *fVarying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *E0_IT, int offset, int tableOffset, int start, int end)
{
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
i < end + tableOffset;i+= blockDim.x * gridDim.x) {
int eidx0 = E0_IT[4*i+0];
int eidx1 = E0_IT[4*i+1];
int eidx2 = E0_IT[4*i+2];
int eidx3 = E0_IT[4*i+3];
float *dstVertex = fVertex + (i+offset-tableOffset)*vertexStride;
clear(dstVertex, vertexLength);
addWithWeight(dstVertex, fVertex + eidx0*vertexStride, 0.25f, vertexLength);
addWithWeight(dstVertex, fVertex + eidx1*vertexStride, 0.25f, vertexLength);
addWithWeight(dstVertex, fVertex + eidx2*vertexStride, 0.25f, vertexLength);
addWithWeight(dstVertex, fVertex + eidx3*vertexStride, 0.25f, vertexLength);
if (varyingLength > 0){
float *dstVarying = fVarying + (i+offset-tableOffset)*varyingStride;
clear(dstVarying, varyingLength);
addWithWeight(dstVarying, fVarying + eidx0*varyingStride, 0.5f, varyingLength);
addWithWeight(dstVarying, fVarying + eidx1*varyingStride, 0.5f, varyingLength);
}
}
}
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
computeVertexA(float *fVertex, float *fVaryings, int *V0_ITa, float *V0_S, int offset, int tableOffset, int start, int end, int pass)
{
@ -847,6 +912,22 @@ void OsdCudaComputeEdge(float *vertex, float *varying,
E_IT, E_W, offset, tableOffset, start, end);
}
void OsdCudaComputeRestrictedEdge(float *vertex, float *varying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *E_IT, int offset, int tableOffset, int start, int end)
{
//computeEdge<0, 3><<<512,32>>>(vertex, varying, E_IT, offset, start, end);
OPT_KERNEL(0, 0, computeRestrictedEdge, 512, 32, (vertex, varying, E_IT, offset, tableOffset, start, end));
OPT_KERNEL(0, 3, computeRestrictedEdge, 512, 32, (vertex, varying, E_IT, offset, tableOffset, start, end));
OPT_KERNEL(3, 0, computeRestrictedEdge, 512, 32, (vertex, varying, E_IT, offset, tableOffset, start, end));
OPT_KERNEL(3, 3, computeRestrictedEdge, 512, 32, (vertex, varying, E_IT, offset, tableOffset, start, end));
computeRestrictedEdge<<<512, 32>>>(vertex, varying,
vertexLength, vertexStride, varyingLength, varyingStride,
E_IT, offset, tableOffset, start, end);
}
void OsdCudaComputeVertexA(float *vertex, float *varying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,

View File

@ -40,8 +40,11 @@ void
OsdD3D11ComputeTable::createBuffer(int size, const void *ptr, DXGI_FORMAT format, int numElements,
ID3D11DeviceContext *deviceContext) {
if (size == 0)
if (size == 0) {
_buffer = NULL;
_srv = NULL;
return;
}
ID3D11Device *device = NULL;
deviceContext->GetDevice(&device);

View File

@ -187,6 +187,18 @@ OsdD3D11ComputeController::ApplyCatmarkTriQuadFaceVerticesKernel(
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
}
void
OsdD3D11ComputeController::ApplyCatmarkRestrictedEdgeVerticesKernel(
FarKernelBatch const &batch, OsdD3D11ComputeContext const *context) const {
assert(context);
_currentBindState.kernelBundle->ApplyCatmarkRestrictedEdgeVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd(),
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
}
void
OsdD3D11ComputeController::ApplyCatmarkEdgeVerticesKernel(
FarKernelBatch const &batch, OsdD3D11ComputeContext const *context) const {

View File

@ -142,6 +142,8 @@ protected:
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelA1(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -55,6 +55,7 @@ OsdD3D11ComputeKernelBundle::OsdD3D11ComputeKernelBundle(
_kernelComputeQuadFace(0),
_kernelComputeTriQuadFace(0),
_kernelComputeEdge(0),
_kernelComputeRestrictedEdge(0),
_kernelComputeBilinearEdge(0),
_kernelComputeVertex(0),
_kernelComputeVertexA(0),
@ -74,6 +75,7 @@ OsdD3D11ComputeKernelBundle::~OsdD3D11ComputeKernelBundle() {
SAFE_RELEASE(_kernelComputeQuadFace);
SAFE_RELEASE(_kernelComputeTriQuadFace);
SAFE_RELEASE(_kernelComputeEdge);
SAFE_RELEASE(_kernelComputeRestrictedEdge);
SAFE_RELEASE(_kernelComputeBilinearEdge);
SAFE_RELEASE(_kernelComputeVertex);
SAFE_RELEASE(_kernelComputeVertexA);
@ -176,6 +178,9 @@ OsdD3D11ComputeKernelBundle::Compile(
_classLinkage->GetClassInstance(
"catmarkComputeEdge", 0, &_kernelComputeEdge);
assert(_kernelComputeEdge);
_classLinkage->GetClassInstance(
"catmarkComputeRestrictedEdge", 0, &_kernelComputeRestrictedEdge);
assert(_kernelComputeRestrictedEdge);
_classLinkage->GetClassInstance(
"bilinearComputeEdge", 0, &_kernelComputeBilinearEdge);
assert(_kernelComputeBilinearEdge);
@ -360,6 +365,22 @@ OsdD3D11ComputeKernelBundle::ApplyCatmarkEdgeVerticesKernel(
dispatchCompute(_kernelComputeEdge, args);
}
void
OsdD3D11ComputeKernelBundle::ApplyCatmarkRestrictedEdgeVerticesKernel(
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset) {
KernelCB args;
ZeroMemory(&args, sizeof(args));
args.vertexOffset = vertexOffset;
args.tableOffset = tableOffset;
args.indexStart = start;
args.indexEnd = end;
args.vertexBaseOffset = vertexBaseOffset;
args.varyingBaseOffset = varyingBaseOffset;
dispatchCompute(_kernelComputeRestrictedEdge, args);
}
void
OsdD3D11ComputeKernelBundle::ApplyCatmarkVertexVerticesKernelB(
int vertexOffset, int tableOffset, int start, int end,

View File

@ -79,6 +79,10 @@ public:
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset);
void ApplyCatmarkRestrictedEdgeVerticesKernel(
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset);
void ApplyCatmarkVertexVerticesKernelB(
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset);
@ -140,12 +144,14 @@ protected:
ID3D11ClassInstance * _kernelComputeFace; // general face-vertex kernel (all schemes)
ID3D11ClassInstance * _kernelComputeQuadFace; // quad face-vertex kernel (catmark)
ID3D11ClassInstance * _kernelComputeQuadFace; // quad face-vertex kernel (catmark scheme)
ID3D11ClassInstance * _kernelComputeTriQuadFace; // tri-quad face-vertex kernel (catmark)
ID3D11ClassInstance * _kernelComputeTriQuadFace; // tri-quad face-vertex kernel (catmark scheme)
ID3D11ClassInstance * _kernelComputeEdge; // edge-vertex kernel (catmark + loop schemes)
ID3D11ClassInstance * _kernelComputeRestrictedEdge; // edge-vertex kernel (catmark scheme)
ID3D11ClassInstance * _kernelComputeBilinearEdge; // edge-vertex kernel (bilinear scheme)
ID3D11ClassInstance * _kernelComputeVertex; // vertex-vertex kernel (bilinear scheme)

View File

@ -135,6 +135,20 @@ OsdGcdComputeController::ApplyCatmarkEdgeVerticesKernel(
_gcd_queue);
}
void
OsdGcdComputeController::ApplyCatmarkRestrictedEdgeVerticesKernel(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
assert(context);
OsdGcdComputeRestrictedEdge(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(),
_gcd_queue);
}
void
OsdGcdComputeController::ApplyCatmarkVertexVerticesKernelB(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {

View File

@ -126,6 +126,8 @@ protected:
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelA1(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -151,6 +151,29 @@ void OsdGcdComputeEdge(
vertexOffset, tableOffset, start_e, end_e);
}
void OsdGcdComputeRestrictedEdge(
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *E_IT, int vertexOffset, int tableOffset, int start, int end,
dispatch_queue_t gcdq) {
const int workSize = end-start;
dispatch_apply(workSize/GCD_WORK_STRIDE, gcdq, ^(size_t blockIdx){
const int start_i = start + blockIdx*GCD_WORK_STRIDE;
const int end_i = start_i + GCD_WORK_STRIDE;
OsdCpuComputeRestrictedEdge(vertex, varying, vertexDesc, varyingDesc,
E_IT,
vertexOffset, tableOffset, start_i, end_i);
});
const int start_e = end - workSize%GCD_WORK_STRIDE;
const int end_e = end;
if (start_e < end_e)
OsdCpuComputeRestrictedEdge(vertex, varying, vertexDesc, varyingDesc,
E_IT,
vertexOffset, tableOffset, start_e, end_e);
}
void OsdGcdComputeVertexA(
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
@ -281,7 +304,7 @@ void OsdGcdComputeBilinearVertex(
void OsdGcdEditVertexAdd(
float * vertex,
OsdVertexBufferDescriptor const &vertexDesc,
int primVarOffset, int primVarWidth,
int primVarOffset, int /*primVarWidth*/,
int vertexOffset, int tableOffset,
int start, int end,
const unsigned int *editIndices, const float *editValues,
@ -304,7 +327,7 @@ void OsdGcdEditVertexAdd(
void OsdGcdEditVertexSet(
float * vertex,
OsdVertexBufferDescriptor const &vertexDesc,
int primVarOffset, int primVarWidth,
int primVarOffset, int /*primVarWidth*/,
int vertexOffset, int tableOffset,
int start, int end,
const unsigned int *editIndices, const float *editValues,

View File

@ -66,6 +66,14 @@ void OsdGcdComputeEdge(float *vertex, float * varying,
int start, int end,
dispatch_queue_t gcdq);
void OsdGcdComputeRestrictedEdge(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *E_IT,
int vertexOffset, int tableOffset,
int start, int end,
dispatch_queue_t gcdq);
void OsdGcdComputeVertexA(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,

View File

@ -172,6 +172,17 @@ OsdGLSLComputeController::ApplyCatmarkEdgeVerticesKernel(
batch.GetStart(), batch.GetEnd());
}
void
OsdGLSLComputeController::ApplyCatmarkRestrictedEdgeVerticesKernel(
FarKernelBatch const &batch, OsdGLSLComputeContext const *context) const {
assert(context);
_currentBindState.kernelBundle->ApplyCatmarkRestrictedEdgeVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd());
}
void
OsdGLSLComputeController::ApplyCatmarkVertexVerticesKernelB(
FarKernelBatch const &batch, OsdGLSLComputeContext const *context) const {

View File

@ -135,6 +135,8 @@ protected:
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelA1(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -235,7 +235,7 @@ void catmarkComputeTriQuadFace()
writeVertex(vid, dst);
}
// Edge-vertices compute Kernepl
// Edge-vertices compute Kernel
subroutine(computeKernelType)
void catmarkComputeEdge()
{
@ -272,6 +272,34 @@ void catmarkComputeEdge()
writeVertex(vid, dst);
}
// Restricted edge-vertices compute Kernel
subroutine(computeKernelType)
void catmarkComputeRestrictedEdge()
{
int i = int(gl_GlobalInvocationID.x) + indexStart;
if (i >= indexEnd) return;
int vid = i + vertexOffset;
i += tableOffset;
Vertex dst;
clear(dst);
int eidx0 = _E_IT[4*i+0];
int eidx1 = _E_IT[4*i+1];
int eidx2 = _E_IT[4*i+2];
int eidx3 = _E_IT[4*i+3];
ivec4 eidx = ivec4(eidx0, eidx1, eidx2, eidx3);
addWithWeight(dst, readVertex(eidx.x), 0.25f);
addWithWeight(dst, readVertex(eidx.y), 0.25f);
addWithWeight(dst, readVertex(eidx.z), 0.25f);
addWithWeight(dst, readVertex(eidx.w), 0.25f);
addVaryingWithWeight(dst, readVertex(eidx.x), 0.5f);
addVaryingWithWeight(dst, readVertex(eidx.y), 0.5f);
writeVertex(vid, dst);
}
// Edge-vertices compute Kernel (bilinear scheme)
subroutine(computeKernelType)
void bilinearComputeEdge()

View File

@ -126,6 +126,9 @@ OsdGLSLComputeKernelBundle::Compile(
_subComputeEdge = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"catmarkComputeEdge");
_subComputeRestrictedEdge = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"catmarkComputeRestrictedEdge");
_subComputeBilinearEdge = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"bilinearComputeEdge");
@ -259,6 +262,14 @@ OsdGLSLComputeKernelBundle::ApplyCatmarkEdgeVerticesKernel(
dispatchCompute(vertexOffset, tableOffset, start, end);
}
void
OsdGLSLComputeKernelBundle::ApplyCatmarkRestrictedEdgeVerticesKernel(
int vertexOffset, int tableOffset, int start, int end) {
glUniformSubroutinesuiv(GL_COMPUTE_SHADER, 1, &_subComputeRestrictedEdge);
dispatchCompute(vertexOffset, tableOffset, start, end);
}
void
OsdGLSLComputeKernelBundle::ApplyCatmarkVertexVerticesKernelB(
int vertexOffset, int tableOffset, int start, int end) {

View File

@ -66,6 +66,9 @@ public:
void ApplyCatmarkEdgeVerticesKernel(
int vertexOffset, int tableOffset, int start, int end);
void ApplyCatmarkRestrictedEdgeVerticesKernel(
int vertexOffset, int tableOffset, int start, int end);
void ApplyCatmarkVertexVerticesKernelB(
int vertexOffset, int tableOffset, int start, int end);
@ -136,12 +139,14 @@ protected:
GLuint _subComputeFace; // general face-vertex kernel (all schemes)
GLuint _subComputeQuadFace; // quad face-vertex kernel (catmark)
GLuint _subComputeQuadFace; // quad face-vertex kernel (catmark scheme)
GLuint _subComputeTriQuadFace; // tri-quad face-vertex kernel (catmark)
GLuint _subComputeTriQuadFace; // tri-quad face-vertex kernel (catmark scheme)
GLuint _subComputeEdge; // edge-vertex kernel (catmark + loop schemes)
GLuint _subComputeRestrictedEdge; // restricted edge-vertex kernel (catmark scheme)
GLuint _subComputeBilinearEdge; // edge-vertex kernel (bilinear scheme)
GLuint _subComputeVertex; // vertex-vertex kernel (bilinear scheme)

View File

@ -231,7 +231,6 @@ OsdGLSLTransformFeedbackComputeController::ApplyCatmarkTriQuadFaceVerticesKernel
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdGLSLTransformFeedbackComputeController::ApplyCatmarkEdgeVerticesKernel(
FarKernelBatch const &batch, OsdGLSLTransformFeedbackComputeContext const *context) const {
@ -244,6 +243,18 @@ OsdGLSLTransformFeedbackComputeController::ApplyCatmarkEdgeVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdGLSLTransformFeedbackComputeController::ApplyCatmarkRestrictedEdgeVerticesKernel(
FarKernelBatch const &batch, OsdGLSLTransformFeedbackComputeContext const *context) const {
assert(context);
_currentBindState.kernelBundle->ApplyCatmarkRestrictedEdgeVerticesKernel(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdGLSLTransformFeedbackComputeController::ApplyCatmarkVertexVerticesKernelB(
FarKernelBatch const &batch, OsdGLSLTransformFeedbackComputeContext const *context) const {

View File

@ -133,6 +133,8 @@ protected:
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelA1(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -284,6 +284,35 @@ void catmarkComputeEdge()
writeVertex(dst);
}
// Restricted edge-vertices compute Kernel
subroutine(computeKernelType)
void catmarkComputeRestrictedEdge()
{
int i = gl_VertexID + indexStart + tableOffset;
Vertex dst;
clear(dst);
#ifdef OPT_E0_IT_VEC4
ivec4 eidx = texelFetch(_E0_IT, i);
#else
int eidx0 = texelFetch(_E0_IT, 4*i+0).x;
int eidx1 = texelFetch(_E0_IT, 4*i+1).x;
int eidx2 = texelFetch(_E0_IT, 4*i+2).x;
int eidx3 = texelFetch(_E0_IT, 4*i+3).x;
ivec4 eidx = ivec4(eidx0, eidx1, eidx2, eidx3);
#endif
addWithWeight(dst, readVertex(eidx.x), 0.25f);
addWithWeight(dst, readVertex(eidx.y), 0.25f);
addWithWeight(dst, readVertex(eidx.z), 0.25f);
addWithWeight(dst, readVertex(eidx.w), 0.25f);
addVaryingWithWeight(dst, readVertex(eidx.x), 0.5f);
addVaryingWithWeight(dst, readVertex(eidx.y), 0.5f);
writeVertex(dst);
}
// Edge-vertices compute Kernel (bilinear scheme)
subroutine(computeKernelType)
void bilinearComputeEdge()

View File

@ -228,6 +228,7 @@ OsdGLSLTransformFeedbackKernelBundle::Compile(
_subComputeQuadFace = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeQuadFace");
_subComputeTriQuadFace = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeTriQuadFace");
_subComputeEdge = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeEdge");
_subComputeRestrictedEdge = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeRestrictedEdge");
_subComputeBilinearEdge = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "bilinearComputeEdge");
_subComputeVertex = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "bilinearComputeVertex");
_subComputeVertexA = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeVertexA");
@ -406,6 +407,18 @@ OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkEdgeVerticesKernel(
offset, tableOffset, start, end);
}
void
OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkRestrictedEdgeVerticesKernel(
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end) {
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeRestrictedEdge);
transformGpuBufferData(vertexBuffer, varyingBuffer,
vertexOffset, varyingOffset,
offset, tableOffset, start, end);
}
void
OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkVertexVerticesKernelB(
GLuint vertexBuffer, GLuint varyingBuffer,

View File

@ -83,6 +83,11 @@ public:
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end);
void ApplyCatmarkRestrictedEdgeVerticesKernel(
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end);
void ApplyCatmarkVertexVerticesKernelB(
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
@ -191,12 +196,14 @@ protected:
GLuint _subComputeFace; // general face-vertex kernel (all schemes)
GLuint _subComputeQuadFace; // quad face-vertex kernel (catmark)
GLuint _subComputeQuadFace; // quad face-vertex kernel (catmark scheme)
GLuint _subComputeTriQuadFace; // tri-quad face-vertex kernel (catmark)
GLuint _subComputeTriQuadFace; // tri-quad face-vertex kernel (catmark scheme)
GLuint _subComputeEdge; // edge-vertex kernel (catmark + loop schemes)
GLuint _subComputeRestrictedEdge; // restricted edge-vertex kernel (catmark scheme)
GLuint _subComputeBilinearEdge; // edge-vertex kernel (bilinear scheme)
GLuint _subComputeVertex; // vertex-vertex kernel (bilinear scheme)

View File

@ -271,6 +271,36 @@ void runKernel( uint3 ID )
}
};
// Restricted edge-vertices compute Kernel
class CatmarkComputeRestrictedEdge : IComputeKernel {
int placeholder;
void runKernel( uint3 ID )
{
int i = int(ID.x) + indexStart;
if (i >= indexEnd) return;
int vid = i + vertexOffset;
i += tableOffset;
Vertex dst;
clear(dst);
int eidx0 = _E_IT[4*i+0];
int eidx1 = _E_IT[4*i+1];
int eidx2 = _E_IT[4*i+2];
int eidx3 = _E_IT[4*i+3];
int4 eidx = int4(eidx0, eidx1, eidx2, eidx3);
addWithWeight(dst, readVertex(eidx.x), 0.25f);
addWithWeight(dst, readVertex(eidx.y), 0.25f);
addWithWeight(dst, readVertex(eidx.z), 0.25f);
addWithWeight(dst, readVertex(eidx.w), 0.25f);
addVaryingWithWeight(dst, readVertex(eidx.x), 0.5f);
addVaryingWithWeight(dst, readVertex(eidx.y), 0.5f);
writeVertex(vid, dst);
}
};
// Edge-vertices compute Kernel (bilinear scheme)
class BilinearComputeEdge : IComputeKernel {
int placeholder;

View File

@ -134,6 +134,19 @@ OsdOmpComputeController::ApplyCatmarkEdgeVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdOmpComputeController::ApplyCatmarkRestrictedEdgeVerticesKernel(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
assert(context);
OsdOmpComputeRestrictedEdge(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdOmpComputeController::ApplyCatmarkVertexVerticesKernelB(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {

View File

@ -134,6 +134,8 @@ protected:
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelA1(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -243,6 +243,47 @@ void OsdOmpComputeEdge(
}
}
void OsdOmpComputeRestrictedEdge(
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *E_IT, int offset, int tableOffset, int start, int end) {
int numThreads = omp_get_max_threads();
float *vertexResultsArray = (float*)alloca(vertexDesc.length * sizeof(float) * numThreads);
float *varyingResultsArray = (float*)alloca(varyingDesc.length * sizeof(float) * numThreads);
#pragma omp parallel for
for (int i = start + tableOffset; i < end + tableOffset; i++) {
int eidx0 = E_IT[4*i+0];
int eidx1 = E_IT[4*i+1];
int eidx2 = E_IT[4*i+2];
int eidx3 = E_IT[4*i+3];
int dstIndex = offset + i - tableOffset;
int threadId = omp_get_thread_num();
float *vertexResults = vertexResultsArray +
vertexDesc.length * threadId;
float *varyingResults = varyingResultsArray +
varyingDesc.length * threadId;
// clear
clear(vertexResults, vertexDesc);
clear(varyingResults, varyingDesc);
addWithWeight(vertexResults, vertex, eidx0, 0.25f, vertexDesc);
addWithWeight(vertexResults, vertex, eidx1, 0.25f, vertexDesc);
addWithWeight(vertexResults, vertex, eidx2, 0.25f, vertexDesc);
addWithWeight(vertexResults, vertex, eidx3, 0.25f, vertexDesc);
addWithWeight(varyingResults, varying, eidx0, 0.5f, varyingDesc);
addWithWeight(varyingResults, varying, eidx1, 0.5f, varyingDesc);
copy(vertex, vertexResults, dstIndex, vertexDesc);
copy(varying, varyingResults, dstIndex, varyingDesc);
}
}
void OsdOmpComputeVertexA(
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,

View File

@ -61,6 +61,13 @@ void OsdOmpComputeEdge(float *vertex, float * varying,
int vertexOffset, int tableOffset,
int start, int end);
void OsdOmpComputeRestrictedEdge(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *E_IT,
int vertexOffset, int tableOffset,
int start, int end);
void OsdOmpComputeVertexA(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,

View File

@ -140,6 +140,19 @@ OsdTbbComputeController::ApplyCatmarkEdgeVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdTbbComputeController::ApplyCatmarkRestrictedEdgeVerticesKernel(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {
assert(context);
OsdTbbComputeRestrictedEdge(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdTbbComputeController::ApplyCatmarkVertexVerticesKernelB(
FarKernelBatch const &batch, OsdCpuComputeContext const *context) const {

View File

@ -126,6 +126,8 @@ protected:
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkRestrictedEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelA1(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -378,7 +378,6 @@ public:
{};
};
void OsdTbbComputeEdge(
float *vertex, float *varying,
OsdVertexBufferDescriptor const &vertexDesc,
@ -391,6 +390,76 @@ void OsdTbbComputeEdge(
tbb::parallel_for(range, kernel);
}
class TBBRestrictedEdgeKernel {
float *vertex;
float *varying;
OsdVertexBufferDescriptor vertexDesc;
OsdVertexBufferDescriptor varyingDesc;
int const *E_IT;
int vertexOffset;
int tableOffset;
public:
void operator() (tbb::blocked_range<int> const &r) const {
for (int i = r.begin() + tableOffset; i < r.end() + tableOffset; i++) {
int eidx0 = E_IT[4*i+0];
int eidx1 = E_IT[4*i+1];
int eidx2 = E_IT[4*i+2];
int eidx3 = E_IT[4*i+3];
int dstIndex = i + vertexOffset - tableOffset;
clear(vertex, dstIndex, vertexDesc);
clear(varying, dstIndex, varyingDesc);
addWithWeight(vertex, dstIndex, eidx0, 0.25f, vertexDesc);
addWithWeight(vertex, dstIndex, eidx1, 0.25f, vertexDesc);
addWithWeight(vertex, dstIndex, eidx2, 0.25f, vertexDesc);
addWithWeight(vertex, dstIndex, eidx3, 0.25f, vertexDesc);
addWithWeight(varying, dstIndex, eidx0, 0.5f, varyingDesc);
addWithWeight(varying, dstIndex, eidx1, 0.5f, varyingDesc);
}
}
TBBRestrictedEdgeKernel(TBBRestrictedEdgeKernel const &other)
{
this->vertex = other.vertex;
this->varying= other.varying;
this->vertexDesc = other.vertexDesc;
this->varyingDesc = other.varyingDesc;
this->E_IT = other.E_IT;
this->vertexOffset = other.vertexOffset;
this->tableOffset = other.tableOffset;
}
TBBRestrictedEdgeKernel(float *vertex_in,
float *varying_in,
OsdVertexBufferDescriptor const &vertexDesc_in,
OsdVertexBufferDescriptor const &varyingDesc_in,
int const *E_IT_in,
int vertexOffset_in,
int tableOffset_in) :
vertex (vertex_in),
varying(varying_in),
vertexDesc(vertexDesc_in),
varyingDesc(varyingDesc_in),
E_IT (E_IT_in),
vertexOffset(vertexOffset_in),
tableOffset(tableOffset_in)
{};
};
void OsdTbbComputeRestrictedEdge(
float *vertex, float *varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
int const *E_IT, int vertexOffset, int tableOffset,
int start, int end) {
tbb::blocked_range<int> range(start, end, grain_size);
TBBEdgeKernel kernel(vertex, varying, vertexDesc, varyingDesc, E_IT,
vertexOffset, tableOffset);
tbb::parallel_for(range, kernel);
}
class TBBVertexKernelA {
float *vertex;
float *varying;

View File

@ -56,10 +56,17 @@ void OsdTbbComputeTriQuadFace(float * vertex, float * varying,
void OsdTbbComputeEdge(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
int const *E_IT, float const *E_ITa,
int const *E_IT, float const *E_W,
int vertexOffset, int tableOffset,
int start, int end);
void OsdTbbComputeRestrictedEdge(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
int const *E_IT,
int vertexOffset, int tableOffset,
int start, int end);
void OsdTbbComputeVertexA(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,