Interleaved buffer support in OsdCompute. Removed OsdVertexDescriptor and replaced it with OsdVertexBufferDescriptor.

All kernels take offset/length/stride so that subdivision can be applied to a subset of each vertex's elements.

Also the offset can be used for client-based VBO aggregation, without modifying index buffers.
This is useful for topology sharing, in conjunction with glDrawElementsBaseVertex etc.
However, the Gregory patch shader fetches the vertex buffer via a texture buffer, whose indices
must also be offset. Although the gl_BaseVertexARB extension should be able to do that job, it is a
relatively new extension. So we use an OsdBaseVertex() call to mitigate the compatibility
issue, since clients can provide it in their own way, at least for the time being.
This commit is contained in:
Takahito Tejima 2014-05-08 17:20:54 -07:00
parent 2372eb45ef
commit ee061291b7
51 changed files with 2850 additions and 1607 deletions

View File

@ -67,6 +67,10 @@ int OsdPrimitiveIdBase()
{
return PrimitiveIdBase;
}
// Client-supplied hook that reports the base-vertex offset for shader code
// that needs to offset its vertex indices itself. This shader does not use
// an offset, so it always reports zero.
int OsdBaseVertex()
{
    const int baseVertex = 0;
    return baseVertex;
}
//--------------------------------------------------------------
// Vertex Shader

View File

@ -99,6 +99,10 @@ int OsdPrimitiveIdBase()
{
return PrimitiveIdBase;
}
// Client-supplied hook that reports the base-vertex offset for shader code
// that needs to offset its vertex indices itself. This shader does not use
// an offset, so it always reports zero.
int OsdBaseVertex()
{
    const int baseVertex = 0;
    return baseVertex;
}
//--------------------------------------------------------------
// Vertex Shader

View File

@ -97,6 +97,10 @@ int OsdPrimitiveIdBase()
{
return PrimitiveIdBase;
}
// Client-supplied hook that reports the base-vertex offset for shader code
// that needs to offset its vertex indices itself. This shader does not use
// an offset, so it always reports zero.
int OsdBaseVertex()
{
    const int baseVertex = 0;
    return baseVertex;
}
//--------------------------------------------------------------
// Vertex Shader

View File

@ -98,6 +98,10 @@ int OsdPrimitiveIdBase()
{
return PrimitiveIdBase;
}
// Client-supplied hook that reports the base-vertex offset for shader code
// that needs to offset its vertex indices itself. This shader does not use
// an offset, so it always reports zero.
int OsdBaseVertex()
{
    const int baseVertex = 0;
    return baseVertex;
}
//--------------------------------------------------------------
// Geometry Shader

View File

@ -130,6 +130,10 @@ int OsdPrimitiveIdBase()
{
return PrimitiveIdBase;
}
// Client-supplied hook that reports the base-vertex offset for shader code
// that needs to offset its vertex indices itself. This shader does not use
// an offset, so it always reports zero.
int OsdBaseVertex()
{
    const int baseVertex = 0;
    return baseVertex;
}
//--------------------------------------------------------------
// Vertex Shader

View File

@ -91,6 +91,10 @@ int OsdPrimitiveIdBase()
{
return PrimitiveIdBase;
}
// Client-supplied hook that reports the base-vertex offset for shader code
// that needs to offset its vertex indices itself. This shader does not use
// an offset, so it always reports zero.
int OsdBaseVertex()
{
    const int baseVertex = 0;
    return baseVertex;
}
//--------------------------------------------------------------
// Vertex Shader

View File

@ -53,9 +53,7 @@ namespace OPENSUBDIV_VERSION {
OsdCLComputeController::OsdCLComputeController(cl_context clContext,
cl_command_queue queue) :
_clContext(clContext), _clQueue(queue),
_currentVertexBuffer(0), _currentVaryingBuffer(0),
_currentKernelBundle(NULL) {
_clContext(clContext), _clQueue(queue) {
}
OsdCLComputeController::~OsdCLComputeController() {
@ -73,21 +71,23 @@ OsdCLComputeController::Synchronize() {
}
OsdCLKernelBundle *
OsdCLComputeController::getKernelBundle(int numVertexElements,
int numVaryingElements) {
OsdCLComputeController::getKernelBundle(
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc) {
std::vector<OsdCLKernelBundle*>::iterator it =
std::find_if(_kernelRegistry.begin(), _kernelRegistry.end(),
OsdCLKernelBundle::Match(numVertexElements,
numVaryingElements));
OsdCLKernelBundle::Match(vertexDesc,
varyingDesc));
if (it != _kernelRegistry.end()) {
return *it;
} else {
OsdCLKernelBundle *kernelBundle = new OsdCLKernelBundle();
_kernelRegistry.push_back(kernelBundle);
kernelBundle->Compile(_clContext,
numVertexElements,
numVaryingElements);
vertexDesc,
varyingDesc);
return kernelBundle;
}
}
@ -107,17 +107,19 @@ OsdCLComputeController::ApplyBilinearEdgeVerticesKernel(
cl_int ciErrNum;
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
cl_kernel kernel = _currentKernelBundle->GetBilinearEdgeKernel();
cl_kernel kernel = _currentBindState.kernelBundle->GetBilinearEdgeKernel();
cl_mem E_IT = context->GetTable(FarSubdivisionTables::E_IT)->GetDevicePtr();
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentVertexBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentVaryingBuffer);
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &E_IT);
clSetKernelArg(kernel, 3, sizeof(int), batch.GetVertexOffsetPtr());
clSetKernelArg(kernel, 4, sizeof(int), batch.GetTableOffsetPtr());
clSetKernelArg(kernel, 5, sizeof(int), batch.GetStartPtr());
clSetKernelArg(kernel, 6, sizeof(int), batch.GetEndPtr());
clSetKernelArg(kernel, 3, sizeof(int), &_currentBindState.vertexDesc.offset);
clSetKernelArg(kernel, 4, sizeof(int), &_currentBindState.varyingDesc.offset);
clSetKernelArg(kernel, 5, sizeof(int), batch.GetVertexOffsetPtr());
clSetKernelArg(kernel, 6, sizeof(int), batch.GetTableOffsetPtr());
clSetKernelArg(kernel, 7, sizeof(int), batch.GetStartPtr());
clSetKernelArg(kernel, 8, sizeof(int), batch.GetEndPtr());
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
kernel, 1, NULL, globalWorkSize,
NULL, 0, NULL, NULL);
@ -132,17 +134,19 @@ OsdCLComputeController::ApplyBilinearVertexVerticesKernel(
cl_int ciErrNum;
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
cl_kernel kernel = _currentKernelBundle->GetBilinearVertexKernel();
cl_kernel kernel = _currentBindState.kernelBundle->GetBilinearVertexKernel();
cl_mem V_ITa = context->GetTable(FarSubdivisionTables::V_ITa)->GetDevicePtr();
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentVertexBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentVaryingBuffer);
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &V_ITa);
clSetKernelArg(kernel, 3, sizeof(int), batch.GetVertexOffsetPtr());
clSetKernelArg(kernel, 4, sizeof(int), batch.GetTableOffsetPtr());
clSetKernelArg(kernel, 5, sizeof(int), batch.GetStartPtr());
clSetKernelArg(kernel, 6, sizeof(int), batch.GetEndPtr());
clSetKernelArg(kernel, 3, sizeof(int), &_currentBindState.vertexDesc.offset);
clSetKernelArg(kernel, 4, sizeof(int), &_currentBindState.varyingDesc.offset);
clSetKernelArg(kernel, 5, sizeof(int), batch.GetVertexOffsetPtr());
clSetKernelArg(kernel, 6, sizeof(int), batch.GetTableOffsetPtr());
clSetKernelArg(kernel, 7, sizeof(int), batch.GetStartPtr());
clSetKernelArg(kernel, 8, sizeof(int), batch.GetEndPtr());
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
kernel, 1, NULL, globalWorkSize,
NULL, 0, NULL, NULL);
@ -157,19 +161,21 @@ OsdCLComputeController::ApplyCatmarkFaceVerticesKernel(
cl_int ciErrNum;
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
cl_kernel kernel = _currentKernelBundle->GetCatmarkFaceKernel();
cl_kernel kernel = _currentBindState.kernelBundle->GetCatmarkFaceKernel();
cl_mem F_IT = context->GetTable(FarSubdivisionTables::F_IT)->GetDevicePtr();
cl_mem F_ITa = context->GetTable(FarSubdivisionTables::F_ITa)->GetDevicePtr();
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentVertexBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentVaryingBuffer);
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &F_IT);
clSetKernelArg(kernel, 3, sizeof(cl_mem), &F_ITa);
clSetKernelArg(kernel, 4, sizeof(int), batch.GetVertexOffsetPtr());
clSetKernelArg(kernel, 5, sizeof(int), batch.GetTableOffsetPtr());
clSetKernelArg(kernel, 6, sizeof(int), batch.GetStartPtr());
clSetKernelArg(kernel, 7, sizeof(int), batch.GetEndPtr());
clSetKernelArg(kernel, 4, sizeof(int), &_currentBindState.vertexDesc.offset);
clSetKernelArg(kernel, 5, sizeof(int), &_currentBindState.varyingDesc.offset);
clSetKernelArg(kernel, 6, sizeof(int), batch.GetVertexOffsetPtr());
clSetKernelArg(kernel, 7, sizeof(int), batch.GetTableOffsetPtr());
clSetKernelArg(kernel, 8, sizeof(int), batch.GetStartPtr());
clSetKernelArg(kernel, 9, sizeof(int), batch.GetEndPtr());
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
kernel, 1, NULL, globalWorkSize,
@ -185,19 +191,21 @@ OsdCLComputeController::ApplyCatmarkEdgeVerticesKernel(
cl_int ciErrNum;
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
cl_kernel kernel = _currentKernelBundle->GetCatmarkEdgeKernel();
cl_kernel kernel = _currentBindState.kernelBundle->GetCatmarkEdgeKernel();
cl_mem E_IT = context->GetTable(FarSubdivisionTables::E_IT)->GetDevicePtr();
cl_mem E_W = context->GetTable(FarSubdivisionTables::E_W)->GetDevicePtr();
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentVertexBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentVaryingBuffer);
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &E_IT);
clSetKernelArg(kernel, 3, sizeof(cl_mem), &E_W);
clSetKernelArg(kernel, 4, sizeof(int), batch.GetVertexOffsetPtr());
clSetKernelArg(kernel, 5, sizeof(int), batch.GetTableOffsetPtr());
clSetKernelArg(kernel, 6, sizeof(int), batch.GetStartPtr());
clSetKernelArg(kernel, 7, sizeof(int), batch.GetEndPtr());
clSetKernelArg(kernel, 4, sizeof(int), &_currentBindState.vertexDesc.offset);
clSetKernelArg(kernel, 5, sizeof(int), &_currentBindState.varyingDesc.offset);
clSetKernelArg(kernel, 6, sizeof(int), batch.GetVertexOffsetPtr());
clSetKernelArg(kernel, 7, sizeof(int), batch.GetTableOffsetPtr());
clSetKernelArg(kernel, 8, sizeof(int), batch.GetStartPtr());
clSetKernelArg(kernel, 9, sizeof(int), batch.GetEndPtr());
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
kernel, 1, NULL, globalWorkSize,
@ -213,21 +221,23 @@ OsdCLComputeController::ApplyCatmarkVertexVerticesKernelB(
cl_int ciErrNum;
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
cl_kernel kernel = _currentKernelBundle->GetCatmarkVertexKernelB();
cl_kernel kernel = _currentBindState.kernelBundle->GetCatmarkVertexKernelB();
cl_mem V_ITa = context->GetTable(FarSubdivisionTables::V_ITa)->GetDevicePtr();
cl_mem V_IT = context->GetTable(FarSubdivisionTables::V_IT)->GetDevicePtr();
cl_mem V_W = context->GetTable(FarSubdivisionTables::V_W)->GetDevicePtr();
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentVertexBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentVaryingBuffer);
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &V_ITa);
clSetKernelArg(kernel, 3, sizeof(cl_mem), &V_IT);
clSetKernelArg(kernel, 4, sizeof(cl_mem), &V_W);
clSetKernelArg(kernel, 5, sizeof(int), batch.GetVertexOffsetPtr());
clSetKernelArg(kernel, 6, sizeof(int), batch.GetTableOffsetPtr());
clSetKernelArg(kernel, 7, sizeof(int), batch.GetStartPtr());
clSetKernelArg(kernel, 8, sizeof(int), batch.GetEndPtr());
clSetKernelArg(kernel, 5, sizeof(int), &_currentBindState.vertexDesc.offset);
clSetKernelArg(kernel, 6, sizeof(int), &_currentBindState.varyingDesc.offset);
clSetKernelArg(kernel, 7, sizeof(int), batch.GetVertexOffsetPtr());
clSetKernelArg(kernel, 8, sizeof(int), batch.GetTableOffsetPtr());
clSetKernelArg(kernel, 9, sizeof(int), batch.GetStartPtr());
clSetKernelArg(kernel, 10, sizeof(int), batch.GetEndPtr());
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
kernel, 1, NULL, globalWorkSize,
@ -244,20 +254,22 @@ OsdCLComputeController::ApplyCatmarkVertexVerticesKernelA1(
cl_int ciErrNum;
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
int ipass = false;
cl_kernel kernel = _currentKernelBundle->GetCatmarkVertexKernelA();
cl_kernel kernel = _currentBindState.kernelBundle->GetCatmarkVertexKernelA();
cl_mem V_ITa = context->GetTable(FarSubdivisionTables::V_ITa)->GetDevicePtr();
cl_mem V_W = context->GetTable(FarSubdivisionTables::V_W)->GetDevicePtr();
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentVertexBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentVaryingBuffer);
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &V_ITa);
clSetKernelArg(kernel, 3, sizeof(cl_mem), &V_W);
clSetKernelArg(kernel, 4, sizeof(int), batch.GetVertexOffsetPtr());
clSetKernelArg(kernel, 5, sizeof(int), batch.GetTableOffsetPtr());
clSetKernelArg(kernel, 6, sizeof(int), batch.GetStartPtr());
clSetKernelArg(kernel, 7, sizeof(int), batch.GetEndPtr());
clSetKernelArg(kernel, 8, sizeof(int), &ipass);
clSetKernelArg(kernel, 4, sizeof(int), &_currentBindState.vertexDesc.offset);
clSetKernelArg(kernel, 5, sizeof(int), &_currentBindState.varyingDesc.offset);
clSetKernelArg(kernel, 6, sizeof(int), batch.GetVertexOffsetPtr());
clSetKernelArg(kernel, 7, sizeof(int), batch.GetTableOffsetPtr());
clSetKernelArg(kernel, 8, sizeof(int), batch.GetStartPtr());
clSetKernelArg(kernel, 9, sizeof(int), batch.GetEndPtr());
clSetKernelArg(kernel, 10, sizeof(int), &ipass);
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
kernel, 1, NULL, globalWorkSize,
@ -274,20 +286,22 @@ OsdCLComputeController::ApplyCatmarkVertexVerticesKernelA2(
cl_int ciErrNum;
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
int ipass = true;
cl_kernel kernel = _currentKernelBundle->GetCatmarkVertexKernelA();
cl_kernel kernel = _currentBindState.kernelBundle->GetCatmarkVertexKernelA();
cl_mem V_ITa = context->GetTable(FarSubdivisionTables::V_ITa)->GetDevicePtr();
cl_mem V_W = context->GetTable(FarSubdivisionTables::V_W)->GetDevicePtr();
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentVertexBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentVaryingBuffer);
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &V_ITa);
clSetKernelArg(kernel, 3, sizeof(cl_mem), &V_W);
clSetKernelArg(kernel, 4, sizeof(int), batch.GetVertexOffsetPtr());
clSetKernelArg(kernel, 5, sizeof(int), batch.GetTableOffsetPtr());
clSetKernelArg(kernel, 6, sizeof(int), batch.GetStartPtr());
clSetKernelArg(kernel, 7, sizeof(int), batch.GetEndPtr());
clSetKernelArg(kernel, 8, sizeof(int), &ipass);
clSetKernelArg(kernel, 4, sizeof(int), &_currentBindState.vertexDesc.offset);
clSetKernelArg(kernel, 5, sizeof(int), &_currentBindState.varyingDesc.offset);
clSetKernelArg(kernel, 6, sizeof(int), batch.GetVertexOffsetPtr());
clSetKernelArg(kernel, 7, sizeof(int), batch.GetTableOffsetPtr());
clSetKernelArg(kernel, 8, sizeof(int), batch.GetStartPtr());
clSetKernelArg(kernel, 9, sizeof(int), batch.GetEndPtr());
clSetKernelArg(kernel, 10, sizeof(int), &ipass);
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
kernel, 1, NULL, globalWorkSize,
@ -303,19 +317,21 @@ OsdCLComputeController::ApplyLoopEdgeVerticesKernel(
cl_int ciErrNum;
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
cl_kernel kernel = _currentKernelBundle->GetLoopEdgeKernel();
cl_kernel kernel = _currentBindState.kernelBundle->GetLoopEdgeKernel();
cl_mem E_IT = context->GetTable(FarSubdivisionTables::E_IT)->GetDevicePtr();
cl_mem E_W = context->GetTable(FarSubdivisionTables::E_W)->GetDevicePtr();
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentVertexBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentVaryingBuffer);
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &E_IT);
clSetKernelArg(kernel, 3, sizeof(cl_mem), &E_W);
clSetKernelArg(kernel, 4, sizeof(int), batch.GetVertexOffsetPtr());
clSetKernelArg(kernel, 5, sizeof(int), batch.GetTableOffsetPtr());
clSetKernelArg(kernel, 6, sizeof(int), batch.GetStartPtr());
clSetKernelArg(kernel, 7, sizeof(int), batch.GetEndPtr());
clSetKernelArg(kernel, 4, sizeof(int), &_currentBindState.vertexDesc.offset);
clSetKernelArg(kernel, 5, sizeof(int), &_currentBindState.varyingDesc.offset);
clSetKernelArg(kernel, 6, sizeof(int), batch.GetVertexOffsetPtr());
clSetKernelArg(kernel, 7, sizeof(int), batch.GetTableOffsetPtr());
clSetKernelArg(kernel, 8, sizeof(int), batch.GetStartPtr());
clSetKernelArg(kernel, 9, sizeof(int), batch.GetEndPtr());
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
kernel, 1, NULL, globalWorkSize,
@ -331,21 +347,23 @@ OsdCLComputeController::ApplyLoopVertexVerticesKernelB(
cl_int ciErrNum;
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
cl_kernel kernel = _currentKernelBundle->GetLoopVertexKernelB();
cl_kernel kernel = _currentBindState.kernelBundle->GetLoopVertexKernelB();
cl_mem V_ITa = context->GetTable(FarSubdivisionTables::V_ITa)->GetDevicePtr();
cl_mem V_IT = context->GetTable(FarSubdivisionTables::V_IT)->GetDevicePtr();
cl_mem V_W = context->GetTable(FarSubdivisionTables::V_W)->GetDevicePtr();
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentVertexBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentVaryingBuffer);
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &V_ITa);
clSetKernelArg(kernel, 3, sizeof(cl_mem), &V_IT);
clSetKernelArg(kernel, 4, sizeof(cl_mem), &V_W);
clSetKernelArg(kernel, 5, sizeof(int), batch.GetVertexOffsetPtr());
clSetKernelArg(kernel, 6, sizeof(int), batch.GetTableOffsetPtr());
clSetKernelArg(kernel, 7, sizeof(int), batch.GetStartPtr());
clSetKernelArg(kernel, 8, sizeof(int), batch.GetEndPtr());
clSetKernelArg(kernel, 5, sizeof(int), &_currentBindState.vertexDesc.offset);
clSetKernelArg(kernel, 6, sizeof(int), &_currentBindState.varyingDesc.offset);
clSetKernelArg(kernel, 7, sizeof(int), batch.GetVertexOffsetPtr());
clSetKernelArg(kernel, 8, sizeof(int), batch.GetTableOffsetPtr());
clSetKernelArg(kernel, 9, sizeof(int), batch.GetStartPtr());
clSetKernelArg(kernel, 10, sizeof(int), batch.GetEndPtr());
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
kernel, 1, NULL, globalWorkSize,
@ -362,20 +380,22 @@ OsdCLComputeController::ApplyLoopVertexVerticesKernelA1(
cl_int ciErrNum;
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
int ipass = false;
cl_kernel kernel = _currentKernelBundle->GetLoopVertexKernelA();
cl_kernel kernel = _currentBindState.kernelBundle->GetLoopVertexKernelA();
cl_mem V_ITa = context->GetTable(FarSubdivisionTables::V_ITa)->GetDevicePtr();
cl_mem V_W = context->GetTable(FarSubdivisionTables::V_W)->GetDevicePtr();
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentVertexBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentVaryingBuffer);
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &V_ITa);
clSetKernelArg(kernel, 3, sizeof(cl_mem), &V_W);
clSetKernelArg(kernel, 4, sizeof(int), batch.GetVertexOffsetPtr());
clSetKernelArg(kernel, 5, sizeof(int), batch.GetTableOffsetPtr());
clSetKernelArg(kernel, 6, sizeof(int), batch.GetStartPtr());
clSetKernelArg(kernel, 7, sizeof(int), batch.GetEndPtr());
clSetKernelArg(kernel, 8, sizeof(int), &ipass);
clSetKernelArg(kernel, 4, sizeof(int), &_currentBindState.vertexDesc.offset);
clSetKernelArg(kernel, 5, sizeof(int), &_currentBindState.varyingDesc.offset);
clSetKernelArg(kernel, 6, sizeof(int), batch.GetVertexOffsetPtr());
clSetKernelArg(kernel, 7, sizeof(int), batch.GetTableOffsetPtr());
clSetKernelArg(kernel, 8, sizeof(int), batch.GetStartPtr());
clSetKernelArg(kernel, 9, sizeof(int), batch.GetEndPtr());
clSetKernelArg(kernel, 10, sizeof(int), &ipass);
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
kernel, 1, NULL, globalWorkSize,
@ -392,20 +412,22 @@ OsdCLComputeController::ApplyLoopVertexVerticesKernelA2(
cl_int ciErrNum;
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
int ipass = true;
cl_kernel kernel = _currentKernelBundle->GetLoopVertexKernelA();
cl_kernel kernel = _currentBindState.kernelBundle->GetLoopVertexKernelA();
cl_mem V_ITa = context->GetTable(FarSubdivisionTables::V_ITa)->GetDevicePtr();
cl_mem V_W = context->GetTable(FarSubdivisionTables::V_W)->GetDevicePtr();
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentVertexBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentVaryingBuffer);
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &V_ITa);
clSetKernelArg(kernel, 3, sizeof(cl_mem), &V_W);
clSetKernelArg(kernel, 4, sizeof(int), batch.GetVertexOffsetPtr());
clSetKernelArg(kernel, 5, sizeof(int), batch.GetTableOffsetPtr());
clSetKernelArg(kernel, 6, sizeof(int), batch.GetStartPtr());
clSetKernelArg(kernel, 7, sizeof(int), batch.GetEndPtr());
clSetKernelArg(kernel, 8, sizeof(int), &ipass);
clSetKernelArg(kernel, 4, sizeof(int), &_currentBindState.vertexDesc.offset);
clSetKernelArg(kernel, 5, sizeof(int), &_currentBindState.varyingDesc.offset);
clSetKernelArg(kernel, 6, sizeof(int), batch.GetVertexOffsetPtr());
clSetKernelArg(kernel, 7, sizeof(int), batch.GetTableOffsetPtr());
clSetKernelArg(kernel, 8, sizeof(int), batch.GetStartPtr());
clSetKernelArg(kernel, 9, sizeof(int), batch.GetEndPtr());
clSetKernelArg(kernel, 10, sizeof(int), &ipass);
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
kernel, 1, NULL, globalWorkSize,
@ -434,17 +456,18 @@ OsdCLComputeController::ApplyVertexEdits(
int primvarWidth = edit->GetPrimvarWidth();
if (edit->GetOperation() == FarVertexEdit::Add) {
cl_kernel kernel = _currentKernelBundle->GetVertexEditAdd();
cl_kernel kernel = _currentBindState.kernelBundle->GetVertexEditAdd();
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentVertexBuffer);
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &indices);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &values);
clSetKernelArg(kernel, 3, sizeof(int), &primvarOffset);
clSetKernelArg(kernel, 4, sizeof(int), &primvarWidth);
clSetKernelArg(kernel, 5, sizeof(int), batch.GetVertexOffsetPtr());
clSetKernelArg(kernel, 6, sizeof(int), batch.GetTableOffsetPtr());
clSetKernelArg(kernel, 7, sizeof(int), batch.GetStartPtr());
clSetKernelArg(kernel, 8, sizeof(int), batch.GetEndPtr());
clSetKernelArg(kernel, 3, sizeof(int), &_currentBindState.vertexDesc.offset);
clSetKernelArg(kernel, 4, sizeof(int), &primvarOffset);
clSetKernelArg(kernel, 5, sizeof(int), &primvarWidth);
clSetKernelArg(kernel, 6, sizeof(int), batch.GetVertexOffsetPtr());
clSetKernelArg(kernel, 7, sizeof(int), batch.GetTableOffsetPtr());
clSetKernelArg(kernel, 8, sizeof(int), batch.GetStartPtr());
clSetKernelArg(kernel, 9, sizeof(int), batch.GetEndPtr());
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
kernel, 1, NULL, globalWorkSize,

View File

@ -29,6 +29,7 @@
#include "../far/dispatcher.h"
#include "../osd/clComputeContext.h"
#include "../osd/vertexDescriptor.h"
#if defined(__APPLE__)
#include <OpenCL/opencl.h>
@ -79,15 +80,25 @@ public:
///
/// @param varyingBuffer varying-interpolated data buffer
///
/// @param vertexDesc the descriptor of vertex elements to be refined.
/// if it's null, all primvars in the vertex buffer
/// will be refined.
///
/// @param varyingDesc the descriptor of varying elements to be refined.
/// if it's null, all primvars in the varying buffer
/// will be refined.
///
template<class VERTEX_BUFFER, class VARYING_BUFFER>
void Refine(ComputeContext const *context,
FarKernelBatchVector const &batches,
VERTEX_BUFFER *vertexBuffer,
VARYING_BUFFER *varyingBuffer) {
VARYING_BUFFER *varyingBuffer,
OsdVertexBufferDescriptor const *vertexDesc=NULL,
OsdVertexBufferDescriptor const *varyingDesc=NULL) {
if (batches.empty()) return;
bind(vertexBuffer, varyingBuffer);
bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc);
FarDispatcher::Refine(this, context, batches, /*maxlevel*/-1);
@ -152,33 +163,63 @@ protected:
void ApplyVertexEdits(FarKernelBatch const &batch, ComputeContext const *context) const;
OsdCLKernelBundle * getKernelBundle(int numVertexElements,
int numVaryingElements);
OsdCLKernelBundle * getKernelBundle(
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc);
template<class VERTEX_BUFFER, class VARYING_BUFFER>
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying) {
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying,
OsdVertexBufferDescriptor const *vertexDesc,
OsdVertexBufferDescriptor const *varyingDesc) {
int numVertexElements = vertex ? vertex->GetNumElements() : 0;
int numVaryingElements = varying ? varying->GetNumElements() : 0;
// if the vertex buffer descriptor is specified, use it.
// otherwise, assumes the data is tightly packed in the vertex buffer.
if (vertexDesc) {
_currentBindState.vertexDesc = *vertexDesc;
} else {
int numElements = vertex ? vertex->GetNumElements() : 0;
_currentBindState.vertexDesc = OsdVertexBufferDescriptor(
0, numElements, numElements);
}
if (varyingDesc) {
_currentBindState.varyingDesc = *varyingDesc;
} else {
int numElements = varying ? varying->GetNumElements() : 0;
_currentBindState.varyingDesc = OsdVertexBufferDescriptor(
0, numElements, numElements);
}
_currentVertexBuffer = vertex ? vertex->BindCLBuffer(_clQueue) : NULL;
_currentVaryingBuffer = varying ? varying->BindCLBuffer(_clQueue) : NULL;
_currentKernelBundle = getKernelBundle(numVertexElements, numVaryingElements);
_currentBindState.vertexBuffer = vertex ? vertex->BindCLBuffer(_clQueue) : 0;
_currentBindState.varyingBuffer = varying ? varying->BindCLBuffer(_clQueue) : 0;
_currentBindState.kernelBundle = getKernelBundle(_currentBindState.vertexDesc,
_currentBindState.varyingDesc);
}
void unbind() {
_currentVertexBuffer = NULL;
_currentVaryingBuffer = NULL;
_currentKernelBundle = NULL;
_currentBindState.Reset();
}
private:
// Groups everything that is bound for the duration of one refinement:
// the OpenCL buffer handles, the descriptors of the vertex and varying
// data, and the kernel bundle selected for those descriptors.
struct BindState {
    // Starts out unbound: no buffers and no kernel bundle. The two
    // descriptors are default-constructed.
    BindState()
        : vertexBuffer(NULL),
          varyingBuffer(NULL),
          kernelBundle(NULL) {}

    // Returns the state to "nothing bound": clears both buffer handles
    // and the kernel bundle pointer, and resets both descriptors.
    void Reset() {
        vertexBuffer = NULL;
        varyingBuffer = NULL;
        kernelBundle = NULL;
        vertexDesc.Reset();
        varyingDesc.Reset();
    }

    cl_mem vertexBuffer;                    // CL handle of the bound vertex buffer
    cl_mem varyingBuffer;                   // CL handle of the bound varying buffer
    OsdVertexBufferDescriptor vertexDesc;   // descriptor for the vertex data
    OsdVertexBufferDescriptor varyingDesc;  // descriptor for the varying data
    OsdCLKernelBundle *kernelBundle;        // kernels matching the two descriptors
};
BindState _currentBindState;
cl_context _clContext;
cl_command_queue _clQueue;
std::vector<OsdCLKernelBundle *> _kernelRegistry;
cl_mem _currentVertexBuffer, _currentVaryingBuffer;
OsdCLKernelBundle *_currentKernelBundle;
};
} // end namespace OPENSUBDIV_VERSION

View File

@ -28,12 +28,12 @@
struct Vertex
{
float v[NUM_VERTEX_ELEMENTS];
float v[VERTEX_STRIDE];
};
struct Varying
{
float v[NUM_VARYING_ELEMENTS];
float v[VARYING_STRIDE];
};
static void clearVertex(struct Vertex *vertex) {
@ -49,86 +49,121 @@ static void clearVarying(struct Varying *varying) {
}
}
static void addWithWeight(struct Vertex *dst, __global struct Vertex *src, float weight) {
static void addWithWeight(struct Vertex *dst,
__global float *srcOrigin,
int index, float weight) {
for (int i = 0; i < NUM_VERTEX_ELEMENTS; i++) {
dst->v[i] += src->v[i] * weight;
__global float *src = srcOrigin + index * VERTEX_STRIDE;
for (int i = 0; i < NUM_VERTEX_ELEMENTS; ++i) {
dst->v[i] += src[i] * weight;
}
}
static void addVaryingWithWeight(struct Varying *dst, __global struct Varying *src, float weight) {
static void addVaryingWithWeight(struct Varying *dst,
__global float *srcOrigin,
int index, float weight) {
for (int i = 0; i < NUM_VARYING_ELEMENTS; i++) {
dst->v[i] += src->v[i] * weight;
__global float *src = srcOrigin + index * VARYING_STRIDE;
for (int i = 0; i < NUM_VARYING_ELEMENTS; ++i) {
dst->v[i] += src[i] * weight;
}
}
__kernel void computeBilinearEdge(__global struct Vertex *vertex,
__global struct Varying *varying,
// Stores the accumulated vertex `src` into the buffer `dstOrigin` at vertex
// slot `index`. Slots are VERTEX_STRIDE floats apart; only the first
// NUM_VERTEX_ELEMENTS floats of the slot are written.
static void writeVertex(__global float *dstOrigin,
                        int index,
                        struct Vertex *src) {
    int base = index * VERTEX_STRIDE;
    for (int element = 0; element < NUM_VERTEX_ELEMENTS; ++element) {
        dstOrigin[base + element] = src->v[element];
    }
}
// Stores the accumulated varying data `src` into the buffer `dstOrigin` at
// slot `index`. Slots are VARYING_STRIDE floats apart; only the first
// NUM_VARYING_ELEMENTS floats of the slot are written.
static void writeVarying(__global float *dstOrigin,
                         int index,
                         struct Varying *src) {
    int base = index * VARYING_STRIDE;
    for (int element = 0; element < NUM_VARYING_ELEMENTS; ++element) {
        dstOrigin[base + element] = src->v[element];
    }
}
__kernel void computeBilinearEdge(__global float *vertex,
__global float *varying,
__global int *E_IT,
int vertexOffset, int tableOffset,
int vertexOffset, int varyingOffset,
int offset, int tableOffset,
int start, int end) {
int i = start + get_global_id(0) + tableOffset;
int vid = start + get_global_id(0) + vertexOffset;
int vid = start + get_global_id(0) + offset;
int eidx0 = E_IT[2*i+0];
int eidx1 = E_IT[2*i+1];
vertex += vertexOffset;
varying += (varying ? varyingOffset :0);
struct Vertex dst;
struct Varying dstVarying;
clearVertex(&dst);
clearVarying(&dstVarying);
addWithWeight(&dst, &vertex[eidx0], 0.5f);
addWithWeight(&dst, &vertex[eidx1], 0.5f);
addWithWeight(&dst, vertex, eidx0, 0.5f);
addWithWeight(&dst, vertex, eidx1, 0.5f);
vertex[vid] = dst;
writeVertex(vertex, vid, &dst);
if (varying) {
addVaryingWithWeight(&dstVarying, &varying[eidx0], 0.5f);
addVaryingWithWeight(&dstVarying, &varying[eidx1], 0.5f);
varying[vid] = dstVarying;
addVaryingWithWeight(&dstVarying, varying, eidx0, 0.5f);
addVaryingWithWeight(&dstVarying, varying, eidx1, 0.5f);
writeVarying(varying, vid, &dstVarying);
}
}
__kernel void computeBilinearVertex(__global struct Vertex *vertex,
__global struct Varying *varying,
__kernel void computeBilinearVertex(__global float *vertex,
__global float *varying,
__global int *V_ITa,
int vertexOffset, int tableOffset,
int vertexOffset, int varyingOffset,
int offset, int tableOffset,
int start, int end) {
int i = start + get_global_id(0) + tableOffset;
int vid = start + get_global_id(0) + vertexOffset;
int vid = start + get_global_id(0) + offset;
vertex += vertexOffset;
varying += (varying ? varyingOffset :0);
int p = V_ITa[i];
struct Vertex dst;
clearVertex(&dst);
addWithWeight(&dst, &vertex[p], 1.0f);
addWithWeight(&dst, vertex, p, 1.0f);
vertex[vid] = dst;
writeVertex(vertex, vid, &dst);
if (varying) {
struct Varying dstVarying;
clearVarying(&dstVarying);
addVaryingWithWeight(&dstVarying, &varying[p], 1.0f);
varying[vid] = dstVarying;
addVaryingWithWeight(&dstVarying, varying, p, 1.0f);
writeVarying(varying, vid, &dstVarying);
}
}
// ----------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------
__kernel void computeFace(__global struct Vertex *vertex,
__global struct Varying *varying,
__kernel void computeFace(__global float *vertex,
__global float *varying,
__global int *F_IT,
__global int *F_ITa,
int vertexOffset, int tableOffset,
int vertexOffset, int varyingOffset,
int offset, int tableOffset,
int start, int end) {
int i = start + get_global_id(0) + tableOffset;
int vid = start + get_global_id(0) + vertexOffset;
int vid = start + get_global_id(0) + offset;
int h = F_ITa[2*i];
int n = F_ITa[2*i+1];
vertex += vertexOffset;
varying += (varying ? varyingOffset :0);
float weight = 1.0f/n;
@ -138,26 +173,31 @@ __kernel void computeFace(__global struct Vertex *vertex,
clearVarying(&dstVarying);
for (int j=0; j<n; ++j) {
int index = F_IT[h+j];
addWithWeight(&dst, &vertex[index], weight);
if(varying) addVaryingWithWeight(&dstVarying, &varying[index], weight);
addWithWeight(&dst, vertex, index, weight);
if (varying) {
addVaryingWithWeight(&dstVarying, varying, index, weight);
}
}
vertex[vid] = dst;
if (varying) varying[vid] = dstVarying;
writeVertex(vertex, vid, &dst);
if (varying) writeVarying(varying, vid, &dstVarying);
}
__kernel void computeEdge(__global struct Vertex *vertex,
__global struct Varying *varying,
__kernel void computeEdge(__global float *vertex,
__global float *varying,
__global int *E_IT,
__global float *E_W,
int vertexOffset, int tableOffset,
int vertexOffset, int varyingOffset,
int offset, int tableOffset,
int start, int end) {
int i = start + get_global_id(0) + tableOffset;
int vid = start + get_global_id(0) + vertexOffset;
int vid = start + get_global_id(0) + offset;
int eidx0 = E_IT[4*i+0];
int eidx1 = E_IT[4*i+1];
int eidx2 = E_IT[4*i+2];
int eidx3 = E_IT[4*i+3];
vertex += vertexOffset;
varying += (varying ? varyingOffset :0);
float vertWeight = E_W[i*2+0];
@ -167,38 +207,41 @@ __kernel void computeEdge(__global struct Vertex *vertex,
clearVertex(&dst);
clearVarying(&dstVarying);
addWithWeight(&dst, &vertex[eidx0], vertWeight);
addWithWeight(&dst, &vertex[eidx1], vertWeight);
addWithWeight(&dst, vertex, eidx0, vertWeight);
addWithWeight(&dst, vertex, eidx1, vertWeight);
if (eidx2 > -1) {
float faceWeight = E_W[i*2+1];
addWithWeight(&dst, &vertex[eidx2], faceWeight);
addWithWeight(&dst, &vertex[eidx3], faceWeight);
addWithWeight(&dst, vertex, eidx2, faceWeight);
addWithWeight(&dst, vertex, eidx3, faceWeight);
}
vertex[vid] = dst;
writeVertex(vertex, vid, &dst);
if (varying) {
addVaryingWithWeight(&dstVarying, &varying[eidx0], 0.5f);
addVaryingWithWeight(&dstVarying, &varying[eidx1], 0.5f);
varying[vid] = dstVarying;
addVaryingWithWeight(&dstVarying, varying, eidx0, 0.5f);
addVaryingWithWeight(&dstVarying, varying, eidx1, 0.5f);
writeVarying(varying, vid, &dstVarying);
}
}
__kernel void computeVertexA(__global struct Vertex *vertex,
__global struct Varying *varying,
__kernel void computeVertexA(__global float *vertex,
__global float *varying,
__global int *V_ITa,
__global float *V_W,
int vertexOffset, int tableOffset,
int vertexOffset, int varyingOffset,
int offset, int tableOffset,
int start, int end, int pass) {
int i = start + get_global_id(0) + tableOffset;
int vid = start + get_global_id(0) + vertexOffset;
int vid = start + get_global_id(0) + offset;
int n = V_ITa[5*i+1];
int p = V_ITa[5*i+2];
int eidx0 = V_ITa[5*i+3];
int eidx1 = V_ITa[5*i+4];
vertex += vertexOffset;
varying += (varying ? varyingOffset :0);
float weight = (pass==1) ? V_W[i] : 1.0f - V_W[i];
@ -209,41 +252,43 @@ __kernel void computeVertexA(__global struct Vertex *vertex,
weight=1.0f-weight;
struct Vertex dst;
if (! pass)
clearVertex(&dst);
else
dst = vertex[vid];
clearVertex(&dst);
if (pass)
addWithWeight(&dst, vertex, vid, 1.0f); // copy previous result
if (eidx0==-1 || (pass==0 && (n==-1)) ) {
addWithWeight(&dst, &vertex[p], weight);
addWithWeight(&dst, vertex, p, weight);
} else {
addWithWeight(&dst, &vertex[p], weight * 0.75f);
addWithWeight(&dst, &vertex[eidx0], weight * 0.125f);
addWithWeight(&dst, &vertex[eidx1], weight * 0.125f);
addWithWeight(&dst, vertex, p, weight * 0.75f);
addWithWeight(&dst, vertex, eidx0, weight * 0.125f);
addWithWeight(&dst, vertex, eidx1, weight * 0.125f);
}
vertex[vid] = dst;
writeVertex(vertex, vid, &dst);
if (! pass && varying) {
struct Varying dstVarying;
clearVarying(&dstVarying);
addVaryingWithWeight(&dstVarying, &varying[p], 1.0f);
varying[vid] = dstVarying;
addVaryingWithWeight(&dstVarying, varying, p, 1.0f);
writeVarying(varying, vid, &dstVarying);
}
}
__kernel void computeVertexB(__global struct Vertex *vertex,
__global struct Varying *varying,
__kernel void computeVertexB(__global float *vertex,
__global float *varying,
__global int *V_ITa,
__global int *V_IT,
__global float *V_W,
int vertexOffset, int tableOffset,
int vertexOffset, int varyingOffset,
int offset, int tableOffset,
int start, int end) {
int i = start + get_global_id(0) + tableOffset;
int vid = start + get_global_id(0) + vertexOffset;
int vid = start + get_global_id(0) + offset;
int h = V_ITa[5*i];
int n = V_ITa[5*i+1];
int p = V_ITa[5*i+2];
vertex += vertexOffset;
varying += (varying ? varyingOffset :0);
float weight = V_W[i];
float wp = 1.0f/(float)(n*n);
@ -252,35 +297,38 @@ __kernel void computeVertexB(__global struct Vertex *vertex,
struct Vertex dst;
clearVertex(&dst);
addWithWeight(&dst, &vertex[p], weight * wv);
addWithWeight(&dst, vertex, p, weight * wv);
for (int j = 0; j < n; ++j) {
addWithWeight(&dst, &vertex[V_IT[h+j*2]], weight * wp);
addWithWeight(&dst, &vertex[V_IT[h+j*2+1]], weight * wp);
addWithWeight(&dst, vertex, V_IT[h+j*2], weight * wp);
addWithWeight(&dst, vertex, V_IT[h+j*2+1], weight * wp);
}
vertex[vid] = dst;
writeVertex(vertex, vid, &dst);
if (varying) {
struct Varying dstVarying;
clearVarying(&dstVarying);
addVaryingWithWeight(&dstVarying, &varying[p], 1.0f);
varying[vid] = dstVarying;
addVaryingWithWeight(&dstVarying, varying, p, 1.0f);
writeVarying(varying, vid, &dstVarying);
}
}
__kernel void computeLoopVertexB(__global struct Vertex *vertex,
__global struct Varying *varying,
__kernel void computeLoopVertexB(__global float *vertex,
__global float *varying,
__global int *V_ITa,
__global int *V_IT,
__global float *V_W,
int vertexOffset, int tableOffset,
int vertexOffset, int varyingOffset,
int offset, int tableOffset,
int start, int end) {
int i = start + get_global_id(0) + tableOffset;
int vid = start + get_global_id(0) + vertexOffset;
int vid = start + get_global_id(0) + offset;
int h = V_ITa[5*i];
int n = V_ITa[5*i+1];
int p = V_ITa[5*i+2];
vertex += vertexOffset;
varying += (varying ? varyingOffset :0);
float weight = V_W[i];
float wp = 1.0f/(float)(n);
@ -290,36 +338,37 @@ __kernel void computeLoopVertexB(__global struct Vertex *vertex,
struct Vertex dst;
clearVertex(&dst);
addWithWeight(&dst, &vertex[p], weight * (1.0f - (beta * n)));
addWithWeight(&dst, vertex, p, weight * (1.0f - (beta * n)));
for (int j = 0; j < n; ++j) {
addWithWeight(&dst, &vertex[V_IT[h+j]], weight * beta);
addWithWeight(&dst, vertex, V_IT[h+j], weight * beta);
}
vertex[vid] = dst;
writeVertex(vertex, vid, &dst);
if (varying) {
struct Varying dstVarying;
clearVarying(&dstVarying);
addVaryingWithWeight(&dstVarying, &varying[p], 1.0f);
varying[vid] = dstVarying;
addVaryingWithWeight(&dstVarying, varying, p, 1.0f);
writeVarying(varying, vid, &dstVarying);
}
}
// Kernel: adds per-edit values onto a primvar sub-range of the edited vertices.
// Each work-item processes one edit entry selected by get_global_id(0).
// NOTE(review): this span interleaves the 'struct Vertex' form of the kernel with
// its flat float-buffer form; per the commit description the float-buffer lines
// are the ones being introduced -- confirm against the applied file.
//
//   vertex         vertex data buffer that is modified in place
//   editIndices    per-edit index of the vertex to modify
//   editValues     primVarWidth floats per edit
//   vertexOffset   added to the vertex base pointer before indexing
//   primVarOffset  first element, within a vertex, touched by the edit
//   primVarWidth   number of consecutive elements touched by the edit
//   tableOffset    added to the work-item index when reading editIndices
//   start, end     batch range; only 'start' is read by the lines shown here
// NOTE(review): 'offset' is not referenced by any statement visible in this span.
__kernel void editVertexAdd(__global struct Vertex *vertex,
__kernel void editVertexAdd(__global float *vertex,
__global int *editIndices,
__global float *editValues,
int vertexOffset,
int primVarOffset,
int primVarWidth,
int vertexOffset, int tableOffset,
int offset, int tableOffset,
int start, int end) {
// i indexes editIndices (tableOffset applied).
int i = start + get_global_id(0) + tableOffset;
int v = editIndices[i];
// eid indexes editValues; unlike i, it does not include tableOffset.
int eid = start + get_global_id(0);
struct Vertex dst = vertex[v];
// Shift the base pointer by vertexOffset, then to element primVarOffset of
// vertex v (vertices are VERTEX_STRIDE floats apart).
vertex += vertexOffset;
vertex += v * VERTEX_STRIDE + primVarOffset;
for (int j = 0; j < primVarWidth; ++j) {
dst.v[j+primVarOffset] += editValues[eid*primVarWidth + j];
// Accumulate directly into the buffer element.
vertex[j] += editValues[eid*primVarWidth + j];
}
vertex[v] = dst;
}

View File

@ -28,6 +28,8 @@
#include "../osd/error.h"
#include <stdio.h>
#include <sstream>
#ifdef _MSC_VER
#define snprintf _snprintf
#endif
@ -54,8 +56,11 @@ OsdCLKernelBundle::OsdCLKernelBundle() :
_clCatmarkVertexB(NULL),
_clLoopEdge(NULL),
_clLoopVertexA(NULL),
_clLoopVertexB(NULL)
{
_clLoopVertexB(NULL),
_numVertexElements(0),
_vertexStride(0),
_numVaryingElements(0),
_varyingStride(0) {
}
OsdCLKernelBundle::~OsdCLKernelBundle() {
@ -97,19 +102,24 @@ static cl_kernel buildKernel(cl_program prog, const char * name) {
bool
OsdCLKernelBundle::Compile(cl_context clContext,
int numVertexElements, int numVaryingElements) {
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc) {
cl_int ciErrNum;
_vdesc.Set( numVertexElements, numVaryingElements );
_numVertexElements = vertexDesc.length;
_vertexStride = vertexDesc.stride;
_numVaryingElements = varyingDesc.length;
_varyingStride = varyingDesc.stride;
char constantDefine[256];
snprintf(constantDefine, sizeof(constantDefine),
"#define NUM_VERTEX_ELEMENTS %d\n"
"#define NUM_VARYING_ELEMENTS %d\n",
numVertexElements, numVaryingElements);
std::ostringstream defines;
defines << "#define NUM_VERTEX_ELEMENTS " << _numVertexElements << "\n"
<< "#define VERTEX_STRIDE " << _vertexStride << "\n"
<< "#define NUM_VARYING_ELEMENTS " << _numVaryingElements << "\n"
<< "#define VARYING_STRIDE " << _varyingStride << "\n";
std::string defineStr = defines.str();
const char *sources[] = { constantDefine, clSource };
const char *sources[] = { defineStr.c_str(), clSource };
_clProgram = clCreateProgramWithSource(clContext, 2, sources, 0, &ciErrNum);
CL_CHECK_ERROR(ciErrNum, "clCreateProgramWithSource\n");
@ -131,6 +141,7 @@ OsdCLKernelBundle::Compile(cl_context clContext,
OsdError(OSD_CL_PROGRAM_BUILD_ERROR, cBuildLog);
}
delete[] devices;
return false;
}

View File

@ -47,7 +47,8 @@ public:
~OsdCLKernelBundle();
bool Compile(cl_context clContext,
int numVertexElements, int numVaryingElements);
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc);
cl_kernel GetBilinearEdgeKernel() const { return _clBilinearEdge; }
@ -70,17 +71,23 @@ public:
cl_kernel GetVertexEditAdd() const { return _clVertexEditAdd; }
/// Predicate over kernel bundles: operator() reports whether a bundle's
/// recorded element counts and strides equal those of the descriptors
/// captured at construction.
struct Match {
/// Constructor. Stores copies of the descriptors to compare against.
Match(int numVertexElements, int numVaryingElements)
: vdesc(numVertexElements, numVaryingElements) {
Match(OsdVertexBufferDescriptor const &vertex,
OsdVertexBufferDescriptor const &varying)
: vertexDesc(vertex), varyingDesc(varying) {
}
/// @param kernel  the kernel bundle to test (dereferenced unconditionally)
/// @return true when the bundle matches the stored descriptors
bool operator() (OsdCLKernelBundle const *kernel) {
return vdesc == kernel->_vdesc;
// The offset is dynamic, so it takes no part in the comparison: only the
// length and stride of the vertex and varying descriptors are compared,
// and the result is true when all four values are equal.
return (vertexDesc.length == kernel->_numVertexElements and
vertexDesc.stride == kernel->_vertexStride and
varyingDesc.length == kernel->_numVaryingElements and
varyingDesc.stride == kernel->_varyingStride);
}
OsdVertexDescriptor vdesc;
// Descriptors captured by the constructor.
OsdVertexBufferDescriptor vertexDesc;
OsdVertexBufferDescriptor varyingDesc;
};
friend struct Match;
@ -99,7 +106,10 @@ protected:
_clLoopVertexB,
_clVertexEditAdd;
OsdVertexDescriptor _vdesc;
int _numVertexElements;
int _vertexStride;
int _numVaryingElements;
int _varyingStride;
};
} // end namespace OPENSUBDIV_VERSION

View File

@ -30,8 +30,7 @@ namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
OsdCpuComputeController::OsdCpuComputeController() :
_currentVertexBuffer(NULL), _currentVaryingBuffer(NULL) {
OsdCpuComputeController::OsdCpuComputeController() {
}
OsdCpuComputeController::~OsdCpuComputeController() {
@ -44,7 +43,8 @@ OsdCpuComputeController::ApplyBilinearFaceVerticesKernel(
assert(context);
OsdCpuComputeFace(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::F_IT)->GetBuffer(),
(const int*)context->GetTable(FarSubdivisionTables::F_ITa)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
@ -57,7 +57,8 @@ OsdCpuComputeController::ApplyBilinearEdgeVerticesKernel(
assert(context);
OsdCpuComputeBilinearEdge(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
@ -69,7 +70,8 @@ OsdCpuComputeController::ApplyBilinearVertexVerticesKernel(
assert(context);
OsdCpuComputeBilinearVertex(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
@ -81,7 +83,8 @@ OsdCpuComputeController::ApplyCatmarkFaceVerticesKernel(
assert(context);
OsdCpuComputeFace(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::F_IT)->GetBuffer(),
(const int*)context->GetTable(FarSubdivisionTables::F_ITa)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
@ -94,7 +97,8 @@ OsdCpuComputeController::ApplyCatmarkEdgeVerticesKernel(
assert(context);
OsdCpuComputeEdge(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
(const float*)context->GetTable(FarSubdivisionTables::E_W)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
@ -107,7 +111,8 @@ OsdCpuComputeController::ApplyCatmarkVertexVerticesKernelB(
assert(context);
OsdCpuComputeVertexB(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
(const int*)context->GetTable(FarSubdivisionTables::V_IT)->GetBuffer(),
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
@ -121,7 +126,8 @@ OsdCpuComputeController::ApplyCatmarkVertexVerticesKernelA1(
assert(context);
OsdCpuComputeVertexA(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), false);
@ -134,7 +140,8 @@ OsdCpuComputeController::ApplyCatmarkVertexVerticesKernelA2(
assert(context);
OsdCpuComputeVertexA(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
@ -147,7 +154,8 @@ OsdCpuComputeController::ApplyLoopEdgeVerticesKernel(
assert(context);
OsdCpuComputeEdge(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
(const float*)context->GetTable(FarSubdivisionTables::E_W)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
@ -160,7 +168,8 @@ OsdCpuComputeController::ApplyLoopVertexVerticesKernelB(
assert(context);
OsdCpuComputeLoopVertexB(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
(const int*)context->GetTable(FarSubdivisionTables::V_IT)->GetBuffer(),
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
@ -174,7 +183,8 @@ OsdCpuComputeController::ApplyLoopVertexVerticesKernelA1(
assert(context);
OsdCpuComputeVertexA(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), false);
@ -187,7 +197,8 @@ OsdCpuComputeController::ApplyLoopVertexVerticesKernelA2(
assert(context);
OsdCpuComputeVertexA(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
@ -206,24 +217,24 @@ OsdCpuComputeController::ApplyVertexEdits(
const OsdCpuTable * editValues = edit->GetEditValues();
if (edit->GetOperation() == FarVertexEdit::Add) {
OsdCpuEditVertexAdd(_vdesc,
_currentVertexBuffer,
OsdCpuEditVertexAdd(_currentBindState.vertexBuffer,
_currentBindState.vertexDesc,
edit->GetPrimvarOffset(),
edit->GetPrimvarWidth(),
batch.GetVertexOffset(),
batch.GetTableOffset(),
batch.GetStart(),
batch.GetVertexOffset(),
batch.GetTableOffset(),
batch.GetStart(),
batch.GetEnd(),
static_cast<unsigned int*>(primvarIndices->GetBuffer()),
static_cast<float*>(editValues->GetBuffer()));
} else if (edit->GetOperation() == FarVertexEdit::Set) {
OsdCpuEditVertexSet(_vdesc,
_currentVertexBuffer,
OsdCpuEditVertexSet(_currentBindState.vertexBuffer,
_currentBindState.vertexDesc,
edit->GetPrimvarOffset(),
edit->GetPrimvarWidth(),
batch.GetVertexOffset(),
batch.GetTableOffset(),
batch.GetStart(),
batch.GetVertexOffset(),
batch.GetTableOffset(),
batch.GetStart(),
batch.GetEnd(),
static_cast<unsigned int*>(primvarIndices->GetBuffer()),
static_cast<float*>(editValues->GetBuffer()));

View File

@ -29,6 +29,7 @@
#include "../far/dispatcher.h"
#include "../osd/cpuComputeContext.h"
#include "../osd/vertexDescriptor.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
@ -64,15 +65,25 @@ public:
///
/// @param varyingBuffer varying-interpolated data buffer
///
/// @param vertexDesc the descriptor of vertex elements to be refined.
/// if it's null, all primvars in the vertex buffer
/// will be refined.
///
/// @param varyingDesc the descriptor of varying elements to be refined.
/// if it's null, all primvars in the varying buffer
/// will be refined.
///
template<class VERTEX_BUFFER, class VARYING_BUFFER>
void Refine(OsdCpuComputeContext const *context,
FarKernelBatchVector const & batches,
VERTEX_BUFFER *vertexBuffer,
VARYING_BUFFER *varyingBuffer) {
VARYING_BUFFER *varyingBuffer,
OsdVertexBufferDescriptor const *vertexDesc=NULL,
OsdVertexBufferDescriptor const *varyingDesc=NULL) {
if (batches.empty()) return;
bind(vertexBuffer, varyingBuffer);
bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc);
FarDispatcher::Refine(this, context, batches, /*maxlevel*/-1);
@ -130,25 +141,62 @@ protected:
void ApplyVertexEdits(FarKernelBatch const &batch, ComputeContext const *context) const;
/// Records the CPU buffers and descriptors in _currentBindState.
///
/// @param vertex       vertex buffer; may be NULL
///
/// @param varying      varying buffer; may be NULL
///
/// @param vertexDesc   descriptor of the vertex elements; when NULL, a
///                     descriptor is built from vertex->GetNumElements()
///
/// @param varyingDesc  descriptor of the varying elements; when NULL, a
///                     descriptor is built from varying->GetNumElements()
///
template<class VERTEX_BUFFER, class VARYING_BUFFER>
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying) {
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying,
OsdVertexBufferDescriptor const *vertexDesc,
OsdVertexBufferDescriptor const *varyingDesc) {
_currentVertexBuffer = vertex ? vertex->BindCpuBuffer() : 0;
_currentVaryingBuffer = varying ? varying->BindCpuBuffer() : 0;
// If the vertex buffer descriptor is specified, use it.
// Otherwise, assume the data is tightly packed in the vertex buffer.
// NOTE(review): the three constructor arguments are presumably
// (offset, length, stride) -- confirm against OsdVertexBufferDescriptor.
if (vertexDesc) {
_currentBindState.vertexDesc = *vertexDesc;
} else {
int numElements = vertex ? vertex->GetNumElements() : 0;
_currentBindState.vertexDesc = OsdVertexBufferDescriptor(
0, numElements, numElements);
}
// Same rule for the varying descriptor.
if (varyingDesc) {
_currentBindState.varyingDesc = *varyingDesc;
} else {
int numElements = varying ? varying->GetNumElements() : 0;
_currentBindState.varyingDesc = OsdVertexBufferDescriptor(
0, numElements, numElements);
}
int numVertexElements = vertex ? vertex->GetNumElements() : 0;
int numVaryingElements = varying ? varying->GetNumElements() : 0;
_vdesc.Set(numVertexElements, numVaryingElements);
// Apply the descriptor offsets here: the stored pointers already point
// at the first element described by each descriptor. A missing buffer is
// recorded as NULL.
// NOTE(review): the offset is added to the pointer returned by
// BindCpuBuffer(), so it counts in units of that pointer's element type
// (presumably float) -- confirm.
if (vertex) {
_currentBindState.vertexBuffer =
vertex->BindCpuBuffer() + _currentBindState.vertexDesc.offset;
} else {
_currentBindState.vertexBuffer = NULL;
}
if (varying) {
_currentBindState.varyingBuffer =
varying->BindCpuBuffer() + _currentBindState.varyingDesc.offset;
} else {
_currentBindState.varyingBuffer = NULL;
}
}
/// Clears the state recorded by bind(): buffer pointers are zeroed and the
/// descriptors are reset.
void unbind() {
_currentVertexBuffer = 0;
_currentVaryingBuffer = 0;
_vdesc.Reset();
_currentBindState.Reset();
}
private:
float *_currentVertexBuffer, *_currentVaryingBuffer;
OsdVertexDescriptor _vdesc;
// Bind state is transient state that is only meaningful during refinement.
// It does not take ownership of the vertex buffers; it only stores raw
// pointers and copies of the descriptors.
struct BindState {
// Starts with both buffer pointers NULL; the descriptors are
// default-constructed.
BindState() : vertexBuffer(NULL), varyingBuffer(NULL) {}
// Returns to the unbound state: NULL pointers and reset descriptors.
void Reset() {
vertexBuffer = varyingBuffer = NULL;
vertexDesc.Reset();
varyingDesc.Reset();
}
// Buffer pointers as assigned by bind() (NULL when no buffer is bound).
float *vertexBuffer;
float *varyingBuffer;
// Descriptors for the data behind the two pointers above.
OsdVertexBufferDescriptor vertexDesc;
OsdVertexBufferDescriptor varyingDesc;
};
// The state currently in effect; written by bind() and reset by unbind().
BindState _currentBindState;
};
} // end namespace OPENSUBDIV_VERSION

283
opensubdiv/osd/cpuKernel.cpp Normal file → Executable file
View File

@ -25,54 +25,101 @@
#include "../osd/cpuKernel.h"
#include "../osd/vertexDescriptor.h"
#include <algorithm>
#include <cmath>
#include <cstdlib>
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
/// Zero-fills the first desc.length floats of a scratch accumulator.
///
/// @param dst   destination array; must hold at least desc.length floats
///
/// @param desc  descriptor whose length gives the number of floats to zero
///
static inline void
clear(float *dst, OsdVertexBufferDescriptor const &desc) {

    // size of the primvar span in bytes
    const size_t numBytes = desc.length * sizeof(float);

    memset(dst, 0, numBytes);
}
/// Accumulates one weighted source vertex into a scratch accumulator:
/// dst[k] += weight * element k of vertex srcIndex, for k in [0, desc.length).
///
/// @param dst        accumulator holding desc.length floats
///
/// @param srcOrigin  base of the source buffer; vertices are desc.stride
///                   floats apart
///
/// @param srcIndex   index of the source vertex
///
/// @param weight     scale factor applied to every source element
///
/// @param desc       descriptor supplying length and stride
///
/// Does nothing when either pointer is NULL.
///
static inline void
addWithWeight(float *dst, const float *srcOrigin, int srcIndex, float weight,
              OsdVertexBufferDescriptor const &desc) {

    if (!srcOrigin || !dst) {
        return;
    }

    // first element of the source vertex
    const float *in = srcOrigin + srcIndex * desc.stride;

    const int numElements = desc.length;
    for (int e = 0; e < numElements; ++e) {
        dst[e] += in[e] * weight;
    }
}
/// Writes a scratch accumulator back into the destination buffer at vertex
/// dstIndex.
///
/// @param dstOrigin  base of the destination buffer; vertices are
///                   desc.stride floats apart
///
/// @param src        accumulator holding desc.length floats
///
/// @param dstIndex   index of the destination vertex
///
/// @param desc       descriptor supplying length and stride
///
/// Does nothing when either pointer is NULL.
///
static inline void
copy(float *dstOrigin, const float *src, int dstIndex,
     OsdVertexBufferDescriptor const &desc) {

    if (!dstOrigin || !src) {
        return;
    }

    // first element of the destination vertex
    float *out = dstOrigin + dstIndex * desc.stride;

    const size_t numBytes = desc.length * sizeof(float);
    memcpy(out, src, numBytes);
}
// Computes face-vertex results for the table entries in [start, end).
// For entry i, F_ITa[2*i] is the first index into F_IT and F_ITa[2*i+1] is the
// number n of source vertices; the result is the sum of those n vertices, each
// weighted by 1/n, written to vertex (i - tableOffset + vertexOffset).
// The same weights are applied to the varying data.
// NOTE(review): this span interleaves the OsdVertexDescriptor ('vdesc') form of
// the function with its OsdVertexBufferDescriptor form; per the commit
// description the latter replaces the former -- confirm against the applied file.
void OsdCpuComputeFace(
OsdVertexDescriptor const &vdesc, float * vertex, float * varying,
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *F_IT, const int *F_ITa, int vertexOffset, int tableOffset,
int start, int end) {
// Specialised kernels are used only when the vertex descriptor equals
// OsdVertexBufferDescriptor(0, 4, 4) or (0, 8, 8) and there is no varying
// buffer; every other layout takes the generic loop below.
if(vdesc.numVertexElements == 4 && varying == NULL) {
if(vertexDesc == OsdVertexBufferDescriptor(0, 4, 4) && varying == NULL) {
ComputeFaceKernel<4>
(vertex, F_IT, F_ITa, vertexOffset, tableOffset, start, end);
} else if(vdesc.numVertexElements == 8 && varying == NULL) {
} else if(vertexDesc == OsdVertexBufferDescriptor(0, 8, 8) && varying == NULL) {
ComputeFaceKernel<8>
(vertex, F_IT, F_ITa, vertexOffset, tableOffset, start, end);
}
else {
// Scratch accumulators sized by the descriptor lengths; allocated once
// and reused for every entry of the loop.
float *vertexResults = (float*)alloca(vertexDesc.length * sizeof(float));
float *varyingResults = (float*)alloca(varyingDesc.length * sizeof(float));
for (int i = start + tableOffset; i < end + tableOffset; i++) {
int h = F_ITa[2*i];
int n = F_ITa[2*i+1];
float weight = 1.0f/n;
// XXX: should use local vertex struct variable instead of
// accumulating directly into global memory.
int dstIndex = i + vertexOffset - tableOffset;
vdesc.Clear(vertex, varying, dstIndex);
// clear the scratch accumulators
clear(vertexResults, vertexDesc);
clear(varyingResults, varyingDesc);
// accumulate the n weighted source vertices
for (int j = 0; j < n; ++j) {
int index = F_IT[h+j];
vdesc.AddWithWeight(vertex, dstIndex, index, weight);
vdesc.AddVaryingWithWeight(varying, dstIndex, index, weight);
addWithWeight(vertexResults, vertex, index, weight, vertexDesc);
addWithWeight(varyingResults, varying, index, weight, varyingDesc);
}
}
// write the accumulated results to the destination vertex
copy(vertex, vertexResults, dstIndex, vertexDesc);
copy(varying, varyingResults, dstIndex, varyingDesc);
}
}
}
void OsdCpuComputeEdge(
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
float *vertex, float *varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *E_IT, const float *E_W, int vertexOffset, int tableOffset,
int start, int end) {
if(vdesc.numVertexElements == 4 && varying == NULL) {
if(vertexDesc == OsdVertexBufferDescriptor(0, 4, 4) && varying == NULL) {
ComputeEdgeKernel<4>(vertex, E_IT, E_W, vertexOffset, tableOffset,
start, end);
}
else if(vdesc.numVertexElements == 8 && varying == NULL) {
else if(vertexDesc == OsdVertexBufferDescriptor(0, 8, 8) && varying == NULL) {
ComputeEdgeKernel<8>(vertex, E_IT, E_W, vertexOffset, tableOffset,
start, end);
start, end);
}
else {
float *vertexResults = (float*)alloca(vertexDesc.length * sizeof(float));
float *varyingResults = (float*)alloca(varyingDesc.length * sizeof(float));
for (int i = start + tableOffset; i < end + tableOffset; i++) {
int eidx0 = E_IT[4*i+0];
int eidx1 = E_IT[4*i+1];
@ -82,37 +129,46 @@ void OsdCpuComputeEdge(
float vertWeight = E_W[i*2+0];
int dstIndex = i + vertexOffset - tableOffset;
vdesc.Clear(vertex, varying, dstIndex);
clear(vertexResults, vertexDesc);
clear(varyingResults, varyingDesc);
vdesc.AddWithWeight(vertex, dstIndex, eidx0, vertWeight);
vdesc.AddWithWeight(vertex, dstIndex, eidx1, vertWeight);
addWithWeight(vertexResults, vertex, eidx0, vertWeight, vertexDesc);
addWithWeight(vertexResults, vertex, eidx1, vertWeight, vertexDesc);
if (eidx2 != -1) {
float faceWeight = E_W[i*2+1];
vdesc.AddWithWeight(vertex, dstIndex, eidx2, faceWeight);
vdesc.AddWithWeight(vertex, dstIndex, eidx3, faceWeight);
addWithWeight(vertexResults, vertex, eidx2, faceWeight, vertexDesc);
addWithWeight(vertexResults, vertex, eidx3, faceWeight, vertexDesc);
}
vdesc.AddVaryingWithWeight(varying, dstIndex, eidx0, 0.5f);
vdesc.AddVaryingWithWeight(varying, dstIndex, eidx1, 0.5f);
}
addWithWeight(varyingResults, varying, eidx0, 0.5f, varyingDesc);
addWithWeight(varyingResults, varying, eidx1, 0.5f, varyingDesc);
copy(vertex, vertexResults, dstIndex, vertexDesc);
copy(varying, varyingResults, dstIndex, varyingDesc);
}
}
}
void OsdCpuComputeVertexA(
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
float *vertex, float *varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, const float *V_W, int vertexOffset, int tableOffset,
int start, int end, int pass) {
if(vdesc.numVertexElements == 4 && varying == NULL) {
if(vertexDesc == OsdVertexBufferDescriptor(0, 4, 4) && varying == NULL) {
ComputeVertexAKernel<4>(vertex, V_ITa, V_W, vertexOffset, tableOffset,
start, end, pass);
}
else if (vdesc.numVertexElements == 8 && varying == NULL) {
else if(vertexDesc == OsdVertexBufferDescriptor(0, 8, 8) && varying == NULL) {
ComputeVertexAKernel<8>(vertex, V_ITa, V_W, vertexOffset, tableOffset,
start, end, pass);
}
}
else {
float *vertexResults = (float*)alloca(vertexDesc.length * sizeof(float));
float *varyingResults = (float*)alloca(varyingDesc.length * sizeof(float));
for (int i = start + tableOffset; i < end + tableOffset; i++) {
int n = V_ITa[5*i+1];
int p = V_ITa[5*i+2];
@ -129,36 +185,48 @@ void OsdCpuComputeVertexA(
int dstIndex = i + vertexOffset - tableOffset;
if (not pass)
vdesc.Clear(vertex, varying, dstIndex);
if (eidx0 == -1 || (pass == 0 && (n == -1))) {
vdesc.AddWithWeight(vertex, dstIndex, p, weight);
} else {
vdesc.AddWithWeight(vertex, dstIndex, p, weight * 0.75f);
vdesc.AddWithWeight(vertex, dstIndex, eidx0, weight * 0.125f);
vdesc.AddWithWeight(vertex, dstIndex, eidx1, weight * 0.125f);
clear(vertexResults, vertexDesc);
clear(varyingResults, varyingDesc);
if (pass) {
// copy previous results
addWithWeight(vertexResults, vertex, dstIndex, 1.0f, vertexDesc);
}
if (not pass)
vdesc.AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
}
if (eidx0 == -1 || (pass == 0 && (n == -1))) {
addWithWeight(vertexResults, vertex, p, weight, vertexDesc);
} else {
addWithWeight(vertexResults, vertex, p, weight * 0.75f, vertexDesc);
addWithWeight(vertexResults, vertex, eidx0, weight * 0.125f, vertexDesc);
addWithWeight(vertexResults, vertex, eidx1, weight * 0.125f, vertexDesc);
}
copy(vertex, vertexResults, dstIndex, vertexDesc);
if (not pass) {
addWithWeight(varyingResults, varying, p, 1.0f, varyingDesc);
copy(varying, varyingResults, dstIndex, varyingDesc);
}
}
}
}
void OsdCpuComputeVertexB(
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
float *vertex, float *varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, const int *V_IT, const float *V_W,
int vertexOffset, int tableOffset, int start, int end) {
if(vdesc.numVertexElements == 4 && varying == NULL) {
if(vertexDesc == OsdVertexBufferDescriptor(0, 4, 4) && varying == NULL) {
ComputeVertexBKernel<4>(vertex, V_ITa, V_IT, V_W,
vertexOffset, tableOffset, start, end);
}
else if(vdesc.numVertexElements == 8 && varying == NULL) {
else if(vertexDesc == OsdVertexBufferDescriptor(0, 8, 8) && varying == NULL) {
ComputeVertexBKernel<8>(vertex, V_ITa, V_IT, V_W,
vertexOffset, tableOffset, start, end);
}
}
else {
float *vertexResults = (float*)alloca(vertexDesc.length * sizeof(float));
float *varyingResults = (float*)alloca(varyingDesc.length * sizeof(float));
for (int i = start + tableOffset; i < end + tableOffset; i++) {
int h = V_ITa[5*i];
int n = V_ITa[5*i+1];
@ -169,32 +237,41 @@ void OsdCpuComputeVertexB(
float wv = (n-2.0f) * n * wp;
int dstIndex = i + vertexOffset - tableOffset;
vdesc.Clear(vertex, varying, dstIndex);
clear(vertexResults, vertexDesc);
clear(varyingResults, varyingDesc);
vdesc.AddWithWeight(vertex, dstIndex, p, weight * wv);
addWithWeight(vertexResults, vertex, p, weight * wv, vertexDesc);
for (int j = 0; j < n; ++j) {
vdesc.AddWithWeight(vertex, dstIndex, V_IT[h+j*2], weight * wp);
vdesc.AddWithWeight(vertex, dstIndex, V_IT[h+j*2+1], weight * wp);
addWithWeight(vertexResults, vertex, V_IT[h+j*2], weight * wp, vertexDesc);
addWithWeight(vertexResults, vertex, V_IT[h+j*2+1], weight * wp, vertexDesc);
}
vdesc.AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
addWithWeight(varyingResults, varying, p, 1.0f, varyingDesc);
copy(vertex, vertexResults, dstIndex, vertexDesc);
copy(varying, varyingResults, dstIndex, varyingDesc);
}
}
}
}
void OsdCpuComputeLoopVertexB(
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
float *vertex, float *varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, const int *V_IT, const float *V_W,
int vertexOffset, int tableOffset, int start, int end) {
if(vdesc.numVertexElements == 4 && varying == NULL) {
if(vertexDesc == OsdVertexBufferDescriptor(0, 4, 4) && varying == NULL) {
ComputeLoopVertexBKernel<4>(vertex, V_ITa, V_IT, V_W, vertexOffset,
tableOffset, start, end);
}
else if(vdesc.numVertexElements == 8 && varying == NULL) {
else if(vertexDesc == OsdVertexBufferDescriptor(0, 8, 8) && varying == NULL) {
ComputeLoopVertexBKernel<8>(vertex, V_ITa, V_IT, V_W, vertexOffset,
tableOffset, start, end);
}
else {
float *vertexResults = (float*)alloca(vertexDesc.length * sizeof(float));
float *varyingResults = (float*)alloca(varyingDesc.length * sizeof(float));
for (int i = start + tableOffset; i < end + tableOffset; i++) {
int h = V_ITa[5*i];
int n = V_ITa[5*i+1];
@ -207,94 +284,120 @@ void OsdCpuComputeLoopVertexB(
beta = (0.625f - beta) * wp;
int dstIndex = i + vertexOffset - tableOffset;
vdesc.Clear(vertex, varying, dstIndex);
clear(vertexResults, vertexDesc);
clear(varyingResults, varyingDesc);
vdesc.AddWithWeight(vertex, dstIndex, p, weight * (1.0f - (beta * n)));
addWithWeight(vertexResults, vertex, p, weight * (1.0f - (beta * n)), vertexDesc);
for (int j = 0; j < n; ++j)
vdesc.AddWithWeight(vertex, dstIndex, V_IT[h+j], weight * beta);
addWithWeight(vertexResults, vertex, V_IT[h+j], weight * beta, vertexDesc);
vdesc.AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
}
}
addWithWeight(varyingResults, varying, p, 1.0f, varyingDesc);
copy(vertex, vertexResults, dstIndex, vertexDesc);
copy(varying, varyingResults, dstIndex, varyingDesc);
}
}
}
// Computes bilinear edge-vertex results for the table entries in [start, end).
// For entry i, E_IT[2*i] and E_IT[2*i+1] are the two source vertex indices;
// the result is their sum with weight 0.5 each, written to vertex
// (i - tableOffset + vertexOffset). Varying data uses the same two weights.
// NOTE(review): this span interleaves the OsdVertexDescriptor ('vdesc') form of
// the function with its OsdVertexBufferDescriptor form; per the commit
// description the latter replaces the former -- confirm against the applied file.
void OsdCpuComputeBilinearEdge(
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
float *vertex, float *varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *E_IT, int vertexOffset, int tableOffset, int start, int end) {
// Specialised kernels are used only when the vertex descriptor equals
// OsdVertexBufferDescriptor(0, 4, 4) or (0, 8, 8) and there is no varying
// buffer; every other layout takes the generic loop below.
if(vdesc.numVertexElements == 4 && varying == NULL) {
if(vertexDesc == OsdVertexBufferDescriptor(0, 4, 4) && varying == NULL) {
ComputeBilinearEdgeKernel<4>(vertex, E_IT, vertexOffset, tableOffset,
start, end);
}
else if(vdesc.numVertexElements == 8 && varying == NULL) {
else if(vertexDesc == OsdVertexBufferDescriptor(0, 8, 8) && varying == NULL) {
ComputeBilinearEdgeKernel<8>(vertex, E_IT, vertexOffset, tableOffset,
start, end);
}
else {
// Scratch accumulators sized by the descriptor lengths; allocated once
// and reused for every entry of the loop.
float *vertexResults = (float*)alloca(vertexDesc.length * sizeof(float));
float *varyingResults = (float*)alloca(varyingDesc.length * sizeof(float));
for (int i = start + tableOffset; i < end + tableOffset; i++) {
int eidx0 = E_IT[2*i+0];
int eidx1 = E_IT[2*i+1];
int dstIndex = i + vertexOffset - tableOffset;
vdesc.Clear(vertex, varying, dstIndex);
// reset the scratch accumulators for this entry
clear(vertexResults, vertexDesc);
clear(varyingResults, varyingDesc);
vdesc.AddWithWeight(vertex, dstIndex, eidx0, 0.5f);
vdesc.AddWithWeight(vertex, dstIndex, eidx1, 0.5f);
// vertex data: sum of the two endpoints, each weighted 0.5
addWithWeight(vertexResults, vertex, eidx0, 0.5f, vertexDesc);
addWithWeight(vertexResults, vertex, eidx1, 0.5f, vertexDesc);
vdesc.AddVaryingWithWeight(varying, dstIndex, eidx0, 0.5f);
vdesc.AddVaryingWithWeight(varying, dstIndex, eidx1, 0.5f);
}
// varying data: same two endpoints and weights
addWithWeight(varyingResults, varying, eidx0, 0.5f, varyingDesc);
addWithWeight(varyingResults, varying, eidx1, 0.5f, varyingDesc);
// write the accumulated results to the destination vertex
copy(vertex, vertexResults, dstIndex, vertexDesc);
copy(varying, varyingResults, dstIndex, varyingDesc);
}
}
}
void OsdCpuComputeBilinearVertex(
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
float *vertex, float *varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, int vertexOffset, int tableOffset, int start, int end) {
int numVertexElements = vdesc.numVertexElements;
int numVaryingElements = vdesc.numVaryingElements;
float *src, *des;
float *src, *des;
for (int i = start + tableOffset; i < end + tableOffset; i++) {
int p = V_ITa[i];
int dstIndex = i + vertexOffset - tableOffset;
src = vertex + p * numVertexElements;
des = vertex + dstIndex * numVertexElements;
memcpy(des, src, sizeof(float)*numVertexElements);
if(varying) {
src = varying + p * numVaryingElements;
des = varying + dstIndex * numVaryingElements;
memcpy(des, src, sizeof(float)*numVaryingElements);
int dstIndex = i + vertexOffset - tableOffset;
if (vertex) {
src = vertex + p * vertexDesc.stride;
des = vertex + dstIndex * vertexDesc.stride;
memcpy(des, src, sizeof(float)*vertexDesc.length);
}
if (varying) {
src = varying + p * varyingDesc.stride;
des = varying + dstIndex * varyingDesc.stride;
memcpy(des, src, sizeof(float)*varyingDesc.length);
}
}
}
void OsdCpuEditVertexAdd(
OsdVertexDescriptor const &vdesc, float *vertex,
float *vertex,
OsdVertexBufferDescriptor const &vertexDesc,
int primVarOffset, int primVarWidth, int vertexOffset, int tableOffset,
int start, int end,
const unsigned int *editIndices, const float *editValues) {
for (int i = start+tableOffset; i < end+tableOffset; i++) {
vdesc.ApplyVertexEditAdd(vertex,
primVarOffset,
primVarWidth,
editIndices[i] + vertexOffset,
&editValues[i*primVarWidth]);
if (vertex) {
int editIndex = editIndices[i] + vertexOffset;
float *dst = vertex + editIndex * vertexDesc.stride + primVarOffset;
for (int i = 0; i < primVarWidth; ++i) {
dst[i] += editValues[i];
}
}
}
}
void OsdCpuEditVertexSet(
OsdVertexDescriptor const &vdesc, float *vertex,
float *vertex,
OsdVertexBufferDescriptor const &vertexDesc,
int primVarOffset, int primVarWidth, int vertexOffset, int tableOffset,
int start, int end,
const unsigned int *editIndices, const float *editValues) {
for (int i = start+tableOffset; i < end+tableOffset; i++) {
vdesc.ApplyVertexEditSet(vertex,
primVarOffset,
primVarWidth,
editIndices[i] + vertexOffset,
&editValues[i*primVarWidth]);
if (vertex) {
int editIndex = editIndices[i] + vertexOffset;
float *dst = vertex + editIndex * vertexDesc.stride + primVarOffset;
for (int i = 0; i < primVarWidth; ++i) {
dst[i] = editValues[i];
}
}
}
}

View File

@ -88,8 +88,9 @@ void ComputeFaceKernel(float *vertex,
memcpy(des, result1, sizeof(float)*numVertexElements);
}
}
void OsdCpuComputeFace(OsdVertexDescriptor const &vdesc,
float * vertex, float * varying,
void OsdCpuComputeFace(float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *F_IT, const int *F_ITa,
int vertexOffset, int tableOffset,
int start, int end);
@ -147,8 +148,9 @@ void ComputeEdgeKernel( float *vertex,
memcpy(des, result1, sizeof(float)*numVertexElements);
}
}
void OsdCpuComputeEdge(OsdVertexDescriptor const &vdesc,
float *vertex, float * varying,
void OsdCpuComputeEdge(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *E_IT, const float *E_ITa,
int vertexOffset, int tableOffset,
int start, int end);
@ -230,8 +232,9 @@ void ComputeVertexAKernel( float *vertex,
memcpy(des, result1, sizeof(float)*numVertexElements);
}
}
void OsdCpuComputeVertexA(OsdVertexDescriptor const &vdesc,
float *vertex, float * varying,
void OsdCpuComputeVertexA(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, const float *V_IT,
int vertexOffset, int tableOffset,
int start, int end, int pass);
@ -291,8 +294,9 @@ void ComputeVertexBKernel( float *vertex,
}
}
void OsdCpuComputeVertexB(OsdVertexDescriptor const &vdesc,
float *vertex, float * varying,
void OsdCpuComputeVertexB(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, const int *V_IT, const float *V_W,
int vertexOffset, int tableOffset,
int start, int end);
@ -350,8 +354,9 @@ void ComputeLoopVertexBKernel( float *vertex,
memcpy(des, result1, sizeof(float)*numVertexElements);
}
}
void OsdCpuComputeLoopVertexB(OsdVertexDescriptor const &vdesc,
float *vertex, float * varying,
void OsdCpuComputeLoopVertexB(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, const int *V_IT,
const float *V_W,
int vertexOffset, int tableOffset,
@ -385,26 +390,30 @@ void ComputeBilinearEdgeKernel( float *vertex,
memcpy(des, result, sizeof(float)*numVertexElements);
}
}
void OsdCpuComputeBilinearEdge(OsdVertexDescriptor const &vdesc,
float *vertex, float * varying,
void OsdCpuComputeBilinearEdge(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *E_IT,
int vertexOffset, int tableOffset,
int start, int end);
void OsdCpuComputeBilinearVertex(OsdVertexDescriptor const &vdesc,
float *vertex, float * varying,
void OsdCpuComputeBilinearVertex(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa,
int vertexOffset, int tableOffset,
int start, int end);
void OsdCpuEditVertexAdd(OsdVertexDescriptor const &vdesc, float *vertex,
void OsdCpuEditVertexAdd(float *vertex,
OsdVertexBufferDescriptor const &vertexDesc,
int primVarOffset, int primVarWidth,
int vertexOffset, int tableOffset,
int start, int end,
const unsigned int *editIndices,
const float *editValues);
void OsdCpuEditVertexSet(OsdVertexDescriptor const &vdesc, float *vertex,
void OsdCpuEditVertexSet(float *vertex,
OsdVertexBufferDescriptor const &vertexDesc,
int primVarOffset, int primVarWidth,
int vertexOffset, int tableOffset,
int start, int end,

View File

@ -31,42 +31,47 @@
// Host-side entry points of the CUDA kernels. Every compute kernel takes the
// vertex and varying buffers followed by the length and stride (in floats)
// of the elements to process in each buffer, then its subdivision tables and
// the batch range (offset, tableOffset, start, end).
extern "C" {

    void OsdCudaComputeFace(float *vertex, float *varying,
                            int vertexLength, int vertexStride,
                            int varyingLength, int varyingStride,
                            int *F_IT, int *F_ITa, int offset, int tableOffset, int start, int end);

    void OsdCudaComputeEdge(float *vertex, float *varying,
                            int vertexLength, int vertexStride,
                            int varyingLength, int varyingStride,
                            int *E_IT, float *E_W, int offset, int tableOffset, int start, int end);

    void OsdCudaComputeVertexA(float *vertex, float *varying,
                               int vertexLength, int vertexStride,
                               int varyingLength, int varyingStride,
                               int *V_ITa, float *V_W, int offset, int tableOffset,
                               int start, int end, int pass);

    void OsdCudaComputeVertexB(float *vertex, float *varying,
                               int vertexLength, int vertexStride,
                               int varyingLength, int varyingStride,
                               int *V_ITa, int *V_IT, float *V_W, int offset, int tableOffset,
                               int start, int end);

    void OsdCudaComputeLoopVertexB(float *vertex, float *varying,
                                   int vertexLength, int vertexStride,
                                   int varyingLength, int varyingStride,
                                   int *V_ITa, int *V_IT, float *V_W, int offset, int tableOffset,
                                   int start, int end);

    void OsdCudaComputeBilinearEdge(float *vertex, float *varying,
                                    int vertexLength, int vertexStride,
                                    int varyingLength, int varyingStride,
                                    int *E_IT, int offset, int tableOffset, int start, int end);

    void OsdCudaComputeBilinearVertex(float *vertex, float *varying,
                                      int vertexLength, int vertexStride,
                                      int varyingLength, int varyingStride,
                                      int *V_ITa, int offset, int tableOffset, int start, int end);

    // Vertex edits only touch the vertex buffer, so no varying arguments.
    void OsdCudaEditVertexAdd(float *vertex,
                              int vertexLength, int vertexStride,
                              int primVarOffset, int primVarWidth,
                              int offset, int tableOffset,
                              int start, int end, int *editIndices, float *editValues);

}
@ -74,8 +79,7 @@ void OsdCudaEditVertexAdd(float *vertex, int numUserVertexElements,
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
// Nothing to initialize explicitly: the controller's bind state is a member
// object and is default-constructed.
OsdCudaComputeController::OsdCudaComputeController() {
}
OsdCudaComputeController::~OsdCudaComputeController() {
@ -92,9 +96,13 @@ OsdCudaComputeController::ApplyBilinearFaceVerticesKernel(
assert(F_IT);
assert(F_ITa);
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
OsdCudaComputeFace(
_currentVertexBuffer, _currentVaryingBuffer,
_vdesc.numVertexElements-3, _vdesc.numVaryingElements,
vertex, varying,
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
static_cast<int*>(F_IT->GetCudaMemory()),
static_cast<int*>(F_ITa->GetCudaMemory()),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
@ -109,9 +117,13 @@ OsdCudaComputeController::ApplyBilinearEdgeVerticesKernel(
const OsdCudaTable * E_IT = context->GetTable(FarSubdivisionTables::E_IT);
assert(E_IT);
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
OsdCudaComputeBilinearEdge(
_currentVertexBuffer, _currentVaryingBuffer,
_vdesc.numVertexElements-3, _vdesc.numVaryingElements,
vertex, varying,
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
static_cast<int*>(E_IT->GetCudaMemory()),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
@ -125,9 +137,13 @@ OsdCudaComputeController::ApplyBilinearVertexVerticesKernel(
const OsdCudaTable * V_ITa = context->GetTable(FarSubdivisionTables::V_ITa);
assert(V_ITa);
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
OsdCudaComputeBilinearVertex(
_currentVertexBuffer, _currentVaryingBuffer,
_vdesc.numVertexElements-3, _vdesc.numVaryingElements,
vertex, varying,
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
static_cast<int*>(V_ITa->GetCudaMemory()),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
@ -143,9 +159,13 @@ OsdCudaComputeController::ApplyCatmarkFaceVerticesKernel(
assert(F_IT);
assert(F_ITa);
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
OsdCudaComputeFace(
_currentVertexBuffer, _currentVaryingBuffer,
_vdesc.numVertexElements-3, _vdesc.numVaryingElements,
vertex, varying,
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
static_cast<int*>(F_IT->GetCudaMemory()),
static_cast<int*>(F_ITa->GetCudaMemory()),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
@ -162,9 +182,13 @@ OsdCudaComputeController::ApplyCatmarkEdgeVerticesKernel(
assert(E_IT);
assert(E_W);
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
OsdCudaComputeEdge(
_currentVertexBuffer, _currentVaryingBuffer,
_vdesc.numVertexElements-3, _vdesc.numVaryingElements,
vertex, varying,
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
static_cast<int*>(E_IT->GetCudaMemory()),
static_cast<float*>(E_W->GetCudaMemory()),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
@ -183,9 +207,13 @@ OsdCudaComputeController::ApplyCatmarkVertexVerticesKernelB(
assert(V_IT);
assert(V_W);
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
OsdCudaComputeVertexB(
_currentVertexBuffer, _currentVaryingBuffer,
_vdesc.numVertexElements-3, _vdesc.numVaryingElements,
vertex, varying,
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
static_cast<int*>(V_ITa->GetCudaMemory()),
static_cast<int*>(V_IT->GetCudaMemory()),
static_cast<float*>(V_W->GetCudaMemory()),
@ -203,9 +231,13 @@ OsdCudaComputeController::ApplyCatmarkVertexVerticesKernelA1(
assert(V_ITa);
assert(V_W);
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
OsdCudaComputeVertexA(
_currentVertexBuffer, _currentVaryingBuffer,
_vdesc.numVertexElements-3, _vdesc.numVaryingElements,
vertex, varying,
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
static_cast<int*>(V_ITa->GetCudaMemory()),
static_cast<float*>(V_W->GetCudaMemory()),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), false);
@ -222,9 +254,13 @@ OsdCudaComputeController::ApplyCatmarkVertexVerticesKernelA2(
assert(V_ITa);
assert(V_W);
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
OsdCudaComputeVertexA(
_currentVertexBuffer, _currentVaryingBuffer,
_vdesc.numVertexElements-3, _vdesc.numVaryingElements,
vertex, varying,
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
static_cast<int*>(V_ITa->GetCudaMemory()),
static_cast<float*>(V_W->GetCudaMemory()),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
@ -241,9 +277,13 @@ OsdCudaComputeController::ApplyLoopEdgeVerticesKernel(
assert(E_IT);
assert(E_W);
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
OsdCudaComputeEdge(
_currentVertexBuffer, _currentVaryingBuffer,
_vdesc.numVertexElements-3, _vdesc.numVaryingElements,
vertex, varying,
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
static_cast<int*>(E_IT->GetCudaMemory()),
static_cast<float*>(E_W->GetCudaMemory()),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
@ -262,9 +302,13 @@ OsdCudaComputeController::ApplyLoopVertexVerticesKernelB(
assert(V_IT);
assert(V_W);
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
OsdCudaComputeLoopVertexB(
_currentVertexBuffer, _currentVaryingBuffer,
_vdesc.numVertexElements-3, _vdesc.numVaryingElements,
vertex, varying,
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
static_cast<int*>(V_ITa->GetCudaMemory()),
static_cast<int*>(V_IT->GetCudaMemory()),
static_cast<float*>(V_W->GetCudaMemory()),
@ -282,9 +326,13 @@ OsdCudaComputeController::ApplyLoopVertexVerticesKernelA1(
assert(V_ITa);
assert(V_W);
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
OsdCudaComputeVertexA(
_currentVertexBuffer, _currentVaryingBuffer,
_vdesc.numVertexElements-3, _vdesc.numVaryingElements,
vertex, varying,
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
static_cast<int*>(V_ITa->GetCudaMemory()),
static_cast<float*>(V_W->GetCudaMemory()),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), false);
@ -301,9 +349,13 @@ OsdCudaComputeController::ApplyLoopVertexVerticesKernelA2(
assert(V_ITa);
assert(V_W);
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
OsdCudaComputeVertexA(
_currentVertexBuffer, _currentVaryingBuffer,
_vdesc.numVertexElements-3, _vdesc.numVaryingElements,
vertex, varying,
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
static_cast<int*>(V_ITa->GetCudaMemory()),
static_cast<float*>(V_W->GetCudaMemory()),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
@ -321,10 +373,12 @@ OsdCudaComputeController::ApplyVertexEdits(
const OsdCudaTable * primvarIndices = edit->GetPrimvarIndices();
const OsdCudaTable * editValues = edit->GetEditValues();
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
if (edit->GetOperation() == FarVertexEdit::Add) {
OsdCudaEditVertexAdd(
_currentVertexBuffer,
_vdesc.numVertexElements-3,
vertex,
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
edit->GetPrimvarOffset(),
edit->GetPrimvarWidth(),
batch.GetVertexOffset(),

View File

@ -29,6 +29,7 @@
#include "../far/dispatcher.h"
#include "../osd/cudaComputeContext.h"
#include "../osd/vertexDescriptor.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
@ -64,15 +65,25 @@ public:
///
/// @param varyingBuffer varying-interpolated data buffer
///
/// @param vertexDesc the descriptor of vertex elements to be refined.
/// if it's null, all primvars in the vertex buffer
/// will be refined.
///
/// @param varyingDesc the descriptor of varying elements to be refined.
/// if it's null, all primvars in the varying buffer
/// will be refined.
///
template<class VERTEX_BUFFER, class VARYING_BUFFER>
void Refine(OsdCudaComputeContext const *context,
FarKernelBatchVector const &batches,
VERTEX_BUFFER *vertexBuffer,
VARYING_BUFFER *varyingBuffer) {
VARYING_BUFFER *varyingBuffer,
OsdVertexBufferDescriptor const *vertexDesc=NULL,
OsdVertexBufferDescriptor const *varyingDesc=NULL) {
if (batches.empty()) return;
bind(vertexBuffer, varyingBuffer);
bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc);
FarDispatcher::Refine(this, context, batches, /*maxlevel*/-1);
@ -130,37 +141,60 @@ protected:
void ApplyVertexEdits(FarKernelBatch const &batch, ComputeContext const *context) const;
template<class VERTEX_BUFFER, class VARYING_BUFFER>
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying) {
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying,
OsdVertexBufferDescriptor const *vertexDesc,
OsdVertexBufferDescriptor const *varyingDesc) {
if (vertex) {
_currentVertexBuffer = static_cast<float*>(vertex->BindCudaBuffer());
_vdesc.numVertexElements = vertex->GetNumElements();
// if the vertex buffer descriptor is specified, use it.
// otherwise, assumes the data is tightly packed in the vertex buffer.
if (vertexDesc) {
_currentBindState.vertexDesc = *vertexDesc;
} else {
_currentVertexBuffer = 0;
_vdesc.numVertexElements = 0;
int numElements = vertex ? vertex->GetNumElements() : 0;
_currentBindState.vertexDesc = OsdVertexBufferDescriptor(
0, numElements, numElements);
}
if (varyingDesc) {
_currentBindState.varyingDesc = *varyingDesc;
} else {
int numElements = varying ? varying->GetNumElements() : 0;
_currentBindState.varyingDesc = OsdVertexBufferDescriptor(
0, numElements, numElements);
}
if (varying) {
_currentVaryingBuffer = static_cast<float*>(varying->BindCudaBuffer());
_vdesc.numVaryingElements = varying->GetNumElements();
} else {
_currentVaryingBuffer = 0;
_vdesc.numVaryingElements = 0;
}
_currentBindState.vertexBuffer = vertex ?
static_cast<float*>(vertex->BindCudaBuffer()) : 0;
_currentBindState.varyingBuffer = varying ?
static_cast<float*>(varying->BindCudaBuffer()) : 0;
}
/// Unbinds any previously bound vertex and varying data buffers by
/// resetting the current bind state.
void unbind() {
    _currentBindState.Reset();
}
private:
// Buffers and descriptors captured by bind() for the kernels to use.
struct BindState {
    BindState() : vertexBuffer(NULL), varyingBuffer(NULL) {}

    // Drops both buffer pointers and resets both descriptors.
    void Reset() {
        vertexBuffer = varyingBuffer = NULL;
        vertexDesc.Reset();
        varyingDesc.Reset();
    }

    // Vertex buffer advanced by vertexDesc.offset floats, or 0 when no
    // vertex buffer is bound.
    float *GetOffsettedVertexBuffer() const {
        return vertexBuffer ? vertexBuffer + vertexDesc.offset : 0;
    }

    // Varying buffer advanced by varyingDesc.offset floats, or 0 when no
    // varying buffer is bound.
    float *GetOffsettedVaryingBuffer() const {
        return varyingBuffer ? varyingBuffer + varyingDesc.offset : 0;
    }

    float *vertexBuffer;     // cuda buffers
    float *varyingBuffer;
    OsdVertexBufferDescriptor vertexDesc;
    OsdVertexBufferDescriptor varyingDesc;
};

BindState _currentBindState;
};
} // end namespace OPENSUBDIV_VERSION

View File

@ -25,37 +25,18 @@
#include <assert.h>
template<int N> struct DeviceVertex
{
float pos[3];
float userVertexData[N];
__device__ void addWithWeight(const DeviceVertex<N> *src, float weight) {
pos[0] += src->pos[0] * weight;
pos[1] += src->pos[1] * weight;
pos[2] += src->pos[2] * weight;
for(int i = 0; i < N; ++i){
userVertexData[i] += src->userVertexData[i] * weight;
}
}
__device__ void clear() {
pos[0] = pos[1] = pos[2] = 0.0f;
for(int i = 0; i < N; ++i){
userVertexData[i] = 0.0f;
}
}
};
template<int N> struct DeviceVarying
{
float v[N];
__device__ void addVaryingWithWeight(const DeviceVarying<N> *src, float weight) {
__device__ void addWithWeight(const DeviceVertex<N> *src, float weight) {
#pragma unroll
for(int i = 0; i < N; ++i){
v[i] += src->v[i] * weight;
}
}
__device__ void clear() {
#pragma unroll
for(int i = 0; i < N; ++i){
v[i] = 0.0f;
}
@ -64,9 +45,9 @@ template<int N> struct DeviceVarying
// Specialize DeviceVarying for N=0 to avoid compile error:
// "flexible array member in otherwise empty struct"
template<> struct DeviceVarying<0>
template<> struct DeviceVertex<0>
{
__device__ void addVaryingWithWeight(const DeviceVarying<0> *src, float weight) {
__device__ void addWithWeight(const DeviceVertex<0> *src, float weight) {
}
__device__ void clear() {
}
@ -94,32 +75,30 @@ __device__ void addWithWeight(float *dst, float *src, float weight, int count)
for(int i = 0; i < count; ++i) dst[i] += src[i] * weight;
}
__device__ void addVaryingWithWeight(float *dst, float *src, float weight, int count)
{
for(int i = 0; i < count; ++i) dst[i] += src[i] * weight;
}
template <int NUM_USER_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
computeFace(float *fVertex, float *fVaryings, int *F0_IT, int *F0_ITa, int offset, int tableOffset, int start, int end)
{
DeviceVertex<NUM_USER_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_USER_VERTEX_ELEMENTS>*)fVertex;
DeviceVarying<NUM_VARYING_ELEMENTS> *varyings = (DeviceVarying<NUM_VARYING_ELEMENTS>*)fVaryings;
for(int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x; i < end + tableOffset; i += blockDim.x * gridDim.x){
DeviceVertex<NUM_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_VERTEX_ELEMENTS>*)fVertex;
DeviceVertex<NUM_VARYING_ELEMENTS> *varyings = (DeviceVertex<NUM_VARYING_ELEMENTS>*)fVaryings;
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
i < end + tableOffset;
i += blockDim.x * gridDim.x) {
int h = F0_ITa[2*i];
int n = F0_ITa[2*i+1];
float weight = 1.0f/n;
DeviceVertex<NUM_USER_VERTEX_ELEMENTS> dst;
DeviceVertex<NUM_VERTEX_ELEMENTS> dst;
dst.clear();
if(NUM_VARYING_ELEMENTS > 0){
DeviceVarying<NUM_VARYING_ELEMENTS> dstVarying;
DeviceVertex<NUM_VARYING_ELEMENTS> dstVarying;
dstVarying.clear();
for(int j=0; j<n; ++j){
int index = F0_IT[h+j];
dst.addWithWeight(&vertex[index], weight);
dstVarying.addVaryingWithWeight(&varyings[index], weight);
dstVarying.addWithWeight(&varyings[index], weight);
}
vertex[offset + i - tableOffset] = dst;
varyings[offset + i - tableOffset] = dstVarying;
@ -134,34 +113,43 @@ computeFace(float *fVertex, float *fVaryings, int *F0_IT, int *F0_ITa, int offse
}
// Face-vertex kernel for element widths that are only known at run time.
//
// Each thread walks the table entries in a grid-stride loop. For entry i,
// F0_ITa[2*i] is the first slot in F0_IT and F0_ITa[2*i+1] the number n of
// source vertices; the destination element (i + offset - tableOffset)
// receives the sum of those n sources, each weighted by 1/n.
//
// vertexLength / varyingLength are the number of floats processed per
// element; vertexStride / varyingStride are the distance in floats between
// consecutive elements of each buffer.
__global__ void
computeFace(float *fVertex, float *fVarying,
            int vertexLength, int vertexStride,
            int varyingLength, int varyingStride,
            int *F0_IT, int *F0_ITa, int offset, int tableOffset, int start, int end)
{
    for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
         i < end + tableOffset;
         i += blockDim.x * gridDim.x) {

        int h = F0_ITa[2*i];
        int n = F0_ITa[2*i+1];
        float weight = 1.0f/n;

        // XXX: can we use local stack like alloca?
        // The result is accumulated directly in the destination element.
        float *dstVertex = fVertex + (i+offset-tableOffset)*vertexStride;
        clear(dstVertex, vertexLength);
        float *dstVarying = fVarying + (i+offset-tableOffset)*varyingStride;
        clear(dstVarying, varyingLength);

        for (int j = 0; j < n; ++j) {
            int index = F0_IT[h+j];
            addWithWeight(dstVertex, fVertex + index*vertexStride, weight, vertexLength);
            addWithWeight(dstVarying, fVarying + index*varyingStride, weight, varyingLength);
        }
    }
}
template <int NUM_USER_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
computeEdge(float *fVertex, float *fVaryings, int *E0_IT, float *E0_S, int offset, int tableOffset, int start, int end)
{
DeviceVertex<NUM_USER_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_USER_VERTEX_ELEMENTS>*)fVertex;
DeviceVarying<NUM_VARYING_ELEMENTS> *varyings = (DeviceVarying<NUM_VARYING_ELEMENTS>*)fVaryings;
for(int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x; i < end + tableOffset; i+= blockDim.x * gridDim.x){
DeviceVertex<NUM_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_VERTEX_ELEMENTS>*)fVertex;
DeviceVertex<NUM_VARYING_ELEMENTS> *varyings = (DeviceVertex<NUM_VARYING_ELEMENTS>*)fVaryings;
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
i < end + tableOffset;
i+= blockDim.x * gridDim.x){
int eidx0 = E0_IT[4*i+0];
int eidx1 = E0_IT[4*i+1];
int eidx2 = E0_IT[4*i+2];
@ -170,7 +158,7 @@ computeEdge(float *fVertex, float *fVaryings, int *E0_IT, float *E0_S, int offse
float vertWeight = E0_S[i*2+0];
// Fully sharp edge : vertWeight = 0.5f;
DeviceVertex<NUM_USER_VERTEX_ELEMENTS> dst;
DeviceVertex<NUM_VERTEX_ELEMENTS> dst;
dst.clear();
dst.addWithWeight(&vertex[eidx0], vertWeight);
@ -185,20 +173,24 @@ computeEdge(float *fVertex, float *fVaryings, int *E0_IT, float *E0_S, int offse
vertex[offset+i-tableOffset] = dst;
if(NUM_VARYING_ELEMENTS > 0){
DeviceVarying<NUM_VARYING_ELEMENTS> dstVarying;
DeviceVertex<NUM_VARYING_ELEMENTS> dstVarying;
dstVarying.clear();
dstVarying.addVaryingWithWeight(&varyings[eidx0], 0.5f);
dstVarying.addVaryingWithWeight(&varyings[eidx1], 0.5f);
dstVarying.addWithWeight(&varyings[eidx0], 0.5f);
dstVarying.addWithWeight(&varyings[eidx1], 0.5f);
varyings[offset+i-tableOffset] = dstVarying;
}
}
}
__global__ void
computeEdge(float *fVertex, int numVertexElements, float *fVarying, int numVaryingElements,
computeEdge(float *fVertex, float *fVarying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *E0_IT, float *E0_S, int offset, int tableOffset, int start, int end)
{
for(int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x; i < end + tableOffset; i+= blockDim.x * gridDim.x){
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
i < end + tableOffset;i+= blockDim.x * gridDim.x) {
int eidx0 = E0_IT[4*i+0];
int eidx1 = E0_IT[4*i+1];
int eidx2 = E0_IT[4*i+2];
@ -207,35 +199,38 @@ computeEdge(float *fVertex, int numVertexElements, float *fVarying, int numVaryi
float vertWeight = E0_S[i*2+0];
// Fully sharp edge : vertWeight = 0.5f;
float *dstVertex = fVertex + (i+offset-tableOffset)*numVertexElements;
clear(dstVertex, numVertexElements);
float *dstVertex = fVertex + (i+offset-tableOffset)*vertexStride;
clear(dstVertex, vertexLength);
addWithWeight(dstVertex, fVertex + eidx0*numVertexElements, vertWeight, numVertexElements);
addWithWeight(dstVertex, fVertex + eidx1*numVertexElements, vertWeight, numVertexElements);
addWithWeight(dstVertex, fVertex + eidx0*vertexStride, vertWeight, vertexLength);
addWithWeight(dstVertex, fVertex + eidx1*vertexStride, vertWeight, vertexLength);
if(eidx2 > -1){
float faceWeight = E0_S[i*2+1];
addWithWeight(dstVertex, fVertex + eidx2*numVertexElements, faceWeight, numVertexElements);
addWithWeight(dstVertex, fVertex + eidx3*numVertexElements, faceWeight, numVertexElements);
addWithWeight(dstVertex, fVertex + eidx2*vertexStride, faceWeight, vertexLength);
addWithWeight(dstVertex, fVertex + eidx3*vertexStride, faceWeight, vertexLength);
}
if(numVaryingElements > 0){
float *dstVarying = fVarying + (i+offset-tableOffset)*numVaryingElements;
clear(dstVarying, numVaryingElements);
if (varyingLength > 0){
float *dstVarying = fVarying + (i+offset-tableOffset)*varyingStride;
clear(dstVarying, varyingLength);
addVaryingWithWeight(dstVarying, fVarying + eidx0*numVaryingElements, 0.5f, numVaryingElements);
addVaryingWithWeight(dstVarying, fVarying + eidx1*numVaryingElements, 0.5f, numVaryingElements);
addWithWeight(dstVarying, fVarying + eidx0*varyingStride, 0.5f, varyingLength);
addWithWeight(dstVarying, fVarying + eidx1*varyingStride, 0.5f, varyingLength);
}
}
}
template <int NUM_USER_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
computeVertexA(float *fVertex, float *fVaryings, int *V0_ITa, float *V0_S, int offset, int tableOffset, int start, int end, int pass)
{
DeviceVertex<NUM_USER_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_USER_VERTEX_ELEMENTS>*)fVertex;
DeviceVarying<NUM_VARYING_ELEMENTS> *varyings = (DeviceVarying<NUM_VARYING_ELEMENTS>*)fVaryings;
for(int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x; i < end+tableOffset; i += blockDim.x * gridDim.x){
DeviceVertex<NUM_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_VERTEX_ELEMENTS>*)fVertex;
DeviceVertex<NUM_VARYING_ELEMENTS> *varyings = (DeviceVertex<NUM_VARYING_ELEMENTS>*)fVaryings;
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
i < end+tableOffset;
i += blockDim.x * gridDim.x) {
int n = V0_ITa[5*i+1];
int p = V0_ITa[5*i+2];
int eidx0 = V0_ITa[5*i+3];
@ -249,7 +244,7 @@ computeVertexA(float *fVertex, float *fVaryings, int *V0_ITa, float *V0_S, int o
if (weight>0.0f && weight<1.0f && n > 0)
weight=1.0f-weight;
DeviceVertex<NUM_USER_VERTEX_ELEMENTS> dst;
DeviceVertex<NUM_VERTEX_ELEMENTS> dst;
if (not pass) {
dst.clear();
} else {
@ -267,9 +262,9 @@ computeVertexA(float *fVertex, float *fVaryings, int *V0_ITa, float *V0_S, int o
if(NUM_VARYING_ELEMENTS > 0){
if(not pass){
DeviceVarying<NUM_VARYING_ELEMENTS> dstVarying;
DeviceVertex<NUM_VARYING_ELEMENTS> dstVarying;
dstVarying.clear();
dstVarying.addVaryingWithWeight(&varyings[p], 1.0f);
dstVarying.addWithWeight(&varyings[p], 1.0f);
varyings[i+offset-tableOffset] = dstVarying;
}
}
@ -277,10 +272,15 @@ computeVertexA(float *fVertex, float *fVaryings, int *V0_ITa, float *V0_S, int o
}
__global__ void
computeVertexA(float *fVertex, int numVertexElements, float *fVaryings, int numVaryingElements,
computeVertexA(float *fVertex, float *fVaryings,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *V0_ITa, float *V0_S, int offset, int tableOffset, int start, int end, int pass)
{
for(int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x; i < end + tableOffset; i += blockDim.x * gridDim.x){
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
i < end + tableOffset;
i += blockDim.x * gridDim.x){
int n = V0_ITa[5*i+1];
int p = V0_ITa[5*i+2];
int eidx0 = V0_ITa[5*i+3];
@ -294,24 +294,24 @@ computeVertexA(float *fVertex, int numVertexElements, float *fVaryings, int numV
if (weight>0.0f && weight<1.0f && n > 0)
weight=1.0f-weight;
float *dstVertex = fVertex + (i+offset-tableOffset)*numVertexElements;
float *dstVertex = fVertex + (i+offset-tableOffset)*vertexStride;
if (not pass) {
clear(dstVertex, numVertexElements);
clear(dstVertex, vertexLength);
}
if (eidx0==-1 || (pass==0 && (n==-1)) ) {
addWithWeight(dstVertex, fVertex + p*numVertexElements, weight, numVertexElements);
addWithWeight(dstVertex, fVertex + p*vertexStride, weight, vertexLength);
} else {
addWithWeight(dstVertex, fVertex + p*numVertexElements, weight*0.75f, numVertexElements);
addWithWeight(dstVertex, fVertex + eidx0*numVertexElements, weight*0.125f, numVertexElements);
addWithWeight(dstVertex, fVertex + eidx1*numVertexElements, weight*0.125f, numVertexElements);
addWithWeight(dstVertex, fVertex + p*vertexStride, weight*0.75f, vertexLength);
addWithWeight(dstVertex, fVertex + eidx0*vertexStride, weight*0.125f, vertexLength);
addWithWeight(dstVertex, fVertex + eidx1*vertexStride, weight*0.125f, vertexLength);
}
if(numVaryingElements > 0){
if(varyingLength > 0){
if(not pass){
float *dstVarying = fVaryings + (i+offset-tableOffset)*numVaryingElements;
clear(dstVarying, numVaryingElements);
addVaryingWithWeight(dstVarying, fVaryings + p*numVaryingElements, 1.0f, numVaryingElements);
float *dstVarying = fVaryings + (i+offset-tableOffset)*varyingStride;
clear(dstVarying, varyingLength);
addWithWeight(dstVarying, fVaryings + p*varyingStride, 1.0f, varyingLength);
}
}
}
@ -321,13 +321,16 @@ computeVertexA(float *fVertex, int numVertexElements, float *fVaryings, int numV
//texture <int, 1> texV0_IT;
template <int NUM_USER_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
computeVertexB(float *fVertex, float *fVaryings,
const int *V0_ITa, const int *V0_IT, const float *V0_S, int offset, int tableOffset, int start, int end)
{
DeviceVertex<NUM_USER_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_USER_VERTEX_ELEMENTS>*)fVertex;
DeviceVarying<NUM_VARYING_ELEMENTS> *varyings = (DeviceVarying<NUM_VARYING_ELEMENTS>*)fVaryings;
for(int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x; i < end + tableOffset; i += blockDim.x * gridDim.x){
DeviceVertex<NUM_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_VERTEX_ELEMENTS>*)fVertex;
DeviceVertex<NUM_VARYING_ELEMENTS> *varyings = (DeviceVertex<NUM_VARYING_ELEMENTS>*)fVaryings;
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
i < end + tableOffset;
i += blockDim.x * gridDim.x) {
int h = V0_ITa[5*i];
int n = V0_ITa[5*i+1];
int p = V0_ITa[5*i+2];
@ -336,11 +339,11 @@ computeVertexB(float *fVertex, float *fVaryings,
float wp = 1.0f/float(n*n);
float wv = (n-2.0f) * n * wp;
DeviceVertex<NUM_USER_VERTEX_ELEMENTS> dst;
DeviceVertex<NUM_VERTEX_ELEMENTS> dst;
dst.clear();
dst.addWithWeight(&vertex[p], weight * wv);
for(int j = 0; j < n; ++j){
for (int j = 0; j < n; ++j) {
dst.addWithWeight(&vertex[V0_IT[h+j*2]], weight * wp);
dst.addWithWeight(&vertex[V0_IT[h+j*2+1]], weight * wp);
// int idx0 = tex1Dfetch(texV0_IT, h+j*2);
@ -351,19 +354,24 @@ computeVertexB(float *fVertex, float *fVaryings,
vertex[i+offset-tableOffset] = dst;
if(NUM_VARYING_ELEMENTS > 0){
DeviceVarying<NUM_VARYING_ELEMENTS> dstVarying;
DeviceVertex<NUM_VARYING_ELEMENTS> dstVarying;
dstVarying.clear();
dstVarying.addVaryingWithWeight(&varyings[p], 1.0f);
dstVarying.addWithWeight(&varyings[p], 1.0f);
varyings[i+offset-tableOffset] = dstVarying;
}
}
}
__global__ void
computeVertexB(float *fVertex, int numVertexElements, float *fVaryings, int numVaryingElements,
computeVertexB(float *fVertex, float *fVarying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
const int *V0_ITa, const int *V0_IT, const float *V0_S, int offset, int tableOffset, int start, int end)
{
for(int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x; i < end + tableOffset; i += blockDim.x * gridDim.x){
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
i < end + tableOffset;
i += blockDim.x * gridDim.x) {
int h = V0_ITa[5*i];
int n = V0_ITa[5*i+1];
int p = V0_ITa[5*i+2];
@ -372,19 +380,19 @@ computeVertexB(float *fVertex, int numVertexElements, float *fVaryings, int numV
float wp = 1.0f/float(n*n);
float wv = (n-2.0f) * n * wp;
float *dstVertex = fVertex + (i+offset-tableOffset)*numVertexElements;
clear(dstVertex, numVertexElements);
addWithWeight(dstVertex, fVertex + p*numVertexElements, weight*wv, numVertexElements);
float *dstVertex = fVertex + (i+offset-tableOffset)*vertexStride;
clear(dstVertex, vertexLength);
addWithWeight(dstVertex, fVertex + p*vertexStride, weight*wv, vertexLength);
for(int j = 0; j < n; ++j){
addWithWeight(dstVertex, fVertex + V0_IT[h+j*2]*numVertexElements, weight*wp, numVertexElements);
addWithWeight(dstVertex, fVertex + V0_IT[h+j*2+1]*numVertexElements, weight*wp, numVertexElements);
for (int j = 0; j < n; ++j) {
addWithWeight(dstVertex, fVertex + V0_IT[h+j*2]*vertexStride, weight*wp, vertexLength);
addWithWeight(dstVertex, fVertex + V0_IT[h+j*2+1]*vertexStride, weight*wp, vertexLength);
}
if(numVaryingElements > 0){
float *dstVarying = fVaryings + (i+offset-tableOffset)*numVaryingElements;
clear(dstVarying, numVaryingElements);
addVaryingWithWeight(dstVarying, fVaryings + p*numVaryingElements, 1.0f, numVaryingElements);
if (varyingLength > 0) {
float *dstVarying = fVarying + (i+offset-tableOffset)*varyingStride;
clear(dstVarying, varyingLength);
addWithWeight(dstVarying, fVarying + p*varyingStride, 1.0f, varyingLength);
}
}
}
@ -392,12 +400,15 @@ computeVertexB(float *fVertex, int numVertexElements, float *fVaryings, int numV
// --------------------------------------------------------------------------------------------
template <int NUM_USER_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
computeLoopVertexB(float *fVertex, float *fVaryings, int *V0_ITa, int *V0_IT, float *V0_S, int offset, int tableOffset, int start, int end)
{
DeviceVertex<NUM_USER_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_USER_VERTEX_ELEMENTS>*)fVertex;
DeviceVarying<NUM_VARYING_ELEMENTS> *varyings = (DeviceVarying<NUM_VARYING_ELEMENTS>*)fVaryings;
for(int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x; i < end + tableOffset; i += blockDim.x * gridDim.x){
DeviceVertex<NUM_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_VERTEX_ELEMENTS>*)fVertex;
DeviceVertex<NUM_VARYING_ELEMENTS> *varyings = (DeviceVertex<NUM_VARYING_ELEMENTS>*)fVaryings;
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
i < end + tableOffset;
i += blockDim.x * gridDim.x) {
int h = V0_ITa[5*i];
int n = V0_ITa[5*i+1];
int p = V0_ITa[5*i+2];
@ -408,30 +419,35 @@ computeLoopVertexB(float *fVertex, float *fVaryings, int *V0_ITa, int *V0_IT, fl
beta = beta * beta;
beta = (0.625f - beta) * wp;
DeviceVertex<NUM_USER_VERTEX_ELEMENTS> dst;
DeviceVertex<NUM_VERTEX_ELEMENTS> dst;
dst.clear();
dst.addWithWeight(&vertex[p], weight * (1.0f - (beta * n)));
for(int j = 0; j < n; ++j){
for (int j = 0; j < n; ++j) {
dst.addWithWeight(&vertex[V0_IT[h+j]], weight * beta);
}
vertex[i+offset-tableOffset] = dst;
if(NUM_VARYING_ELEMENTS > 0){
DeviceVarying<NUM_VARYING_ELEMENTS> dstVarying;
if (NUM_VARYING_ELEMENTS > 0) {
DeviceVertex<NUM_VARYING_ELEMENTS> dstVarying;
dstVarying.clear();
dstVarying.addVaryingWithWeight(&varyings[p], 1.0f);
dstVarying.addWithWeight(&varyings[p], 1.0f);
varyings[i+offset-tableOffset] = dstVarying;
}
}
}
__global__ void
computeLoopVertexB(float *fVertex, int numVertexElements, float *fVaryings, int numVaryingElements,
computeLoopVertexB(float *fVertex, float *fVarying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
const int *V0_ITa, const int *V0_IT, const float *V0_S, int offset, int tableOffset, int start, int end)
{
for(int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x; i < end + tableOffset; i += blockDim.x * gridDim.x){
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
i < end + tableOffset;
i += blockDim.x * gridDim.x) {
int h = V0_ITa[5*i];
int n = V0_ITa[5*i+1];
int p = V0_ITa[5*i+2];
@ -442,34 +458,37 @@ computeLoopVertexB(float *fVertex, int numVertexElements, float *fVaryings, int
beta = beta * beta;
beta = (0.625f - beta) * wp;
float *dstVertex = fVertex + (i+offset-tableOffset)*numVertexElements;
clear(dstVertex, numVertexElements);
addWithWeight(dstVertex, fVertex + p*numVertexElements, weight*(1.0f-(beta*n)), numVertexElements);
float *dstVertex = fVertex + (i+offset-tableOffset)*vertexStride;
clear(dstVertex, vertexLength);
addWithWeight(dstVertex, fVertex + p*vertexStride, weight*(1.0f-(beta*n)), vertexLength);
for(int j = 0; j < n; ++j){
addWithWeight(dstVertex, fVertex + V0_IT[h+j]*numVertexElements, weight*beta, numVertexElements);
for (int j = 0; j < n; ++j) {
addWithWeight(dstVertex, fVertex + V0_IT[h+j]*vertexStride, weight*beta, vertexLength);
}
if(numVaryingElements > 0){
float *dstVarying = fVaryings + (i+offset-tableOffset)*numVaryingElements;
clear(dstVarying, numVaryingElements);
addVaryingWithWeight(dstVarying, fVaryings + p*numVaryingElements, 1.0f, numVaryingElements);
if (varyingLength > 0) {
float *dstVarying = fVarying + (i+offset-tableOffset)*varyingStride;
clear(dstVarying, varyingLength);
addWithWeight(dstVarying, fVarying + p*varyingStride, 1.0f, varyingLength);
}
}
}
// --------------------------------------------------------------------------------------------
template <int NUM_USER_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
computeBilinearEdge(float *fVertex, float *fVaryings, int *E0_IT, int offset, int tableOffset, int start, int end)
{
DeviceVertex<NUM_USER_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_USER_VERTEX_ELEMENTS>*)fVertex;
DeviceVarying<NUM_VARYING_ELEMENTS> *varyings = (DeviceVarying<NUM_VARYING_ELEMENTS>*)fVaryings;
for(int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x; i < end + tableOffset; i+= blockDim.x * gridDim.x){
DeviceVertex<NUM_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_VERTEX_ELEMENTS>*)fVertex;
DeviceVertex<NUM_VARYING_ELEMENTS> *varyings = (DeviceVertex<NUM_VARYING_ELEMENTS>*)fVaryings;
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
i < end + tableOffset;
i+= blockDim.x * gridDim.x) {
int eidx0 = E0_IT[2*i+0];
int eidx1 = E0_IT[2*i+1];
DeviceVertex<NUM_USER_VERTEX_ELEMENTS> dst;
DeviceVertex<NUM_VERTEX_ELEMENTS> dst;
dst.clear();
dst.addWithWeight(&vertex[eidx0], 0.5f);
@ -477,78 +496,91 @@ computeBilinearEdge(float *fVertex, float *fVaryings, int *E0_IT, int offset, in
vertex[offset+i-tableOffset] = dst;
if(NUM_VARYING_ELEMENTS > 0){
DeviceVarying<NUM_VARYING_ELEMENTS> dstVarying;
if (NUM_VARYING_ELEMENTS > 0) {
DeviceVertex<NUM_VARYING_ELEMENTS> dstVarying;
dstVarying.clear();
dstVarying.addVaryingWithWeight(&varyings[eidx0], 0.5f);
dstVarying.addVaryingWithWeight(&varyings[eidx1], 0.5f);
dstVarying.addWithWeight(&varyings[eidx0], 0.5f);
dstVarying.addWithWeight(&varyings[eidx1], 0.5f);
varyings[offset+i-tableOffset] = dstVarying;
}
}
}
// Runtime-sized bilinear edge kernel (non-templated variant).
//
// NOTE: this listing is a diff rendering. Pre-change lines (using
// numVertexElements / numVaryingElements, addVaryingWithWeight) sit directly
// above the post-change lines that replace them (vertexLength / vertexStride,
// varyingLength / varyingStride, addWithWeight).
//
// For every table entry i in [start+tableOffset, end+tableOffset) the kernel
// reads two source indices from E0_IT and writes their 0.5/0.5 blend into
// destination element (i + offset - tableOffset) of fVertex, and of fVarying
// when varying data is present.
//
// Post-change parameters:
//   fVertex, fVarying            - float buffers, read and written in place
//   vertexLength, varyingLength  - count passed to clear()/addWithWeight()
//                                  (presumably floats per element; helpers are
//                                  not visible here - TODO confirm)
//   vertexStride, varyingStride  - distance in floats between consecutive elements
//   E0_IT                        - index table, two ints per entry
//   offset, tableOffset, start, end - select the destination base and table range
__global__ void
computeBilinearEdge(float *fVertex, int numVertexElements, float *fVarying, int numVaryingElements,
computeBilinearEdge(float *fVertex, float *fVarying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *E0_IT, int offset, int tableOffset, int start, int end)
{
for(int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x; i < end + tableOffset; i+= blockDim.x * gridDim.x){
// Each thread starts at its global thread index (shifted by start+tableOffset)
// and advances by the total number of launched threads.
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
i < end + tableOffset;
i+= blockDim.x * gridDim.x) {
// The two source indices for this entry.
int eidx0 = E0_IT[2*i+0];
int eidx1 = E0_IT[2*i+1];
float *dstVertex = fVertex + (i+offset-tableOffset)*numVertexElements;
clear(dstVertex, numVertexElements);
// Post-change: elements are addressed by stride, but only vertexLength
// values are passed to clear()/addWithWeight().
float *dstVertex = fVertex + (i+offset-tableOffset)*vertexStride;
clear(dstVertex, vertexLength);
addWithWeight(dstVertex, fVertex + eidx0*numVertexElements, 0.5f, numVertexElements);
addWithWeight(dstVertex, fVertex + eidx1*numVertexElements, 0.5f, numVertexElements);
addWithWeight(dstVertex, fVertex + eidx0*vertexStride, 0.5f, vertexLength);
addWithWeight(dstVertex, fVertex + eidx1*vertexStride, 0.5f, vertexLength);
if(numVaryingElements > 0){
float *dstVarying = fVarying + (i+offset-tableOffset)*numVaryingElements;
clear(dstVarying, numVaryingElements);
// Varying data is blended the same way, and only when a varying length is given.
if (varyingLength > 0) {
float *dstVarying = fVarying + (i+offset-tableOffset)*varyingStride;
clear(dstVarying, varyingLength);
addVaryingWithWeight(dstVarying, fVarying + eidx0*numVaryingElements, 0.5f, numVaryingElements);
addVaryingWithWeight(dstVarying, fVarying + eidx1*numVaryingElements, 0.5f, numVaryingElements);
addWithWeight(dstVarying, fVarying + eidx0*varyingStride, 0.5f, varyingLength);
addWithWeight(dstVarying, fVarying + eidx1*varyingStride, 0.5f, varyingLength);
}
}
}
// Compile-time-sized bilinear vertex kernel (templated variant).
//
// NOTE: this listing is a diff rendering. Pre-change lines
// (NUM_USER_VERTEX_ELEMENTS, DeviceVarying, addVaryingWithWeight) sit directly
// above the post-change lines that replace them (NUM_VERTEX_ELEMENTS,
// DeviceVertex used for both buffers, addWithWeight).
//
// For every table entry i in [start+tableOffset, end+tableOffset) the kernel
// reads one source index p from V0_ITa and copies element p (weight 1.0) into
// destination element (i + offset - tableOffset), for the vertex buffer and,
// when NUM_VARYING_ELEMENTS > 0, for the varying buffer.
//
// This variant takes no stride parameters: both buffers are indexed as arrays
// of DeviceVertex<N>.
template <int NUM_USER_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
computeBilinearVertex(float *fVertex, float *fVaryings, int *V0_ITa, int offset, int tableOffset, int start, int end)
{
DeviceVertex<NUM_USER_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_USER_VERTEX_ELEMENTS>*)fVertex;
DeviceVarying<NUM_VARYING_ELEMENTS> *varyings = (DeviceVarying<NUM_VARYING_ELEMENTS>*)fVaryings;
for(int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x; i < end + tableOffset; i += blockDim.x * gridDim.x){
// Reinterpret the raw float buffers as arrays of fixed-size elements.
DeviceVertex<NUM_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_VERTEX_ELEMENTS>*)fVertex;
DeviceVertex<NUM_VARYING_ELEMENTS> *varyings = (DeviceVertex<NUM_VARYING_ELEMENTS>*)fVaryings;
// Each thread starts at its global thread index (shifted by start+tableOffset)
// and advances by the total number of launched threads.
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
i < end + tableOffset;
i += blockDim.x * gridDim.x) {
// Source element index for this entry.
int p = V0_ITa[i];
DeviceVertex<NUM_USER_VERTEX_ELEMENTS> dst;
// Build the result in a local value, then store it to the destination slot.
DeviceVertex<NUM_VERTEX_ELEMENTS> dst;
dst.clear();
dst.addWithWeight(&vertex[p], 1.0f);
vertex[i+offset-tableOffset] = dst;
if(NUM_VARYING_ELEMENTS > 0){
DeviceVarying<NUM_VARYING_ELEMENTS> dstVarying;
// The condition is a template constant, so it is fixed per instantiation.
if (NUM_VARYING_ELEMENTS > 0) {
DeviceVertex<NUM_VARYING_ELEMENTS> dstVarying;
dstVarying.clear();
dstVarying.addVaryingWithWeight(&varyings[p], 1.0f);
dstVarying.addWithWeight(&varyings[p], 1.0f);
varyings[i+offset-tableOffset] = dstVarying;
}
}
}
// Runtime-sized bilinear vertex kernel (non-templated variant).
//
// NOTE: this listing is a diff rendering. Pre-change lines (using
// numVertexElements / numVaryingElements, fVaryings, addVaryingWithWeight) sit
// directly above the post-change lines that replace them (vertexLength /
// vertexStride, varyingLength / varyingStride, fVarying, addWithWeight).
//
// For every table entry i in [start+tableOffset, end+tableOffset) the kernel
// reads one source index p from V0_ITa and copies element p (weight 1.0) into
// destination element (i + offset - tableOffset) of fVertex, and of fVarying
// when varyingLength > 0.
//
// Post-change parameters:
//   vertexLength, varyingLength  - count passed to clear()/addWithWeight()
//                                  (presumably floats per element; helpers are
//                                  not visible here - TODO confirm)
//   vertexStride, varyingStride  - distance in floats between consecutive elements
__global__ void
computeBilinearVertex(float *fVertex, int numVertexElements, float *fVaryings, int numVaryingElements,
computeBilinearVertex(float *fVertex, float *fVarying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
const int *V0_ITa, int offset, int tableOffset, int start, int end)
{
for(int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x; i < end + tableOffset; i += blockDim.x * gridDim.x){
// Each thread starts at its global thread index (shifted by start+tableOffset)
// and advances by the total number of launched threads.
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
i < end + tableOffset;
i += blockDim.x * gridDim.x) {
// Source element index for this entry.
int p = V0_ITa[i];
float *dstVertex = fVertex + (i+offset-tableOffset)*numVertexElements;
clear(dstVertex, numVertexElements);
addWithWeight(dstVertex, fVertex + p*numVertexElements, 1.0f, numVertexElements);
// Post-change: elements are addressed by stride, but only vertexLength
// values are passed to clear()/addWithWeight().
float *dstVertex = fVertex + (i+offset-tableOffset)*vertexStride;
clear(dstVertex, vertexLength);
addWithWeight(dstVertex, fVertex + p*vertexStride, 1.0f, vertexLength);
if(numVaryingElements > 0){
float *dstVarying = fVaryings + (i+offset-tableOffset)*numVaryingElements;
clear(dstVarying, numVaryingElements);
addVaryingWithWeight(dstVarying, fVaryings + p*numVaryingElements, 1.0f, numVaryingElements);
// Varying data is copied the same way, and only when a varying length is given.
if (varyingLength > 0) {
float *dstVarying = fVarying + (i+offset-tableOffset)*varyingStride;
clear(dstVarying, varyingLength);
addWithWeight(dstVarying, fVarying + p*varyingStride, 1.0f, varyingLength);
}
}
}
@ -556,15 +588,16 @@ computeBilinearVertex(float *fVertex, int numVertexElements, float *fVaryings, i
// --------------------------------------------------------------------------------------------
__global__ void
editVertexAdd(float *fVertex, int numVertexElements, int primVarOffset, int primVarWidth,
editVertexAdd(float *fVertex, int vertexLength, int vertexStride,
int primVarOffset, int primVarWidth,
int vertexOffset, int tableOffset, int start, int end,
const int *editIndices, const float *editValues)
{
for(int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
i < end + tableOffset;
i += blockDim.x * gridDim.x) {
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
i < end + tableOffset;
i += blockDim.x * gridDim.x) {
float *dstVertex = fVertex + (editIndices[i] + vertexOffset) * numVertexElements + primVarOffset;
float *dstVertex = fVertex + (editIndices[i] + vertexOffset) * vertexStride + primVarOffset;
for(int j = 0; j < primVarWidth; j++) {
*dstVertex++ += editValues[i*primVarWidth + j];
@ -579,16 +612,19 @@ editVertexAdd(float *fVertex, int numVertexElements, int primVarOffset, int prim
// XXX: this macro usage is tentative. Since CUDA kernels can't be dynamically configured,
// we are still trying to find a better way to provide optimized kernels.
// Pre-change definition (this listing is a diff rendering; the replacement
// definition follows below). It launches the templated KERNEL and returns from
// the enclosing function when the caller's numUserVertexElements and
// numVaryingElements equal the given template constants.
#define OPT_KERNEL(NUM_USER_VERTEX_ELEMENTS, NUM_VARYING_ELEMENTS, KERNEL, X, Y, ARG) \
if(numUserVertexElements == NUM_USER_VERTEX_ELEMENTS && \
numVaryingElements == NUM_VARYING_ELEMENTS) \
{ KERNEL<NUM_USER_VERTEX_ELEMENTS, NUM_VARYING_ELEMENTS><<<X,Y>>>ARG; \
return; }
// Post-change definition. Expands to a guarded launch of the templated
// KERNEL<NUM_VERTEX_ELEMENTS, NUM_VARYING_ELEMENTS> with launch configuration
// <<<X,Y>>> and argument list ARG, followed by `return` from the enclosing
// function. The guard reads vertexLength, varyingLength, vertexStride and
// varyingStride from the enclosing scope. It matches only when both lengths
// equal the template constants and each stride equals its length.
// When the guard fails, control falls through to whatever follows the macro.
#define OPT_KERNEL(NUM_VERTEX_ELEMENTS, NUM_VARYING_ELEMENTS, KERNEL, X, Y, ARG) \
if(vertexLength == NUM_VERTEX_ELEMENTS && \
varyingLength == NUM_VARYING_ELEMENTS && \
vertexStride == vertexLength && \
varyingStride == varyingLength) \
{ KERNEL<NUM_VERTEX_ELEMENTS, NUM_VARYING_ELEMENTS><<<X,Y>>>ARG; \
return; }
extern "C" {
void OsdCudaComputeFace(float *vertex, float *varying,
int numUserVertexElements, int numVaryingElements,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *F_IT, int *F_ITa, int offset, int tableOffset, int start, int end)
{
//computeFace<3, 0><<<512,32>>>(vertex, varying, F_IT, F_ITa, offset, start, end);
@ -598,12 +634,15 @@ void OsdCudaComputeFace(float *vertex, float *varying,
OPT_KERNEL(3, 3, computeFace, 512, 32, (vertex, varying, F_IT, F_ITa, offset, tableOffset, start, end));
// fallback kernel (slow)
computeFace<<<512, 32>>>(vertex, 3+numUserVertexElements, varying, numVaryingElements,
computeFace<<<512, 32>>>(vertex, varying,
vertexLength, vertexStride, varyingLength, varyingStride,
F_IT, F_ITa, offset, tableOffset, start, end);
}
void OsdCudaComputeEdge(float *vertex, float *varying,
int numUserVertexElements, int numVaryingElements,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *E_IT, float *E_W, int offset, int tableOffset, int start, int end)
{
//computeEdge<0, 3><<<512,32>>>(vertex, varying, E_IT, E_W, offset, start, end);
@ -612,12 +651,14 @@ void OsdCudaComputeEdge(float *vertex, float *varying,
OPT_KERNEL(3, 0, computeEdge, 512, 32, (vertex, varying, E_IT, E_W, offset, tableOffset, start, end));
OPT_KERNEL(3, 3, computeEdge, 512, 32, (vertex, varying, E_IT, E_W, offset, tableOffset, start, end));
computeEdge<<<512, 32>>>(vertex, 3+numUserVertexElements, varying, numVaryingElements,
computeEdge<<<512, 32>>>(vertex, varying,
vertexLength, vertexStride, varyingLength, varyingStride,
E_IT, E_W, offset, tableOffset, start, end);
}
void OsdCudaComputeVertexA(float *vertex, float *varying,
int numUserVertexElements, int numVaryingElements,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *V_ITa, float *V_W, int offset, int tableOffset, int start, int end, int pass)
{
// computeVertexA<0, 3><<<512,32>>>(vertex, varying, V_ITa, V_W, offset, start, end, pass);
@ -626,12 +667,14 @@ void OsdCudaComputeVertexA(float *vertex, float *varying,
OPT_KERNEL(3, 0, computeVertexA, 512, 32, (vertex, varying, V_ITa, V_W, offset, tableOffset, start, end, pass));
OPT_KERNEL(3, 3, computeVertexA, 512, 32, (vertex, varying, V_ITa, V_W, offset, tableOffset, start, end, pass));
computeVertexA<<<512, 32>>>(vertex, 3+numUserVertexElements, varying, numVaryingElements,
computeVertexA<<<512, 32>>>(vertex, varying,
vertexLength, vertexStride, varyingLength, varyingStride,
V_ITa, V_W, offset, tableOffset, start, end, pass);
}
void OsdCudaComputeVertexB(float *vertex, float *varying,
int numUserVertexElements, int numVaryingElements,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *V_ITa, int *V_IT, float *V_W, int offset, int tableOffset, int start, int end)
{
// computeVertexB<0, 3><<<512,32>>>(vertex, varying, V_ITa, V_IT, V_W, offset, start, end);
@ -640,12 +683,14 @@ void OsdCudaComputeVertexB(float *vertex, float *varying,
OPT_KERNEL(3, 0, computeVertexB, 512, 32, (vertex, varying, V_ITa, V_IT, V_W, offset, tableOffset, start, end));
OPT_KERNEL(3, 3, computeVertexB, 512, 32, (vertex, varying, V_ITa, V_IT, V_W, offset, tableOffset, start, end));
computeVertexB<<<512, 32>>>(vertex, 3+numUserVertexElements, varying, numVaryingElements,
computeVertexB<<<512, 32>>>(vertex, varying,
vertexLength, vertexStride, varyingLength, varyingStride,
V_ITa, V_IT, V_W, offset, tableOffset, start, end);
}
void OsdCudaComputeLoopVertexB(float *vertex, float *varying,
int numUserVertexElements, int numVaryingElements,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *V_ITa, int *V_IT, float *V_W, int offset, int tableOffset, int start, int end)
{
// computeLoopVertexB<0, 3><<<512,32>>>(vertex, varying, V_ITa, V_IT, V_W, offset, start, end);
@ -654,12 +699,14 @@ void OsdCudaComputeLoopVertexB(float *vertex, float *varying,
OPT_KERNEL(3, 0, computeLoopVertexB, 512, 32, (vertex, varying, V_ITa, V_IT, V_W, offset, tableOffset, start, end));
OPT_KERNEL(3, 3, computeLoopVertexB, 512, 32, (vertex, varying, V_ITa, V_IT, V_W, offset, tableOffset, start, end));
computeLoopVertexB<<<512, 32>>>(vertex, 3+numUserVertexElements, varying, numVaryingElements,
computeLoopVertexB<<<512, 32>>>(vertex, varying,
vertexLength, vertexStride, varyingLength, varyingStride,
V_ITa, V_IT, V_W, offset, tableOffset, start, end);
}
void OsdCudaComputeBilinearEdge(float *vertex, float *varying,
int numUserVertexElements, int numVaryingElements,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *E_IT, int offset, int tableOffset, int start, int end)
{
//computeBilinearEdge<0, 3><<<512,32>>>(vertex, varying, E_IT, offset, start, end);
@ -668,12 +715,14 @@ void OsdCudaComputeBilinearEdge(float *vertex, float *varying,
OPT_KERNEL(3, 0, computeBilinearEdge, 512, 32, (vertex, varying, E_IT, offset, tableOffset, start, end));
OPT_KERNEL(3, 3, computeBilinearEdge, 512, 32, (vertex, varying, E_IT, offset, tableOffset, start, end));
computeBilinearEdge<<<512, 32>>>(vertex, 3+numUserVertexElements, varying, numVaryingElements,
computeBilinearEdge<<<512, 32>>>(vertex, varying,
vertexLength, vertexStride, varyingLength, varyingStride,
E_IT, offset, tableOffset, start, end);
}
void OsdCudaComputeBilinearVertex(float *vertex, float *varying,
int numUserVertexElements, int numVaryingElements,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *V_ITa, int offset, int tableOffset, int start, int end)
{
// computeBilinearVertex<0, 3><<<512,32>>>(vertex, varying, V_ITa, offset, start, end);
@ -682,16 +731,17 @@ void OsdCudaComputeBilinearVertex(float *vertex, float *varying,
OPT_KERNEL(3, 0, computeBilinearVertex, 512, 32, (vertex, varying, V_ITa, offset, tableOffset, start, end));
OPT_KERNEL(3, 3, computeBilinearVertex, 512, 32, (vertex, varying, V_ITa, offset, tableOffset, start, end));
computeBilinearVertex<<<512, 32>>>(vertex, 3+numUserVertexElements, varying, numVaryingElements,
computeBilinearVertex<<<512, 32>>>(vertex, varying,
vertexLength, vertexStride, varyingLength, varyingStride,
V_ITa, offset, tableOffset, start, end);
}
// Host-side entry point that launches the editVertexAdd kernel with a fixed
// <<<512, 32>>> launch configuration.
//
// NOTE: this listing is a diff rendering. The pre-change signature and launch
// (numUserVertexElements, passed to the kernel as 3+numUserVertexElements) sit
// directly above the post-change lines, which take vertexLength and
// vertexStride and forward both unchanged.
//
// All remaining arguments (primVarOffset, primVarWidth, vertexOffset,
// tableOffset, start, end, editIndices, editValues) are forwarded as-is.
// No launch error checking or synchronization is performed here.
void OsdCudaEditVertexAdd(float *vertex, int numUserVertexElements,
void OsdCudaEditVertexAdd(float *vertex, int vertexLength, int vertexStride,
int primVarOffset, int primVarWidth,
int vertexOffset, int tableOffset,
int start, int end, int *editIndices, float *editValues)
{
editVertexAdd<<<512, 32>>>(vertex, 3+numUserVertexElements, primVarOffset, primVarWidth,
editVertexAdd<<<512, 32>>>(vertex, vertexLength, vertexStride, primVarOffset, primVarWidth,
vertexOffset, tableOffset, start, end,
editIndices, editValues);
}

101
opensubdiv/osd/d3d11ComputeController.cpp Normal file → Executable file
View File

@ -38,9 +38,7 @@ namespace OPENSUBDIV_VERSION {
// Constructs the controller around the given D3D11 device context.
//
// NOTE: this listing is a diff rendering. The pre-change initializer list
// (which also zeroed _currentVertexBufferUAV, _currentVaryingBufferUAV and
// _currentKernelBundle) sits directly above the post-change one, which
// initializes only _deviceContext and _query.
OsdD3D11ComputeController::OsdD3D11ComputeController(
ID3D11DeviceContext *deviceContext)
: _deviceContext(deviceContext), _query(0),
_currentVertexBufferUAV(0), _currentVaryingBufferUAV(0),
_currentKernelBundle(NULL) {
: _deviceContext(deviceContext), _query(0) {
}
OsdD3D11ComputeController::~OsdD3D11ComputeController() {
@ -72,20 +70,21 @@ OsdD3D11ComputeController::Synchronize() {
}
// Returns the kernel bundle matching the requested buffer layout, creating and
// compiling one on first use.
//
// NOTE: this listing is a diff rendering. Pre-change lines (keyed on
// numVertexElements / numVaryingElements) sit directly above the post-change
// lines that replace them (keyed on the vertexDesc / varyingDesc descriptors).
//
// _kernelRegistry is searched with OsdD3D11ComputeKernelBundle::Match. On a hit
// the registered bundle is returned. Otherwise a new bundle is allocated with
// `new`, appended to the registry, compiled, and returned. The returned pointer
// is also stored in _kernelRegistry.
OsdD3D11ComputeKernelBundle *
OsdD3D11ComputeController::getKernels(int numVertexElements,
int numVaryingElements) {
OsdD3D11ComputeController::getKernels(OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc) {
std::vector<OsdD3D11ComputeKernelBundle*>::iterator it =
std::find_if(_kernelRegistry.begin(), _kernelRegistry.end(),
OsdD3D11ComputeKernelBundle::Match(numVertexElements,
numVaryingElements));
OsdD3D11ComputeKernelBundle::Match(
vertexDesc, varyingDesc));
if (it != _kernelRegistry.end()) {
return *it;
} else {
OsdD3D11ComputeKernelBundle *kernelBundle =
new OsdD3D11ComputeKernelBundle(_deviceContext);
// NOTE(review): the bundle is registered before Compile() runs, and any
// result Compile() reports is not checked here - confirm that is intended.
_kernelRegistry.push_back(kernelBundle);
kernelBundle->Compile(numVertexElements, numVaryingElements);
kernelBundle->Compile(vertexDesc, varyingDesc);
return kernelBundle;
}
}
@ -102,11 +101,11 @@ OsdD3D11ComputeController::bindShaderResources()
ID3D11ShaderResourceView *NULLSRV = 0;
_deviceContext->VSSetShaderResources(0, 1, &NULLSRV);
if (_currentVertexBufferUAV)
_deviceContext->CSSetUnorderedAccessViews(0, 1, &_currentVertexBufferUAV, 0); // u0
if (_currentBindState.vertexBuffer)
_deviceContext->CSSetUnorderedAccessViews(0, 1, &_currentBindState.vertexBuffer, 0); // u0
if (_currentVaryingBufferUAV)
_deviceContext->CSSetUnorderedAccessViews(1, 1, &_currentVaryingBufferUAV, 0); // u1
if (_currentBindState.varyingBuffer)
_deviceContext->CSSetUnorderedAccessViews(1, 1, &_currentBindState.varyingBuffer, 0); // u1
}
void
@ -122,8 +121,10 @@ OsdD3D11ComputeController::ApplyBilinearFaceVerticesKernel(
assert(context);
_currentKernelBundle->ApplyBilinearFaceVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
_currentBindState.kernelBundle->ApplyBilinearFaceVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd(),
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
}
void
@ -132,8 +133,10 @@ OsdD3D11ComputeController::ApplyBilinearEdgeVerticesKernel(
assert(context);
_currentKernelBundle->ApplyBilinearEdgeVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
_currentBindState.kernelBundle->ApplyBilinearEdgeVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd(),
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
}
void
@ -142,8 +145,10 @@ OsdD3D11ComputeController::ApplyBilinearVertexVerticesKernel(
assert(context);
_currentKernelBundle->ApplyBilinearVertexVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
_currentBindState.kernelBundle->ApplyBilinearVertexVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd(),
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
}
void
@ -152,8 +157,10 @@ OsdD3D11ComputeController::ApplyCatmarkFaceVerticesKernel(
assert(context);
_currentKernelBundle->ApplyCatmarkFaceVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
_currentBindState.kernelBundle->ApplyCatmarkFaceVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd(),
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
}
@ -164,8 +171,10 @@ OsdD3D11ComputeController::ApplyCatmarkEdgeVerticesKernel(
assert(context);
_currentKernelBundle->ApplyCatmarkEdgeVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
_currentBindState.kernelBundle->ApplyCatmarkEdgeVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd(),
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
}
void
@ -174,8 +183,10 @@ OsdD3D11ComputeController::ApplyCatmarkVertexVerticesKernelB(
assert(context);
_currentKernelBundle->ApplyCatmarkVertexVerticesKernelB(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
_currentBindState.kernelBundle->ApplyCatmarkVertexVerticesKernelB(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd(),
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
}
void
@ -184,8 +195,10 @@ OsdD3D11ComputeController::ApplyCatmarkVertexVerticesKernelA1(
assert(context);
_currentKernelBundle->ApplyCatmarkVertexVerticesKernelA(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), false);
_currentBindState.kernelBundle->ApplyCatmarkVertexVerticesKernelA(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd(), false,
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
}
void
@ -194,8 +207,10 @@ OsdD3D11ComputeController::ApplyCatmarkVertexVerticesKernelA2(
assert(context);
_currentKernelBundle->ApplyCatmarkVertexVerticesKernelA(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
_currentBindState.kernelBundle->ApplyCatmarkVertexVerticesKernelA(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd(), true,
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
}
void
@ -204,8 +219,10 @@ OsdD3D11ComputeController::ApplyLoopEdgeVerticesKernel(
assert(context);
_currentKernelBundle->ApplyLoopEdgeVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
_currentBindState.kernelBundle->ApplyLoopEdgeVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd(),
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
}
void
@ -214,8 +231,10 @@ OsdD3D11ComputeController::ApplyLoopVertexVerticesKernelB(
assert(context);
_currentKernelBundle->ApplyLoopVertexVerticesKernelB(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
_currentBindState.kernelBundle->ApplyLoopVertexVerticesKernelB(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd(),
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
}
void
@ -224,8 +243,10 @@ OsdD3D11ComputeController::ApplyLoopVertexVerticesKernelA1(
assert(context);
_currentKernelBundle->ApplyLoopVertexVerticesKernelA(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), false);
_currentBindState.kernelBundle->ApplyLoopVertexVerticesKernelA(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd(), false,
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
}
void
@ -234,8 +255,10 @@ OsdD3D11ComputeController::ApplyLoopVertexVerticesKernelA2(
assert(context);
_currentKernelBundle->ApplyLoopVertexVerticesKernelA(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
_currentBindState.kernelBundle->ApplyLoopVertexVerticesKernelA(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd(), true,
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
}
void
@ -253,11 +276,13 @@ OsdD3D11ComputeController::ApplyVertexEdits(
int primvarWidth = edit->GetPrimvarWidth();
if (edit->GetOperation() == FarVertexEdit::Add) {
_currentKernelBundle->ApplyEditAdd(primvarOffset, primvarWidth,
_currentBindState.kernelBundle->ApplyEditAdd(primvarOffset, primvarWidth,
batch.GetVertexOffset(),
batch.GetTableOffset(),
batch.GetStart(),
batch.GetEnd());
batch.GetEnd(),
_currentBindState.vertexDesc.offset,
_currentBindState.varyingDesc.offset);
} else {
// XXX: edit SET is not implemented yet.
}

79
opensubdiv/osd/d3d11ComputeController.h Normal file → Executable file
View File

@ -29,6 +29,7 @@
#include "../far/dispatcher.h"
#include "../osd/d3d11ComputeContext.h"
#include "../osd/vertexDescriptor.h"
#include <vector>
@ -75,15 +76,25 @@ public:
///
/// @param varyingBuffer varying-interpolated data buffer
///
/// @param vertexDesc the descriptor of vertex elements to be refined.
/// if it's null, all primvars in the vertex buffer
/// will be refined.
///
/// @param varyingDesc the descriptor of varying elements to be refined.
/// if it's null, all primvars in the varying buffer
/// will be refined.
///
template<class VERTEX_BUFFER, class VARYING_BUFFER>
void Refine(OsdD3D11ComputeContext const *context,
FarKernelBatchVector const &batches,
VERTEX_BUFFER *vertexBuffer,
VARYING_BUFFER *varyingBuffer) {
VARYING_BUFFER *varyingBuffer,
OsdVertexBufferDescriptor const *vertexDesc=NULL,
OsdVertexBufferDescriptor const *varyingDesc=NULL) {
if (batches.empty()) return;
bind(vertexBuffer, varyingBuffer);
bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc);
context->BindShaderStorageBuffers(_deviceContext);
FarDispatcher::Refine(this,
@ -145,48 +156,68 @@ protected:
void ApplyVertexEdits(FarKernelBatch const &batch, ComputeContext const *context) const;
OsdD3D11ComputeKernelBundle * getKernels(int numVertexElements,
int numVaryingElements);
OsdD3D11ComputeKernelBundle * getKernels(OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc);
void bindShaderResources();
void unbindShaderResources();
template<class VERTEX_BUFFER, class VARYING_BUFFER>
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying) {
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying,
OsdVertexBufferDescriptor const *vertexDesc,
OsdVertexBufferDescriptor const *varyingDesc) {
_currentVertexBufferUAV = vertex ? vertex->BindD3D11UAV(_deviceContext) : 0;
_currentVaryingBufferUAV = varying ? varying->BindD3D11UAV(_deviceContext) : 0;
_vdesc.numVertexElements = vertex ? vertex->GetNumElements() : 0;
_vdesc.numVaryingElements = varying ? varying->GetNumElements() : 0;
_currentKernelBundle = getKernels(_vdesc.numVertexElements,
_vdesc.numVaryingElements);
// if the vertex buffer descriptor is specified, use it.
// otherwise, assumes the data is tightly packed in the vertex buffer.
if (vertexDesc) {
_currentBindState.vertexDesc = *vertexDesc;
} else {
int numElements = vertex ? vertex->GetNumElements() : 0;
_currentBindState.vertexDesc = OsdVertexBufferDescriptor(
0, numElements, numElements);
}
if (varyingDesc) {
_currentBindState.varyingDesc = *varyingDesc;
} else {
int numElements = varying ? varying->GetNumElements() : 0;
_currentBindState.varyingDesc = OsdVertexBufferDescriptor(
0, numElements, numElements);
}
_currentBindState.vertexBuffer = vertex ? vertex->BindD3D11UAV(_deviceContext) : 0;
_currentBindState.varyingBuffer = varying ? varying->BindD3D11UAV(_deviceContext) : 0;
_currentBindState.kernelBundle = getKernels(_currentBindState.vertexDesc,
_currentBindState.varyingDesc);
bindShaderResources();
}
void unbind() {
_currentVertexBufferUAV = 0;
_currentVaryingBufferUAV = 0;
_currentKernelBundle = 0;
_currentBindState.Reset();
unbindShaderResources();
}
private:
struct BindState {
BindState() : vertexBuffer(0), varyingBuffer(0), kernelBundle(NULL) {}
void Reset() {
vertexBuffer = varyingBuffer = 0;
vertexDesc.Reset();
varyingDesc.Reset();
}
ID3D11UnorderedAccessView *vertexBuffer;
ID3D11UnorderedAccessView *varyingBuffer;
OsdVertexBufferDescriptor vertexDesc;
OsdVertexBufferDescriptor varyingDesc;
OsdD3D11ComputeKernelBundle *kernelBundle;
};
BindState _currentBindState;
ID3D11DeviceContext *_deviceContext;
ID3D11Query *_query;
std::vector<OsdD3D11ComputeKernelBundle *> _kernelRegistry;
OsdVertexDescriptor _vdesc;
ID3D11UnorderedAccessView * _currentVertexBufferUAV,
* _currentVaryingBufferUAV;
OsdD3D11ComputeKernelBundle * _currentKernelBundle;
};
} // end namespace OPENSUBDIV_VERSION

View File

@ -79,10 +79,14 @@ OsdD3D11ComputeKernelBundle::~OsdD3D11ComputeKernelBundle() {
}
bool
OsdD3D11ComputeKernelBundle::Compile(int numVertexElements,
int numVaryingElements) {
OsdD3D11ComputeKernelBundle::Compile(
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc) {
_vdesc.Set( numVertexElements, numVaryingElements );
_numVertexElements = vertexDesc.length;
_vertexStride = vertexDesc.stride;
_numVaryingElements = varyingDesc.length;
_varyingStride = varyingDesc.stride;
DWORD dwShaderFlags = D3DCOMPILE_ENABLE_STRICTNESS;
#ifdef _DEBUG
@ -90,18 +94,26 @@ OsdD3D11ComputeKernelBundle::Compile(int numVertexElements,
#endif
std::ostringstream ss;
ss << numVertexElements;
ss << _numVertexElements;
std::string numVertexElementsStr(ss.str());
ss.str("");
ss << numVaryingElements;
ss << _numVaryingElements;
std::string numVaryingElementsStr(ss.str());
ss.str("");
ss << _vertexStride;
std::string vertexStrideStr(ss.str());
ss.str("");
ss << _varyingStride;
std::string varyingStrideStr(ss.str());
ss.str("");
ss << _workGroupSize;
std::string workGroupSizeStr(ss.str());
D3D_SHADER_MACRO shaderDefines[] = {
"NUM_VERTEX_ELEMENTS", numVertexElementsStr.c_str(),
"VERTEX_STRIDE", vertexStrideStr.c_str(),
"NUM_VARYING_ELEMENTS", numVaryingElementsStr.c_str(),
"VARYING_STRIDE", varyingStrideStr.c_str(),
"WORK_GROUP_SIZE", workGroupSizeStr.c_str(),
0, 0
};
@ -183,6 +195,8 @@ struct OsdD3D11ComputeKernelBundle::KernelCB {
int tableOffset; // offset of subdivision table
int indexStart; // start index relative to tableOffset
int indexEnd; // end index relative to tableOffset
int vertexBaseOffset; // base vbo offset of the vertex buffer
int varyingBaseOffset; // base vbo offset of the varying buffer
BOOL vertexPass; // 4-byte bool
// vertex edit kernel
@ -225,7 +239,8 @@ OsdD3D11ComputeKernelBundle::dispatchCompute(
void
OsdD3D11ComputeKernelBundle::ApplyBilinearFaceVerticesKernel(
int vertexOffset, int tableOffset, int start, int end) {
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset) {
KernelCB args;
ZeroMemory(&args, sizeof(args));
@ -233,12 +248,15 @@ OsdD3D11ComputeKernelBundle::ApplyBilinearFaceVerticesKernel(
args.tableOffset = tableOffset;
args.indexStart = start;
args.indexEnd = end;
args.vertexBaseOffset = vertexBaseOffset;
args.varyingBaseOffset = varyingBaseOffset;
dispatchCompute(_kernelComputeFace, args);
}
void
OsdD3D11ComputeKernelBundle::ApplyBilinearEdgeVerticesKernel(
int vertexOffset, int tableOffset, int start, int end) {
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset) {
KernelCB args;
ZeroMemory(&args, sizeof(args));
@ -246,12 +264,15 @@ OsdD3D11ComputeKernelBundle::ApplyBilinearEdgeVerticesKernel(
args.tableOffset = tableOffset;
args.indexStart = start;
args.indexEnd = end;
args.vertexBaseOffset = vertexBaseOffset;
args.varyingBaseOffset = varyingBaseOffset;
dispatchCompute(_kernelComputeBilinearEdge, args);
}
void
OsdD3D11ComputeKernelBundle::ApplyBilinearVertexVerticesKernel(
int vertexOffset, int tableOffset, int start, int end) {
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset) {
KernelCB args;
ZeroMemory(&args, sizeof(args));
@ -259,13 +280,16 @@ OsdD3D11ComputeKernelBundle::ApplyBilinearVertexVerticesKernel(
args.tableOffset = tableOffset;
args.indexStart = start;
args.indexEnd = end;
args.vertexBaseOffset = vertexBaseOffset;
args.varyingBaseOffset = varyingBaseOffset;
dispatchCompute(_kernelComputeVertex, args);
}
void
OsdD3D11ComputeKernelBundle::ApplyCatmarkFaceVerticesKernel(
int vertexOffset, int tableOffset, int start, int end) {
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset) {
KernelCB args;
ZeroMemory(&args, sizeof(args));
@ -273,12 +297,15 @@ OsdD3D11ComputeKernelBundle::ApplyCatmarkFaceVerticesKernel(
args.tableOffset = tableOffset;
args.indexStart = start;
args.indexEnd = end;
args.vertexBaseOffset = vertexBaseOffset;
args.varyingBaseOffset = varyingBaseOffset;
dispatchCompute(_kernelComputeFace, args);
}
void
OsdD3D11ComputeKernelBundle::ApplyCatmarkEdgeVerticesKernel(
int vertexOffset, int tableOffset, int start, int end) {
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset) {
KernelCB args;
ZeroMemory(&args, sizeof(args));
@ -286,12 +313,15 @@ OsdD3D11ComputeKernelBundle::ApplyCatmarkEdgeVerticesKernel(
args.tableOffset = tableOffset;
args.indexStart = start;
args.indexEnd = end;
args.vertexBaseOffset = vertexBaseOffset;
args.varyingBaseOffset = varyingBaseOffset;
dispatchCompute(_kernelComputeEdge, args);
}
void
OsdD3D11ComputeKernelBundle::ApplyCatmarkVertexVerticesKernelB(
int vertexOffset, int tableOffset, int start, int end) {
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset) {
KernelCB args;
ZeroMemory(&args, sizeof(args));
@ -299,12 +329,15 @@ OsdD3D11ComputeKernelBundle::ApplyCatmarkVertexVerticesKernelB(
args.tableOffset = tableOffset;
args.indexStart = start;
args.indexEnd = end;
args.vertexBaseOffset = vertexBaseOffset;
args.varyingBaseOffset = varyingBaseOffset;
dispatchCompute(_kernelComputeCatmarkVertexB, args);
}
void
OsdD3D11ComputeKernelBundle::ApplyCatmarkVertexVerticesKernelA(
int vertexOffset, int tableOffset, int start, int end, bool pass) {
int vertexOffset, int tableOffset, int start, int end, bool pass,
int vertexBaseOffset, int varyingBaseOffset) {
KernelCB args;
ZeroMemory(&args, sizeof(args));
@ -313,12 +346,15 @@ OsdD3D11ComputeKernelBundle::ApplyCatmarkVertexVerticesKernelA(
args.indexStart = start;
args.indexEnd = end;
args.vertexPass = pass ? 1 : 0;
args.vertexBaseOffset = vertexBaseOffset;
args.varyingBaseOffset = varyingBaseOffset;
dispatchCompute(_kernelComputeVertexA, args);
}
void
OsdD3D11ComputeKernelBundle::ApplyLoopEdgeVerticesKernel(
int vertexOffset, int tableOffset, int start, int end) {
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset) {
KernelCB args;
ZeroMemory(&args, sizeof(args));
@ -326,12 +362,15 @@ OsdD3D11ComputeKernelBundle::ApplyLoopEdgeVerticesKernel(
args.tableOffset = tableOffset;
args.indexStart = start;
args.indexEnd = end;
args.vertexBaseOffset = vertexBaseOffset;
args.varyingBaseOffset = varyingBaseOffset;
dispatchCompute(_kernelComputeEdge, args);
}
void
OsdD3D11ComputeKernelBundle::ApplyLoopVertexVerticesKernelB(
int vertexOffset, int tableOffset, int start, int end) {
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset) {
KernelCB args;
ZeroMemory(&args, sizeof(args));
@ -339,12 +378,15 @@ OsdD3D11ComputeKernelBundle::ApplyLoopVertexVerticesKernelB(
args.tableOffset = tableOffset;
args.indexStart = start;
args.indexEnd = end;
args.vertexBaseOffset = vertexBaseOffset;
args.varyingBaseOffset = varyingBaseOffset;
dispatchCompute(_kernelComputeLoopVertexB, args);
}
void
OsdD3D11ComputeKernelBundle::ApplyLoopVertexVerticesKernelA(
int vertexOffset, int tableOffset, int start, int end, bool pass) {
int vertexOffset, int tableOffset, int start, int end, bool pass,
int vertexBaseOffset, int varyingBaseOffset) {
KernelCB args;
ZeroMemory(&args, sizeof(args));
@ -353,13 +395,16 @@ OsdD3D11ComputeKernelBundle::ApplyLoopVertexVerticesKernelA(
args.indexStart = start;
args.indexEnd = end;
args.vertexPass = pass ? 1 : 0;
args.vertexBaseOffset = vertexBaseOffset;
args.varyingBaseOffset = varyingBaseOffset;
dispatchCompute(_kernelComputeVertexA, args);
}
void
OsdD3D11ComputeKernelBundle::ApplyEditAdd(
int primvarOffset, int primvarWidth,
int vertexOffset, int tableOffset, int start, int end) {
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset) {
KernelCB args;
ZeroMemory(&args, sizeof(args));
@ -369,6 +414,8 @@ OsdD3D11ComputeKernelBundle::ApplyEditAdd(
args.indexEnd = end;
args.editPrimVarOffset = primvarOffset;
args.editPrimVarWidth = primvarWidth;
args.vertexBaseOffset = vertexBaseOffset;
args.varyingBaseOffset = varyingBaseOffset;
dispatchCompute(_kernelEditAdd, args);
}

57
opensubdiv/osd/d3d11KernelBundle.h Normal file → Executable file
View File

@ -48,53 +48,71 @@ public:
/// Destructor
~OsdD3D11ComputeKernelBundle();
bool Compile(int numVertexElements, int numVaryingElements);
bool Compile(OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc);
void ApplyBilinearFaceVerticesKernel(
int vertexOffset, int tableOffset, int start, int end);
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset);
void ApplyBilinearEdgeVerticesKernel(
int vertexOffset, int tableOffset, int start, int end);
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset);
void ApplyBilinearVertexVerticesKernel(
int vertexOffset, int tableOffset, int start, int end);
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset);
void ApplyCatmarkFaceVerticesKernel(
int vertexOffset, int tableOffset, int start, int end);
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset);
void ApplyCatmarkEdgeVerticesKernel(
int vertexOffset, int tableOffset, int start, int end);
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset);
void ApplyCatmarkVertexVerticesKernelB(
int vertexOffset, int tableOffset, int start, int end);
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset);
void ApplyCatmarkVertexVerticesKernelA(
int vertexOffset, int tableOffset, int start, int end, bool pass);
int vertexOffset, int tableOffset, int start, int end, bool pass,
int vertexBaseOffset, int varyingBaseOffset);
void ApplyLoopEdgeVerticesKernel(
int vertexOffset, int tableOffset, int start, int end);
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset);
void ApplyLoopVertexVerticesKernelB(
int vertexOffset, int tableOffset, int start, int end);
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset);
void ApplyLoopVertexVerticesKernelA(
int vertexOffset, int tableOffset, int start, int end, bool pass);
int vertexOffset, int tableOffset, int start, int end, bool pass,
int vertexBaseOffset, int varyingBaseOffset);
void ApplyEditAdd(int primvarOffset, int primvarWidth,
int vertexOffset, int tableOffset, int start, int end);
int vertexOffset, int tableOffset, int start, int end,
int vertexBaseOffset, int varyingBaseOffset);
struct Match {
/// Constructor
Match(int numVertexElements, int numVaryingElements)
: vdesc(numVertexElements, numVaryingElements) {
Match(OsdVertexBufferDescriptor const &vertex,
OsdVertexBufferDescriptor const &varying)
: vertexDesc(vertex), varyingDesc(varying) {
}
bool operator() (OsdD3D11ComputeKernelBundle const *kernel) {
return vdesc == kernel->_vdesc;
// offset is dynamic. just comparing length and stride here,
// returns true if they are equal
return (vertexDesc.length == kernel->_numVertexElements and
vertexDesc.stride == kernel->_vertexStride and
varyingDesc.length == kernel->_numVaryingElements and
varyingDesc.stride == kernel->_varyingStride);
}
OsdVertexDescriptor vdesc;
OsdVertexBufferDescriptor vertexDesc;
OsdVertexBufferDescriptor varyingDesc;
};
friend struct Match;
@ -130,7 +148,10 @@ protected:
int _workGroupSize;
OsdVertexDescriptor _vdesc;
int _numVertexElements;
int _vertexStride;
int _numVaryingElements;
int _varyingStride;
};
} // end namespace OPENSUBDIV_VERSION

14
opensubdiv/osd/d3d11Mesh.h Normal file → Executable file
View File

@ -124,6 +124,13 @@ public:
virtual void Refine() {
_computeController->Refine(_computeContext, _farMesh->GetKernelBatches(), _vertexBuffer, _varyingBuffer);
}
/// Refines the mesh through the compute controller, forwarding the
/// caller-supplied buffer descriptors.
///
/// @param vertexDesc   descriptor of the vertex elements to refine; passed
///                     through to the controller unchanged
///
/// @param varyingDesc  descriptor of the varying elements to refine; passed
///                     through to the controller unchanged
///
/// @param interleaved  if true, the vertex buffer is also handed to the
///                     controller as the varying buffer; otherwise the
///                     separate varying buffer is used
///
/// NOTE(review): null descriptors presumably mean "refine every element of
/// the buffer" -- confirm against the controller's Refine() documentation.
virtual void Refine(OsdVertexBufferDescriptor const *vertexDesc,
OsdVertexBufferDescriptor const *varyingDesc,
bool interleaved) {
_computeController->Refine(_computeContext, _farMesh->GetKernelBatches(),
_vertexBuffer, (interleaved ? _vertexBuffer : _varyingBuffer),
vertexDesc, varyingDesc);
}
virtual void Synchronize() {
_computeController->Synchronize();
}
@ -265,6 +272,13 @@ public:
virtual void Refine() {
_computeController->Refine(_computeContext, _farMesh->GetKernelBatches(), _vertexBuffer, _varyingBuffer);
}
/// Refines the mesh through the compute controller, forwarding the
/// caller-supplied buffer descriptors.
///
/// @param vertexDesc   descriptor of the vertex elements to refine; passed
///                     through to the controller unchanged
///
/// @param varyingDesc  descriptor of the varying elements to refine; passed
///                     through to the controller unchanged
///
/// @param interleaved  if true, the vertex buffer is also handed to the
///                     controller as the varying buffer; otherwise the
///                     separate varying buffer is used
///
/// NOTE(review): null descriptors presumably mean "refine every element of
/// the buffer" -- confirm against the controller's Refine() documentation.
virtual void Refine(OsdVertexBufferDescriptor const *vertexDesc,
OsdVertexBufferDescriptor const *varyingDesc,
bool interleaved) {
_computeController->Refine(_computeContext, _farMesh->GetKernelBatches(),
_vertexBuffer, (interleaved ? _vertexBuffer : _varyingBuffer),
vertexDesc, varyingDesc);
}
virtual void Synchronize() {
_computeController->Synchronize();
}

View File

@ -30,8 +30,7 @@ namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
OsdGcdComputeController::OsdGcdComputeController() :
_currentVertexBuffer(0), _currentVaryingBuffer(0) {
OsdGcdComputeController::OsdGcdComputeController() {
_gcd_queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
}
@ -42,7 +41,8 @@ OsdGcdComputeController::ApplyBilinearFaceVerticesKernel(
assert(context);
OsdGcdComputeFace(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::F_IT)->GetBuffer(),
(const int*)context->GetTable(FarSubdivisionTables::F_ITa)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(),
@ -56,7 +56,8 @@ OsdGcdComputeController::ApplyBilinearEdgeVerticesKernel(
assert(context);
OsdGcdComputeBilinearEdge(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(),
_gcd_queue);
@ -69,7 +70,8 @@ OsdGcdComputeController::ApplyBilinearVertexVerticesKernel(
assert(context);
OsdGcdComputeBilinearVertex(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(),
_gcd_queue);
@ -82,7 +84,8 @@ OsdGcdComputeController::ApplyCatmarkFaceVerticesKernel(
assert(context);
OsdGcdComputeFace(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::F_IT)->GetBuffer(),
(const int*)context->GetTable(FarSubdivisionTables::F_ITa)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(),
@ -96,7 +99,8 @@ OsdGcdComputeController::ApplyCatmarkEdgeVerticesKernel(
assert(context);
OsdGcdComputeEdge(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
(const float*)context->GetTable(FarSubdivisionTables::E_W)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(),
@ -110,7 +114,8 @@ OsdGcdComputeController::ApplyCatmarkVertexVerticesKernelB(
assert(context);
OsdGcdComputeVertexB(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
(const int*)context->GetTable(FarSubdivisionTables::V_IT)->GetBuffer(),
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
@ -125,7 +130,8 @@ OsdGcdComputeController::ApplyCatmarkVertexVerticesKernelA1(
assert(context);
OsdGcdComputeVertexA(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), false,
@ -139,7 +145,8 @@ OsdGcdComputeController::ApplyCatmarkVertexVerticesKernelA2(
assert(context);
OsdGcdComputeVertexA(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true,
@ -153,7 +160,8 @@ OsdGcdComputeController::ApplyLoopEdgeVerticesKernel(
assert(context);
OsdGcdComputeEdge(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
(const float*)context->GetTable(FarSubdivisionTables::E_W)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(),
@ -167,7 +175,8 @@ OsdGcdComputeController::ApplyLoopVertexVerticesKernelB(
assert(context);
OsdGcdComputeLoopVertexB(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
(const int*)context->GetTable(FarSubdivisionTables::V_IT)->GetBuffer(),
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
@ -182,7 +191,8 @@ OsdGcdComputeController::ApplyLoopVertexVerticesKernelA1(
assert(context);
OsdGcdComputeVertexA(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), false,
@ -196,7 +206,8 @@ OsdGcdComputeController::ApplyLoopVertexVerticesKernelA2(
assert(context);
OsdGcdComputeVertexA(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true,
@ -216,8 +227,8 @@ OsdGcdComputeController::ApplyVertexEdits(
const OsdCpuTable * editValues = edit->GetEditValues();
if (edit->GetOperation() == FarVertexEdit::Add) {
OsdGcdEditVertexAdd(_vdesc,
_currentVertexBuffer,
OsdGcdEditVertexAdd(_currentBindState.vertexBuffer,
_currentBindState.vertexDesc,
edit->GetPrimvarOffset(),
edit->GetPrimvarWidth(),
batch.GetVertexOffset(),
@ -228,8 +239,8 @@ OsdGcdComputeController::ApplyVertexEdits(
static_cast<float*>(editValues->GetBuffer()),
_gcd_queue);
} else if (edit->GetOperation() == FarVertexEdit::Set) {
OsdGcdEditVertexSet(_vdesc,
_currentVertexBuffer,
OsdGcdEditVertexSet(_currentBindState.vertexBuffer,
_currentBindState.vertexDesc,
edit->GetPrimvarOffset(),
edit->GetPrimvarWidth(),
batch.GetVertexOffset(),

View File

@ -29,6 +29,7 @@
#include "../far/dispatcher.h"
#include "../osd/cpuComputeContext.h"
#include "../osd/vertexDescriptor.h"
#include <dispatch/dispatch.h>
@ -64,15 +65,25 @@ public:
///
/// @param varyingBuffer varying-interpolated data buffer
///
/// @param vertexDesc the descriptor of vertex elements to be refined.
/// if it's null, all primvars in the vertex buffer
/// will be refined.
///
/// @param varyingDesc the descriptor of varying elements to be refined.
/// if it's null, all primvars in the varying buffer
/// will be refined.
///
template<class VERTEX_BUFFER, class VARYING_BUFFER>
void Refine(OsdCpuComputeContext const *context,
FarKernelBatchVector const & batches,
VERTEX_BUFFER *vertexBuffer,
VARYING_BUFFER *varyingBuffer) {
VARYING_BUFFER *varyingBuffer,
OsdVertexBufferDescriptor const *vertexDesc=NULL,
OsdVertexBufferDescriptor const *varyingDesc=NULL) {
if (batches.empty()) return;
bind(vertexBuffer, varyingBuffer);
bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc);
FarDispatcher::Refine(this, context, batches, /*maxlevel*/-1);
@ -130,26 +141,50 @@ protected:
void ApplyVertexEdits(FarKernelBatch const &batch, ComputeContext const *context) const;
template<class VERTEX_BUFFER, class VARYING_BUFFER>
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying) {
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying,
OsdVertexBufferDescriptor const *vertexDesc,
OsdVertexBufferDescriptor const *varyingDesc) {
_currentVertexBuffer = vertex ? vertex->BindCpuBuffer() : 0;
_currentVaryingBuffer = varying ? varying->BindCpuBuffer() : 0;
// if the vertex buffer descriptor is specified, use it.
// otherwise, assumes the data is tightly packed in the vertex buffer.
if (vertexDesc) {
_currentBindState.vertexDesc = *vertexDesc;
} else {
int numElements = vertex ? vertex->GetNumElements() : 0;
_currentBindState.vertexDesc = OsdVertexBufferDescriptor(
0, numElements, numElements);
}
if (varyingDesc) {
_currentBindState.varyingDesc = *varyingDesc;
} else {
int numElements = varying ? varying->GetNumElements() : 0;
_currentBindState.varyingDesc = OsdVertexBufferDescriptor(
0, numElements, numElements);
}
int numVertexElements = vertex ? vertex->GetNumElements() : 0;
int numVaryingElements = varying ? varying->GetNumElements() : 0;
_vdesc.Set(numVertexElements, numVaryingElements);
_currentBindState.vertexBuffer = vertex ? vertex->BindCpuBuffer() : 0;
_currentBindState.varyingBuffer = varying ? varying->BindCpuBuffer() : 0;
}
void unbind() {
_currentVertexBuffer = 0;
_currentVaryingBuffer = 0;
_vdesc.Reset();
_currentBindState.Reset();
}
private:
dispatch_queue_t _gcd_queue;
/// The CPU pointers and element descriptors that are bound for the
/// refinement pass currently in flight.
struct BindState {
    /// Starts out with no buffers bound.
    BindState()
        : vertexBuffer(NULL),
          varyingBuffer(NULL) {
    }

    /// Drops both buffer pointers and resets both descriptors.
    void Reset() {
        varyingBuffer = NULL;
        vertexBuffer = NULL;
        vertexDesc.Reset();
        varyingDesc.Reset();
    }

    float *vertexBuffer;                    // bound vertex data, NULL if none
    float *varyingBuffer;                   // bound varying data, NULL if none
    OsdVertexBufferDescriptor vertexDesc;   // elements of the vertex buffer to refine
    OsdVertexBufferDescriptor varyingDesc;  // elements of the varying buffer to refine
};
float *_currentVertexBuffer, *_currentVaryingBuffer;
OsdVertexDescriptor _vdesc;
BindState _currentBindState;
dispatch_queue_t _gcd_queue;
};

View File

@ -33,9 +33,32 @@ namespace OPENSUBDIV_VERSION {
const int GCD_WORK_STRIDE = 32;
// Zero-fills the desc.length floats that start at
// origin + index * desc.stride + desc.offset.
// Does nothing when origin is null.
static inline void
clear(float *origin, int index, OsdVertexBufferDescriptor const &desc) {
    if (!origin) {
        return;
    }
    float *first = origin + index * desc.stride + desc.offset;
    memset(first, 0, desc.length * sizeof(float));
}
// Accumulates weight * (element srcIndex) into (element dstIndex) of the same
// buffer. Both elements are located at index * desc.stride + desc.offset and
// desc.length floats are processed, in ascending order.
// Does nothing when origin is null.
static inline void
addWithWeight(float *origin, int dstIndex, int srcIndex,
              float weight, OsdVertexBufferDescriptor const &desc) {
    if (!origin) {
        return;
    }
    const float *in = origin + srcIndex * desc.stride + desc.offset;
    float *out = origin + dstIndex * desc.stride + desc.offset;
    for (int i = 0; i < desc.length; ++i) {
        out[i] += in[i] * weight;
    }
}
void OsdGcdComputeFace(
OsdVertexDescriptor const &vdesc, float * vertex, float * varying,
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *F_IT, const int *F_ITa,
int vertexOffset, int tableOffset, int start, int end,
dispatch_queue_t gcdq) {
@ -44,18 +67,22 @@ void OsdGcdComputeFace(
dispatch_apply(workSize/GCD_WORK_STRIDE, gcdq, ^(size_t blockIdx){
const int start_i = start + blockIdx*GCD_WORK_STRIDE;
const int end_i = start_i + GCD_WORK_STRIDE;
OsdCpuComputeFace(vdesc, vertex, varying, F_IT, F_ITa,
OsdCpuComputeFace(vertex, varying, vertexDesc, varyingDesc,
F_IT, F_ITa,
vertexOffset, tableOffset, start_i, end_i);
});
const int start_e = end - workSize%GCD_WORK_STRIDE;
const int end_e = end;
if (start_e < end_e)
OsdCpuComputeFace(vdesc, vertex, varying, F_IT, F_ITa,
OsdCpuComputeFace(vertex, varying, vertexDesc, varyingDesc,
F_IT, F_ITa,
vertexOffset, tableOffset, start_e, end_e);
}
// Grand Central Dispatch front end for the edge kernel.
//
// NOTE: this excerpt interleaves the pre-change lines (using `vdesc`) with the
// post-change lines (using `vertexDesc` / `varyingDesc`).
//
// Full chunks of GCD_WORK_STRIDE indices are dispatched through
// dispatch_apply() on `gcdq`, each forwarding to OsdCpuComputeEdge(); the
// remaining tail is handled by one direct OsdCpuComputeEdge() call.
//
// vertex / varying         raw float buffers forwarded to the CPU kernel
// vertexDesc / varyingDesc descriptors forwarded to the CPU kernel
// E_IT, E_W                edge tables forwarded to the CPU kernel
// vertexOffset, tableOffset, start, end
//                          batch range, forwarded with start/end narrowed
// gcdq                     queue the chunks are dispatched on
void OsdGcdComputeEdge(
OsdVertexDescriptor const &vdesc, float * vertex, float * varying,
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *E_IT, const float *E_W,
int vertexOffset, int tableOffset, int start, int end,
dispatch_queue_t gcdq) {
@ -64,18 +91,22 @@ void OsdGcdComputeEdge(
// `workSize` is defined in lines elided by the hunk above
// (presumably end - start -- TODO confirm).
dispatch_apply(workSize/GCD_WORK_STRIDE, gcdq, ^(size_t blockIdx){
const int start_i = start + blockIdx*GCD_WORK_STRIDE;
const int end_i = start_i + GCD_WORK_STRIDE;
OsdCpuComputeEdge(vdesc, vertex, varying, E_IT, E_W,
OsdCpuComputeEdge(vertex, varying, vertexDesc, varyingDesc,
E_IT, E_W,
vertexOffset, tableOffset, start_i, end_i);
});
// Tail: the last workSize % GCD_WORK_STRIDE indices that did not fill a chunk.
const int start_e = end - workSize%GCD_WORK_STRIDE;
const int end_e = end;
if (start_e < end_e)
OsdCpuComputeEdge(vdesc, vertex, varying, E_IT, E_W,
OsdCpuComputeEdge(vertex, varying, vertexDesc, varyingDesc,
E_IT, E_W,
vertexOffset, tableOffset, start_e, end_e);
}
// Grand Central Dispatch front end for the vertex "A" kernel.
//
// NOTE: this excerpt interleaves the pre-change lines (using `vdesc`) with the
// post-change lines (using `vertexDesc` / `varyingDesc`).
//
// Full chunks of GCD_WORK_STRIDE indices are dispatched through
// dispatch_apply() on `gcdq`, each forwarding to OsdCpuComputeVertexA(); the
// remaining tail is handled by one direct OsdCpuComputeVertexA() call.
//
// vertex / varying         raw float buffers forwarded to the CPU kernel
// vertexDesc / varyingDesc descriptors forwarded to the CPU kernel
// V_ITa, V_W               vertex tables forwarded to the CPU kernel
// vertexOffset, tableOffset, start, end
//                          batch range, forwarded with start/end narrowed
// pass                     forwarded unchanged to the CPU kernel
// gcdq                     queue the chunks are dispatched on
void OsdGcdComputeVertexA(
OsdVertexDescriptor const &vdesc, float * vertex, float * varying,
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, const float *V_W,
int vertexOffset, int tableOffset, int start, int end, int pass,
dispatch_queue_t gcdq) {
@ -84,18 +115,22 @@ void OsdGcdComputeVertexA(
// `workSize` is defined in lines elided by the hunk above
// (presumably end - start -- TODO confirm).
dispatch_apply(workSize/GCD_WORK_STRIDE, gcdq, ^(size_t blockIdx){
const int start_i = start + blockIdx*GCD_WORK_STRIDE;
const int end_i = start_i + GCD_WORK_STRIDE;
OsdCpuComputeVertexA(vdesc, vertex, varying, V_ITa, V_W,
OsdCpuComputeVertexA(vertex, varying, vertexDesc, varyingDesc,
V_ITa, V_W,
vertexOffset, tableOffset, start_i, end_i, pass);
});
// Tail: the last workSize % GCD_WORK_STRIDE indices that did not fill a chunk.
const int start_e = end - workSize%GCD_WORK_STRIDE;
const int end_e = end;
if (start_e < end_e)
OsdCpuComputeVertexA(vdesc, vertex, varying, V_ITa, V_W,
OsdCpuComputeVertexA(vertex, varying, vertexDesc, varyingDesc,
V_ITa, V_W,
vertexOffset, tableOffset, start_e, end_e, pass);
}
// Grand Central Dispatch front end for the vertex "B" kernel.
//
// NOTE: this excerpt interleaves the pre-change lines (using `vdesc`) with the
// post-change lines (using `vertexDesc` / `varyingDesc`).
//
// Full chunks of GCD_WORK_STRIDE indices are dispatched through
// dispatch_apply() on `gcdq`, each forwarding to OsdCpuComputeVertexB(); the
// remaining tail is handled by one direct OsdCpuComputeVertexB() call.
//
// vertex / varying         raw float buffers forwarded to the CPU kernel
// vertexDesc / varyingDesc descriptors forwarded to the CPU kernel
// V_ITa, V_IT, V_W         vertex tables forwarded to the CPU kernel
// vertexOffset, tableOffset, start, end
//                          batch range, forwarded with start/end narrowed
// gcdq                     queue the chunks are dispatched on
void OsdGcdComputeVertexB(
OsdVertexDescriptor const &vdesc, float * vertex, float * varying,
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, const int *V_IT, const float *V_W,
int vertexOffset, int tableOffset, int start, int end,
dispatch_queue_t gcdq) {
@ -104,18 +139,22 @@ void OsdGcdComputeVertexB(
// `workSize` is defined in lines elided by the hunk above
// (presumably end - start -- TODO confirm).
dispatch_apply(workSize/GCD_WORK_STRIDE, gcdq, ^(size_t blockIdx){
const int start_i = start + blockIdx*GCD_WORK_STRIDE;
const int end_i = start_i + GCD_WORK_STRIDE;
OsdCpuComputeVertexB(vdesc, vertex, varying, V_ITa, V_IT, V_W,
OsdCpuComputeVertexB(vertex, varying, vertexDesc, varyingDesc,
V_ITa, V_IT, V_W,
vertexOffset, tableOffset, start_i, end_i);
});
// Tail: the last workSize % GCD_WORK_STRIDE indices that did not fill a chunk.
const int start_e = end - workSize%GCD_WORK_STRIDE;
const int end_e = end;
if (start_e < end_e)
OsdCpuComputeVertexB(vdesc, vertex, varying, V_ITa, V_IT, V_W,
OsdCpuComputeVertexB(vertex, varying, vertexDesc, varyingDesc,
V_ITa, V_IT, V_W,
vertexOffset, tableOffset, start_e, end_e);
}
// Grand Central Dispatch version of the Loop vertex "B" kernel. Unlike the
// wrappers that forward to an OsdCpuCompute* function, the per-vertex work is
// written inline in the dispatched block.
//
// NOTE: this excerpt interleaves the pre-change lines (calls on `vdesc`) with
// the post-change lines (the free helpers clear() / addWithWeight() taking a
// buffer descriptor). The start of the body -- including the definitions of
// i, h, n, p, weight, wp and the first part of the beta computation -- is
// elided by the hunk below.
void OsdGcdComputeLoopVertexB(
OsdVertexDescriptor const &vdesc, float * vertex, float * varying,
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, const int *V_IT, const float *V_W,
int vertexOffset, int tableOffset, int start, int end,
dispatch_queue_t gcdq) {
@ -133,19 +172,22 @@ void OsdGcdComputeLoopVertexB(
beta = (0.625f - beta) * wp;
// Destination element for table entry i.
int dstIndex = vertexOffset + i - tableOffset;
// Zero the destination in both buffers before accumulating into it.
vdesc.Clear(vertex, varying, dstIndex);
clear(vertex, dstIndex, vertexDesc);
clear(varying, dstIndex, varyingDesc);
// Vertex data: p contributes weight * (1 - beta * n) ...
vdesc.AddWithWeight(vertex, dstIndex, p, weight * (1.0f - (beta * n)));
addWithWeight(vertex, dstIndex, p, weight * (1.0f - (beta * n)), vertexDesc);
// ... and each of the n entries V_IT[h .. h+n-1] contributes weight * beta.
for (int j = 0; j < n; ++j)
vdesc.AddWithWeight(vertex, dstIndex, V_IT[h+j], weight * beta);
addWithWeight(vertex, dstIndex, V_IT[h+j], weight * beta, vertexDesc);
// Varying data is taken from p with weight 1.
vdesc.AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
addWithWeight(varying, dstIndex, p, 1.0f, varyingDesc);
});
}
// Grand Central Dispatch version of the bilinear edge kernel: each destination
// element becomes the 0.5 / 0.5 blend of the two source elements listed in
// E_IT for that edge, for both the vertex and the varying buffer.
//
// NOTE: this excerpt interleaves the pre-change lines (calls on `vdesc`) with
// the post-change lines (the free helpers clear() / addWithWeight() taking a
// buffer descriptor). The dispatch_apply() call and the definitions of i and
// eidx0 are elided by the hunk below.
void OsdGcdComputeBilinearEdge(
OsdVertexDescriptor const &vdesc, float * vertex, float * varying,
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *E_IT,
int vertexOffset, int tableOffset, int start, int end,
dispatch_queue_t gcdq) {
@ -156,18 +198,21 @@ void OsdGcdComputeBilinearEdge(
int eidx1 = E_IT[2*i+1];
// Destination element for table entry i.
int dstIndex = vertexOffset + i - tableOffset;
// Zero the destination in both buffers before accumulating into it.
vdesc.Clear(vertex, varying, dstIndex);
clear(vertex, dstIndex, vertexDesc);
clear(varying, dstIndex, varyingDesc);
// Vertex data: average of the two source elements.
vdesc.AddWithWeight(vertex, dstIndex, eidx0, 0.5f);
vdesc.AddWithWeight(vertex, dstIndex, eidx1, 0.5f);
addWithWeight(vertex, dstIndex, eidx0, 0.5f, vertexDesc);
addWithWeight(vertex, dstIndex, eidx1, 0.5f, vertexDesc);
// Varying data: same average.
vdesc.AddVaryingWithWeight(varying, dstIndex, eidx0, 0.5f);
vdesc.AddVaryingWithWeight(varying, dstIndex, eidx1, 0.5f);
addWithWeight(varying, dstIndex, eidx0, 0.5f, varyingDesc);
addWithWeight(varying, dstIndex, eidx1, 0.5f, varyingDesc);
});
}
// Grand Central Dispatch version of the bilinear vertex kernel: each
// destination element is cleared and then receives the source element
// p = V_ITa[i] with weight 1, for both the vertex and the varying buffer.
//
// NOTE: this excerpt interleaves the pre-change lines (calls on `vdesc`) with
// the post-change lines (the free helpers clear() / addWithWeight() taking a
// buffer descriptor). The dispatch_apply() call and the definition of i are
// elided by the hunk below.
void OsdGcdComputeBilinearVertex(
OsdVertexDescriptor const &vdesc, float * vertex, float * varying,
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa,
int vertexOffset, int tableOffset, int start, int end,
dispatch_queue_t gcdq) {
@ -177,15 +222,17 @@ void OsdGcdComputeBilinearVertex(
int p = V_ITa[i];
// Destination element for table entry i.
int dstIndex = vertexOffset + i - tableOffset;
// Zero the destination in both buffers before accumulating into it.
vdesc.Clear(vertex, varying, dstIndex);
clear(vertex, dstIndex, vertexDesc);
clear(varying, dstIndex, varyingDesc);
// Copy the source element (weight 1) into the cleared destination.
vdesc.AddWithWeight(vertex, dstIndex, p, 1.0f);
vdesc.AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
addWithWeight(vertex, dstIndex, p, 1.0f, vertexDesc);
addWithWeight(varying, dstIndex, p, 1.0f, varyingDesc);
});
}
// Applies additive vertex edits to the vertex buffer, one GCD task per edit.
//
// NOTE: this excerpt interleaves the pre-change lines (the `vdesc` parameter
// and the vdesc.ApplyVertexEditAdd() call) with the post-change lines; the
// tail of the parameter list (editIndices, editValues, gcdq) is elided by the
// hunk below.
//
// vertex        raw float vertex buffer; nothing is written when it is null
// vertexDesc    offset / stride of the vertex data inside `vertex`
// primVarOffset first element, relative to vertexDesc.offset, that is edited
// primVarWidth  number of consecutive elements edited per vertex
// vertexOffset  added to every entry of editIndices
// tableOffset   added to the edit index range
// start, end    half-open range of edits to apply
// editIndices   per-edit target vertex index
// editValues    per-edit values, primVarWidth floats per edit
// gcdq          queue the per-edit tasks are dispatched on
void OsdGcdEditVertexAdd(
OsdVertexDescriptor const &vdesc, float * vertex,
float * vertex,
OsdVertexBufferDescriptor const &vertexDesc,
int primVarOffset, int primVarWidth,
int vertexOffset, int tableOffset,
int start, int end,
@ -195,14 +242,20 @@ void OsdGcdEditVertexAdd(
int vertexCount = end - start;
dispatch_apply(vertexCount, gcdq, ^(size_t blockIdx){
int i = start + blockIdx + tableOffset;
vdesc.ApplyVertexEditAdd(vertex, primVarOffset, primVarWidth,
editIndices[i] + vertexOffset,
&editValues[i*primVarWidth]);
if (vertex) {
int editIndex = editIndices[i] + vertexOffset;
float *dst = vertex + editIndex * vertexDesc.stride
+ vertexDesc.offset + primVarOffset;
// Same contract as the ApplyVertexEditAdd() call this replaces: edit i owns
// the primVarWidth values starting at editValues[i*primVarWidth], and they
// are added to the primVarWidth elements starting at dst. Indexing dst or
// editValues directly by the edit index i would write outside the vertex.
const float *src = editValues + i * primVarWidth;
for (int j = 0; j < primVarWidth; ++j) {
dst[j] += src[j];
}
}
});
}
// Applies overwriting ("set") vertex edits to the vertex buffer, one GCD task
// per edit.
//
// NOTE: this excerpt interleaves the pre-change lines (the `vdesc` parameter
// and the vdesc.ApplyVertexEditSet() call) with the post-change lines; the
// tail of the parameter list (editIndices, editValues, gcdq) is elided by the
// hunk below.
//
// vertex        raw float vertex buffer; nothing is written when it is null
// vertexDesc    offset / stride of the vertex data inside `vertex`
// primVarOffset first element, relative to vertexDesc.offset, that is edited
// primVarWidth  number of consecutive elements edited per vertex
// vertexOffset  added to every entry of editIndices
// tableOffset   added to the edit index range
// start, end    half-open range of edits to apply
// editIndices   per-edit target vertex index
// editValues    per-edit values, primVarWidth floats per edit
// gcdq          queue the per-edit tasks are dispatched on
void OsdGcdEditVertexSet(
OsdVertexDescriptor const &vdesc, float * vertex,
float * vertex,
OsdVertexBufferDescriptor const &vertexDesc,
int primVarOffset, int primVarWidth,
int vertexOffset, int tableOffset,
int start, int end,
@ -212,9 +265,14 @@ void OsdGcdEditVertexSet(
int vertexCount = end - start;
dispatch_apply(vertexCount, gcdq, ^(size_t blockIdx){
int i = start + blockIdx + tableOffset;
vdesc.ApplyVertexEditSet(vertex, primVarOffset, primVarWidth,
editIndices[i] + vertexOffset,
&editValues[i*primVarWidth]);
if (vertex) {
int editIndex = editIndices[i] + vertexOffset;
float *dst = vertex + editIndex * vertexDesc.stride
+ vertexDesc.offset + primVarOffset;
// Same contract as the ApplyVertexEditSet() call this replaces: edit i owns
// the primVarWidth values starting at editValues[i*primVarWidth], and they
// overwrite the primVarWidth elements starting at dst. Indexing dst or
// editValues directly by the edit index i would write outside the vertex.
const float *src = editValues + i * primVarWidth;
for (int j = 0; j < primVarWidth; ++j) {
dst[j] = src[j];
}
}
});
}

View File

@ -32,66 +32,75 @@
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
struct OsdVertexDescriptor;
struct OsdVertexBufferDescriptor;
// Declarations of the Grand Central Dispatch kernels.
//
// NOTE: for every function this excerpt shows the pre-change declaration head
// (taking `OsdVertexDescriptor const &vdesc`) immediately followed by the
// post-change head (taking the raw buffers first, then one
// OsdVertexBufferDescriptor per buffer). All kernels take the dispatch queue
// to run on as their last parameter.

// Face kernel; the definition chunks the range and forwards to
// OsdCpuComputeFace().
void OsdGcdComputeFace(OsdVertexDescriptor const &vdesc,
float * vertex, float * varying,
void OsdGcdComputeFace(float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *F_IT, const int *F_ITa,
int vertexOffset, int tableOffset,
int start, int end,
dispatch_queue_t gcdq);
// Edge kernel; forwards to OsdCpuComputeEdge().
// NOTE(review): the float table is named E_ITa here but E_W in the definition.
void OsdGcdComputeEdge(OsdVertexDescriptor const &vdesc,
float *vertex, float * varying,
void OsdGcdComputeEdge(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *E_IT, const float *E_ITa,
int vertexOffset, int tableOffset,
int start, int end,
dispatch_queue_t gcdq);
// Vertex "A" kernel; forwards to OsdCpuComputeVertexA().
// NOTE(review): the float table is named V_IT here but V_W in the definition.
void OsdGcdComputeVertexA(OsdVertexDescriptor const &vdesc,
float *vertex, float * varying,
void OsdGcdComputeVertexA(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, const float *V_IT,
int vertexOffset, int tableOffset,
int start, int end, int pass,
dispatch_queue_t gcdq);
// Vertex "B" kernel; forwards to OsdCpuComputeVertexB().
void OsdGcdComputeVertexB(OsdVertexDescriptor const &vdesc,
float *vertex, float * varying,
void OsdGcdComputeVertexB(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, const int *V_IT, const float *V_W,
int vertexOffset, int tableOffset,
int start, int end,
dispatch_queue_t gcdq);
// Loop vertex "B" kernel; the definition does the per-vertex accumulation
// inline in the dispatched block.
void OsdGcdComputeLoopVertexB(OsdVertexDescriptor const &vdesc,
float *vertex, float * varying,
void OsdGcdComputeLoopVertexB(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, const int *V_IT,
const float *V_W,
int vertexOffset, int tableOffset,
int start, int end,
dispatch_queue_t gcdq);
// Bilinear edge kernel: destination = 0.5 / 0.5 blend of the two E_IT entries.
void OsdGcdComputeBilinearEdge(OsdVertexDescriptor const &vdesc,
float *vertex, float * varying,
void OsdGcdComputeBilinearEdge(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *E_IT,
int vertexOffset, int tableOffset,
int start, int end,
dispatch_queue_t gcdq);
// Bilinear vertex kernel: destination = copy of the V_ITa entry.
void OsdGcdComputeBilinearVertex(OsdVertexDescriptor const &vdesc,
float *vertex, float * varying,
void OsdGcdComputeBilinearVertex(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa,
int vertexOffset, int tableOffset,
int start, int end,
dispatch_queue_t gcdq);
// Additive vertex edit: primVarWidth values per edit, applied at primVarOffset
// inside the vertex selected by editIndices.
void OsdGcdEditVertexAdd(OsdVertexDescriptor const &vdesc, float *vertex,
void OsdGcdEditVertexAdd(float *vertex,
OsdVertexBufferDescriptor const &vertexDesc,
int primVarOffset, int primVarWidth,
int vertexOffset, int tableOffset,
int start, int end,
const unsigned int *editIndices, const float *editValues,
dispatch_queue_t gcdq);
void OsdGcdEditVertexSet(OsdVertexDescriptor const &vdesc, float *vertex,
void OsdGcdEditVertexSet(float *vertex,
OsdVertexBufferDescriptor const &vertexDesc,
int primVarOffset, int primVarWidth,
int vertexOffset, int tableOffset,
int start, int end,

View File

@ -29,6 +29,7 @@
#include "../osd/mesh.h"
#include "../osd/glDrawContext.h"
#include "../osd/vertexDescriptor.h"
#ifdef OPENSUBDIV_HAS_OPENCL
#if defined(__APPLE__)
@ -125,6 +126,14 @@ public:
// Refines the whole mesh: runs every kernel batch of the far mesh through the
// compute controller using the mesh's own vertex and varying buffers and no
// explicit buffer descriptors.
virtual void Refine() {
_computeController->Refine(_computeContext, _farMesh->GetKernelBatches(), _vertexBuffer, _varyingBuffer);
}
// Refines the mesh using caller-supplied buffer descriptors.
//
// vertexDesc / varyingDesc are forwarded to the compute controller as-is.
// When `interleaved` is true the vertex buffer is passed as the varying buffer
// too, so both descriptors address the same buffer; otherwise the separate
// varying buffer is used.
virtual void Refine(OsdVertexBufferDescriptor const *vertexDesc,
OsdVertexBufferDescriptor const *varyingDesc,
bool interleaved) {
_computeController->Refine(_computeContext, _farMesh->GetKernelBatches(),
_vertexBuffer, (interleaved ? _vertexBuffer : _varyingBuffer),
vertexDesc, varyingDesc);
}
// Forwards to the compute controller's Synchronize().
virtual void Synchronize() {
_computeController->Synchronize();
}
@ -250,6 +259,7 @@ public:
// Deletes the far mesh, both data buffers, the compute context and the draw
// context. The `delete _varyingBuffer;` line is the one added by this hunk.
// The compute controller is not deleted here.
virtual ~OsdMesh() {
delete _farMesh;
delete _vertexBuffer;
delete _varyingBuffer;
delete _computeContext;
delete _drawContext;
}
@ -265,6 +275,14 @@ public:
// Refines the whole mesh: runs every kernel batch of the far mesh through the
// compute controller using the mesh's own vertex and varying buffers and no
// explicit buffer descriptors.
virtual void Refine() {
_computeController->Refine(_computeContext, _farMesh->GetKernelBatches(), _vertexBuffer, _varyingBuffer);
}
// Refines the mesh using caller-supplied buffer descriptors.
//
// vertexDesc / varyingDesc are forwarded to the compute controller as-is.
// When `interleaved` is true the vertex buffer is passed as the varying buffer
// too, so both descriptors address the same buffer; otherwise the separate
// varying buffer is used.
virtual void Refine(OsdVertexBufferDescriptor const *vertexDesc,
OsdVertexBufferDescriptor const *varyingDesc,
bool interleaved) {
_computeController->Refine(_computeContext, _farMesh->GetKernelBatches(),
_vertexBuffer, (interleaved ? _vertexBuffer : _varyingBuffer),
vertexDesc, varyingDesc);
}
// Forwards to the compute controller's Synchronize().
virtual void Synchronize() {
_computeController->Synchronize();
}

View File

@ -34,8 +34,7 @@
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
// Default constructor.
// NOTE: the first two lines are the pre-change form, which initialised the
// individual _current* members; the post-change constructor on the third line
// has no initialiser list (the bound state now lives in a BindState member
// with its own default constructor).
OsdGLSLComputeController::OsdGLSLComputeController()
: _currentVertexBuffer(0), _currentVaryingBuffer(0), _currentKernelBundle(NULL) {
OsdGLSLComputeController::OsdGLSLComputeController() {
}
OsdGLSLComputeController::~OsdGLSLComputeController() {
@ -54,20 +53,21 @@ OsdGLSLComputeController::Synchronize() {
}
// Returns the kernel bundle matching the given vertex / varying descriptors,
// compiling and registering a new one when the registry has no match.
//
// NOTE: this excerpt interleaves the pre-change lines (keyed on
// numVertexElements / numVaryingElements) with the post-change lines (keyed on
// the two OsdVertexBufferDescriptor arguments).
//
// The returned pointer is also stored in _kernelRegistry.
OsdGLSLComputeKernelBundle *
OsdGLSLComputeController::getKernels(int numVertexElements,
int numVaryingElements) {
OsdGLSLComputeController::getKernels(
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc) {
// Linear search of the registry with the bundle's Match predicate.
std::vector<OsdGLSLComputeKernelBundle*>::iterator it =
std::find_if(_kernelRegistry.begin(), _kernelRegistry.end(),
OsdGLSLComputeKernelBundle::Match(numVertexElements,
numVaryingElements));
OsdGLSLComputeKernelBundle::Match(vertexDesc,
varyingDesc));
if (it != _kernelRegistry.end()) {
return *it;
} else {
// No match: create, register, then compile a new bundle.
// NOTE(review): the bool returned by Compile() is ignored, so a bundle whose
// compilation failed is still registered and returned to the caller.
OsdGLSLComputeKernelBundle *kernelBundle =
new OsdGLSLComputeKernelBundle();
_kernelRegistry.push_back(kernelBundle);
kernelBundle->Compile(numVertexElements, numVaryingElements);
kernelBundle->Compile(vertexDesc, varyingDesc);
return kernelBundle;
}
}
@ -75,18 +75,21 @@ OsdGLSLComputeController::getKernels(int numVertexElements,
// Binds the currently selected buffers and kernel program for compute.
//
// NOTE: this excerpt interleaves the pre-change lines (reading the individual
// _current* members) with the post-change lines (reading _currentBindState).
//
// The vertex buffer goes to shader-storage binding 0 and the varying buffer to
// binding 1; a buffer id of 0 is left unbound. The kernel bundle's program is
// then made current with the vertex / varying base offsets taken from the
// bound descriptors, followed by a texture-fetch memory barrier.
void
OsdGLSLComputeController::bindBufferAndProgram() {
if (_currentVertexBuffer)
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, _currentVertexBuffer);
if (_currentBindState.vertexBuffer)
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, _currentBindState.vertexBuffer);
if (_currentVaryingBuffer)
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, _currentVaryingBuffer);
if (_currentBindState.varyingBuffer)
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, _currentBindState.varyingBuffer);
_currentKernelBundle->UseProgram();
_currentBindState.kernelBundle->UseProgram(_currentBindState.vertexDesc.offset,
_currentBindState.varyingDesc.offset);
glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT);
}
void
OsdGLSLComputeController::unbindBufferAndProgram() {
glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, 0);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, 0);
glUseProgram(0);
@ -98,8 +101,9 @@ OsdGLSLComputeController::ApplyBilinearFaceVerticesKernel(
assert(context);
_currentKernelBundle->ApplyBilinearFaceVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
_currentBindState.kernelBundle->ApplyBilinearFaceVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd());
}
void
@ -108,8 +112,9 @@ OsdGLSLComputeController::ApplyBilinearEdgeVerticesKernel(
assert(context);
_currentKernelBundle->ApplyBilinearEdgeVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
_currentBindState.kernelBundle->ApplyBilinearEdgeVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd());
}
void
@ -118,8 +123,9 @@ OsdGLSLComputeController::ApplyBilinearVertexVerticesKernel(
assert(context);
_currentKernelBundle->ApplyBilinearVertexVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
_currentBindState.kernelBundle->ApplyBilinearVertexVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd());
}
void
@ -128,20 +134,20 @@ OsdGLSLComputeController::ApplyCatmarkFaceVerticesKernel(
assert(context);
_currentKernelBundle->ApplyCatmarkFaceVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
_currentBindState.kernelBundle->ApplyCatmarkFaceVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd());
}
void
OsdGLSLComputeController::ApplyCatmarkEdgeVerticesKernel(
FarKernelBatch const &batch, OsdGLSLComputeContext const *context) const {
assert(context);
_currentKernelBundle->ApplyCatmarkEdgeVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
_currentBindState.kernelBundle->ApplyCatmarkEdgeVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd());
}
void
@ -150,8 +156,9 @@ OsdGLSLComputeController::ApplyCatmarkVertexVerticesKernelB(
assert(context);
_currentKernelBundle->ApplyCatmarkVertexVerticesKernelB(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
_currentBindState.kernelBundle->ApplyCatmarkVertexVerticesKernelB(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd());
}
void
@ -160,8 +167,9 @@ OsdGLSLComputeController::ApplyCatmarkVertexVerticesKernelA1(
assert(context);
_currentKernelBundle->ApplyCatmarkVertexVerticesKernelA(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), false);
_currentBindState.kernelBundle->ApplyCatmarkVertexVerticesKernelA(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd(), false);
}
void
@ -170,8 +178,9 @@ OsdGLSLComputeController::ApplyCatmarkVertexVerticesKernelA2(
assert(context);
_currentKernelBundle->ApplyCatmarkVertexVerticesKernelA(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
_currentBindState.kernelBundle->ApplyCatmarkVertexVerticesKernelA(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd(), true);
}
void
@ -180,8 +189,9 @@ OsdGLSLComputeController::ApplyLoopEdgeVerticesKernel(
assert(context);
_currentKernelBundle->ApplyLoopEdgeVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
_currentBindState.kernelBundle->ApplyLoopEdgeVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd());
}
void
@ -190,8 +200,9 @@ OsdGLSLComputeController::ApplyLoopVertexVerticesKernelB(
assert(context);
_currentKernelBundle->ApplyLoopVertexVerticesKernelB(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
_currentBindState.kernelBundle->ApplyLoopVertexVerticesKernelB(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd());
}
void
@ -200,8 +211,9 @@ OsdGLSLComputeController::ApplyLoopVertexVerticesKernelA1(
assert(context);
_currentKernelBundle->ApplyLoopVertexVerticesKernelA(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), false);
_currentBindState.kernelBundle->ApplyLoopVertexVerticesKernelA(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd(), false);
}
void
@ -210,8 +222,9 @@ OsdGLSLComputeController::ApplyLoopVertexVerticesKernelA2(
assert(context);
_currentKernelBundle->ApplyLoopVertexVerticesKernelA(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
_currentBindState.kernelBundle->ApplyLoopVertexVerticesKernelA(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd(), true);
}
void
@ -229,12 +242,12 @@ OsdGLSLComputeController::ApplyVertexEdits(
int primvarWidth = edit->GetPrimvarWidth();
if (edit->GetOperation() == FarVertexEdit::Add) {
_currentKernelBundle->ApplyEditAdd( primvarOffset,
primvarWidth,
batch.GetVertexOffset(),
batch.GetTableOffset(),
batch.GetStart(),
batch.GetEnd());
_currentBindState.kernelBundle->ApplyEditAdd(primvarOffset,
primvarWidth,
batch.GetVertexOffset(),
batch.GetTableOffset(),
batch.GetStart(),
batch.GetEnd());
} else {
// XXX: edit SET is not implemented yet.
}

View File

@ -29,6 +29,7 @@
#include "../far/dispatcher.h"
#include "../osd/glslComputeContext.h"
#include "../osd/vertexDescriptor.h"
#include <vector>
@ -69,18 +70,25 @@ public:
///
/// @param varyingBuffer varying-interpolated data buffer
///
/// @param vertexDesc the descriptor of vertex elements to be refined.
/// if it's null, all primvars in the vertex buffer
/// will be refined.
///
/// @param varyingDesc the descriptor of varying elements to be refined.
/// if it's null, all primvars in the varying buffer
/// will be refined.
///
template<class VERTEX_BUFFER, class VARYING_BUFFER>
void Refine(OsdGLSLComputeContext const *context,
FarKernelBatchVector const &batches,
VERTEX_BUFFER *vertexBuffer,
VARYING_BUFFER *varyingBuffer) {
VARYING_BUFFER *varyingBuffer,
OsdVertexBufferDescriptor const *vertexDesc=NULL,
OsdVertexBufferDescriptor const *varyingDesc=NULL) {
if (batches.empty()) return;
int numVertexElements = vertexBuffer ? vertexBuffer->GetNumElements() : 0;
int numVaryingElements = varyingBuffer ? varyingBuffer->GetNumElements() : 0;
bind(vertexBuffer, varyingBuffer, getKernels(numVertexElements, numVaryingElements));
bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc);
// bind table buffers.
context->BindShaderStorageBuffers();
@ -141,42 +149,69 @@ protected:
void ApplyVertexEdits(FarKernelBatch const &batch, ComputeContext const *context) const;
OsdGLSLComputeKernelBundle * getKernels(int numVertexElements,
int numVaryingElements);
OsdGLSLComputeKernelBundle * getKernels(
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc);
void bindBufferAndProgram();
void unbindBufferAndProgram();
// Records the buffers, descriptors and kernel bundle to use for the following
// kernel launches, then binds them.
//
// NOTE: this excerpt interleaves the pre-change lines (the overload taking a
// kernelBundle and writing _currentVertexBuffer / _currentVaryingBuffer /
// _vdesc / _currentKernelBundle) with the post-change lines (the overload
// taking the two descriptors and writing _currentBindState).
//
// vertex / varying         buffers to bind; either may be null
// vertexDesc / varyingDesc optional descriptors; when null a descriptor is
//                          built from the buffer's GetNumElements()
template<class VERTEX_BUFFER, class VARYING_BUFFER>
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying, OsdGLSLComputeKernelBundle *kernelBundle) {
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying,
OsdVertexBufferDescriptor const *vertexDesc,
OsdVertexBufferDescriptor const *varyingDesc) {
_currentVertexBuffer = vertex ? vertex->BindVBO() : 0;
_currentVaryingBuffer = varying ? varying->BindVBO() : 0;
// if the vertex buffer descriptor is specified, use it.
// otherwise, assumes the data is tightly packed in the vertex buffer.
if (vertexDesc) {
_currentBindState.vertexDesc = *vertexDesc;
} else {
// Arguments are presumably (offset, length, stride) -- the constructor is not
// shown here, TODO confirm. A null buffer yields an all-zero descriptor.
int numElements = vertex ? vertex->GetNumElements() : 0;
_currentBindState.vertexDesc = OsdVertexBufferDescriptor(
0, numElements, numElements);
}
// Same rule for the varying descriptor.
if (varyingDesc) {
_currentBindState.varyingDesc = *varyingDesc;
} else {
int numElements = varying ? varying->GetNumElements() : 0;
_currentBindState.varyingDesc = OsdVertexBufferDescriptor(
0, numElements, numElements);
}
_vdesc.numVertexElements = vertex ? vertex->GetNumElements() : 0;
_vdesc.numVaryingElements = varying ? varying->GetNumElements() : 0;
_currentKernelBundle = kernelBundle;
// Buffer ids come from BindVBO(); 0 stands for "no buffer".
_currentBindState.vertexBuffer = vertex ? vertex->BindVBO() : 0;
_currentBindState.varyingBuffer = varying ? varying->BindVBO() : 0;
// The kernel bundle is looked up (or compiled) from the final descriptors.
_currentBindState.kernelBundle = getKernels(_currentBindState.vertexDesc,
_currentBindState.varyingDesc);
bindBufferAndProgram();
}
/// Unbinds any previously bound vertex and varying data buffers.
// NOTE: the two assignments to _currentVertexBuffer / _currentVaryingBuffer are
// the pre-change lines; the post-change code resets _currentBindState instead.
// In both forms unbindBufferAndProgram() is called last.
void unbind() {
_currentVertexBuffer = 0;
_currentVaryingBuffer = 0;
_currentBindState.Reset();
unbindBufferAndProgram();
}
private:
// Snapshot of what is currently bound for compute: the two GL buffer ids, the
// descriptors used to address them, and the kernel bundle selected for those
// descriptors.
struct BindState {
    GLuint vertexBuffer;                      // 0 means "no vertex buffer"
    GLuint varyingBuffer;                     // 0 means "no varying buffer"
    OsdVertexBufferDescriptor vertexDesc;
    OsdVertexBufferDescriptor varyingDesc;
    OsdGLSLComputeKernelBundle *kernelBundle;

    // Starts with no buffers and no kernel bundle; the descriptors are
    // default-constructed.
    BindState()
        : vertexBuffer(0),
          varyingBuffer(0),
          kernelBundle(NULL) {
    }

    // Clears the buffer ids and both descriptors. kernelBundle is left
    // untouched.
    void Reset() {
        varyingBuffer = 0;
        vertexBuffer = 0;
        vertexDesc.Reset();
        varyingDesc.Reset();
    }
};
BindState _currentBindState;
std::vector<OsdGLSLComputeKernelBundle *> _kernelRegistry;
GLuint _currentVertexBuffer, _currentVaryingBuffer;
OsdVertexDescriptor _vdesc;
OsdGLSLComputeKernelBundle * _currentKernelBundle;
};
} // end namespace OPENSUBDIV_VERSION

View File

@ -31,6 +31,8 @@ uniform int vertexOffset = 0; // vertex index offset for the batch
uniform int tableOffset = 0; // offset of subdivision table
uniform int indexStart = 0; // start index relative to tableOffset
uniform int indexEnd = 0; // end index relative to tableOffset
uniform int vertexBaseOffset = 0; // base vbo offset of the vertex buffer
uniform int varyingBaseOffset = 0; // base vbo offset of the varying buffer
uniform bool vertexPass;
/*
@ -40,6 +42,22 @@ uniform bool vertexPass;
^ ^ ^
vertexOffset | |
indexStart indexEnd
   interleaved buffer example
   +---------------------------+
   | x | y | z | r | g | b | a |
   +---------------------------+
     ^
     vertexBaseOffset
                 ^
                 varyingBaseOffset
   NUM_VERTEX_ELEMENTS = 3
   NUM_VARYING_ELEMENTS = 4
   VERTEX_STRIDE = VARYING_STRIDE = 7
*/
layout(binding=0) buffer vertex_buffer { float vertexBuffer[]; };
@ -86,13 +104,15 @@ Vertex readVertex(int index)
Vertex v;
#if NUM_VERTEX_ELEMENTS > 0
int vertexIndex = index * VERTEX_STRIDE + vertexBaseOffset;
for (int i = 0; i < NUM_VERTEX_ELEMENTS; i++) {
v.vertexData[i] = vertexBuffer[index*NUM_VERTEX_ELEMENTS+i];
v.vertexData[i] = vertexBuffer[vertexIndex + i];
}
#endif
#if NUM_VARYING_ELEMENTS > 0
int varyingIndex = index * VARYING_STRIDE + varyingBaseOffset;
for (int i = 0; i < NUM_VARYING_ELEMENTS; i++) {
v.varyingData[i] = varyingBuffer[index*NUM_VARYING_ELEMENTS+i];
v.varyingData[i] = varyingBuffer[varyingIndex + i];
}
#endif
return v;
@ -101,13 +121,15 @@ Vertex readVertex(int index)
// Stores the vertex and varying data of `v` into element `index` of the
// output buffers.
//
// NOTE: this excerpt interleaves the pre-change store lines (tightly packed,
// index*NUM_*_ELEMENTS+i) with the post-change store lines, which address each
// element as index * STRIDE + base offset so that the data may live inside a
// larger interleaved buffer.
void writeVertex(int index, Vertex v)
{
#if NUM_VERTEX_ELEMENTS > 0
// First float of this element's vertex data.
int vertexIndex = index * VERTEX_STRIDE + vertexBaseOffset;
for (int i = 0; i < NUM_VERTEX_ELEMENTS; i++) {
vertexBuffer[index*NUM_VERTEX_ELEMENTS+i] = v.vertexData[i];
vertexBuffer[vertexIndex + i] = v.vertexData[i];
}
#endif
#if NUM_VARYING_ELEMENTS > 0
// First float of this element's varying data.
int varyingIndex = index * VARYING_STRIDE + varyingBaseOffset;
for (int i = 0; i < NUM_VARYING_ELEMENTS; i++) {
varyingBuffer[index*NUM_VARYING_ELEMENTS+i] = v.varyingData[i];
varyingBuffer[varyingIndex + i] = v.varyingData[i];
}
#endif
}
@ -152,6 +174,7 @@ void catmarkComputeFace()
addWithWeight(dst, readVertex(index), weight);
addVaryingWithWeight(dst, readVertex(index), weight);
}
writeVertex(vid, dst);
}
@ -356,6 +379,7 @@ void editAdd()
// seemingly we can't iterate dynamically over vertexData[n]
// due to mysterious glsl runtime limitation...?
#if NUM_VERTEX_ELEMENTS > 0
for (int j = 0; j < NUM_VERTEX_ELEMENTS; ++j) {
float editValue = _editValues[i*editPrimVarWidth + min(j, editPrimVarWidth)];
editValue *= float(j >= editPrimVarOffset);
@ -363,6 +387,7 @@ void editAdd()
dst.vertexData[j] += editValue;
}
writeVertex(v + vertexOffset, dst);
#endif
}
void main()

View File

@ -37,6 +37,7 @@
#include "../osd/opengl.h"
#include <cassert>
#include <sstream>
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
@ -46,7 +47,11 @@ static const char *shaderSource =
;
OsdGLSLComputeKernelBundle::OsdGLSLComputeKernelBundle()
: _program(0) {
: _program(0),
_numVertexElements(0),
_vertexStride(0),
_numVaryingElements(0),
_varyingStride(0) {
// XXX: too rough!
_workGroupSize = 64;
@ -58,9 +63,14 @@ OsdGLSLComputeKernelBundle::~OsdGLSLComputeKernelBundle() {
}
bool
OsdGLSLComputeKernelBundle::Compile(int numVertexElements, int numVaryingElements) {
OsdGLSLComputeKernelBundle::Compile(
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc) {
_vdesc.Set(numVertexElements, numVaryingElements );
_numVertexElements = vertexDesc.length;
_vertexStride = vertexDesc.stride;
_numVaryingElements = varyingDesc.length;
_varyingStride = varyingDesc.stride;
if (_program) {
glDeleteProgram(_program);
@ -70,15 +80,16 @@ OsdGLSLComputeKernelBundle::Compile(int numVertexElements, int numVaryingElement
GLuint shader = glCreateShader(GL_COMPUTE_SHADER);
char constantDefine[256];
snprintf(constantDefine, 256,
"#define NUM_VERTEX_ELEMENTS %d\n"
"#define NUM_VARYING_ELEMENTS %d\n"
"#define WORK_GROUP_SIZE %d\n",
numVertexElements, numVaryingElements, _workGroupSize);
std::ostringstream defines;
defines << "#define NUM_VERTEX_ELEMENTS " << _numVertexElements << "\n"
<< "#define VERTEX_STRIDE " << _vertexStride << "\n"
<< "#define NUM_VARYING_ELEMENTS " << _numVaryingElements << "\n"
<< "#define VARYING_STRIDE " << _varyingStride << "\n"
<< "#define WORK_GROUP_SIZE " << _workGroupSize << "\n";
std::string defineStr = defines.str();
const char *shaderSources[3];
shaderSources[0] = constantDefine;
shaderSources[0] = defineStr.c_str();
shaderSources[1] = shaderSource;
glShaderSource(shader, 2, shaderSources, NULL);
glCompileShader(shader);
@ -98,9 +109,6 @@ OsdGLSLComputeKernelBundle::Compile(int numVertexElements, int numVaryingElement
glDeleteProgram(_program);
_program = 0;
// XXX ERROR HANDLE
printf("%s\n", constantDefine);
assert(false);
return false;
}
@ -129,11 +137,13 @@ OsdGLSLComputeKernelBundle::Compile(int numVertexElements, int numVaryingElement
"loopComputeVertexB");
// set uniform locations for compute
_uniformVertexPass = glGetUniformLocation(_program, "vertexPass");
_uniformVertexOffset = glGetUniformLocation(_program, "vertexOffset");
_uniformTableOffset = glGetUniformLocation(_program, "tableOffset");
_uniformIndexStart = glGetUniformLocation(_program, "indexStart");
_uniformIndexEnd = glGetUniformLocation(_program, "indexEnd");
_uniformVertexPass = glGetUniformLocation(_program, "vertexPass");
_uniformVertexOffset = glGetUniformLocation(_program, "vertexOffset");
_uniformTableOffset = glGetUniformLocation(_program, "tableOffset");
_uniformIndexStart = glGetUniformLocation(_program, "indexStart");
_uniformIndexEnd = glGetUniformLocation(_program, "indexEnd");
_uniformVertexBaseOffset = glGetUniformLocation(_program, "vertexBaseOffset");
_uniformVaryingBaseOffset = glGetUniformLocation(_program, "varyingBaseOffset");
_tableUniforms[FarSubdivisionTables::F_IT] = glGetUniformLocation(_program, "_F0_IT");
_tableUniforms[FarSubdivisionTables::F_ITa] = glGetUniformLocation(_program, "_F0_ITa");
@ -176,8 +186,7 @@ OsdGLSLComputeKernelBundle::dispatchCompute(
// We found a problem (issue #295) with NVIDIA driver 331.49 / Quadro 4000
// resulting in invalid vertices.
// Apparently adding TEXTURE_FETCH_BARRIER after face kernel fixes it.
// We'll revisit this later.
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
// The workaround is commented out, since it appears to be fixed as of driver 334.xx.
}
void
@ -186,6 +195,8 @@ OsdGLSLComputeKernelBundle::ApplyBilinearFaceVerticesKernel(
glUniformSubroutinesuiv(GL_COMPUTE_SHADER, 1, &_subComputeFace);
dispatchCompute(vertexOffset, tableOffset, start, end);
// glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT);
}
void
@ -213,8 +224,8 @@ OsdGLSLComputeKernelBundle::ApplyCatmarkFaceVerticesKernel(
dispatchCompute(vertexOffset, tableOffset, start, end);
// see the comment in dispatchCompute()
// this workaround could be a performance problem
glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT);
// this workaround causes a performance problem.
// glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT);
}
void
@ -279,9 +290,13 @@ OsdGLSLComputeKernelBundle::ApplyEditAdd(
}
// Makes this bundle's program current and uploads the base offsets of the
// vertex and varying data to the vertexBaseOffset / varyingBaseOffset
// uniforms.
//
// NOTE: the parameterless signature line is the pre-change form; the
// two-argument signature and the two glUniform1i() calls are the post-change
// lines.
void
OsdGLSLComputeKernelBundle::UseProgram() const
OsdGLSLComputeKernelBundle::UseProgram(int vertexBaseOffset,
int varyingBaseOffset) const
{
glUseProgram(_program);
// Offsets are plain uniforms, so they can change without recompiling.
glUniform1i(_uniformVertexBaseOffset, vertexBaseOffset);
glUniform1i(_uniformVaryingBaseOffset, varyingBaseOffset);
}
} // end namespace OPENSUBDIV_VERSION

View File

@ -42,7 +42,8 @@ public:
OsdGLSLComputeKernelBundle();
~OsdGLSLComputeKernelBundle();
bool Compile(int numVertexElements, int numVaryingElements);
bool Compile(OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc);
void ApplyBilinearFaceVerticesKernel(
int vertexOffset, int tableOffset, int start, int end);
@ -75,32 +76,40 @@ public:
int vertexOffset, int tableOffset, int start, int end, bool pass);
void ApplyEditAdd(int primvarOffset, int primvarWidth,
int vertexOffset, int tableOffset, int start, int end);
int vertexOffset, int tableOffset,
int start, int end);
void UseProgram() const;
void UseProgram(int vertexBaseOffset, int varyingBaseOffset) const;
GLuint GetTableUniformLocation(int tableIndex) const {
return _tableUniforms[tableIndex];
}
// Predicate used to look up an already compiled kernel bundle.
//
// NOTE: this excerpt interleaves the pre-change lines (constructor taking
// element counts, comparison through `vdesc`) with the post-change lines
// (constructor taking two descriptors, field-by-field comparison).
struct Match {
/// Constructor
Match(int numVertexElements, int numVaryingElements)
: vdesc(numVertexElements, numVaryingElements) {
Match(OsdVertexBufferDescriptor const &vertex,
OsdVertexBufferDescriptor const &varying)
: vertexDesc(vertex), varyingDesc(varying) {
}
// True when `kernel` was compiled for the same length and stride of both the
// vertex and the varying data.
bool operator() (OsdGLSLComputeKernelBundle const *kernel) {
return vdesc == kernel->_vdesc;
// offset is dynamic. just comparing length and stride here,
// returns true if they are equal
return (vertexDesc.length == kernel->_numVertexElements and
vertexDesc.stride == kernel->_vertexStride and
varyingDesc.length == kernel->_numVaryingElements and
varyingDesc.stride == kernel->_varyingStride);
}
OsdVertexDescriptor vdesc;
OsdVertexBufferDescriptor vertexDesc;
OsdVertexBufferDescriptor varyingDesc;
};
friend struct Match;
protected:
void dispatchCompute(int vertexOffset, int tableOffset, int start, int end) const;
void dispatchCompute(int vertexOffset, int tableOffset,
int start, int end) const ;
GLuint _program;
@ -111,6 +120,8 @@ protected:
GLuint _uniformTableOffset;
GLuint _uniformIndexStart;
GLuint _uniformIndexEnd;
GLuint _uniformVertexBaseOffset;
GLuint _uniformVaryingBaseOffset;
// uniform locations for vertex edit
GLuint _uniformEditPrimVarOffset;
@ -135,7 +146,10 @@ protected:
int _workGroupSize;
OsdVertexDescriptor _vdesc;
int _numVertexElements;
int _vertexStride;
int _numVaryingElements;
int _varyingStride;
};
} // end namespace OPENSUBDIV_VERSION

View File

@ -140,6 +140,7 @@ mat4 OsdModelViewProjectionMatrix();
float OsdTessLevel();
int OsdGregoryQuadOffsetBase();
int OsdPrimitiveIdBase();
int OsdBaseVertex();
float GetTessLevel(int patchLevel)
{

View File

@ -68,6 +68,14 @@ out block {
OSD_USER_VARYING_DECLARE
} outpt;
// Fetches three consecutive elements of a vertex from OsdVertexBuffer and
// returns them as a vec3.
//
// vertexIndex is first shifted by OsdBaseVertex(), then scaled by
// OSD_NUM_ELEMENTS to locate the vertex's first element. Each component is
// read from the .x channel of its own texel.
vec3 readVertex(uint vertexIndex)
{
    uint shiftedIndex = vertexIndex;
    shiftedIndex += OsdBaseVertex();

    // texel index of the first element of this vertex
    int firstTexel = int(OSD_NUM_ELEMENTS*shiftedIndex);

    float x = texelFetch(OsdVertexBuffer, firstTexel).x;
    float y = texelFetch(OsdVertexBuffer, firstTexel + 1).x;
    float z = texelFetch(OsdVertexBuffer, firstTexel + 2).x;
    return vec3(x, y, z);
}
void main()
{
int vID = gl_VertexID;
@ -122,38 +130,23 @@ void main()
}
#endif
vec3 neighbor =
vec3(texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_neighbor)).x,
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_neighbor+1)).x,
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_neighbor+2)).x);
vec3 neighbor = readVertex(idx_neighbor);
uint idx_diagonal = uint(texelFetch(OsdValenceBuffer, int(vID * (2*OSD_MAX_VALENCE+1) + 2*i + 1 + 1)).x);
vec3 diagonal =
vec3(texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_diagonal)).x,
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_diagonal+1)).x,
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_diagonal+2)).x);
vec3 diagonal = readVertex(idx_diagonal);
uint idx_neighbor_p = uint(texelFetch(OsdValenceBuffer, int(vID * (2*OSD_MAX_VALENCE+1) + 2*ip + 0 + 1)).x);
vec3 neighbor_p =
vec3(texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_neighbor_p)).x,
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_neighbor_p+1)).x,
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_neighbor_p+2)).x);
vec3 neighbor_p = readVertex(idx_neighbor_p);
uint idx_neighbor_m = uint(texelFetch(OsdValenceBuffer, int(vID * (2*OSD_MAX_VALENCE+1) + 2*im + 0 + 1)).x);
vec3 neighbor_m =
vec3(texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_neighbor_m)).x,
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_neighbor_m+1)).x,
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_neighbor_m+2)).x);
vec3 neighbor_m = readVertex(idx_neighbor_m);
uint idx_diagonal_m = uint(texelFetch(OsdValenceBuffer, int(vID * (2*OSD_MAX_VALENCE+1) + 2*im + 1 + 1)).x);
vec3 diagonal_m =
vec3(texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_diagonal_m)).x,
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_diagonal_m+1)).x,
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_diagonal_m+2)).x);
vec3 diagonal_m = readVertex(idx_diagonal_m);
f[i] = (pos * float(valence) + (neighbor_p + neighbor)*2.0f + diagonal) / (float(valence)+5.0f);
@ -186,24 +179,16 @@ void main()
if (ivalence < 0) {
if (valence > 2) {
outpt.v.position = (
vec3(texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[0])).x,
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[0]+1)).x,
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[0]+2)).x) +
vec3(texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[1])).x,
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[1]+1)).x,
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[1]+2)).x) +
readVertex(boundaryEdgeNeighbors[0]) +
readVertex(boundaryEdgeNeighbors[1]) +
4.0f * pos)/6.0f;
} else {
outpt.v.position = pos;
}
outpt.v.e0 = (
vec3(texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[0])).x,
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[0]+1)).x,
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[0]+2)).x) -
vec3(texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[1])).x,
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[1]+1)).x,
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[1]+2)).x)
readVertex(boundaryEdgeNeighbors[0]) -
readVertex(boundaryEdgeNeighbors[1])
)/6.0;
float k = float(float(valence) - 1.0f); //k is the number of faces
@ -216,18 +201,11 @@ void main()
int idx_diagonal = texelFetch(OsdValenceBuffer,int((vID) * (2*OSD_MAX_VALENCE+1) + 2*zerothNeighbor + 1 + 1)).x;
idx_diagonal = abs(idx_diagonal);
vec3 diagonal =
vec3(texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_diagonal)).x,
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_diagonal+1)).x,
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_diagonal+2)).x);
vec3 diagonal = readVertex(idx_diagonal);
outpt.v.e1 = gamma * pos +
alpha_0k * vec3(texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[0])).x,
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[0]+1)).x,
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[0]+2)).x) +
alpha_0k * vec3(texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[1])).x,
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[1]+1)).x,
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[1]+2)).x) +
alpha_0k * readVertex(boundaryEdgeNeighbors[0]) +
alpha_0k * readVertex(boundaryEdgeNeighbors[1]) +
beta_0 * diagonal;
for (uint x=1; x<valence - 1; ++x) {
@ -238,17 +216,11 @@ void main()
int idx_neighbor = texelFetch(OsdValenceBuffer, int((vID) * (2*OSD_MAX_VALENCE+1) + 2*curri + 0 + 1)).x;
idx_neighbor = abs(idx_neighbor);
vec3 neighbor =
vec3(texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_neighbor)).x,
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_neighbor+1)).x,
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_neighbor+2)).x);
vec3 neighbor = readVertex(idx_neighbor);
idx_diagonal = texelFetch(OsdValenceBuffer, int((vID) * (2*OSD_MAX_VALENCE+1) + 2*curri + 1 + 1)).x;
diagonal =
vec3(texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_diagonal)).x,
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_diagonal+1)).x,
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_diagonal+2)).x);
diagonal = readVertex(idx_diagonal);
outpt.v.e1 += alpha * neighbor + beta * diagonal;
}

View File

@ -35,9 +35,7 @@ namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
OsdGLSLTransformFeedbackComputeController::OsdGLSLTransformFeedbackComputeController() :
_vertexTexture(0), _varyingTexture(0),
_currentVertexBuffer(0), _currentVaryingBuffer(0),
_currentKernelBundle(NULL) {
_vertexTexture(0), _varyingTexture(0), _vao(0) {
}
OsdGLSLTransformFeedbackComputeController::~OsdGLSLTransformFeedbackComputeController() {
@ -58,19 +56,23 @@ OsdGLSLTransformFeedbackComputeController::Synchronize() {
}
OsdGLSLTransformFeedbackKernelBundle *
OsdGLSLTransformFeedbackComputeController::getKernels(int numVertexElements,
int numVaryingElements) {
OsdGLSLTransformFeedbackComputeController::getKernels(
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
bool interleaved) {
std::vector<OsdGLSLTransformFeedbackKernelBundle*>::iterator it =
std::find_if(_kernelRegistry.begin(), _kernelRegistry.end(),
OsdGLSLTransformFeedbackKernelBundle::Match(numVertexElements,
numVaryingElements));
OsdGLSLTransformFeedbackKernelBundle::Match(
vertexDesc, varyingDesc, interleaved));
if (it != _kernelRegistry.end()) {
return *it;
} else {
OsdGLSLTransformFeedbackKernelBundle *kernelBundle = new OsdGLSLTransformFeedbackKernelBundle();
OsdGLSLTransformFeedbackKernelBundle *kernelBundle =
new OsdGLSLTransformFeedbackKernelBundle();
_kernelRegistry.push_back(kernelBundle);
kernelBundle->Compile(numVertexElements, numVaryingElements);
kernelBundle->Compile(vertexDesc, varyingDesc, interleaved);
return kernelBundle;
}
}
@ -86,54 +88,59 @@ bindTexture(GLint samplerUniform, GLuint texture, int unit) {
}
void
OsdGLSLTransformFeedbackComputeController::bindTextures() {
OsdGLSLTransformFeedbackComputeController::bindResources() {
glEnable(GL_RASTERIZER_DISCARD);
_currentKernelBundle->UseProgram();
_currentBindState.kernelBundle->UseProgram(_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
// bind vertex texture
if (_currentVertexBuffer) {
if (_currentBindState.vertexBuffer) {
if (not _vertexTexture) glGenTextures(1, &_vertexTexture);
#if defined(GL_EXT_direct_state_access)
if (glTextureBufferEXT) {
glTextureBufferEXT(_vertexTexture, GL_TEXTURE_BUFFER, GL_R32F, _currentVertexBuffer);
glTextureBufferEXT(_vertexTexture, GL_TEXTURE_BUFFER, GL_R32F, _currentBindState.vertexBuffer);
} else {
#else
{
#endif
glBindTexture(GL_TEXTURE_BUFFER, _vertexTexture);
glTexBuffer(GL_TEXTURE_BUFFER, GL_R32F, _currentVertexBuffer);
glTexBuffer(GL_TEXTURE_BUFFER, GL_R32F, _currentBindState.vertexBuffer);
glBindTexture(GL_TEXTURE_BUFFER, 0);
}
}
if (_currentVaryingBuffer) {
if (_currentBindState.varyingBuffer) {
if (not _varyingTexture) glGenTextures(1, &_varyingTexture);
#if defined(GL_EXT_direct_state_access)
if (glTextureBufferEXT) {
glTextureBufferEXT(_varyingTexture, GL_TEXTURE_BUFFER, GL_R32F, _currentVaryingBuffer);
glTextureBufferEXT(_varyingTexture, GL_TEXTURE_BUFFER, GL_R32F, _currentBindState.varyingBuffer);
} else {
#else
{
#endif
glBindTexture(GL_TEXTURE_BUFFER, _varyingTexture);
glTexBuffer(GL_TEXTURE_BUFFER, GL_R32F, _currentVaryingBuffer);
glTexBuffer(GL_TEXTURE_BUFFER, GL_R32F, _currentBindState.varyingBuffer);
glBindTexture(GL_TEXTURE_BUFFER, 0);
}
}
if (_vertexTexture)
bindTexture(_currentKernelBundle->GetVertexUniformLocation(), _vertexTexture, 0);
bindTexture(_currentBindState.kernelBundle->GetVertexUniformLocation(), _vertexTexture, 0);
if (_varyingTexture)
bindTexture(_currentKernelBundle->GetVaryingUniformLocation(), _varyingTexture, 1);
bindTexture(_currentBindState.kernelBundle->GetVaryingUniformLocation(), _varyingTexture, 1);
// bind vertex texture image (for edit kernel)
glUniform1i(_currentKernelBundle->GetVertexBufferImageUniformLocation(), 0);
glUniform1i(_currentBindState.kernelBundle->GetVertexBufferImageUniformLocation(), 0);
glBindImageTexture(0, _vertexTexture, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32F);
// bind vertex array
// always create new one, to be safe with multiple contexts.
glGenVertexArrays(1, &_vao);
glBindVertexArray(_vao);
}
void
OsdGLSLTransformFeedbackComputeController::unbindTextures() {
OsdGLSLTransformFeedbackComputeController::unbindResources() {
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_BUFFER, 0);
@ -146,6 +153,10 @@ OsdGLSLTransformFeedbackComputeController::unbindTextures() {
glDisable(GL_RASTERIZER_DISCARD);
glUseProgram(0);
glActiveTexture(GL_TEXTURE0);
// unbind vertex array
glBindVertexArray(0);
glDeleteVertexArrays(1, &_vao);
}
void
@ -154,9 +165,9 @@ OsdGLSLTransformFeedbackComputeController::ApplyBilinearFaceVerticesKernel(
assert(context);
_currentKernelBundle->ApplyBilinearFaceVerticesKernel(
_currentVertexBuffer, _vdesc.numVertexElements,
_currentVaryingBuffer, _vdesc.numVaryingElements,
_currentBindState.kernelBundle->ApplyBilinearFaceVerticesKernel(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
@ -166,9 +177,9 @@ OsdGLSLTransformFeedbackComputeController::ApplyBilinearEdgeVerticesKernel(
assert(context);
_currentKernelBundle->ApplyBilinearEdgeVerticesKernel(
_currentVertexBuffer, _vdesc.numVertexElements,
_currentVaryingBuffer, _vdesc.numVaryingElements,
_currentBindState.kernelBundle->ApplyBilinearEdgeVerticesKernel(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
@ -178,9 +189,9 @@ OsdGLSLTransformFeedbackComputeController::ApplyBilinearVertexVerticesKernel(
assert(context);
_currentKernelBundle->ApplyBilinearVertexVerticesKernel(
_currentVertexBuffer, _vdesc.numVertexElements,
_currentVaryingBuffer, _vdesc.numVaryingElements,
_currentBindState.kernelBundle->ApplyBilinearVertexVerticesKernel(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
@ -190,9 +201,9 @@ OsdGLSLTransformFeedbackComputeController::ApplyCatmarkFaceVerticesKernel(
assert(context);
_currentKernelBundle->ApplyCatmarkFaceVerticesKernel(
_currentVertexBuffer, _vdesc.numVertexElements,
_currentVaryingBuffer, _vdesc.numVaryingElements,
_currentBindState.kernelBundle->ApplyCatmarkFaceVerticesKernel(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
@ -204,9 +215,9 @@ OsdGLSLTransformFeedbackComputeController::ApplyCatmarkEdgeVerticesKernel(
assert(context);
_currentKernelBundle->ApplyCatmarkEdgeVerticesKernel(
_currentVertexBuffer, _vdesc.numVertexElements,
_currentVaryingBuffer, _vdesc.numVaryingElements,
_currentBindState.kernelBundle->ApplyCatmarkEdgeVerticesKernel(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
@ -216,9 +227,9 @@ OsdGLSLTransformFeedbackComputeController::ApplyCatmarkVertexVerticesKernelB(
assert(context);
_currentKernelBundle->ApplyCatmarkVertexVerticesKernelB(
_currentVertexBuffer, _vdesc.numVertexElements,
_currentVaryingBuffer, _vdesc.numVaryingElements,
_currentBindState.kernelBundle->ApplyCatmarkVertexVerticesKernelB(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
@ -228,9 +239,9 @@ OsdGLSLTransformFeedbackComputeController::ApplyCatmarkVertexVerticesKernelA1(
assert(context);
_currentKernelBundle->ApplyCatmarkVertexVerticesKernelA(
_currentVertexBuffer, _vdesc.numVertexElements,
_currentVaryingBuffer, _vdesc.numVaryingElements,
_currentBindState.kernelBundle->ApplyCatmarkVertexVerticesKernelA(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), false);
}
@ -240,9 +251,9 @@ OsdGLSLTransformFeedbackComputeController::ApplyCatmarkVertexVerticesKernelA2(
assert(context);
_currentKernelBundle->ApplyCatmarkVertexVerticesKernelA(
_currentVertexBuffer, _vdesc.numVertexElements,
_currentVaryingBuffer, _vdesc.numVaryingElements,
_currentBindState.kernelBundle->ApplyCatmarkVertexVerticesKernelA(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
}
@ -252,9 +263,9 @@ OsdGLSLTransformFeedbackComputeController::ApplyLoopEdgeVerticesKernel(
assert(context);
_currentKernelBundle->ApplyLoopEdgeVerticesKernel(
_currentVertexBuffer, _vdesc.numVertexElements,
_currentVaryingBuffer, _vdesc.numVaryingElements,
_currentBindState.kernelBundle->ApplyLoopEdgeVerticesKernel(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
@ -264,9 +275,9 @@ OsdGLSLTransformFeedbackComputeController::ApplyLoopVertexVerticesKernelB(
assert(context);
_currentKernelBundle->ApplyLoopVertexVerticesKernelB(
_currentVertexBuffer, _vdesc.numVertexElements,
_currentVaryingBuffer, _vdesc.numVaryingElements,
_currentBindState.kernelBundle->ApplyLoopVertexVerticesKernelB(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
@ -276,9 +287,9 @@ OsdGLSLTransformFeedbackComputeController::ApplyLoopVertexVerticesKernelA1(
assert(context);
_currentKernelBundle->ApplyLoopVertexVerticesKernelA(
_currentVertexBuffer, _vdesc.numVertexElements,
_currentVaryingBuffer, _vdesc.numVaryingElements,
_currentBindState.kernelBundle->ApplyLoopVertexVerticesKernelA(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), false);
}
@ -288,9 +299,9 @@ OsdGLSLTransformFeedbackComputeController::ApplyLoopVertexVerticesKernelA2(
assert(context);
_currentKernelBundle->ApplyLoopVertexVerticesKernelA(
_currentVertexBuffer, _vdesc.numVertexElements,
_currentVaryingBuffer, _vdesc.numVaryingElements,
_currentBindState.kernelBundle->ApplyLoopVertexVerticesKernelA(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
}
@ -303,15 +314,15 @@ OsdGLSLTransformFeedbackComputeController::ApplyVertexEdits(
const OsdGLSLTransformFeedbackHEditTable * edit = context->GetEditTable(batch.GetTableIndex());
assert(edit);
context->BindEditTextures(batch.GetTableIndex(), _currentKernelBundle);
context->BindEditTextures(batch.GetTableIndex(), _currentBindState.kernelBundle);
int primvarOffset = edit->GetPrimvarOffset();
int primvarWidth = edit->GetPrimvarWidth();
if (edit->GetOperation() == FarVertexEdit::Add) {
_currentKernelBundle->ApplyEditAdd(
_currentVertexBuffer, _vdesc.numVertexElements,
_currentVaryingBuffer, _vdesc.numVaryingElements,
_currentBindState.kernelBundle->ApplyEditAdd(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
primvarOffset, primvarWidth,
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
} else {

View File

@ -29,6 +29,7 @@
#include "../far/dispatcher.h"
#include "../osd/glslTransformFeedbackComputeContext.h"
#include "../osd/vertexDescriptor.h"
#include <vector>
@ -69,16 +70,26 @@ public:
///
/// @param varyingBuffer varying-interpolated data buffer
///
/// @param vertexDesc the descriptor of vertex elements to be refined.
/// if it's null, all primvars in the vertex buffer
/// will be refined.
///
/// @param varyingDesc the descriptor of varying elements to be refined.
/// if it's null, all primvars in the varying buffer
/// will be refined.
///
template<class VERTEX_BUFFER, class VARYING_BUFFER>
void Refine(OsdGLSLTransformFeedbackComputeContext const *context,
FarKernelBatchVector const &batches,
VERTEX_BUFFER *vertexBuffer,
VARYING_BUFFER *varyingBuffer) {
VARYING_BUFFER *varyingBuffer,
OsdVertexBufferDescriptor const *vertexDesc=NULL,
OsdVertexBufferDescriptor const *varyingDesc=NULL) {
if (batches.empty()) return;
bind(vertexBuffer, varyingBuffer);
context->BindTableTextures(_currentKernelBundle);
bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc);
context->BindTableTextures(_currentBindState.kernelBundle);
FarDispatcher::Refine(this, context, batches, /*maxlevel*/-1);
@ -136,47 +147,73 @@ protected:
void ApplyVertexEdits(FarKernelBatch const &batch, ComputeContext const *context) const;
OsdGLSLTransformFeedbackKernelBundle * getKernels(int numVertexElements,
int numVaryingElements);
OsdGLSLTransformFeedbackKernelBundle * getKernels(
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
bool interleaved);
void bindTextures();
void bindResources();
void unbindTextures();
void unbindResources();
template<class VERTEX_BUFFER, class VARYING_BUFFER>
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying) {
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying,
OsdVertexBufferDescriptor const *vertexDesc,
OsdVertexBufferDescriptor const *varyingDesc) {
_currentVertexBuffer = vertex ? vertex->BindVBO() : 0;
_currentVaryingBuffer = varying ? varying->BindVBO() : 0;
// if the vertex buffer descriptor is specified, use it.
// otherwise, assumes the data is tightly packed in the vertex buffer.
if (vertexDesc) {
_currentBindState.vertexDesc = *vertexDesc;
} else {
int numElements = vertex ? vertex->GetNumElements() : 0;
_currentBindState.vertexDesc = OsdVertexBufferDescriptor(
0, numElements, numElements);
}
if (varyingDesc) {
_currentBindState.varyingDesc = *varyingDesc;
} else {
int numElements = varying ? varying->GetNumElements() : 0;
_currentBindState.varyingDesc = OsdVertexBufferDescriptor(
0, numElements, numElements);
}
_vdesc.numVertexElements = vertex ? vertex->GetNumElements() : 0;
_vdesc.numVaryingElements = varying ? varying->GetNumElements() : 0;
bool interleaved = (vertex and varying and (vertex == varying));
_currentBindState.vertexBuffer = vertex ? vertex->BindVBO() : 0;
_currentBindState.varyingBuffer = varying ? varying->BindVBO() : 0;
_currentBindState.kernelBundle = getKernels(_currentBindState.vertexDesc,
_currentBindState.varyingDesc,
interleaved);
_currentKernelBundle =
getKernels(_vdesc.numVertexElements, _vdesc.numVaryingElements);
bindTextures();
bindResources();
}
/// Unbinds any previously bound vertex and varying data buffers.
void unbind() {
_currentVertexBuffer = 0;
_currentVaryingBuffer = 0;
_currentKernelBundle = NULL;
_currentBindState.Reset();
unbindTextures();
unbindResources();
}
private:
struct BindState {
BindState() : vertexBuffer(0), varyingBuffer(0), kernelBundle(NULL) {}
void Reset() {
vertexBuffer = varyingBuffer = 0;
vertexDesc.Reset();
varyingDesc.Reset();
}
GLuint vertexBuffer;
GLuint varyingBuffer;
OsdVertexBufferDescriptor vertexDesc;
OsdVertexBufferDescriptor varyingDesc;
OsdGLSLTransformFeedbackKernelBundle *kernelBundle;
};
BindState _currentBindState;
std::vector<OsdGLSLTransformFeedbackKernelBundle *> _kernelRegistry;
GLuint _vertexTexture, _varyingTexture;
GLuint _currentVertexBuffer, _currentVaryingBuffer;
OsdVertexDescriptor _vdesc;
OsdGLSLTransformFeedbackKernelBundle * _currentKernelBundle;
GLuint _vao;
};
} // end namespace OPENSUBDIV_VERSION

View File

@ -41,6 +41,8 @@ layout(size1x32) uniform imageBuffer _vertexBufferImage;
uniform int vertexOffset = 0; // vertex index offset for the batch
uniform int tableOffset = 0; // offset of subdivision table
uniform int indexStart = 0; // start index relative to tableOffset
uniform int vertexBaseOffset = 0; // base vbo offset of the vertex buffer
uniform int varyingBaseOffset = 0; // base vbo offset of the varying buffer
uniform bool vertexPass;
/*
@ -50,6 +52,12 @@ uniform bool vertexPass;
^ ^
vertexOffset |
indexStart
NUM_VERTEX_ELEMENTS = 3
NUM_VARYING_ELEMENTS = 4
VERTEX_STRIDE = VARYING_STRIDE = 7
*/
//--------------------------------------------------------------------------------
@ -100,13 +108,15 @@ Vertex readVertex(int index)
// unpacking
#if NUM_VERTEX_ELEMENTS > 0
int vertexIndex = index * VERTEX_STRIDE;
for(int i = 0; i < NUM_VERTEX_ELEMENTS; i++) {
v.vertexData[i] = texelFetch(vertexData, index*NUM_VERTEX_ELEMENTS+i).x;
v.vertexData[i] = texelFetch(vertexData, vertexIndex+i+vertexBaseOffset).x;
}
#endif
#if NUM_VARYING_ELEMENTS > 0
int varyingIndex = index * VARYING_STRIDE;
for(int i = 0; i < NUM_VARYING_ELEMENTS; i++){
v.varyingData[i] = texelFetch(varyingData, index*NUM_VARYING_ELEMENTS+i).x;
v.varyingData[i] = texelFetch(varyingData, varyingIndex+i+varyingBaseOffset).x;
}
#endif
return v;
@ -130,7 +140,7 @@ void writeVertex(Vertex v)
void writeVertexByImageStore(Vertex v, int index)
{
#if NUM_VERTEX_ELEMENTS > 0
int p = index * NUM_VERTEX_ELEMENTS;
int p = index * VERTEX_STRIDE + vertexBaseOffset;
for(int i = 0; i < NUM_VERTEX_ELEMENTS; i++) {
imageStore(_vertexBufferImage, p+i, vec4(v.vertexData[i], 0, 0, 0));
}

View File

@ -40,6 +40,7 @@
#include <cassert>
#include <string>
#include <sstream>
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
@ -61,7 +62,12 @@ static const char *shaderDefines = ""
;
OsdGLSLTransformFeedbackKernelBundle::OsdGLSLTransformFeedbackKernelBundle()
: _program(0) {
: _program(0),
_numVertexElements(0),
_vertexStride(0),
_numVaryingElements(0),
_varyingStride(0),
_interleaved(false) {
}
OsdGLSLTransformFeedbackKernelBundle::~OsdGLSLTransformFeedbackKernelBundle() {
@ -70,24 +76,34 @@ OsdGLSLTransformFeedbackKernelBundle::~OsdGLSLTransformFeedbackKernelBundle() {
}
bool
OsdGLSLTransformFeedbackKernelBundle::Compile(int numVertexElements, int numVaryingElements) {
OsdGLSLTransformFeedbackKernelBundle::Compile(
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
bool interleaved) {
assert(numVertexElements >= 3); // at least xyz required (for performance reason)
_numVertexElements = vertexDesc.length;
_vertexStride = vertexDesc.stride;
_numVaryingElements = varyingDesc.length;
_varyingStride = varyingDesc.stride;
_interleaved = interleaved;
// modulo of vbo offset
_vertexOffsetMod = (_vertexStride ? vertexDesc.offset % _vertexStride : 0);
_varyingOffsetMod = (_varyingStride ? varyingDesc.offset % _varyingStride : 0);
_vdesc.Set(numVertexElements, numVaryingElements);
_program = glCreateProgram();
GLuint shader = glCreateShader(GL_VERTEX_SHADER);
char constantDefine[256];
snprintf(constantDefine, 256,
"#define NUM_VERTEX_ELEMENTS %d\n"
"#define NUM_VARYING_ELEMENTS %d\n",
numVertexElements, numVaryingElements);
std::ostringstream defines;
defines << "#define NUM_VERTEX_ELEMENTS " << _numVertexElements << "\n"
<< "#define VERTEX_STRIDE " << _vertexStride << "\n"
<< "#define NUM_VARYING_ELEMENTS " << _numVaryingElements << "\n"
<< "#define VARYING_STRIDE " << _varyingStride << "\n";
std::string defineStr = defines.str();
const char *shaderSources[3];
shaderSources[0] = constantDefine;
shaderSources[0] = defineStr.c_str();
shaderSources[1] = shaderDefines;
shaderSources[2] = shaderSource;
glShaderSource(shader, 3, shaderSources, NULL);
@ -96,21 +112,85 @@ OsdGLSLTransformFeedbackKernelBundle::Compile(int numVertexElements, int numVary
std::vector<std::string> outputs;
// position and custom vertex data are stored same buffer whereas varying data
// exists on another buffer. "gl_NextBuffer" identifier helps to split them.
for (int i = 0; i < numVertexElements; ++i) {
/*
output attribute array
- interleaved
outVertexData[0]
outVertexData[1]
outVertexData[2]
(gl_SkipComponents1)
outVaryingData[0]
outVaryingData[1]
outVaryingData[2]
outVaryingData[3]
(gl_SkipComponents1)
...
- non-interleaved
outVertexData[0]
outVertexData[1]
outVertexData[2]
gl_NextBuffer
outVaryingData[0]
outVaryingData[1]
outVaryingData[2]
outVaryingData[3]
*/
if (_interleaved) {
assert(_vertexStride == _varyingStride);
assert(_numVertexElements + _numVaryingElements <= _vertexStride);
char attrName[32];
snprintf(attrName, 32, "outVertexData[%d]", i);
outputs.push_back(attrName);
}
for (int i = 0; i < numVaryingElements; ++i) {
if (i == 0 and (not outputs.empty())) {
// Build the transform feedback output list for one interleaved stride.
// Each of the _vertexStride slots is a vertex element, a varying element,
// or a skipped component.
for (int i = 0; i < _vertexStride; ++i) {
    // index of slot i relative to the first vertex / varying element
    int vertexElem = i - _vertexOffsetMod;
    int varyingElem = i - _varyingOffsetMod;
    if (vertexElem >= 0 and vertexElem < _numVertexElements) {
        snprintf(attrName, 32, "outVertexData[%d]", vertexElem);
        outputs.push_back(attrName);
    } else if (varyingElem >= 0 and varyingElem < _numVaryingElements) {
        // strict upper bound: valid indices are 0 .. _numVaryingElements-1
        snprintf(attrName, 32, "outVaryingData[%d]", varyingElem);
        outputs.push_back(attrName);
    } else {
        // slot belongs to neither range; leave it untouched
        outputs.push_back("gl_SkipComponents1");
    }
}
} else {
// non-interleaved
char attrName[32];
// vertex data (may include custom vertex data) is captured into the
// first buffer; varying data follows in a separate buffer (gl_NextBuffer).
for (int i = 0; i < _vertexOffsetMod; ++i)
outputs.push_back("gl_SkipComponents1");
for (int i = 0; i < _numVertexElements; ++i) {
snprintf(attrName, 32, "outVertexData[%d]", i);
outputs.push_back(attrName);
}
for (int i = _numVertexElements + _vertexOffsetMod; i < _vertexStride; ++i)
outputs.push_back("gl_SkipComponents1");
// varying
if (_numVaryingElements) {
outputs.push_back("gl_NextBuffer");
}
char attrName[32];
snprintf(attrName, 32, "outVaryingData[%d]", i);
outputs.push_back(attrName);
for (int i = 0; i < _varyingOffsetMod; ++i) {
outputs.push_back("gl_SkipComponents1");
}
for (int i = 0; i < _numVaryingElements; ++i) {
snprintf(attrName, 32, "outVaryingData[%d]", i);
outputs.push_back(attrName);
}
for (int i = _numVaryingElements + _varyingOffsetMod; i < _varyingStride; ++i) {
outputs.push_back("gl_SkipComponents1");
}
}
// convert to char* array
std::vector<const char *> pOutputs;
for (size_t i = 0; i < outputs.size(); ++i) {
pOutputs.push_back(&outputs[i][0]);
@ -156,6 +236,8 @@ OsdGLSLTransformFeedbackKernelBundle::Compile(int numVertexElements, int numVary
_uniformVertexOffset = glGetUniformLocation(_program, "vertexOffset");
_uniformTableOffset = glGetUniformLocation(_program, "tableOffset");
_uniformIndexStart = glGetUniformLocation(_program, "indexStart");
_uniformVertexBaseOffset = glGetUniformLocation(_program, "vertexBaseOffset");
_uniformVaryingBaseOffset = glGetUniformLocation(_program, "varyingBaseOffset");
_uniformTables[FarSubdivisionTables::F_IT] = glGetUniformLocation(_program, "_F0_IT");
_uniformTables[FarSubdivisionTables::F_ITa] = glGetUniformLocation(_program, "_F0_ITa");
@ -181,32 +263,44 @@ OsdGLSLTransformFeedbackKernelBundle::Compile(int numVertexElements, int numVary
void
OsdGLSLTransformFeedbackKernelBundle::transformGpuBufferData(
GLuint vertexBuffer, int numVertexElements,
GLuint varyingBuffer, int numVaryingElements,
int vertexOffset, int tableOffset, int start, int end) const {
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end) const {
int count = end - start;
if (count <= 0) return;
// set batch range
glUniform1i(_uniformIndexStart, start);
glUniform1i(_uniformVertexOffset, vertexOffset);
glUniform1i(_uniformVertexOffset, offset);
glUniform1i(_uniformTableOffset, tableOffset);
// XXX: end is not used here now
OSD_DEBUG_CHECK_GL_ERROR("Uniform index set at offset=%d. start=%d\n",
vertexOffset, start);
offset, start);
int vertexOrigin = vertexOffset - _vertexOffsetMod;
int varyingOrigin = varyingOffset - _varyingOffsetMod;
// set transform feedback buffer
if (vertexBuffer) {
int vertexStride = numVertexElements*sizeof(float);
if (_interleaved) {
int vertexStride = _vertexStride*sizeof(float);
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 0, vertexBuffer,
(start + vertexOffset)*vertexStride, count*vertexStride);
}
if (varyingBuffer){
int varyingStride = numVaryingElements*sizeof(float);
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 1, varyingBuffer,
(start + vertexOffset)*varyingStride, count*varyingStride);
(start + offset)*vertexStride + vertexOrigin*sizeof(float),
count*vertexStride);
} else {
if (vertexBuffer) {
int vertexStride = _vertexStride*sizeof(float);
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 0, vertexBuffer,
(start + offset)*vertexStride + vertexOrigin*sizeof(float),
count*vertexStride);
}
if (varyingBuffer){
int varyingStride = _varyingStride*sizeof(float);
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 1, varyingBuffer,
(start + offset)*varyingStride + varyingOrigin*sizeof(float),
count*varyingStride);
}
}
OSD_DEBUG_CHECK_GL_ERROR("transformGpuBufferData glBindBufferRange\n");
@ -222,142 +316,138 @@ OsdGLSLTransformFeedbackKernelBundle::transformGpuBufferData(
glEndTransformFeedback();
glBindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, 0);
GLsync sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
glWaitSync(sync, 0, GL_TIMEOUT_IGNORED);
glDeleteSync(sync);
}
// Bilinear face-vertex kernel entry point: selects the _subComputeFace
// vertex-shader subroutine, then forwards every argument unchanged to
// transformGpuBufferData().
// NOTE(review): two parameter lists and two forwarding calls appear back to
// back below. This looks like a diff rendering that lost its +/- markers, so
// the first of each pair is presumably the removed (numVertexElements /
// numVaryingElements) form -- confirm against the real file.
void
OsdGLSLTransformFeedbackKernelBundle::ApplyBilinearFaceVerticesKernel(
GLuint vertexBuffer, int numVertexElements,
GLuint varyingBuffer, int numVaryingElements,
int vertexOffset, int tableOffset, int start, int end) {
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end) {
// bind exactly one subroutine uniform for the vertex stage
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeFace);
transformGpuBufferData(vertexBuffer, numVertexElements,
varyingBuffer, numVaryingElements,
vertexOffset, tableOffset, start, end);
transformGpuBufferData(vertexBuffer, varyingBuffer,
vertexOffset, varyingOffset,
offset, tableOffset, start, end);
}
// Bilinear edge-vertex kernel entry point: selects the
// _subComputeBilinearEdge vertex-shader subroutine and forwards the arguments
// to transformGpuBufferData().
// NOTE(review): the duplicated parameter list / call below appear to be the
// old and new sides of a diff shown together -- confirm which one is live.
void
OsdGLSLTransformFeedbackKernelBundle::ApplyBilinearEdgeVerticesKernel(
GLuint vertexBuffer, int numVertexElements,
GLuint varyingBuffer, int numVaryingElements,
int vertexOffset, int tableOffset, int start, int end) {
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end) {
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeBilinearEdge);
transformGpuBufferData(vertexBuffer, numVertexElements,
varyingBuffer, numVaryingElements,
vertexOffset, tableOffset, start, end);
transformGpuBufferData(vertexBuffer, varyingBuffer,
vertexOffset, varyingOffset,
offset, tableOffset, start, end);
}
// Bilinear vertex-vertex kernel entry point: selects the _subComputeVertex
// vertex-shader subroutine and forwards the arguments to
// transformGpuBufferData().
// NOTE(review): the duplicated parameter list / call below appear to be the
// old and new sides of a diff shown together -- confirm which one is live.
void
OsdGLSLTransformFeedbackKernelBundle::ApplyBilinearVertexVerticesKernel(
GLuint vertexBuffer, int numVertexElements,
GLuint varyingBuffer, int numVaryingElements,
int vertexOffset, int tableOffset, int start, int end) {
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end) {
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeVertex);
transformGpuBufferData(vertexBuffer, numVertexElements,
varyingBuffer, numVaryingElements,
vertexOffset, tableOffset, start, end);
transformGpuBufferData(vertexBuffer, varyingBuffer,
vertexOffset, varyingOffset,
offset, tableOffset, start, end);
}
// Catmark face-vertex kernel entry point. Uses the same _subComputeFace
// subroutine as the bilinear face kernel, then forwards the arguments to
// transformGpuBufferData().
// NOTE(review): the duplicated parameter list / call below appear to be the
// old and new sides of a diff shown together -- confirm which one is live.
void
OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkFaceVerticesKernel(
GLuint vertexBuffer, int numVertexElements,
GLuint varyingBuffer, int numVaryingElements,
int vertexOffset, int tableOffset, int start, int end) {
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end) {
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeFace);
transformGpuBufferData(vertexBuffer, numVertexElements,
varyingBuffer, numVaryingElements,
vertexOffset, tableOffset, start, end);
transformGpuBufferData(vertexBuffer, varyingBuffer,
vertexOffset, varyingOffset,
offset, tableOffset, start, end);
}
// Catmark edge-vertex kernel entry point: selects the _subComputeEdge
// vertex-shader subroutine and forwards the arguments to
// transformGpuBufferData().
// NOTE(review): the duplicated parameter list / call below appear to be the
// old and new sides of a diff shown together -- confirm which one is live.
void
OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkEdgeVerticesKernel(
GLuint vertexBuffer, int numVertexElements,
GLuint varyingBuffer, int numVaryingElements,
int vertexOffset, int tableOffset, int start, int end) {
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end) {
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeEdge);
transformGpuBufferData(vertexBuffer, numVertexElements,
varyingBuffer, numVaryingElements,
vertexOffset, tableOffset, start, end);
transformGpuBufferData(vertexBuffer, varyingBuffer,
vertexOffset, varyingOffset,
offset, tableOffset, start, end);
}
// Catmark vertex-vertex "B" kernel entry point: selects the
// _subComputeCatmarkVertexB vertex-shader subroutine and forwards the
// arguments to transformGpuBufferData().
// NOTE(review): the duplicated parameter list / call below appear to be the
// old and new sides of a diff shown together -- confirm which one is live.
void
OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkVertexVerticesKernelB(
GLuint vertexBuffer, int numVertexElements,
GLuint varyingBuffer, int numVaryingElements,
int vertexOffset, int tableOffset, int start, int end) {
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end) {
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeCatmarkVertexB);
transformGpuBufferData(vertexBuffer, numVertexElements,
varyingBuffer, numVaryingElements,
vertexOffset, tableOffset, start, end);
transformGpuBufferData(vertexBuffer, varyingBuffer,
vertexOffset, varyingOffset,
offset, tableOffset, start, end);
}
// Catmark vertex-vertex "A" kernel entry point: selects the
// _subComputeVertexA vertex-shader subroutine, uploads `pass` to the
// _uniformVertexPass uniform as 0 or 1, and forwards the remaining arguments
// to transformGpuBufferData().
// NOTE(review): the duplicated parameter list / call below appear to be the
// old and new sides of a diff shown together -- confirm which one is live.
void
OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkVertexVerticesKernelA(
GLuint vertexBuffer, int numVertexElements,
GLuint varyingBuffer, int numVaryingElements,
int vertexOffset, int tableOffset, int start, int end, bool pass) {
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end, bool pass) {
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeVertexA);
// the shader receives the pass flag as an integer
glUniform1i(_uniformVertexPass, pass ? 1 : 0);
transformGpuBufferData(vertexBuffer, numVertexElements,
varyingBuffer, numVaryingElements,
vertexOffset, tableOffset, start, end);
transformGpuBufferData(vertexBuffer, varyingBuffer,
vertexOffset, varyingOffset,
offset, tableOffset, start, end);
}
// Loop edge-vertex kernel entry point. Uses the same _subComputeEdge
// subroutine as the Catmark edge kernel, then forwards the arguments to
// transformGpuBufferData().
// NOTE(review): the duplicated parameter list / call below appear to be the
// old and new sides of a diff shown together -- confirm which one is live.
void
OsdGLSLTransformFeedbackKernelBundle::ApplyLoopEdgeVerticesKernel(
GLuint vertexBuffer, int numVertexElements,
GLuint varyingBuffer, int numVaryingElements,
int vertexOffset, int tableOffset, int start, int end) {
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end) {
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeEdge);
transformGpuBufferData(vertexBuffer, numVertexElements,
varyingBuffer, numVaryingElements,
vertexOffset, tableOffset, start, end);
transformGpuBufferData(vertexBuffer, varyingBuffer,
vertexOffset, varyingOffset,
offset, tableOffset, start, end);
}
// Loop vertex-vertex "B" kernel entry point: selects the
// _subComputeLoopVertexB vertex-shader subroutine and forwards the arguments
// to transformGpuBufferData().
// NOTE(review): the duplicated parameter list / call below appear to be the
// old and new sides of a diff shown together -- confirm which one is live.
void
OsdGLSLTransformFeedbackKernelBundle::ApplyLoopVertexVerticesKernelB(
GLuint vertexBuffer, int numVertexElements,
GLuint varyingBuffer, int numVaryingElements,
int vertexOffset, int tableOffset, int start, int end) {
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end) {
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeLoopVertexB);
transformGpuBufferData(vertexBuffer, numVertexElements,
varyingBuffer, numVaryingElements,
vertexOffset, tableOffset, start, end);
transformGpuBufferData(vertexBuffer, varyingBuffer,
vertexOffset, varyingOffset,
offset, tableOffset, start, end);
}
// Loop vertex-vertex "A" kernel entry point. Uses the same
// _subComputeVertexA subroutine as the Catmark "A" kernel, uploads `pass` to
// _uniformVertexPass as 0 or 1, and forwards the remaining arguments to
// transformGpuBufferData().
// NOTE(review): the duplicated parameter list / call below appear to be the
// old and new sides of a diff shown together -- confirm which one is live.
void
OsdGLSLTransformFeedbackKernelBundle::ApplyLoopVertexVerticesKernelA(
GLuint vertexBuffer, int numVertexElements,
GLuint varyingBuffer, int numVaryingElements,
int vertexOffset, int tableOffset, int start, int end, bool pass) {
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end, bool pass) {
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeVertexA);
// the shader receives the pass flag as an integer
glUniform1i(_uniformVertexPass, pass ? 1 : 0);
transformGpuBufferData(vertexBuffer, numVertexElements,
varyingBuffer, numVaryingElements,
vertexOffset, tableOffset, start, end);
transformGpuBufferData(vertexBuffer, varyingBuffer,
vertexOffset, varyingOffset,
offset, tableOffset, start, end);
}
void
OsdGLSLTransformFeedbackKernelBundle::ApplyEditAdd(
GLuint vertexBuffer, int numVertexElements,
GLuint varyingBuffer, int numVaryingElements,
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int primvarOffset, int primvarWidth,
int vertexOffset, int tableOffset, int start, int end) {
int offset, int tableOffset, int start, int end) {
if (end - start <= 0) return;
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subEditAdd);
@ -365,15 +455,19 @@ OsdGLSLTransformFeedbackKernelBundle::ApplyEditAdd(
glUniform1i(_uniformEditPrimVarWidth, primvarWidth);
glUniform1i(_uniformIndexStart, start);
glUniform1i(_uniformVertexOffset, vertexOffset);
glUniform1i(_uniformVertexOffset, offset);
glUniform1i(_uniformTableOffset, tableOffset);
glDrawArrays(GL_POINTS, 0, end - start);
}
// Makes _program the current GL program and uploads the two base offsets to
// the _uniformVertexBaseOffset / _uniformVaryingBaseOffset uniforms.
// NOTE(review): two signatures are listed below (a zero-argument one and a
// two-argument one); the body reads vertexBaseOffset / varyingBaseOffset, so
// only the two-argument form matches it. The zero-argument line looks like
// the removed side of a diff -- confirm.
void
OsdGLSLTransformFeedbackKernelBundle::UseProgram() const
OsdGLSLTransformFeedbackKernelBundle::UseProgram(int vertexBaseOffset,
int varyingBaseOffset) const
{
glUseProgram(_program);
// glUniform* targets the program made current by glUseProgram above
glUniform1i(_uniformVertexBaseOffset, vertexBaseOffset);
glUniform1i(_uniformVaryingBaseOffset, varyingBaseOffset);
}

View File

@ -44,65 +44,67 @@ public:
~OsdGLSLTransformFeedbackKernelBundle();
bool Compile(int numVertexElements, int numVaryingElements);
bool Compile(OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
bool interleaved);
void ApplyBilinearFaceVerticesKernel(
GLuint vertexBuffer, int numVertexElements,
GLuint varyingBuffer, int numVaryingElements,
int vertexOffset, int tableOffset, int start, int end);
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end);
void ApplyBilinearEdgeVerticesKernel(
GLuint vertexBuffer, int numVertexElements,
GLuint varyingBuffer, int numVaryingElements,
int vertexOffset, int tableOffset, int start, int end);
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end);
void ApplyBilinearVertexVerticesKernel(
GLuint vertexBuffer, int numVertexElements,
GLuint varyingBuffer, int numVaryingElements,
int vertexOffset, int tableOffset, int start, int end);
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end);
void ApplyCatmarkFaceVerticesKernel(
GLuint vertexBuffer, int numVertexElements,
GLuint varyingBuffer, int numVaryingElements,
int vertexOffset, int tableOffset, int start, int end);
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end);
void ApplyCatmarkEdgeVerticesKernel(
GLuint vertexBuffer, int numVertexElements,
GLuint varyingBuffer, int numVaryingElements,
int vertexOffset, int tableOffset, int start, int end);
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end);
void ApplyCatmarkVertexVerticesKernelB(
GLuint vertexBuffer, int numVertexElements,
GLuint varyingBuffer, int numVaryingElements,
int vertexOffset, int tableOffset, int start, int end);
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end);
void ApplyCatmarkVertexVerticesKernelA(
GLuint vertexBuffer, int numVertexElements,
GLuint varyingBuffer, int numVaryingElements,
int vertexOffset, int tableOffset, int start, int end, bool pass);
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end, bool pass);
void ApplyLoopEdgeVerticesKernel(
GLuint vertexBuffer, int numVertexElements,
GLuint varyingBuffer, int numVaryingElements,
int vertexOffset, int tableOffset, int start, int end);
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end);
void ApplyLoopVertexVerticesKernelB(
GLuint vertexBuffer, int numVertexElements,
GLuint varyingBuffer, int numVaryingElements,
int vertexOffset, int tableOffset, int start, int end);
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end);
void ApplyLoopVertexVerticesKernelA(
GLuint vertexBuffer, int numVertexElements,
GLuint varyingBuffer, int numVaryingElements,
int vertexOffset, int tableOffset, int start, int end, bool pass);
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end, bool pass);
void ApplyEditAdd(
GLuint vertexBuffer, int numVertexElements,
GLuint varyingBuffer, int numVaryingElements,
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int primvarOffset, int primvarWidth,
int vertexOffset, int tableOffset, int start, int end);
int offset, int tableOffset, int start, int end);
void UseProgram() const;
void UseProgram(int vertexBaseOffset, int varyingBaseOffset) const;
GLint GetTableUniformLocation(int tableIndex) const {
return _uniformTables[tableIndex];
@ -124,26 +126,35 @@ public:
}
// Predicate object that tests whether an existing kernel bundle was built
// for a given vertex/varying layout.
// NOTE(review): this listing shows two constructors, two return statements
// and an extra `vdesc` member side by side; the OsdVertexDescriptor-based
// lines look like the removed side of a diff -- confirm.
struct Match {
/// Constructor: stores copies of the two descriptors and the interleaved
/// flag to compare against.
Match(int numVertexElements, int numVaryingElements)
: vdesc(numVertexElements, numVaryingElements) {
Match(OsdVertexBufferDescriptor const &vertex,
OsdVertexBufferDescriptor const &varying,
bool interleaved)
: vertexDesc(vertex), varyingDesc(varying), interleaved(interleaved) {
}
// NOTE(review): operator() is not const-qualified although it only reads
// members.
bool operator() (OsdGLSLTransformFeedbackKernelBundle const *kernel) {
return vdesc == kernel->_vdesc;
// The descriptor offset is dynamic, so it is deliberately left out of
// the comparison: only length, stride and the interleaved flag are
// compared, and the result is true when all of them are equal.
return (vertexDesc.length == kernel->_numVertexElements and
vertexDesc.stride == kernel->_vertexStride and
varyingDesc.length == kernel->_numVaryingElements and
varyingDesc.stride == kernel->_varyingStride and
interleaved == kernel->_interleaved);
}
OsdVertexDescriptor vdesc;
OsdVertexBufferDescriptor vertexDesc;
OsdVertexBufferDescriptor varyingDesc;
bool interleaved;
};
friend struct Match;
protected:
void transformGpuBufferData(
GLuint vertexBuffer, int numVertexElements,
GLuint varyingBuffer, int numVaryingElements,
int vertexOffset, int tableOffset, int start, int end) const;
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end) const;
GLuint _program;
@ -153,6 +164,8 @@ protected:
GLint _uniformVertexOffset;
GLint _uniformTableOffset;
GLint _uniformIndexStart;
GLint _uniformVertexBaseOffset;
GLint _uniformVaryingBaseOffset;
GLint _uniformVertexBuffer;
GLint _uniformVaryingBuffer;
@ -182,7 +195,14 @@ protected:
GLuint _subEditAdd; // hedit kernel (add)
OsdVertexDescriptor _vdesc;
// kernelbundle discriminators
int _numVertexElements;
int _vertexStride;
int _numVaryingElements;
int _varyingStride;
int _vertexOffsetMod;
int _varyingOffsetMod;
bool _interleaved;
};
} // end namespace OPENSUBDIV_VERSION

View File

@ -32,6 +32,8 @@ cbuffer KernelCB : register( b0 ) {
int tableOffset; // offset of subdivision table
int indexStart; // start index relative to tableOffset
int indexEnd; // end index relative to tableOffset
int vertexBaseOffset; // base vbo offset of the vertex buffer
int varyingBaseOffset; // base vbo offset of the varying buffer
bool vertexPass;
// vertex edit kernel
@ -91,13 +93,15 @@ Vertex readVertex(int index)
Vertex v;
#if NUM_VERTEX_ELEMENTS > 0
int vertexIndex = index * VERTEX_STRIDE + vertexBaseOffset;
for (int i = 0; i < NUM_VERTEX_ELEMENTS; i++) {
v.vertexData[i] = vertexBuffer[index*NUM_VERTEX_ELEMENTS+i];
v.vertexData[i] = vertexBuffer[vertexIndex + i];
}
#endif
#if NUM_VARYING_ELEMENTS > 0
int varyingIndex = index * VARYING_STRIDE + varyingBaseOffset;
for (int i = 0; i < NUM_VARYING_ELEMENTS; i++) {
v.varyingData[i] = varyingBuffer[index*NUM_VARYING_ELEMENTS+i];
v.varyingData[i] = varyingBuffer[varyingIndex + i];
}
#endif
return v;
@ -106,13 +110,15 @@ Vertex readVertex(int index)
// Stores vertex `v` into element `index` of vertexBuffer / varyingBuffer.
// Each section is compiled only when its element count macro is > 0.
// NOTE(review): each loop below contains two stores, one indexed with
// index*NUM_*_ELEMENTS and one with the stride/base-offset index; they look
// like the old and new sides of a diff shown together -- confirm.
void writeVertex(int index, Vertex v)
{
#if NUM_VERTEX_ELEMENTS > 0
// first float of this element: stride-scaled index plus the base offset
int vertexIndex = index * VERTEX_STRIDE + vertexBaseOffset;
for (int i = 0; i < NUM_VERTEX_ELEMENTS; i++) {
vertexBuffer[index*NUM_VERTEX_ELEMENTS+i] = v.vertexData[i];
vertexBuffer[vertexIndex + i] = v.vertexData[i];
}
#endif
#if NUM_VARYING_ELEMENTS > 0
// same addressing scheme for the varying buffer
int varyingIndex = index * VARYING_STRIDE + varyingBaseOffset;
for (int i = 0; i < NUM_VARYING_ELEMENTS; i++) {
varyingBuffer[index*NUM_VARYING_ELEMENTS+i] = v.varyingData[i];
varyingBuffer[varyingIndex + i] = v.varyingData[i];
}
#endif
}

View File

@ -33,6 +33,7 @@
#include "../hbr/mesh.h"
#include "../osd/vertex.h"
#include "../osd/vertexDescriptor.h"
#include <bitset>
@ -68,6 +69,10 @@ public:
virtual void Refine() = 0;
virtual void Refine(OsdVertexBufferDescriptor const *vertexDesc,
OsdVertexBufferDescriptor const *varyingDesc,
bool interleaved) = 0;
virtual void Synchronize() = 0;
virtual DrawContext * GetDrawContext() = 0;
@ -158,6 +163,13 @@ public:
// Refines the whole mesh: forwards the compute context, the kernel batches
// of _farMesh and both buffers to the compute controller. No buffer
// descriptors are passed in this overload.
virtual void Refine() {
_computeController->Refine(_computeContext, _farMesh->GetKernelBatches(), _vertexBuffer, _varyingBuffer);
}
virtual void Refine(OsdVertexBufferDescriptor const *vertexDesc,
OsdVertexBufferDescriptor const *varyingDesc) {
_computeController->Refine(_computeContext, _farMesh->GetKernelBatches(),
_vertexBuffer, _varyingBuffer,
vertexDesc, varyingDesc);
}
// Forwards to the compute controller's Synchronize(); nothing else happens
// here.
virtual void Synchronize() {
_computeController->Synchronize();
}

View File

@ -34,8 +34,7 @@ namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
// Constructor. numThreads == -1 means "use omp_get_max_threads()"; any other
// value is stored as given in _numThreads.
// NOTE(review): two constructor headers are listed below, one with a member
// initializer list for _currentVertexBuffer/_currentVaryingBuffer and one
// without; they look like the old and new sides of a diff -- confirm.
OsdOmpComputeController::OsdOmpComputeController(int numThreads) :
_currentVertexBuffer(NULL), _currentVaryingBuffer(NULL) {
OsdOmpComputeController::OsdOmpComputeController(int numThreads) {
_numThreads = (numThreads == -1) ? omp_get_max_threads() : numThreads;
}
@ -48,7 +47,8 @@ OsdOmpComputeController::ApplyBilinearFaceVerticesKernel(
assert(context);
OsdOmpComputeFace(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::F_IT)->GetBuffer(),
(const int*)context->GetTable(FarSubdivisionTables::F_ITa)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
@ -61,7 +61,8 @@ OsdOmpComputeController::ApplyBilinearEdgeVerticesKernel(
assert(context);
OsdOmpComputeBilinearEdge(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
@ -73,7 +74,8 @@ OsdOmpComputeController::ApplyBilinearVertexVerticesKernel(
assert(context);
OsdOmpComputeBilinearVertex(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
@ -85,7 +87,8 @@ OsdOmpComputeController::ApplyCatmarkFaceVerticesKernel(
assert(context);
OsdOmpComputeFace(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::F_IT)->GetBuffer(),
(const int*)context->GetTable(FarSubdivisionTables::F_ITa)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
@ -98,7 +101,8 @@ OsdOmpComputeController::ApplyCatmarkEdgeVerticesKernel(
assert(context);
OsdOmpComputeEdge(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
(const float*)context->GetTable(FarSubdivisionTables::E_W)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
@ -111,7 +115,8 @@ OsdOmpComputeController::ApplyCatmarkVertexVerticesKernelB(
assert(context);
OsdOmpComputeVertexB(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
(const int*)context->GetTable(FarSubdivisionTables::V_IT)->GetBuffer(),
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
@ -125,7 +130,8 @@ OsdOmpComputeController::ApplyCatmarkVertexVerticesKernelA1(
assert(context);
OsdOmpComputeVertexA(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), false);
@ -138,7 +144,8 @@ OsdOmpComputeController::ApplyCatmarkVertexVerticesKernelA2(
assert(context);
OsdOmpComputeVertexA(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
@ -151,7 +158,8 @@ OsdOmpComputeController::ApplyLoopEdgeVerticesKernel(
assert(context);
OsdOmpComputeEdge(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
(const float*)context->GetTable(FarSubdivisionTables::E_W)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
@ -164,7 +172,8 @@ OsdOmpComputeController::ApplyLoopVertexVerticesKernelB(
assert(context);
OsdOmpComputeLoopVertexB(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
(const int*)context->GetTable(FarSubdivisionTables::V_IT)->GetBuffer(),
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
@ -178,7 +187,8 @@ OsdOmpComputeController::ApplyLoopVertexVerticesKernelA1(
assert(context);
OsdOmpComputeVertexA(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), false);
@ -191,7 +201,8 @@ OsdOmpComputeController::ApplyLoopVertexVerticesKernelA2(
assert(context);
OsdOmpComputeVertexA(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
@ -210,8 +221,8 @@ OsdOmpComputeController::ApplyVertexEdits(
const OsdCpuTable * editValues = edit->GetEditValues();
if (edit->GetOperation() == FarVertexEdit::Add) {
OsdOmpEditVertexAdd(_vdesc,
_currentVertexBuffer,
OsdOmpEditVertexAdd(_currentBindState.vertexBuffer,
_currentBindState.vertexDesc,
edit->GetPrimvarOffset(),
edit->GetPrimvarWidth(),
batch.GetVertexOffset(),
@ -221,8 +232,8 @@ OsdOmpComputeController::ApplyVertexEdits(
static_cast<unsigned int*>(primvarIndices->GetBuffer()),
static_cast<float*>(editValues->GetBuffer()));
} else if (edit->GetOperation() == FarVertexEdit::Set) {
OsdOmpEditVertexSet(_vdesc,
_currentVertexBuffer,
OsdOmpEditVertexSet(_currentBindState.vertexBuffer,
_currentBindState.vertexDesc,
edit->GetPrimvarOffset(),
edit->GetPrimvarWidth(),
batch.GetVertexOffset(),

View File

@ -29,6 +29,7 @@
#include "../far/dispatcher.h"
#include "../osd/cpuComputeContext.h"
#include "../osd/vertexDescriptor.h"
#ifdef OPENSUBDIV_HAS_OPENMP
#include <omp.h>
@ -69,17 +70,27 @@ public:
///
/// @param varyingBuffer varying-interpolated data buffer
///
/// @param vertexDesc the descriptor of vertex elements to be refined.
/// if it's null, all primvars in the vertex buffer
/// will be refined.
///
/// @param varyingDesc the descriptor of varying elements to be refined.
/// if it's null, all primvars in the varying buffer
/// will be refined.
///
template<class VERTEX_BUFFER, class VARYING_BUFFER>
void Refine(OsdCpuComputeContext const *context,
FarKernelBatchVector const & batches,
VERTEX_BUFFER * vertexBuffer,
VARYING_BUFFER * varyingBuffer) {
VARYING_BUFFER * varyingBuffer,
OsdVertexBufferDescriptor const *vertexDesc=NULL,
OsdVertexBufferDescriptor const *varyingDesc=NULL) {
if (batches.empty()) return;
omp_set_num_threads(_numThreads);
bind(vertexBuffer, varyingBuffer);
bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc);
FarDispatcher::Refine(this, context, batches, /*maxlevel*/-1);
@ -137,24 +148,60 @@ protected:
void ApplyVertexEdits(FarKernelBatch const &batch, ComputeContext const *context) const;
// Records the buffers and descriptors the kernels will operate on in
// _currentBindState. Either buffer pointer and either descriptor pointer may
// be null.
// NOTE(review): this listing mixes two versions of the function (a
// two-argument signature that writes _currentVertexBuffer / _vdesc, and a
// four-argument one that writes _currentBindState); it looks like a diff
// with its +/- markers stripped -- confirm which lines are live.
template<class VERTEX_BUFFER, class VARYING_BUFFER>
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying) {
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying,
OsdVertexBufferDescriptor const *vertexDesc,
OsdVertexBufferDescriptor const *varyingDesc) {
_currentVertexBuffer = vertex ? vertex->BindCpuBuffer() : 0;
_currentVaryingBuffer = varying ? varying->BindCpuBuffer() : 0;
// If a vertex buffer descriptor is supplied, use it as-is. Otherwise
// assume the data is tightly packed and build a descriptor from the
// buffer's element count (0 when there is no buffer).
// presumably the constructor arguments are (offset, length, stride) -- TODO confirm
if (vertexDesc) {
_currentBindState.vertexDesc = *vertexDesc;
} else {
int numElements = vertex ? vertex->GetNumElements() : 0;
_currentBindState.vertexDesc = OsdVertexBufferDescriptor(
0, numElements, numElements);
}
// same fallback for the varying descriptor
if (varyingDesc) {
_currentBindState.varyingDesc = *varyingDesc;
} else {
int numElements = varying ? varying->GetNumElements() : 0;
_currentBindState.varyingDesc = OsdVertexBufferDescriptor(
0, numElements, numElements);
}
int numVertexElements = vertex ? vertex->GetNumElements() : 0;
int numVaryingElements = varying ? varying->GetNumElements() : 0;
_vdesc.Set(numVertexElements, numVaryingElements);
// Apply the descriptor offset here, so the stored pointers already point
// at the first element to refine; a missing buffer is stored as NULL.
if (vertex) {
_currentBindState.vertexBuffer =
vertex->BindCpuBuffer() + _currentBindState.vertexDesc.offset;
} else {
_currentBindState.vertexBuffer = NULL;
}
if (varying) {
_currentBindState.varyingBuffer =
varying->BindCpuBuffer() + _currentBindState.varyingDesc.offset;
} else {
_currentBindState.varyingBuffer = NULL;
}
}
// Clears the state recorded by bind().
// NOTE(review): the _currentVertexBuffer / _currentVaryingBuffer / _vdesc
// lines and the _currentBindState.Reset() line appear to be the old and new
// sides of a diff shown together -- confirm.
void unbind() {
_currentVertexBuffer = 0;
_currentVaryingBuffer = 0;
_vdesc.Reset();
_currentBindState.Reset();
}
private:
float *_currentVertexBuffer, *_currentVaryingBuffer;
OsdVertexDescriptor _vdesc;
// Buffers and layout descriptors captured for the duration of one bind.
struct BindState {
    // Starts with no buffers bound; the descriptors are default-constructed.
    BindState()
        : vertexBuffer(NULL)
        , varyingBuffer(NULL) {
    }

    // Returns the state to "nothing bound": descriptors are reset and both
    // buffer pointers become NULL.
    void Reset() {
        vertexDesc.Reset();
        varyingDesc.Reset();
        vertexBuffer = NULL;
        varyingBuffer = NULL;
    }

    float *vertexBuffer;    // vertex data pointer, NULL when unbound
    float *varyingBuffer;   // varying data pointer, NULL when unbound
    OsdVertexBufferDescriptor vertexDesc;
    OsdVertexBufferDescriptor varyingDesc;
};
BindState _currentBindState;
int _numThreads;
};

View File

@ -25,40 +25,94 @@
#include "../osd/ompKernel.h"
#include "../osd/vertexDescriptor.h"
#include <math.h>
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <omp.h>
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
// Zero-fills the first desc.length floats at dst. A NULL dst is ignored, and
// a non-positive length writes nothing.
//
// std::fill_n is used instead of memset because <algorithm> is among this
// file's includes while <cstring>/<string.h> is not, so memset relied on a
// transitive include.
static inline void
clear(float *dst, OsdVertexBufferDescriptor const &desc) {
    if (dst) {
        std::fill_n(dst, desc.length, 0.0f);
    }
}
// Accumulates one weighted source element into dst:
//   dst[k] += srcOrigin[srcIndex * desc.stride + k] * weight
// for k in [0, desc.length). Does nothing when either pointer is NULL.
static inline void
addWithWeight(float *dst, const float *srcOrigin, int srcIndex, float weight,
              OsdVertexBufferDescriptor const &desc) {
    if (!srcOrigin || !dst) {
        return;
    }

    // first float of the source element inside the strided buffer
    const float *in = srcOrigin + srcIndex * desc.stride;
    float *out = dst;
    for (int remaining = desc.length; remaining > 0; --remaining) {
        *out += *in * weight;
        ++out;
        ++in;
    }
}
// Copies desc.length floats from src into element dstIndex of the strided
// buffer at dstOrigin (that is, to dstOrigin + dstIndex * desc.stride).
// Does nothing when either pointer is NULL or the length is not positive.
//
// std::copy is used instead of memcpy because <algorithm> is among this
// file's includes while <cstring>/<string.h> is not, so memcpy relied on a
// transitive include. As with memcpy, source and destination are expected
// not to overlap.
static inline void
copy(float *dstOrigin, const float *src, int dstIndex,
     OsdVertexBufferDescriptor const &desc) {
    if (dstOrigin && src && desc.length > 0) {
        float *dst = dstOrigin + dstIndex * desc.stride;
        std::copy(src, src + desc.length, dst);
    }
}
// Face-vertex kernel (OpenMP). For every table entry i in
// [start + tableOffset, end + tableOffset) it reads a start index h and a
// count n from F_ITa, averages the n source elements listed in
// F_IT[h .. h+n) with weight 1/n, and stores the result in element
// dstIndex = offset + i - tableOffset of the vertex and varying buffers.
// NOTE(review): both the OsdVertexDescriptor-based lines (vdesc.*) and the
// OsdVertexBufferDescriptor-based lines are present below; they look like
// the old and new sides of a diff shown together -- confirm.
void OsdOmpComputeFace(
OsdVertexDescriptor const &vdesc, float * vertex, float * varying,
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *F_IT, const int *F_ITa, int offset, int tableOffset, int start, int end) {
// One scratch slice of desc.length floats per thread, taken with alloca;
// the total therefore grows with length * numThreads.
// NOTE(review): no <alloca.h> include is visible in this file -- confirm
// alloca is declared on every target platform.
int numThreads = omp_get_max_threads();
float *vertexResultsArray = (float*)alloca(vertexDesc.length * sizeof(float) * numThreads);
float *varyingResultsArray = (float*)alloca(varyingDesc.length * sizeof(float) * numThreads);
#pragma omp parallel for
for (int i = start + tableOffset; i < end + tableOffset; i++) {
int h = F_ITa[2*i];
int n = F_ITa[2*i+1];
float weight = 1.0f/n;
// XXX: should use local vertex struct variable instead of
// accumulating directly into global memory.
int dstIndex = offset + i - tableOffset;
vdesc.Clear(vertex, varying, dstIndex);
// pick this thread's scratch slice
int threadId = omp_get_thread_num();
float *vertexResults = vertexResultsArray +
vertexDesc.length * threadId;
float *varyingResults = varyingResultsArray +
varyingDesc.length * threadId;
// clear the scratch before accumulating
clear(vertexResults, vertexDesc);
clear(varyingResults, varyingDesc);
for (int j = 0; j < n; ++j) {
int index = F_IT[h+j];
vdesc.AddWithWeight(vertex, dstIndex, index, weight);
vdesc.AddVaryingWithWeight(varying, dstIndex, index, weight);
addWithWeight(vertexResults, vertex, index, weight, vertexDesc);
addWithWeight(varyingResults, varying, index, weight, varyingDesc);
}
// write results from the scratch slice into the destination element
copy(vertex, vertexResults, dstIndex, vertexDesc);
copy(varying, varyingResults, dstIndex, varyingDesc);
}
}
void OsdOmpComputeEdge(
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *E_IT, const float *E_W, int offset, int tableOffset, int start, int end) {
int numThreads = omp_get_max_threads();
float *vertexResultsArray = (float*)alloca(vertexDesc.length * sizeof(float) * numThreads);
float *varyingResultsArray = (float*)alloca(varyingDesc.length * sizeof(float) * numThreads);
#pragma omp parallel for
for (int i = start + tableOffset; i < end + tableOffset; i++) {
int eidx0 = E_IT[4*i+0];
@ -67,30 +121,47 @@ void OsdOmpComputeEdge(
int eidx3 = E_IT[4*i+3];
float vertWeight = E_W[i*2+0];
int dstIndex = offset + i - tableOffset;
vdesc.Clear(vertex, varying, dstIndex);
vdesc.AddWithWeight(vertex, dstIndex, eidx0, vertWeight);
vdesc.AddWithWeight(vertex, dstIndex, eidx1, vertWeight);
int threadId = omp_get_thread_num();
float *vertexResults = vertexResultsArray +
vertexDesc.length * threadId;
float *varyingResults = varyingResultsArray +
varyingDesc.length * threadId;
// clear
clear(vertexResults, vertexDesc);
clear(varyingResults, varyingDesc);
addWithWeight(vertexResults, vertex, eidx0, vertWeight, vertexDesc);
addWithWeight(vertexResults, vertex, eidx1, vertWeight, vertexDesc);
if (eidx2 != -1) {
float faceWeight = E_W[i*2+1];
vdesc.AddWithWeight(vertex, dstIndex, eidx2, faceWeight);
vdesc.AddWithWeight(vertex, dstIndex, eidx3, faceWeight);
addWithWeight(vertexResults, vertex, eidx2, faceWeight, vertexDesc);
addWithWeight(vertexResults, vertex, eidx3, faceWeight, vertexDesc);
}
vdesc.AddVaryingWithWeight(varying, dstIndex, eidx0, 0.5f);
vdesc.AddVaryingWithWeight(varying, dstIndex, eidx1, 0.5f);
addWithWeight(varyingResults, varying, eidx0, 0.5f, varyingDesc);
addWithWeight(varyingResults, varying, eidx1, 0.5f, varyingDesc);
copy(vertex, vertexResults, dstIndex, vertexDesc);
copy(varying, varyingResults, dstIndex, varyingDesc);
}
}
void OsdOmpComputeVertexA(
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, const float *V_W,
int offset, int tableOffset, int start, int end, int pass) {
int numThreads = omp_get_max_threads();
float *vertexResultsArray = (float*)alloca(vertexDesc.length * sizeof(float) * numThreads);
float *varyingResultsArray = (float*)alloca(varyingDesc.length * sizeof(float) * numThreads);
#pragma omp parallel for
for (int i = start + tableOffset; i < end + tableOffset; i++) {
int n = V_ITa[5*i+1];
@ -107,27 +178,47 @@ void OsdOmpComputeVertexA(
weight = 1.0f - weight;
int dstIndex = offset + i - tableOffset;
if (not pass)
vdesc.Clear(vertex, varying, dstIndex);
if (eidx0 == -1 || (pass == 0 && (n == -1))) {
vdesc.AddWithWeight(vertex, dstIndex, p, weight);
} else {
vdesc.AddWithWeight(vertex, dstIndex, p, weight * 0.75f);
vdesc.AddWithWeight(vertex, dstIndex, eidx0, weight * 0.125f);
vdesc.AddWithWeight(vertex, dstIndex, eidx1, weight * 0.125f);
int threadId = omp_get_thread_num();
float *vertexResults = vertexResultsArray +
vertexDesc.length * threadId;
float *varyingResults = varyingResultsArray +
varyingDesc.length * threadId;
clear(vertexResults, vertexDesc);
clear(varyingResults, varyingDesc);
if (pass) {
// copy previous results
addWithWeight(vertexResults, vertex, dstIndex, 1.0f, vertexDesc);
}
if (not pass)
vdesc.AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
if (eidx0 == -1 || (pass == 0 && (n == -1))) {
addWithWeight(vertexResults, vertex, p, weight, vertexDesc);
} else {
addWithWeight(vertexResults, vertex, p, weight * 0.75f, vertexDesc);
addWithWeight(vertexResults, vertex, eidx0, weight * 0.125f, vertexDesc);
addWithWeight(vertexResults, vertex, eidx1, weight * 0.125f, vertexDesc);
}
copy(vertex, vertexResults, dstIndex, vertexDesc);
if (not pass) {
addWithWeight(varyingResults, varying, p, 1.0f, varyingDesc);
copy(varying, varyingResults, dstIndex, varyingDesc);
}
}
}
void OsdOmpComputeVertexB(
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, const int *V_IT, const float *V_W,
int offset, int tableOffset, int start, int end) {
int numThreads = omp_get_max_threads();
float *vertexResultsArray = (float*)alloca(vertexDesc.length * sizeof(float) * numThreads);
float *varyingResultsArray = (float*)alloca(varyingDesc.length * sizeof(float) * numThreads);
#pragma omp parallel for
for (int i = start + tableOffset; i < end + tableOffset; i++) {
int h = V_ITa[5*i];
@ -139,23 +230,40 @@ void OsdOmpComputeVertexB(
float wv = (n-2.0f) * n * wp;
int dstIndex = offset + i - tableOffset;
vdesc.Clear(vertex, varying, dstIndex);
vdesc.AddWithWeight(vertex, dstIndex, p, weight * wv);
int threadId = omp_get_thread_num();
float *vertexResults = vertexResultsArray +
vertexDesc.length * threadId;
float *varyingResults = varyingResultsArray +
varyingDesc.length * threadId;
clear(vertexResults, vertexDesc);
clear(varyingResults, varyingDesc);
addWithWeight(vertexResults, vertex, p, weight * wv, vertexDesc);
for (int j = 0; j < n; ++j) {
vdesc.AddWithWeight(vertex, dstIndex, V_IT[h+j*2], weight * wp);
vdesc.AddWithWeight(vertex, dstIndex, V_IT[h+j*2+1], weight * wp);
addWithWeight(vertexResults, vertex, V_IT[h+j*2], weight * wp, vertexDesc);
addWithWeight(vertexResults, vertex, V_IT[h+j*2+1], weight * wp, vertexDesc);
}
vdesc.AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
addWithWeight(varyingResults, varying, p, 1.0f, varyingDesc);
copy(vertex, vertexResults, dstIndex, vertexDesc);
copy(varying, varyingResults, dstIndex, varyingDesc);
}
}
void OsdOmpComputeLoopVertexB(
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, const int *V_IT, const float *V_W,
int vertexOffset, int tableOffset, int start, int end) {
int numThreads = omp_get_max_threads();
float *vertexResultsArray = (float*)alloca(vertexDesc.length * sizeof(float) * numThreads);
float *varyingResultsArray = (float*)alloca(varyingDesc.length * sizeof(float) * numThreads);
#pragma omp parallel for
for (int i = start + tableOffset; i < end + tableOffset; i++) {
int h = V_ITa[5*i];
@ -169,82 +277,137 @@ void OsdOmpComputeLoopVertexB(
beta = (0.625f - beta) * wp;
int dstIndex = i + vertexOffset - tableOffset;
vdesc.Clear(vertex, varying, dstIndex);
vdesc.AddWithWeight(vertex, dstIndex, p, weight * (1.0f - (beta * n)));
int threadId = omp_get_thread_num();
float *vertexResults = vertexResultsArray +
vertexDesc.length * threadId;
float *varyingResults = varyingResultsArray +
varyingDesc.length * threadId;
clear(vertexResults, vertexDesc);
clear(varyingResults, varyingDesc);
addWithWeight(vertexResults, vertex, p, weight * (1.0f - (beta * n)), vertexDesc);
for (int j = 0; j < n; ++j)
vdesc.AddWithWeight(vertex, dstIndex, V_IT[h+j], weight * beta);
addWithWeight(vertexResults, vertex, V_IT[h+j], weight * beta, vertexDesc);
vdesc.AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
addWithWeight(varyingResults, varying, p, 1.0f, varyingDesc);
copy(vertex, vertexResults, dstIndex, vertexDesc);
copy(varying, varyingResults, dstIndex, varyingDesc);
}
}
// Bilinear edge kernel (OpenMP): for every table entry in
// [start + tableOffset, end + tableOffset) writes the 0.5/0.5 blend of the two
// vertices listed in E_IT into destination vertex
// (i + vertexOffset - tableOffset), for both the vertex and varying buffers.
//
// NOTE(review): this span is a rendered diff without +/- markers. Lines that
// reference `vdesc` (OsdVertexDescriptor) are the removed, pre-change side;
// lines that use vertexDesc / varyingDesc are the added side.
void OsdOmpComputeBilinearEdge(
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *E_IT, int vertexOffset, int tableOffset, int start, int end) {
// One scratch element (desc.length floats) per possible OpenMP thread,
// carved out of the caller's stack frame with alloca.
int numThreads = omp_get_max_threads();
float *vertexResultsArray = (float*)alloca(vertexDesc.length * sizeof(float) * numThreads);
float *varyingResultsArray = (float*)alloca(varyingDesc.length * sizeof(float) * numThreads);
#pragma omp parallel for
for (int i = start + tableOffset; i < end + tableOffset; i++) {
// E_IT stores two source vertex indices per edge entry.
int eidx0 = E_IT[2*i+0];
int eidx1 = E_IT[2*i+1];
int dstIndex = i + vertexOffset - tableOffset;
vdesc.Clear(vertex, varying, dstIndex);
vdesc.AddWithWeight(vertex, dstIndex, eidx0, 0.5f);
vdesc.AddWithWeight(vertex, dstIndex, eidx1, 0.5f);
// Select this thread's private slice of the scratch arrays.
int threadId = omp_get_thread_num();
float *vertexResults = vertexResultsArray +
vertexDesc.length * threadId;
float *varyingResults = varyingResultsArray +
varyingDesc.length * threadId;
vdesc.AddVaryingWithWeight(varying, dstIndex, eidx0, 0.5f);
vdesc.AddVaryingWithWeight(varying, dstIndex, eidx1, 0.5f);
// Accumulate into the scratch element, then publish it to dstIndex.
// clear/addWithWeight/copy are helpers defined outside this view;
// presumably they honor desc.length and desc.stride -- TODO confirm.
clear(vertexResults, vertexDesc);
clear(varyingResults, varyingDesc);
addWithWeight(vertexResults, vertex, eidx0, 0.5f, vertexDesc);
addWithWeight(vertexResults, vertex, eidx1, 0.5f, vertexDesc);
addWithWeight(varyingResults, varying, eidx0, 0.5f, varyingDesc);
addWithWeight(varyingResults, varying, eidx1, 0.5f, varyingDesc);
copy(vertex, vertexResults, dstIndex, vertexDesc);
copy(varying, varyingResults, dstIndex, varyingDesc);
}
}
// Bilinear vertex kernel (OpenMP): for every table entry in
// [start + tableOffset, end + tableOffset) copies source vertex V_ITa[i]
// (weight 1.0) into destination vertex (i + vertexOffset - tableOffset),
// for both the vertex and varying buffers.
//
// NOTE(review): this span is a rendered diff without +/- markers. Lines that
// reference `vdesc` (OsdVertexDescriptor) are the removed, pre-change side;
// lines that use vertexDesc / varyingDesc are the added side.
void OsdOmpComputeBilinearVertex(
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, int vertexOffset, int tableOffset, int start, int end) {
// One scratch element (desc.length floats) per possible OpenMP thread,
// carved out of the caller's stack frame with alloca.
int numThreads = omp_get_max_threads();
float *vertexResultsArray = (float*)alloca(vertexDesc.length * sizeof(float) * numThreads);
float *varyingResultsArray = (float*)alloca(varyingDesc.length * sizeof(float) * numThreads);
#pragma omp parallel for
for (int i = start + tableOffset; i < end + tableOffset; i++) {
// In this kernel V_ITa holds a single source index per entry.
int p = V_ITa[i];
int dstIndex = i + vertexOffset - tableOffset;
vdesc.Clear(vertex, varying, dstIndex);
vdesc.AddWithWeight(vertex, dstIndex, p, 1.0f);
vdesc.AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
// Select this thread's private slice of the scratch arrays.
int threadId = omp_get_thread_num();
float *vertexResults = vertexResultsArray +
vertexDesc.length * threadId;
float *varyingResults = varyingResultsArray +
varyingDesc.length * threadId;
// Accumulate into the scratch element, then publish it to dstIndex.
// clear/addWithWeight/copy are helpers defined outside this view;
// presumably they honor desc.length and desc.stride -- TODO confirm.
clear(vertexResults, vertexDesc);
clear(varyingResults, varyingDesc);
addWithWeight(vertexResults, vertex, p, 1.0f, vertexDesc);
addWithWeight(varyingResults, varying, p, 1.0f, varyingDesc);
copy(vertex, vertexResults, dstIndex, vertexDesc);
copy(varying, varyingResults, dstIndex, varyingDesc);
}
}
void OsdOmpEditVertexAdd(
OsdVertexDescriptor const &vdesc, float *vertex,
float * vertex,
OsdVertexBufferDescriptor const &vertexDesc,
int primVarOffset, int primVarWidth, int vertexOffset, int tableOffset,
int start, int end,
const unsigned int *editIndices, const float *editValues) {
#pragma omp parallel for
for (int i = start+tableOffset; i < end+tableOffset; i++) {
vdesc.ApplyVertexEditAdd(vertex,
primVarOffset,
primVarWidth,
editIndices[i] + vertexOffset,
&editValues[i*primVarWidth]);
if (vertex) {
int editIndex = editIndices[i] + vertexOffset;
float *dst = vertex + editIndex * vertexDesc.stride + primVarOffset;
for (int i = 0; i < primVarWidth; ++i) {
dst[i] += editValues[i];
}
}
}
}
void OsdOmpEditVertexSet(
OsdVertexDescriptor const &vdesc, float *vertex,
float * vertex,
OsdVertexBufferDescriptor const &vertexDesc,
int primVarOffset, int primVarWidth, int vertexOffset, int tableOffset,
int start, int end,
const unsigned int *editIndices, const float *editValues) {
#pragma omp parallel for
for (int i = start+tableOffset; i < end+tableOffset; i++) {
vdesc.ApplyVertexEditSet(vertex,
primVarOffset,
primVarWidth,
editIndices[i] + vertexOffset,
&editValues[i*primVarWidth]);
if (vertex) {
int editIndex = editIndices[i] + vertexOffset;
float *dst = vertex + editIndex * vertexDesc.stride + primVarOffset;
for (int i = 0; i < primVarWidth; ++i) {
dst[i] = editValues[i];
}
}
}
}

View File

@ -26,63 +26,73 @@
#define OSD_OMP_KERNEL_H
#include "../version.h"
#include "../osd/vertexDescriptor.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
struct OsdVertexDescriptor;
// OpenMP kernel entry points.
//
// NOTE(review): this span is a rendered diff without +/- markers. Each
// prototype appears twice: the first opening (taking
// `OsdVertexDescriptor const &vdesc`) is the removed signature, the second
// (taking vertexDesc / varyingDesc after the buffer pointers) is the new one.

// Face-vertex kernel.
void OsdOmpComputeFace(OsdVertexDescriptor const &vdesc,
float * vertex, float * varying,
void OsdOmpComputeFace(float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *F_IT, const int *F_ITa,
int vertexOffset, int tableOffset,
int start, int end);
// Edge-vertex kernel.
void OsdOmpComputeEdge(OsdVertexDescriptor const &vdesc,
float *vertex, float * varying,
void OsdOmpComputeEdge(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *E_IT, const float *E_ITa,
int vertexOffset, int tableOffset,
int start, int end);
// Vertex-vertex kernel A; `pass` selects the first (0) or second pass.
// NOTE(review): the float table is named V_IT here but V_W in the
// definition; only the parameter name differs.
void OsdOmpComputeVertexA(OsdVertexDescriptor const &vdesc,
float *vertex, float * varying,
void OsdOmpComputeVertexA(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, const float *V_IT,
int vertexOffset, int tableOffset,
int start, int end, int pass);
// Vertex-vertex kernel B.
void OsdOmpComputeVertexB(OsdVertexDescriptor const &vdesc,
float *vertex, float * varying,
void OsdOmpComputeVertexB(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, const int *V_IT, const float *V_W,
int vertexOffset, int tableOffset,
int start, int end);
// Loop-scheme vertex-vertex kernel B.
void OsdOmpComputeLoopVertexB(OsdVertexDescriptor const &vdesc,
float *vertex, float * varying,
void OsdOmpComputeLoopVertexB(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa, const int *V_IT,
const float *V_W,
int vertexOffset, int tableOffset,
int start, int end);
// Bilinear edge kernel.
void OsdOmpComputeBilinearEdge(OsdVertexDescriptor const &vdesc,
float *vertex, float * varying,
void OsdOmpComputeBilinearEdge(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *E_IT,
int vertexOffset, int tableOffset,
int start, int end);
// Bilinear vertex kernel.
void OsdOmpComputeBilinearVertex(OsdVertexDescriptor const &vdesc,
float *vertex, float * varying,
void OsdOmpComputeBilinearVertex(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
const int *V_ITa,
int vertexOffset, int tableOffset,
int start, int end);
// Additive vertex edit; takes only the vertex buffer and its descriptor.
void OsdOmpEditVertexAdd(OsdVertexDescriptor const &vdesc, float *vertex,
void OsdOmpEditVertexAdd(float *vertex,
OsdVertexBufferDescriptor const &vertexDesc,
int primVarOffset, int primVarWidth,
int vertexOffset, int tableOffset,
int start, int end,
const unsigned int *editIndices,
const float *editValues);
void OsdOmpEditVertexSet(OsdVertexDescriptor const &vdesc, float *vertex,
void OsdOmpEditVertexSet(float *vertex,
OsdVertexBufferDescriptor const &vertexDesc,
int primVarOffset, int primVarWidth,
int vertexOffset, int tableOffset,
int start, int end,

View File

@ -37,9 +37,7 @@ namespace OPENSUBDIV_VERSION {
OsdTbbComputeController::OsdTbbComputeController(int numThreads)
: _currentVertexBuffer(NULL),
_currentVaryingBuffer(NULL),
_numThreads(numThreads) {
: _numThreads(numThreads) {
if(_numThreads == -1)
tbb::task_scheduler_init init;
@ -55,7 +53,8 @@ OsdTbbComputeController::ApplyBilinearFaceVerticesKernel(
assert(context);
OsdTbbComputeFace(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::F_IT)->GetBuffer(),
(const int*)context->GetTable(FarSubdivisionTables::F_ITa)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
@ -68,7 +67,8 @@ OsdTbbComputeController::ApplyBilinearEdgeVerticesKernel(
assert(context);
OsdTbbComputeBilinearEdge(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
@ -80,7 +80,8 @@ OsdTbbComputeController::ApplyBilinearVertexVerticesKernel(
assert(context);
OsdTbbComputeBilinearVertex(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
@ -92,7 +93,8 @@ OsdTbbComputeController::ApplyCatmarkFaceVerticesKernel(
assert(context);
OsdTbbComputeFace(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::F_IT)->GetBuffer(),
(const int*)context->GetTable(FarSubdivisionTables::F_ITa)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
@ -105,7 +107,8 @@ OsdTbbComputeController::ApplyCatmarkEdgeVerticesKernel(
assert(context);
OsdTbbComputeEdge(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
(const float*)context->GetTable(FarSubdivisionTables::E_W)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
@ -118,7 +121,8 @@ OsdTbbComputeController::ApplyCatmarkVertexVerticesKernelB(
assert(context);
OsdTbbComputeVertexB(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
(const int*)context->GetTable(FarSubdivisionTables::V_IT)->GetBuffer(),
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
@ -132,7 +136,8 @@ OsdTbbComputeController::ApplyCatmarkVertexVerticesKernelA1(
assert(context);
OsdTbbComputeVertexA(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), false);
@ -145,7 +150,8 @@ OsdTbbComputeController::ApplyCatmarkVertexVerticesKernelA2(
assert(context);
OsdTbbComputeVertexA(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
@ -158,7 +164,8 @@ OsdTbbComputeController::ApplyLoopEdgeVerticesKernel(
assert(context);
OsdTbbComputeEdge(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
(const float*)context->GetTable(FarSubdivisionTables::E_W)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
@ -171,7 +178,8 @@ OsdTbbComputeController::ApplyLoopVertexVerticesKernelB(
assert(context);
OsdTbbComputeLoopVertexB(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
(const int*)context->GetTable(FarSubdivisionTables::V_IT)->GetBuffer(),
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
@ -185,7 +193,8 @@ OsdTbbComputeController::ApplyLoopVertexVerticesKernelA1(
assert(context);
OsdTbbComputeVertexA(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), false);
@ -198,7 +207,8 @@ OsdTbbComputeController::ApplyLoopVertexVerticesKernelA2(
assert(context);
OsdTbbComputeVertexA(
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
@ -217,24 +227,24 @@ OsdTbbComputeController::ApplyVertexEdits(
const OsdCpuTable * editValues = edit->GetEditValues();
if (edit->GetOperation() == FarVertexEdit::Add) {
OsdTbbEditVertexAdd(_vdesc,
_currentVertexBuffer,
OsdTbbEditVertexAdd(_currentBindState.vertexBuffer,
_currentBindState.vertexDesc,
edit->GetPrimvarOffset(),
edit->GetPrimvarWidth(),
batch.GetVertexOffset(),
batch.GetTableOffset(),
batch.GetStart(),
batch.GetVertexOffset(),
batch.GetTableOffset(),
batch.GetStart(),
batch.GetEnd(),
static_cast<unsigned int*>(primvarIndices->GetBuffer()),
static_cast<float*>(editValues->GetBuffer()));
} else if (edit->GetOperation() == FarVertexEdit::Set) {
OsdTbbEditVertexSet(_vdesc,
_currentVertexBuffer,
OsdTbbEditVertexSet(_currentBindState.vertexBuffer,
_currentBindState.vertexDesc,
edit->GetPrimvarOffset(),
edit->GetPrimvarWidth(),
batch.GetVertexOffset(),
batch.GetTableOffset(),
batch.GetStart(),
batch.GetVertexOffset(),
batch.GetTableOffset(),
batch.GetStart(),
batch.GetEnd(),
static_cast<unsigned int*>(primvarIndices->GetBuffer()),
static_cast<float*>(editValues->GetBuffer()));

View File

@ -29,6 +29,7 @@
#include "../far/dispatcher.h"
#include "../osd/cpuComputeContext.h"
#include "../osd/vertexDescriptor.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
@ -65,13 +66,23 @@ public:
///
/// @param varyingBuffer varying-interpolated data buffer
///
/// @param vertexDesc the descriptor of vertex elements to be refined.
/// if it's null, all primvars in the vertex buffer
/// will be refined.
///
/// @param varyingDesc the descriptor of varying elements to be refined.
/// if it's null, all primvars in the varying buffer
/// will be refined.
///
template<class VERTEX_BUFFER, class VARYING_BUFFER>
void Refine(OsdCpuComputeContext const *context,
FarKernelBatchVector const & batches,
VERTEX_BUFFER * vertexBuffer,
VARYING_BUFFER * varyingBuffer) {
VARYING_BUFFER * varyingBuffer,
OsdVertexBufferDescriptor const *vertexDesc=NULL,
OsdVertexBufferDescriptor const *varyingDesc=NULL) {
bind(vertexBuffer, varyingBuffer);
bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc);
FarDispatcher::Refine(this, context, batches, /*maxlevel*/-1);
@ -128,25 +139,61 @@ protected:
void ApplyVertexEdits(FarKernelBatch const &batch, ComputeContext const *context) const;
private:
// Captures the CPU buffers and element layouts used by the kernels of the
// current refine call into _currentBindState.
//
// Either buffer may be null; its bound pointer is then NULL and its default
// descriptor is built from zero elements. Either descriptor may be null, in
// which case a default one is derived from the buffer's GetNumElements().
//
// NOTE(review): this span is a rendered diff without +/- markers. The
// two-argument `bind` opening and the lines touching _currentVertexBuffer,
// _currentVaryingBuffer and _vdesc are the removed, pre-change side.
template<class VERTEX_BUFFER, class VARYING_BUFFER>
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying) {
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying,
OsdVertexBufferDescriptor const *vertexDesc,
OsdVertexBufferDescriptor const *varyingDesc) {
_currentVertexBuffer = vertex ? vertex->BindCpuBuffer() : 0;
_currentVaryingBuffer = varying ? varying->BindCpuBuffer() : 0;
// if the vertex buffer descriptor is specified, use it.
// otherwise, assumes the data is tightly packed in the vertex buffer.
if (vertexDesc) {
_currentBindState.vertexDesc = *vertexDesc;
} else {
int numElements = vertex ? vertex->GetNumElements() : 0;
// presumably (offset, length, stride): offset 0 and
// length == stride == numElements -- TODO confirm constructor order
_currentBindState.vertexDesc = OsdVertexBufferDescriptor(
0, numElements, numElements);
}
// same defaulting rule for the varying descriptor
if (varyingDesc) {
_currentBindState.varyingDesc = *varyingDesc;
} else {
int numElements = varying ? varying->GetNumElements() : 0;
_currentBindState.varyingDesc = OsdVertexBufferDescriptor(
0, numElements, numElements);
}
int numVertexElements = vertex ? vertex->GetNumElements() : 0;
int numVaryingElements = varying ? varying->GetNumElements() : 0;
_vdesc.Set(numVertexElements, numVaryingElements);
// apply vertex offset here
// (the bound pointers are advanced by desc.offset floats, so the kernels
// receive a base pointer that already includes the offset)
if (vertex) {
_currentBindState.vertexBuffer =
vertex->BindCpuBuffer() + _currentBindState.vertexDesc.offset;
} else {
_currentBindState.vertexBuffer = NULL;
}
if (varying) {
_currentBindState.varyingBuffer =
varying->BindCpuBuffer() + _currentBindState.varyingDesc.offset;
} else {
_currentBindState.varyingBuffer = NULL;
}
}
// Drops the state captured by bind().
//
// NOTE(review): this span is a rendered diff without +/- markers. The three
// lines touching _currentVertexBuffer, _currentVaryingBuffer and _vdesc are
// the removed side; the new body only resets _currentBindState.
void unbind() {
_currentVertexBuffer = 0;
_currentVaryingBuffer = 0;
_vdesc.Reset();
_currentBindState.Reset();
}
float *_currentVertexBuffer, *_currentVaryingBuffer;
OsdVertexDescriptor _vdesc;
private:
// Buffers and element layouts captured for the duration of one refine call.
struct BindState {
    // Starts with no buffers bound; the descriptors are default-constructed.
    BindState()
        : vertexBuffer(NULL),
          varyingBuffer(NULL) {
    }

    // Forgets both buffer pointers and resets both descriptors.
    void Reset() {
        varyingBuffer = NULL;
        vertexBuffer = varyingBuffer;
        vertexDesc.Reset();
        varyingDesc.Reset();
    }

    float *vertexBuffer;    // vertex data pointer handed to the kernels
    float *varyingBuffer;   // varying data pointer handed to the kernels
    OsdVertexBufferDescriptor vertexDesc;   // layout of vertexBuffer
    OsdVertexBufferDescriptor varyingDesc;  // layout of varyingBuffer
};
BindState _currentBindState;
int _numThreads;
};

View File

@ -34,10 +34,33 @@ namespace OPENSUBDIV_VERSION {
#define grain_size 200
// Zero-fills the desc.length floats of element `index`, where consecutive
// elements are desc.stride floats apart starting at `origin`.
// A null `origin` is ignored.
static inline void
clear(float *origin, int index, OsdVertexBufferDescriptor const &desc) {
    if (origin == NULL) {
        return;
    }
    float *element = origin + index * desc.stride;
    memset(element, 0, desc.length * sizeof(float));
}
// Accumulates element `srcIndex` scaled by `weight` into element `dstIndex`
// of the same buffer. Elements are desc.stride floats apart and only the
// first desc.length floats of each are touched. A null `origin` is ignored.
static inline void
addWithWeight(float *origin, int dstIndex, int srcIndex,
              float weight, OsdVertexBufferDescriptor const &desc) {
    if (origin == NULL) {
        return;
    }
    float *out = origin + dstIndex * desc.stride;
    const float *in = origin + srcIndex * desc.stride;
    int k = 0;
    while (k < desc.length) {
        out[k] += in[k] * weight;
        ++k;
    }
}
class TBBFaceKernel {
OsdVertexDescriptor const *vdesc;
float *vertex;
float *varying;
OsdVertexBufferDescriptor vertexDesc;
OsdVertexBufferDescriptor varyingDesc;
int const *F_IT;
int const *F_ITa;
int vertexOffset;
@ -45,10 +68,10 @@ class TBBFaceKernel {
public:
void operator() (tbb::blocked_range<int> const &r) const {
if(vdesc->numVertexElements == 4 && varying == NULL) {
if(vertexDesc.length == 4 && varying == NULL) {
ComputeFaceKernel<4>
(vertex, F_IT, F_ITa, vertexOffset, tableOffset, r.begin(), r.end());
} else if(vdesc->numVertexElements == 8 && varying == NULL) {
} else if(vertexDesc.length == 8 && varying == NULL) {
ComputeFaceKernel<8>
(vertex, F_IT, F_ITa, vertexOffset, tableOffset, r.begin(), r.end());
}
@ -62,12 +85,14 @@ public:
// XXX: should use local vertex struct variable instead of
// accumulating directly into global memory.
int dstIndex = i + vertexOffset - tableOffset;
vdesc->Clear(vertex, varying, dstIndex);
clear(vertex, dstIndex, vertexDesc);
clear(varying, dstIndex, varyingDesc);
for (int j = 0; j < n; ++j) {
int index = F_IT[h+j];
vdesc->AddWithWeight(vertex, dstIndex, index, weight);
vdesc->AddVaryingWithWeight(varying, dstIndex, index, weight);
addWithWeight(vertex, dstIndex, index, weight, vertexDesc);
addWithWeight(varying, dstIndex, index, weight, varyingDesc);
}
}
}
@ -75,25 +100,28 @@ public:
TBBFaceKernel(TBBFaceKernel const &other)
{
this->vdesc = other.vdesc;
this->vertex = other.vertex;
this->varying= other.varying;
this->vertexDesc = other.vertexDesc;
this->varyingDesc = other.varyingDesc;
this->F_IT = other.F_IT;
this->F_ITa = other.F_ITa;
this->vertexOffset = other.vertexOffset;
this->tableOffset = other.tableOffset;
}
TBBFaceKernel(OsdVertexDescriptor const *vdesc_in,
float *vertex_in,
TBBFaceKernel(float *vertex_in,
float *varying_in,
OsdVertexBufferDescriptor const &vertexDesc_in,
OsdVertexBufferDescriptor const &varyingDesc_in,
int const *F_IT_in,
int const *F_ITa_in,
int vertexOffset_in,
int tableOffset_in) :
vdesc (vdesc_in),
vertex (vertex_in),
varying(varying_in),
vertexDesc(vertexDesc_in),
varyingDesc(varyingDesc_in),
F_IT (F_IT_in),
F_ITa (F_ITa_in),
vertexOffset(vertexOffset_in),
@ -102,20 +130,23 @@ public:
};
// Runs the face kernel over [start, end) with tbb::parallel_for, split into
// ranges using grain_size as the blocked_range grain.
//
// NOTE(review): this span is a rendered diff without +/- markers. The
// `vdesc` parameter line and the `kernel(&vdesc, ...` construction are the
// removed side; the kernel now receives both descriptors by value.
void OsdTbbComputeFace(
OsdVertexDescriptor const &vdesc, float * vertex, float * varying,
float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
int const *F_IT, int const *F_ITa, int vertexOffset, int tableOffset,
int start, int end) {
TBBFaceKernel kernel(&vdesc, vertex, varying, F_IT, F_ITa,
TBBFaceKernel kernel(vertex, varying, vertexDesc, varyingDesc, F_IT, F_ITa,
vertexOffset, tableOffset);
tbb::blocked_range<int> range(start, end, grain_size);
tbb::parallel_for(range, kernel);
}
class TBBEdgeKernel {
OsdVertexDescriptor const *vdesc;
float *vertex;
float *varying;
OsdVertexBufferDescriptor vertexDesc;
OsdVertexBufferDescriptor varyingDesc;
int const *E_IT;
float const *E_W;
int vertexOffset;
@ -123,11 +154,11 @@ class TBBEdgeKernel {
public:
void operator() (tbb::blocked_range<int> const &r) const {
if(vdesc->numVertexElements == 4 && varying == NULL) {
if(vertexDesc.length == 4 && varying == NULL) {
ComputeEdgeKernel<4>(vertex, E_IT, E_W, vertexOffset, tableOffset,
r.begin(), r.end());
}
else if(vdesc->numVertexElements == 8 && varying == NULL) {
else if(vertexDesc.length == 8 && varying == NULL) {
ComputeEdgeKernel<8>(vertex, E_IT, E_W, vertexOffset, tableOffset,
r.begin(), r.end());
}
@ -141,45 +172,49 @@ public:
float vertWeight = E_W[i*2+0];
int dstIndex = i + vertexOffset - tableOffset;
vdesc->Clear(vertex, varying, dstIndex);
clear(vertex, dstIndex, vertexDesc);
clear(varying, dstIndex, varyingDesc);
vdesc->AddWithWeight(vertex, dstIndex, eidx0, vertWeight);
vdesc->AddWithWeight(vertex, dstIndex, eidx1, vertWeight);
addWithWeight(vertex, dstIndex, eidx0, vertWeight, vertexDesc);
addWithWeight(vertex, dstIndex, eidx1, vertWeight, vertexDesc);
if (eidx2 != -1) {
float faceWeight = E_W[i*2+1];
vdesc->AddWithWeight(vertex, dstIndex, eidx2, faceWeight);
vdesc->AddWithWeight(vertex, dstIndex, eidx3, faceWeight);
addWithWeight(vertex, dstIndex, eidx2, faceWeight, vertexDesc);
addWithWeight(vertex, dstIndex, eidx3, faceWeight, vertexDesc);
}
vdesc->AddVaryingWithWeight(varying, dstIndex, eidx0, 0.5f);
vdesc->AddVaryingWithWeight(varying, dstIndex, eidx1, 0.5f);
addWithWeight(varying, dstIndex, eidx0, 0.5f, varyingDesc);
addWithWeight(varying, dstIndex, eidx1, 0.5f, varyingDesc);
}
}
}
TBBEdgeKernel(TBBEdgeKernel const &other)
{
this->vdesc = other.vdesc;
this->vertex = other.vertex;
this->varying= other.varying;
this->vertexDesc = other.vertexDesc;
this->varyingDesc = other.varyingDesc;
this->E_IT = other.E_IT;
this->E_W = other.E_W;
this->vertexOffset = other.vertexOffset;
this->tableOffset = other.tableOffset;
}
TBBEdgeKernel(OsdVertexDescriptor const *vdesc_in,
float *vertex_in,
TBBEdgeKernel(float *vertex_in,
float *varying_in,
OsdVertexBufferDescriptor const &vertexDesc_in,
OsdVertexBufferDescriptor const &varyingDesc_in,
int const *E_IT_in,
float const *E_W_in,
int vertexOffset_in,
int tableOffset_in) :
vdesc (vdesc_in),
vertex (vertex_in),
varying(varying_in),
vertexDesc(vertexDesc_in),
varyingDesc(varyingDesc_in),
E_IT (E_IT_in),
E_W (E_W_in),
vertexOffset(vertexOffset_in),
@ -189,19 +224,22 @@ public:
// Runs the edge kernel over [start, end) with tbb::parallel_for, split into
// ranges using grain_size as the blocked_range grain.
//
// NOTE(review): this span is a rendered diff without +/- markers. The
// `vdesc` parameter line and the `kernel(&vdesc, ...` construction are the
// removed side; the kernel now receives both descriptors by value.
void OsdTbbComputeEdge(
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
float *vertex, float *varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
int const *E_IT, float const *E_W, int vertexOffset, int tableOffset,
int start, int end) {
tbb::blocked_range<int> range(start, end, grain_size);
TBBEdgeKernel kernel(&vdesc, vertex, varying, E_IT, E_W,
TBBEdgeKernel kernel(vertex, varying, vertexDesc, varyingDesc, E_IT, E_W,
vertexOffset, tableOffset);
tbb::parallel_for(range, kernel);
}
class TBBVertexKernelA {
OsdVertexDescriptor const *vdesc;
float *vertex;
float *varying;
OsdVertexBufferDescriptor vertexDesc;
OsdVertexBufferDescriptor varyingDesc;
int const *V_ITa;
float const *V_W;
int vertexOffset;
@ -210,11 +248,11 @@ class TBBVertexKernelA {
public:
void operator() (tbb::blocked_range<int> const &r) const {
if(vdesc->numVertexElements == 4 && varying == NULL) {
if(vertexDesc.length == 4 && varying == NULL) {
ComputeVertexAKernel<4>(vertex, V_ITa, V_W, vertexOffset, tableOffset,
r.begin(), r.end(), pass);
}
else if (vdesc->numVertexElements == 8 && varying == NULL) {
else if (vertexDesc.length == 8 && varying == NULL) {
ComputeVertexAKernel<8>(vertex, V_ITa, V_W, vertexOffset, tableOffset,
r.begin(), r.end(), pass);
}
@ -235,28 +273,31 @@ public:
int dstIndex = i + vertexOffset - tableOffset;
if (not pass)
vdesc->Clear(vertex, varying, dstIndex);
if (not pass) {
clear(vertex, dstIndex, vertexDesc);
clear(varying, dstIndex, varyingDesc);
}
if (eidx0 == -1 || (pass == 0 && (n == -1))) {
vdesc->AddWithWeight(vertex, dstIndex, p, weight);
addWithWeight(vertex, dstIndex, p, weight, vertexDesc);
} else {
vdesc->AddWithWeight(vertex, dstIndex, p, weight * 0.75f);
vdesc->AddWithWeight(vertex, dstIndex, eidx0, weight * 0.125f);
vdesc->AddWithWeight(vertex, dstIndex, eidx1, weight * 0.125f);
addWithWeight(vertex, dstIndex, p, weight * 0.75f, vertexDesc);
addWithWeight(vertex, dstIndex, eidx0, weight * 0.125f, vertexDesc);
addWithWeight(vertex, dstIndex, eidx1, weight * 0.125f, vertexDesc);
}
if (not pass)
vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
addWithWeight(varying, dstIndex, p, 1.0f, varyingDesc);
}
}
}
TBBVertexKernelA(TBBVertexKernelA const &other)
{
this->vdesc = other.vdesc;
this->vertex = other.vertex;
this->varying= other.varying;
this->vertexDesc = other.vertexDesc;
this->varyingDesc = other.varyingDesc;
this->V_ITa = other.V_ITa;
this->V_W = other.V_W;
this->vertexOffset = other.vertexOffset;
@ -264,17 +305,19 @@ public:
this->pass = other.pass;
}
TBBVertexKernelA(OsdVertexDescriptor const *vdesc_in,
float *vertex_in,
TBBVertexKernelA(float *vertex_in,
float *varying_in,
OsdVertexBufferDescriptor const &vertexDesc_in,
OsdVertexBufferDescriptor const &varyingDesc_in,
int const *V_ITa_in,
float const *V_W_in,
int vertexOffset_in,
int tableOffset_in,
int pass_in) :
vdesc (vdesc_in),
vertex (vertex_in),
varying(varying_in),
vertexDesc(vertexDesc_in),
varyingDesc(varyingDesc_in),
V_ITa (V_ITa_in),
V_W (V_W_in),
vertexOffset(vertexOffset_in),
@ -284,19 +327,23 @@ public:
};
// Runs vertex kernel A over [start, end) with tbb::parallel_for, split into
// ranges using grain_size as the blocked_range grain. `pass` is forwarded
// unchanged to the kernel.
//
// NOTE(review): this span is a rendered diff without +/- markers. The
// `vdesc` parameter line and the `kernel(&vdesc, ...` construction are the
// removed side; the kernel now receives both descriptors by value.
void OsdTbbComputeVertexA(
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
float *vertex, float *varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
int const *V_ITa, float const *V_W, int vertexOffset, int tableOffset,
int start, int end, int pass) {
tbb::blocked_range<int> range(start, end, grain_size);
TBBVertexKernelA kernel(&vdesc, vertex, varying, V_ITa, V_W,
TBBVertexKernelA kernel(vertex, varying, vertexDesc, varyingDesc,
V_ITa, V_W,
vertexOffset, tableOffset, pass);
tbb::parallel_for(range, kernel);
}
class TBBVertexKernelB {
OsdVertexDescriptor const *vdesc;
float *vertex;
float *varying;
OsdVertexBufferDescriptor vertexDesc;
OsdVertexBufferDescriptor varyingDesc;
int const *V_ITa;
int const *V_IT;
float const *V_W;
@ -305,11 +352,11 @@ class TBBVertexKernelB {
public:
void operator() (tbb::blocked_range<int> const &r) const {
if(vdesc->numVertexElements == 4 && varying == NULL) {
if(vertexDesc.length == 4 && varying == NULL) {
ComputeVertexBKernel<4>(vertex, V_ITa, V_IT, V_W,
vertexOffset, tableOffset, r.begin(), r.end());
}
else if(vdesc->numVertexElements == 8 && varying == NULL) {
else if(vertexDesc.length == 8 && varying == NULL) {
ComputeVertexBKernel<8>(vertex, V_ITa, V_IT, V_W,
vertexOffset, tableOffset, r.begin(), r.end());
}
@ -324,24 +371,26 @@ public:
float wv = (n-2.0f) * n * wp;
int dstIndex = i + vertexOffset - tableOffset;
vdesc->Clear(vertex, varying, dstIndex);
clear(vertex, dstIndex, vertexDesc);
clear(varying, dstIndex, varyingDesc);
vdesc->AddWithWeight(vertex, dstIndex, p, weight * wv);
addWithWeight(vertex, dstIndex, p, weight * wv, vertexDesc);
for (int j = 0; j < n; ++j) {
vdesc->AddWithWeight(vertex, dstIndex, V_IT[h+j*2], weight * wp);
vdesc->AddWithWeight(vertex, dstIndex, V_IT[h+j*2+1], weight * wp);
addWithWeight(vertex, dstIndex, V_IT[h+j*2], weight * wp, vertexDesc);
addWithWeight(vertex, dstIndex, V_IT[h+j*2+1], weight * wp, vertexDesc);
}
vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
addWithWeight(varying, dstIndex, p, 1.0f, varyingDesc);
}
}
}
TBBVertexKernelB(TBBVertexKernelB const &other)
{
this->vdesc = other.vdesc;
this->vertex = other.vertex;
this->varying= other.varying;
this->vertexDesc = other.vertexDesc;
this->varyingDesc = other.varyingDesc;
this->V_ITa = other.V_ITa;
this->V_IT = other.V_IT;
this->V_W = other.V_W;
@ -349,17 +398,19 @@ public:
this->tableOffset = other.tableOffset;
}
TBBVertexKernelB(OsdVertexDescriptor const *vdesc_in,
float *vertex_in,
TBBVertexKernelB(float *vertex_in,
float *varying_in,
OsdVertexBufferDescriptor const &vertexDesc_in,
OsdVertexBufferDescriptor const &varyingDesc_in,
int const *V_ITa_in,
int const *V_IT_in,
float const *V_W_in,
int vertexOffset_in,
int tableOffset_in) :
vdesc (vdesc_in),
vertex (vertex_in),
varying(varying_in),
vertexDesc(vertexDesc_in),
varyingDesc(varyingDesc_in),
V_ITa (V_ITa_in),
V_IT (V_IT_in),
V_W (V_W_in),
@ -369,20 +420,24 @@ public:
};
// Entry point for the "vertex B" kernel: wraps the arguments in a
// TBBVertexKernelB functor and runs it with tbb::parallel_for over a
// tbb::blocked_range<int> built from start/end (grain_size is defined
// outside this excerpt).
// NOTE(review): this listing interleaves the removed OsdVertexDescriptor
// (vdesc) lines with the added OsdVertexBufferDescriptor lines of the commit.
void OsdTbbComputeVertexB(
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
float *vertex, float *varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
int const *V_ITa, int const *V_IT, float const *V_W,
int vertexOffset, int tableOffset, int start, int end) {
tbb::blocked_range<int> range(start, end, grain_size);
TBBVertexKernelB kernel(&vdesc, vertex, varying, V_ITa, V_IT, V_W,
TBBVertexKernelB kernel(vertex, varying, vertexDesc, varyingDesc,
V_ITa, V_IT, V_W,
vertexOffset, tableOffset);
// The kernel functor is copied by TBB and invoked on sub-ranges of `range`.
tbb::parallel_for(range, kernel);
}
class TBBLoopVertexKernelB {
OsdVertexDescriptor const *vdesc;
float *vertex;
float *varying;
OsdVertexBufferDescriptor vertexDesc;
OsdVertexBufferDescriptor varyingDesc;
int const *V_ITa;
int const *V_IT;
float const *V_W;
@ -391,11 +446,11 @@ class TBBLoopVertexKernelB {
public:
void operator() (tbb::blocked_range<int> const &r) const {
if(vdesc->numVertexElements == 4 && varying == NULL) {
if(vertexDesc.length == 4 && varying == NULL) {
ComputeLoopVertexBKernel<4>(vertex, V_ITa, V_IT, V_W, vertexOffset,
tableOffset, r.begin(), r.end());
}
else if(vdesc->numVertexElements == 8 && varying == NULL) {
else if(vertexDesc.length == 8 && varying == NULL) {
ComputeLoopVertexBKernel<8>(vertex, V_ITa, V_IT, V_W, vertexOffset,
tableOffset, r.begin(), r.end());
}
@ -412,23 +467,25 @@ public:
beta = (0.625f - beta) * wp;
int dstIndex = i + vertexOffset - tableOffset;
vdesc->Clear(vertex, varying, dstIndex);
clear(vertex, dstIndex, vertexDesc);
clear(varying, dstIndex, varyingDesc);
vdesc->AddWithWeight(vertex, dstIndex, p, weight * (1.0f - (beta * n)));
addWithWeight(vertex, dstIndex, p, weight * (1.0f - (beta * n)), vertexDesc);
for (int j = 0; j < n; ++j)
vdesc->AddWithWeight(vertex, dstIndex, V_IT[h+j], weight * beta);
addWithWeight(vertex, dstIndex, V_IT[h+j], weight * beta, vertexDesc);
vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
addWithWeight(varying, dstIndex, p, 1.0f, varyingDesc);
}
}
}
TBBLoopVertexKernelB(TBBLoopVertexKernelB const &other)
{
this->vdesc = other.vdesc;
this->vertex = other.vertex;
this->varying= other.varying;
this->vertexDesc = other.vertexDesc;
this->varyingDesc = other.varyingDesc;
this->V_ITa = other.V_ITa;
this->V_IT = other.V_IT;
this->V_W = other.V_W;
@ -436,17 +493,19 @@ public:
this->tableOffset = other.tableOffset;
}
TBBLoopVertexKernelB(OsdVertexDescriptor const *vdesc_in,
float *vertex_in,
TBBLoopVertexKernelB(float *vertex_in,
float *varying_in,
OsdVertexBufferDescriptor const &vertexDesc_in,
OsdVertexBufferDescriptor const &varyingDesc_in,
int const *V_ITa_in,
int const *V_IT_in,
float const *V_W_in,
int vertexOffset_in,
int tableOffset_in) :
vdesc (vdesc_in),
vertex (vertex_in),
varying(varying_in),
vertexDesc(vertexDesc_in),
varyingDesc(varyingDesc_in),
V_ITa (V_ITa_in),
V_IT (V_IT_in),
V_W (V_W_in),
@ -456,31 +515,35 @@ public:
};
// Entry point for the Loop-scheme "vertex B" kernel: wraps the arguments in a
// TBBLoopVertexKernelB functor and runs it with tbb::parallel_for over a
// tbb::blocked_range<int> built from start/end (grain_size is defined
// outside this excerpt).
// NOTE(review): this listing interleaves the removed OsdVertexDescriptor
// (vdesc) lines with the added OsdVertexBufferDescriptor lines of the commit.
void OsdTbbComputeLoopVertexB(
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
float *vertex, float *varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
int const *V_ITa, int const *V_IT, float const *V_W,
int vertexOffset, int tableOffset, int start, int end) {
tbb::blocked_range<int> range(start, end, grain_size);
TBBLoopVertexKernelB kernel(&vdesc, vertex, varying, V_ITa, V_IT, V_W,
TBBLoopVertexKernelB kernel(vertex, varying, vertexDesc, varyingDesc,
V_ITa, V_IT, V_W,
vertexOffset, tableOffset);
// The kernel functor is copied by TBB and invoked on sub-ranges of `range`.
tbb::parallel_for(range, kernel);
}
class TBBBilinearEdgeKernel {
OsdVertexDescriptor const *vdesc;
float *vertex;
float *varying;
OsdVertexBufferDescriptor vertexDesc;
OsdVertexBufferDescriptor varyingDesc;
int const *E_IT;
int vertexOffset;
int tableOffset;
public:
void operator() (tbb::blocked_range<int> const &r) const {
if(vdesc->numVertexElements == 4 && varying == NULL) {
if(vertexDesc.length == 4 && varying == NULL) {
ComputeBilinearEdgeKernel<4>(vertex, E_IT, vertexOffset, tableOffset,
r.begin(), r.end());
}
else if(vdesc->numVertexElements == 8 && varying == NULL) {
else if(vertexDesc.length == 8 && varying == NULL) {
ComputeBilinearEdgeKernel<8>(vertex, E_IT, vertexOffset, tableOffset,
r.begin(), r.end());
}
@ -490,36 +553,40 @@ public:
int eidx1 = E_IT[2*i+1];
int dstIndex = i + vertexOffset - tableOffset;
vdesc->Clear(vertex, varying, dstIndex);
clear(vertex, dstIndex, vertexDesc);
clear(varying, dstIndex, varyingDesc);
vdesc->AddWithWeight(vertex, dstIndex, eidx0, 0.5f);
vdesc->AddWithWeight(vertex, dstIndex, eidx1, 0.5f);
addWithWeight(vertex, dstIndex, eidx0, 0.5f, vertexDesc);
addWithWeight(vertex, dstIndex, eidx1, 0.5f, vertexDesc);
vdesc->AddVaryingWithWeight(varying, dstIndex, eidx0, 0.5f);
vdesc->AddVaryingWithWeight(varying, dstIndex, eidx1, 0.5f);
addWithWeight(varying, dstIndex, eidx0, 0.5f, varyingDesc);
addWithWeight(varying, dstIndex, eidx1, 0.5f, varyingDesc);
}
}
}
TBBBilinearEdgeKernel(TBBBilinearEdgeKernel const &other)
{
this->vdesc = other.vdesc;
this->vertex = other.vertex;
this->varying= other.varying;
this->vertexDesc = other.vertexDesc;
this->varyingDesc = other.varyingDesc;
this->E_IT = other.E_IT;
this->vertexOffset = other.vertexOffset;
this->tableOffset = other.tableOffset;
}
TBBBilinearEdgeKernel(OsdVertexDescriptor const *vdesc_in,
float *vertex_in,
TBBBilinearEdgeKernel(float *vertex_in,
float *varying_in,
OsdVertexBufferDescriptor const &vertexDesc_in,
OsdVertexBufferDescriptor const &varyingDesc_in,
int const *E_IT_in,
int vertexOffset_in,
int tableOffset_in) :
vdesc (vdesc_in),
vertex (vertex_in),
varying(varying_in),
vertexDesc(vertexDesc_in),
varyingDesc(varyingDesc_in),
E_IT (E_IT_in),
vertexOffset(vertexOffset_in),
tableOffset(tableOffset_in)
@ -527,25 +594,29 @@ public:
};
// Entry point for the bilinear edge kernel: wraps the arguments in a
// TBBBilinearEdgeKernel functor and runs it with tbb::parallel_for over a
// tbb::blocked_range<int> built from start/end (grain_size is defined
// outside this excerpt).
// NOTE(review): this listing interleaves the removed OsdVertexDescriptor
// (vdesc) lines with the added OsdVertexBufferDescriptor lines of the commit.
void OsdTbbComputeBilinearEdge(
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
float *vertex, float *varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
int const *E_IT, int vertexOffset, int tableOffset, int start, int end) {
tbb::blocked_range<int> range(start, end, grain_size);
TBBBilinearEdgeKernel kernel(&vdesc, vertex, varying, E_IT, vertexOffset, tableOffset);
TBBBilinearEdgeKernel kernel(vertex, varying, vertexDesc, varyingDesc,
E_IT, vertexOffset, tableOffset);
// The kernel functor is copied by TBB and invoked on sub-ranges of `range`.
tbb::parallel_for(range, kernel);
}
class TBBBilinearVertexKernel {
OsdVertexDescriptor const *vdesc;
float *vertex;
float *varying;
OsdVertexBufferDescriptor vertexDesc;
OsdVertexBufferDescriptor varyingDesc;
int const *V_ITa;
int vertexOffset;
int tableOffset;
public:
void operator() (tbb::blocked_range<int> const &r) const {
int numVertexElements = vdesc->numVertexElements;
int numVaryingElements = vdesc->numVaryingElements;
int numVertexElements = vertexDesc.length;
int numVaryingElements = varyingDesc.length;
float *src, *des;
for (int i = r.begin() + tableOffset; i < r.end() + tableOffset; i++) {
int p = V_ITa[i];
@ -564,23 +635,26 @@ public:
TBBBilinearVertexKernel(TBBBilinearVertexKernel const &other)
{
this->vdesc = other.vdesc;
this->vertex = other.vertex;
this->varying= other.varying;
this->vertexDesc = other.vertexDesc;
this->varyingDesc = other.varyingDesc;
this->V_ITa = other.V_ITa;
this->vertexOffset = other.vertexOffset;
this->tableOffset = other.tableOffset;
}
TBBBilinearVertexKernel(OsdVertexDescriptor const *vdesc_in,
float *vertex_in,
TBBBilinearVertexKernel(float *vertex_in,
float *varying_in,
OsdVertexBufferDescriptor const &vertexDesc_in,
OsdVertexBufferDescriptor const &varyingDesc_in,
int const *V_ITa_in,
int vertexOffset_in,
int tableOffset_in) :
vdesc (vdesc_in),
vertex (vertex_in),
varying(varying_in),
vertexDesc(vertexDesc_in),
varyingDesc(varyingDesc_in),
V_ITa (V_ITa_in),
vertexOffset(vertexOffset_in),
tableOffset(tableOffset_in)
@ -588,40 +662,53 @@ public:
};
// Entry point for the bilinear vertex kernel: wraps the arguments in a
// TBBBilinearVertexKernel functor and runs it with tbb::parallel_for over a
// tbb::blocked_range<int> built from start/end (grain_size is defined
// outside this excerpt).
// NOTE(review): this listing interleaves the removed OsdVertexDescriptor
// (vdesc) lines with the added OsdVertexBufferDescriptor lines of the commit.
void OsdTbbComputeBilinearVertex(
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
float *vertex, float *varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
int const *V_ITa, int vertexOffset, int tableOffset, int start, int end) {
tbb::blocked_range<int> range(start, end, grain_size);
TBBBilinearVertexKernel kernel(&vdesc, vertex, varying, V_ITa, vertexOffset, tableOffset);
TBBBilinearVertexKernel kernel(vertex, varying, vertexDesc, varyingDesc,
V_ITa, vertexOffset, tableOffset);
// The kernel functor is copied by TBB and invoked on sub-ranges of `range`.
tbb::parallel_for(range, kernel);
}
void OsdTbbEditVertexAdd(
OsdVertexDescriptor const &vdesc, float *vertex,
float *vertex,
OsdVertexBufferDescriptor const &vertexDesc,
int primVarOffset, int primVarWidth, int vertexOffset, int tableOffset,
int start, int end,
unsigned int const *editIndices, float const *editValues) {
for (int i = start+tableOffset; i < end+tableOffset; i++) {
vdesc.ApplyVertexEditAdd(vertex,
primVarOffset,
primVarWidth,
editIndices[i] + vertexOffset,
&editValues[i*primVarWidth]);
if (vertex) {
int editIndex = editIndices[i] + vertexOffset;
float *dst = vertex + editIndex * vertexDesc.stride + primVarOffset;
for (int i = 0; i < primVarWidth; ++i) {
dst[i] += editValues[i];
}
}
}
}
void OsdTbbEditVertexSet(
OsdVertexDescriptor const &vdesc, float *vertex,
float *vertex,
OsdVertexBufferDescriptor const &vertexDesc,
int primVarOffset, int primVarWidth, int vertexOffset, int tableOffset,
int start, int end,
unsigned int const *editIndices, float const *editValues) {
for (int i = start+tableOffset; i < end+tableOffset; i++) {
vdesc.ApplyVertexEditSet(vertex,
primVarOffset,
primVarWidth,
editIndices[i] + vertexOffset,
&editValues[i*primVarWidth]);
if (vertex) {
int editIndex = editIndices[i] + vertexOffset;
float *dst = vertex + editIndex * vertexDesc.stride + primVarOffset;
for (int i = 0; i < primVarWidth; ++i) {
dst[i] = editValues[i];
}
}
}
}

View File

@ -30,59 +30,68 @@
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
struct OsdVertexDescriptor;
struct OsdVertexBufferDescriptor;
// Declaration of the TBB face kernel entry point (definition is outside this
// excerpt). Takes the vertex/varying buffers, their buffer descriptors, the
// F_IT / F_ITa tables, the vertex/table offsets and a start/end range.
// NOTE(review): the first two lines are the removed OsdVertexDescriptor form
// of this declaration, shown interleaved with the added form.
void OsdTbbComputeFace(OsdVertexDescriptor const &vdesc,
float * vertex, float * varying,
void OsdTbbComputeFace(float * vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
int const *F_IT, int const *F_ITa,
int vertexOffset, int tableOffset,
int start, int end);
// Declaration of the TBB edge kernel entry point (definition is outside this
// excerpt). Takes the vertex/varying buffers, their buffer descriptors, the
// E_IT / E_ITa tables, the vertex/table offsets and a start/end range.
// NOTE(review): the first two lines are the removed OsdVertexDescriptor form
// of this declaration, shown interleaved with the added form.
void OsdTbbComputeEdge(OsdVertexDescriptor const &vdesc,
float *vertex, float * varying,
void OsdTbbComputeEdge(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
int const *E_IT, float const *E_ITa,
int vertexOffset, int tableOffset,
int start, int end);
// Declaration of the TBB "vertex A" kernel entry point, which dispatches
// TBBVertexKernelA through tbb::parallel_for.
// NOTE(review): the float table is named V_IT here but V_W in the definition;
// the names should agree (V_W).
// NOTE(review): the first two lines are the removed OsdVertexDescriptor form
// of this declaration, shown interleaved with the added form.
void OsdTbbComputeVertexA(OsdVertexDescriptor const &vdesc,
float *vertex, float * varying,
void OsdTbbComputeVertexA(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
int const *V_ITa, float const *V_IT,
int vertexOffset, int tableOffset,
int start, int end, int pass);
// Declaration of the TBB "vertex B" kernel entry point, which dispatches
// TBBVertexKernelB through tbb::parallel_for.
// NOTE(review): the first two lines are the removed OsdVertexDescriptor form
// of this declaration, shown interleaved with the added form.
void OsdTbbComputeVertexB(OsdVertexDescriptor const &vdesc,
float *vertex, float * varying,
void OsdTbbComputeVertexB(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
int const *V_ITa, int const *V_IT, float const *V_W,
int vertexOffset, int tableOffset,
int start, int end);
// Declaration of the TBB Loop-scheme "vertex B" kernel entry point, which
// dispatches TBBLoopVertexKernelB through tbb::parallel_for.
// NOTE(review): the first two lines are the removed OsdVertexDescriptor form
// of this declaration, shown interleaved with the added form.
void OsdTbbComputeLoopVertexB(OsdVertexDescriptor const &vdesc,
float *vertex, float * varying,
void OsdTbbComputeLoopVertexB(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
int const *V_ITa, int const *V_IT,
float const *V_W,
int vertexOffset, int tableOffset,
int start, int end);
// Declaration of the TBB bilinear edge kernel entry point, which dispatches
// TBBBilinearEdgeKernel through tbb::parallel_for.
// NOTE(review): the first two lines are the removed OsdVertexDescriptor form
// of this declaration, shown interleaved with the added form.
void OsdTbbComputeBilinearEdge(OsdVertexDescriptor const &vdesc,
float *vertex, float * varying,
void OsdTbbComputeBilinearEdge(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
int const *E_IT,
int vertexOffset, int tableOffset,
int start, int end);
// Declaration of the TBB bilinear vertex kernel entry point, which dispatches
// TBBBilinearVertexKernel through tbb::parallel_for.
// NOTE(review): the first two lines are the removed OsdVertexDescriptor form
// of this declaration, shown interleaved with the added form.
void OsdTbbComputeBilinearVertex(OsdVertexDescriptor const &vdesc,
float *vertex, float * varying,
void OsdTbbComputeBilinearVertex(float *vertex, float * varying,
OsdVertexBufferDescriptor const &vertexDesc,
OsdVertexBufferDescriptor const &varyingDesc,
int const *V_ITa,
int vertexOffset, int tableOffset,
int start, int end);
// Declaration of the "add" vertex-edit entry point: adds primVarWidth values
// per edit to the primvar at primVarOffset of each edited vertex.
// NOTE(review): the first line is the removed OsdVertexDescriptor form of
// this declaration, shown interleaved with the added form.
void OsdTbbEditVertexAdd(OsdVertexDescriptor const &vdesc, float *vertex,
void OsdTbbEditVertexAdd(float *vertex,
OsdVertexBufferDescriptor const &vertexDesc,
int primVarOffset, int primVarWidth,
int vertexOffset, int tableOffset,
int start, int end,
unsigned int const *editIndices,
float const *editValues);
void OsdTbbEditVertexSet(OsdVertexDescriptor const &vdesc, float *vertex,
void OsdTbbEditVertexSet(float *vertex,
OsdVertexBufferDescriptor const &vertexDesc,
int primVarOffset, int primVarWidth,
int vertexOffset, int tableOffset,
int start, int end,

View File

@ -31,155 +31,6 @@
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
/// \brief Describes the per-vertex element counts of separate "vertex" and
/// "varying" primvar buffers, and provides the basic operations applied to
/// those buffers (clear, weighted accumulate, vertex edits).
///
/// Buffers are addressed as tightly packed arrays: vertex N starts at float
/// index N * numVertexElements (resp. N * numVaryingElements).
struct OsdVertexDescriptor {

    /// Constructor
    OsdVertexDescriptor() : numVertexElements(0), numVaryingElements(0) {}

    /// Constructor
    ///
    /// @param numVertexElem   number of vertex-interpolated data elements (floats)
    ///
    /// @param numVaryingElem  number of varying-interpolated data elements (floats)
    ///
    OsdVertexDescriptor(int numVertexElem, int numVaryingElem)
        : numVertexElements(numVertexElem),
          numVaryingElements(numVaryingElem) { }

    /// Sets descriptor
    ///
    /// @param numVertexElem   number of vertex-interpolated data elements (floats)
    ///
    /// @param numVaryingElem  number of varying-interpolated data elements (floats)
    ///
    void Set(int numVertexElem, int numVaryingElem) {
        numVertexElements = numVertexElem;
        numVaryingElements = numVaryingElem;
    }

    /// Resets the descriptor
    void Reset() {
        numVertexElements = numVaryingElements = 0;
    }

    /// Returns the total number of elements (vertex + varying)
    int GetNumElements() const {
        return numVertexElements + numVaryingElements;
    }

    /// True if both element counts are identical.
    /// Const-qualified so that const descriptors can be compared.
    bool operator == (OsdVertexDescriptor const & other) const {
        return (numVertexElements == other.numVertexElements and
                numVaryingElements == other.numVaryingElements);
    }

    /// Resets the contents of vertex & varying primvar data buffers for a given
    /// vertex. Either buffer may be NULL, in which case it is skipped.
    ///
    /// @param vertex  The float array containing the vertex-interpolated primvar
    ///                data that needs to be reset.
    ///
    /// @param varying The float array containing the varying-interpolated primvar
    ///                data that needs to be reset.
    ///
    /// @param index   Vertex index in the buffer.
    ///
    void Clear(float *vertex, float *varying, int index) const {
        if (vertex) {
            memset(vertex+index*numVertexElements, 0, sizeof(float)*numVertexElements);
        }

        if (varying) {
            memset(varying+index*numVaryingElements, 0, sizeof(float)*numVaryingElements);
        }
    }

    /// Applies "dst += src*weight" to "vertex" primvar data in a vertex buffer.
    /// The buffer pointer is not checked for NULL.
    ///
    /// @param vertex    The VertexData buffer
    ///
    /// @param dstIndex  Index of the destination vertex.
    ///
    /// @param srcIndex  Index of the origin vertex.
    ///
    /// @param weight    Weight applied to the primvar data.
    ///
    inline
    void AddWithWeight(float *vertex, int dstIndex, int srcIndex, float weight) const {
        int d = dstIndex * numVertexElements;
        int s = srcIndex * numVertexElements;
#if defined ( __INTEL_COMPILER ) or defined ( __ICC )
    #pragma ivdep
    #pragma vector aligned
#endif
        for (int i = 0; i < numVertexElements; ++i)
            vertex[d++] += vertex[s++] * weight;
    }

    /// Applies "dst += src*weight" to "varying" primvar data in a vertex buffer.
    /// The buffer pointer is not checked for NULL.
    ///
    /// @param varying   The VaryingData buffer
    ///
    /// @param dstIndex  Index of the destination vertex.
    ///
    /// @param srcIndex  Index of the source vertex.
    ///
    /// @param weight    Weight applied to the primvar data.
    ///
    inline
    void AddVaryingWithWeight(float *varying, int dstIndex, int srcIndex, float weight) const {
        int d = dstIndex * numVaryingElements;
        int s = srcIndex * numVaryingElements;
#if defined ( __INTEL_COMPILER ) or defined ( __ICC )
    #pragma ivdep
    #pragma vector aligned
#endif
        for (int i = 0; i < numVaryingElements; ++i)
            varying[d++] += varying[s++] * weight;
    }

    /// Applies an "add" vertex edit
    ///
    /// @param vertex         The primvar data buffer.
    ///
    /// @param primVarOffset  Offset to the primvar datum.
    ///
    /// @param primVarWidth   Length of the primvar datum.
    ///
    /// @param editIndex      The location of the vertex in the buffer.
    ///
    /// @param editValues     The values to add to the primvar datum.
    ///
    void ApplyVertexEditAdd(float *vertex, int primVarOffset, int primVarWidth, int editIndex, const float *editValues) const {
        int d = editIndex * numVertexElements + primVarOffset;
        for (int i = 0; i < primVarWidth; ++i) {
            vertex[d++] += editValues[i];
        }
    }

    /// Applies a "set" vertex edit
    ///
    /// @param vertex         The primvar data buffer.
    ///
    /// @param primVarOffset  Offset to the primvar datum.
    ///
    /// @param primVarWidth   Length of the primvar datum.
    ///
    /// @param editIndex      The location of the vertex in the buffer.
    ///
    /// @param editValues     The values the primvar datum is set to.
    ///
    void ApplyVertexEditSet(float *vertex, int primVarOffset, int primVarWidth, int editIndex, const float *editValues) const {
        int d = editIndex * numVertexElements + primVarOffset;
        for (int i = 0; i < primVarWidth; ++i) {
            vertex[d++] = editValues[i];
        }
    }

    int numVertexElements;   // floats per vertex in the "vertex" buffer
    int numVaryingElements;  // floats per vertex in the "varying" buffer
};
/// \brief Describes vertex elements in interleaved data buffers
struct OsdVertexBufferDescriptor {
@ -207,6 +58,13 @@ struct OsdVertexBufferDescriptor {
offset = length = stride = 0;
}
/// Equality comparison: true only when offset, length and stride all match.
bool operator == ( OsdVertexBufferDescriptor const other ) const {
    if (offset != other.offset) {
        return false;
    }
    if (length != other.length) {
        return false;
    }
    return stride == other.stride;
}
int offset; // offset to desired element data
int length; // number or length of the data
int stride; // stride to the next element