mirror of
https://github.com/PixarAnimationStudios/OpenSubdiv
synced 2024-11-12 23:20:10 +00:00
Add interleaved buffer support to OsdCompute. Removed OsdVertexDescriptor and replaced it with OsdVertexBufferDescriptor.
All kernels now take offset/length/stride so that subdivision can be applied to a subset of the elements of each vertex. The offset can also be used for client-side VBO aggregation without modifying index buffers. This is useful for topology sharing, in conjunction with glDrawElementsBaseVertex etc. However, the Gregory patch shader fetches the vertex buffer via a texture buffer, whose indices must be offset as well. Although the gl_BaseVertexARB extension should be able to do that job, it is a relatively new extension. So we use an OsdBaseVertex() call to mitigate the compatibility issue, since clients can provide the base vertex in their own way, at least for the time being.
This commit is contained in:
parent
2372eb45ef
commit
ee061291b7
@ -67,6 +67,10 @@ int OsdPrimitiveIdBase()
|
||||
{
|
||||
return PrimitiveIdBase;
|
||||
}
|
||||
int OsdBaseVertex()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------
|
||||
// Vertex Shader
|
||||
|
@ -99,6 +99,10 @@ int OsdPrimitiveIdBase()
|
||||
{
|
||||
return PrimitiveIdBase;
|
||||
}
|
||||
int OsdBaseVertex()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------
|
||||
// Vertex Shader
|
||||
|
@ -97,6 +97,10 @@ int OsdPrimitiveIdBase()
|
||||
{
|
||||
return PrimitiveIdBase;
|
||||
}
|
||||
int OsdBaseVertex()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------
|
||||
// Vertex Shader
|
||||
|
@ -98,6 +98,10 @@ int OsdPrimitiveIdBase()
|
||||
{
|
||||
return PrimitiveIdBase;
|
||||
}
|
||||
int OsdBaseVertex()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------
|
||||
// Geometry Shader
|
||||
|
@ -130,6 +130,10 @@ int OsdPrimitiveIdBase()
|
||||
{
|
||||
return PrimitiveIdBase;
|
||||
}
|
||||
int OsdBaseVertex()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------
|
||||
// Vertex Shader
|
||||
|
@ -91,6 +91,10 @@ int OsdPrimitiveIdBase()
|
||||
{
|
||||
return PrimitiveIdBase;
|
||||
}
|
||||
int OsdBaseVertex()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------
|
||||
// Vertex Shader
|
||||
|
@ -53,9 +53,7 @@ namespace OPENSUBDIV_VERSION {
|
||||
|
||||
OsdCLComputeController::OsdCLComputeController(cl_context clContext,
|
||||
cl_command_queue queue) :
|
||||
_clContext(clContext), _clQueue(queue),
|
||||
_currentVertexBuffer(0), _currentVaryingBuffer(0),
|
||||
_currentKernelBundle(NULL) {
|
||||
_clContext(clContext), _clQueue(queue) {
|
||||
}
|
||||
|
||||
OsdCLComputeController::~OsdCLComputeController() {
|
||||
@ -73,21 +71,23 @@ OsdCLComputeController::Synchronize() {
|
||||
}
|
||||
|
||||
OsdCLKernelBundle *
|
||||
OsdCLComputeController::getKernelBundle(int numVertexElements,
|
||||
int numVaryingElements) {
|
||||
OsdCLComputeController::getKernelBundle(
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc) {
|
||||
|
||||
std::vector<OsdCLKernelBundle*>::iterator it =
|
||||
std::find_if(_kernelRegistry.begin(), _kernelRegistry.end(),
|
||||
OsdCLKernelBundle::Match(numVertexElements,
|
||||
numVaryingElements));
|
||||
OsdCLKernelBundle::Match(vertexDesc,
|
||||
varyingDesc));
|
||||
|
||||
if (it != _kernelRegistry.end()) {
|
||||
return *it;
|
||||
} else {
|
||||
OsdCLKernelBundle *kernelBundle = new OsdCLKernelBundle();
|
||||
_kernelRegistry.push_back(kernelBundle);
|
||||
kernelBundle->Compile(_clContext,
|
||||
numVertexElements,
|
||||
numVaryingElements);
|
||||
vertexDesc,
|
||||
varyingDesc);
|
||||
return kernelBundle;
|
||||
}
|
||||
}
|
||||
@ -107,17 +107,19 @@ OsdCLComputeController::ApplyBilinearEdgeVerticesKernel(
|
||||
|
||||
cl_int ciErrNum;
|
||||
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
|
||||
cl_kernel kernel = _currentKernelBundle->GetBilinearEdgeKernel();
|
||||
cl_kernel kernel = _currentBindState.kernelBundle->GetBilinearEdgeKernel();
|
||||
|
||||
cl_mem E_IT = context->GetTable(FarSubdivisionTables::E_IT)->GetDevicePtr();
|
||||
|
||||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentVertexBuffer);
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentVaryingBuffer);
|
||||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
|
||||
clSetKernelArg(kernel, 2, sizeof(cl_mem), &E_IT);
|
||||
clSetKernelArg(kernel, 3, sizeof(int), batch.GetVertexOffsetPtr());
|
||||
clSetKernelArg(kernel, 4, sizeof(int), batch.GetTableOffsetPtr());
|
||||
clSetKernelArg(kernel, 5, sizeof(int), batch.GetStartPtr());
|
||||
clSetKernelArg(kernel, 6, sizeof(int), batch.GetEndPtr());
|
||||
clSetKernelArg(kernel, 3, sizeof(int), &_currentBindState.vertexDesc.offset);
|
||||
clSetKernelArg(kernel, 4, sizeof(int), &_currentBindState.varyingDesc.offset);
|
||||
clSetKernelArg(kernel, 5, sizeof(int), batch.GetVertexOffsetPtr());
|
||||
clSetKernelArg(kernel, 6, sizeof(int), batch.GetTableOffsetPtr());
|
||||
clSetKernelArg(kernel, 7, sizeof(int), batch.GetStartPtr());
|
||||
clSetKernelArg(kernel, 8, sizeof(int), batch.GetEndPtr());
|
||||
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
|
||||
kernel, 1, NULL, globalWorkSize,
|
||||
NULL, 0, NULL, NULL);
|
||||
@ -132,17 +134,19 @@ OsdCLComputeController::ApplyBilinearVertexVerticesKernel(
|
||||
|
||||
cl_int ciErrNum;
|
||||
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
|
||||
cl_kernel kernel = _currentKernelBundle->GetBilinearVertexKernel();
|
||||
cl_kernel kernel = _currentBindState.kernelBundle->GetBilinearVertexKernel();
|
||||
|
||||
cl_mem V_ITa = context->GetTable(FarSubdivisionTables::V_ITa)->GetDevicePtr();
|
||||
|
||||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentVertexBuffer);
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentVaryingBuffer);
|
||||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
|
||||
clSetKernelArg(kernel, 2, sizeof(cl_mem), &V_ITa);
|
||||
clSetKernelArg(kernel, 3, sizeof(int), batch.GetVertexOffsetPtr());
|
||||
clSetKernelArg(kernel, 4, sizeof(int), batch.GetTableOffsetPtr());
|
||||
clSetKernelArg(kernel, 5, sizeof(int), batch.GetStartPtr());
|
||||
clSetKernelArg(kernel, 6, sizeof(int), batch.GetEndPtr());
|
||||
clSetKernelArg(kernel, 3, sizeof(int), &_currentBindState.vertexDesc.offset);
|
||||
clSetKernelArg(kernel, 4, sizeof(int), &_currentBindState.varyingDesc.offset);
|
||||
clSetKernelArg(kernel, 5, sizeof(int), batch.GetVertexOffsetPtr());
|
||||
clSetKernelArg(kernel, 6, sizeof(int), batch.GetTableOffsetPtr());
|
||||
clSetKernelArg(kernel, 7, sizeof(int), batch.GetStartPtr());
|
||||
clSetKernelArg(kernel, 8, sizeof(int), batch.GetEndPtr());
|
||||
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
|
||||
kernel, 1, NULL, globalWorkSize,
|
||||
NULL, 0, NULL, NULL);
|
||||
@ -157,19 +161,21 @@ OsdCLComputeController::ApplyCatmarkFaceVerticesKernel(
|
||||
|
||||
cl_int ciErrNum;
|
||||
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
|
||||
cl_kernel kernel = _currentKernelBundle->GetCatmarkFaceKernel();
|
||||
cl_kernel kernel = _currentBindState.kernelBundle->GetCatmarkFaceKernel();
|
||||
|
||||
cl_mem F_IT = context->GetTable(FarSubdivisionTables::F_IT)->GetDevicePtr();
|
||||
cl_mem F_ITa = context->GetTable(FarSubdivisionTables::F_ITa)->GetDevicePtr();
|
||||
|
||||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentVertexBuffer);
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentVaryingBuffer);
|
||||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
|
||||
clSetKernelArg(kernel, 2, sizeof(cl_mem), &F_IT);
|
||||
clSetKernelArg(kernel, 3, sizeof(cl_mem), &F_ITa);
|
||||
clSetKernelArg(kernel, 4, sizeof(int), batch.GetVertexOffsetPtr());
|
||||
clSetKernelArg(kernel, 5, sizeof(int), batch.GetTableOffsetPtr());
|
||||
clSetKernelArg(kernel, 6, sizeof(int), batch.GetStartPtr());
|
||||
clSetKernelArg(kernel, 7, sizeof(int), batch.GetEndPtr());
|
||||
clSetKernelArg(kernel, 4, sizeof(int), &_currentBindState.vertexDesc.offset);
|
||||
clSetKernelArg(kernel, 5, sizeof(int), &_currentBindState.varyingDesc.offset);
|
||||
clSetKernelArg(kernel, 6, sizeof(int), batch.GetVertexOffsetPtr());
|
||||
clSetKernelArg(kernel, 7, sizeof(int), batch.GetTableOffsetPtr());
|
||||
clSetKernelArg(kernel, 8, sizeof(int), batch.GetStartPtr());
|
||||
clSetKernelArg(kernel, 9, sizeof(int), batch.GetEndPtr());
|
||||
|
||||
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
|
||||
kernel, 1, NULL, globalWorkSize,
|
||||
@ -185,19 +191,21 @@ OsdCLComputeController::ApplyCatmarkEdgeVerticesKernel(
|
||||
|
||||
cl_int ciErrNum;
|
||||
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
|
||||
cl_kernel kernel = _currentKernelBundle->GetCatmarkEdgeKernel();
|
||||
cl_kernel kernel = _currentBindState.kernelBundle->GetCatmarkEdgeKernel();
|
||||
|
||||
cl_mem E_IT = context->GetTable(FarSubdivisionTables::E_IT)->GetDevicePtr();
|
||||
cl_mem E_W = context->GetTable(FarSubdivisionTables::E_W)->GetDevicePtr();
|
||||
|
||||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentVertexBuffer);
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentVaryingBuffer);
|
||||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
|
||||
clSetKernelArg(kernel, 2, sizeof(cl_mem), &E_IT);
|
||||
clSetKernelArg(kernel, 3, sizeof(cl_mem), &E_W);
|
||||
clSetKernelArg(kernel, 4, sizeof(int), batch.GetVertexOffsetPtr());
|
||||
clSetKernelArg(kernel, 5, sizeof(int), batch.GetTableOffsetPtr());
|
||||
clSetKernelArg(kernel, 6, sizeof(int), batch.GetStartPtr());
|
||||
clSetKernelArg(kernel, 7, sizeof(int), batch.GetEndPtr());
|
||||
clSetKernelArg(kernel, 4, sizeof(int), &_currentBindState.vertexDesc.offset);
|
||||
clSetKernelArg(kernel, 5, sizeof(int), &_currentBindState.varyingDesc.offset);
|
||||
clSetKernelArg(kernel, 6, sizeof(int), batch.GetVertexOffsetPtr());
|
||||
clSetKernelArg(kernel, 7, sizeof(int), batch.GetTableOffsetPtr());
|
||||
clSetKernelArg(kernel, 8, sizeof(int), batch.GetStartPtr());
|
||||
clSetKernelArg(kernel, 9, sizeof(int), batch.GetEndPtr());
|
||||
|
||||
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
|
||||
kernel, 1, NULL, globalWorkSize,
|
||||
@ -213,21 +221,23 @@ OsdCLComputeController::ApplyCatmarkVertexVerticesKernelB(
|
||||
|
||||
cl_int ciErrNum;
|
||||
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
|
||||
cl_kernel kernel = _currentKernelBundle->GetCatmarkVertexKernelB();
|
||||
cl_kernel kernel = _currentBindState.kernelBundle->GetCatmarkVertexKernelB();
|
||||
|
||||
cl_mem V_ITa = context->GetTable(FarSubdivisionTables::V_ITa)->GetDevicePtr();
|
||||
cl_mem V_IT = context->GetTable(FarSubdivisionTables::V_IT)->GetDevicePtr();
|
||||
cl_mem V_W = context->GetTable(FarSubdivisionTables::V_W)->GetDevicePtr();
|
||||
|
||||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentVertexBuffer);
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentVaryingBuffer);
|
||||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
|
||||
clSetKernelArg(kernel, 2, sizeof(cl_mem), &V_ITa);
|
||||
clSetKernelArg(kernel, 3, sizeof(cl_mem), &V_IT);
|
||||
clSetKernelArg(kernel, 4, sizeof(cl_mem), &V_W);
|
||||
clSetKernelArg(kernel, 5, sizeof(int), batch.GetVertexOffsetPtr());
|
||||
clSetKernelArg(kernel, 6, sizeof(int), batch.GetTableOffsetPtr());
|
||||
clSetKernelArg(kernel, 7, sizeof(int), batch.GetStartPtr());
|
||||
clSetKernelArg(kernel, 8, sizeof(int), batch.GetEndPtr());
|
||||
clSetKernelArg(kernel, 5, sizeof(int), &_currentBindState.vertexDesc.offset);
|
||||
clSetKernelArg(kernel, 6, sizeof(int), &_currentBindState.varyingDesc.offset);
|
||||
clSetKernelArg(kernel, 7, sizeof(int), batch.GetVertexOffsetPtr());
|
||||
clSetKernelArg(kernel, 8, sizeof(int), batch.GetTableOffsetPtr());
|
||||
clSetKernelArg(kernel, 9, sizeof(int), batch.GetStartPtr());
|
||||
clSetKernelArg(kernel, 10, sizeof(int), batch.GetEndPtr());
|
||||
|
||||
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
|
||||
kernel, 1, NULL, globalWorkSize,
|
||||
@ -244,20 +254,22 @@ OsdCLComputeController::ApplyCatmarkVertexVerticesKernelA1(
|
||||
cl_int ciErrNum;
|
||||
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
|
||||
int ipass = false;
|
||||
cl_kernel kernel = _currentKernelBundle->GetCatmarkVertexKernelA();
|
||||
cl_kernel kernel = _currentBindState.kernelBundle->GetCatmarkVertexKernelA();
|
||||
|
||||
cl_mem V_ITa = context->GetTable(FarSubdivisionTables::V_ITa)->GetDevicePtr();
|
||||
cl_mem V_W = context->GetTable(FarSubdivisionTables::V_W)->GetDevicePtr();
|
||||
|
||||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentVertexBuffer);
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentVaryingBuffer);
|
||||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
|
||||
clSetKernelArg(kernel, 2, sizeof(cl_mem), &V_ITa);
|
||||
clSetKernelArg(kernel, 3, sizeof(cl_mem), &V_W);
|
||||
clSetKernelArg(kernel, 4, sizeof(int), batch.GetVertexOffsetPtr());
|
||||
clSetKernelArg(kernel, 5, sizeof(int), batch.GetTableOffsetPtr());
|
||||
clSetKernelArg(kernel, 6, sizeof(int), batch.GetStartPtr());
|
||||
clSetKernelArg(kernel, 7, sizeof(int), batch.GetEndPtr());
|
||||
clSetKernelArg(kernel, 8, sizeof(int), &ipass);
|
||||
clSetKernelArg(kernel, 4, sizeof(int), &_currentBindState.vertexDesc.offset);
|
||||
clSetKernelArg(kernel, 5, sizeof(int), &_currentBindState.varyingDesc.offset);
|
||||
clSetKernelArg(kernel, 6, sizeof(int), batch.GetVertexOffsetPtr());
|
||||
clSetKernelArg(kernel, 7, sizeof(int), batch.GetTableOffsetPtr());
|
||||
clSetKernelArg(kernel, 8, sizeof(int), batch.GetStartPtr());
|
||||
clSetKernelArg(kernel, 9, sizeof(int), batch.GetEndPtr());
|
||||
clSetKernelArg(kernel, 10, sizeof(int), &ipass);
|
||||
|
||||
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
|
||||
kernel, 1, NULL, globalWorkSize,
|
||||
@ -274,20 +286,22 @@ OsdCLComputeController::ApplyCatmarkVertexVerticesKernelA2(
|
||||
cl_int ciErrNum;
|
||||
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
|
||||
int ipass = true;
|
||||
cl_kernel kernel = _currentKernelBundle->GetCatmarkVertexKernelA();
|
||||
cl_kernel kernel = _currentBindState.kernelBundle->GetCatmarkVertexKernelA();
|
||||
|
||||
cl_mem V_ITa = context->GetTable(FarSubdivisionTables::V_ITa)->GetDevicePtr();
|
||||
cl_mem V_W = context->GetTable(FarSubdivisionTables::V_W)->GetDevicePtr();
|
||||
|
||||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentVertexBuffer);
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentVaryingBuffer);
|
||||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
|
||||
clSetKernelArg(kernel, 2, sizeof(cl_mem), &V_ITa);
|
||||
clSetKernelArg(kernel, 3, sizeof(cl_mem), &V_W);
|
||||
clSetKernelArg(kernel, 4, sizeof(int), batch.GetVertexOffsetPtr());
|
||||
clSetKernelArg(kernel, 5, sizeof(int), batch.GetTableOffsetPtr());
|
||||
clSetKernelArg(kernel, 6, sizeof(int), batch.GetStartPtr());
|
||||
clSetKernelArg(kernel, 7, sizeof(int), batch.GetEndPtr());
|
||||
clSetKernelArg(kernel, 8, sizeof(int), &ipass);
|
||||
clSetKernelArg(kernel, 4, sizeof(int), &_currentBindState.vertexDesc.offset);
|
||||
clSetKernelArg(kernel, 5, sizeof(int), &_currentBindState.varyingDesc.offset);
|
||||
clSetKernelArg(kernel, 6, sizeof(int), batch.GetVertexOffsetPtr());
|
||||
clSetKernelArg(kernel, 7, sizeof(int), batch.GetTableOffsetPtr());
|
||||
clSetKernelArg(kernel, 8, sizeof(int), batch.GetStartPtr());
|
||||
clSetKernelArg(kernel, 9, sizeof(int), batch.GetEndPtr());
|
||||
clSetKernelArg(kernel, 10, sizeof(int), &ipass);
|
||||
|
||||
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
|
||||
kernel, 1, NULL, globalWorkSize,
|
||||
@ -303,19 +317,21 @@ OsdCLComputeController::ApplyLoopEdgeVerticesKernel(
|
||||
|
||||
cl_int ciErrNum;
|
||||
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
|
||||
cl_kernel kernel = _currentKernelBundle->GetLoopEdgeKernel();
|
||||
cl_kernel kernel = _currentBindState.kernelBundle->GetLoopEdgeKernel();
|
||||
|
||||
cl_mem E_IT = context->GetTable(FarSubdivisionTables::E_IT)->GetDevicePtr();
|
||||
cl_mem E_W = context->GetTable(FarSubdivisionTables::E_W)->GetDevicePtr();
|
||||
|
||||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentVertexBuffer);
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentVaryingBuffer);
|
||||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
|
||||
clSetKernelArg(kernel, 2, sizeof(cl_mem), &E_IT);
|
||||
clSetKernelArg(kernel, 3, sizeof(cl_mem), &E_W);
|
||||
clSetKernelArg(kernel, 4, sizeof(int), batch.GetVertexOffsetPtr());
|
||||
clSetKernelArg(kernel, 5, sizeof(int), batch.GetTableOffsetPtr());
|
||||
clSetKernelArg(kernel, 6, sizeof(int), batch.GetStartPtr());
|
||||
clSetKernelArg(kernel, 7, sizeof(int), batch.GetEndPtr());
|
||||
clSetKernelArg(kernel, 4, sizeof(int), &_currentBindState.vertexDesc.offset);
|
||||
clSetKernelArg(kernel, 5, sizeof(int), &_currentBindState.varyingDesc.offset);
|
||||
clSetKernelArg(kernel, 6, sizeof(int), batch.GetVertexOffsetPtr());
|
||||
clSetKernelArg(kernel, 7, sizeof(int), batch.GetTableOffsetPtr());
|
||||
clSetKernelArg(kernel, 8, sizeof(int), batch.GetStartPtr());
|
||||
clSetKernelArg(kernel, 9, sizeof(int), batch.GetEndPtr());
|
||||
|
||||
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
|
||||
kernel, 1, NULL, globalWorkSize,
|
||||
@ -331,21 +347,23 @@ OsdCLComputeController::ApplyLoopVertexVerticesKernelB(
|
||||
|
||||
cl_int ciErrNum;
|
||||
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
|
||||
cl_kernel kernel = _currentKernelBundle->GetLoopVertexKernelB();
|
||||
cl_kernel kernel = _currentBindState.kernelBundle->GetLoopVertexKernelB();
|
||||
|
||||
cl_mem V_ITa = context->GetTable(FarSubdivisionTables::V_ITa)->GetDevicePtr();
|
||||
cl_mem V_IT = context->GetTable(FarSubdivisionTables::V_IT)->GetDevicePtr();
|
||||
cl_mem V_W = context->GetTable(FarSubdivisionTables::V_W)->GetDevicePtr();
|
||||
|
||||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentVertexBuffer);
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentVaryingBuffer);
|
||||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
|
||||
clSetKernelArg(kernel, 2, sizeof(cl_mem), &V_ITa);
|
||||
clSetKernelArg(kernel, 3, sizeof(cl_mem), &V_IT);
|
||||
clSetKernelArg(kernel, 4, sizeof(cl_mem), &V_W);
|
||||
clSetKernelArg(kernel, 5, sizeof(int), batch.GetVertexOffsetPtr());
|
||||
clSetKernelArg(kernel, 6, sizeof(int), batch.GetTableOffsetPtr());
|
||||
clSetKernelArg(kernel, 7, sizeof(int), batch.GetStartPtr());
|
||||
clSetKernelArg(kernel, 8, sizeof(int), batch.GetEndPtr());
|
||||
clSetKernelArg(kernel, 5, sizeof(int), &_currentBindState.vertexDesc.offset);
|
||||
clSetKernelArg(kernel, 6, sizeof(int), &_currentBindState.varyingDesc.offset);
|
||||
clSetKernelArg(kernel, 7, sizeof(int), batch.GetVertexOffsetPtr());
|
||||
clSetKernelArg(kernel, 8, sizeof(int), batch.GetTableOffsetPtr());
|
||||
clSetKernelArg(kernel, 9, sizeof(int), batch.GetStartPtr());
|
||||
clSetKernelArg(kernel, 10, sizeof(int), batch.GetEndPtr());
|
||||
|
||||
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
|
||||
kernel, 1, NULL, globalWorkSize,
|
||||
@ -362,20 +380,22 @@ OsdCLComputeController::ApplyLoopVertexVerticesKernelA1(
|
||||
cl_int ciErrNum;
|
||||
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
|
||||
int ipass = false;
|
||||
cl_kernel kernel = _currentKernelBundle->GetLoopVertexKernelA();
|
||||
cl_kernel kernel = _currentBindState.kernelBundle->GetLoopVertexKernelA();
|
||||
|
||||
cl_mem V_ITa = context->GetTable(FarSubdivisionTables::V_ITa)->GetDevicePtr();
|
||||
cl_mem V_W = context->GetTable(FarSubdivisionTables::V_W)->GetDevicePtr();
|
||||
|
||||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentVertexBuffer);
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentVaryingBuffer);
|
||||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
|
||||
clSetKernelArg(kernel, 2, sizeof(cl_mem), &V_ITa);
|
||||
clSetKernelArg(kernel, 3, sizeof(cl_mem), &V_W);
|
||||
clSetKernelArg(kernel, 4, sizeof(int), batch.GetVertexOffsetPtr());
|
||||
clSetKernelArg(kernel, 5, sizeof(int), batch.GetTableOffsetPtr());
|
||||
clSetKernelArg(kernel, 6, sizeof(int), batch.GetStartPtr());
|
||||
clSetKernelArg(kernel, 7, sizeof(int), batch.GetEndPtr());
|
||||
clSetKernelArg(kernel, 8, sizeof(int), &ipass);
|
||||
clSetKernelArg(kernel, 4, sizeof(int), &_currentBindState.vertexDesc.offset);
|
||||
clSetKernelArg(kernel, 5, sizeof(int), &_currentBindState.varyingDesc.offset);
|
||||
clSetKernelArg(kernel, 6, sizeof(int), batch.GetVertexOffsetPtr());
|
||||
clSetKernelArg(kernel, 7, sizeof(int), batch.GetTableOffsetPtr());
|
||||
clSetKernelArg(kernel, 8, sizeof(int), batch.GetStartPtr());
|
||||
clSetKernelArg(kernel, 9, sizeof(int), batch.GetEndPtr());
|
||||
clSetKernelArg(kernel, 10, sizeof(int), &ipass);
|
||||
|
||||
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
|
||||
kernel, 1, NULL, globalWorkSize,
|
||||
@ -392,20 +412,22 @@ OsdCLComputeController::ApplyLoopVertexVerticesKernelA2(
|
||||
cl_int ciErrNum;
|
||||
size_t globalWorkSize[1] = { (size_t)(batch.GetEnd() - batch.GetStart()) };
|
||||
int ipass = true;
|
||||
cl_kernel kernel = _currentKernelBundle->GetLoopVertexKernelA();
|
||||
cl_kernel kernel = _currentBindState.kernelBundle->GetLoopVertexKernelA();
|
||||
|
||||
cl_mem V_ITa = context->GetTable(FarSubdivisionTables::V_ITa)->GetDevicePtr();
|
||||
cl_mem V_W = context->GetTable(FarSubdivisionTables::V_W)->GetDevicePtr();
|
||||
|
||||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentVertexBuffer);
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentVaryingBuffer);
|
||||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_mem), &_currentBindState.varyingBuffer);
|
||||
clSetKernelArg(kernel, 2, sizeof(cl_mem), &V_ITa);
|
||||
clSetKernelArg(kernel, 3, sizeof(cl_mem), &V_W);
|
||||
clSetKernelArg(kernel, 4, sizeof(int), batch.GetVertexOffsetPtr());
|
||||
clSetKernelArg(kernel, 5, sizeof(int), batch.GetTableOffsetPtr());
|
||||
clSetKernelArg(kernel, 6, sizeof(int), batch.GetStartPtr());
|
||||
clSetKernelArg(kernel, 7, sizeof(int), batch.GetEndPtr());
|
||||
clSetKernelArg(kernel, 8, sizeof(int), &ipass);
|
||||
clSetKernelArg(kernel, 4, sizeof(int), &_currentBindState.vertexDesc.offset);
|
||||
clSetKernelArg(kernel, 5, sizeof(int), &_currentBindState.varyingDesc.offset);
|
||||
clSetKernelArg(kernel, 6, sizeof(int), batch.GetVertexOffsetPtr());
|
||||
clSetKernelArg(kernel, 7, sizeof(int), batch.GetTableOffsetPtr());
|
||||
clSetKernelArg(kernel, 8, sizeof(int), batch.GetStartPtr());
|
||||
clSetKernelArg(kernel, 9, sizeof(int), batch.GetEndPtr());
|
||||
clSetKernelArg(kernel, 10, sizeof(int), &ipass);
|
||||
|
||||
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
|
||||
kernel, 1, NULL, globalWorkSize,
|
||||
@ -434,17 +456,18 @@ OsdCLComputeController::ApplyVertexEdits(
|
||||
int primvarWidth = edit->GetPrimvarWidth();
|
||||
|
||||
if (edit->GetOperation() == FarVertexEdit::Add) {
|
||||
cl_kernel kernel = _currentKernelBundle->GetVertexEditAdd();
|
||||
cl_kernel kernel = _currentBindState.kernelBundle->GetVertexEditAdd();
|
||||
|
||||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentVertexBuffer);
|
||||
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_mem), &indices);
|
||||
clSetKernelArg(kernel, 2, sizeof(cl_mem), &values);
|
||||
clSetKernelArg(kernel, 3, sizeof(int), &primvarOffset);
|
||||
clSetKernelArg(kernel, 4, sizeof(int), &primvarWidth);
|
||||
clSetKernelArg(kernel, 5, sizeof(int), batch.GetVertexOffsetPtr());
|
||||
clSetKernelArg(kernel, 6, sizeof(int), batch.GetTableOffsetPtr());
|
||||
clSetKernelArg(kernel, 7, sizeof(int), batch.GetStartPtr());
|
||||
clSetKernelArg(kernel, 8, sizeof(int), batch.GetEndPtr());
|
||||
clSetKernelArg(kernel, 3, sizeof(int), &_currentBindState.vertexDesc.offset);
|
||||
clSetKernelArg(kernel, 4, sizeof(int), &primvarOffset);
|
||||
clSetKernelArg(kernel, 5, sizeof(int), &primvarWidth);
|
||||
clSetKernelArg(kernel, 6, sizeof(int), batch.GetVertexOffsetPtr());
|
||||
clSetKernelArg(kernel, 7, sizeof(int), batch.GetTableOffsetPtr());
|
||||
clSetKernelArg(kernel, 8, sizeof(int), batch.GetStartPtr());
|
||||
clSetKernelArg(kernel, 9, sizeof(int), batch.GetEndPtr());
|
||||
|
||||
ciErrNum = clEnqueueNDRangeKernel(_clQueue,
|
||||
kernel, 1, NULL, globalWorkSize,
|
||||
|
@ -29,6 +29,7 @@
|
||||
|
||||
#include "../far/dispatcher.h"
|
||||
#include "../osd/clComputeContext.h"
|
||||
#include "../osd/vertexDescriptor.h"
|
||||
|
||||
#if defined(__APPLE__)
|
||||
#include <OpenCL/opencl.h>
|
||||
@ -79,15 +80,25 @@ public:
|
||||
///
|
||||
/// @param varyingBuffer varying-interpolated data buffer
|
||||
///
|
||||
/// @param vertexDesc the descriptor of vertex elements to be refined.
|
||||
/// if it's null, all primvars in the vertex buffer
|
||||
/// will be refined.
|
||||
///
|
||||
/// @param varyingDesc the descriptor of varying elements to be refined.
|
||||
/// if it's null, all primvars in the varying buffer
|
||||
/// will be refined.
|
||||
///
|
||||
template<class VERTEX_BUFFER, class VARYING_BUFFER>
|
||||
void Refine(ComputeContext const *context,
|
||||
FarKernelBatchVector const &batches,
|
||||
VERTEX_BUFFER *vertexBuffer,
|
||||
VARYING_BUFFER *varyingBuffer) {
|
||||
VARYING_BUFFER *varyingBuffer,
|
||||
OsdVertexBufferDescriptor const *vertexDesc=NULL,
|
||||
OsdVertexBufferDescriptor const *varyingDesc=NULL) {
|
||||
|
||||
if (batches.empty()) return;
|
||||
|
||||
bind(vertexBuffer, varyingBuffer);
|
||||
bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc);
|
||||
|
||||
FarDispatcher::Refine(this, context, batches, /*maxlevel*/-1);
|
||||
|
||||
@ -152,33 +163,63 @@ protected:
|
||||
void ApplyVertexEdits(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
|
||||
OsdCLKernelBundle * getKernelBundle(int numVertexElements,
|
||||
int numVaryingElements);
|
||||
OsdCLKernelBundle * getKernelBundle(
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc);
|
||||
|
||||
template<class VERTEX_BUFFER, class VARYING_BUFFER>
|
||||
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying) {
|
||||
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying,
|
||||
OsdVertexBufferDescriptor const *vertexDesc,
|
||||
OsdVertexBufferDescriptor const *varyingDesc) {
|
||||
|
||||
int numVertexElements = vertex ? vertex->GetNumElements() : 0;
|
||||
int numVaryingElements = varying ? varying->GetNumElements() : 0;
|
||||
// if the vertex buffer descriptor is specified, use it.
|
||||
// otherwise, assumes the data is tightly packed in the vertex buffer.
|
||||
if (vertexDesc) {
|
||||
_currentBindState.vertexDesc = *vertexDesc;
|
||||
} else {
|
||||
int numElements = vertex ? vertex->GetNumElements() : 0;
|
||||
_currentBindState.vertexDesc = OsdVertexBufferDescriptor(
|
||||
0, numElements, numElements);
|
||||
}
|
||||
if (varyingDesc) {
|
||||
_currentBindState.varyingDesc = *varyingDesc;
|
||||
} else {
|
||||
int numElements = varying ? varying->GetNumElements() : 0;
|
||||
_currentBindState.varyingDesc = OsdVertexBufferDescriptor(
|
||||
0, numElements, numElements);
|
||||
}
|
||||
|
||||
_currentVertexBuffer = vertex ? vertex->BindCLBuffer(_clQueue) : NULL;
|
||||
_currentVaryingBuffer = varying ? varying->BindCLBuffer(_clQueue) : NULL;
|
||||
_currentKernelBundle = getKernelBundle(numVertexElements, numVaryingElements);
|
||||
_currentBindState.vertexBuffer = vertex ? vertex->BindCLBuffer(_clQueue) : 0;
|
||||
_currentBindState.varyingBuffer = varying ? varying->BindCLBuffer(_clQueue) : 0;
|
||||
_currentBindState.kernelBundle = getKernelBundle(_currentBindState.vertexDesc,
|
||||
_currentBindState.varyingDesc);
|
||||
}
|
||||
|
||||
void unbind() {
|
||||
_currentVertexBuffer = NULL;
|
||||
_currentVaryingBuffer = NULL;
|
||||
_currentKernelBundle = NULL;
|
||||
_currentBindState.Reset();
|
||||
}
|
||||
|
||||
private:
|
||||
struct BindState {
|
||||
BindState() : vertexBuffer(NULL), varyingBuffer(NULL), kernelBundle(NULL) {}
|
||||
void Reset() {
|
||||
vertexBuffer = varyingBuffer = NULL;
|
||||
vertexDesc.Reset();
|
||||
varyingDesc.Reset();
|
||||
kernelBundle = NULL;
|
||||
}
|
||||
cl_mem vertexBuffer;
|
||||
cl_mem varyingBuffer;
|
||||
OsdVertexBufferDescriptor vertexDesc;
|
||||
OsdVertexBufferDescriptor varyingDesc;
|
||||
OsdCLKernelBundle *kernelBundle;
|
||||
};
|
||||
|
||||
BindState _currentBindState;
|
||||
|
||||
cl_context _clContext;
|
||||
cl_command_queue _clQueue;
|
||||
std::vector<OsdCLKernelBundle *> _kernelRegistry;
|
||||
|
||||
cl_mem _currentVertexBuffer, _currentVaryingBuffer;
|
||||
OsdCLKernelBundle *_currentKernelBundle;
|
||||
};
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
|
@ -28,12 +28,12 @@
|
||||
|
||||
struct Vertex
|
||||
{
|
||||
float v[NUM_VERTEX_ELEMENTS];
|
||||
float v[VERTEX_STRIDE];
|
||||
};
|
||||
|
||||
struct Varying
|
||||
{
|
||||
float v[NUM_VARYING_ELEMENTS];
|
||||
float v[VARYING_STRIDE];
|
||||
};
|
||||
|
||||
static void clearVertex(struct Vertex *vertex) {
|
||||
@ -49,86 +49,121 @@ static void clearVarying(struct Varying *varying) {
|
||||
}
|
||||
}
|
||||
|
||||
static void addWithWeight(struct Vertex *dst, __global struct Vertex *src, float weight) {
|
||||
static void addWithWeight(struct Vertex *dst,
|
||||
__global float *srcOrigin,
|
||||
int index, float weight) {
|
||||
|
||||
for (int i = 0; i < NUM_VERTEX_ELEMENTS; i++) {
|
||||
dst->v[i] += src->v[i] * weight;
|
||||
__global float *src = srcOrigin + index * VERTEX_STRIDE;
|
||||
for (int i = 0; i < NUM_VERTEX_ELEMENTS; ++i) {
|
||||
dst->v[i] += src[i] * weight;
|
||||
}
|
||||
}
|
||||
|
||||
static void addVaryingWithWeight(struct Varying *dst, __global struct Varying *src, float weight) {
|
||||
static void addVaryingWithWeight(struct Varying *dst,
|
||||
__global float *srcOrigin,
|
||||
int index, float weight) {
|
||||
|
||||
for (int i = 0; i < NUM_VARYING_ELEMENTS; i++) {
|
||||
dst->v[i] += src->v[i] * weight;
|
||||
__global float *src = srcOrigin + index * VARYING_STRIDE;
|
||||
for (int i = 0; i < NUM_VARYING_ELEMENTS; ++i) {
|
||||
dst->v[i] += src[i] * weight;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void computeBilinearEdge(__global struct Vertex *vertex,
|
||||
__global struct Varying *varying,
|
||||
// Stores a locally accumulated vertex into the (possibly interleaved)
// destination buffer.
//
//   dstOrigin : start of the destination float buffer
//   index     : destination vertex index; consecutive vertices are
//               VERTEX_STRIDE floats apart
//   src       : local accumulator holding the values to store
//
// Only the first NUM_VERTEX_ELEMENTS floats of the destination vertex are
// written; the remaining floats inside the stride are left untouched.
static void writeVertex(__global float *dstOrigin,
                        int index,
                        struct Vertex *src) {

    __global float *out = dstOrigin + index * VERTEX_STRIDE;
    int k = 0;
    while (k < NUM_VERTEX_ELEMENTS) {
        out[k] = src->v[k];
        ++k;
    }
}
|
||||
|
||||
// Stores a locally accumulated varying value into the (possibly interleaved)
// destination buffer.
//
//   dstOrigin : start of the destination float buffer
//   index     : destination vertex index; consecutive entries are
//               VARYING_STRIDE floats apart
//   src       : local accumulator holding the values to store
//
// Only the first NUM_VARYING_ELEMENTS floats of the destination entry are
// written; the remaining floats inside the stride are left untouched.
static void writeVarying(__global float *dstOrigin,
                         int index,
                         struct Varying *src) {

    __global float *out = dstOrigin + index * VARYING_STRIDE;
    int k = 0;
    while (k < NUM_VARYING_ELEMENTS) {
        out[k] = src->v[k];
        ++k;
    }
}
|
||||
|
||||
__kernel void computeBilinearEdge(__global float *vertex,
|
||||
__global float *varying,
|
||||
__global int *E_IT,
|
||||
int vertexOffset, int tableOffset,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset,
|
||||
int start, int end) {
|
||||
|
||||
int i = start + get_global_id(0) + tableOffset;
|
||||
int vid = start + get_global_id(0) + vertexOffset;
|
||||
int vid = start + get_global_id(0) + offset;
|
||||
int eidx0 = E_IT[2*i+0];
|
||||
int eidx1 = E_IT[2*i+1];
|
||||
vertex += vertexOffset;
|
||||
varying += (varying ? varyingOffset :0);
|
||||
|
||||
struct Vertex dst;
|
||||
struct Varying dstVarying;
|
||||
clearVertex(&dst);
|
||||
clearVarying(&dstVarying);
|
||||
|
||||
addWithWeight(&dst, &vertex[eidx0], 0.5f);
|
||||
addWithWeight(&dst, &vertex[eidx1], 0.5f);
|
||||
addWithWeight(&dst, vertex, eidx0, 0.5f);
|
||||
addWithWeight(&dst, vertex, eidx1, 0.5f);
|
||||
|
||||
vertex[vid] = dst;
|
||||
writeVertex(vertex, vid, &dst);
|
||||
|
||||
if (varying) {
|
||||
addVaryingWithWeight(&dstVarying, &varying[eidx0], 0.5f);
|
||||
addVaryingWithWeight(&dstVarying, &varying[eidx1], 0.5f);
|
||||
varying[vid] = dstVarying;
|
||||
addVaryingWithWeight(&dstVarying, varying, eidx0, 0.5f);
|
||||
addVaryingWithWeight(&dstVarying, varying, eidx1, 0.5f);
|
||||
writeVarying(varying, vid, &dstVarying);
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void computeBilinearVertex(__global struct Vertex *vertex,
|
||||
__global struct Varying *varying,
|
||||
__kernel void computeBilinearVertex(__global float *vertex,
|
||||
__global float *varying,
|
||||
__global int *V_ITa,
|
||||
int vertexOffset, int tableOffset,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset,
|
||||
int start, int end) {
|
||||
|
||||
int i = start + get_global_id(0) + tableOffset;
|
||||
int vid = start + get_global_id(0) + vertexOffset;
|
||||
int vid = start + get_global_id(0) + offset;
|
||||
vertex += vertexOffset;
|
||||
varying += (varying ? varyingOffset :0);
|
||||
|
||||
int p = V_ITa[i];
|
||||
|
||||
struct Vertex dst;
|
||||
clearVertex(&dst);
|
||||
addWithWeight(&dst, &vertex[p], 1.0f);
|
||||
addWithWeight(&dst, vertex, p, 1.0f);
|
||||
|
||||
vertex[vid] = dst;
|
||||
writeVertex(vertex, vid, &dst);
|
||||
|
||||
if (varying) {
|
||||
struct Varying dstVarying;
|
||||
clearVarying(&dstVarying);
|
||||
addVaryingWithWeight(&dstVarying, &varying[p], 1.0f);
|
||||
varying[vid] = dstVarying;
|
||||
addVaryingWithWeight(&dstVarying, varying, p, 1.0f);
|
||||
writeVarying(varying, vid, &dstVarying);
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
__kernel void computeFace(__global struct Vertex *vertex,
|
||||
__global struct Varying *varying,
|
||||
__kernel void computeFace(__global float *vertex,
|
||||
__global float *varying,
|
||||
__global int *F_IT,
|
||||
__global int *F_ITa,
|
||||
int vertexOffset, int tableOffset,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset,
|
||||
int start, int end) {
|
||||
|
||||
int i = start + get_global_id(0) + tableOffset;
|
||||
int vid = start + get_global_id(0) + vertexOffset;
|
||||
int vid = start + get_global_id(0) + offset;
|
||||
int h = F_ITa[2*i];
|
||||
int n = F_ITa[2*i+1];
|
||||
vertex += vertexOffset;
|
||||
varying += (varying ? varyingOffset :0);
|
||||
|
||||
float weight = 1.0f/n;
|
||||
|
||||
@ -138,26 +173,31 @@ __kernel void computeFace(__global struct Vertex *vertex,
|
||||
clearVarying(&dstVarying);
|
||||
for (int j=0; j<n; ++j) {
|
||||
int index = F_IT[h+j];
|
||||
addWithWeight(&dst, &vertex[index], weight);
|
||||
if(varying) addVaryingWithWeight(&dstVarying, &varying[index], weight);
|
||||
addWithWeight(&dst, vertex, index, weight);
|
||||
if (varying) {
|
||||
addVaryingWithWeight(&dstVarying, varying, index, weight);
|
||||
}
|
||||
}
|
||||
vertex[vid] = dst;
|
||||
if (varying) varying[vid] = dstVarying;
|
||||
writeVertex(vertex, vid, &dst);
|
||||
if (varying) writeVarying(varying, vid, &dstVarying);
|
||||
}
|
||||
|
||||
__kernel void computeEdge(__global struct Vertex *vertex,
|
||||
__global struct Varying *varying,
|
||||
__kernel void computeEdge(__global float *vertex,
|
||||
__global float *varying,
|
||||
__global int *E_IT,
|
||||
__global float *E_W,
|
||||
int vertexOffset, int tableOffset,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset,
|
||||
int start, int end) {
|
||||
|
||||
int i = start + get_global_id(0) + tableOffset;
|
||||
int vid = start + get_global_id(0) + vertexOffset;
|
||||
int vid = start + get_global_id(0) + offset;
|
||||
int eidx0 = E_IT[4*i+0];
|
||||
int eidx1 = E_IT[4*i+1];
|
||||
int eidx2 = E_IT[4*i+2];
|
||||
int eidx3 = E_IT[4*i+3];
|
||||
vertex += vertexOffset;
|
||||
varying += (varying ? varyingOffset :0);
|
||||
|
||||
float vertWeight = E_W[i*2+0];
|
||||
|
||||
@ -167,38 +207,41 @@ __kernel void computeEdge(__global struct Vertex *vertex,
|
||||
clearVertex(&dst);
|
||||
clearVarying(&dstVarying);
|
||||
|
||||
addWithWeight(&dst, &vertex[eidx0], vertWeight);
|
||||
addWithWeight(&dst, &vertex[eidx1], vertWeight);
|
||||
addWithWeight(&dst, vertex, eidx0, vertWeight);
|
||||
addWithWeight(&dst, vertex, eidx1, vertWeight);
|
||||
|
||||
if (eidx2 > -1) {
|
||||
float faceWeight = E_W[i*2+1];
|
||||
|
||||
addWithWeight(&dst, &vertex[eidx2], faceWeight);
|
||||
addWithWeight(&dst, &vertex[eidx3], faceWeight);
|
||||
addWithWeight(&dst, vertex, eidx2, faceWeight);
|
||||
addWithWeight(&dst, vertex, eidx3, faceWeight);
|
||||
}
|
||||
|
||||
vertex[vid] = dst;
|
||||
writeVertex(vertex, vid, &dst);
|
||||
|
||||
if (varying) {
|
||||
addVaryingWithWeight(&dstVarying, &varying[eidx0], 0.5f);
|
||||
addVaryingWithWeight(&dstVarying, &varying[eidx1], 0.5f);
|
||||
varying[vid] = dstVarying;
|
||||
addVaryingWithWeight(&dstVarying, varying, eidx0, 0.5f);
|
||||
addVaryingWithWeight(&dstVarying, varying, eidx1, 0.5f);
|
||||
writeVarying(varying, vid, &dstVarying);
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void computeVertexA(__global struct Vertex *vertex,
|
||||
__global struct Varying *varying,
|
||||
__kernel void computeVertexA(__global float *vertex,
|
||||
__global float *varying,
|
||||
__global int *V_ITa,
|
||||
__global float *V_W,
|
||||
int vertexOffset, int tableOffset,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset,
|
||||
int start, int end, int pass) {
|
||||
|
||||
int i = start + get_global_id(0) + tableOffset;
|
||||
int vid = start + get_global_id(0) + vertexOffset;
|
||||
int vid = start + get_global_id(0) + offset;
|
||||
int n = V_ITa[5*i+1];
|
||||
int p = V_ITa[5*i+2];
|
||||
int eidx0 = V_ITa[5*i+3];
|
||||
int eidx1 = V_ITa[5*i+4];
|
||||
vertex += vertexOffset;
|
||||
varying += (varying ? varyingOffset :0);
|
||||
|
||||
float weight = (pass==1) ? V_W[i] : 1.0f - V_W[i];
|
||||
|
||||
@ -209,41 +252,43 @@ __kernel void computeVertexA(__global struct Vertex *vertex,
|
||||
weight=1.0f-weight;
|
||||
|
||||
struct Vertex dst;
|
||||
if (! pass)
|
||||
clearVertex(&dst);
|
||||
else
|
||||
dst = vertex[vid];
|
||||
clearVertex(&dst);
|
||||
if (pass)
|
||||
addWithWeight(&dst, vertex, vid, 1.0f); // copy previous result
|
||||
|
||||
if (eidx0==-1 || (pass==0 && (n==-1)) ) {
|
||||
addWithWeight(&dst, &vertex[p], weight);
|
||||
addWithWeight(&dst, vertex, p, weight);
|
||||
} else {
|
||||
addWithWeight(&dst, &vertex[p], weight * 0.75f);
|
||||
addWithWeight(&dst, &vertex[eidx0], weight * 0.125f);
|
||||
addWithWeight(&dst, &vertex[eidx1], weight * 0.125f);
|
||||
addWithWeight(&dst, vertex, p, weight * 0.75f);
|
||||
addWithWeight(&dst, vertex, eidx0, weight * 0.125f);
|
||||
addWithWeight(&dst, vertex, eidx1, weight * 0.125f);
|
||||
}
|
||||
vertex[vid] = dst;
|
||||
writeVertex(vertex, vid, &dst);
|
||||
|
||||
if (! pass && varying) {
|
||||
struct Varying dstVarying;
|
||||
clearVarying(&dstVarying);
|
||||
addVaryingWithWeight(&dstVarying, &varying[p], 1.0f);
|
||||
varying[vid] = dstVarying;
|
||||
addVaryingWithWeight(&dstVarying, varying, p, 1.0f);
|
||||
writeVarying(varying, vid, &dstVarying);
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void computeVertexB(__global struct Vertex *vertex,
|
||||
__global struct Varying *varying,
|
||||
__kernel void computeVertexB(__global float *vertex,
|
||||
__global float *varying,
|
||||
__global int *V_ITa,
|
||||
__global int *V_IT,
|
||||
__global float *V_W,
|
||||
int vertexOffset, int tableOffset,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset,
|
||||
int start, int end) {
|
||||
|
||||
int i = start + get_global_id(0) + tableOffset;
|
||||
int vid = start + get_global_id(0) + vertexOffset;
|
||||
int vid = start + get_global_id(0) + offset;
|
||||
int h = V_ITa[5*i];
|
||||
int n = V_ITa[5*i+1];
|
||||
int p = V_ITa[5*i+2];
|
||||
vertex += vertexOffset;
|
||||
varying += (varying ? varyingOffset :0);
|
||||
|
||||
float weight = V_W[i];
|
||||
float wp = 1.0f/(float)(n*n);
|
||||
@ -252,35 +297,38 @@ __kernel void computeVertexB(__global struct Vertex *vertex,
|
||||
struct Vertex dst;
|
||||
clearVertex(&dst);
|
||||
|
||||
addWithWeight(&dst, &vertex[p], weight * wv);
|
||||
addWithWeight(&dst, vertex, p, weight * wv);
|
||||
|
||||
for (int j = 0; j < n; ++j) {
|
||||
addWithWeight(&dst, &vertex[V_IT[h+j*2]], weight * wp);
|
||||
addWithWeight(&dst, &vertex[V_IT[h+j*2+1]], weight * wp);
|
||||
addWithWeight(&dst, vertex, V_IT[h+j*2], weight * wp);
|
||||
addWithWeight(&dst, vertex, V_IT[h+j*2+1], weight * wp);
|
||||
}
|
||||
vertex[vid] = dst;
|
||||
writeVertex(vertex, vid, &dst);
|
||||
|
||||
if (varying) {
|
||||
struct Varying dstVarying;
|
||||
clearVarying(&dstVarying);
|
||||
addVaryingWithWeight(&dstVarying, &varying[p], 1.0f);
|
||||
varying[vid] = dstVarying;
|
||||
addVaryingWithWeight(&dstVarying, varying, p, 1.0f);
|
||||
writeVarying(varying, vid, &dstVarying);
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void computeLoopVertexB(__global struct Vertex *vertex,
|
||||
__global struct Varying *varying,
|
||||
__kernel void computeLoopVertexB(__global float *vertex,
|
||||
__global float *varying,
|
||||
__global int *V_ITa,
|
||||
__global int *V_IT,
|
||||
__global float *V_W,
|
||||
int vertexOffset, int tableOffset,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset,
|
||||
int start, int end) {
|
||||
|
||||
int i = start + get_global_id(0) + tableOffset;
|
||||
int vid = start + get_global_id(0) + vertexOffset;
|
||||
int vid = start + get_global_id(0) + offset;
|
||||
int h = V_ITa[5*i];
|
||||
int n = V_ITa[5*i+1];
|
||||
int p = V_ITa[5*i+2];
|
||||
vertex += vertexOffset;
|
||||
varying += (varying ? varyingOffset :0);
|
||||
|
||||
float weight = V_W[i];
|
||||
float wp = 1.0f/(float)(n);
|
||||
@ -290,36 +338,37 @@ __kernel void computeLoopVertexB(__global struct Vertex *vertex,
|
||||
|
||||
struct Vertex dst;
|
||||
clearVertex(&dst);
|
||||
addWithWeight(&dst, &vertex[p], weight * (1.0f - (beta * n)));
|
||||
addWithWeight(&dst, vertex, p, weight * (1.0f - (beta * n)));
|
||||
|
||||
for (int j = 0; j < n; ++j) {
|
||||
addWithWeight(&dst, &vertex[V_IT[h+j]], weight * beta);
|
||||
addWithWeight(&dst, vertex, V_IT[h+j], weight * beta);
|
||||
}
|
||||
vertex[vid] = dst;
|
||||
writeVertex(vertex, vid, &dst);
|
||||
|
||||
if (varying) {
|
||||
struct Varying dstVarying;
|
||||
clearVarying(&dstVarying);
|
||||
addVaryingWithWeight(&dstVarying, &varying[p], 1.0f);
|
||||
varying[vid] = dstVarying;
|
||||
addVaryingWithWeight(&dstVarying, varying, p, 1.0f);
|
||||
writeVarying(varying, vid, &dstVarying);
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void editVertexAdd(__global struct Vertex *vertex,
|
||||
__kernel void editVertexAdd(__global float *vertex,
|
||||
__global int *editIndices,
|
||||
__global float *editValues,
|
||||
int vertexOffset,
|
||||
int primVarOffset,
|
||||
int primVarWidth,
|
||||
int vertexOffset, int tableOffset,
|
||||
int offset, int tableOffset,
|
||||
int start, int end) {
|
||||
|
||||
int i = start + get_global_id(0) + tableOffset;
|
||||
int v = editIndices[i];
|
||||
int eid = start + get_global_id(0);
|
||||
struct Vertex dst = vertex[v];
|
||||
vertex += vertexOffset;
|
||||
vertex += v * VERTEX_STRIDE + primVarOffset;
|
||||
|
||||
for (int j = 0; j < primVarWidth; ++j) {
|
||||
dst.v[j+primVarOffset] += editValues[eid*primVarWidth + j];
|
||||
vertex[j] += editValues[eid*primVarWidth + j];
|
||||
}
|
||||
vertex[v] = dst;
|
||||
}
|
||||
|
@ -28,6 +28,8 @@
|
||||
#include "../osd/error.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <sstream>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define snprintf _snprintf
|
||||
#endif
|
||||
@ -54,8 +56,11 @@ OsdCLKernelBundle::OsdCLKernelBundle() :
|
||||
_clCatmarkVertexB(NULL),
|
||||
_clLoopEdge(NULL),
|
||||
_clLoopVertexA(NULL),
|
||||
_clLoopVertexB(NULL)
|
||||
{
|
||||
_clLoopVertexB(NULL),
|
||||
_numVertexElements(0),
|
||||
_vertexStride(0),
|
||||
_numVaryingElements(0),
|
||||
_varyingStride(0) {
|
||||
}
|
||||
|
||||
OsdCLKernelBundle::~OsdCLKernelBundle() {
|
||||
@ -97,19 +102,24 @@ static cl_kernel buildKernel(cl_program prog, const char * name) {
|
||||
|
||||
bool
|
||||
OsdCLKernelBundle::Compile(cl_context clContext,
|
||||
int numVertexElements, int numVaryingElements) {
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc) {
|
||||
|
||||
cl_int ciErrNum;
|
||||
|
||||
_vdesc.Set( numVertexElements, numVaryingElements );
|
||||
_numVertexElements = vertexDesc.length;
|
||||
_vertexStride = vertexDesc.stride;
|
||||
_numVaryingElements = varyingDesc.length;
|
||||
_varyingStride = varyingDesc.stride;
|
||||
|
||||
char constantDefine[256];
|
||||
snprintf(constantDefine, sizeof(constantDefine),
|
||||
"#define NUM_VERTEX_ELEMENTS %d\n"
|
||||
"#define NUM_VARYING_ELEMENTS %d\n",
|
||||
numVertexElements, numVaryingElements);
|
||||
std::ostringstream defines;
|
||||
defines << "#define NUM_VERTEX_ELEMENTS " << _numVertexElements << "\n"
|
||||
<< "#define VERTEX_STRIDE " << _vertexStride << "\n"
|
||||
<< "#define NUM_VARYING_ELEMENTS " << _numVaryingElements << "\n"
|
||||
<< "#define VARYING_STRIDE " << _varyingStride << "\n";
|
||||
std::string defineStr = defines.str();
|
||||
|
||||
const char *sources[] = { constantDefine, clSource };
|
||||
const char *sources[] = { defineStr.c_str(), clSource };
|
||||
|
||||
_clProgram = clCreateProgramWithSource(clContext, 2, sources, 0, &ciErrNum);
|
||||
CL_CHECK_ERROR(ciErrNum, "clCreateProgramWithSource\n");
|
||||
@ -131,6 +141,7 @@ OsdCLKernelBundle::Compile(cl_context clContext,
|
||||
OsdError(OSD_CL_PROGRAM_BUILD_ERROR, cBuildLog);
|
||||
}
|
||||
delete[] devices;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -47,7 +47,8 @@ public:
|
||||
~OsdCLKernelBundle();
|
||||
|
||||
bool Compile(cl_context clContext,
|
||||
int numVertexElements, int numVaryingElements);
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc);
|
||||
|
||||
cl_kernel GetBilinearEdgeKernel() const { return _clBilinearEdge; }
|
||||
|
||||
@ -70,17 +71,23 @@ public:
|
||||
cl_kernel GetVertexEditAdd() const { return _clVertexEditAdd; }
|
||||
|
||||
struct Match {
|
||||
|
||||
/// Constructor
|
||||
Match(int numVertexElements, int numVaryingElements)
|
||||
: vdesc(numVertexElements, numVaryingElements) {
|
||||
Match(OsdVertexBufferDescriptor const &vertex,
|
||||
OsdVertexBufferDescriptor const &varying)
|
||||
: vertexDesc(vertex), varyingDesc(varying) {
|
||||
}
|
||||
|
||||
|
||||
bool operator() (OsdCLKernelBundle const *kernel) {
|
||||
return vdesc == kernel->_vdesc;
|
||||
// The offset is dynamic, so only length and stride are compared here;
|
||||
// returns true if they are all equal.
|
||||
return (vertexDesc.length == kernel->_numVertexElements and
|
||||
vertexDesc.stride == kernel->_vertexStride and
|
||||
varyingDesc.length == kernel->_numVaryingElements and
|
||||
varyingDesc.stride == kernel->_varyingStride);
|
||||
}
|
||||
|
||||
OsdVertexDescriptor vdesc;
|
||||
|
||||
OsdVertexBufferDescriptor vertexDesc;
|
||||
OsdVertexBufferDescriptor varyingDesc;
|
||||
};
|
||||
|
||||
friend struct Match;
|
||||
@ -99,7 +106,10 @@ protected:
|
||||
_clLoopVertexB,
|
||||
_clVertexEditAdd;
|
||||
|
||||
OsdVertexDescriptor _vdesc;
|
||||
int _numVertexElements;
|
||||
int _vertexStride;
|
||||
int _numVaryingElements;
|
||||
int _varyingStride;
|
||||
};
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
|
@ -30,8 +30,7 @@ namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
|
||||
OsdCpuComputeController::OsdCpuComputeController() :
|
||||
_currentVertexBuffer(NULL), _currentVaryingBuffer(NULL) {
|
||||
OsdCpuComputeController::OsdCpuComputeController() {
|
||||
}
|
||||
|
||||
OsdCpuComputeController::~OsdCpuComputeController() {
|
||||
@ -44,7 +43,8 @@ OsdCpuComputeController::ApplyBilinearFaceVerticesKernel(
|
||||
assert(context);
|
||||
|
||||
OsdCpuComputeFace(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::F_IT)->GetBuffer(),
|
||||
(const int*)context->GetTable(FarSubdivisionTables::F_ITa)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
@ -57,7 +57,8 @@ OsdCpuComputeController::ApplyBilinearEdgeVerticesKernel(
|
||||
assert(context);
|
||||
|
||||
OsdCpuComputeBilinearEdge(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
@ -69,7 +70,8 @@ OsdCpuComputeController::ApplyBilinearVertexVerticesKernel(
|
||||
assert(context);
|
||||
|
||||
OsdCpuComputeBilinearVertex(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
@ -81,7 +83,8 @@ OsdCpuComputeController::ApplyCatmarkFaceVerticesKernel(
|
||||
assert(context);
|
||||
|
||||
OsdCpuComputeFace(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::F_IT)->GetBuffer(),
|
||||
(const int*)context->GetTable(FarSubdivisionTables::F_ITa)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
@ -94,7 +97,8 @@ OsdCpuComputeController::ApplyCatmarkEdgeVerticesKernel(
|
||||
assert(context);
|
||||
|
||||
OsdCpuComputeEdge(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
|
||||
(const float*)context->GetTable(FarSubdivisionTables::E_W)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
@ -107,7 +111,8 @@ OsdCpuComputeController::ApplyCatmarkVertexVerticesKernelB(
|
||||
assert(context);
|
||||
|
||||
OsdCpuComputeVertexB(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_IT)->GetBuffer(),
|
||||
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
|
||||
@ -121,7 +126,8 @@ OsdCpuComputeController::ApplyCatmarkVertexVerticesKernelA1(
|
||||
assert(context);
|
||||
|
||||
OsdCpuComputeVertexA(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
|
||||
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), false);
|
||||
@ -134,7 +140,8 @@ OsdCpuComputeController::ApplyCatmarkVertexVerticesKernelA2(
|
||||
assert(context);
|
||||
|
||||
OsdCpuComputeVertexA(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
|
||||
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
|
||||
@ -147,7 +154,8 @@ OsdCpuComputeController::ApplyLoopEdgeVerticesKernel(
|
||||
assert(context);
|
||||
|
||||
OsdCpuComputeEdge(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
|
||||
(const float*)context->GetTable(FarSubdivisionTables::E_W)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
@ -160,7 +168,8 @@ OsdCpuComputeController::ApplyLoopVertexVerticesKernelB(
|
||||
assert(context);
|
||||
|
||||
OsdCpuComputeLoopVertexB(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_IT)->GetBuffer(),
|
||||
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
|
||||
@ -174,7 +183,8 @@ OsdCpuComputeController::ApplyLoopVertexVerticesKernelA1(
|
||||
assert(context);
|
||||
|
||||
OsdCpuComputeVertexA(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
|
||||
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), false);
|
||||
@ -187,7 +197,8 @@ OsdCpuComputeController::ApplyLoopVertexVerticesKernelA2(
|
||||
assert(context);
|
||||
|
||||
OsdCpuComputeVertexA(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
|
||||
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
|
||||
@ -206,24 +217,24 @@ OsdCpuComputeController::ApplyVertexEdits(
|
||||
const OsdCpuTable * editValues = edit->GetEditValues();
|
||||
|
||||
if (edit->GetOperation() == FarVertexEdit::Add) {
|
||||
OsdCpuEditVertexAdd(_vdesc,
|
||||
_currentVertexBuffer,
|
||||
OsdCpuEditVertexAdd(_currentBindState.vertexBuffer,
|
||||
_currentBindState.vertexDesc,
|
||||
edit->GetPrimvarOffset(),
|
||||
edit->GetPrimvarWidth(),
|
||||
batch.GetVertexOffset(),
|
||||
batch.GetTableOffset(),
|
||||
batch.GetStart(),
|
||||
batch.GetVertexOffset(),
|
||||
batch.GetTableOffset(),
|
||||
batch.GetStart(),
|
||||
batch.GetEnd(),
|
||||
static_cast<unsigned int*>(primvarIndices->GetBuffer()),
|
||||
static_cast<float*>(editValues->GetBuffer()));
|
||||
} else if (edit->GetOperation() == FarVertexEdit::Set) {
|
||||
OsdCpuEditVertexSet(_vdesc,
|
||||
_currentVertexBuffer,
|
||||
OsdCpuEditVertexSet(_currentBindState.vertexBuffer,
|
||||
_currentBindState.vertexDesc,
|
||||
edit->GetPrimvarOffset(),
|
||||
edit->GetPrimvarWidth(),
|
||||
batch.GetVertexOffset(),
|
||||
batch.GetTableOffset(),
|
||||
batch.GetStart(),
|
||||
batch.GetVertexOffset(),
|
||||
batch.GetTableOffset(),
|
||||
batch.GetStart(),
|
||||
batch.GetEnd(),
|
||||
static_cast<unsigned int*>(primvarIndices->GetBuffer()),
|
||||
static_cast<float*>(editValues->GetBuffer()));
|
||||
|
@ -29,6 +29,7 @@
|
||||
|
||||
#include "../far/dispatcher.h"
|
||||
#include "../osd/cpuComputeContext.h"
|
||||
#include "../osd/vertexDescriptor.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
@ -64,15 +65,25 @@ public:
|
||||
///
|
||||
/// @param varyingBuffer varying-interpolated data buffer
|
||||
///
|
||||
/// @param vertexDesc the descriptor of the vertex elements to be refined.
|
||||
/// If it is NULL, all primvars in the vertex buffer
|
||||
/// will be refined.
|
||||
///
|
||||
/// @param varyingDesc the descriptor of the varying elements to be refined.
|
||||
/// If it is NULL, all primvars in the varying buffer
|
||||
/// will be refined.
|
||||
///
|
||||
template<class VERTEX_BUFFER, class VARYING_BUFFER>
|
||||
void Refine(OsdCpuComputeContext const *context,
|
||||
FarKernelBatchVector const & batches,
|
||||
VERTEX_BUFFER *vertexBuffer,
|
||||
VARYING_BUFFER *varyingBuffer) {
|
||||
VARYING_BUFFER *varyingBuffer,
|
||||
OsdVertexBufferDescriptor const *vertexDesc=NULL,
|
||||
OsdVertexBufferDescriptor const *varyingDesc=NULL) {
|
||||
|
||||
if (batches.empty()) return;
|
||||
|
||||
bind(vertexBuffer, varyingBuffer);
|
||||
bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc);
|
||||
|
||||
FarDispatcher::Refine(this, context, batches, /*maxlevel*/-1);
|
||||
|
||||
@ -130,25 +141,62 @@ protected:
|
||||
void ApplyVertexEdits(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
template<class VERTEX_BUFFER, class VARYING_BUFFER>
|
||||
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying) {
|
||||
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying,
|
||||
OsdVertexBufferDescriptor const *vertexDesc,
|
||||
OsdVertexBufferDescriptor const *varyingDesc) {
|
||||
|
||||
_currentVertexBuffer = vertex ? vertex->BindCpuBuffer() : 0;
|
||||
_currentVaryingBuffer = varying ? varying->BindCpuBuffer() : 0;
|
||||
// If a vertex buffer descriptor is specified, use it.
|
||||
// Otherwise, assume the data is tightly packed in the vertex buffer.
|
||||
if (vertexDesc) {
|
||||
_currentBindState.vertexDesc = *vertexDesc;
|
||||
} else {
|
||||
int numElements = vertex ? vertex->GetNumElements() : 0;
|
||||
_currentBindState.vertexDesc = OsdVertexBufferDescriptor(
|
||||
0, numElements, numElements);
|
||||
}
|
||||
if (varyingDesc) {
|
||||
_currentBindState.varyingDesc = *varyingDesc;
|
||||
} else {
|
||||
int numElements = varying ? varying->GetNumElements() : 0;
|
||||
_currentBindState.varyingDesc = OsdVertexBufferDescriptor(
|
||||
0, numElements, numElements);
|
||||
}
|
||||
|
||||
int numVertexElements = vertex ? vertex->GetNumElements() : 0;
|
||||
int numVaryingElements = varying ? varying->GetNumElements() : 0;
|
||||
_vdesc.Set(numVertexElements, numVaryingElements);
|
||||
// apply vertex offset here
|
||||
if (vertex) {
|
||||
_currentBindState.vertexBuffer =
|
||||
vertex->BindCpuBuffer() + _currentBindState.vertexDesc.offset;
|
||||
} else {
|
||||
_currentBindState.vertexBuffer = NULL;
|
||||
}
|
||||
if (varying) {
|
||||
_currentBindState.varyingBuffer =
|
||||
varying->BindCpuBuffer() + _currentBindState.varyingDesc.offset;
|
||||
} else {
|
||||
_currentBindState.varyingBuffer = NULL;
|
||||
}
|
||||
}
|
||||
void unbind() {
|
||||
_currentVertexBuffer = 0;
|
||||
_currentVaryingBuffer = 0;
|
||||
_vdesc.Reset();
|
||||
_currentBindState.Reset();
|
||||
}
|
||||
|
||||
private:
|
||||
float *_currentVertexBuffer, *_currentVaryingBuffer;
|
||||
OsdVertexDescriptor _vdesc;
|
||||
// The bind state is transient: it is only meaningful during refinement.
|
||||
// It does not take ownership of the vertex buffers.
|
||||
struct BindState {
|
||||
BindState() : vertexBuffer(NULL), varyingBuffer(NULL) {}
|
||||
void Reset() {
|
||||
vertexBuffer = varyingBuffer = NULL;
|
||||
vertexDesc.Reset();
|
||||
varyingDesc.Reset();
|
||||
}
|
||||
float *vertexBuffer;
|
||||
float *varyingBuffer;
|
||||
OsdVertexBufferDescriptor vertexDesc;
|
||||
OsdVertexBufferDescriptor varyingDesc;
|
||||
};
|
||||
|
||||
BindState _currentBindState;
|
||||
};
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
|
283
opensubdiv/osd/cpuKernel.cpp
Normal file → Executable file
283
opensubdiv/osd/cpuKernel.cpp
Normal file → Executable file
@ -25,54 +25,101 @@
|
||||
#include "../osd/cpuKernel.h"
|
||||
#include "../osd/vertexDescriptor.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
static inline void
|
||||
clear(float *dst, OsdVertexBufferDescriptor const &desc) {
|
||||
|
||||
memset(dst, 0, desc.length*sizeof(float));
|
||||
}
|
||||
|
||||
static inline void
|
||||
addWithWeight(float *dst, const float *srcOrigin, int srcIndex, float weight,
|
||||
OsdVertexBufferDescriptor const &desc) {
|
||||
|
||||
if (srcOrigin && dst) {
|
||||
const float *src = srcOrigin + srcIndex * desc.stride;
|
||||
for (int k = 0; k < desc.length; ++k) {
|
||||
dst[k] += src[k] * weight;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
copy(float *dstOrigin, const float *src, int dstIndex,
|
||||
OsdVertexBufferDescriptor const &desc) {
|
||||
|
||||
if (dstOrigin && src) {
|
||||
float *dst = dstOrigin + dstIndex * desc.stride;
|
||||
memcpy(dst, src, desc.length*sizeof(float));
|
||||
}
|
||||
}
|
||||
|
||||
void OsdCpuComputeFace(
|
||||
OsdVertexDescriptor const &vdesc, float * vertex, float * varying,
|
||||
float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *F_IT, const int *F_ITa, int vertexOffset, int tableOffset,
|
||||
int start, int end) {
|
||||
if(vdesc.numVertexElements == 4 && varying == NULL) {
|
||||
if(vertexDesc == OsdVertexBufferDescriptor(0, 4, 4) && varying == NULL) {
|
||||
ComputeFaceKernel<4>
|
||||
(vertex, F_IT, F_ITa, vertexOffset, tableOffset, start, end);
|
||||
} else if(vdesc.numVertexElements == 8 && varying == NULL) {
|
||||
} else if(vertexDesc == OsdVertexBufferDescriptor(0, 8, 8) && varying == NULL) {
|
||||
ComputeFaceKernel<8>
|
||||
(vertex, F_IT, F_ITa, vertexOffset, tableOffset, start, end);
|
||||
}
|
||||
else {
|
||||
float *vertexResults = (float*)alloca(vertexDesc.length * sizeof(float));
|
||||
float *varyingResults = (float*)alloca(varyingDesc.length * sizeof(float));
|
||||
|
||||
for (int i = start + tableOffset; i < end + tableOffset; i++) {
|
||||
int h = F_ITa[2*i];
|
||||
int n = F_ITa[2*i+1];
|
||||
|
||||
float weight = 1.0f/n;
|
||||
|
||||
// XXX: should use local vertex struct variable instead of
|
||||
// accumulating directly into global memory.
|
||||
int dstIndex = i + vertexOffset - tableOffset;
|
||||
vdesc.Clear(vertex, varying, dstIndex);
|
||||
|
||||
// clear
|
||||
clear(vertexResults, vertexDesc);
|
||||
clear(varyingResults, varyingDesc);
|
||||
|
||||
// accum
|
||||
for (int j = 0; j < n; ++j) {
|
||||
int index = F_IT[h+j];
|
||||
vdesc.AddWithWeight(vertex, dstIndex, index, weight);
|
||||
vdesc.AddVaryingWithWeight(varying, dstIndex, index, weight);
|
||||
addWithWeight(vertexResults, vertex, index, weight, vertexDesc);
|
||||
addWithWeight(varyingResults, varying, index, weight, varyingDesc);
|
||||
}
|
||||
}
|
||||
|
||||
// write results
|
||||
copy(vertex, vertexResults, dstIndex, vertexDesc);
|
||||
copy(varying, varyingResults, dstIndex, varyingDesc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void OsdCpuComputeEdge(
|
||||
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
|
||||
float *vertex, float *varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *E_IT, const float *E_W, int vertexOffset, int tableOffset,
|
||||
int start, int end) {
|
||||
if(vdesc.numVertexElements == 4 && varying == NULL) {
|
||||
if(vertexDesc == OsdVertexBufferDescriptor(0, 4, 4) && varying == NULL) {
|
||||
ComputeEdgeKernel<4>(vertex, E_IT, E_W, vertexOffset, tableOffset,
|
||||
start, end);
|
||||
}
|
||||
else if(vdesc.numVertexElements == 8 && varying == NULL) {
|
||||
else if(vertexDesc == OsdVertexBufferDescriptor(0, 8, 8) && varying == NULL) {
|
||||
ComputeEdgeKernel<8>(vertex, E_IT, E_W, vertexOffset, tableOffset,
|
||||
start, end);
|
||||
start, end);
|
||||
}
|
||||
else {
|
||||
float *vertexResults = (float*)alloca(vertexDesc.length * sizeof(float));
|
||||
float *varyingResults = (float*)alloca(varyingDesc.length * sizeof(float));
|
||||
|
||||
for (int i = start + tableOffset; i < end + tableOffset; i++) {
|
||||
int eidx0 = E_IT[4*i+0];
|
||||
int eidx1 = E_IT[4*i+1];
|
||||
@ -82,37 +129,46 @@ void OsdCpuComputeEdge(
|
||||
float vertWeight = E_W[i*2+0];
|
||||
|
||||
int dstIndex = i + vertexOffset - tableOffset;
|
||||
vdesc.Clear(vertex, varying, dstIndex);
|
||||
clear(vertexResults, vertexDesc);
|
||||
clear(varyingResults, varyingDesc);
|
||||
|
||||
vdesc.AddWithWeight(vertex, dstIndex, eidx0, vertWeight);
|
||||
vdesc.AddWithWeight(vertex, dstIndex, eidx1, vertWeight);
|
||||
addWithWeight(vertexResults, vertex, eidx0, vertWeight, vertexDesc);
|
||||
addWithWeight(vertexResults, vertex, eidx1, vertWeight, vertexDesc);
|
||||
|
||||
if (eidx2 != -1) {
|
||||
float faceWeight = E_W[i*2+1];
|
||||
|
||||
vdesc.AddWithWeight(vertex, dstIndex, eidx2, faceWeight);
|
||||
vdesc.AddWithWeight(vertex, dstIndex, eidx3, faceWeight);
|
||||
addWithWeight(vertexResults, vertex, eidx2, faceWeight, vertexDesc);
|
||||
addWithWeight(vertexResults, vertex, eidx3, faceWeight, vertexDesc);
|
||||
}
|
||||
|
||||
vdesc.AddVaryingWithWeight(varying, dstIndex, eidx0, 0.5f);
|
||||
vdesc.AddVaryingWithWeight(varying, dstIndex, eidx1, 0.5f);
|
||||
}
|
||||
addWithWeight(varyingResults, varying, eidx0, 0.5f, varyingDesc);
|
||||
addWithWeight(varyingResults, varying, eidx1, 0.5f, varyingDesc);
|
||||
|
||||
copy(vertex, vertexResults, dstIndex, vertexDesc);
|
||||
copy(varying, varyingResults, dstIndex, varyingDesc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void OsdCpuComputeVertexA(
|
||||
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
|
||||
float *vertex, float *varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *V_ITa, const float *V_W, int vertexOffset, int tableOffset,
|
||||
int start, int end, int pass) {
|
||||
if(vdesc.numVertexElements == 4 && varying == NULL) {
|
||||
if(vertexDesc == OsdVertexBufferDescriptor(0, 4, 4) && varying == NULL) {
|
||||
ComputeVertexAKernel<4>(vertex, V_ITa, V_W, vertexOffset, tableOffset,
|
||||
start, end, pass);
|
||||
}
|
||||
else if (vdesc.numVertexElements == 8 && varying == NULL) {
|
||||
else if(vertexDesc == OsdVertexBufferDescriptor(0, 8, 8) && varying == NULL) {
|
||||
ComputeVertexAKernel<8>(vertex, V_ITa, V_W, vertexOffset, tableOffset,
|
||||
start, end, pass);
|
||||
}
|
||||
}
|
||||
else {
|
||||
float *vertexResults = (float*)alloca(vertexDesc.length * sizeof(float));
|
||||
float *varyingResults = (float*)alloca(varyingDesc.length * sizeof(float));
|
||||
|
||||
for (int i = start + tableOffset; i < end + tableOffset; i++) {
|
||||
int n = V_ITa[5*i+1];
|
||||
int p = V_ITa[5*i+2];
|
||||
@ -129,36 +185,48 @@ void OsdCpuComputeVertexA(
|
||||
|
||||
int dstIndex = i + vertexOffset - tableOffset;
|
||||
|
||||
if (not pass)
|
||||
vdesc.Clear(vertex, varying, dstIndex);
|
||||
|
||||
if (eidx0 == -1 || (pass == 0 && (n == -1))) {
|
||||
vdesc.AddWithWeight(vertex, dstIndex, p, weight);
|
||||
} else {
|
||||
vdesc.AddWithWeight(vertex, dstIndex, p, weight * 0.75f);
|
||||
vdesc.AddWithWeight(vertex, dstIndex, eidx0, weight * 0.125f);
|
||||
vdesc.AddWithWeight(vertex, dstIndex, eidx1, weight * 0.125f);
|
||||
clear(vertexResults, vertexDesc);
|
||||
clear(varyingResults, varyingDesc);
|
||||
if (pass) {
|
||||
// copy previous results
|
||||
addWithWeight(vertexResults, vertex, dstIndex, 1.0f, vertexDesc);
|
||||
}
|
||||
|
||||
if (not pass)
|
||||
vdesc.AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
|
||||
}
|
||||
if (eidx0 == -1 || (pass == 0 && (n == -1))) {
|
||||
addWithWeight(vertexResults, vertex, p, weight, vertexDesc);
|
||||
} else {
|
||||
addWithWeight(vertexResults, vertex, p, weight * 0.75f, vertexDesc);
|
||||
addWithWeight(vertexResults, vertex, eidx0, weight * 0.125f, vertexDesc);
|
||||
addWithWeight(vertexResults, vertex, eidx1, weight * 0.125f, vertexDesc);
|
||||
}
|
||||
|
||||
copy(vertex, vertexResults, dstIndex, vertexDesc);
|
||||
if (not pass) {
|
||||
addWithWeight(varyingResults, varying, p, 1.0f, varyingDesc);
|
||||
copy(varying, varyingResults, dstIndex, varyingDesc);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void OsdCpuComputeVertexB(
|
||||
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
|
||||
float *vertex, float *varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *V_ITa, const int *V_IT, const float *V_W,
|
||||
int vertexOffset, int tableOffset, int start, int end) {
|
||||
if(vdesc.numVertexElements == 4 && varying == NULL) {
|
||||
if(vertexDesc == OsdVertexBufferDescriptor(0, 4, 4) && varying == NULL) {
|
||||
ComputeVertexBKernel<4>(vertex, V_ITa, V_IT, V_W,
|
||||
vertexOffset, tableOffset, start, end);
|
||||
}
|
||||
else if(vdesc.numVertexElements == 8 && varying == NULL) {
|
||||
else if(vertexDesc == OsdVertexBufferDescriptor(0, 8, 8) && varying == NULL) {
|
||||
ComputeVertexBKernel<8>(vertex, V_ITa, V_IT, V_W,
|
||||
vertexOffset, tableOffset, start, end);
|
||||
}
|
||||
}
|
||||
else {
|
||||
float *vertexResults = (float*)alloca(vertexDesc.length * sizeof(float));
|
||||
float *varyingResults = (float*)alloca(varyingDesc.length * sizeof(float));
|
||||
|
||||
for (int i = start + tableOffset; i < end + tableOffset; i++) {
|
||||
int h = V_ITa[5*i];
|
||||
int n = V_ITa[5*i+1];
|
||||
@ -169,32 +237,41 @@ void OsdCpuComputeVertexB(
|
||||
float wv = (n-2.0f) * n * wp;
|
||||
|
||||
int dstIndex = i + vertexOffset - tableOffset;
|
||||
vdesc.Clear(vertex, varying, dstIndex);
|
||||
clear(vertexResults, vertexDesc);
|
||||
clear(varyingResults, varyingDesc);
|
||||
|
||||
vdesc.AddWithWeight(vertex, dstIndex, p, weight * wv);
|
||||
addWithWeight(vertexResults, vertex, p, weight * wv, vertexDesc);
|
||||
|
||||
for (int j = 0; j < n; ++j) {
|
||||
vdesc.AddWithWeight(vertex, dstIndex, V_IT[h+j*2], weight * wp);
|
||||
vdesc.AddWithWeight(vertex, dstIndex, V_IT[h+j*2+1], weight * wp);
|
||||
addWithWeight(vertexResults, vertex, V_IT[h+j*2], weight * wp, vertexDesc);
|
||||
addWithWeight(vertexResults, vertex, V_IT[h+j*2+1], weight * wp, vertexDesc);
|
||||
}
|
||||
vdesc.AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
|
||||
addWithWeight(varyingResults, varying, p, 1.0f, varyingDesc);
|
||||
|
||||
copy(vertex, vertexResults, dstIndex, vertexDesc);
|
||||
copy(varying, varyingResults, dstIndex, varyingDesc);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void OsdCpuComputeLoopVertexB(
|
||||
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
|
||||
float *vertex, float *varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *V_ITa, const int *V_IT, const float *V_W,
|
||||
int vertexOffset, int tableOffset, int start, int end) {
|
||||
if(vdesc.numVertexElements == 4 && varying == NULL) {
|
||||
if(vertexDesc == OsdVertexBufferDescriptor(0, 4, 4) && varying == NULL) {
|
||||
ComputeLoopVertexBKernel<4>(vertex, V_ITa, V_IT, V_W, vertexOffset,
|
||||
tableOffset, start, end);
|
||||
}
|
||||
else if(vdesc.numVertexElements == 8 && varying == NULL) {
|
||||
else if(vertexDesc == OsdVertexBufferDescriptor(0, 8, 8) && varying == NULL) {
|
||||
ComputeLoopVertexBKernel<8>(vertex, V_ITa, V_IT, V_W, vertexOffset,
|
||||
tableOffset, start, end);
|
||||
}
|
||||
else {
|
||||
float *vertexResults = (float*)alloca(vertexDesc.length * sizeof(float));
|
||||
float *varyingResults = (float*)alloca(varyingDesc.length * sizeof(float));
|
||||
|
||||
for (int i = start + tableOffset; i < end + tableOffset; i++) {
|
||||
int h = V_ITa[5*i];
|
||||
int n = V_ITa[5*i+1];
|
||||
@ -207,94 +284,120 @@ void OsdCpuComputeLoopVertexB(
|
||||
beta = (0.625f - beta) * wp;
|
||||
|
||||
int dstIndex = i + vertexOffset - tableOffset;
|
||||
vdesc.Clear(vertex, varying, dstIndex);
|
||||
clear(vertexResults, vertexDesc);
|
||||
clear(varyingResults, varyingDesc);
|
||||
|
||||
vdesc.AddWithWeight(vertex, dstIndex, p, weight * (1.0f - (beta * n)));
|
||||
addWithWeight(vertexResults, vertex, p, weight * (1.0f - (beta * n)), vertexDesc);
|
||||
|
||||
for (int j = 0; j < n; ++j)
|
||||
vdesc.AddWithWeight(vertex, dstIndex, V_IT[h+j], weight * beta);
|
||||
addWithWeight(vertexResults, vertex, V_IT[h+j], weight * beta, vertexDesc);
|
||||
|
||||
vdesc.AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
|
||||
}
|
||||
}
|
||||
addWithWeight(varyingResults, varying, p, 1.0f, varyingDesc);
|
||||
|
||||
copy(vertex, vertexResults, dstIndex, vertexDesc);
|
||||
copy(varying, varyingResults, dstIndex, varyingDesc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void OsdCpuComputeBilinearEdge(
|
||||
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
|
||||
float *vertex, float *varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *E_IT, int vertexOffset, int tableOffset, int start, int end) {
|
||||
if(vdesc.numVertexElements == 4 && varying == NULL) {
|
||||
if(vertexDesc == OsdVertexBufferDescriptor(0, 4, 4) && varying == NULL) {
|
||||
ComputeBilinearEdgeKernel<4>(vertex, E_IT, vertexOffset, tableOffset,
|
||||
start, end);
|
||||
}
|
||||
else if(vdesc.numVertexElements == 8 && varying == NULL) {
|
||||
else if(vertexDesc == OsdVertexBufferDescriptor(0, 8, 8) && varying == NULL) {
|
||||
ComputeBilinearEdgeKernel<8>(vertex, E_IT, vertexOffset, tableOffset,
|
||||
start, end);
|
||||
}
|
||||
else {
|
||||
float *vertexResults = (float*)alloca(vertexDesc.length * sizeof(float));
|
||||
float *varyingResults = (float*)alloca(varyingDesc.length * sizeof(float));
|
||||
|
||||
for (int i = start + tableOffset; i < end + tableOffset; i++) {
|
||||
int eidx0 = E_IT[2*i+0];
|
||||
int eidx1 = E_IT[2*i+1];
|
||||
|
||||
int dstIndex = i + vertexOffset - tableOffset;
|
||||
vdesc.Clear(vertex, varying, dstIndex);
|
||||
clear(vertexResults, vertexDesc);
|
||||
clear(varyingResults, varyingDesc);
|
||||
|
||||
vdesc.AddWithWeight(vertex, dstIndex, eidx0, 0.5f);
|
||||
vdesc.AddWithWeight(vertex, dstIndex, eidx1, 0.5f);
|
||||
addWithWeight(vertexResults, vertex, eidx0, 0.5f, vertexDesc);
|
||||
addWithWeight(vertexResults, vertex, eidx1, 0.5f, vertexDesc);
|
||||
|
||||
vdesc.AddVaryingWithWeight(varying, dstIndex, eidx0, 0.5f);
|
||||
vdesc.AddVaryingWithWeight(varying, dstIndex, eidx1, 0.5f);
|
||||
}
|
||||
addWithWeight(varyingResults, varying, eidx0, 0.5f, varyingDesc);
|
||||
addWithWeight(varyingResults, varying, eidx1, 0.5f, varyingDesc);
|
||||
|
||||
copy(vertex, vertexResults, dstIndex, vertexDesc);
|
||||
copy(varying, varyingResults, dstIndex, varyingDesc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void OsdCpuComputeBilinearVertex(
|
||||
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
|
||||
float *vertex, float *varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *V_ITa, int vertexOffset, int tableOffset, int start, int end) {
|
||||
int numVertexElements = vdesc.numVertexElements;
|
||||
int numVaryingElements = vdesc.numVaryingElements;
|
||||
float *src, *des;
|
||||
|
||||
float *src, *des;
|
||||
for (int i = start + tableOffset; i < end + tableOffset; i++) {
|
||||
int p = V_ITa[i];
|
||||
|
||||
int dstIndex = i + vertexOffset - tableOffset;
|
||||
src = vertex + p * numVertexElements;
|
||||
des = vertex + dstIndex * numVertexElements;
|
||||
memcpy(des, src, sizeof(float)*numVertexElements);
|
||||
if(varying) {
|
||||
src = varying + p * numVaryingElements;
|
||||
des = varying + dstIndex * numVaryingElements;
|
||||
memcpy(des, src, sizeof(float)*numVaryingElements);
|
||||
int dstIndex = i + vertexOffset - tableOffset;
|
||||
if (vertex) {
|
||||
src = vertex + p * vertexDesc.stride;
|
||||
des = vertex + dstIndex * vertexDesc.stride;
|
||||
memcpy(des, src, sizeof(float)*vertexDesc.length);
|
||||
}
|
||||
if (varying) {
|
||||
src = varying + p * varyingDesc.stride;
|
||||
des = varying + dstIndex * varyingDesc.stride;
|
||||
memcpy(des, src, sizeof(float)*varyingDesc.length);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void OsdCpuEditVertexAdd(
|
||||
OsdVertexDescriptor const &vdesc, float *vertex,
|
||||
float *vertex,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
int primVarOffset, int primVarWidth, int vertexOffset, int tableOffset,
|
||||
int start, int end,
|
||||
const unsigned int *editIndices, const float *editValues) {
|
||||
|
||||
for (int i = start+tableOffset; i < end+tableOffset; i++) {
|
||||
vdesc.ApplyVertexEditAdd(vertex,
|
||||
primVarOffset,
|
||||
primVarWidth,
|
||||
editIndices[i] + vertexOffset,
|
||||
&editValues[i*primVarWidth]);
|
||||
|
||||
if (vertex) {
|
||||
int editIndex = editIndices[i] + vertexOffset;
|
||||
float *dst = vertex + editIndex * vertexDesc.stride + primVarOffset;
|
||||
|
||||
for (int i = 0; i < primVarWidth; ++i) {
|
||||
dst[i] += editValues[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void OsdCpuEditVertexSet(
|
||||
OsdVertexDescriptor const &vdesc, float *vertex,
|
||||
float *vertex,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
int primVarOffset, int primVarWidth, int vertexOffset, int tableOffset,
|
||||
int start, int end,
|
||||
const unsigned int *editIndices, const float *editValues) {
|
||||
|
||||
for (int i = start+tableOffset; i < end+tableOffset; i++) {
|
||||
vdesc.ApplyVertexEditSet(vertex,
|
||||
primVarOffset,
|
||||
primVarWidth,
|
||||
editIndices[i] + vertexOffset,
|
||||
&editValues[i*primVarWidth]);
|
||||
|
||||
if (vertex) {
|
||||
int editIndex = editIndices[i] + vertexOffset;
|
||||
float *dst = vertex + editIndex * vertexDesc.stride + primVarOffset;
|
||||
|
||||
for (int i = 0; i < primVarWidth; ++i) {
|
||||
dst[i] = editValues[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -88,8 +88,9 @@ void ComputeFaceKernel(float *vertex,
|
||||
memcpy(des, result1, sizeof(float)*numVertexElements);
|
||||
}
|
||||
}
|
||||
void OsdCpuComputeFace(OsdVertexDescriptor const &vdesc,
|
||||
float * vertex, float * varying,
|
||||
void OsdCpuComputeFace(float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *F_IT, const int *F_ITa,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
@ -147,8 +148,9 @@ void ComputeEdgeKernel( float *vertex,
|
||||
memcpy(des, result1, sizeof(float)*numVertexElements);
|
||||
}
|
||||
}
|
||||
void OsdCpuComputeEdge(OsdVertexDescriptor const &vdesc,
|
||||
float *vertex, float * varying,
|
||||
void OsdCpuComputeEdge(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *E_IT, const float *E_ITa,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
@ -230,8 +232,9 @@ void ComputeVertexAKernel( float *vertex,
|
||||
memcpy(des, result1, sizeof(float)*numVertexElements);
|
||||
}
|
||||
}
|
||||
void OsdCpuComputeVertexA(OsdVertexDescriptor const &vdesc,
|
||||
float *vertex, float * varying,
|
||||
void OsdCpuComputeVertexA(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *V_ITa, const float *V_IT,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end, int pass);
|
||||
@ -291,8 +294,9 @@ void ComputeVertexBKernel( float *vertex,
|
||||
}
|
||||
}
|
||||
|
||||
void OsdCpuComputeVertexB(OsdVertexDescriptor const &vdesc,
|
||||
float *vertex, float * varying,
|
||||
void OsdCpuComputeVertexB(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *V_ITa, const int *V_IT, const float *V_W,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
@ -350,8 +354,9 @@ void ComputeLoopVertexBKernel( float *vertex,
|
||||
memcpy(des, result1, sizeof(float)*numVertexElements);
|
||||
}
|
||||
}
|
||||
void OsdCpuComputeLoopVertexB(OsdVertexDescriptor const &vdesc,
|
||||
float *vertex, float * varying,
|
||||
void OsdCpuComputeLoopVertexB(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *V_ITa, const int *V_IT,
|
||||
const float *V_W,
|
||||
int vertexOffset, int tableOffset,
|
||||
@ -385,26 +390,30 @@ void ComputeBilinearEdgeKernel( float *vertex,
|
||||
memcpy(des, result, sizeof(float)*numVertexElements);
|
||||
}
|
||||
}
|
||||
void OsdCpuComputeBilinearEdge(OsdVertexDescriptor const &vdesc,
|
||||
float *vertex, float * varying,
|
||||
void OsdCpuComputeBilinearEdge(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *E_IT,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdCpuComputeBilinearVertex(OsdVertexDescriptor const &vdesc,
|
||||
float *vertex, float * varying,
|
||||
void OsdCpuComputeBilinearVertex(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *V_ITa,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdCpuEditVertexAdd(OsdVertexDescriptor const &vdesc, float *vertex,
|
||||
void OsdCpuEditVertexAdd(float *vertex,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
int primVarOffset, int primVarWidth,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end,
|
||||
const unsigned int *editIndices,
|
||||
const float *editValues);
|
||||
|
||||
void OsdCpuEditVertexSet(OsdVertexDescriptor const &vdesc, float *vertex,
|
||||
void OsdCpuEditVertexSet(float *vertex,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
int primVarOffset, int primVarWidth,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end,
|
||||
|
@ -31,42 +31,47 @@
|
||||
extern "C" {
|
||||
|
||||
void OsdCudaComputeFace(float *vertex, float *varying,
|
||||
int numUserVertexElements, int numVaryingElements,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
int *F_IT, int *F_ITa, int offset, int tableOffset, int start, int end);
|
||||
|
||||
void OsdCudaComputeEdge(float *vertex, float *varying,
|
||||
int numUserVertexElements, int numVaryingElements,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
int *E_IT, float *E_W, int offset, int tableOffset, int start, int end);
|
||||
|
||||
void OsdCudaComputeVertexA(float *vertex, float *varying,
|
||||
int numUserVertexElements, int numVaryingElements,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
int *V_ITa, float *V_W, int offset, int tableOffset,
|
||||
int start, int end, int pass);
|
||||
|
||||
void OsdCudaComputeVertexB(float *vertex, float *varying,
|
||||
int numUserVertexElements, int numVaryingElements,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
int *V_ITa, int *V_IT, float *V_W, int offset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdCudaComputeLoopVertexB(float *vertex, float *varying,
|
||||
int numUserVertexElements,
|
||||
int numVaryingElements,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
int *V_ITa, int *V_IT, float *V_W, int offset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdCudaComputeBilinearEdge(float *vertex, float *varying,
|
||||
int numUserVertexElements,
|
||||
int numVaryingElements,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
int *E_IT, int offset, int tableOffset, int start, int end);
|
||||
|
||||
void OsdCudaComputeBilinearVertex(float *vertex, float *varying,
|
||||
int numUserVertexElements,
|
||||
int numVaryingElements,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
int *V_ITa, int offset, int tableOffset, int start, int end);
|
||||
|
||||
void OsdCudaEditVertexAdd(float *vertex, int numUserVertexElements,
|
||||
void OsdCudaEditVertexAdd(float *vertex,
|
||||
int vertexLength, int vertexStride,
|
||||
int primVarOffset, int primVarWidth,
|
||||
int vertexOffset, int tableOffset,
|
||||
int offset, int tableOffset,
|
||||
int start, int end, int *editIndices, float *editValues);
|
||||
|
||||
}
|
||||
@ -74,8 +79,7 @@ void OsdCudaEditVertexAdd(float *vertex, int numUserVertexElements,
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
// Nothing to initialize explicitly here: the buffer bindings are held in
// _currentBindState rather than in separate raw pointer members.
OsdCudaComputeController::OsdCudaComputeController() {
}
|
||||
|
||||
OsdCudaComputeController::~OsdCudaComputeController() {
|
||||
@ -92,9 +96,13 @@ OsdCudaComputeController::ApplyBilinearFaceVerticesKernel(
|
||||
assert(F_IT);
|
||||
assert(F_ITa);
|
||||
|
||||
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
|
||||
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
|
||||
|
||||
OsdCudaComputeFace(
|
||||
_currentVertexBuffer, _currentVaryingBuffer,
|
||||
_vdesc.numVertexElements-3, _vdesc.numVaryingElements,
|
||||
vertex, varying,
|
||||
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
|
||||
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
|
||||
static_cast<int*>(F_IT->GetCudaMemory()),
|
||||
static_cast<int*>(F_ITa->GetCudaMemory()),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
@ -109,9 +117,13 @@ OsdCudaComputeController::ApplyBilinearEdgeVerticesKernel(
|
||||
const OsdCudaTable * E_IT = context->GetTable(FarSubdivisionTables::E_IT);
|
||||
assert(E_IT);
|
||||
|
||||
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
|
||||
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
|
||||
|
||||
OsdCudaComputeBilinearEdge(
|
||||
_currentVertexBuffer, _currentVaryingBuffer,
|
||||
_vdesc.numVertexElements-3, _vdesc.numVaryingElements,
|
||||
vertex, varying,
|
||||
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
|
||||
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
|
||||
static_cast<int*>(E_IT->GetCudaMemory()),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
@ -125,9 +137,13 @@ OsdCudaComputeController::ApplyBilinearVertexVerticesKernel(
|
||||
const OsdCudaTable * V_ITa = context->GetTable(FarSubdivisionTables::V_ITa);
|
||||
assert(V_ITa);
|
||||
|
||||
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
|
||||
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
|
||||
|
||||
OsdCudaComputeBilinearVertex(
|
||||
_currentVertexBuffer, _currentVaryingBuffer,
|
||||
_vdesc.numVertexElements-3, _vdesc.numVaryingElements,
|
||||
vertex, varying,
|
||||
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
|
||||
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
|
||||
static_cast<int*>(V_ITa->GetCudaMemory()),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
@ -143,9 +159,13 @@ OsdCudaComputeController::ApplyCatmarkFaceVerticesKernel(
|
||||
assert(F_IT);
|
||||
assert(F_ITa);
|
||||
|
||||
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
|
||||
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
|
||||
|
||||
OsdCudaComputeFace(
|
||||
_currentVertexBuffer, _currentVaryingBuffer,
|
||||
_vdesc.numVertexElements-3, _vdesc.numVaryingElements,
|
||||
vertex, varying,
|
||||
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
|
||||
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
|
||||
static_cast<int*>(F_IT->GetCudaMemory()),
|
||||
static_cast<int*>(F_ITa->GetCudaMemory()),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
@ -162,9 +182,13 @@ OsdCudaComputeController::ApplyCatmarkEdgeVerticesKernel(
|
||||
assert(E_IT);
|
||||
assert(E_W);
|
||||
|
||||
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
|
||||
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
|
||||
|
||||
OsdCudaComputeEdge(
|
||||
_currentVertexBuffer, _currentVaryingBuffer,
|
||||
_vdesc.numVertexElements-3, _vdesc.numVaryingElements,
|
||||
vertex, varying,
|
||||
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
|
||||
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
|
||||
static_cast<int*>(E_IT->GetCudaMemory()),
|
||||
static_cast<float*>(E_W->GetCudaMemory()),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
@ -183,9 +207,13 @@ OsdCudaComputeController::ApplyCatmarkVertexVerticesKernelB(
|
||||
assert(V_IT);
|
||||
assert(V_W);
|
||||
|
||||
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
|
||||
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
|
||||
|
||||
OsdCudaComputeVertexB(
|
||||
_currentVertexBuffer, _currentVaryingBuffer,
|
||||
_vdesc.numVertexElements-3, _vdesc.numVaryingElements,
|
||||
vertex, varying,
|
||||
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
|
||||
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
|
||||
static_cast<int*>(V_ITa->GetCudaMemory()),
|
||||
static_cast<int*>(V_IT->GetCudaMemory()),
|
||||
static_cast<float*>(V_W->GetCudaMemory()),
|
||||
@ -203,9 +231,13 @@ OsdCudaComputeController::ApplyCatmarkVertexVerticesKernelA1(
|
||||
assert(V_ITa);
|
||||
assert(V_W);
|
||||
|
||||
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
|
||||
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
|
||||
|
||||
OsdCudaComputeVertexA(
|
||||
_currentVertexBuffer, _currentVaryingBuffer,
|
||||
_vdesc.numVertexElements-3, _vdesc.numVaryingElements,
|
||||
vertex, varying,
|
||||
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
|
||||
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
|
||||
static_cast<int*>(V_ITa->GetCudaMemory()),
|
||||
static_cast<float*>(V_W->GetCudaMemory()),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), false);
|
||||
@ -222,9 +254,13 @@ OsdCudaComputeController::ApplyCatmarkVertexVerticesKernelA2(
|
||||
assert(V_ITa);
|
||||
assert(V_W);
|
||||
|
||||
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
|
||||
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
|
||||
|
||||
OsdCudaComputeVertexA(
|
||||
_currentVertexBuffer, _currentVaryingBuffer,
|
||||
_vdesc.numVertexElements-3, _vdesc.numVaryingElements,
|
||||
vertex, varying,
|
||||
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
|
||||
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
|
||||
static_cast<int*>(V_ITa->GetCudaMemory()),
|
||||
static_cast<float*>(V_W->GetCudaMemory()),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
|
||||
@ -241,9 +277,13 @@ OsdCudaComputeController::ApplyLoopEdgeVerticesKernel(
|
||||
assert(E_IT);
|
||||
assert(E_W);
|
||||
|
||||
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
|
||||
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
|
||||
|
||||
OsdCudaComputeEdge(
|
||||
_currentVertexBuffer, _currentVaryingBuffer,
|
||||
_vdesc.numVertexElements-3, _vdesc.numVaryingElements,
|
||||
vertex, varying,
|
||||
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
|
||||
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
|
||||
static_cast<int*>(E_IT->GetCudaMemory()),
|
||||
static_cast<float*>(E_W->GetCudaMemory()),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
@ -262,9 +302,13 @@ OsdCudaComputeController::ApplyLoopVertexVerticesKernelB(
|
||||
assert(V_IT);
|
||||
assert(V_W);
|
||||
|
||||
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
|
||||
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
|
||||
|
||||
OsdCudaComputeLoopVertexB(
|
||||
_currentVertexBuffer, _currentVaryingBuffer,
|
||||
_vdesc.numVertexElements-3, _vdesc.numVaryingElements,
|
||||
vertex, varying,
|
||||
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
|
||||
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
|
||||
static_cast<int*>(V_ITa->GetCudaMemory()),
|
||||
static_cast<int*>(V_IT->GetCudaMemory()),
|
||||
static_cast<float*>(V_W->GetCudaMemory()),
|
||||
@ -282,9 +326,13 @@ OsdCudaComputeController::ApplyLoopVertexVerticesKernelA1(
|
||||
assert(V_ITa);
|
||||
assert(V_W);
|
||||
|
||||
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
|
||||
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
|
||||
|
||||
OsdCudaComputeVertexA(
|
||||
_currentVertexBuffer, _currentVaryingBuffer,
|
||||
_vdesc.numVertexElements-3, _vdesc.numVaryingElements,
|
||||
vertex, varying,
|
||||
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
|
||||
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
|
||||
static_cast<int*>(V_ITa->GetCudaMemory()),
|
||||
static_cast<float*>(V_W->GetCudaMemory()),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), false);
|
||||
@ -301,9 +349,13 @@ OsdCudaComputeController::ApplyLoopVertexVerticesKernelA2(
|
||||
assert(V_ITa);
|
||||
assert(V_W);
|
||||
|
||||
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
|
||||
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
|
||||
|
||||
OsdCudaComputeVertexA(
|
||||
_currentVertexBuffer, _currentVaryingBuffer,
|
||||
_vdesc.numVertexElements-3, _vdesc.numVaryingElements,
|
||||
vertex, varying,
|
||||
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
|
||||
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
|
||||
static_cast<int*>(V_ITa->GetCudaMemory()),
|
||||
static_cast<float*>(V_W->GetCudaMemory()),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
|
||||
@ -321,10 +373,12 @@ OsdCudaComputeController::ApplyVertexEdits(
|
||||
const OsdCudaTable * primvarIndices = edit->GetPrimvarIndices();
|
||||
const OsdCudaTable * editValues = edit->GetEditValues();
|
||||
|
||||
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
|
||||
|
||||
if (edit->GetOperation() == FarVertexEdit::Add) {
|
||||
OsdCudaEditVertexAdd(
|
||||
_currentVertexBuffer,
|
||||
_vdesc.numVertexElements-3,
|
||||
vertex,
|
||||
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
|
||||
edit->GetPrimvarOffset(),
|
||||
edit->GetPrimvarWidth(),
|
||||
batch.GetVertexOffset(),
|
||||
|
@ -29,6 +29,7 @@
|
||||
|
||||
#include "../far/dispatcher.h"
|
||||
#include "../osd/cudaComputeContext.h"
|
||||
#include "../osd/vertexDescriptor.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
@ -64,15 +65,25 @@ public:
|
||||
///
|
||||
/// @param varyingBuffer varying-interpolated data buffer
|
||||
///
|
||||
/// @param vertexDesc the descriptor of vertex elements to be refined.
|
||||
/// if it's null, all primvars in the vertex buffer
|
||||
/// will be refined.
|
||||
///
|
||||
/// @param varyingDesc the descriptor of varying elements to be refined.
|
||||
/// if it's null, all primvars in the varying buffer
|
||||
/// will be refined.
|
||||
///
|
||||
template<class VERTEX_BUFFER, class VARYING_BUFFER>
|
||||
void Refine(OsdCudaComputeContext const *context,
|
||||
FarKernelBatchVector const &batches,
|
||||
VERTEX_BUFFER *vertexBuffer,
|
||||
VARYING_BUFFER *varyingBuffer) {
|
||||
VARYING_BUFFER *varyingBuffer,
|
||||
OsdVertexBufferDescriptor const *vertexDesc=NULL,
|
||||
OsdVertexBufferDescriptor const *varyingDesc=NULL) {
|
||||
|
||||
if (batches.empty()) return;
|
||||
|
||||
bind(vertexBuffer, varyingBuffer);
|
||||
bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc);
|
||||
|
||||
FarDispatcher::Refine(this, context, batches, /*maxlevel*/-1);
|
||||
|
||||
@ -130,37 +141,60 @@ protected:
|
||||
void ApplyVertexEdits(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
template<class VERTEX_BUFFER, class VARYING_BUFFER>
|
||||
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying) {
|
||||
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying,
|
||||
OsdVertexBufferDescriptor const *vertexDesc,
|
||||
OsdVertexBufferDescriptor const *varyingDesc) {
|
||||
|
||||
if (vertex) {
|
||||
_currentVertexBuffer = static_cast<float*>(vertex->BindCudaBuffer());
|
||||
_vdesc.numVertexElements = vertex->GetNumElements();
|
||||
// if the vertex buffer descriptor is specified, use it.
|
||||
// otherwise, assumes the data is tightly packed in the vertex buffer.
|
||||
if (vertexDesc) {
|
||||
_currentBindState.vertexDesc = *vertexDesc;
|
||||
} else {
|
||||
_currentVertexBuffer = 0;
|
||||
_vdesc.numVertexElements = 0;
|
||||
int numElements = vertex ? vertex->GetNumElements() : 0;
|
||||
_currentBindState.vertexDesc = OsdVertexBufferDescriptor(
|
||||
0, numElements, numElements);
|
||||
}
|
||||
if (varyingDesc) {
|
||||
_currentBindState.varyingDesc = *varyingDesc;
|
||||
} else {
|
||||
int numElements = varying ? varying->GetNumElements() : 0;
|
||||
_currentBindState.varyingDesc = OsdVertexBufferDescriptor(
|
||||
0, numElements, numElements);
|
||||
}
|
||||
|
||||
if (varying) {
|
||||
_currentVaryingBuffer = static_cast<float*>(varying->BindCudaBuffer());
|
||||
_vdesc.numVaryingElements = varying->GetNumElements();
|
||||
} else {
|
||||
_currentVaryingBuffer = 0;
|
||||
_vdesc.numVaryingElements = 0;
|
||||
}
|
||||
_currentBindState.vertexBuffer = vertex ?
|
||||
static_cast<float*>(vertex->BindCudaBuffer()) : 0;
|
||||
_currentBindState.varyingBuffer = varying ?
|
||||
static_cast<float*>(varying->BindCudaBuffer()) : 0;
|
||||
}
|
||||
|
||||
/// Unbinds any previously bound vertex and varying data buffers.
|
||||
void unbind() {
|
||||
_currentVertexBuffer = 0;
|
||||
_currentVaryingBuffer = 0;
|
||||
_currentBindState.Reset();
|
||||
}
|
||||
|
||||
private:
|
||||
float *_currentVertexBuffer, // cuda buffers
|
||||
*_currentVaryingBuffer;
|
||||
struct BindState {
|
||||
BindState() : vertexBuffer(NULL), varyingBuffer(NULL) {}
|
||||
void Reset() {
|
||||
vertexBuffer = varyingBuffer = NULL;
|
||||
vertexDesc.Reset();
|
||||
varyingDesc.Reset();
|
||||
}
|
||||
float *GetOffsettedVertexBuffer() const {
|
||||
return vertexBuffer ? vertexBuffer + vertexDesc.offset : 0;
|
||||
}
|
||||
float *GetOffsettedVaryingBuffer() const {
|
||||
return varyingBuffer ? varyingBuffer + varyingDesc.offset : 0;
|
||||
}
|
||||
|
||||
OsdVertexDescriptor _vdesc;
|
||||
float *vertexBuffer; // cuda buffers
|
||||
float *varyingBuffer;
|
||||
OsdVertexBufferDescriptor vertexDesc;
|
||||
OsdVertexBufferDescriptor varyingDesc;
|
||||
};
|
||||
|
||||
BindState _currentBindState;
|
||||
};
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
|
@ -25,37 +25,18 @@
|
||||
#include <assert.h>
|
||||
|
||||
template<int N> struct DeviceVertex
|
||||
{
|
||||
float pos[3];
|
||||
float userVertexData[N];
|
||||
|
||||
__device__ void addWithWeight(const DeviceVertex<N> *src, float weight) {
|
||||
pos[0] += src->pos[0] * weight;
|
||||
pos[1] += src->pos[1] * weight;
|
||||
pos[2] += src->pos[2] * weight;
|
||||
|
||||
for(int i = 0; i < N; ++i){
|
||||
userVertexData[i] += src->userVertexData[i] * weight;
|
||||
}
|
||||
}
|
||||
__device__ void clear() {
|
||||
pos[0] = pos[1] = pos[2] = 0.0f;
|
||||
for(int i = 0; i < N; ++i){
|
||||
userVertexData[i] = 0.0f;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<int N> struct DeviceVarying
|
||||
{
|
||||
float v[N];
|
||||
|
||||
__device__ void addVaryingWithWeight(const DeviceVarying<N> *src, float weight) {
|
||||
__device__ void addWithWeight(const DeviceVertex<N> *src, float weight) {
|
||||
#pragma unroll
|
||||
for(int i = 0; i < N; ++i){
|
||||
v[i] += src->v[i] * weight;
|
||||
}
|
||||
}
|
||||
|
||||
__device__ void clear() {
|
||||
#pragma unroll
|
||||
for(int i = 0; i < N; ++i){
|
||||
v[i] = 0.0f;
|
||||
}
|
||||
@ -64,9 +45,9 @@ template<int N> struct DeviceVarying
|
||||
|
||||
// Specialize DeviceVarying for N=0 to avoid compile error:
|
||||
// "flexible array member in otherwise empty struct"
|
||||
template<> struct DeviceVarying<0>
|
||||
template<> struct DeviceVertex<0>
|
||||
{
|
||||
__device__ void addVaryingWithWeight(const DeviceVarying<0> *src, float weight) {
|
||||
__device__ void addWithWeight(const DeviceVertex<0> *src, float weight) {
|
||||
}
|
||||
__device__ void clear() {
|
||||
}
|
||||
@ -94,32 +75,30 @@ __device__ void addWithWeight(float *dst, float *src, float weight, int count)
|
||||
for(int i = 0; i < count; ++i) dst[i] += src[i] * weight;
|
||||
}
|
||||
|
||||
__device__ void addVaryingWithWeight(float *dst, float *src, float weight, int count)
|
||||
{
|
||||
for(int i = 0; i < count; ++i) dst[i] += src[i] * weight;
|
||||
}
|
||||
|
||||
template <int NUM_USER_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
|
||||
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
|
||||
computeFace(float *fVertex, float *fVaryings, int *F0_IT, int *F0_ITa, int offset, int tableOffset, int start, int end)
|
||||
{
|
||||
DeviceVertex<NUM_USER_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_USER_VERTEX_ELEMENTS>*)fVertex;
|
||||
DeviceVarying<NUM_VARYING_ELEMENTS> *varyings = (DeviceVarying<NUM_VARYING_ELEMENTS>*)fVaryings;
|
||||
for(int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x; i < end + tableOffset; i += blockDim.x * gridDim.x){
|
||||
DeviceVertex<NUM_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_VERTEX_ELEMENTS>*)fVertex;
|
||||
DeviceVertex<NUM_VARYING_ELEMENTS> *varyings = (DeviceVertex<NUM_VARYING_ELEMENTS>*)fVaryings;
|
||||
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
|
||||
i < end + tableOffset;
|
||||
i += blockDim.x * gridDim.x) {
|
||||
|
||||
int h = F0_ITa[2*i];
|
||||
int n = F0_ITa[2*i+1];
|
||||
float weight = 1.0f/n;
|
||||
|
||||
DeviceVertex<NUM_USER_VERTEX_ELEMENTS> dst;
|
||||
DeviceVertex<NUM_VERTEX_ELEMENTS> dst;
|
||||
dst.clear();
|
||||
|
||||
if(NUM_VARYING_ELEMENTS > 0){
|
||||
DeviceVarying<NUM_VARYING_ELEMENTS> dstVarying;
|
||||
DeviceVertex<NUM_VARYING_ELEMENTS> dstVarying;
|
||||
dstVarying.clear();
|
||||
|
||||
for(int j=0; j<n; ++j){
|
||||
int index = F0_IT[h+j];
|
||||
dst.addWithWeight(&vertex[index], weight);
|
||||
dstVarying.addVaryingWithWeight(&varyings[index], weight);
|
||||
dstVarying.addWithWeight(&varyings[index], weight);
|
||||
}
|
||||
vertex[offset + i - tableOffset] = dst;
|
||||
varyings[offset + i - tableOffset] = dstVarying;
|
||||
@ -134,34 +113,43 @@ computeFace(float *fVertex, float *fVaryings, int *F0_IT, int *F0_ITa, int offse
|
||||
}
|
||||
|
||||
__global__ void
|
||||
computeFace(float *fVertex, int numVertexElements, float *fVaryings, int numVaryingElements,
|
||||
computeFace(float *fVertex, float *fVarying,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
int *F0_IT, int *F0_ITa, int offset, int tableOffset, int start, int end)
|
||||
{
|
||||
for(int i = start + tableOffset +threadIdx.x + blockIdx.x*blockDim.x; i < end + tableOffset; i += blockDim.x * gridDim.x){
|
||||
for (int i = start + tableOffset +threadIdx.x + blockIdx.x*blockDim.x;
|
||||
i < end + tableOffset;
|
||||
i += blockDim.x * gridDim.x){
|
||||
|
||||
int h = F0_ITa[2*i];
|
||||
int n = F0_ITa[2*i+1];
|
||||
float weight = 1.0f/n;
|
||||
|
||||
// XXX: can we use local stack like alloca?
|
||||
float *dstVertex = fVertex + (i+offset-tableOffset)*numVertexElements;
|
||||
clear(dstVertex, numVertexElements);
|
||||
float *dstVarying = fVaryings + (i+offset-tableOffset)*numVaryingElements;
|
||||
clear(dstVarying, numVaryingElements);
|
||||
float *dstVertex = fVertex + (i+offset-tableOffset)*vertexStride;
|
||||
clear(dstVertex, vertexLength);
|
||||
float *dstVarying = fVarying + (i+offset-tableOffset)*varyingStride;
|
||||
clear(dstVarying, varyingLength);
|
||||
|
||||
for(int j=0; j<n; ++j){
|
||||
int index = F0_IT[h+j];
|
||||
addWithWeight(dstVertex, fVertex + index*numVertexElements, weight, numVertexElements);
|
||||
addVaryingWithWeight(dstVarying, fVaryings + index*numVaryingElements, weight, numVaryingElements);
|
||||
addWithWeight(dstVertex, fVertex + index*vertexStride, weight, vertexLength);
|
||||
addWithWeight(dstVarying, fVarying + index*varyingStride, weight, varyingLength);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <int NUM_USER_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
|
||||
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
|
||||
computeEdge(float *fVertex, float *fVaryings, int *E0_IT, float *E0_S, int offset, int tableOffset, int start, int end)
|
||||
{
|
||||
DeviceVertex<NUM_USER_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_USER_VERTEX_ELEMENTS>*)fVertex;
|
||||
DeviceVarying<NUM_VARYING_ELEMENTS> *varyings = (DeviceVarying<NUM_VARYING_ELEMENTS>*)fVaryings;
|
||||
for(int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x; i < end + tableOffset; i+= blockDim.x * gridDim.x){
|
||||
DeviceVertex<NUM_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_VERTEX_ELEMENTS>*)fVertex;
|
||||
DeviceVertex<NUM_VARYING_ELEMENTS> *varyings = (DeviceVertex<NUM_VARYING_ELEMENTS>*)fVaryings;
|
||||
|
||||
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
|
||||
i < end + tableOffset;
|
||||
i+= blockDim.x * gridDim.x){
|
||||
|
||||
int eidx0 = E0_IT[4*i+0];
|
||||
int eidx1 = E0_IT[4*i+1];
|
||||
int eidx2 = E0_IT[4*i+2];
|
||||
@ -170,7 +158,7 @@ computeEdge(float *fVertex, float *fVaryings, int *E0_IT, float *E0_S, int offse
|
||||
float vertWeight = E0_S[i*2+0];
|
||||
|
||||
// Fully sharp edge : vertWeight = 0.5f;
|
||||
DeviceVertex<NUM_USER_VERTEX_ELEMENTS> dst;
|
||||
DeviceVertex<NUM_VERTEX_ELEMENTS> dst;
|
||||
dst.clear();
|
||||
|
||||
dst.addWithWeight(&vertex[eidx0], vertWeight);
|
||||
@ -185,20 +173,24 @@ computeEdge(float *fVertex, float *fVaryings, int *E0_IT, float *E0_S, int offse
|
||||
vertex[offset+i-tableOffset] = dst;
|
||||
|
||||
if(NUM_VARYING_ELEMENTS > 0){
|
||||
DeviceVarying<NUM_VARYING_ELEMENTS> dstVarying;
|
||||
DeviceVertex<NUM_VARYING_ELEMENTS> dstVarying;
|
||||
dstVarying.clear();
|
||||
dstVarying.addVaryingWithWeight(&varyings[eidx0], 0.5f);
|
||||
dstVarying.addVaryingWithWeight(&varyings[eidx1], 0.5f);
|
||||
dstVarying.addWithWeight(&varyings[eidx0], 0.5f);
|
||||
dstVarying.addWithWeight(&varyings[eidx1], 0.5f);
|
||||
varyings[offset+i-tableOffset] = dstVarying;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void
|
||||
computeEdge(float *fVertex, int numVertexElements, float *fVarying, int numVaryingElements,
|
||||
computeEdge(float *fVertex, float *fVarying,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
int *E0_IT, float *E0_S, int offset, int tableOffset, int start, int end)
|
||||
{
|
||||
for(int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x; i < end + tableOffset; i+= blockDim.x * gridDim.x){
|
||||
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
|
||||
i < end + tableOffset;i+= blockDim.x * gridDim.x) {
|
||||
|
||||
int eidx0 = E0_IT[4*i+0];
|
||||
int eidx1 = E0_IT[4*i+1];
|
||||
int eidx2 = E0_IT[4*i+2];
|
||||
@ -207,35 +199,38 @@ computeEdge(float *fVertex, int numVertexElements, float *fVarying, int numVaryi
|
||||
float vertWeight = E0_S[i*2+0];
|
||||
|
||||
// Fully sharp edge : vertWeight = 0.5f;
|
||||
float *dstVertex = fVertex + (i+offset-tableOffset)*numVertexElements;
|
||||
clear(dstVertex, numVertexElements);
|
||||
float *dstVertex = fVertex + (i+offset-tableOffset)*vertexStride;
|
||||
clear(dstVertex, vertexLength);
|
||||
|
||||
addWithWeight(dstVertex, fVertex + eidx0*numVertexElements, vertWeight, numVertexElements);
|
||||
addWithWeight(dstVertex, fVertex + eidx1*numVertexElements, vertWeight, numVertexElements);
|
||||
addWithWeight(dstVertex, fVertex + eidx0*vertexStride, vertWeight, vertexLength);
|
||||
addWithWeight(dstVertex, fVertex + eidx1*vertexStride, vertWeight, vertexLength);
|
||||
|
||||
if(eidx2 > -1){
|
||||
float faceWeight = E0_S[i*2+1];
|
||||
|
||||
addWithWeight(dstVertex, fVertex + eidx2*numVertexElements, faceWeight, numVertexElements);
|
||||
addWithWeight(dstVertex, fVertex + eidx3*numVertexElements, faceWeight, numVertexElements);
|
||||
addWithWeight(dstVertex, fVertex + eidx2*vertexStride, faceWeight, vertexLength);
|
||||
addWithWeight(dstVertex, fVertex + eidx3*vertexStride, faceWeight, vertexLength);
|
||||
}
|
||||
|
||||
if(numVaryingElements > 0){
|
||||
float *dstVarying = fVarying + (i+offset-tableOffset)*numVaryingElements;
|
||||
clear(dstVarying, numVaryingElements);
|
||||
if (varyingLength > 0){
|
||||
float *dstVarying = fVarying + (i+offset-tableOffset)*varyingStride;
|
||||
clear(dstVarying, varyingLength);
|
||||
|
||||
addVaryingWithWeight(dstVarying, fVarying + eidx0*numVaryingElements, 0.5f, numVaryingElements);
|
||||
addVaryingWithWeight(dstVarying, fVarying + eidx1*numVaryingElements, 0.5f, numVaryingElements);
|
||||
addWithWeight(dstVarying, fVarying + eidx0*varyingStride, 0.5f, varyingLength);
|
||||
addWithWeight(dstVarying, fVarying + eidx1*varyingStride, 0.5f, varyingLength);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <int NUM_USER_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
|
||||
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
|
||||
computeVertexA(float *fVertex, float *fVaryings, int *V0_ITa, float *V0_S, int offset, int tableOffset, int start, int end, int pass)
|
||||
{
|
||||
DeviceVertex<NUM_USER_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_USER_VERTEX_ELEMENTS>*)fVertex;
|
||||
DeviceVarying<NUM_VARYING_ELEMENTS> *varyings = (DeviceVarying<NUM_VARYING_ELEMENTS>*)fVaryings;
|
||||
for(int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x; i < end+tableOffset; i += blockDim.x * gridDim.x){
|
||||
DeviceVertex<NUM_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_VERTEX_ELEMENTS>*)fVertex;
|
||||
DeviceVertex<NUM_VARYING_ELEMENTS> *varyings = (DeviceVertex<NUM_VARYING_ELEMENTS>*)fVaryings;
|
||||
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
|
||||
i < end+tableOffset;
|
||||
i += blockDim.x * gridDim.x) {
|
||||
|
||||
int n = V0_ITa[5*i+1];
|
||||
int p = V0_ITa[5*i+2];
|
||||
int eidx0 = V0_ITa[5*i+3];
|
||||
@ -249,7 +244,7 @@ computeVertexA(float *fVertex, float *fVaryings, int *V0_ITa, float *V0_S, int o
|
||||
if (weight>0.0f && weight<1.0f && n > 0)
|
||||
weight=1.0f-weight;
|
||||
|
||||
DeviceVertex<NUM_USER_VERTEX_ELEMENTS> dst;
|
||||
DeviceVertex<NUM_VERTEX_ELEMENTS> dst;
|
||||
if (not pass) {
|
||||
dst.clear();
|
||||
} else {
|
||||
@ -267,9 +262,9 @@ computeVertexA(float *fVertex, float *fVaryings, int *V0_ITa, float *V0_S, int o
|
||||
|
||||
if(NUM_VARYING_ELEMENTS > 0){
|
||||
if(not pass){
|
||||
DeviceVarying<NUM_VARYING_ELEMENTS> dstVarying;
|
||||
DeviceVertex<NUM_VARYING_ELEMENTS> dstVarying;
|
||||
dstVarying.clear();
|
||||
dstVarying.addVaryingWithWeight(&varyings[p], 1.0f);
|
||||
dstVarying.addWithWeight(&varyings[p], 1.0f);
|
||||
varyings[i+offset-tableOffset] = dstVarying;
|
||||
}
|
||||
}
|
||||
@ -277,10 +272,15 @@ computeVertexA(float *fVertex, float *fVaryings, int *V0_ITa, float *V0_S, int o
|
||||
}
|
||||
|
||||
__global__ void
|
||||
computeVertexA(float *fVertex, int numVertexElements, float *fVaryings, int numVaryingElements,
|
||||
computeVertexA(float *fVertex, float *fVaryings,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
int *V0_ITa, float *V0_S, int offset, int tableOffset, int start, int end, int pass)
|
||||
{
|
||||
for(int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x; i < end + tableOffset; i += blockDim.x * gridDim.x){
|
||||
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
|
||||
i < end + tableOffset;
|
||||
i += blockDim.x * gridDim.x){
|
||||
|
||||
int n = V0_ITa[5*i+1];
|
||||
int p = V0_ITa[5*i+2];
|
||||
int eidx0 = V0_ITa[5*i+3];
|
||||
@ -294,24 +294,24 @@ computeVertexA(float *fVertex, int numVertexElements, float *fVaryings, int numV
|
||||
if (weight>0.0f && weight<1.0f && n > 0)
|
||||
weight=1.0f-weight;
|
||||
|
||||
float *dstVertex = fVertex + (i+offset-tableOffset)*numVertexElements;
|
||||
float *dstVertex = fVertex + (i+offset-tableOffset)*vertexStride;
|
||||
if (not pass) {
|
||||
clear(dstVertex, numVertexElements);
|
||||
clear(dstVertex, vertexLength);
|
||||
}
|
||||
|
||||
if (eidx0==-1 || (pass==0 && (n==-1)) ) {
|
||||
addWithWeight(dstVertex, fVertex + p*numVertexElements, weight, numVertexElements);
|
||||
addWithWeight(dstVertex, fVertex + p*vertexStride, weight, vertexLength);
|
||||
} else {
|
||||
addWithWeight(dstVertex, fVertex + p*numVertexElements, weight*0.75f, numVertexElements);
|
||||
addWithWeight(dstVertex, fVertex + eidx0*numVertexElements, weight*0.125f, numVertexElements);
|
||||
addWithWeight(dstVertex, fVertex + eidx1*numVertexElements, weight*0.125f, numVertexElements);
|
||||
addWithWeight(dstVertex, fVertex + p*vertexStride, weight*0.75f, vertexLength);
|
||||
addWithWeight(dstVertex, fVertex + eidx0*vertexStride, weight*0.125f, vertexLength);
|
||||
addWithWeight(dstVertex, fVertex + eidx1*vertexStride, weight*0.125f, vertexLength);
|
||||
}
|
||||
|
||||
if(numVaryingElements > 0){
|
||||
if(varyingLength > 0){
|
||||
if(not pass){
|
||||
float *dstVarying = fVaryings + (i+offset-tableOffset)*numVaryingElements;
|
||||
clear(dstVarying, numVaryingElements);
|
||||
addVaryingWithWeight(dstVarying, fVaryings + p*numVaryingElements, 1.0f, numVaryingElements);
|
||||
float *dstVarying = fVaryings + (i+offset-tableOffset)*varyingStride;
|
||||
clear(dstVarying, varyingLength);
|
||||
addWithWeight(dstVarying, fVaryings + p*varyingStride, 1.0f, varyingLength);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -321,13 +321,16 @@ computeVertexA(float *fVertex, int numVertexElements, float *fVaryings, int numV
|
||||
|
||||
//texture <int, 1> texV0_IT;
|
||||
|
||||
template <int NUM_USER_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
|
||||
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
|
||||
computeVertexB(float *fVertex, float *fVaryings,
|
||||
const int *V0_ITa, const int *V0_IT, const float *V0_S, int offset, int tableOffset, int start, int end)
|
||||
{
|
||||
DeviceVertex<NUM_USER_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_USER_VERTEX_ELEMENTS>*)fVertex;
|
||||
DeviceVarying<NUM_VARYING_ELEMENTS> *varyings = (DeviceVarying<NUM_VARYING_ELEMENTS>*)fVaryings;
|
||||
for(int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x; i < end + tableOffset; i += blockDim.x * gridDim.x){
|
||||
DeviceVertex<NUM_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_VERTEX_ELEMENTS>*)fVertex;
|
||||
DeviceVertex<NUM_VARYING_ELEMENTS> *varyings = (DeviceVertex<NUM_VARYING_ELEMENTS>*)fVaryings;
|
||||
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
|
||||
i < end + tableOffset;
|
||||
i += blockDim.x * gridDim.x) {
|
||||
|
||||
int h = V0_ITa[5*i];
|
||||
int n = V0_ITa[5*i+1];
|
||||
int p = V0_ITa[5*i+2];
|
||||
@ -336,11 +339,11 @@ computeVertexB(float *fVertex, float *fVaryings,
|
||||
float wp = 1.0f/float(n*n);
|
||||
float wv = (n-2.0f) * n * wp;
|
||||
|
||||
DeviceVertex<NUM_USER_VERTEX_ELEMENTS> dst;
|
||||
DeviceVertex<NUM_VERTEX_ELEMENTS> dst;
|
||||
dst.clear();
|
||||
dst.addWithWeight(&vertex[p], weight * wv);
|
||||
|
||||
for(int j = 0; j < n; ++j){
|
||||
for (int j = 0; j < n; ++j) {
|
||||
dst.addWithWeight(&vertex[V0_IT[h+j*2]], weight * wp);
|
||||
dst.addWithWeight(&vertex[V0_IT[h+j*2+1]], weight * wp);
|
||||
// int idx0 = tex1Dfetch(texV0_IT, h+j*2);
|
||||
@ -351,19 +354,24 @@ computeVertexB(float *fVertex, float *fVaryings,
|
||||
vertex[i+offset-tableOffset] = dst;
|
||||
|
||||
if(NUM_VARYING_ELEMENTS > 0){
|
||||
DeviceVarying<NUM_VARYING_ELEMENTS> dstVarying;
|
||||
DeviceVertex<NUM_VARYING_ELEMENTS> dstVarying;
|
||||
dstVarying.clear();
|
||||
dstVarying.addVaryingWithWeight(&varyings[p], 1.0f);
|
||||
dstVarying.addWithWeight(&varyings[p], 1.0f);
|
||||
varyings[i+offset-tableOffset] = dstVarying;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void
|
||||
computeVertexB(float *fVertex, int numVertexElements, float *fVaryings, int numVaryingElements,
|
||||
computeVertexB(float *fVertex, float *fVarying,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
const int *V0_ITa, const int *V0_IT, const float *V0_S, int offset, int tableOffset, int start, int end)
|
||||
{
|
||||
for(int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x; i < end + tableOffset; i += blockDim.x * gridDim.x){
|
||||
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
|
||||
i < end + tableOffset;
|
||||
i += blockDim.x * gridDim.x) {
|
||||
|
||||
int h = V0_ITa[5*i];
|
||||
int n = V0_ITa[5*i+1];
|
||||
int p = V0_ITa[5*i+2];
|
||||
@ -372,19 +380,19 @@ computeVertexB(float *fVertex, int numVertexElements, float *fVaryings, int numV
|
||||
float wp = 1.0f/float(n*n);
|
||||
float wv = (n-2.0f) * n * wp;
|
||||
|
||||
float *dstVertex = fVertex + (i+offset-tableOffset)*numVertexElements;
|
||||
clear(dstVertex, numVertexElements);
|
||||
addWithWeight(dstVertex, fVertex + p*numVertexElements, weight*wv, numVertexElements);
|
||||
float *dstVertex = fVertex + (i+offset-tableOffset)*vertexStride;
|
||||
clear(dstVertex, vertexLength);
|
||||
addWithWeight(dstVertex, fVertex + p*vertexStride, weight*wv, vertexLength);
|
||||
|
||||
for(int j = 0; j < n; ++j){
|
||||
addWithWeight(dstVertex, fVertex + V0_IT[h+j*2]*numVertexElements, weight*wp, numVertexElements);
|
||||
addWithWeight(dstVertex, fVertex + V0_IT[h+j*2+1]*numVertexElements, weight*wp, numVertexElements);
|
||||
for (int j = 0; j < n; ++j) {
|
||||
addWithWeight(dstVertex, fVertex + V0_IT[h+j*2]*vertexStride, weight*wp, vertexLength);
|
||||
addWithWeight(dstVertex, fVertex + V0_IT[h+j*2+1]*vertexStride, weight*wp, vertexLength);
|
||||
}
|
||||
|
||||
if(numVaryingElements > 0){
|
||||
float *dstVarying = fVaryings + (i+offset-tableOffset)*numVaryingElements;
|
||||
clear(dstVarying, numVaryingElements);
|
||||
addVaryingWithWeight(dstVarying, fVaryings + p*numVaryingElements, 1.0f, numVaryingElements);
|
||||
if (varyingLength > 0) {
|
||||
float *dstVarying = fVarying + (i+offset-tableOffset)*varyingStride;
|
||||
clear(dstVarying, varyingLength);
|
||||
addWithWeight(dstVarying, fVarying + p*varyingStride, 1.0f, varyingLength);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -392,12 +400,15 @@ computeVertexB(float *fVertex, int numVertexElements, float *fVaryings, int numV
|
||||
|
||||
// --------------------------------------------------------------------------------------------
|
||||
|
||||
template <int NUM_USER_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
|
||||
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
|
||||
computeLoopVertexB(float *fVertex, float *fVaryings, int *V0_ITa, int *V0_IT, float *V0_S, int offset, int tableOffset, int start, int end)
|
||||
{
|
||||
DeviceVertex<NUM_USER_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_USER_VERTEX_ELEMENTS>*)fVertex;
|
||||
DeviceVarying<NUM_VARYING_ELEMENTS> *varyings = (DeviceVarying<NUM_VARYING_ELEMENTS>*)fVaryings;
|
||||
for(int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x; i < end + tableOffset; i += blockDim.x * gridDim.x){
|
||||
DeviceVertex<NUM_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_VERTEX_ELEMENTS>*)fVertex;
|
||||
DeviceVertex<NUM_VARYING_ELEMENTS> *varyings = (DeviceVertex<NUM_VARYING_ELEMENTS>*)fVaryings;
|
||||
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
|
||||
i < end + tableOffset;
|
||||
i += blockDim.x * gridDim.x) {
|
||||
|
||||
int h = V0_ITa[5*i];
|
||||
int n = V0_ITa[5*i+1];
|
||||
int p = V0_ITa[5*i+2];
|
||||
@ -408,30 +419,35 @@ computeLoopVertexB(float *fVertex, float *fVaryings, int *V0_ITa, int *V0_IT, fl
|
||||
beta = beta * beta;
|
||||
beta = (0.625f - beta) * wp;
|
||||
|
||||
DeviceVertex<NUM_USER_VERTEX_ELEMENTS> dst;
|
||||
DeviceVertex<NUM_VERTEX_ELEMENTS> dst;
|
||||
dst.clear();
|
||||
|
||||
dst.addWithWeight(&vertex[p], weight * (1.0f - (beta * n)));
|
||||
|
||||
for(int j = 0; j < n; ++j){
|
||||
for (int j = 0; j < n; ++j) {
|
||||
dst.addWithWeight(&vertex[V0_IT[h+j]], weight * beta);
|
||||
}
|
||||
vertex[i+offset-tableOffset] = dst;
|
||||
|
||||
if(NUM_VARYING_ELEMENTS > 0){
|
||||
DeviceVarying<NUM_VARYING_ELEMENTS> dstVarying;
|
||||
if (NUM_VARYING_ELEMENTS > 0) {
|
||||
DeviceVertex<NUM_VARYING_ELEMENTS> dstVarying;
|
||||
dstVarying.clear();
|
||||
dstVarying.addVaryingWithWeight(&varyings[p], 1.0f);
|
||||
dstVarying.addWithWeight(&varyings[p], 1.0f);
|
||||
varyings[i+offset-tableOffset] = dstVarying;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void
|
||||
computeLoopVertexB(float *fVertex, int numVertexElements, float *fVaryings, int numVaryingElements,
|
||||
computeLoopVertexB(float *fVertex, float *fVarying,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
const int *V0_ITa, const int *V0_IT, const float *V0_S, int offset, int tableOffset, int start, int end)
|
||||
{
|
||||
for(int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x; i < end + tableOffset; i += blockDim.x * gridDim.x){
|
||||
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
|
||||
i < end + tableOffset;
|
||||
i += blockDim.x * gridDim.x) {
|
||||
|
||||
int h = V0_ITa[5*i];
|
||||
int n = V0_ITa[5*i+1];
|
||||
int p = V0_ITa[5*i+2];
|
||||
@ -442,34 +458,37 @@ computeLoopVertexB(float *fVertex, int numVertexElements, float *fVaryings, int
|
||||
beta = beta * beta;
|
||||
beta = (0.625f - beta) * wp;
|
||||
|
||||
float *dstVertex = fVertex + (i+offset-tableOffset)*numVertexElements;
|
||||
clear(dstVertex, numVertexElements);
|
||||
addWithWeight(dstVertex, fVertex + p*numVertexElements, weight*(1.0f-(beta*n)), numVertexElements);
|
||||
float *dstVertex = fVertex + (i+offset-tableOffset)*vertexStride;
|
||||
clear(dstVertex, vertexLength);
|
||||
addWithWeight(dstVertex, fVertex + p*vertexStride, weight*(1.0f-(beta*n)), vertexLength);
|
||||
|
||||
for(int j = 0; j < n; ++j){
|
||||
addWithWeight(dstVertex, fVertex + V0_IT[h+j]*numVertexElements, weight*beta, numVertexElements);
|
||||
for (int j = 0; j < n; ++j) {
|
||||
addWithWeight(dstVertex, fVertex + V0_IT[h+j]*vertexStride, weight*beta, vertexLength);
|
||||
}
|
||||
|
||||
if(numVaryingElements > 0){
|
||||
float *dstVarying = fVaryings + (i+offset-tableOffset)*numVaryingElements;
|
||||
clear(dstVarying, numVaryingElements);
|
||||
addVaryingWithWeight(dstVarying, fVaryings + p*numVaryingElements, 1.0f, numVaryingElements);
|
||||
if (varyingLength > 0) {
|
||||
float *dstVarying = fVarying + (i+offset-tableOffset)*varyingStride;
|
||||
clear(dstVarying, varyingLength);
|
||||
addWithWeight(dstVarying, fVarying + p*varyingStride, 1.0f, varyingLength);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------
|
||||
|
||||
template <int NUM_USER_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
|
||||
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
|
||||
computeBilinearEdge(float *fVertex, float *fVaryings, int *E0_IT, int offset, int tableOffset, int start, int end)
|
||||
{
|
||||
DeviceVertex<NUM_USER_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_USER_VERTEX_ELEMENTS>*)fVertex;
|
||||
DeviceVarying<NUM_VARYING_ELEMENTS> *varyings = (DeviceVarying<NUM_VARYING_ELEMENTS>*)fVaryings;
|
||||
for(int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x; i < end + tableOffset; i+= blockDim.x * gridDim.x){
|
||||
DeviceVertex<NUM_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_VERTEX_ELEMENTS>*)fVertex;
|
||||
DeviceVertex<NUM_VARYING_ELEMENTS> *varyings = (DeviceVertex<NUM_VARYING_ELEMENTS>*)fVaryings;
|
||||
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
|
||||
i < end + tableOffset;
|
||||
i+= blockDim.x * gridDim.x) {
|
||||
|
||||
int eidx0 = E0_IT[2*i+0];
|
||||
int eidx1 = E0_IT[2*i+1];
|
||||
|
||||
DeviceVertex<NUM_USER_VERTEX_ELEMENTS> dst;
|
||||
DeviceVertex<NUM_VERTEX_ELEMENTS> dst;
|
||||
dst.clear();
|
||||
|
||||
dst.addWithWeight(&vertex[eidx0], 0.5f);
|
||||
@ -477,78 +496,91 @@ computeBilinearEdge(float *fVertex, float *fVaryings, int *E0_IT, int offset, in
|
||||
|
||||
vertex[offset+i-tableOffset] = dst;
|
||||
|
||||
if(NUM_VARYING_ELEMENTS > 0){
|
||||
DeviceVarying<NUM_VARYING_ELEMENTS> dstVarying;
|
||||
if (NUM_VARYING_ELEMENTS > 0) {
|
||||
DeviceVertex<NUM_VARYING_ELEMENTS> dstVarying;
|
||||
dstVarying.clear();
|
||||
dstVarying.addVaryingWithWeight(&varyings[eidx0], 0.5f);
|
||||
dstVarying.addVaryingWithWeight(&varyings[eidx1], 0.5f);
|
||||
dstVarying.addWithWeight(&varyings[eidx0], 0.5f);
|
||||
dstVarying.addWithWeight(&varyings[eidx1], 0.5f);
|
||||
varyings[offset+i-tableOffset] = dstVarying;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void
|
||||
computeBilinearEdge(float *fVertex, int numVertexElements, float *fVarying, int numVaryingElements,
|
||||
computeBilinearEdge(float *fVertex, float *fVarying,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
int *E0_IT, int offset, int tableOffset, int start, int end)
|
||||
{
|
||||
for(int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x; i < end + tableOffset; i+= blockDim.x * gridDim.x){
|
||||
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
|
||||
i < end + tableOffset;
|
||||
i+= blockDim.x * gridDim.x) {
|
||||
|
||||
int eidx0 = E0_IT[2*i+0];
|
||||
int eidx1 = E0_IT[2*i+1];
|
||||
|
||||
float *dstVertex = fVertex + (i+offset-tableOffset)*numVertexElements;
|
||||
clear(dstVertex, numVertexElements);
|
||||
float *dstVertex = fVertex + (i+offset-tableOffset)*vertexStride;
|
||||
clear(dstVertex, vertexLength);
|
||||
|
||||
addWithWeight(dstVertex, fVertex + eidx0*numVertexElements, 0.5f, numVertexElements);
|
||||
addWithWeight(dstVertex, fVertex + eidx1*numVertexElements, 0.5f, numVertexElements);
|
||||
addWithWeight(dstVertex, fVertex + eidx0*vertexStride, 0.5f, vertexLength);
|
||||
addWithWeight(dstVertex, fVertex + eidx1*vertexStride, 0.5f, vertexLength);
|
||||
|
||||
if(numVaryingElements > 0){
|
||||
float *dstVarying = fVarying + (i+offset-tableOffset)*numVaryingElements;
|
||||
clear(dstVarying, numVaryingElements);
|
||||
if (varyingLength > 0) {
|
||||
float *dstVarying = fVarying + (i+offset-tableOffset)*varyingStride;
|
||||
clear(dstVarying, varyingLength);
|
||||
|
||||
addVaryingWithWeight(dstVarying, fVarying + eidx0*numVaryingElements, 0.5f, numVaryingElements);
|
||||
addVaryingWithWeight(dstVarying, fVarying + eidx1*numVaryingElements, 0.5f, numVaryingElements);
|
||||
addWithWeight(dstVarying, fVarying + eidx0*varyingStride, 0.5f, varyingLength);
|
||||
addWithWeight(dstVarying, fVarying + eidx1*varyingStride, 0.5f, varyingLength);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <int NUM_USER_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
|
||||
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
|
||||
computeBilinearVertex(float *fVertex, float *fVaryings, int *V0_ITa, int offset, int tableOffset, int start, int end)
|
||||
{
|
||||
DeviceVertex<NUM_USER_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_USER_VERTEX_ELEMENTS>*)fVertex;
|
||||
DeviceVarying<NUM_VARYING_ELEMENTS> *varyings = (DeviceVarying<NUM_VARYING_ELEMENTS>*)fVaryings;
|
||||
for(int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x; i < end + tableOffset; i += blockDim.x * gridDim.x){
|
||||
DeviceVertex<NUM_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_VERTEX_ELEMENTS>*)fVertex;
|
||||
DeviceVertex<NUM_VARYING_ELEMENTS> *varyings = (DeviceVertex<NUM_VARYING_ELEMENTS>*)fVaryings;
|
||||
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
|
||||
i < end + tableOffset;
|
||||
i += blockDim.x * gridDim.x) {
|
||||
|
||||
int p = V0_ITa[i];
|
||||
|
||||
DeviceVertex<NUM_USER_VERTEX_ELEMENTS> dst;
|
||||
DeviceVertex<NUM_VERTEX_ELEMENTS> dst;
|
||||
dst.clear();
|
||||
|
||||
dst.addWithWeight(&vertex[p], 1.0f);
|
||||
vertex[i+offset-tableOffset] = dst;
|
||||
|
||||
if(NUM_VARYING_ELEMENTS > 0){
|
||||
DeviceVarying<NUM_VARYING_ELEMENTS> dstVarying;
|
||||
if (NUM_VARYING_ELEMENTS > 0) {
|
||||
DeviceVertex<NUM_VARYING_ELEMENTS> dstVarying;
|
||||
dstVarying.clear();
|
||||
dstVarying.addVaryingWithWeight(&varyings[p], 1.0f);
|
||||
dstVarying.addWithWeight(&varyings[p], 1.0f);
|
||||
varyings[i+offset-tableOffset] = dstVarying;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void
|
||||
computeBilinearVertex(float *fVertex, int numVertexElements, float *fVaryings, int numVaryingElements,
|
||||
computeBilinearVertex(float *fVertex, float *fVarying,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
const int *V0_ITa, int offset, int tableOffset, int start, int end)
|
||||
{
|
||||
for(int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x; i < end + tableOffset; i += blockDim.x * gridDim.x){
|
||||
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
|
||||
i < end + tableOffset;
|
||||
i += blockDim.x * gridDim.x) {
|
||||
|
||||
int p = V0_ITa[i];
|
||||
|
||||
float *dstVertex = fVertex + (i+offset-tableOffset)*numVertexElements;
|
||||
clear(dstVertex, numVertexElements);
|
||||
addWithWeight(dstVertex, fVertex + p*numVertexElements, 1.0f, numVertexElements);
|
||||
float *dstVertex = fVertex + (i+offset-tableOffset)*vertexStride;
|
||||
clear(dstVertex, vertexLength);
|
||||
addWithWeight(dstVertex, fVertex + p*vertexStride, 1.0f, vertexLength);
|
||||
|
||||
if(numVaryingElements > 0){
|
||||
float *dstVarying = fVaryings + (i+offset-tableOffset)*numVaryingElements;
|
||||
clear(dstVarying, numVaryingElements);
|
||||
addVaryingWithWeight(dstVarying, fVaryings + p*numVaryingElements, 1.0f, numVaryingElements);
|
||||
if (varyingLength > 0) {
|
||||
float *dstVarying = fVarying + (i+offset-tableOffset)*varyingStride;
|
||||
clear(dstVarying, varyingLength);
|
||||
addWithWeight(dstVarying, fVarying + p*varyingStride, 1.0f, varyingLength);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -556,15 +588,16 @@ computeBilinearVertex(float *fVertex, int numVertexElements, float *fVaryings, i
|
||||
// --------------------------------------------------------------------------------------------
|
||||
|
||||
__global__ void
|
||||
editVertexAdd(float *fVertex, int numVertexElements, int primVarOffset, int primVarWidth,
|
||||
editVertexAdd(float *fVertex, int vertexLength, int vertexStride,
|
||||
int primVarOffset, int primVarWidth,
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
const int *editIndices, const float *editValues)
|
||||
{
|
||||
for(int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
|
||||
i < end + tableOffset;
|
||||
i += blockDim.x * gridDim.x) {
|
||||
for (int i = start + tableOffset + threadIdx.x + blockIdx.x*blockDim.x;
|
||||
i < end + tableOffset;
|
||||
i += blockDim.x * gridDim.x) {
|
||||
|
||||
float *dstVertex = fVertex + (editIndices[i] + vertexOffset) * numVertexElements + primVarOffset;
|
||||
float *dstVertex = fVertex + (editIndices[i] + vertexOffset) * vertexStride + primVarOffset;
|
||||
|
||||
for(int j = 0; j < primVarWidth; j++) {
|
||||
*dstVertex++ += editValues[i*primVarWidth + j];
|
||||
@ -579,16 +612,19 @@ editVertexAdd(float *fVertex, int numVertexElements, int primVarOffset, int prim
|
||||
// XXX: this macro usage is tentative. Since CUDA kernels can't be dynamically configured,
|
||||
// we are still trying to find a better way to provide optimized kernels.
|
||||
|
||||
#define OPT_KERNEL(NUM_USER_VERTEX_ELEMENTS, NUM_VARYING_ELEMENTS, KERNEL, X, Y, ARG) \
|
||||
if(numUserVertexElements == NUM_USER_VERTEX_ELEMENTS && \
|
||||
numVaryingElements == NUM_VARYING_ELEMENTS) \
|
||||
{ KERNEL<NUM_USER_VERTEX_ELEMENTS, NUM_VARYING_ELEMENTS><<<X,Y>>>ARG; \
|
||||
return; }
|
||||
#define OPT_KERNEL(NUM_VERTEX_ELEMENTS, NUM_VARYING_ELEMENTS, KERNEL, X, Y, ARG) \
|
||||
if(vertexLength == NUM_VERTEX_ELEMENTS && \
|
||||
varyingLength == NUM_VARYING_ELEMENTS && \
|
||||
vertexStride == vertexLength && \
|
||||
varyingStride == varyingLength) \
|
||||
{ KERNEL<NUM_VERTEX_ELEMENTS, NUM_VARYING_ELEMENTS><<<X,Y>>>ARG; \
|
||||
return; }
|
||||
|
||||
extern "C" {
|
||||
|
||||
void OsdCudaComputeFace(float *vertex, float *varying,
|
||||
int numUserVertexElements, int numVaryingElements,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
int *F_IT, int *F_ITa, int offset, int tableOffset, int start, int end)
|
||||
{
|
||||
//computeFace<3, 0><<<512,32>>>(vertex, varying, F_IT, F_ITa, offset, start, end);
|
||||
@ -598,12 +634,15 @@ void OsdCudaComputeFace(float *vertex, float *varying,
|
||||
OPT_KERNEL(3, 3, computeFace, 512, 32, (vertex, varying, F_IT, F_ITa, offset, tableOffset, start, end));
|
||||
|
||||
// fallback kernel (slow)
|
||||
computeFace<<<512, 32>>>(vertex, 3+numUserVertexElements, varying, numVaryingElements,
|
||||
computeFace<<<512, 32>>>(vertex, varying,
|
||||
vertexLength, vertexStride, varyingLength, varyingStride,
|
||||
F_IT, F_ITa, offset, tableOffset, start, end);
|
||||
}
|
||||
|
||||
|
||||
void OsdCudaComputeEdge(float *vertex, float *varying,
|
||||
int numUserVertexElements, int numVaryingElements,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
int *E_IT, float *E_W, int offset, int tableOffset, int start, int end)
|
||||
{
|
||||
//computeEdge<0, 3><<<512,32>>>(vertex, varying, E_IT, E_W, offset, start, end);
|
||||
@ -612,12 +651,14 @@ void OsdCudaComputeEdge(float *vertex, float *varying,
|
||||
OPT_KERNEL(3, 0, computeEdge, 512, 32, (vertex, varying, E_IT, E_W, offset, tableOffset, start, end));
|
||||
OPT_KERNEL(3, 3, computeEdge, 512, 32, (vertex, varying, E_IT, E_W, offset, tableOffset, start, end));
|
||||
|
||||
computeEdge<<<512, 32>>>(vertex, 3+numUserVertexElements, varying, numVaryingElements,
|
||||
computeEdge<<<512, 32>>>(vertex, varying,
|
||||
vertexLength, vertexStride, varyingLength, varyingStride,
|
||||
E_IT, E_W, offset, tableOffset, start, end);
|
||||
}
|
||||
|
||||
void OsdCudaComputeVertexA(float *vertex, float *varying,
|
||||
int numUserVertexElements, int numVaryingElements,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
int *V_ITa, float *V_W, int offset, int tableOffset, int start, int end, int pass)
|
||||
{
|
||||
// computeVertexA<0, 3><<<512,32>>>(vertex, varying, V_ITa, V_W, offset, start, end, pass);
|
||||
@ -626,12 +667,14 @@ void OsdCudaComputeVertexA(float *vertex, float *varying,
|
||||
OPT_KERNEL(3, 0, computeVertexA, 512, 32, (vertex, varying, V_ITa, V_W, offset, tableOffset, start, end, pass));
|
||||
OPT_KERNEL(3, 3, computeVertexA, 512, 32, (vertex, varying, V_ITa, V_W, offset, tableOffset, start, end, pass));
|
||||
|
||||
computeVertexA<<<512, 32>>>(vertex, 3+numUserVertexElements, varying, numVaryingElements,
|
||||
computeVertexA<<<512, 32>>>(vertex, varying,
|
||||
vertexLength, vertexStride, varyingLength, varyingStride,
|
||||
V_ITa, V_W, offset, tableOffset, start, end, pass);
|
||||
}
|
||||
|
||||
void OsdCudaComputeVertexB(float *vertex, float *varying,
|
||||
int numUserVertexElements, int numVaryingElements,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
int *V_ITa, int *V_IT, float *V_W, int offset, int tableOffset, int start, int end)
|
||||
{
|
||||
// computeVertexB<0, 3><<<512,32>>>(vertex, varying, V_ITa, V_IT, V_W, offset, start, end);
|
||||
@ -640,12 +683,14 @@ void OsdCudaComputeVertexB(float *vertex, float *varying,
|
||||
OPT_KERNEL(3, 0, computeVertexB, 512, 32, (vertex, varying, V_ITa, V_IT, V_W, offset, tableOffset, start, end));
|
||||
OPT_KERNEL(3, 3, computeVertexB, 512, 32, (vertex, varying, V_ITa, V_IT, V_W, offset, tableOffset, start, end));
|
||||
|
||||
computeVertexB<<<512, 32>>>(vertex, 3+numUserVertexElements, varying, numVaryingElements,
|
||||
computeVertexB<<<512, 32>>>(vertex, varying,
|
||||
vertexLength, vertexStride, varyingLength, varyingStride,
|
||||
V_ITa, V_IT, V_W, offset, tableOffset, start, end);
|
||||
}
|
||||
|
||||
void OsdCudaComputeLoopVertexB(float *vertex, float *varying,
|
||||
int numUserVertexElements, int numVaryingElements,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
int *V_ITa, int *V_IT, float *V_W, int offset, int tableOffset, int start, int end)
|
||||
{
|
||||
// computeLoopVertexB<0, 3><<<512,32>>>(vertex, varying, V_ITa, V_IT, V_W, offset, start, end);
|
||||
@ -654,12 +699,14 @@ void OsdCudaComputeLoopVertexB(float *vertex, float *varying,
|
||||
OPT_KERNEL(3, 0, computeLoopVertexB, 512, 32, (vertex, varying, V_ITa, V_IT, V_W, offset, tableOffset, start, end));
|
||||
OPT_KERNEL(3, 3, computeLoopVertexB, 512, 32, (vertex, varying, V_ITa, V_IT, V_W, offset, tableOffset, start, end));
|
||||
|
||||
computeLoopVertexB<<<512, 32>>>(vertex, 3+numUserVertexElements, varying, numVaryingElements,
|
||||
computeLoopVertexB<<<512, 32>>>(vertex, varying,
|
||||
vertexLength, vertexStride, varyingLength, varyingStride,
|
||||
V_ITa, V_IT, V_W, offset, tableOffset, start, end);
|
||||
}
|
||||
|
||||
void OsdCudaComputeBilinearEdge(float *vertex, float *varying,
|
||||
int numUserVertexElements, int numVaryingElements,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
int *E_IT, int offset, int tableOffset, int start, int end)
|
||||
{
|
||||
//computeBilinearEdge<0, 3><<<512,32>>>(vertex, varying, E_IT, offset, start, end);
|
||||
@ -668,12 +715,14 @@ void OsdCudaComputeBilinearEdge(float *vertex, float *varying,
|
||||
OPT_KERNEL(3, 0, computeBilinearEdge, 512, 32, (vertex, varying, E_IT, offset, tableOffset, start, end));
|
||||
OPT_KERNEL(3, 3, computeBilinearEdge, 512, 32, (vertex, varying, E_IT, offset, tableOffset, start, end));
|
||||
|
||||
computeBilinearEdge<<<512, 32>>>(vertex, 3+numUserVertexElements, varying, numVaryingElements,
|
||||
computeBilinearEdge<<<512, 32>>>(vertex, varying,
|
||||
vertexLength, vertexStride, varyingLength, varyingStride,
|
||||
E_IT, offset, tableOffset, start, end);
|
||||
}
|
||||
|
||||
void OsdCudaComputeBilinearVertex(float *vertex, float *varying,
|
||||
int numUserVertexElements, int numVaryingElements,
|
||||
int vertexLength, int vertexStride,
|
||||
int varyingLength, int varyingStride,
|
||||
int *V_ITa, int offset, int tableOffset, int start, int end)
|
||||
{
|
||||
// computeBilinearVertex<0, 3><<<512,32>>>(vertex, varying, V_ITa, offset, start, end);
|
||||
@ -682,16 +731,17 @@ void OsdCudaComputeBilinearVertex(float *vertex, float *varying,
|
||||
OPT_KERNEL(3, 0, computeBilinearVertex, 512, 32, (vertex, varying, V_ITa, offset, tableOffset, start, end));
|
||||
OPT_KERNEL(3, 3, computeBilinearVertex, 512, 32, (vertex, varying, V_ITa, offset, tableOffset, start, end));
|
||||
|
||||
computeBilinearVertex<<<512, 32>>>(vertex, 3+numUserVertexElements, varying, numVaryingElements,
|
||||
computeBilinearVertex<<<512, 32>>>(vertex, varying,
|
||||
vertexLength, vertexStride, varyingLength, varyingStride,
|
||||
V_ITa, offset, tableOffset, start, end);
|
||||
}
|
||||
|
||||
void OsdCudaEditVertexAdd(float *vertex, int numUserVertexElements,
|
||||
void OsdCudaEditVertexAdd(float *vertex, int vertexLength, int vertexStride,
|
||||
int primVarOffset, int primVarWidth,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end, int *editIndices, float *editValues)
|
||||
{
|
||||
editVertexAdd<<<512, 32>>>(vertex, 3+numUserVertexElements, primVarOffset, primVarWidth,
|
||||
editVertexAdd<<<512, 32>>>(vertex, vertexLength, vertexStride, primVarOffset, primVarWidth,
|
||||
vertexOffset, tableOffset, start, end,
|
||||
editIndices, editValues);
|
||||
}
|
||||
|
101
opensubdiv/osd/d3d11ComputeController.cpp
Normal file → Executable file
101
opensubdiv/osd/d3d11ComputeController.cpp
Normal file → Executable file
@ -38,9 +38,7 @@ namespace OPENSUBDIV_VERSION {
|
||||
|
||||
OsdD3D11ComputeController::OsdD3D11ComputeController(
|
||||
ID3D11DeviceContext *deviceContext)
|
||||
: _deviceContext(deviceContext), _query(0),
|
||||
_currentVertexBufferUAV(0), _currentVaryingBufferUAV(0),
|
||||
_currentKernelBundle(NULL) {
|
||||
: _deviceContext(deviceContext), _query(0) {
|
||||
}
|
||||
|
||||
OsdD3D11ComputeController::~OsdD3D11ComputeController() {
|
||||
@ -72,20 +70,21 @@ OsdD3D11ComputeController::Synchronize() {
|
||||
}
|
||||
|
||||
OsdD3D11ComputeKernelBundle *
|
||||
OsdD3D11ComputeController::getKernels(int numVertexElements,
|
||||
int numVaryingElements) {
|
||||
OsdD3D11ComputeController::getKernels(OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc) {
|
||||
|
||||
std::vector<OsdD3D11ComputeKernelBundle*>::iterator it =
|
||||
std::find_if(_kernelRegistry.begin(), _kernelRegistry.end(),
|
||||
OsdD3D11ComputeKernelBundle::Match(numVertexElements,
|
||||
numVaryingElements));
|
||||
OsdD3D11ComputeKernelBundle::Match(
|
||||
vertexDesc, varyingDesc));
|
||||
|
||||
if (it != _kernelRegistry.end()) {
|
||||
return *it;
|
||||
} else {
|
||||
OsdD3D11ComputeKernelBundle *kernelBundle =
|
||||
new OsdD3D11ComputeKernelBundle(_deviceContext);
|
||||
_kernelRegistry.push_back(kernelBundle);
|
||||
kernelBundle->Compile(numVertexElements, numVaryingElements);
|
||||
kernelBundle->Compile(vertexDesc, varyingDesc);
|
||||
return kernelBundle;
|
||||
}
|
||||
}
|
||||
@ -102,11 +101,11 @@ OsdD3D11ComputeController::bindShaderResources()
|
||||
ID3D11ShaderResourceView *NULLSRV = 0;
|
||||
_deviceContext->VSSetShaderResources(0, 1, &NULLSRV);
|
||||
|
||||
if (_currentVertexBufferUAV)
|
||||
_deviceContext->CSSetUnorderedAccessViews(0, 1, &_currentVertexBufferUAV, 0); // u0
|
||||
if (_currentBindState.vertexBuffer)
|
||||
_deviceContext->CSSetUnorderedAccessViews(0, 1, &_currentBindState.vertexBuffer, 0); // u0
|
||||
|
||||
if (_currentVaryingBufferUAV)
|
||||
_deviceContext->CSSetUnorderedAccessViews(1, 1, &_currentVaryingBufferUAV, 0); // u1
|
||||
if (_currentBindState.varyingBuffer)
|
||||
_deviceContext->CSSetUnorderedAccessViews(1, 1, &_currentBindState.varyingBuffer, 0); // u1
|
||||
}
|
||||
|
||||
void
|
||||
@ -122,8 +121,10 @@ OsdD3D11ComputeController::ApplyBilinearFaceVerticesKernel(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyBilinearFaceVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
_currentBindState.kernelBundle->ApplyBilinearFaceVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(),
|
||||
batch.GetStart(), batch.GetEnd(),
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
|
||||
}
|
||||
|
||||
void
|
||||
@ -132,8 +133,10 @@ OsdD3D11ComputeController::ApplyBilinearEdgeVerticesKernel(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyBilinearEdgeVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
_currentBindState.kernelBundle->ApplyBilinearEdgeVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(),
|
||||
batch.GetStart(), batch.GetEnd(),
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
|
||||
}
|
||||
|
||||
void
|
||||
@ -142,8 +145,10 @@ OsdD3D11ComputeController::ApplyBilinearVertexVerticesKernel(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyBilinearVertexVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
_currentBindState.kernelBundle->ApplyBilinearVertexVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(),
|
||||
batch.GetStart(), batch.GetEnd(),
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
|
||||
}
|
||||
|
||||
void
|
||||
@ -152,8 +157,10 @@ OsdD3D11ComputeController::ApplyCatmarkFaceVerticesKernel(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyCatmarkFaceVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
_currentBindState.kernelBundle->ApplyCatmarkFaceVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(),
|
||||
batch.GetStart(), batch.GetEnd(),
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
|
||||
}
|
||||
|
||||
|
||||
@ -164,8 +171,10 @@ OsdD3D11ComputeController::ApplyCatmarkEdgeVerticesKernel(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyCatmarkEdgeVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
_currentBindState.kernelBundle->ApplyCatmarkEdgeVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(),
|
||||
batch.GetStart(), batch.GetEnd(),
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
|
||||
}
|
||||
|
||||
void
|
||||
@ -174,8 +183,10 @@ OsdD3D11ComputeController::ApplyCatmarkVertexVerticesKernelB(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyCatmarkVertexVerticesKernelB(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
_currentBindState.kernelBundle->ApplyCatmarkVertexVerticesKernelB(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(),
|
||||
batch.GetStart(), batch.GetEnd(),
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
|
||||
}
|
||||
|
||||
void
|
||||
@ -184,8 +195,10 @@ OsdD3D11ComputeController::ApplyCatmarkVertexVerticesKernelA1(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyCatmarkVertexVerticesKernelA(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), false);
|
||||
_currentBindState.kernelBundle->ApplyCatmarkVertexVerticesKernelA(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(),
|
||||
batch.GetStart(), batch.GetEnd(), false,
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
|
||||
}
|
||||
|
||||
void
|
||||
@ -194,8 +207,10 @@ OsdD3D11ComputeController::ApplyCatmarkVertexVerticesKernelA2(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyCatmarkVertexVerticesKernelA(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
|
||||
_currentBindState.kernelBundle->ApplyCatmarkVertexVerticesKernelA(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(),
|
||||
batch.GetStart(), batch.GetEnd(), true,
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
|
||||
}
|
||||
|
||||
void
|
||||
@ -204,8 +219,10 @@ OsdD3D11ComputeController::ApplyLoopEdgeVerticesKernel(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyLoopEdgeVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
_currentBindState.kernelBundle->ApplyLoopEdgeVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(),
|
||||
batch.GetStart(), batch.GetEnd(),
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
|
||||
}
|
||||
|
||||
void
|
||||
@ -214,8 +231,10 @@ OsdD3D11ComputeController::ApplyLoopVertexVerticesKernelB(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyLoopVertexVerticesKernelB(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
_currentBindState.kernelBundle->ApplyLoopVertexVerticesKernelB(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(),
|
||||
batch.GetStart(), batch.GetEnd(),
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
|
||||
}
|
||||
|
||||
void
|
||||
@ -224,8 +243,10 @@ OsdD3D11ComputeController::ApplyLoopVertexVerticesKernelA1(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyLoopVertexVerticesKernelA(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), false);
|
||||
_currentBindState.kernelBundle->ApplyLoopVertexVerticesKernelA(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(),
|
||||
batch.GetStart(), batch.GetEnd(), false,
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
|
||||
}
|
||||
|
||||
void
|
||||
@ -234,8 +255,10 @@ OsdD3D11ComputeController::ApplyLoopVertexVerticesKernelA2(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyLoopVertexVerticesKernelA(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
|
||||
_currentBindState.kernelBundle->ApplyLoopVertexVerticesKernelA(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(),
|
||||
batch.GetStart(), batch.GetEnd(), true,
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
|
||||
}
|
||||
|
||||
void
|
||||
@ -253,11 +276,13 @@ OsdD3D11ComputeController::ApplyVertexEdits(
|
||||
int primvarWidth = edit->GetPrimvarWidth();
|
||||
|
||||
if (edit->GetOperation() == FarVertexEdit::Add) {
|
||||
_currentKernelBundle->ApplyEditAdd(primvarOffset, primvarWidth,
|
||||
_currentBindState.kernelBundle->ApplyEditAdd(primvarOffset, primvarWidth,
|
||||
batch.GetVertexOffset(),
|
||||
batch.GetTableOffset(),
|
||||
batch.GetStart(),
|
||||
batch.GetEnd());
|
||||
batch.GetEnd(),
|
||||
_currentBindState.vertexDesc.offset,
|
||||
_currentBindState.varyingDesc.offset);
|
||||
} else {
|
||||
// XXX: edit SET is not implemented yet.
|
||||
}
|
||||
|
79
opensubdiv/osd/d3d11ComputeController.h
Normal file → Executable file
79
opensubdiv/osd/d3d11ComputeController.h
Normal file → Executable file
@ -29,6 +29,7 @@
|
||||
|
||||
#include "../far/dispatcher.h"
|
||||
#include "../osd/d3d11ComputeContext.h"
|
||||
#include "../osd/vertexDescriptor.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
@ -75,15 +76,25 @@ public:
|
||||
///
|
||||
/// @param varyingBuffer varying-interpolated data buffer
|
||||
///
|
||||
/// @param vertexDesc the descriptor of vertex elements to be refined.
|
||||
/// if it's null, all primvars in the vertex buffer
|
||||
/// will be refined.
|
||||
///
|
||||
/// @param varyingDesc the descriptor of varying elements to be refined.
|
||||
/// if it's null, all primvars in the varying buffer
|
||||
/// will be refined.
|
||||
///
|
||||
template<class VERTEX_BUFFER, class VARYING_BUFFER>
|
||||
void Refine(OsdD3D11ComputeContext const *context,
|
||||
FarKernelBatchVector const &batches,
|
||||
VERTEX_BUFFER *vertexBuffer,
|
||||
VARYING_BUFFER *varyingBuffer) {
|
||||
VARYING_BUFFER *varyingBuffer,
|
||||
OsdVertexBufferDescriptor const *vertexDesc=NULL,
|
||||
OsdVertexBufferDescriptor const *varyingDesc=NULL) {
|
||||
|
||||
if (batches.empty()) return;
|
||||
|
||||
bind(vertexBuffer, varyingBuffer);
|
||||
bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc);
|
||||
context->BindShaderStorageBuffers(_deviceContext);
|
||||
|
||||
FarDispatcher::Refine(this,
|
||||
@ -145,48 +156,68 @@ protected:
|
||||
|
||||
void ApplyVertexEdits(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
OsdD3D11ComputeKernelBundle * getKernels(int numVertexElements,
|
||||
int numVaryingElements);
|
||||
OsdD3D11ComputeKernelBundle * getKernels(OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc);
|
||||
|
||||
void bindShaderResources();
|
||||
|
||||
void unbindShaderResources();
|
||||
|
||||
template<class VERTEX_BUFFER, class VARYING_BUFFER>
|
||||
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying) {
|
||||
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying,
|
||||
OsdVertexBufferDescriptor const *vertexDesc,
|
||||
OsdVertexBufferDescriptor const *varyingDesc) {
|
||||
|
||||
_currentVertexBufferUAV = vertex ? vertex->BindD3D11UAV(_deviceContext) : 0;
|
||||
_currentVaryingBufferUAV = varying ? varying->BindD3D11UAV(_deviceContext) : 0;
|
||||
|
||||
_vdesc.numVertexElements = vertex ? vertex->GetNumElements() : 0;
|
||||
_vdesc.numVaryingElements = varying ? varying->GetNumElements() : 0;
|
||||
|
||||
_currentKernelBundle = getKernels(_vdesc.numVertexElements,
|
||||
_vdesc.numVaryingElements);
|
||||
// if the vertex buffer descriptor is specified, use it.
|
||||
// otherwise, assumes the data is tightly packed in the vertex buffer.
|
||||
if (vertexDesc) {
|
||||
_currentBindState.vertexDesc = *vertexDesc;
|
||||
} else {
|
||||
int numElements = vertex ? vertex->GetNumElements() : 0;
|
||||
_currentBindState.vertexDesc = OsdVertexBufferDescriptor(
|
||||
0, numElements, numElements);
|
||||
}
|
||||
if (varyingDesc) {
|
||||
_currentBindState.varyingDesc = *varyingDesc;
|
||||
} else {
|
||||
int numElements = varying ? varying->GetNumElements() : 0;
|
||||
_currentBindState.varyingDesc = OsdVertexBufferDescriptor(
|
||||
0, numElements, numElements);
|
||||
}
|
||||
|
||||
_currentBindState.vertexBuffer = vertex ? vertex->BindD3D11UAV(_deviceContext) : 0;
|
||||
_currentBindState.varyingBuffer = varying ? varying->BindD3D11UAV(_deviceContext) : 0;
|
||||
_currentBindState.kernelBundle = getKernels(_currentBindState.vertexDesc,
|
||||
_currentBindState.varyingDesc);
|
||||
bindShaderResources();
|
||||
}
|
||||
|
||||
void unbind() {
|
||||
_currentVertexBufferUAV = 0;
|
||||
_currentVaryingBufferUAV = 0;
|
||||
_currentKernelBundle = 0;
|
||||
_currentBindState.Reset();
|
||||
|
||||
unbindShaderResources();
|
||||
}
|
||||
|
||||
private:
|
||||
struct BindState {
|
||||
BindState() : vertexBuffer(0), varyingBuffer(0), kernelBundle(NULL) {}
|
||||
void Reset() {
|
||||
vertexBuffer = varyingBuffer = 0;
|
||||
vertexDesc.Reset();
|
||||
varyingDesc.Reset();
|
||||
}
|
||||
ID3D11UnorderedAccessView *vertexBuffer;
|
||||
ID3D11UnorderedAccessView *varyingBuffer;
|
||||
OsdVertexBufferDescriptor vertexDesc;
|
||||
OsdVertexBufferDescriptor varyingDesc;
|
||||
OsdD3D11ComputeKernelBundle *kernelBundle;
|
||||
};
|
||||
|
||||
BindState _currentBindState;
|
||||
|
||||
ID3D11DeviceContext *_deviceContext;
|
||||
ID3D11Query *_query;
|
||||
std::vector<OsdD3D11ComputeKernelBundle *> _kernelRegistry;
|
||||
|
||||
OsdVertexDescriptor _vdesc;
|
||||
|
||||
ID3D11UnorderedAccessView * _currentVertexBufferUAV,
|
||||
* _currentVaryingBufferUAV;
|
||||
|
||||
OsdD3D11ComputeKernelBundle * _currentKernelBundle;
|
||||
|
||||
};
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
|
@ -79,10 +79,14 @@ OsdD3D11ComputeKernelBundle::~OsdD3D11ComputeKernelBundle() {
|
||||
}
|
||||
|
||||
bool
|
||||
OsdD3D11ComputeKernelBundle::Compile(int numVertexElements,
|
||||
int numVaryingElements) {
|
||||
OsdD3D11ComputeKernelBundle::Compile(
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc) {
|
||||
|
||||
_vdesc.Set( numVertexElements, numVaryingElements );
|
||||
_numVertexElements = vertexDesc.length;
|
||||
_vertexStride = vertexDesc.stride;
|
||||
_numVaryingElements = varyingDesc.length;
|
||||
_varyingStride = varyingDesc.stride;
|
||||
|
||||
DWORD dwShaderFlags = D3DCOMPILE_ENABLE_STRICTNESS;
|
||||
#ifdef _DEBUG
|
||||
@ -90,18 +94,26 @@ OsdD3D11ComputeKernelBundle::Compile(int numVertexElements,
|
||||
#endif
|
||||
|
||||
std::ostringstream ss;
|
||||
ss << numVertexElements;
|
||||
ss << _numVertexElements;
|
||||
std::string numVertexElementsStr(ss.str());
|
||||
ss.str("");
|
||||
ss << numVaryingElements;
|
||||
ss << _numVaryingElements;
|
||||
std::string numVaryingElementsStr(ss.str());
|
||||
ss.str("");
|
||||
ss << _vertexStride;
|
||||
std::string vertexStrideStr(ss.str());
|
||||
ss.str("");
|
||||
ss << _varyingStride;
|
||||
std::string varyingStrideStr(ss.str());
|
||||
ss.str("");
|
||||
ss << _workGroupSize;
|
||||
std::string workGroupSizeStr(ss.str());
|
||||
|
||||
D3D_SHADER_MACRO shaderDefines[] = {
|
||||
"NUM_VERTEX_ELEMENTS", numVertexElementsStr.c_str(),
|
||||
"VERTEX_STRIDE", vertexStrideStr.c_str(),
|
||||
"NUM_VARYING_ELEMENTS", numVaryingElementsStr.c_str(),
|
||||
"VARYING_STRIDE", varyingStrideStr.c_str(),
|
||||
"WORK_GROUP_SIZE", workGroupSizeStr.c_str(),
|
||||
0, 0
|
||||
};
|
||||
@ -183,6 +195,8 @@ struct OsdD3D11ComputeKernelBundle::KernelCB {
|
||||
int tableOffset; // offset of subdivision table
|
||||
int indexStart; // start index relative to tableOffset
|
||||
int indexEnd; // end index relative to tableOffset
|
||||
int vertexBaseOffset; // base vbo offset of the vertex buffer
|
||||
int varyingBaseOffset; // base vbo offset of the varying buffer
|
||||
BOOL vertexPass; // 4-byte bool
|
||||
|
||||
// vertex edit kernel
|
||||
@ -225,7 +239,8 @@ OsdD3D11ComputeKernelBundle::dispatchCompute(
|
||||
|
||||
void
|
||||
OsdD3D11ComputeKernelBundle::ApplyBilinearFaceVerticesKernel(
|
||||
int vertexOffset, int tableOffset, int start, int end) {
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
int vertexBaseOffset, int varyingBaseOffset) {
|
||||
|
||||
KernelCB args;
|
||||
ZeroMemory(&args, sizeof(args));
|
||||
@ -233,12 +248,15 @@ OsdD3D11ComputeKernelBundle::ApplyBilinearFaceVerticesKernel(
|
||||
args.tableOffset = tableOffset;
|
||||
args.indexStart = start;
|
||||
args.indexEnd = end;
|
||||
args.vertexBaseOffset = vertexBaseOffset;
|
||||
args.varyingBaseOffset = varyingBaseOffset;
|
||||
dispatchCompute(_kernelComputeFace, args);
|
||||
}
|
||||
|
||||
void
|
||||
OsdD3D11ComputeKernelBundle::ApplyBilinearEdgeVerticesKernel(
|
||||
int vertexOffset, int tableOffset, int start, int end) {
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
int vertexBaseOffset, int varyingBaseOffset) {
|
||||
|
||||
KernelCB args;
|
||||
ZeroMemory(&args, sizeof(args));
|
||||
@ -246,12 +264,15 @@ OsdD3D11ComputeKernelBundle::ApplyBilinearEdgeVerticesKernel(
|
||||
args.tableOffset = tableOffset;
|
||||
args.indexStart = start;
|
||||
args.indexEnd = end;
|
||||
args.vertexBaseOffset = vertexBaseOffset;
|
||||
args.varyingBaseOffset = varyingBaseOffset;
|
||||
dispatchCompute(_kernelComputeBilinearEdge, args);
|
||||
}
|
||||
|
||||
void
|
||||
OsdD3D11ComputeKernelBundle::ApplyBilinearVertexVerticesKernel(
|
||||
int vertexOffset, int tableOffset, int start, int end) {
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
int vertexBaseOffset, int varyingBaseOffset) {
|
||||
|
||||
KernelCB args;
|
||||
ZeroMemory(&args, sizeof(args));
|
||||
@ -259,13 +280,16 @@ OsdD3D11ComputeKernelBundle::ApplyBilinearVertexVerticesKernel(
|
||||
args.tableOffset = tableOffset;
|
||||
args.indexStart = start;
|
||||
args.indexEnd = end;
|
||||
args.vertexBaseOffset = vertexBaseOffset;
|
||||
args.varyingBaseOffset = varyingBaseOffset;
|
||||
dispatchCompute(_kernelComputeVertex, args);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
OsdD3D11ComputeKernelBundle::ApplyCatmarkFaceVerticesKernel(
|
||||
int vertexOffset, int tableOffset, int start, int end) {
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
int vertexBaseOffset, int varyingBaseOffset) {
|
||||
|
||||
KernelCB args;
|
||||
ZeroMemory(&args, sizeof(args));
|
||||
@ -273,12 +297,15 @@ OsdD3D11ComputeKernelBundle::ApplyCatmarkFaceVerticesKernel(
|
||||
args.tableOffset = tableOffset;
|
||||
args.indexStart = start;
|
||||
args.indexEnd = end;
|
||||
args.vertexBaseOffset = vertexBaseOffset;
|
||||
args.varyingBaseOffset = varyingBaseOffset;
|
||||
dispatchCompute(_kernelComputeFace, args);
|
||||
}
|
||||
|
||||
void
|
||||
OsdD3D11ComputeKernelBundle::ApplyCatmarkEdgeVerticesKernel(
|
||||
int vertexOffset, int tableOffset, int start, int end) {
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
int vertexBaseOffset, int varyingBaseOffset) {
|
||||
|
||||
KernelCB args;
|
||||
ZeroMemory(&args, sizeof(args));
|
||||
@ -286,12 +313,15 @@ OsdD3D11ComputeKernelBundle::ApplyCatmarkEdgeVerticesKernel(
|
||||
args.tableOffset = tableOffset;
|
||||
args.indexStart = start;
|
||||
args.indexEnd = end;
|
||||
args.vertexBaseOffset = vertexBaseOffset;
|
||||
args.varyingBaseOffset = varyingBaseOffset;
|
||||
dispatchCompute(_kernelComputeEdge, args);
|
||||
}
|
||||
|
||||
void
|
||||
OsdD3D11ComputeKernelBundle::ApplyCatmarkVertexVerticesKernelB(
|
||||
int vertexOffset, int tableOffset, int start, int end) {
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
int vertexBaseOffset, int varyingBaseOffset) {
|
||||
|
||||
KernelCB args;
|
||||
ZeroMemory(&args, sizeof(args));
|
||||
@ -299,12 +329,15 @@ OsdD3D11ComputeKernelBundle::ApplyCatmarkVertexVerticesKernelB(
|
||||
args.tableOffset = tableOffset;
|
||||
args.indexStart = start;
|
||||
args.indexEnd = end;
|
||||
args.vertexBaseOffset = vertexBaseOffset;
|
||||
args.varyingBaseOffset = varyingBaseOffset;
|
||||
dispatchCompute(_kernelComputeCatmarkVertexB, args);
|
||||
}
|
||||
|
||||
void
|
||||
OsdD3D11ComputeKernelBundle::ApplyCatmarkVertexVerticesKernelA(
|
||||
int vertexOffset, int tableOffset, int start, int end, bool pass) {
|
||||
int vertexOffset, int tableOffset, int start, int end, bool pass,
|
||||
int vertexBaseOffset, int varyingBaseOffset) {
|
||||
|
||||
KernelCB args;
|
||||
ZeroMemory(&args, sizeof(args));
|
||||
@ -313,12 +346,15 @@ OsdD3D11ComputeKernelBundle::ApplyCatmarkVertexVerticesKernelA(
|
||||
args.indexStart = start;
|
||||
args.indexEnd = end;
|
||||
args.vertexPass = pass ? 1 : 0;
|
||||
args.vertexBaseOffset = vertexBaseOffset;
|
||||
args.varyingBaseOffset = varyingBaseOffset;
|
||||
dispatchCompute(_kernelComputeVertexA, args);
|
||||
}
|
||||
|
||||
void
|
||||
OsdD3D11ComputeKernelBundle::ApplyLoopEdgeVerticesKernel(
|
||||
int vertexOffset, int tableOffset, int start, int end) {
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
int vertexBaseOffset, int varyingBaseOffset) {
|
||||
|
||||
KernelCB args;
|
||||
ZeroMemory(&args, sizeof(args));
|
||||
@ -326,12 +362,15 @@ OsdD3D11ComputeKernelBundle::ApplyLoopEdgeVerticesKernel(
|
||||
args.tableOffset = tableOffset;
|
||||
args.indexStart = start;
|
||||
args.indexEnd = end;
|
||||
args.vertexBaseOffset = vertexBaseOffset;
|
||||
args.varyingBaseOffset = varyingBaseOffset;
|
||||
dispatchCompute(_kernelComputeEdge, args);
|
||||
}
|
||||
|
||||
void
|
||||
OsdD3D11ComputeKernelBundle::ApplyLoopVertexVerticesKernelB(
|
||||
int vertexOffset, int tableOffset, int start, int end) {
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
int vertexBaseOffset, int varyingBaseOffset) {
|
||||
|
||||
KernelCB args;
|
||||
ZeroMemory(&args, sizeof(args));
|
||||
@ -339,12 +378,15 @@ OsdD3D11ComputeKernelBundle::ApplyLoopVertexVerticesKernelB(
|
||||
args.tableOffset = tableOffset;
|
||||
args.indexStart = start;
|
||||
args.indexEnd = end;
|
||||
args.vertexBaseOffset = vertexBaseOffset;
|
||||
args.varyingBaseOffset = varyingBaseOffset;
|
||||
dispatchCompute(_kernelComputeLoopVertexB, args);
|
||||
}
|
||||
|
||||
void
|
||||
OsdD3D11ComputeKernelBundle::ApplyLoopVertexVerticesKernelA(
|
||||
int vertexOffset, int tableOffset, int start, int end, bool pass) {
|
||||
int vertexOffset, int tableOffset, int start, int end, bool pass,
|
||||
int vertexBaseOffset, int varyingBaseOffset) {
|
||||
|
||||
KernelCB args;
|
||||
ZeroMemory(&args, sizeof(args));
|
||||
@ -353,13 +395,16 @@ OsdD3D11ComputeKernelBundle::ApplyLoopVertexVerticesKernelA(
|
||||
args.indexStart = start;
|
||||
args.indexEnd = end;
|
||||
args.vertexPass = pass ? 1 : 0;
|
||||
args.vertexBaseOffset = vertexBaseOffset;
|
||||
args.varyingBaseOffset = varyingBaseOffset;
|
||||
dispatchCompute(_kernelComputeVertexA, args);
|
||||
}
|
||||
|
||||
void
|
||||
OsdD3D11ComputeKernelBundle::ApplyEditAdd(
|
||||
int primvarOffset, int primvarWidth,
|
||||
int vertexOffset, int tableOffset, int start, int end) {
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
int vertexBaseOffset, int varyingBaseOffset) {
|
||||
|
||||
KernelCB args;
|
||||
ZeroMemory(&args, sizeof(args));
|
||||
@ -369,6 +414,8 @@ OsdD3D11ComputeKernelBundle::ApplyEditAdd(
|
||||
args.indexEnd = end;
|
||||
args.editPrimVarOffset = primvarOffset;
|
||||
args.editPrimVarWidth = primvarWidth;
|
||||
args.vertexBaseOffset = vertexBaseOffset;
|
||||
args.varyingBaseOffset = varyingBaseOffset;
|
||||
dispatchCompute(_kernelEditAdd, args);
|
||||
}
|
||||
|
||||
|
57
opensubdiv/osd/d3d11KernelBundle.h
Normal file → Executable file
57
opensubdiv/osd/d3d11KernelBundle.h
Normal file → Executable file
@ -48,53 +48,71 @@ public:
|
||||
/// Destructor
|
||||
~OsdD3D11ComputeKernelBundle();
|
||||
|
||||
bool Compile(int numVertexElements, int numVaryingElements);
|
||||
bool Compile(OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc);
|
||||
|
||||
void ApplyBilinearFaceVerticesKernel(
|
||||
int vertexOffset, int tableOffset, int start, int end);
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
int vertexBaseOffset, int varyingBaseOffset);
|
||||
|
||||
void ApplyBilinearEdgeVerticesKernel(
|
||||
int vertexOffset, int tableOffset, int start, int end);
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
int vertexBaseOffset, int varyingBaseOffset);
|
||||
|
||||
void ApplyBilinearVertexVerticesKernel(
|
||||
int vertexOffset, int tableOffset, int start, int end);
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
int vertexBaseOffset, int varyingBaseOffset);
|
||||
|
||||
void ApplyCatmarkFaceVerticesKernel(
|
||||
int vertexOffset, int tableOffset, int start, int end);
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
int vertexBaseOffset, int varyingBaseOffset);
|
||||
|
||||
void ApplyCatmarkEdgeVerticesKernel(
|
||||
int vertexOffset, int tableOffset, int start, int end);
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
int vertexBaseOffset, int varyingBaseOffset);
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelB(
|
||||
int vertexOffset, int tableOffset, int start, int end);
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
int vertexBaseOffset, int varyingBaseOffset);
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelA(
|
||||
int vertexOffset, int tableOffset, int start, int end, bool pass);
|
||||
int vertexOffset, int tableOffset, int start, int end, bool pass,
|
||||
int vertexBaseOffset, int varyingBaseOffset);
|
||||
|
||||
void ApplyLoopEdgeVerticesKernel(
|
||||
int vertexOffset, int tableOffset, int start, int end);
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
int vertexBaseOffset, int varyingBaseOffset);
|
||||
|
||||
void ApplyLoopVertexVerticesKernelB(
|
||||
int vertexOffset, int tableOffset, int start, int end);
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
int vertexBaseOffset, int varyingBaseOffset);
|
||||
|
||||
void ApplyLoopVertexVerticesKernelA(
|
||||
int vertexOffset, int tableOffset, int start, int end, bool pass);
|
||||
int vertexOffset, int tableOffset, int start, int end, bool pass,
|
||||
int vertexBaseOffset, int varyingBaseOffset);
|
||||
|
||||
void ApplyEditAdd(int primvarOffset, int primvarWidth,
|
||||
int vertexOffset, int tableOffset, int start, int end);
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
int vertexBaseOffset, int varyingBaseOffset);
|
||||
|
||||
struct Match {
|
||||
|
||||
/// Constructor
|
||||
Match(int numVertexElements, int numVaryingElements)
|
||||
: vdesc(numVertexElements, numVaryingElements) {
|
||||
Match(OsdVertexBufferDescriptor const &vertex,
|
||||
OsdVertexBufferDescriptor const &varying)
|
||||
: vertexDesc(vertex), varyingDesc(varying) {
|
||||
}
|
||||
|
||||
bool operator() (OsdD3D11ComputeKernelBundle const *kernel) {
|
||||
return vdesc == kernel->_vdesc;
|
||||
// offset is dynamic. just comparing length and stride here,
|
||||
// returns true if they are equal
|
||||
return (vertexDesc.length == kernel->_numVertexElements and
|
||||
vertexDesc.stride == kernel->_vertexStride and
|
||||
varyingDesc.length == kernel->_numVaryingElements and
|
||||
varyingDesc.stride == kernel->_varyingStride);
|
||||
}
|
||||
|
||||
OsdVertexDescriptor vdesc;
|
||||
OsdVertexBufferDescriptor vertexDesc;
|
||||
OsdVertexBufferDescriptor varyingDesc;
|
||||
};
|
||||
|
||||
friend struct Match;
|
||||
@ -130,7 +148,10 @@ protected:
|
||||
|
||||
int _workGroupSize;
|
||||
|
||||
OsdVertexDescriptor _vdesc;
|
||||
int _numVertexElements;
|
||||
int _vertexStride;
|
||||
int _numVaryingElements;
|
||||
int _varyingStride;
|
||||
};
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
|
14
opensubdiv/osd/d3d11Mesh.h
Normal file → Executable file
14
opensubdiv/osd/d3d11Mesh.h
Normal file → Executable file
@ -124,6 +124,13 @@ public:
|
||||
/// Asks the compute controller to refine the far mesh's kernel batches
/// using this mesh's own vertex and varying buffers.
virtual void Refine() {
    _computeController->Refine(
        _computeContext,
        _farMesh->GetKernelBatches(),
        _vertexBuffer,
        _varyingBuffer);
}
|
||||
/// Asks the compute controller to refine the far mesh's kernel batches,
/// forwarding the caller-supplied buffer descriptors.
///
/// @param vertexDesc   descriptor forwarded for the vertex data
/// @param varyingDesc  descriptor forwarded for the varying data
/// @param interleaved  when true the vertex buffer is passed as the
///                     varying buffer as well; otherwise the separate
///                     varying buffer is passed
virtual void Refine(OsdVertexBufferDescriptor const *vertexDesc,
                    OsdVertexBufferDescriptor const *varyingDesc,
                    bool interleaved) {
    _computeController->Refine(
        _computeContext,
        _farMesh->GetKernelBatches(),
        _vertexBuffer,
        (interleaved ? _vertexBuffer : _varyingBuffer),
        vertexDesc,
        varyingDesc);
}
|
||||
/// Forwards to the compute controller's Synchronize().
virtual void Synchronize() {
|
||||
_computeController->Synchronize();
|
||||
}
|
||||
@ -265,6 +272,13 @@ public:
|
||||
/// Asks the compute controller to refine the far mesh's kernel batches
/// using this mesh's own vertex and varying buffers.
virtual void Refine() {
    _computeController->Refine(
        _computeContext,
        _farMesh->GetKernelBatches(),
        _vertexBuffer,
        _varyingBuffer);
}
|
||||
/// Asks the compute controller to refine the far mesh's kernel batches,
/// forwarding the caller-supplied buffer descriptors.
///
/// @param vertexDesc   descriptor forwarded for the vertex data
/// @param varyingDesc  descriptor forwarded for the varying data
/// @param interleaved  when true the vertex buffer is passed as the
///                     varying buffer as well; otherwise the separate
///                     varying buffer is passed
virtual void Refine(OsdVertexBufferDescriptor const *vertexDesc,
                    OsdVertexBufferDescriptor const *varyingDesc,
                    bool interleaved) {
    _computeController->Refine(
        _computeContext,
        _farMesh->GetKernelBatches(),
        _vertexBuffer,
        (interleaved ? _vertexBuffer : _varyingBuffer),
        vertexDesc,
        varyingDesc);
}
|
||||
/// Forwards to the compute controller's Synchronize().
virtual void Synchronize() {
|
||||
_computeController->Synchronize();
|
||||
}
|
||||
|
@ -30,8 +30,7 @@ namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
|
||||
OsdGcdComputeController::OsdGcdComputeController() :
|
||||
_currentVertexBuffer(0), _currentVaryingBuffer(0) {
|
||||
OsdGcdComputeController::OsdGcdComputeController() {
|
||||
_gcd_queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
|
||||
}
|
||||
|
||||
@ -42,7 +41,8 @@ OsdGcdComputeController::ApplyBilinearFaceVerticesKernel(
|
||||
assert(context);
|
||||
|
||||
OsdGcdComputeFace(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::F_IT)->GetBuffer(),
|
||||
(const int*)context->GetTable(FarSubdivisionTables::F_ITa)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(),
|
||||
@ -56,7 +56,8 @@ OsdGcdComputeController::ApplyBilinearEdgeVerticesKernel(
|
||||
assert(context);
|
||||
|
||||
OsdGcdComputeBilinearEdge(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(),
|
||||
_gcd_queue);
|
||||
@ -69,7 +70,8 @@ OsdGcdComputeController::ApplyBilinearVertexVerticesKernel(
|
||||
assert(context);
|
||||
|
||||
OsdGcdComputeBilinearVertex(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(),
|
||||
_gcd_queue);
|
||||
@ -82,7 +84,8 @@ OsdGcdComputeController::ApplyCatmarkFaceVerticesKernel(
|
||||
assert(context);
|
||||
|
||||
OsdGcdComputeFace(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::F_IT)->GetBuffer(),
|
||||
(const int*)context->GetTable(FarSubdivisionTables::F_ITa)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(),
|
||||
@ -96,7 +99,8 @@ OsdGcdComputeController::ApplyCatmarkEdgeVerticesKernel(
|
||||
assert(context);
|
||||
|
||||
OsdGcdComputeEdge(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
|
||||
(const float*)context->GetTable(FarSubdivisionTables::E_W)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(),
|
||||
@ -110,7 +114,8 @@ OsdGcdComputeController::ApplyCatmarkVertexVerticesKernelB(
|
||||
assert(context);
|
||||
|
||||
OsdGcdComputeVertexB(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_IT)->GetBuffer(),
|
||||
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
|
||||
@ -125,7 +130,8 @@ OsdGcdComputeController::ApplyCatmarkVertexVerticesKernelA1(
|
||||
assert(context);
|
||||
|
||||
OsdGcdComputeVertexA(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
|
||||
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), false,
|
||||
@ -139,7 +145,8 @@ OsdGcdComputeController::ApplyCatmarkVertexVerticesKernelA2(
|
||||
assert(context);
|
||||
|
||||
OsdGcdComputeVertexA(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
|
||||
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true,
|
||||
@ -153,7 +160,8 @@ OsdGcdComputeController::ApplyLoopEdgeVerticesKernel(
|
||||
assert(context);
|
||||
|
||||
OsdGcdComputeEdge(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
|
||||
(const float*)context->GetTable(FarSubdivisionTables::E_W)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(),
|
||||
@ -167,7 +175,8 @@ OsdGcdComputeController::ApplyLoopVertexVerticesKernelB(
|
||||
assert(context);
|
||||
|
||||
OsdGcdComputeLoopVertexB(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_IT)->GetBuffer(),
|
||||
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
|
||||
@ -182,7 +191,8 @@ OsdGcdComputeController::ApplyLoopVertexVerticesKernelA1(
|
||||
assert(context);
|
||||
|
||||
OsdGcdComputeVertexA(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
|
||||
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), false,
|
||||
@ -196,7 +206,8 @@ OsdGcdComputeController::ApplyLoopVertexVerticesKernelA2(
|
||||
assert(context);
|
||||
|
||||
OsdGcdComputeVertexA(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
|
||||
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true,
|
||||
@ -216,8 +227,8 @@ OsdGcdComputeController::ApplyVertexEdits(
|
||||
const OsdCpuTable * editValues = edit->GetEditValues();
|
||||
|
||||
if (edit->GetOperation() == FarVertexEdit::Add) {
|
||||
OsdGcdEditVertexAdd(_vdesc,
|
||||
_currentVertexBuffer,
|
||||
OsdGcdEditVertexAdd(_currentBindState.vertexBuffer,
|
||||
_currentBindState.vertexDesc,
|
||||
edit->GetPrimvarOffset(),
|
||||
edit->GetPrimvarWidth(),
|
||||
batch.GetVertexOffset(),
|
||||
@ -228,8 +239,8 @@ OsdGcdComputeController::ApplyVertexEdits(
|
||||
static_cast<float*>(editValues->GetBuffer()),
|
||||
_gcd_queue);
|
||||
} else if (edit->GetOperation() == FarVertexEdit::Set) {
|
||||
OsdGcdEditVertexSet(_vdesc,
|
||||
_currentVertexBuffer,
|
||||
OsdGcdEditVertexSet(_currentBindState.vertexBuffer,
|
||||
_currentBindState.vertexDesc,
|
||||
edit->GetPrimvarOffset(),
|
||||
edit->GetPrimvarWidth(),
|
||||
batch.GetVertexOffset(),
|
||||
|
@ -29,6 +29,7 @@
|
||||
|
||||
#include "../far/dispatcher.h"
|
||||
#include "../osd/cpuComputeContext.h"
|
||||
#include "../osd/vertexDescriptor.h"
|
||||
|
||||
#include <dispatch/dispatch.h>
|
||||
|
||||
@ -64,15 +65,25 @@ public:
|
||||
///
|
||||
/// @param varyingBuffer varying-interpolated data buffer
|
||||
///
|
||||
/// @param vertexDesc the descriptor of vertex elements to be refined.
|
||||
/// if it's null, all primvars in the vertex buffer
|
||||
/// will be refined.
|
||||
///
|
||||
/// @param varyingDesc the descriptor of varying elements to be refined.
|
||||
/// if it's null, all primvars in the varying buffer
|
||||
/// will be refined.
|
||||
///
|
||||
template<class VERTEX_BUFFER, class VARYING_BUFFER>
|
||||
void Refine(OsdCpuComputeContext const *context,
|
||||
FarKernelBatchVector const & batches,
|
||||
VERTEX_BUFFER *vertexBuffer,
|
||||
VARYING_BUFFER *varyingBuffer) {
|
||||
VARYING_BUFFER *varyingBuffer,
|
||||
OsdVertexBufferDescriptor const *vertexDesc=NULL,
|
||||
OsdVertexBufferDescriptor const *varyingDesc=NULL) {
|
||||
|
||||
if (batches.empty()) return;
|
||||
|
||||
bind(vertexBuffer, varyingBuffer);
|
||||
bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc);
|
||||
|
||||
FarDispatcher::Refine(this, context, batches, /*maxlevel*/-1);
|
||||
|
||||
@ -130,26 +141,50 @@ protected:
|
||||
void ApplyVertexEdits(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
template<class VERTEX_BUFFER, class VARYING_BUFFER>
|
||||
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying) {
|
||||
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying,
|
||||
OsdVertexBufferDescriptor const *vertexDesc,
|
||||
OsdVertexBufferDescriptor const *varyingDesc) {
|
||||
|
||||
_currentVertexBuffer = vertex ? vertex->BindCpuBuffer() : 0;
|
||||
_currentVaryingBuffer = varying ? varying->BindCpuBuffer() : 0;
|
||||
// if the vertex buffer descriptor is specified, use it.
|
||||
// otherwise, assumes the data is tightly packed in the vertex buffer.
|
||||
if (vertexDesc) {
|
||||
_currentBindState.vertexDesc = *vertexDesc;
|
||||
} else {
|
||||
int numElements = vertex ? vertex->GetNumElements() : 0;
|
||||
_currentBindState.vertexDesc = OsdVertexBufferDescriptor(
|
||||
0, numElements, numElements);
|
||||
}
|
||||
if (varyingDesc) {
|
||||
_currentBindState.varyingDesc = *varyingDesc;
|
||||
} else {
|
||||
int numElements = varying ? varying->GetNumElements() : 0;
|
||||
_currentBindState.varyingDesc = OsdVertexBufferDescriptor(
|
||||
0, numElements, numElements);
|
||||
}
|
||||
|
||||
int numVertexElements = vertex ? vertex->GetNumElements() : 0;
|
||||
int numVaryingElements = varying ? varying->GetNumElements() : 0;
|
||||
_vdesc.Set(numVertexElements, numVaryingElements);
|
||||
_currentBindState.vertexBuffer = vertex ? vertex->BindCpuBuffer() : 0;
|
||||
_currentBindState.varyingBuffer = varying ? varying->BindCpuBuffer() : 0;
|
||||
}
|
||||
void unbind() {
|
||||
_currentVertexBuffer = 0;
|
||||
_currentVaryingBuffer = 0;
|
||||
_vdesc.Reset();
|
||||
_currentBindState.Reset();
|
||||
}
|
||||
|
||||
private:
|
||||
dispatch_queue_t _gcd_queue;
|
||||
struct BindState {
|
||||
BindState() : vertexBuffer(NULL), varyingBuffer(NULL) {}
|
||||
void Reset() {
|
||||
vertexBuffer = varyingBuffer = NULL;
|
||||
vertexDesc.Reset();
|
||||
varyingDesc.Reset();
|
||||
}
|
||||
float *vertexBuffer;
|
||||
float *varyingBuffer;
|
||||
OsdVertexBufferDescriptor vertexDesc;
|
||||
OsdVertexBufferDescriptor varyingDesc;
|
||||
};
|
||||
|
||||
float *_currentVertexBuffer, *_currentVaryingBuffer;
|
||||
OsdVertexDescriptor _vdesc;
|
||||
BindState _currentBindState;
|
||||
dispatch_queue_t _gcd_queue;
|
||||
|
||||
};
|
||||
|
||||
|
@ -33,9 +33,32 @@ namespace OPENSUBDIV_VERSION {
|
||||
|
||||
const int GCD_WORK_STRIDE = 32;
|
||||
|
||||
static inline void
|
||||
clear(float *origin, int index, OsdVertexBufferDescriptor const &desc) {
|
||||
|
||||
if (origin) {
|
||||
float *dst = origin + index * desc.stride + desc.offset;
|
||||
memset(dst, 0, desc.length * sizeof(float));
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
addWithWeight(float *origin, int dstIndex, int srcIndex,
|
||||
float weight, OsdVertexBufferDescriptor const &desc) {
|
||||
|
||||
if (origin) {
|
||||
const float *src = origin + srcIndex * desc.stride + desc.offset;
|
||||
float *dst = origin + dstIndex * desc.stride + desc.offset;
|
||||
for (int k = 0; k < desc.length; ++k) {
|
||||
dst[k] += src[k] * weight;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void OsdGcdComputeFace(
|
||||
OsdVertexDescriptor const &vdesc, float * vertex, float * varying,
|
||||
float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *F_IT, const int *F_ITa,
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
dispatch_queue_t gcdq) {
|
||||
@ -44,18 +67,22 @@ void OsdGcdComputeFace(
|
||||
dispatch_apply(workSize/GCD_WORK_STRIDE, gcdq, ^(size_t blockIdx){
|
||||
const int start_i = start + blockIdx*GCD_WORK_STRIDE;
|
||||
const int end_i = start_i + GCD_WORK_STRIDE;
|
||||
OsdCpuComputeFace(vdesc, vertex, varying, F_IT, F_ITa,
|
||||
OsdCpuComputeFace(vertex, varying, vertexDesc, varyingDesc,
|
||||
F_IT, F_ITa,
|
||||
vertexOffset, tableOffset, start_i, end_i);
|
||||
});
|
||||
const int start_e = end - workSize%GCD_WORK_STRIDE;
|
||||
const int end_e = end;
|
||||
if (start_e < end_e)
|
||||
OsdCpuComputeFace(vdesc, vertex, varying, F_IT, F_ITa,
|
||||
OsdCpuComputeFace(vertex, varying, vertexDesc, varyingDesc,
|
||||
F_IT, F_ITa,
|
||||
vertexOffset, tableOffset, start_e, end_e);
|
||||
}
|
||||
|
||||
void OsdGcdComputeEdge(
|
||||
OsdVertexDescriptor const &vdesc, float * vertex, float * varying,
|
||||
float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *E_IT, const float *E_W,
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
dispatch_queue_t gcdq) {
|
||||
@ -64,18 +91,22 @@ void OsdGcdComputeEdge(
|
||||
dispatch_apply(workSize/GCD_WORK_STRIDE, gcdq, ^(size_t blockIdx){
|
||||
const int start_i = start + blockIdx*GCD_WORK_STRIDE;
|
||||
const int end_i = start_i + GCD_WORK_STRIDE;
|
||||
OsdCpuComputeEdge(vdesc, vertex, varying, E_IT, E_W,
|
||||
OsdCpuComputeEdge(vertex, varying, vertexDesc, varyingDesc,
|
||||
E_IT, E_W,
|
||||
vertexOffset, tableOffset, start_i, end_i);
|
||||
});
|
||||
const int start_e = end - workSize%GCD_WORK_STRIDE;
|
||||
const int end_e = end;
|
||||
if (start_e < end_e)
|
||||
OsdCpuComputeEdge(vdesc, vertex, varying, E_IT, E_W,
|
||||
OsdCpuComputeEdge(vertex, varying, vertexDesc, varyingDesc,
|
||||
E_IT, E_W,
|
||||
vertexOffset, tableOffset, start_e, end_e);
|
||||
}
|
||||
|
||||
void OsdGcdComputeVertexA(
|
||||
OsdVertexDescriptor const &vdesc, float * vertex, float * varying,
|
||||
float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *V_ITa, const float *V_W,
|
||||
int vertexOffset, int tableOffset, int start, int end, int pass,
|
||||
dispatch_queue_t gcdq) {
|
||||
@ -84,18 +115,22 @@ void OsdGcdComputeVertexA(
|
||||
dispatch_apply(workSize/GCD_WORK_STRIDE, gcdq, ^(size_t blockIdx){
|
||||
const int start_i = start + blockIdx*GCD_WORK_STRIDE;
|
||||
const int end_i = start_i + GCD_WORK_STRIDE;
|
||||
OsdCpuComputeVertexA(vdesc, vertex, varying, V_ITa, V_W,
|
||||
OsdCpuComputeVertexA(vertex, varying, vertexDesc, varyingDesc,
|
||||
V_ITa, V_W,
|
||||
vertexOffset, tableOffset, start_i, end_i, pass);
|
||||
});
|
||||
const int start_e = end - workSize%GCD_WORK_STRIDE;
|
||||
const int end_e = end;
|
||||
if (start_e < end_e)
|
||||
OsdCpuComputeVertexA(vdesc, vertex, varying, V_ITa, V_W,
|
||||
OsdCpuComputeVertexA(vertex, varying, vertexDesc, varyingDesc,
|
||||
V_ITa, V_W,
|
||||
vertexOffset, tableOffset, start_e, end_e, pass);
|
||||
}
|
||||
|
||||
void OsdGcdComputeVertexB(
|
||||
OsdVertexDescriptor const &vdesc, float * vertex, float * varying,
|
||||
float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *V_ITa, const int *V_IT, const float *V_W,
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
dispatch_queue_t gcdq) {
|
||||
@ -104,18 +139,22 @@ void OsdGcdComputeVertexB(
|
||||
dispatch_apply(workSize/GCD_WORK_STRIDE, gcdq, ^(size_t blockIdx){
|
||||
const int start_i = start + blockIdx*GCD_WORK_STRIDE;
|
||||
const int end_i = start_i + GCD_WORK_STRIDE;
|
||||
OsdCpuComputeVertexB(vdesc, vertex, varying, V_ITa, V_IT, V_W,
|
||||
OsdCpuComputeVertexB(vertex, varying, vertexDesc, varyingDesc,
|
||||
V_ITa, V_IT, V_W,
|
||||
vertexOffset, tableOffset, start_i, end_i);
|
||||
});
|
||||
const int start_e = end - workSize%GCD_WORK_STRIDE;
|
||||
const int end_e = end;
|
||||
if (start_e < end_e)
|
||||
OsdCpuComputeVertexB(vdesc, vertex, varying, V_ITa, V_IT, V_W,
|
||||
OsdCpuComputeVertexB(vertex, varying, vertexDesc, varyingDesc,
|
||||
V_ITa, V_IT, V_W,
|
||||
vertexOffset, tableOffset, start_e, end_e);
|
||||
}
|
||||
|
||||
void OsdGcdComputeLoopVertexB(
|
||||
OsdVertexDescriptor const &vdesc, float * vertex, float * varying,
|
||||
float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *V_ITa, const int *V_IT, const float *V_W,
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
dispatch_queue_t gcdq) {
|
||||
@ -133,19 +172,22 @@ void OsdGcdComputeLoopVertexB(
|
||||
beta = (0.625f - beta) * wp;
|
||||
|
||||
int dstIndex = vertexOffset + i - tableOffset;
|
||||
vdesc.Clear(vertex, varying, dstIndex);
|
||||
clear(vertex, dstIndex, vertexDesc);
|
||||
clear(varying, dstIndex, varyingDesc);
|
||||
|
||||
vdesc.AddWithWeight(vertex, dstIndex, p, weight * (1.0f - (beta * n)));
|
||||
addWithWeight(vertex, dstIndex, p, weight * (1.0f - (beta * n)), vertexDesc);
|
||||
|
||||
for (int j = 0; j < n; ++j)
|
||||
vdesc.AddWithWeight(vertex, dstIndex, V_IT[h+j], weight * beta);
|
||||
addWithWeight(vertex, dstIndex, V_IT[h+j], weight * beta, vertexDesc);
|
||||
|
||||
vdesc.AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
|
||||
addWithWeight(varying, dstIndex, p, 1.0f, varyingDesc);
|
||||
});
|
||||
}
|
||||
|
||||
void OsdGcdComputeBilinearEdge(
|
||||
OsdVertexDescriptor const &vdesc, float * vertex, float * varying,
|
||||
float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *E_IT,
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
dispatch_queue_t gcdq) {
|
||||
@ -156,18 +198,21 @@ void OsdGcdComputeBilinearEdge(
|
||||
int eidx1 = E_IT[2*i+1];
|
||||
|
||||
int dstIndex = vertexOffset + i - tableOffset;
|
||||
vdesc.Clear(vertex, varying, dstIndex);
|
||||
clear(vertex, dstIndex, vertexDesc);
|
||||
clear(varying, dstIndex, varyingDesc);
|
||||
|
||||
vdesc.AddWithWeight(vertex, dstIndex, eidx0, 0.5f);
|
||||
vdesc.AddWithWeight(vertex, dstIndex, eidx1, 0.5f);
|
||||
addWithWeight(vertex, dstIndex, eidx0, 0.5f, vertexDesc);
|
||||
addWithWeight(vertex, dstIndex, eidx1, 0.5f, vertexDesc);
|
||||
|
||||
vdesc.AddVaryingWithWeight(varying, dstIndex, eidx0, 0.5f);
|
||||
vdesc.AddVaryingWithWeight(varying, dstIndex, eidx1, 0.5f);
|
||||
addWithWeight(varying, dstIndex, eidx0, 0.5f, varyingDesc);
|
||||
addWithWeight(varying, dstIndex, eidx1, 0.5f, varyingDesc);
|
||||
});
|
||||
}
|
||||
|
||||
void OsdGcdComputeBilinearVertex(
|
||||
OsdVertexDescriptor const &vdesc, float * vertex, float * varying,
|
||||
float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *V_ITa,
|
||||
int vertexOffset, int tableOffset, int start, int end,
|
||||
dispatch_queue_t gcdq) {
|
||||
@ -177,15 +222,17 @@ void OsdGcdComputeBilinearVertex(
|
||||
int p = V_ITa[i];
|
||||
|
||||
int dstIndex = vertexOffset + i - tableOffset;
|
||||
vdesc.Clear(vertex, varying, dstIndex);
|
||||
clear(vertex, dstIndex, vertexDesc);
|
||||
clear(varying, dstIndex, varyingDesc);
|
||||
|
||||
vdesc.AddWithWeight(vertex, dstIndex, p, 1.0f);
|
||||
vdesc.AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
|
||||
addWithWeight(vertex, dstIndex, p, 1.0f, vertexDesc);
|
||||
addWithWeight(varying, dstIndex, p, 1.0f, varyingDesc);
|
||||
});
|
||||
}
|
||||
|
||||
void OsdGcdEditVertexAdd(
|
||||
OsdVertexDescriptor const &vdesc, float * vertex,
|
||||
float * vertex,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
int primVarOffset, int primVarWidth,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end,
|
||||
@ -195,14 +242,20 @@ void OsdGcdEditVertexAdd(
|
||||
int vertexCount = end - start;
|
||||
dispatch_apply(vertexCount, gcdq, ^(size_t blockIdx){
|
||||
int i = start + blockIdx + tableOffset;
|
||||
vdesc.ApplyVertexEditAdd(vertex, primVarOffset, primVarWidth,
|
||||
editIndices[i] + vertexOffset,
|
||||
&editValues[i*primVarWidth]);
|
||||
|
||||
// Apply edit entry i: add its primVarWidth values to the matching primvar
// components of the edited vertex. Nothing is done without a vertex buffer.
if (vertex) {
    int editIndex = editIndices[i] + vertexOffset;
    // stride/offset locate this vertex's element range inside the buffer;
    // primVarOffset selects the first edited component within it.
    float *dst = vertex + editIndex * vertexDesc.stride
        + vertexDesc.offset + primVarOffset;
    // Edit values are packed as primVarWidth floats per edit entry.
    const float *src = editValues + i * primVarWidth;

    // Index components with j, not with the edit index i: using i would
    // touch a single float at an unrelated position in both arrays.
    for (int j = 0; j < primVarWidth; ++j) {
        dst[j] += src[j];
    }
}
|
||||
});
|
||||
}
|
||||
|
||||
void OsdGcdEditVertexSet(
|
||||
OsdVertexDescriptor const &vdesc, float * vertex,
|
||||
float * vertex,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
int primVarOffset, int primVarWidth,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end,
|
||||
@ -212,9 +265,14 @@ void OsdGcdEditVertexSet(
|
||||
int vertexCount = end - start;
|
||||
dispatch_apply(vertexCount, gcdq, ^(size_t blockIdx){
|
||||
int i = start + blockIdx + tableOffset;
|
||||
vdesc.ApplyVertexEditSet(vertex, primVarOffset, primVarWidth,
|
||||
editIndices[i] + vertexOffset,
|
||||
&editValues[i*primVarWidth]);
|
||||
|
||||
// Apply edit entry i: overwrite the matching primvar components of the
// edited vertex with its primVarWidth values. Nothing is done without a
// vertex buffer.
if (vertex) {
    int editIndex = editIndices[i] + vertexOffset;
    // stride/offset locate this vertex's element range inside the buffer;
    // primVarOffset selects the first edited component within it.
    float *dst = vertex + editIndex * vertexDesc.stride
        + vertexDesc.offset + primVarOffset;
    // Edit values are packed as primVarWidth floats per edit entry.
    const float *src = editValues + i * primVarWidth;

    // Index components with j, not with the edit index i: using i would
    // touch a single float at an unrelated position in both arrays.
    for (int j = 0; j < primVarWidth; ++j) {
        dst[j] = src[j];
    }
}
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -32,66 +32,75 @@
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
struct OsdVertexDescriptor;
|
||||
struct OsdVertexBufferDescriptor;
|
||||
|
||||
void OsdGcdComputeFace(OsdVertexDescriptor const &vdesc,
|
||||
float * vertex, float * varying,
|
||||
void OsdGcdComputeFace(float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *F_IT, const int *F_ITa,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end,
|
||||
dispatch_queue_t gcdq);
|
||||
|
||||
void OsdGcdComputeEdge(OsdVertexDescriptor const &vdesc,
|
||||
float *vertex, float * varying,
|
||||
void OsdGcdComputeEdge(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *E_IT, const float *E_ITa,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end,
|
||||
dispatch_queue_t gcdq);
|
||||
|
||||
void OsdGcdComputeVertexA(OsdVertexDescriptor const &vdesc,
|
||||
float *vertex, float * varying,
|
||||
void OsdGcdComputeVertexA(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *V_ITa, const float *V_IT,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end, int pass,
|
||||
dispatch_queue_t gcdq);
|
||||
|
||||
void OsdGcdComputeVertexB(OsdVertexDescriptor const &vdesc,
|
||||
float *vertex, float * varying,
|
||||
void OsdGcdComputeVertexB(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *V_ITa, const int *V_IT, const float *V_W,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end,
|
||||
dispatch_queue_t gcdq);
|
||||
|
||||
void OsdGcdComputeLoopVertexB(OsdVertexDescriptor const &vdesc,
|
||||
float *vertex, float * varying,
|
||||
void OsdGcdComputeLoopVertexB(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *V_ITa, const int *V_IT,
|
||||
const float *V_W,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end,
|
||||
dispatch_queue_t gcdq);
|
||||
|
||||
void OsdGcdComputeBilinearEdge(OsdVertexDescriptor const &vdesc,
|
||||
float *vertex, float * varying,
|
||||
void OsdGcdComputeBilinearEdge(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *E_IT,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end,
|
||||
dispatch_queue_t gcdq);
|
||||
|
||||
void OsdGcdComputeBilinearVertex(OsdVertexDescriptor const &vdesc,
|
||||
float *vertex, float * varying,
|
||||
void OsdGcdComputeBilinearVertex(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *V_ITa,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end,
|
||||
dispatch_queue_t gcdq);
|
||||
|
||||
void OsdGcdEditVertexAdd(OsdVertexDescriptor const &vdesc, float *vertex,
|
||||
void OsdGcdEditVertexAdd(float *vertex,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
int primVarOffset, int primVarWidth,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end,
|
||||
const unsigned int *editIndices, const float *editValues,
|
||||
dispatch_queue_t gcdq);
|
||||
|
||||
void OsdGcdEditVertexSet(OsdVertexDescriptor const &vdesc, float *vertex,
|
||||
void OsdGcdEditVertexSet(float *vertex,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
int primVarOffset, int primVarWidth,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end,
|
||||
|
@ -29,6 +29,7 @@
|
||||
|
||||
#include "../osd/mesh.h"
|
||||
#include "../osd/glDrawContext.h"
|
||||
#include "../osd/vertexDescriptor.h"
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_OPENCL
|
||||
#if defined(__APPLE__)
|
||||
@ -125,6 +126,14 @@ public:
|
||||
/// Runs the compute controller's Refine over the far mesh's kernel batches,
/// using the mesh's own vertex and varying buffers.
virtual void Refine() {
    _computeController->Refine(_computeContext,
                               _farMesh->GetKernelBatches(),
                               _vertexBuffer,
                               _varyingBuffer);
}
|
||||
/// Runs the compute controller's Refine with caller-supplied descriptors.
///
/// @param vertexDesc   forwarded unchanged to the compute controller
/// @param varyingDesc  forwarded unchanged to the compute controller
/// @param interleaved  when true, the vertex buffer is passed as the
///                     varying buffer too; otherwise the separate
///                     varying buffer is passed
virtual void Refine(OsdVertexBufferDescriptor const *vertexDesc,
                    OsdVertexBufferDescriptor const *varyingDesc,
                    bool interleaved) {
    _computeController->Refine(
        _computeContext,
        _farMesh->GetKernelBatches(),
        _vertexBuffer,
        (interleaved ? _vertexBuffer : _varyingBuffer),
        vertexDesc,
        varyingDesc);
}
|
||||
|
||||
/// Forwards to the compute controller's Synchronize().
virtual void Synchronize() {
    _computeController->Synchronize();
}
|
||||
@ -250,6 +259,7 @@ public:
|
||||
/// Deletes the far mesh, the vertex and varying buffers, the compute
/// context and the draw context. The compute controller is not deleted here.
virtual ~OsdMesh() {
    delete _farMesh;

    delete _vertexBuffer;
    delete _varyingBuffer;

    delete _computeContext;
    delete _drawContext;
}
|
||||
@ -265,6 +275,14 @@ public:
|
||||
/// Runs the compute controller's Refine over the far mesh's kernel batches,
/// using the mesh's own vertex and varying buffers.
virtual void Refine() {
    _computeController->Refine(_computeContext,
                               _farMesh->GetKernelBatches(),
                               _vertexBuffer,
                               _varyingBuffer);
}
|
||||
/// Runs the compute controller's Refine with caller-supplied descriptors.
///
/// @param vertexDesc   forwarded unchanged to the compute controller
/// @param varyingDesc  forwarded unchanged to the compute controller
/// @param interleaved  when true, the vertex buffer is passed as the
///                     varying buffer too; otherwise the separate
///                     varying buffer is passed
virtual void Refine(OsdVertexBufferDescriptor const *vertexDesc,
                    OsdVertexBufferDescriptor const *varyingDesc,
                    bool interleaved) {
    _computeController->Refine(
        _computeContext,
        _farMesh->GetKernelBatches(),
        _vertexBuffer,
        (interleaved ? _vertexBuffer : _varyingBuffer),
        vertexDesc,
        varyingDesc);
}
|
||||
/// Forwards to the compute controller's Synchronize().
virtual void Synchronize() {
    _computeController->Synchronize();
}
|
||||
|
@ -34,8 +34,7 @@
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
OsdGLSLComputeController::OsdGLSLComputeController()
|
||||
: _currentVertexBuffer(0), _currentVaryingBuffer(0), _currentKernelBundle(NULL) {
|
||||
OsdGLSLComputeController::OsdGLSLComputeController() {
|
||||
}
|
||||
|
||||
OsdGLSLComputeController::~OsdGLSLComputeController() {
|
||||
@ -54,20 +53,21 @@ OsdGLSLComputeController::Synchronize() {
|
||||
}
|
||||
|
||||
OsdGLSLComputeKernelBundle *
|
||||
OsdGLSLComputeController::getKernels(int numVertexElements,
|
||||
int numVaryingElements) {
|
||||
OsdGLSLComputeController::getKernels(
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc) {
|
||||
|
||||
std::vector<OsdGLSLComputeKernelBundle*>::iterator it =
|
||||
std::find_if(_kernelRegistry.begin(), _kernelRegistry.end(),
|
||||
OsdGLSLComputeKernelBundle::Match(numVertexElements,
|
||||
numVaryingElements));
|
||||
OsdGLSLComputeKernelBundle::Match(vertexDesc,
|
||||
varyingDesc));
|
||||
if (it != _kernelRegistry.end()) {
|
||||
return *it;
|
||||
} else {
|
||||
OsdGLSLComputeKernelBundle *kernelBundle =
|
||||
new OsdGLSLComputeKernelBundle();
|
||||
_kernelRegistry.push_back(kernelBundle);
|
||||
kernelBundle->Compile(numVertexElements, numVaryingElements);
|
||||
kernelBundle->Compile(vertexDesc, varyingDesc);
|
||||
return kernelBundle;
|
||||
}
|
||||
}
|
||||
@ -75,18 +75,21 @@ OsdGLSLComputeController::getKernels(int numVertexElements,
|
||||
void
|
||||
OsdGLSLComputeController::bindBufferAndProgram() {
|
||||
|
||||
if (_currentVertexBuffer)
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, _currentVertexBuffer);
|
||||
if (_currentBindState.vertexBuffer)
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, _currentBindState.vertexBuffer);
|
||||
|
||||
if (_currentVaryingBuffer)
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, _currentVaryingBuffer);
|
||||
if (_currentBindState.varyingBuffer)
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, _currentBindState.varyingBuffer);
|
||||
|
||||
_currentKernelBundle->UseProgram();
|
||||
_currentBindState.kernelBundle->UseProgram(_currentBindState.vertexDesc.offset,
|
||||
_currentBindState.varyingDesc.offset);
|
||||
glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGLSLComputeController::unbindBufferAndProgram() {
|
||||
|
||||
glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, 0);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, 0);
|
||||
glUseProgram(0);
|
||||
@ -98,8 +101,9 @@ OsdGLSLComputeController::ApplyBilinearFaceVerticesKernel(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyBilinearFaceVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
_currentBindState.kernelBundle->ApplyBilinearFaceVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(),
|
||||
batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
@ -108,8 +112,9 @@ OsdGLSLComputeController::ApplyBilinearEdgeVerticesKernel(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyBilinearEdgeVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
_currentBindState.kernelBundle->ApplyBilinearEdgeVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(),
|
||||
batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
@ -118,8 +123,9 @@ OsdGLSLComputeController::ApplyBilinearVertexVerticesKernel(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyBilinearVertexVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
_currentBindState.kernelBundle->ApplyBilinearVertexVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(),
|
||||
batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
@ -128,20 +134,20 @@ OsdGLSLComputeController::ApplyCatmarkFaceVerticesKernel(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyCatmarkFaceVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
_currentBindState.kernelBundle->ApplyCatmarkFaceVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(),
|
||||
batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
|
||||
|
||||
void
|
||||
OsdGLSLComputeController::ApplyCatmarkEdgeVerticesKernel(
|
||||
FarKernelBatch const &batch, OsdGLSLComputeContext const *context) const {
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyCatmarkEdgeVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
_currentBindState.kernelBundle->ApplyCatmarkEdgeVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(),
|
||||
batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
@ -150,8 +156,9 @@ OsdGLSLComputeController::ApplyCatmarkVertexVerticesKernelB(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyCatmarkVertexVerticesKernelB(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
_currentBindState.kernelBundle->ApplyCatmarkVertexVerticesKernelB(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(),
|
||||
batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
@ -160,8 +167,9 @@ OsdGLSLComputeController::ApplyCatmarkVertexVerticesKernelA1(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyCatmarkVertexVerticesKernelA(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), false);
|
||||
_currentBindState.kernelBundle->ApplyCatmarkVertexVerticesKernelA(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(),
|
||||
batch.GetStart(), batch.GetEnd(), false);
|
||||
}
|
||||
|
||||
void
|
||||
@ -170,8 +178,9 @@ OsdGLSLComputeController::ApplyCatmarkVertexVerticesKernelA2(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyCatmarkVertexVerticesKernelA(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
|
||||
_currentBindState.kernelBundle->ApplyCatmarkVertexVerticesKernelA(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(),
|
||||
batch.GetStart(), batch.GetEnd(), true);
|
||||
}
|
||||
|
||||
void
|
||||
@ -180,8 +189,9 @@ OsdGLSLComputeController::ApplyLoopEdgeVerticesKernel(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyLoopEdgeVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
_currentBindState.kernelBundle->ApplyLoopEdgeVerticesKernel(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(),
|
||||
batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
@ -190,8 +200,9 @@ OsdGLSLComputeController::ApplyLoopVertexVerticesKernelB(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyLoopVertexVerticesKernelB(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
_currentBindState.kernelBundle->ApplyLoopVertexVerticesKernelB(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(),
|
||||
batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
void
|
||||
@ -200,8 +211,9 @@ OsdGLSLComputeController::ApplyLoopVertexVerticesKernelA1(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyLoopVertexVerticesKernelA(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), false);
|
||||
_currentBindState.kernelBundle->ApplyLoopVertexVerticesKernelA(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(),
|
||||
batch.GetStart(), batch.GetEnd(), false);
|
||||
}
|
||||
|
||||
void
|
||||
@ -210,8 +222,9 @@ OsdGLSLComputeController::ApplyLoopVertexVerticesKernelA2(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyLoopVertexVerticesKernelA(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
|
||||
_currentBindState.kernelBundle->ApplyLoopVertexVerticesKernelA(
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(),
|
||||
batch.GetStart(), batch.GetEnd(), true);
|
||||
}
|
||||
|
||||
void
|
||||
@ -229,12 +242,12 @@ OsdGLSLComputeController::ApplyVertexEdits(
|
||||
int primvarWidth = edit->GetPrimvarWidth();
|
||||
|
||||
if (edit->GetOperation() == FarVertexEdit::Add) {
|
||||
_currentKernelBundle->ApplyEditAdd( primvarOffset,
|
||||
primvarWidth,
|
||||
batch.GetVertexOffset(),
|
||||
batch.GetTableOffset(),
|
||||
batch.GetStart(),
|
||||
batch.GetEnd());
|
||||
_currentBindState.kernelBundle->ApplyEditAdd(primvarOffset,
|
||||
primvarWidth,
|
||||
batch.GetVertexOffset(),
|
||||
batch.GetTableOffset(),
|
||||
batch.GetStart(),
|
||||
batch.GetEnd());
|
||||
} else {
|
||||
// XXX: edit SET is not implemented yet.
|
||||
}
|
||||
|
@ -29,6 +29,7 @@
|
||||
|
||||
#include "../far/dispatcher.h"
|
||||
#include "../osd/glslComputeContext.h"
|
||||
#include "../osd/vertexDescriptor.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
@ -69,18 +70,25 @@ public:
|
||||
///
|
||||
/// @param varyingBuffer varying-interpolated data buffer
|
||||
///
|
||||
/// @param vertexDesc the descriptor of vertex elements to be refined.
|
||||
/// if it's null, all primvars in the vertex buffer
|
||||
/// will be refined.
|
||||
///
|
||||
/// @param varyingDesc the descriptor of varying elements to be refined.
|
||||
/// if it's null, all primvars in the varying buffer
|
||||
/// will be refined.
|
||||
///
|
||||
template<class VERTEX_BUFFER, class VARYING_BUFFER>
|
||||
void Refine(OsdGLSLComputeContext const *context,
|
||||
FarKernelBatchVector const &batches,
|
||||
VERTEX_BUFFER *vertexBuffer,
|
||||
VARYING_BUFFER *varyingBuffer) {
|
||||
VARYING_BUFFER *varyingBuffer,
|
||||
OsdVertexBufferDescriptor const *vertexDesc=NULL,
|
||||
OsdVertexBufferDescriptor const *varyingDesc=NULL) {
|
||||
|
||||
if (batches.empty()) return;
|
||||
|
||||
int numVertexElements = vertexBuffer ? vertexBuffer->GetNumElements() : 0;
|
||||
int numVaryingElements = varyingBuffer ? varyingBuffer->GetNumElements() : 0;
|
||||
|
||||
bind(vertexBuffer, varyingBuffer, getKernels(numVertexElements, numVaryingElements));
|
||||
bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc);
|
||||
// bind table buffers.
|
||||
context->BindShaderStorageBuffers();
|
||||
|
||||
@ -141,42 +149,69 @@ protected:
|
||||
|
||||
void ApplyVertexEdits(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
OsdGLSLComputeKernelBundle * getKernels(int numVertexElements,
|
||||
int numVaryingElements);
|
||||
OsdGLSLComputeKernelBundle * getKernels(
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc);
|
||||
|
||||
void bindBufferAndProgram();
|
||||
|
||||
void unbindBufferAndProgram();
|
||||
|
||||
template<class VERTEX_BUFFER, class VARYING_BUFFER>
|
||||
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying, OsdGLSLComputeKernelBundle *kernelBundle) {
|
||||
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying,
|
||||
OsdVertexBufferDescriptor const *vertexDesc,
|
||||
OsdVertexBufferDescriptor const *varyingDesc) {
|
||||
|
||||
_currentVertexBuffer = vertex ? vertex->BindVBO() : 0;
|
||||
_currentVaryingBuffer = varying ? varying->BindVBO() : 0;
|
||||
// if the vertex buffer descriptor is specified, use it.
|
||||
// otherwise, assumes the data is tightly packed in the vertex buffer.
|
||||
if (vertexDesc) {
|
||||
_currentBindState.vertexDesc = *vertexDesc;
|
||||
} else {
|
||||
int numElements = vertex ? vertex->GetNumElements() : 0;
|
||||
_currentBindState.vertexDesc = OsdVertexBufferDescriptor(
|
||||
0, numElements, numElements);
|
||||
}
|
||||
if (varyingDesc) {
|
||||
_currentBindState.varyingDesc = *varyingDesc;
|
||||
} else {
|
||||
int numElements = varying ? varying->GetNumElements() : 0;
|
||||
_currentBindState.varyingDesc = OsdVertexBufferDescriptor(
|
||||
0, numElements, numElements);
|
||||
}
|
||||
|
||||
_vdesc.numVertexElements = vertex ? vertex->GetNumElements() : 0;
|
||||
_vdesc.numVaryingElements = varying ? varying->GetNumElements() : 0;
|
||||
|
||||
_currentKernelBundle = kernelBundle;
|
||||
_currentBindState.vertexBuffer = vertex ? vertex->BindVBO() : 0;
|
||||
_currentBindState.varyingBuffer = varying ? varying->BindVBO() : 0;
|
||||
_currentBindState.kernelBundle = getKernels(_currentBindState.vertexDesc,
|
||||
_currentBindState.varyingDesc);
|
||||
|
||||
bindBufferAndProgram();
|
||||
}
|
||||
|
||||
/// Unbinds any previously bound vertex and varying data buffers.
|
||||
void unbind() {
|
||||
_currentVertexBuffer = 0;
|
||||
_currentVaryingBuffer = 0;
|
||||
_currentBindState.Reset();
|
||||
|
||||
unbindBufferAndProgram();
|
||||
}
|
||||
|
||||
private:
|
||||
struct BindState {
|
||||
BindState() : vertexBuffer(0), varyingBuffer(0), kernelBundle(NULL) {}
|
||||
void Reset() {
|
||||
vertexBuffer = varyingBuffer = 0;
|
||||
vertexDesc.Reset();
|
||||
varyingDesc.Reset();
|
||||
}
|
||||
GLuint vertexBuffer;
|
||||
GLuint varyingBuffer;
|
||||
OsdVertexBufferDescriptor vertexDesc;
|
||||
OsdVertexBufferDescriptor varyingDesc;
|
||||
OsdGLSLComputeKernelBundle *kernelBundle;
|
||||
};
|
||||
|
||||
BindState _currentBindState;
|
||||
|
||||
std::vector<OsdGLSLComputeKernelBundle *> _kernelRegistry;
|
||||
|
||||
GLuint _currentVertexBuffer, _currentVaryingBuffer;
|
||||
|
||||
OsdVertexDescriptor _vdesc;
|
||||
|
||||
OsdGLSLComputeKernelBundle * _currentKernelBundle;
|
||||
|
||||
};
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
|
@ -31,6 +31,8 @@ uniform int vertexOffset = 0; // vertex index offset for the batch
|
||||
uniform int tableOffset = 0; // offset of subdivision table
|
||||
uniform int indexStart = 0; // start index relative to tableOffset
|
||||
uniform int indexEnd = 0; // end index relative to tableOffset
|
||||
uniform int vertexBaseOffset = 0; // base vbo offset of the vertex buffer
|
||||
uniform int varyingBaseOffset = 0; // base vbo offset of the varying buffer
|
||||
uniform bool vertexPass;
|
||||
|
||||
/*
|
||||
@ -40,6 +42,22 @@ uniform bool vertexPass;
|
||||
^ ^ ^
|
||||
vertexOffset | |
|
||||
indexStart indexEnd
|
||||
|
||||
|
||||
|
||||
interleaved buffer example
|
||||
+---------------------------+
|
||||
| x | y | z | r | g | b | a |
|
||||
+---------------------------+
|
||||
^
|
||||
vertexBaseOffset
|
||||
^
|
||||
varyingBaseOffset
|
||||
|
||||
NUM_VERTEX_ELEMENTS = 3
|
||||
NUM_VARYING_ELEMENTS = 4
|
||||
VERTEX_STRIDE = VARYING_STRIDE = 7
|
||||
|
||||
*/
|
||||
|
||||
layout(binding=0) buffer vertex_buffer { float vertexBuffer[]; };
|
||||
@ -86,13 +104,15 @@ Vertex readVertex(int index)
|
||||
Vertex v;
|
||||
|
||||
#if NUM_VERTEX_ELEMENTS > 0
|
||||
int vertexIndex = index * VERTEX_STRIDE + vertexBaseOffset;
|
||||
for (int i = 0; i < NUM_VERTEX_ELEMENTS; i++) {
|
||||
v.vertexData[i] = vertexBuffer[index*NUM_VERTEX_ELEMENTS+i];
|
||||
v.vertexData[i] = vertexBuffer[vertexIndex + i];
|
||||
}
|
||||
#endif
|
||||
#if NUM_VARYING_ELEMENTS > 0
|
||||
int varyingIndex = index * VARYING_STRIDE + varyingBaseOffset;
|
||||
for (int i = 0; i < NUM_VARYING_ELEMENTS; i++) {
|
||||
v.varyingData[i] = varyingBuffer[index*NUM_VARYING_ELEMENTS+i];
|
||||
v.varyingData[i] = varyingBuffer[varyingIndex + i];
|
||||
}
|
||||
#endif
|
||||
return v;
|
||||
@ -101,13 +121,15 @@ Vertex readVertex(int index)
|
||||
void writeVertex(int index, Vertex v)
|
||||
{
|
||||
#if NUM_VERTEX_ELEMENTS > 0
|
||||
int vertexIndex = index * VERTEX_STRIDE + vertexBaseOffset;
|
||||
for (int i = 0; i < NUM_VERTEX_ELEMENTS; i++) {
|
||||
vertexBuffer[index*NUM_VERTEX_ELEMENTS+i] = v.vertexData[i];
|
||||
vertexBuffer[vertexIndex + i] = v.vertexData[i];
|
||||
}
|
||||
#endif
|
||||
#if NUM_VARYING_ELEMENTS > 0
|
||||
int varyingIndex = index * VARYING_STRIDE + varyingBaseOffset;
|
||||
for (int i = 0; i < NUM_VARYING_ELEMENTS; i++) {
|
||||
varyingBuffer[index*NUM_VARYING_ELEMENTS+i] = v.varyingData[i];
|
||||
varyingBuffer[varyingIndex + i] = v.varyingData[i];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
@ -152,6 +174,7 @@ void catmarkComputeFace()
|
||||
addWithWeight(dst, readVertex(index), weight);
|
||||
addVaryingWithWeight(dst, readVertex(index), weight);
|
||||
}
|
||||
|
||||
writeVertex(vid, dst);
|
||||
}
|
||||
|
||||
@ -356,6 +379,7 @@ void editAdd()
|
||||
|
||||
// seemingly we can't iterate dynamically over vertexData[n]
|
||||
// due to mysterious glsl runtime limitation...?
|
||||
#if NUM_VERTEX_ELEMENTS > 0
|
||||
for (int j = 0; j < NUM_VERTEX_ELEMENTS; ++j) {
|
||||
float editValue = _editValues[i*editPrimVarWidth + min(j, editPrimVarWidth)];
|
||||
editValue *= float(j >= editPrimVarOffset);
|
||||
@ -363,6 +387,7 @@ void editAdd()
|
||||
dst.vertexData[j] += editValue;
|
||||
}
|
||||
writeVertex(v + vertexOffset, dst);
|
||||
#endif
|
||||
}
|
||||
|
||||
void main()
|
||||
|
@ -37,6 +37,7 @@
|
||||
#include "../osd/opengl.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <sstream>
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
@ -46,7 +47,11 @@ static const char *shaderSource =
|
||||
;
|
||||
|
||||
OsdGLSLComputeKernelBundle::OsdGLSLComputeKernelBundle()
|
||||
: _program(0) {
|
||||
: _program(0),
|
||||
_numVertexElements(0),
|
||||
_vertexStride(0),
|
||||
_numVaryingElements(0),
|
||||
_varyingStride(0) {
|
||||
|
||||
// XXX: too rough!
|
||||
_workGroupSize = 64;
|
||||
@ -58,9 +63,14 @@ OsdGLSLComputeKernelBundle::~OsdGLSLComputeKernelBundle() {
|
||||
}
|
||||
|
||||
bool
|
||||
OsdGLSLComputeKernelBundle::Compile(int numVertexElements, int numVaryingElements) {
|
||||
OsdGLSLComputeKernelBundle::Compile(
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc) {
|
||||
|
||||
_vdesc.Set(numVertexElements, numVaryingElements );
|
||||
_numVertexElements = vertexDesc.length;
|
||||
_vertexStride = vertexDesc.stride;
|
||||
_numVaryingElements = varyingDesc.length;
|
||||
_varyingStride = varyingDesc.stride;
|
||||
|
||||
if (_program) {
|
||||
glDeleteProgram(_program);
|
||||
@ -70,15 +80,16 @@ OsdGLSLComputeKernelBundle::Compile(int numVertexElements, int numVaryingElement
|
||||
|
||||
GLuint shader = glCreateShader(GL_COMPUTE_SHADER);
|
||||
|
||||
char constantDefine[256];
|
||||
snprintf(constantDefine, 256,
|
||||
"#define NUM_VERTEX_ELEMENTS %d\n"
|
||||
"#define NUM_VARYING_ELEMENTS %d\n"
|
||||
"#define WORK_GROUP_SIZE %d\n",
|
||||
numVertexElements, numVaryingElements, _workGroupSize);
|
||||
std::ostringstream defines;
|
||||
defines << "#define NUM_VERTEX_ELEMENTS " << _numVertexElements << "\n"
|
||||
<< "#define VERTEX_STRIDE " << _vertexStride << "\n"
|
||||
<< "#define NUM_VARYING_ELEMENTS " << _numVaryingElements << "\n"
|
||||
<< "#define VARYING_STRIDE " << _varyingStride << "\n"
|
||||
<< "#define WORK_GROUP_SIZE " << _workGroupSize << "\n";
|
||||
std::string defineStr = defines.str();
|
||||
|
||||
const char *shaderSources[3];
|
||||
shaderSources[0] = constantDefine;
|
||||
shaderSources[0] = defineStr.c_str();
|
||||
shaderSources[1] = shaderSource;
|
||||
glShaderSource(shader, 2, shaderSources, NULL);
|
||||
glCompileShader(shader);
|
||||
@ -98,9 +109,6 @@ OsdGLSLComputeKernelBundle::Compile(int numVertexElements, int numVaryingElement
|
||||
|
||||
glDeleteProgram(_program);
|
||||
_program = 0;
|
||||
// XXX ERROR HANDLE
|
||||
printf("%s\n", constantDefine);
|
||||
assert(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -129,11 +137,13 @@ OsdGLSLComputeKernelBundle::Compile(int numVertexElements, int numVaryingElement
|
||||
"loopComputeVertexB");
|
||||
|
||||
// set uniform locations for compute
|
||||
_uniformVertexPass = glGetUniformLocation(_program, "vertexPass");
|
||||
_uniformVertexOffset = glGetUniformLocation(_program, "vertexOffset");
|
||||
_uniformTableOffset = glGetUniformLocation(_program, "tableOffset");
|
||||
_uniformIndexStart = glGetUniformLocation(_program, "indexStart");
|
||||
_uniformIndexEnd = glGetUniformLocation(_program, "indexEnd");
|
||||
_uniformVertexPass = glGetUniformLocation(_program, "vertexPass");
|
||||
_uniformVertexOffset = glGetUniformLocation(_program, "vertexOffset");
|
||||
_uniformTableOffset = glGetUniformLocation(_program, "tableOffset");
|
||||
_uniformIndexStart = glGetUniformLocation(_program, "indexStart");
|
||||
_uniformIndexEnd = glGetUniformLocation(_program, "indexEnd");
|
||||
_uniformVertexBaseOffset = glGetUniformLocation(_program, "vertexBaseOffset");
|
||||
_uniformVaryingBaseOffset = glGetUniformLocation(_program, "varyingBaseOffset");
|
||||
|
||||
_tableUniforms[FarSubdivisionTables::F_IT] = glGetUniformLocation(_program, "_F0_IT");
|
||||
_tableUniforms[FarSubdivisionTables::F_ITa] = glGetUniformLocation(_program, "_F0_ITa");
|
||||
@ -176,8 +186,7 @@ OsdGLSLComputeKernelBundle::dispatchCompute(
|
||||
// we found a problem (issue #295) with nvidia driver 331.49 / Quadro4000
|
||||
// resulting invalid vertices.
|
||||
// Apparently adding TEXTURE_FETCH_BARRIER after face kernel fixes it.
|
||||
// We'll revisit this later.
|
||||
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
|
||||
// The workaround is commented out, since it looks fixed at driver 334.xx.
|
||||
}
|
||||
|
||||
void
|
||||
@ -186,6 +195,8 @@ OsdGLSLComputeKernelBundle::ApplyBilinearFaceVerticesKernel(
|
||||
|
||||
glUniformSubroutinesuiv(GL_COMPUTE_SHADER, 1, &_subComputeFace);
|
||||
dispatchCompute(vertexOffset, tableOffset, start, end);
|
||||
|
||||
// glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT);
|
||||
}
|
||||
|
||||
void
|
||||
@ -213,8 +224,8 @@ OsdGLSLComputeKernelBundle::ApplyCatmarkFaceVerticesKernel(
|
||||
dispatchCompute(vertexOffset, tableOffset, start, end);
|
||||
|
||||
// see the comment in dispatchCompute()
|
||||
// this workaround could be a performance problem
|
||||
glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT);
|
||||
// this workaround causes a performance problem.
|
||||
// glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT);
|
||||
}
|
||||
|
||||
void
|
||||
@ -279,9 +290,13 @@ OsdGLSLComputeKernelBundle::ApplyEditAdd(
|
||||
}
|
||||
|
||||
void
|
||||
OsdGLSLComputeKernelBundle::UseProgram() const
|
||||
OsdGLSLComputeKernelBundle::UseProgram(int vertexBaseOffset,
|
||||
int varyingBaseOffset) const
|
||||
{
|
||||
glUseProgram(_program);
|
||||
|
||||
glUniform1i(_uniformVertexBaseOffset, vertexBaseOffset);
|
||||
glUniform1i(_uniformVaryingBaseOffset, varyingBaseOffset);
|
||||
}
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
|
@ -42,7 +42,8 @@ public:
|
||||
OsdGLSLComputeKernelBundle();
|
||||
~OsdGLSLComputeKernelBundle();
|
||||
|
||||
bool Compile(int numVertexElements, int numVaryingElements);
|
||||
bool Compile(OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc);
|
||||
|
||||
void ApplyBilinearFaceVerticesKernel(
|
||||
int vertexOffset, int tableOffset, int start, int end);
|
||||
@ -75,32 +76,40 @@ public:
|
||||
int vertexOffset, int tableOffset, int start, int end, bool pass);
|
||||
|
||||
void ApplyEditAdd(int primvarOffset, int primvarWidth,
|
||||
int vertexOffset, int tableOffset, int start, int end);
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void UseProgram() const;
|
||||
void UseProgram(int vertexBaseOffset, int varyingBaseOffset) const;
|
||||
|
||||
GLuint GetTableUniformLocation(int tableIndex) const {
|
||||
return _tableUniforms[tableIndex];
|
||||
}
|
||||
|
||||
struct Match {
|
||||
|
||||
/// Constructor
|
||||
Match(int numVertexElements, int numVaryingElements)
|
||||
: vdesc(numVertexElements, numVaryingElements) {
|
||||
Match(OsdVertexBufferDescriptor const &vertex,
|
||||
OsdVertexBufferDescriptor const &varying)
|
||||
: vertexDesc(vertex), varyingDesc(varying) {
|
||||
}
|
||||
|
||||
bool operator() (OsdGLSLComputeKernelBundle const *kernel) {
|
||||
return vdesc == kernel->_vdesc;
|
||||
// offset is dynamic. just comparing length and stride here,
|
||||
// returns true if they are equal
|
||||
return (vertexDesc.length == kernel->_numVertexElements and
|
||||
vertexDesc.stride == kernel->_vertexStride and
|
||||
varyingDesc.length == kernel->_numVaryingElements and
|
||||
varyingDesc.stride == kernel->_varyingStride);
|
||||
}
|
||||
|
||||
OsdVertexDescriptor vdesc;
|
||||
OsdVertexBufferDescriptor vertexDesc;
|
||||
OsdVertexBufferDescriptor varyingDesc;
|
||||
};
|
||||
|
||||
friend struct Match;
|
||||
|
||||
protected:
|
||||
void dispatchCompute(int vertexOffset, int tableOffset, int start, int end) const;
|
||||
void dispatchCompute(int vertexOffset, int tableOffset,
|
||||
int start, int end) const ;
|
||||
|
||||
GLuint _program;
|
||||
|
||||
@ -111,6 +120,8 @@ protected:
|
||||
GLuint _uniformTableOffset;
|
||||
GLuint _uniformIndexStart;
|
||||
GLuint _uniformIndexEnd;
|
||||
GLuint _uniformVertexBaseOffset;
|
||||
GLuint _uniformVaryingBaseOffset;
|
||||
|
||||
// uniform locations for vertex edit
|
||||
GLuint _uniformEditPrimVarOffset;
|
||||
@ -135,7 +146,10 @@ protected:
|
||||
|
||||
int _workGroupSize;
|
||||
|
||||
OsdVertexDescriptor _vdesc;
|
||||
int _numVertexElements;
|
||||
int _vertexStride;
|
||||
int _numVaryingElements;
|
||||
int _varyingStride;
|
||||
};
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
|
@ -140,6 +140,7 @@ mat4 OsdModelViewProjectionMatrix();
|
||||
float OsdTessLevel();
|
||||
int OsdGregoryQuadOffsetBase();
|
||||
int OsdPrimitiveIdBase();
|
||||
int OsdBaseVertex();
|
||||
|
||||
float GetTessLevel(int patchLevel)
|
||||
{
|
||||
|
@ -68,6 +68,14 @@ out block {
|
||||
OSD_USER_VARYING_DECLARE
|
||||
} outpt;
|
||||
|
||||
// Reads a 3-component position from the OsdVertexBuffer texture buffer.
//
// vertexIndex is first shifted by OsdBaseVertex(). The three components
// are then fetched from consecutive texels starting at
// OSD_NUM_ELEMENTS * vertexIndex, taking the .x channel of each texel.
vec3 readVertex(uint vertexIndex)
{
    vertexIndex += OsdBaseVertex();

    // texel index of the first component of this vertex
    uint first = OSD_NUM_ELEMENTS*vertexIndex;

    float x = texelFetch(OsdVertexBuffer, int(first)).x;
    float y = texelFetch(OsdVertexBuffer, int(first+1)).x;
    float z = texelFetch(OsdVertexBuffer, int(first+2)).x;

    return vec3(x, y, z);
}
|
||||
|
||||
void main()
|
||||
{
|
||||
int vID = gl_VertexID;
|
||||
@ -122,38 +130,23 @@ void main()
|
||||
}
|
||||
#endif
|
||||
|
||||
vec3 neighbor =
|
||||
vec3(texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_neighbor)).x,
|
||||
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_neighbor+1)).x,
|
||||
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_neighbor+2)).x);
|
||||
vec3 neighbor = readVertex(idx_neighbor);
|
||||
|
||||
uint idx_diagonal = uint(texelFetch(OsdValenceBuffer, int(vID * (2*OSD_MAX_VALENCE+1) + 2*i + 1 + 1)).x);
|
||||
|
||||
vec3 diagonal =
|
||||
vec3(texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_diagonal)).x,
|
||||
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_diagonal+1)).x,
|
||||
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_diagonal+2)).x);
|
||||
vec3 diagonal = readVertex(idx_diagonal);
|
||||
|
||||
uint idx_neighbor_p = uint(texelFetch(OsdValenceBuffer, int(vID * (2*OSD_MAX_VALENCE+1) + 2*ip + 0 + 1)).x);
|
||||
|
||||
vec3 neighbor_p =
|
||||
vec3(texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_neighbor_p)).x,
|
||||
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_neighbor_p+1)).x,
|
||||
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_neighbor_p+2)).x);
|
||||
vec3 neighbor_p = readVertex(idx_neighbor_p);
|
||||
|
||||
uint idx_neighbor_m = uint(texelFetch(OsdValenceBuffer, int(vID * (2*OSD_MAX_VALENCE+1) + 2*im + 0 + 1)).x);
|
||||
|
||||
vec3 neighbor_m =
|
||||
vec3(texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_neighbor_m)).x,
|
||||
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_neighbor_m+1)).x,
|
||||
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_neighbor_m+2)).x);
|
||||
vec3 neighbor_m = readVertex(idx_neighbor_m);
|
||||
|
||||
uint idx_diagonal_m = uint(texelFetch(OsdValenceBuffer, int(vID * (2*OSD_MAX_VALENCE+1) + 2*im + 1 + 1)).x);
|
||||
|
||||
vec3 diagonal_m =
|
||||
vec3(texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_diagonal_m)).x,
|
||||
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_diagonal_m+1)).x,
|
||||
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_diagonal_m+2)).x);
|
||||
vec3 diagonal_m = readVertex(idx_diagonal_m);
|
||||
|
||||
f[i] = (pos * float(valence) + (neighbor_p + neighbor)*2.0f + diagonal) / (float(valence)+5.0f);
|
||||
|
||||
@ -186,24 +179,16 @@ void main()
|
||||
if (ivalence < 0) {
|
||||
if (valence > 2) {
|
||||
outpt.v.position = (
|
||||
vec3(texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[0])).x,
|
||||
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[0]+1)).x,
|
||||
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[0]+2)).x) +
|
||||
vec3(texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[1])).x,
|
||||
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[1]+1)).x,
|
||||
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[1]+2)).x) +
|
||||
readVertex(boundaryEdgeNeighbors[0]) +
|
||||
readVertex(boundaryEdgeNeighbors[1]) +
|
||||
4.0f * pos)/6.0f;
|
||||
} else {
|
||||
outpt.v.position = pos;
|
||||
}
|
||||
|
||||
outpt.v.e0 = (
|
||||
vec3(texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[0])).x,
|
||||
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[0]+1)).x,
|
||||
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[0]+2)).x) -
|
||||
vec3(texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[1])).x,
|
||||
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[1]+1)).x,
|
||||
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[1]+2)).x)
|
||||
readVertex(boundaryEdgeNeighbors[0]) -
|
||||
readVertex(boundaryEdgeNeighbors[1])
|
||||
)/6.0;
|
||||
|
||||
float k = float(float(valence) - 1.0f); //k is the number of faces
|
||||
@ -216,18 +201,11 @@ void main()
|
||||
|
||||
int idx_diagonal = texelFetch(OsdValenceBuffer,int((vID) * (2*OSD_MAX_VALENCE+1) + 2*zerothNeighbor + 1 + 1)).x;
|
||||
idx_diagonal = abs(idx_diagonal);
|
||||
vec3 diagonal =
|
||||
vec3(texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_diagonal)).x,
|
||||
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_diagonal+1)).x,
|
||||
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_diagonal+2)).x);
|
||||
vec3 diagonal = readVertex(idx_diagonal);
|
||||
|
||||
outpt.v.e1 = gamma * pos +
|
||||
alpha_0k * vec3(texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[0])).x,
|
||||
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[0]+1)).x,
|
||||
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[0]+2)).x) +
|
||||
alpha_0k * vec3(texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[1])).x,
|
||||
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[1]+1)).x,
|
||||
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*boundaryEdgeNeighbors[1]+2)).x) +
|
||||
alpha_0k * readVertex(boundaryEdgeNeighbors[0]) +
|
||||
alpha_0k * readVertex(boundaryEdgeNeighbors[1]) +
|
||||
beta_0 * diagonal;
|
||||
|
||||
for (uint x=1; x<valence - 1; ++x) {
|
||||
@ -238,17 +216,11 @@ void main()
|
||||
int idx_neighbor = texelFetch(OsdValenceBuffer, int((vID) * (2*OSD_MAX_VALENCE+1) + 2*curri + 0 + 1)).x;
|
||||
idx_neighbor = abs(idx_neighbor);
|
||||
|
||||
vec3 neighbor =
|
||||
vec3(texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_neighbor)).x,
|
||||
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_neighbor+1)).x,
|
||||
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_neighbor+2)).x);
|
||||
vec3 neighbor = readVertex(idx_neighbor);
|
||||
|
||||
idx_diagonal = texelFetch(OsdValenceBuffer, int((vID) * (2*OSD_MAX_VALENCE+1) + 2*curri + 1 + 1)).x;
|
||||
|
||||
diagonal =
|
||||
vec3(texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_diagonal)).x,
|
||||
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_diagonal+1)).x,
|
||||
texelFetch(OsdVertexBuffer, int(OSD_NUM_ELEMENTS*idx_diagonal+2)).x);
|
||||
diagonal = readVertex(idx_diagonal);
|
||||
|
||||
outpt.v.e1 += alpha * neighbor + beta * diagonal;
|
||||
}
|
||||
|
@ -35,9 +35,7 @@ namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
OsdGLSLTransformFeedbackComputeController::OsdGLSLTransformFeedbackComputeController() :
|
||||
_vertexTexture(0), _varyingTexture(0),
|
||||
_currentVertexBuffer(0), _currentVaryingBuffer(0),
|
||||
_currentKernelBundle(NULL) {
|
||||
_vertexTexture(0), _varyingTexture(0), _vao(0) {
|
||||
}
|
||||
|
||||
OsdGLSLTransformFeedbackComputeController::~OsdGLSLTransformFeedbackComputeController() {
|
||||
@ -58,19 +56,23 @@ OsdGLSLTransformFeedbackComputeController::Synchronize() {
|
||||
}
|
||||
|
||||
OsdGLSLTransformFeedbackKernelBundle *
|
||||
OsdGLSLTransformFeedbackComputeController::getKernels(int numVertexElements,
|
||||
int numVaryingElements) {
|
||||
OsdGLSLTransformFeedbackComputeController::getKernels(
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
bool interleaved) {
|
||||
|
||||
std::vector<OsdGLSLTransformFeedbackKernelBundle*>::iterator it =
|
||||
std::find_if(_kernelRegistry.begin(), _kernelRegistry.end(),
|
||||
OsdGLSLTransformFeedbackKernelBundle::Match(numVertexElements,
|
||||
numVaryingElements));
|
||||
OsdGLSLTransformFeedbackKernelBundle::Match(
|
||||
vertexDesc, varyingDesc, interleaved));
|
||||
|
||||
if (it != _kernelRegistry.end()) {
|
||||
return *it;
|
||||
} else {
|
||||
OsdGLSLTransformFeedbackKernelBundle *kernelBundle = new OsdGLSLTransformFeedbackKernelBundle();
|
||||
OsdGLSLTransformFeedbackKernelBundle *kernelBundle =
|
||||
new OsdGLSLTransformFeedbackKernelBundle();
|
||||
_kernelRegistry.push_back(kernelBundle);
|
||||
kernelBundle->Compile(numVertexElements, numVaryingElements);
|
||||
kernelBundle->Compile(vertexDesc, varyingDesc, interleaved);
|
||||
return kernelBundle;
|
||||
}
|
||||
}
|
||||
@ -86,54 +88,59 @@ bindTexture(GLint samplerUniform, GLuint texture, int unit) {
|
||||
}
|
||||
|
||||
void
|
||||
OsdGLSLTransformFeedbackComputeController::bindTextures() {
|
||||
OsdGLSLTransformFeedbackComputeController::bindResources() {
|
||||
|
||||
glEnable(GL_RASTERIZER_DISCARD);
|
||||
_currentKernelBundle->UseProgram();
|
||||
_currentBindState.kernelBundle->UseProgram(_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset);
|
||||
|
||||
// bind vertex texture
|
||||
if (_currentVertexBuffer) {
|
||||
if (_currentBindState.vertexBuffer) {
|
||||
if (not _vertexTexture) glGenTextures(1, &_vertexTexture);
|
||||
#if defined(GL_EXT_direct_state_access)
|
||||
if (glTextureBufferEXT) {
|
||||
glTextureBufferEXT(_vertexTexture, GL_TEXTURE_BUFFER, GL_R32F, _currentVertexBuffer);
|
||||
glTextureBufferEXT(_vertexTexture, GL_TEXTURE_BUFFER, GL_R32F, _currentBindState.vertexBuffer);
|
||||
} else {
|
||||
#else
|
||||
{
|
||||
#endif
|
||||
glBindTexture(GL_TEXTURE_BUFFER, _vertexTexture);
|
||||
glTexBuffer(GL_TEXTURE_BUFFER, GL_R32F, _currentVertexBuffer);
|
||||
glTexBuffer(GL_TEXTURE_BUFFER, GL_R32F, _currentBindState.vertexBuffer);
|
||||
glBindTexture(GL_TEXTURE_BUFFER, 0);
|
||||
}
|
||||
}
|
||||
|
||||
if (_currentVaryingBuffer) {
|
||||
if (_currentBindState.varyingBuffer) {
|
||||
if (not _varyingTexture) glGenTextures(1, &_varyingTexture);
|
||||
#if defined(GL_EXT_direct_state_access)
|
||||
if (glTextureBufferEXT) {
|
||||
glTextureBufferEXT(_varyingTexture, GL_TEXTURE_BUFFER, GL_R32F, _currentVaryingBuffer);
|
||||
glTextureBufferEXT(_varyingTexture, GL_TEXTURE_BUFFER, GL_R32F, _currentBindState.varyingBuffer);
|
||||
} else {
|
||||
#else
|
||||
{
|
||||
#endif
|
||||
glBindTexture(GL_TEXTURE_BUFFER, _varyingTexture);
|
||||
glTexBuffer(GL_TEXTURE_BUFFER, GL_R32F, _currentVaryingBuffer);
|
||||
glTexBuffer(GL_TEXTURE_BUFFER, GL_R32F, _currentBindState.varyingBuffer);
|
||||
glBindTexture(GL_TEXTURE_BUFFER, 0);
|
||||
}
|
||||
}
|
||||
|
||||
if (_vertexTexture)
|
||||
bindTexture(_currentKernelBundle->GetVertexUniformLocation(), _vertexTexture, 0);
|
||||
bindTexture(_currentBindState.kernelBundle->GetVertexUniformLocation(), _vertexTexture, 0);
|
||||
if (_varyingTexture)
|
||||
bindTexture(_currentKernelBundle->GetVaryingUniformLocation(), _varyingTexture, 1);
|
||||
bindTexture(_currentBindState.kernelBundle->GetVaryingUniformLocation(), _varyingTexture, 1);
|
||||
|
||||
// bind vertex texture image (for edit kernel)
|
||||
glUniform1i(_currentKernelBundle->GetVertexBufferImageUniformLocation(), 0);
|
||||
glUniform1i(_currentBindState.kernelBundle->GetVertexBufferImageUniformLocation(), 0);
|
||||
glBindImageTexture(0, _vertexTexture, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32F);
|
||||
|
||||
// bind vertex array
|
||||
// always create new one, to be safe with multiple contexts.
|
||||
glGenVertexArrays(1, &_vao);
|
||||
glBindVertexArray(_vao);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGLSLTransformFeedbackComputeController::unbindTextures() {
|
||||
OsdGLSLTransformFeedbackComputeController::unbindResources() {
|
||||
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
glBindTexture(GL_TEXTURE_BUFFER, 0);
|
||||
@ -146,6 +153,10 @@ OsdGLSLTransformFeedbackComputeController::unbindTextures() {
|
||||
glDisable(GL_RASTERIZER_DISCARD);
|
||||
glUseProgram(0);
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
|
||||
// unbind vertex array
|
||||
glBindVertexArray(0);
|
||||
glDeleteVertexArrays(1, &_vao);
|
||||
}
|
||||
|
||||
void
|
||||
@ -154,9 +165,9 @@ OsdGLSLTransformFeedbackComputeController::ApplyBilinearFaceVerticesKernel(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyBilinearFaceVerticesKernel(
|
||||
_currentVertexBuffer, _vdesc.numVertexElements,
|
||||
_currentVaryingBuffer, _vdesc.numVaryingElements,
|
||||
_currentBindState.kernelBundle->ApplyBilinearFaceVerticesKernel(
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
@ -166,9 +177,9 @@ OsdGLSLTransformFeedbackComputeController::ApplyBilinearEdgeVerticesKernel(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyBilinearEdgeVerticesKernel(
|
||||
_currentVertexBuffer, _vdesc.numVertexElements,
|
||||
_currentVaryingBuffer, _vdesc.numVaryingElements,
|
||||
_currentBindState.kernelBundle->ApplyBilinearEdgeVerticesKernel(
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
@ -178,9 +189,9 @@ OsdGLSLTransformFeedbackComputeController::ApplyBilinearVertexVerticesKernel(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyBilinearVertexVerticesKernel(
|
||||
_currentVertexBuffer, _vdesc.numVertexElements,
|
||||
_currentVaryingBuffer, _vdesc.numVaryingElements,
|
||||
_currentBindState.kernelBundle->ApplyBilinearVertexVerticesKernel(
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
@ -190,9 +201,9 @@ OsdGLSLTransformFeedbackComputeController::ApplyCatmarkFaceVerticesKernel(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyCatmarkFaceVerticesKernel(
|
||||
_currentVertexBuffer, _vdesc.numVertexElements,
|
||||
_currentVaryingBuffer, _vdesc.numVaryingElements,
|
||||
_currentBindState.kernelBundle->ApplyCatmarkFaceVerticesKernel(
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
@ -204,9 +215,9 @@ OsdGLSLTransformFeedbackComputeController::ApplyCatmarkEdgeVerticesKernel(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyCatmarkEdgeVerticesKernel(
|
||||
_currentVertexBuffer, _vdesc.numVertexElements,
|
||||
_currentVaryingBuffer, _vdesc.numVaryingElements,
|
||||
_currentBindState.kernelBundle->ApplyCatmarkEdgeVerticesKernel(
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
@ -216,9 +227,9 @@ OsdGLSLTransformFeedbackComputeController::ApplyCatmarkVertexVerticesKernelB(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyCatmarkVertexVerticesKernelB(
|
||||
_currentVertexBuffer, _vdesc.numVertexElements,
|
||||
_currentVaryingBuffer, _vdesc.numVaryingElements,
|
||||
_currentBindState.kernelBundle->ApplyCatmarkVertexVerticesKernelB(
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
@ -228,9 +239,9 @@ OsdGLSLTransformFeedbackComputeController::ApplyCatmarkVertexVerticesKernelA1(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyCatmarkVertexVerticesKernelA(
|
||||
_currentVertexBuffer, _vdesc.numVertexElements,
|
||||
_currentVaryingBuffer, _vdesc.numVaryingElements,
|
||||
_currentBindState.kernelBundle->ApplyCatmarkVertexVerticesKernelA(
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), false);
|
||||
}
|
||||
|
||||
@ -240,9 +251,9 @@ OsdGLSLTransformFeedbackComputeController::ApplyCatmarkVertexVerticesKernelA2(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyCatmarkVertexVerticesKernelA(
|
||||
_currentVertexBuffer, _vdesc.numVertexElements,
|
||||
_currentVaryingBuffer, _vdesc.numVaryingElements,
|
||||
_currentBindState.kernelBundle->ApplyCatmarkVertexVerticesKernelA(
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
|
||||
}
|
||||
|
||||
@ -252,9 +263,9 @@ OsdGLSLTransformFeedbackComputeController::ApplyLoopEdgeVerticesKernel(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyLoopEdgeVerticesKernel(
|
||||
_currentVertexBuffer, _vdesc.numVertexElements,
|
||||
_currentVaryingBuffer, _vdesc.numVaryingElements,
|
||||
_currentBindState.kernelBundle->ApplyLoopEdgeVerticesKernel(
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
@ -264,9 +275,9 @@ OsdGLSLTransformFeedbackComputeController::ApplyLoopVertexVerticesKernelB(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyLoopVertexVerticesKernelB(
|
||||
_currentVertexBuffer, _vdesc.numVertexElements,
|
||||
_currentVaryingBuffer, _vdesc.numVaryingElements,
|
||||
_currentBindState.kernelBundle->ApplyLoopVertexVerticesKernelB(
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
|
||||
@ -276,9 +287,9 @@ OsdGLSLTransformFeedbackComputeController::ApplyLoopVertexVerticesKernelA1(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyLoopVertexVerticesKernelA(
|
||||
_currentVertexBuffer, _vdesc.numVertexElements,
|
||||
_currentVaryingBuffer, _vdesc.numVaryingElements,
|
||||
_currentBindState.kernelBundle->ApplyLoopVertexVerticesKernelA(
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), false);
|
||||
}
|
||||
|
||||
@ -288,9 +299,9 @@ OsdGLSLTransformFeedbackComputeController::ApplyLoopVertexVerticesKernelA2(
|
||||
|
||||
assert(context);
|
||||
|
||||
_currentKernelBundle->ApplyLoopVertexVerticesKernelA(
|
||||
_currentVertexBuffer, _vdesc.numVertexElements,
|
||||
_currentVaryingBuffer, _vdesc.numVaryingElements,
|
||||
_currentBindState.kernelBundle->ApplyLoopVertexVerticesKernelA(
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
|
||||
}
|
||||
|
||||
@ -303,15 +314,15 @@ OsdGLSLTransformFeedbackComputeController::ApplyVertexEdits(
|
||||
const OsdGLSLTransformFeedbackHEditTable * edit = context->GetEditTable(batch.GetTableIndex());
|
||||
assert(edit);
|
||||
|
||||
context->BindEditTextures(batch.GetTableIndex(), _currentKernelBundle);
|
||||
context->BindEditTextures(batch.GetTableIndex(), _currentBindState.kernelBundle);
|
||||
|
||||
int primvarOffset = edit->GetPrimvarOffset();
|
||||
int primvarWidth = edit->GetPrimvarWidth();
|
||||
|
||||
if (edit->GetOperation() == FarVertexEdit::Add) {
|
||||
_currentKernelBundle->ApplyEditAdd(
|
||||
_currentVertexBuffer, _vdesc.numVertexElements,
|
||||
_currentVaryingBuffer, _vdesc.numVaryingElements,
|
||||
_currentBindState.kernelBundle->ApplyEditAdd(
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
|
||||
primvarOffset, primvarWidth,
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
} else {
|
||||
|
@ -29,6 +29,7 @@
|
||||
|
||||
#include "../far/dispatcher.h"
|
||||
#include "../osd/glslTransformFeedbackComputeContext.h"
|
||||
#include "../osd/vertexDescriptor.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
@ -69,16 +70,26 @@ public:
|
||||
///
|
||||
/// @param varyingBuffer varying-interpolated data buffer
|
||||
///
|
||||
/// @param vertexDesc the descriptor of vertex elements to be refined.
|
||||
/// if it's null, all primvars in the vertex buffer
|
||||
/// will be refined.
|
||||
///
|
||||
/// @param varyingDesc the descriptor of varying elements to be refined.
|
||||
/// if it's null, all primvars in the varying buffer
|
||||
/// will be refined.
|
||||
///
|
||||
template<class VERTEX_BUFFER, class VARYING_BUFFER>
|
||||
void Refine(OsdGLSLTransformFeedbackComputeContext const *context,
|
||||
FarKernelBatchVector const &batches,
|
||||
VERTEX_BUFFER *vertexBuffer,
|
||||
VARYING_BUFFER *varyingBuffer) {
|
||||
VARYING_BUFFER *varyingBuffer,
|
||||
OsdVertexBufferDescriptor const *vertexDesc=NULL,
|
||||
OsdVertexBufferDescriptor const *varyingDesc=NULL) {
|
||||
|
||||
if (batches.empty()) return;
|
||||
|
||||
bind(vertexBuffer, varyingBuffer);
|
||||
context->BindTableTextures(_currentKernelBundle);
|
||||
bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc);
|
||||
context->BindTableTextures(_currentBindState.kernelBundle);
|
||||
|
||||
FarDispatcher::Refine(this, context, batches, /*maxlevel*/-1);
|
||||
|
||||
@ -136,47 +147,73 @@ protected:
|
||||
|
||||
void ApplyVertexEdits(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
OsdGLSLTransformFeedbackKernelBundle * getKernels(int numVertexElements,
|
||||
int numVaryingElements);
|
||||
OsdGLSLTransformFeedbackKernelBundle * getKernels(
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
bool interleaved);
|
||||
|
||||
void bindTextures();
|
||||
void bindResources();
|
||||
|
||||
void unbindTextures();
|
||||
void unbindResources();
|
||||
|
||||
template<class VERTEX_BUFFER, class VARYING_BUFFER>
|
||||
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying) {
|
||||
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying,
|
||||
OsdVertexBufferDescriptor const *vertexDesc,
|
||||
OsdVertexBufferDescriptor const *varyingDesc) {
|
||||
|
||||
_currentVertexBuffer = vertex ? vertex->BindVBO() : 0;
|
||||
_currentVaryingBuffer = varying ? varying->BindVBO() : 0;
|
||||
// if the vertex buffer descriptor is specified, use it.
|
||||
// otherwise, assumes the data is tightly packed in the vertex buffer.
|
||||
if (vertexDesc) {
|
||||
_currentBindState.vertexDesc = *vertexDesc;
|
||||
} else {
|
||||
int numElements = vertex ? vertex->GetNumElements() : 0;
|
||||
_currentBindState.vertexDesc = OsdVertexBufferDescriptor(
|
||||
0, numElements, numElements);
|
||||
}
|
||||
if (varyingDesc) {
|
||||
_currentBindState.varyingDesc = *varyingDesc;
|
||||
} else {
|
||||
int numElements = varying ? varying->GetNumElements() : 0;
|
||||
_currentBindState.varyingDesc = OsdVertexBufferDescriptor(
|
||||
0, numElements, numElements);
|
||||
}
|
||||
|
||||
_vdesc.numVertexElements = vertex ? vertex->GetNumElements() : 0;
|
||||
_vdesc.numVaryingElements = varying ? varying->GetNumElements() : 0;
|
||||
bool interleaved = (vertex and varying and (vertex == varying));
|
||||
_currentBindState.vertexBuffer = vertex ? vertex->BindVBO() : 0;
|
||||
_currentBindState.varyingBuffer = varying ? varying->BindVBO() : 0;
|
||||
_currentBindState.kernelBundle = getKernels(_currentBindState.vertexDesc,
|
||||
_currentBindState.varyingDesc,
|
||||
interleaved);
|
||||
|
||||
_currentKernelBundle =
|
||||
getKernels(_vdesc.numVertexElements, _vdesc.numVaryingElements);
|
||||
|
||||
bindTextures();
|
||||
bindResources();
|
||||
}
|
||||
|
||||
/// Unbinds any previously bound vertex and varying data buffers.
|
||||
void unbind() {
|
||||
_currentVertexBuffer = 0;
|
||||
_currentVaryingBuffer = 0;
|
||||
_currentKernelBundle = NULL;
|
||||
_currentBindState.Reset();
|
||||
|
||||
unbindTextures();
|
||||
unbindResources();
|
||||
}
|
||||
|
||||
private:
|
||||
struct BindState {
|
||||
BindState() : vertexBuffer(0), varyingBuffer(0), kernelBundle(NULL) {}
|
||||
void Reset() {
|
||||
vertexBuffer = varyingBuffer = 0;
|
||||
vertexDesc.Reset();
|
||||
varyingDesc.Reset();
|
||||
}
|
||||
GLuint vertexBuffer;
|
||||
GLuint varyingBuffer;
|
||||
OsdVertexBufferDescriptor vertexDesc;
|
||||
OsdVertexBufferDescriptor varyingDesc;
|
||||
OsdGLSLTransformFeedbackKernelBundle *kernelBundle;
|
||||
};
|
||||
BindState _currentBindState;
|
||||
|
||||
std::vector<OsdGLSLTransformFeedbackKernelBundle *> _kernelRegistry;
|
||||
|
||||
GLuint _vertexTexture, _varyingTexture;
|
||||
GLuint _currentVertexBuffer, _currentVaryingBuffer;
|
||||
|
||||
OsdVertexDescriptor _vdesc;
|
||||
|
||||
OsdGLSLTransformFeedbackKernelBundle * _currentKernelBundle;
|
||||
|
||||
GLuint _vao;
|
||||
};
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
|
@ -41,6 +41,8 @@ layout(size1x32) uniform imageBuffer _vertexBufferImage;
|
||||
uniform int vertexOffset = 0; // vertex index offset for the batch
|
||||
uniform int tableOffset = 0; // offset of subdivision table
|
||||
uniform int indexStart = 0; // start index relative to tableOffset
|
||||
uniform int vertexBaseOffset = 0; // base vbo offset of the vertex buffer
|
||||
uniform int varyingBaseOffset = 0; // base vbo offset of the varying buffer
|
||||
uniform bool vertexPass;
|
||||
|
||||
/*
|
||||
@ -50,6 +52,12 @@ uniform bool vertexPass;
|
||||
^ ^
|
||||
vertexOffset |
|
||||
indexStart
|
||||
|
||||
|
||||
NUM_VERTEX_ELEMENTS = 3
|
||||
NUM_VARYING_ELEMENTS = 4
|
||||
VERTEX_STRIDE = VARYING_STRIDE = 7
|
||||
|
||||
*/
|
||||
|
||||
//--------------------------------------------------------------------------------
|
||||
@ -100,13 +108,15 @@ Vertex readVertex(int index)
|
||||
|
||||
// unpacking
|
||||
#if NUM_VERTEX_ELEMENTS > 0
|
||||
int vertexIndex = index * VERTEX_STRIDE;
|
||||
for(int i = 0; i < NUM_VERTEX_ELEMENTS; i++) {
|
||||
v.vertexData[i] = texelFetch(vertexData, index*NUM_VERTEX_ELEMENTS+i).x;
|
||||
v.vertexData[i] = texelFetch(vertexData, vertexIndex+i+vertexBaseOffset).x;
|
||||
}
|
||||
#endif
|
||||
#if NUM_VARYING_ELEMENTS > 0
|
||||
int varyingIndex = index * VARYING_STRIDE;
|
||||
for(int i = 0; i < NUM_VARYING_ELEMENTS; i++){
|
||||
v.varyingData[i] = texelFetch(varyingData, index*NUM_VARYING_ELEMENTS+i).x;
|
||||
v.varyingData[i] = texelFetch(varyingData, varyingIndex+i+varyingBaseOffset).x;
|
||||
}
|
||||
#endif
|
||||
return v;
|
||||
@ -130,7 +140,7 @@ void writeVertex(Vertex v)
|
||||
void writeVertexByImageStore(Vertex v, int index)
|
||||
{
|
||||
#if NUM_VERTEX_ELEMENTS > 0
|
||||
int p = index * NUM_VERTEX_ELEMENTS;
|
||||
int p = index * VERTEX_STRIDE + vertexBaseOffset;
|
||||
for(int i = 0; i < NUM_VERTEX_ELEMENTS; i++) {
|
||||
imageStore(_vertexBufferImage, p+i, vec4(v.vertexData[i], 0, 0, 0));
|
||||
}
|
||||
|
@ -40,6 +40,7 @@
|
||||
|
||||
#include <cassert>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
@ -61,7 +62,12 @@ static const char *shaderDefines = ""
|
||||
;
|
||||
|
||||
// Constructs an empty kernel bundle: no GL program and zeroed layout
// discriminators. Compile() fills in the real values later.
//
// Every layout member is initialized here, including the two offset modulos,
// so that no member is left indeterminate before Compile() runs
// (transformGpuBufferData() reads _vertexOffsetMod / _varyingOffsetMod).
// Initializers follow the member declaration order in the class.
OsdGLSLTransformFeedbackKernelBundle::OsdGLSLTransformFeedbackKernelBundle()
    : _program(0),
      _numVertexElements(0),
      _vertexStride(0),
      _numVaryingElements(0),
      _varyingStride(0),
      _vertexOffsetMod(0),
      _varyingOffsetMod(0),
      _interleaved(false) {
}
|
||||
|
||||
OsdGLSLTransformFeedbackKernelBundle::~OsdGLSLTransformFeedbackKernelBundle() {
|
||||
@ -70,24 +76,34 @@ OsdGLSLTransformFeedbackKernelBundle::~OsdGLSLTransformFeedbackKernelBundle() {
|
||||
}
|
||||
|
||||
bool
|
||||
OsdGLSLTransformFeedbackKernelBundle::Compile(int numVertexElements, int numVaryingElements) {
|
||||
OsdGLSLTransformFeedbackKernelBundle::Compile(
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
bool interleaved) {
|
||||
|
||||
assert(numVertexElements >= 3); // at least xyz required (for performance reason)
|
||||
_numVertexElements = vertexDesc.length;
|
||||
_vertexStride = vertexDesc.stride;
|
||||
_numVaryingElements = varyingDesc.length;
|
||||
_varyingStride = varyingDesc.stride;
|
||||
_interleaved = interleaved;
|
||||
|
||||
// modulo of vbo offset
|
||||
_vertexOffsetMod = (_vertexStride ? vertexDesc.offset % _vertexStride : 0);
|
||||
_varyingOffsetMod = (_varyingStride ? varyingDesc.offset % _varyingStride : 0);
|
||||
|
||||
_vdesc.Set(numVertexElements, numVaryingElements);
|
||||
|
||||
_program = glCreateProgram();
|
||||
|
||||
GLuint shader = glCreateShader(GL_VERTEX_SHADER);
|
||||
|
||||
char constantDefine[256];
|
||||
snprintf(constantDefine, 256,
|
||||
"#define NUM_VERTEX_ELEMENTS %d\n"
|
||||
"#define NUM_VARYING_ELEMENTS %d\n",
|
||||
numVertexElements, numVaryingElements);
|
||||
std::ostringstream defines;
|
||||
defines << "#define NUM_VERTEX_ELEMENTS " << _numVertexElements << "\n"
|
||||
<< "#define VERTEX_STRIDE " << _vertexStride << "\n"
|
||||
<< "#define NUM_VARYING_ELEMENTS " << _numVaryingElements << "\n"
|
||||
<< "#define VARYING_STRIDE " << _varyingStride << "\n";
|
||||
std::string defineStr = defines.str();
|
||||
|
||||
const char *shaderSources[3];
|
||||
shaderSources[0] = constantDefine;
|
||||
shaderSources[0] = defineStr.c_str();
|
||||
shaderSources[1] = shaderDefines;
|
||||
shaderSources[2] = shaderSource;
|
||||
glShaderSource(shader, 3, shaderSources, NULL);
|
||||
@ -96,21 +112,85 @@ OsdGLSLTransformFeedbackKernelBundle::Compile(int numVertexElements, int numVary
|
||||
|
||||
std::vector<std::string> outputs;
|
||||
|
||||
// position and custom vertex data are stored in the same buffer whereas varying data
|
||||
// exists on another buffer. "gl_NextBuffer" identifier helps to split them.
|
||||
for (int i = 0; i < numVertexElements; ++i) {
|
||||
/*
|
||||
output attribute array
|
||||
|
||||
- interleaved
|
||||
outVertexData[0]
|
||||
outVertexData[1]
|
||||
outVertexData[2]
|
||||
(gl_SkipComponents1)
|
||||
outVaryingData[0]
|
||||
outVaryingData[1]
|
||||
outVaryingData[2]
|
||||
outVaryingData[3]
|
||||
(gl_SkipComponents1)
|
||||
...
|
||||
|
||||
|
||||
- non-interleaved
|
||||
outVertexData[0]
|
||||
outVertexData[1]
|
||||
outVertexData[2]
|
||||
gl_NextBuffer
|
||||
outVaryingData[0]
|
||||
outVaryingData[1]
|
||||
outVaryingData[2]
|
||||
outVaryingData[3]
|
||||
|
||||
*/
|
||||
|
||||
if (_interleaved) {
|
||||
assert(_vertexStride == _varyingStride);
|
||||
assert(_numVertexElements + _numVaryingElements <= _vertexStride);
|
||||
char attrName[32];
|
||||
snprintf(attrName, 32, "outVertexData[%d]", i);
|
||||
outputs.push_back(attrName);
|
||||
}
|
||||
for (int i = 0; i < numVaryingElements; ++i) {
|
||||
if (i == 0 and (not outputs.empty())) {
|
||||
|
||||
// Walk one interleaved stride and name the transform-feedback output for
// every float slot: a vertex element, a varying element, or a skipped slot.
for (int i = 0; i < _vertexStride; ++i) {
    // Slot index relative to where each primvar starts inside the stride.
    int vertexElem = i - _vertexOffsetMod;
    int varyingElem = i - _varyingOffsetMod;

    if (vertexElem >= 0 and vertexElem < _numVertexElements) {
        snprintf(attrName, 32, "outVertexData[%d]", vertexElem);
        outputs.push_back(attrName);
    } else if (varyingElem >= 0 and varyingElem < _numVaryingElements) {
        // Strict '<': valid varying indices are 0 .. _numVaryingElements-1.
        // Using '<=' would name outVaryingData[_numVaryingElements], one
        // element past the end of the shader output array.
        snprintf(attrName, 32, "outVaryingData[%d]", varyingElem);
        outputs.push_back(attrName);
    } else {
        // Slot belongs to neither primvar: skip one component.
        outputs.push_back("gl_SkipComponents1");
    }
}
|
||||
} else {
|
||||
// non-interleaved
|
||||
char attrName[32];
|
||||
|
||||
// vertex data (may include custom vertex data) and varying data
|
||||
// are stored into separate buffers, split by gl_NextBuffer.
|
||||
for (int i = 0; i < _vertexOffsetMod; ++i)
|
||||
outputs.push_back("gl_SkipComponents1");
|
||||
for (int i = 0; i < _numVertexElements; ++i) {
|
||||
snprintf(attrName, 32, "outVertexData[%d]", i);
|
||||
outputs.push_back(attrName);
|
||||
}
|
||||
for (int i = _numVertexElements + _vertexOffsetMod; i < _vertexStride; ++i)
|
||||
outputs.push_back("gl_SkipComponents1");
|
||||
|
||||
// varying
|
||||
if (_numVaryingElements) {
|
||||
outputs.push_back("gl_NextBuffer");
|
||||
}
|
||||
char attrName[32];
|
||||
snprintf(attrName, 32, "outVaryingData[%d]", i);
|
||||
outputs.push_back(attrName);
|
||||
for (int i = 0; i < _varyingOffsetMod; ++i) {
|
||||
outputs.push_back("gl_SkipComponents1");
|
||||
}
|
||||
for (int i = 0; i < _numVaryingElements; ++i) {
|
||||
snprintf(attrName, 32, "outVaryingData[%d]", i);
|
||||
outputs.push_back(attrName);
|
||||
}
|
||||
for (int i = _numVaryingElements + _varyingOffsetMod; i < _varyingStride; ++i) {
|
||||
outputs.push_back("gl_SkipComponents1");
|
||||
}
|
||||
}
|
||||
|
||||
// convert to char* array
|
||||
std::vector<const char *> pOutputs;
|
||||
for (size_t i = 0; i < outputs.size(); ++i) {
|
||||
pOutputs.push_back(&outputs[i][0]);
|
||||
@ -156,6 +236,8 @@ OsdGLSLTransformFeedbackKernelBundle::Compile(int numVertexElements, int numVary
|
||||
_uniformVertexOffset = glGetUniformLocation(_program, "vertexOffset");
|
||||
_uniformTableOffset = glGetUniformLocation(_program, "tableOffset");
|
||||
_uniformIndexStart = glGetUniformLocation(_program, "indexStart");
|
||||
_uniformVertexBaseOffset = glGetUniformLocation(_program, "vertexBaseOffset");
|
||||
_uniformVaryingBaseOffset = glGetUniformLocation(_program, "varyingBaseOffset");
|
||||
|
||||
_uniformTables[FarSubdivisionTables::F_IT] = glGetUniformLocation(_program, "_F0_IT");
|
||||
_uniformTables[FarSubdivisionTables::F_ITa] = glGetUniformLocation(_program, "_F0_ITa");
|
||||
@ -181,32 +263,44 @@ OsdGLSLTransformFeedbackKernelBundle::Compile(int numVertexElements, int numVary
|
||||
|
||||
void
|
||||
OsdGLSLTransformFeedbackKernelBundle::transformGpuBufferData(
|
||||
GLuint vertexBuffer, int numVertexElements,
|
||||
GLuint varyingBuffer, int numVaryingElements,
|
||||
int vertexOffset, int tableOffset, int start, int end) const {
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset, int start, int end) const {
|
||||
|
||||
int count = end - start;
|
||||
if (count <= 0) return;
|
||||
|
||||
// set batch range
|
||||
glUniform1i(_uniformIndexStart, start);
|
||||
glUniform1i(_uniformVertexOffset, vertexOffset);
|
||||
glUniform1i(_uniformVertexOffset, offset);
|
||||
glUniform1i(_uniformTableOffset, tableOffset);
|
||||
|
||||
// XXX: end is not used here now
|
||||
OSD_DEBUG_CHECK_GL_ERROR("Uniform index set at offset=%d. start=%d\n",
|
||||
vertexOffset, start);
|
||||
offset, start);
|
||||
|
||||
int vertexOrigin = vertexOffset - _vertexOffsetMod;
|
||||
int varyingOrigin = varyingOffset - _varyingOffsetMod;
|
||||
|
||||
// set transform feedback buffer
|
||||
if (vertexBuffer) {
|
||||
int vertexStride = numVertexElements*sizeof(float);
|
||||
if (_interleaved) {
|
||||
int vertexStride = _vertexStride*sizeof(float);
|
||||
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 0, vertexBuffer,
|
||||
(start + vertexOffset)*vertexStride, count*vertexStride);
|
||||
}
|
||||
|
||||
if (varyingBuffer){
|
||||
int varyingStride = numVaryingElements*sizeof(float);
|
||||
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 1, varyingBuffer,
|
||||
(start + vertexOffset)*varyingStride, count*varyingStride);
|
||||
(start + offset)*vertexStride + vertexOrigin*sizeof(float),
|
||||
count*vertexStride);
|
||||
} else {
|
||||
if (vertexBuffer) {
|
||||
int vertexStride = _vertexStride*sizeof(float);
|
||||
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 0, vertexBuffer,
|
||||
(start + offset)*vertexStride + vertexOrigin*sizeof(float),
|
||||
count*vertexStride);
|
||||
}
|
||||
if (varyingBuffer){
|
||||
int varyingStride = _varyingStride*sizeof(float);
|
||||
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 1, varyingBuffer,
|
||||
(start + offset)*varyingStride + varyingOrigin*sizeof(float),
|
||||
count*varyingStride);
|
||||
}
|
||||
}
|
||||
|
||||
OSD_DEBUG_CHECK_GL_ERROR("transformGpuBufferData glBindBufferRange\n");
|
||||
@ -222,142 +316,138 @@ OsdGLSLTransformFeedbackKernelBundle::transformGpuBufferData(
|
||||
|
||||
glEndTransformFeedback();
|
||||
glBindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, 0);
|
||||
|
||||
GLsync sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
|
||||
glWaitSync(sync, 0, GL_TIMEOUT_IGNORED);
|
||||
glDeleteSync(sync);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
OsdGLSLTransformFeedbackKernelBundle::ApplyBilinearFaceVerticesKernel(
|
||||
GLuint vertexBuffer, int numVertexElements,
|
||||
GLuint varyingBuffer, int numVaryingElements,
|
||||
int vertexOffset, int tableOffset, int start, int end) {
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset, int start, int end) {
|
||||
|
||||
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeFace);
|
||||
transformGpuBufferData(vertexBuffer, numVertexElements,
|
||||
varyingBuffer, numVaryingElements,
|
||||
vertexOffset, tableOffset, start, end);
|
||||
transformGpuBufferData(vertexBuffer, varyingBuffer,
|
||||
vertexOffset, varyingOffset,
|
||||
offset, tableOffset, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGLSLTransformFeedbackKernelBundle::ApplyBilinearEdgeVerticesKernel(
|
||||
GLuint vertexBuffer, int numVertexElements,
|
||||
GLuint varyingBuffer, int numVaryingElements,
|
||||
int vertexOffset, int tableOffset, int start, int end) {
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset, int start, int end) {
|
||||
|
||||
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeBilinearEdge);
|
||||
transformGpuBufferData(vertexBuffer, numVertexElements,
|
||||
varyingBuffer, numVaryingElements,
|
||||
vertexOffset, tableOffset, start, end);
|
||||
transformGpuBufferData(vertexBuffer, varyingBuffer,
|
||||
vertexOffset, varyingOffset,
|
||||
offset, tableOffset, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGLSLTransformFeedbackKernelBundle::ApplyBilinearVertexVerticesKernel(
|
||||
GLuint vertexBuffer, int numVertexElements,
|
||||
GLuint varyingBuffer, int numVaryingElements,
|
||||
int vertexOffset, int tableOffset, int start, int end) {
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset, int start, int end) {
|
||||
|
||||
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeVertex);
|
||||
transformGpuBufferData(vertexBuffer, numVertexElements,
|
||||
varyingBuffer, numVaryingElements,
|
||||
vertexOffset, tableOffset, start, end);
|
||||
transformGpuBufferData(vertexBuffer, varyingBuffer,
|
||||
vertexOffset, varyingOffset,
|
||||
offset, tableOffset, start, end);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkFaceVerticesKernel(
|
||||
GLuint vertexBuffer, int numVertexElements,
|
||||
GLuint varyingBuffer, int numVaryingElements,
|
||||
int vertexOffset, int tableOffset, int start, int end) {
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset, int start, int end) {
|
||||
|
||||
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeFace);
|
||||
transformGpuBufferData(vertexBuffer, numVertexElements,
|
||||
varyingBuffer, numVaryingElements,
|
||||
vertexOffset, tableOffset, start, end);
|
||||
transformGpuBufferData(vertexBuffer, varyingBuffer,
|
||||
vertexOffset, varyingOffset,
|
||||
offset, tableOffset, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkEdgeVerticesKernel(
|
||||
GLuint vertexBuffer, int numVertexElements,
|
||||
GLuint varyingBuffer, int numVaryingElements,
|
||||
int vertexOffset, int tableOffset, int start, int end) {
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset, int start, int end) {
|
||||
|
||||
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeEdge);
|
||||
transformGpuBufferData(vertexBuffer, numVertexElements,
|
||||
varyingBuffer, numVaryingElements,
|
||||
vertexOffset, tableOffset, start, end);
|
||||
transformGpuBufferData(vertexBuffer, varyingBuffer,
|
||||
vertexOffset, varyingOffset,
|
||||
offset, tableOffset, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkVertexVerticesKernelB(
|
||||
GLuint vertexBuffer, int numVertexElements,
|
||||
GLuint varyingBuffer, int numVaryingElements,
|
||||
int vertexOffset, int tableOffset, int start, int end) {
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset, int start, int end) {
|
||||
|
||||
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeCatmarkVertexB);
|
||||
transformGpuBufferData(vertexBuffer, numVertexElements,
|
||||
varyingBuffer, numVaryingElements,
|
||||
vertexOffset, tableOffset, start, end);
|
||||
transformGpuBufferData(vertexBuffer, varyingBuffer,
|
||||
vertexOffset, varyingOffset,
|
||||
offset, tableOffset, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkVertexVerticesKernelA(
|
||||
GLuint vertexBuffer, int numVertexElements,
|
||||
GLuint varyingBuffer, int numVaryingElements,
|
||||
int vertexOffset, int tableOffset, int start, int end, bool pass) {
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset, int start, int end, bool pass) {
|
||||
|
||||
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeVertexA);
|
||||
glUniform1i(_uniformVertexPass, pass ? 1 : 0);
|
||||
transformGpuBufferData(vertexBuffer, numVertexElements,
|
||||
varyingBuffer, numVaryingElements,
|
||||
vertexOffset, tableOffset, start, end);
|
||||
transformGpuBufferData(vertexBuffer, varyingBuffer,
|
||||
vertexOffset, varyingOffset,
|
||||
offset, tableOffset, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGLSLTransformFeedbackKernelBundle::ApplyLoopEdgeVerticesKernel(
|
||||
GLuint vertexBuffer, int numVertexElements,
|
||||
GLuint varyingBuffer, int numVaryingElements,
|
||||
int vertexOffset, int tableOffset, int start, int end) {
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset, int start, int end) {
|
||||
|
||||
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeEdge);
|
||||
transformGpuBufferData(vertexBuffer, numVertexElements,
|
||||
varyingBuffer, numVaryingElements,
|
||||
vertexOffset, tableOffset, start, end);
|
||||
transformGpuBufferData(vertexBuffer, varyingBuffer,
|
||||
vertexOffset, varyingOffset,
|
||||
offset, tableOffset, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGLSLTransformFeedbackKernelBundle::ApplyLoopVertexVerticesKernelB(
|
||||
GLuint vertexBuffer, int numVertexElements,
|
||||
GLuint varyingBuffer, int numVaryingElements,
|
||||
int vertexOffset, int tableOffset, int start, int end) {
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset, int start, int end) {
|
||||
|
||||
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeLoopVertexB);
|
||||
transformGpuBufferData(vertexBuffer, numVertexElements,
|
||||
varyingBuffer, numVaryingElements,
|
||||
vertexOffset, tableOffset, start, end);
|
||||
transformGpuBufferData(vertexBuffer, varyingBuffer,
|
||||
vertexOffset, varyingOffset,
|
||||
offset, tableOffset, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGLSLTransformFeedbackKernelBundle::ApplyLoopVertexVerticesKernelA(
|
||||
GLuint vertexBuffer, int numVertexElements,
|
||||
GLuint varyingBuffer, int numVaryingElements,
|
||||
int vertexOffset, int tableOffset, int start, int end, bool pass) {
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset, int start, int end, bool pass) {
|
||||
|
||||
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeVertexA);
|
||||
glUniform1i(_uniformVertexPass, pass ? 1 : 0);
|
||||
transformGpuBufferData(vertexBuffer, numVertexElements,
|
||||
varyingBuffer, numVaryingElements,
|
||||
vertexOffset, tableOffset, start, end);
|
||||
transformGpuBufferData(vertexBuffer, varyingBuffer,
|
||||
vertexOffset, varyingOffset,
|
||||
offset, tableOffset, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGLSLTransformFeedbackKernelBundle::ApplyEditAdd(
|
||||
GLuint vertexBuffer, int numVertexElements,
|
||||
GLuint varyingBuffer, int numVaryingElements,
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int primvarOffset, int primvarWidth,
|
||||
int vertexOffset, int tableOffset, int start, int end) {
|
||||
int offset, int tableOffset, int start, int end) {
|
||||
|
||||
if (end - start <= 0) return;
|
||||
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subEditAdd);
|
||||
@ -365,15 +455,19 @@ OsdGLSLTransformFeedbackKernelBundle::ApplyEditAdd(
|
||||
glUniform1i(_uniformEditPrimVarWidth, primvarWidth);
|
||||
|
||||
glUniform1i(_uniformIndexStart, start);
|
||||
glUniform1i(_uniformVertexOffset, vertexOffset);
|
||||
glUniform1i(_uniformVertexOffset, offset);
|
||||
glUniform1i(_uniformTableOffset, tableOffset);
|
||||
glDrawArrays(GL_POINTS, 0, end - start);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGLSLTransformFeedbackKernelBundle::UseProgram() const
|
||||
OsdGLSLTransformFeedbackKernelBundle::UseProgram(int vertexBaseOffset,
|
||||
int varyingBaseOffset) const
|
||||
{
|
||||
glUseProgram(_program);
|
||||
|
||||
glUniform1i(_uniformVertexBaseOffset, vertexBaseOffset);
|
||||
glUniform1i(_uniformVaryingBaseOffset, varyingBaseOffset);
|
||||
}
|
||||
|
||||
|
||||
|
@ -44,65 +44,67 @@ public:
|
||||
|
||||
~OsdGLSLTransformFeedbackKernelBundle();
|
||||
|
||||
bool Compile(int numVertexElements, int numVaryingElements);
|
||||
bool Compile(OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
bool interleaved);
|
||||
|
||||
void ApplyBilinearFaceVerticesKernel(
|
||||
GLuint vertexBuffer, int numVertexElements,
|
||||
GLuint varyingBuffer, int numVaryingElements,
|
||||
int vertexOffset, int tableOffset, int start, int end);
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset, int start, int end);
|
||||
|
||||
void ApplyBilinearEdgeVerticesKernel(
|
||||
GLuint vertexBuffer, int numVertexElements,
|
||||
GLuint varyingBuffer, int numVaryingElements,
|
||||
int vertexOffset, int tableOffset, int start, int end);
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset, int start, int end);
|
||||
|
||||
void ApplyBilinearVertexVerticesKernel(
|
||||
GLuint vertexBuffer, int numVertexElements,
|
||||
GLuint varyingBuffer, int numVaryingElements,
|
||||
int vertexOffset, int tableOffset, int start, int end);
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset, int start, int end);
|
||||
|
||||
void ApplyCatmarkFaceVerticesKernel(
|
||||
GLuint vertexBuffer, int numVertexElements,
|
||||
GLuint varyingBuffer, int numVaryingElements,
|
||||
int vertexOffset, int tableOffset, int start, int end);
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset, int start, int end);
|
||||
|
||||
void ApplyCatmarkEdgeVerticesKernel(
|
||||
GLuint vertexBuffer, int numVertexElements,
|
||||
GLuint varyingBuffer, int numVaryingElements,
|
||||
int vertexOffset, int tableOffset, int start, int end);
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset, int start, int end);
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelB(
|
||||
GLuint vertexBuffer, int numVertexElements,
|
||||
GLuint varyingBuffer, int numVaryingElements,
|
||||
int vertexOffset, int tableOffset, int start, int end);
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset, int start, int end);
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelA(
|
||||
GLuint vertexBuffer, int numVertexElements,
|
||||
GLuint varyingBuffer, int numVaryingElements,
|
||||
int vertexOffset, int tableOffset, int start, int end, bool pass);
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset, int start, int end, bool pass);
|
||||
|
||||
void ApplyLoopEdgeVerticesKernel(
|
||||
GLuint vertexBuffer, int numVertexElements,
|
||||
GLuint varyingBuffer, int numVaryingElements,
|
||||
int vertexOffset, int tableOffset, int start, int end);
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset, int start, int end);
|
||||
|
||||
void ApplyLoopVertexVerticesKernelB(
|
||||
GLuint vertexBuffer, int numVertexElements,
|
||||
GLuint varyingBuffer, int numVaryingElements,
|
||||
int vertexOffset, int tableOffset, int start, int end);
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset, int start, int end);
|
||||
|
||||
void ApplyLoopVertexVerticesKernelA(
|
||||
GLuint vertexBuffer, int numVertexElements,
|
||||
GLuint varyingBuffer, int numVaryingElements,
|
||||
int vertexOffset, int tableOffset, int start, int end, bool pass);
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset, int start, int end, bool pass);
|
||||
|
||||
void ApplyEditAdd(
|
||||
GLuint vertexBuffer, int numVertexElements,
|
||||
GLuint varyingBuffer, int numVaryingElements,
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int primvarOffset, int primvarWidth,
|
||||
int vertexOffset, int tableOffset, int start, int end);
|
||||
int offset, int tableOffset, int start, int end);
|
||||
|
||||
void UseProgram() const;
|
||||
void UseProgram(int vertexBaseOffset, int varyingBaseOffset) const;
|
||||
|
||||
GLint GetTableUniformLocation(int tableIndex) const {
|
||||
return _uniformTables[tableIndex];
|
||||
@ -124,26 +126,35 @@ public:
|
||||
}
|
||||
|
||||
struct Match {
|
||||
|
||||
/// Constructor
|
||||
Match(int numVertexElements, int numVaryingElements)
|
||||
: vdesc(numVertexElements, numVaryingElements) {
|
||||
Match(OsdVertexBufferDescriptor const &vertex,
|
||||
OsdVertexBufferDescriptor const &varying,
|
||||
bool interleaved)
|
||||
: vertexDesc(vertex), varyingDesc(varying), interleaved(interleaved) {
|
||||
}
|
||||
|
||||
bool operator() (OsdGLSLTransformFeedbackKernelBundle const *kernel) {
|
||||
return vdesc == kernel->_vdesc;
|
||||
// offset is dynamic. just comparing length and stride here,
|
||||
// returns true if they are equal
|
||||
return (vertexDesc.length == kernel->_numVertexElements and
|
||||
vertexDesc.stride == kernel->_vertexStride and
|
||||
varyingDesc.length == kernel->_numVaryingElements and
|
||||
varyingDesc.stride == kernel->_varyingStride and
|
||||
interleaved == kernel->_interleaved);
|
||||
}
|
||||
|
||||
OsdVertexDescriptor vdesc;
|
||||
OsdVertexBufferDescriptor vertexDesc;
|
||||
OsdVertexBufferDescriptor varyingDesc;
|
||||
bool interleaved;
|
||||
};
|
||||
|
||||
friend struct Match;
|
||||
|
||||
protected:
|
||||
void transformGpuBufferData(
|
||||
GLuint vertexBuffer, int numVertexElements,
|
||||
GLuint varyingBuffer, int numVaryingElements,
|
||||
int vertexOffset, int tableOffset, int start, int end) const;
|
||||
GLuint vertexBuffer, GLuint varyingBuffer,
|
||||
int vertexOffset, int varyingOffset,
|
||||
int offset, int tableOffset, int start, int end) const;
|
||||
|
||||
GLuint _program;
|
||||
|
||||
@ -153,6 +164,8 @@ protected:
|
||||
GLint _uniformVertexOffset;
|
||||
GLint _uniformTableOffset;
|
||||
GLint _uniformIndexStart;
|
||||
GLint _uniformVertexBaseOffset;
|
||||
GLint _uniformVaryingBaseOffset;
|
||||
|
||||
GLint _uniformVertexBuffer;
|
||||
GLint _uniformVaryingBuffer;
|
||||
@ -182,7 +195,14 @@ protected:
|
||||
|
||||
GLuint _subEditAdd; // hedit kernel (add)
|
||||
|
||||
OsdVertexDescriptor _vdesc;
|
||||
// kernelbundle discriminators
|
||||
int _numVertexElements;
|
||||
int _vertexStride;
|
||||
int _numVaryingElements;
|
||||
int _varyingStride;
|
||||
int _vertexOffsetMod;
|
||||
int _varyingOffsetMod;
|
||||
bool _interleaved;
|
||||
};
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
|
@ -32,6 +32,8 @@ cbuffer KernelCB : register( b0 ) {
|
||||
int tableOffset; // offset of subdivision table
|
||||
int indexStart; // start index relative to tableOffset
|
||||
int indexEnd; // end index relative to tableOffset
|
||||
int vertexBaseOffset; // base vbo offset of the vertex buffer
|
||||
int varyingBaseOffset; // base vbo offset of the varying buffer
|
||||
bool vertexPass;
|
||||
|
||||
// vertex edit kernel
|
||||
@ -91,13 +93,15 @@ Vertex readVertex(int index)
|
||||
Vertex v;
|
||||
|
||||
#if NUM_VERTEX_ELEMENTS > 0
|
||||
int vertexIndex = index * VERTEX_STRIDE + vertexBaseOffset;
|
||||
for (int i = 0; i < NUM_VERTEX_ELEMENTS; i++) {
|
||||
v.vertexData[i] = vertexBuffer[index*NUM_VERTEX_ELEMENTS+i];
|
||||
v.vertexData[i] = vertexBuffer[vertexIndex + i];
|
||||
}
|
||||
#endif
|
||||
#if NUM_VARYING_ELEMENTS > 0
|
||||
int varyingIndex = index * VARYING_STRIDE + varyingBaseOffset;
|
||||
for (int i = 0; i < NUM_VARYING_ELEMENTS; i++) {
|
||||
v.varyingData[i] = varyingBuffer[index*NUM_VARYING_ELEMENTS+i];
|
||||
v.varyingData[i] = varyingBuffer[varyingIndex + i];
|
||||
}
|
||||
#endif
|
||||
return v;
|
||||
@ -106,13 +110,15 @@ Vertex readVertex(int index)
|
||||
void writeVertex(int index, Vertex v)
|
||||
{
|
||||
#if NUM_VERTEX_ELEMENTS > 0
|
||||
int vertexIndex = index * VERTEX_STRIDE + vertexBaseOffset;
|
||||
for (int i = 0; i < NUM_VERTEX_ELEMENTS; i++) {
|
||||
vertexBuffer[index*NUM_VERTEX_ELEMENTS+i] = v.vertexData[i];
|
||||
vertexBuffer[vertexIndex + i] = v.vertexData[i];
|
||||
}
|
||||
#endif
|
||||
#if NUM_VARYING_ELEMENTS > 0
|
||||
int varyingIndex = index * VARYING_STRIDE + varyingBaseOffset;
|
||||
for (int i = 0; i < NUM_VARYING_ELEMENTS; i++) {
|
||||
varyingBuffer[index*NUM_VARYING_ELEMENTS+i] = v.varyingData[i];
|
||||
varyingBuffer[varyingIndex + i] = v.varyingData[i];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -33,6 +33,7 @@
|
||||
#include "../hbr/mesh.h"
|
||||
|
||||
#include "../osd/vertex.h"
|
||||
#include "../osd/vertexDescriptor.h"
|
||||
|
||||
#include <bitset>
|
||||
|
||||
@ -68,6 +69,10 @@ public:
|
||||
|
||||
virtual void Refine() = 0;
|
||||
|
||||
virtual void Refine(OsdVertexBufferDescriptor const *vertexDesc,
|
||||
OsdVertexBufferDescriptor const *varyingDesc,
|
||||
bool interleaved) = 0;
|
||||
|
||||
virtual void Synchronize() = 0;
|
||||
|
||||
virtual DrawContext * GetDrawContext() = 0;
|
||||
@ -158,6 +163,13 @@ public:
|
||||
virtual void Refine() {
|
||||
_computeController->Refine(_computeContext, _farMesh->GetKernelBatches(), _vertexBuffer, _varyingBuffer);
|
||||
}
|
||||
virtual void Refine(OsdVertexBufferDescriptor const *vertexDesc,
|
||||
OsdVertexBufferDescriptor const *varyingDesc) {
|
||||
_computeController->Refine(_computeContext, _farMesh->GetKernelBatches(),
|
||||
_vertexBuffer, _varyingBuffer,
|
||||
vertexDesc, varyingDesc);
|
||||
}
|
||||
|
||||
virtual void Synchronize() {
|
||||
_computeController->Synchronize();
|
||||
}
|
||||
|
@ -34,8 +34,7 @@ namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
|
||||
OsdOmpComputeController::OsdOmpComputeController(int numThreads) :
|
||||
_currentVertexBuffer(NULL), _currentVaryingBuffer(NULL) {
|
||||
OsdOmpComputeController::OsdOmpComputeController(int numThreads) {
|
||||
|
||||
_numThreads = (numThreads == -1) ? omp_get_max_threads() : numThreads;
|
||||
}
|
||||
@ -48,7 +47,8 @@ OsdOmpComputeController::ApplyBilinearFaceVerticesKernel(
|
||||
assert(context);
|
||||
|
||||
OsdOmpComputeFace(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::F_IT)->GetBuffer(),
|
||||
(const int*)context->GetTable(FarSubdivisionTables::F_ITa)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
@ -61,7 +61,8 @@ OsdOmpComputeController::ApplyBilinearEdgeVerticesKernel(
|
||||
assert(context);
|
||||
|
||||
OsdOmpComputeBilinearEdge(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
@ -73,7 +74,8 @@ OsdOmpComputeController::ApplyBilinearVertexVerticesKernel(
|
||||
assert(context);
|
||||
|
||||
OsdOmpComputeBilinearVertex(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
@ -85,7 +87,8 @@ OsdOmpComputeController::ApplyCatmarkFaceVerticesKernel(
|
||||
assert(context);
|
||||
|
||||
OsdOmpComputeFace(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::F_IT)->GetBuffer(),
|
||||
(const int*)context->GetTable(FarSubdivisionTables::F_ITa)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
@ -98,7 +101,8 @@ OsdOmpComputeController::ApplyCatmarkEdgeVerticesKernel(
|
||||
assert(context);
|
||||
|
||||
OsdOmpComputeEdge(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
|
||||
(const float*)context->GetTable(FarSubdivisionTables::E_W)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
@ -111,7 +115,8 @@ OsdOmpComputeController::ApplyCatmarkVertexVerticesKernelB(
|
||||
assert(context);
|
||||
|
||||
OsdOmpComputeVertexB(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_IT)->GetBuffer(),
|
||||
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
|
||||
@ -125,7 +130,8 @@ OsdOmpComputeController::ApplyCatmarkVertexVerticesKernelA1(
|
||||
assert(context);
|
||||
|
||||
OsdOmpComputeVertexA(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
|
||||
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), false);
|
||||
@ -138,7 +144,8 @@ OsdOmpComputeController::ApplyCatmarkVertexVerticesKernelA2(
|
||||
assert(context);
|
||||
|
||||
OsdOmpComputeVertexA(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
|
||||
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
|
||||
@ -151,7 +158,8 @@ OsdOmpComputeController::ApplyLoopEdgeVerticesKernel(
|
||||
assert(context);
|
||||
|
||||
OsdOmpComputeEdge(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
|
||||
(const float*)context->GetTable(FarSubdivisionTables::E_W)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
@ -164,7 +172,8 @@ OsdOmpComputeController::ApplyLoopVertexVerticesKernelB(
|
||||
assert(context);
|
||||
|
||||
OsdOmpComputeLoopVertexB(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_IT)->GetBuffer(),
|
||||
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
|
||||
@ -178,7 +187,8 @@ OsdOmpComputeController::ApplyLoopVertexVerticesKernelA1(
|
||||
assert(context);
|
||||
|
||||
OsdOmpComputeVertexA(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
|
||||
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), false);
|
||||
@ -191,7 +201,8 @@ OsdOmpComputeController::ApplyLoopVertexVerticesKernelA2(
|
||||
assert(context);
|
||||
|
||||
OsdOmpComputeVertexA(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
|
||||
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
|
||||
@ -210,8 +221,8 @@ OsdOmpComputeController::ApplyVertexEdits(
|
||||
const OsdCpuTable * editValues = edit->GetEditValues();
|
||||
|
||||
if (edit->GetOperation() == FarVertexEdit::Add) {
|
||||
OsdOmpEditVertexAdd(_vdesc,
|
||||
_currentVertexBuffer,
|
||||
OsdOmpEditVertexAdd(_currentBindState.vertexBuffer,
|
||||
_currentBindState.vertexDesc,
|
||||
edit->GetPrimvarOffset(),
|
||||
edit->GetPrimvarWidth(),
|
||||
batch.GetVertexOffset(),
|
||||
@ -221,8 +232,8 @@ OsdOmpComputeController::ApplyVertexEdits(
|
||||
static_cast<unsigned int*>(primvarIndices->GetBuffer()),
|
||||
static_cast<float*>(editValues->GetBuffer()));
|
||||
} else if (edit->GetOperation() == FarVertexEdit::Set) {
|
||||
OsdOmpEditVertexSet(_vdesc,
|
||||
_currentVertexBuffer,
|
||||
OsdOmpEditVertexSet(_currentBindState.vertexBuffer,
|
||||
_currentBindState.vertexDesc,
|
||||
edit->GetPrimvarOffset(),
|
||||
edit->GetPrimvarWidth(),
|
||||
batch.GetVertexOffset(),
|
||||
|
@ -29,6 +29,7 @@
|
||||
|
||||
#include "../far/dispatcher.h"
|
||||
#include "../osd/cpuComputeContext.h"
|
||||
#include "../osd/vertexDescriptor.h"
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_OPENMP
|
||||
#include <omp.h>
|
||||
@ -69,17 +70,27 @@ public:
|
||||
///
|
||||
/// @param varyingBuffer varying-interpolated data buffer
|
||||
///
|
||||
/// @param vertexDesc the descriptor of vertex elements to be refined.
|
||||
/// if it's null, all primvars in the vertex buffer
|
||||
/// will be refined.
|
||||
///
|
||||
/// @param varyingDesc the descriptor of varying elements to be refined.
|
||||
/// if it's null, all primvars in the varying buffer
|
||||
/// will be refined.
|
||||
///
|
||||
template<class VERTEX_BUFFER, class VARYING_BUFFER>
|
||||
void Refine(OsdCpuComputeContext const *context,
|
||||
FarKernelBatchVector const & batches,
|
||||
VERTEX_BUFFER * vertexBuffer,
|
||||
VARYING_BUFFER * varyingBuffer) {
|
||||
VARYING_BUFFER * varyingBuffer,
|
||||
OsdVertexBufferDescriptor const *vertexDesc=NULL,
|
||||
OsdVertexBufferDescriptor const *varyingDesc=NULL) {
|
||||
|
||||
if (batches.empty()) return;
|
||||
|
||||
omp_set_num_threads(_numThreads);
|
||||
|
||||
bind(vertexBuffer, varyingBuffer);
|
||||
bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc);
|
||||
|
||||
FarDispatcher::Refine(this, context, batches, /*maxlevel*/-1);
|
||||
|
||||
@ -137,24 +148,60 @@ protected:
|
||||
void ApplyVertexEdits(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
template<class VERTEX_BUFFER, class VARYING_BUFFER>
|
||||
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying) {
|
||||
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying,
|
||||
OsdVertexBufferDescriptor const *vertexDesc,
|
||||
OsdVertexBufferDescriptor const *varyingDesc) {
|
||||
|
||||
_currentVertexBuffer = vertex ? vertex->BindCpuBuffer() : 0;
|
||||
_currentVaryingBuffer = varying ? varying->BindCpuBuffer() : 0;
|
||||
// if the vertex buffer descriptor is specified, use it.
|
||||
// otherwise, assumes the data is tightly packed in the vertex buffer.
|
||||
if (vertexDesc) {
|
||||
_currentBindState.vertexDesc = *vertexDesc;
|
||||
} else {
|
||||
int numElements = vertex ? vertex->GetNumElements() : 0;
|
||||
_currentBindState.vertexDesc = OsdVertexBufferDescriptor(
|
||||
0, numElements, numElements);
|
||||
}
|
||||
if (varyingDesc) {
|
||||
_currentBindState.varyingDesc = *varyingDesc;
|
||||
} else {
|
||||
int numElements = varying ? varying->GetNumElements() : 0;
|
||||
_currentBindState.varyingDesc = OsdVertexBufferDescriptor(
|
||||
0, numElements, numElements);
|
||||
}
|
||||
|
||||
int numVertexElements = vertex ? vertex->GetNumElements() : 0;
|
||||
int numVaryingElements = varying ? varying->GetNumElements() : 0;
|
||||
_vdesc.Set(numVertexElements, numVaryingElements);
|
||||
// apply vertex offset here
|
||||
if (vertex) {
|
||||
_currentBindState.vertexBuffer =
|
||||
vertex->BindCpuBuffer() + _currentBindState.vertexDesc.offset;
|
||||
} else {
|
||||
_currentBindState.vertexBuffer = NULL;
|
||||
}
|
||||
if (varying) {
|
||||
_currentBindState.varyingBuffer =
|
||||
varying->BindCpuBuffer() + _currentBindState.varyingDesc.offset;
|
||||
} else {
|
||||
_currentBindState.varyingBuffer = NULL;
|
||||
}
|
||||
}
|
||||
/// Forgets the buffers and descriptors recorded by bind().
void unbind() {
    _currentBindState.Reset();
}
|
||||
|
||||
private:
|
||||
float *_currentVertexBuffer, *_currentVaryingBuffer;
|
||||
OsdVertexDescriptor _vdesc;
|
||||
struct BindState {
|
||||
BindState() : vertexBuffer(NULL), varyingBuffer(NULL) {}
|
||||
void Reset() {
|
||||
vertexBuffer = varyingBuffer = NULL;
|
||||
vertexDesc.Reset();
|
||||
varyingDesc.Reset();
|
||||
}
|
||||
float *vertexBuffer;
|
||||
float *varyingBuffer;
|
||||
OsdVertexBufferDescriptor vertexDesc;
|
||||
OsdVertexBufferDescriptor varyingDesc;
|
||||
};
|
||||
|
||||
BindState _currentBindState;
|
||||
int _numThreads;
|
||||
};
|
||||
|
||||
|
@ -25,40 +25,94 @@
|
||||
#include "../osd/ompKernel.h"
|
||||
#include "../osd/vertexDescriptor.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
#include <omp.h>
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
static inline void
|
||||
clear(float *dst, OsdVertexBufferDescriptor const &desc) {
|
||||
|
||||
if (dst) {
|
||||
memset(dst, 0, desc.length*sizeof(float));
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
addWithWeight(float *dst, const float *srcOrigin, int srcIndex, float weight,
|
||||
OsdVertexBufferDescriptor const &desc) {
|
||||
|
||||
if (srcOrigin && dst) {
|
||||
const float *src = srcOrigin + srcIndex * desc.stride;
|
||||
for (int k = 0; k < desc.length; ++k) {
|
||||
dst[k] += src[k] * weight;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
copy(float *dstOrigin, const float *src, int dstIndex,
|
||||
OsdVertexBufferDescriptor const &desc) {
|
||||
|
||||
if (dstOrigin && src) {
|
||||
float *dst = dstOrigin + dstIndex * desc.stride;
|
||||
memcpy(dst, src, desc.length*sizeof(float));
|
||||
}
|
||||
}
|
||||
|
||||
void OsdOmpComputeFace(
|
||||
OsdVertexDescriptor const &vdesc, float * vertex, float * varying,
|
||||
float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *F_IT, const int *F_ITa, int offset, int tableOffset, int start, int end) {
|
||||
|
||||
int numThreads = omp_get_max_threads();
|
||||
float *vertexResultsArray = (float*)alloca(vertexDesc.length * sizeof(float) * numThreads);
|
||||
float *varyingResultsArray = (float*)alloca(varyingDesc.length * sizeof(float) * numThreads);
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int i = start + tableOffset; i < end + tableOffset; i++) {
|
||||
int h = F_ITa[2*i];
|
||||
int n = F_ITa[2*i+1];
|
||||
|
||||
float weight = 1.0f/n;
|
||||
|
||||
// XXX: should use local vertex struct variable instead of
|
||||
// accumulating directly into global memory.
|
||||
int dstIndex = offset + i - tableOffset;
|
||||
vdesc.Clear(vertex, varying, dstIndex);
|
||||
|
||||
int threadId = omp_get_thread_num();
|
||||
float *vertexResults = vertexResultsArray +
|
||||
vertexDesc.length * threadId;
|
||||
float *varyingResults = varyingResultsArray +
|
||||
varyingDesc.length * threadId;
|
||||
|
||||
// clear
|
||||
clear(vertexResults, vertexDesc);
|
||||
clear(varyingResults, varyingDesc);
|
||||
|
||||
for (int j = 0; j < n; ++j) {
|
||||
int index = F_IT[h+j];
|
||||
vdesc.AddWithWeight(vertex, dstIndex, index, weight);
|
||||
vdesc.AddVaryingWithWeight(varying, dstIndex, index, weight);
|
||||
addWithWeight(vertexResults, vertex, index, weight, vertexDesc);
|
||||
addWithWeight(varyingResults, varying, index, weight, varyingDesc);
|
||||
}
|
||||
|
||||
// write results
|
||||
copy(vertex, vertexResults, dstIndex, vertexDesc);
|
||||
copy(varying, varyingResults, dstIndex, varyingDesc);
|
||||
}
|
||||
}
|
||||
|
||||
void OsdOmpComputeEdge(
|
||||
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
|
||||
float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *E_IT, const float *E_W, int offset, int tableOffset, int start, int end) {
|
||||
|
||||
int numThreads = omp_get_max_threads();
|
||||
float *vertexResultsArray = (float*)alloca(vertexDesc.length * sizeof(float) * numThreads);
|
||||
float *varyingResultsArray = (float*)alloca(varyingDesc.length * sizeof(float) * numThreads);
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int i = start + tableOffset; i < end + tableOffset; i++) {
|
||||
int eidx0 = E_IT[4*i+0];
|
||||
@ -67,30 +121,47 @@ void OsdOmpComputeEdge(
|
||||
int eidx3 = E_IT[4*i+3];
|
||||
|
||||
float vertWeight = E_W[i*2+0];
|
||||
|
||||
int dstIndex = offset + i - tableOffset;
|
||||
vdesc.Clear(vertex, varying, dstIndex);
|
||||
|
||||
vdesc.AddWithWeight(vertex, dstIndex, eidx0, vertWeight);
|
||||
vdesc.AddWithWeight(vertex, dstIndex, eidx1, vertWeight);
|
||||
int threadId = omp_get_thread_num();
|
||||
float *vertexResults = vertexResultsArray +
|
||||
vertexDesc.length * threadId;
|
||||
float *varyingResults = varyingResultsArray +
|
||||
varyingDesc.length * threadId;
|
||||
|
||||
// clear
|
||||
clear(vertexResults, vertexDesc);
|
||||
clear(varyingResults, varyingDesc);
|
||||
|
||||
addWithWeight(vertexResults, vertex, eidx0, vertWeight, vertexDesc);
|
||||
addWithWeight(vertexResults, vertex, eidx1, vertWeight, vertexDesc);
|
||||
|
||||
if (eidx2 != -1) {
|
||||
float faceWeight = E_W[i*2+1];
|
||||
|
||||
vdesc.AddWithWeight(vertex, dstIndex, eidx2, faceWeight);
|
||||
vdesc.AddWithWeight(vertex, dstIndex, eidx3, faceWeight);
|
||||
addWithWeight(vertexResults, vertex, eidx2, faceWeight, vertexDesc);
|
||||
addWithWeight(vertexResults, vertex, eidx3, faceWeight, vertexDesc);
|
||||
}
|
||||
|
||||
vdesc.AddVaryingWithWeight(varying, dstIndex, eidx0, 0.5f);
|
||||
vdesc.AddVaryingWithWeight(varying, dstIndex, eidx1, 0.5f);
|
||||
addWithWeight(varyingResults, varying, eidx0, 0.5f, varyingDesc);
|
||||
addWithWeight(varyingResults, varying, eidx1, 0.5f, varyingDesc);
|
||||
|
||||
copy(vertex, vertexResults, dstIndex, vertexDesc);
|
||||
copy(varying, varyingResults, dstIndex, varyingDesc);
|
||||
}
|
||||
}
|
||||
|
||||
void OsdOmpComputeVertexA(
|
||||
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
|
||||
float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *V_ITa, const float *V_W,
|
||||
int offset, int tableOffset, int start, int end, int pass) {
|
||||
|
||||
int numThreads = omp_get_max_threads();
|
||||
float *vertexResultsArray = (float*)alloca(vertexDesc.length * sizeof(float) * numThreads);
|
||||
float *varyingResultsArray = (float*)alloca(varyingDesc.length * sizeof(float) * numThreads);
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int i = start + tableOffset; i < end + tableOffset; i++) {
|
||||
int n = V_ITa[5*i+1];
|
||||
@ -107,27 +178,47 @@ void OsdOmpComputeVertexA(
|
||||
weight = 1.0f - weight;
|
||||
|
||||
int dstIndex = offset + i - tableOffset;
|
||||
if (not pass)
|
||||
vdesc.Clear(vertex, varying, dstIndex);
|
||||
|
||||
if (eidx0 == -1 || (pass == 0 && (n == -1))) {
|
||||
vdesc.AddWithWeight(vertex, dstIndex, p, weight);
|
||||
} else {
|
||||
vdesc.AddWithWeight(vertex, dstIndex, p, weight * 0.75f);
|
||||
vdesc.AddWithWeight(vertex, dstIndex, eidx0, weight * 0.125f);
|
||||
vdesc.AddWithWeight(vertex, dstIndex, eidx1, weight * 0.125f);
|
||||
int threadId = omp_get_thread_num();
|
||||
float *vertexResults = vertexResultsArray +
|
||||
vertexDesc.length * threadId;
|
||||
float *varyingResults = varyingResultsArray +
|
||||
varyingDesc.length * threadId;
|
||||
|
||||
clear(vertexResults, vertexDesc);
|
||||
clear(varyingResults, varyingDesc);
|
||||
if (pass) {
|
||||
// copy previous results
|
||||
addWithWeight(vertexResults, vertex, dstIndex, 1.0f, vertexDesc);
|
||||
}
|
||||
|
||||
if (not pass)
|
||||
vdesc.AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
|
||||
if (eidx0 == -1 || (pass == 0 && (n == -1))) {
|
||||
addWithWeight(vertexResults, vertex, p, weight, vertexDesc);
|
||||
} else {
|
||||
addWithWeight(vertexResults, vertex, p, weight * 0.75f, vertexDesc);
|
||||
addWithWeight(vertexResults, vertex, eidx0, weight * 0.125f, vertexDesc);
|
||||
addWithWeight(vertexResults, vertex, eidx1, weight * 0.125f, vertexDesc);
|
||||
}
|
||||
|
||||
copy(vertex, vertexResults, dstIndex, vertexDesc);
|
||||
if (not pass) {
|
||||
addWithWeight(varyingResults, varying, p, 1.0f, varyingDesc);
|
||||
copy(varying, varyingResults, dstIndex, varyingDesc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void OsdOmpComputeVertexB(
|
||||
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
|
||||
float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *V_ITa, const int *V_IT, const float *V_W,
|
||||
int offset, int tableOffset, int start, int end) {
|
||||
|
||||
int numThreads = omp_get_max_threads();
|
||||
float *vertexResultsArray = (float*)alloca(vertexDesc.length * sizeof(float) * numThreads);
|
||||
float *varyingResultsArray = (float*)alloca(varyingDesc.length * sizeof(float) * numThreads);
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int i = start + tableOffset; i < end + tableOffset; i++) {
|
||||
int h = V_ITa[5*i];
|
||||
@ -139,23 +230,40 @@ void OsdOmpComputeVertexB(
|
||||
float wv = (n-2.0f) * n * wp;
|
||||
|
||||
int dstIndex = offset + i - tableOffset;
|
||||
vdesc.Clear(vertex, varying, dstIndex);
|
||||
|
||||
vdesc.AddWithWeight(vertex, dstIndex, p, weight * wv);
|
||||
int threadId = omp_get_thread_num();
|
||||
float *vertexResults = vertexResultsArray +
|
||||
vertexDesc.length * threadId;
|
||||
float *varyingResults = varyingResultsArray +
|
||||
varyingDesc.length * threadId;
|
||||
|
||||
clear(vertexResults, vertexDesc);
|
||||
clear(varyingResults, varyingDesc);
|
||||
|
||||
addWithWeight(vertexResults, vertex, p, weight * wv, vertexDesc);
|
||||
|
||||
for (int j = 0; j < n; ++j) {
|
||||
vdesc.AddWithWeight(vertex, dstIndex, V_IT[h+j*2], weight * wp);
|
||||
vdesc.AddWithWeight(vertex, dstIndex, V_IT[h+j*2+1], weight * wp);
|
||||
addWithWeight(vertexResults, vertex, V_IT[h+j*2], weight * wp, vertexDesc);
|
||||
addWithWeight(vertexResults, vertex, V_IT[h+j*2+1], weight * wp, vertexDesc);
|
||||
}
|
||||
vdesc.AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
|
||||
addWithWeight(varyingResults, varying, p, 1.0f, varyingDesc);
|
||||
|
||||
copy(vertex, vertexResults, dstIndex, vertexDesc);
|
||||
copy(varying, varyingResults, dstIndex, varyingDesc);
|
||||
}
|
||||
}
|
||||
|
||||
void OsdOmpComputeLoopVertexB(
|
||||
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
|
||||
float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *V_ITa, const int *V_IT, const float *V_W,
|
||||
int vertexOffset, int tableOffset, int start, int end) {
|
||||
|
||||
int numThreads = omp_get_max_threads();
|
||||
float *vertexResultsArray = (float*)alloca(vertexDesc.length * sizeof(float) * numThreads);
|
||||
float *varyingResultsArray = (float*)alloca(varyingDesc.length * sizeof(float) * numThreads);
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int i = start + tableOffset; i < end + tableOffset; i++) {
|
||||
int h = V_ITa[5*i];
|
||||
@ -169,82 +277,137 @@ void OsdOmpComputeLoopVertexB(
|
||||
beta = (0.625f - beta) * wp;
|
||||
|
||||
int dstIndex = i + vertexOffset - tableOffset;
|
||||
vdesc.Clear(vertex, varying, dstIndex);
|
||||
|
||||
vdesc.AddWithWeight(vertex, dstIndex, p, weight * (1.0f - (beta * n)));
|
||||
int threadId = omp_get_thread_num();
|
||||
float *vertexResults = vertexResultsArray +
|
||||
vertexDesc.length * threadId;
|
||||
float *varyingResults = varyingResultsArray +
|
||||
varyingDesc.length * threadId;
|
||||
|
||||
clear(vertexResults, vertexDesc);
|
||||
clear(varyingResults, varyingDesc);
|
||||
|
||||
addWithWeight(vertexResults, vertex, p, weight * (1.0f - (beta * n)), vertexDesc);
|
||||
|
||||
for (int j = 0; j < n; ++j)
|
||||
vdesc.AddWithWeight(vertex, dstIndex, V_IT[h+j], weight * beta);
|
||||
addWithWeight(vertexResults, vertex, V_IT[h+j], weight * beta, vertexDesc);
|
||||
|
||||
vdesc.AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
|
||||
addWithWeight(varyingResults, varying, p, 1.0f, varyingDesc);
|
||||
|
||||
copy(vertex, vertexResults, dstIndex, vertexDesc);
|
||||
copy(varying, varyingResults, dstIndex, varyingDesc);
|
||||
}
|
||||
}
|
||||
|
||||
void OsdOmpComputeBilinearEdge(
|
||||
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
|
||||
float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *E_IT, int vertexOffset, int tableOffset, int start, int end) {
|
||||
|
||||
int numThreads = omp_get_max_threads();
|
||||
float *vertexResultsArray = (float*)alloca(vertexDesc.length * sizeof(float) * numThreads);
|
||||
float *varyingResultsArray = (float*)alloca(varyingDesc.length * sizeof(float) * numThreads);
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int i = start + tableOffset; i < end + tableOffset; i++) {
|
||||
int eidx0 = E_IT[2*i+0];
|
||||
int eidx1 = E_IT[2*i+1];
|
||||
|
||||
int dstIndex = i + vertexOffset - tableOffset;
|
||||
vdesc.Clear(vertex, varying, dstIndex);
|
||||
|
||||
vdesc.AddWithWeight(vertex, dstIndex, eidx0, 0.5f);
|
||||
vdesc.AddWithWeight(vertex, dstIndex, eidx1, 0.5f);
|
||||
int threadId = omp_get_thread_num();
|
||||
float *vertexResults = vertexResultsArray +
|
||||
vertexDesc.length * threadId;
|
||||
float *varyingResults = varyingResultsArray +
|
||||
varyingDesc.length * threadId;
|
||||
|
||||
vdesc.AddVaryingWithWeight(varying, dstIndex, eidx0, 0.5f);
|
||||
vdesc.AddVaryingWithWeight(varying, dstIndex, eidx1, 0.5f);
|
||||
clear(vertexResults, vertexDesc);
|
||||
clear(varyingResults, varyingDesc);
|
||||
|
||||
addWithWeight(vertexResults, vertex, eidx0, 0.5f, vertexDesc);
|
||||
addWithWeight(vertexResults, vertex, eidx1, 0.5f, vertexDesc);
|
||||
|
||||
addWithWeight(varyingResults, varying, eidx0, 0.5f, varyingDesc);
|
||||
addWithWeight(varyingResults, varying, eidx1, 0.5f, varyingDesc);
|
||||
|
||||
copy(vertex, vertexResults, dstIndex, vertexDesc);
|
||||
copy(varying, varyingResults, dstIndex, varyingDesc);
|
||||
}
|
||||
}
|
||||
|
||||
void OsdOmpComputeBilinearVertex(
|
||||
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
|
||||
float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *V_ITa, int vertexOffset, int tableOffset, int start, int end) {
|
||||
|
||||
int numThreads = omp_get_max_threads();
|
||||
float *vertexResultsArray = (float*)alloca(vertexDesc.length * sizeof(float) * numThreads);
|
||||
float *varyingResultsArray = (float*)alloca(varyingDesc.length * sizeof(float) * numThreads);
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int i = start + tableOffset; i < end + tableOffset; i++) {
|
||||
int p = V_ITa[i];
|
||||
|
||||
int dstIndex = i + vertexOffset - tableOffset;
|
||||
vdesc.Clear(vertex, varying, dstIndex);
|
||||
|
||||
vdesc.AddWithWeight(vertex, dstIndex, p, 1.0f);
|
||||
vdesc.AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
|
||||
int threadId = omp_get_thread_num();
|
||||
float *vertexResults = vertexResultsArray +
|
||||
vertexDesc.length * threadId;
|
||||
float *varyingResults = varyingResultsArray +
|
||||
varyingDesc.length * threadId;
|
||||
|
||||
clear(vertexResults, vertexDesc);
|
||||
clear(varyingResults, varyingDesc);
|
||||
|
||||
addWithWeight(vertexResults, vertex, p, 1.0f, vertexDesc);
|
||||
addWithWeight(varyingResults, varying, p, 1.0f, varyingDesc);
|
||||
|
||||
copy(vertex, vertexResults, dstIndex, vertexDesc);
|
||||
copy(varying, varyingResults, dstIndex, varyingDesc);
|
||||
}
|
||||
}
|
||||
|
||||
void OsdOmpEditVertexAdd(
|
||||
OsdVertexDescriptor const &vdesc, float *vertex,
|
||||
float * vertex,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
int primVarOffset, int primVarWidth, int vertexOffset, int tableOffset,
|
||||
int start, int end,
|
||||
const unsigned int *editIndices, const float *editValues) {
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int i = start+tableOffset; i < end+tableOffset; i++) {
|
||||
vdesc.ApplyVertexEditAdd(vertex,
|
||||
primVarOffset,
|
||||
primVarWidth,
|
||||
editIndices[i] + vertexOffset,
|
||||
&editValues[i*primVarWidth]);
|
||||
|
||||
if (vertex) {
|
||||
int editIndex = editIndices[i] + vertexOffset;
|
||||
float *dst = vertex + editIndex * vertexDesc.stride + primVarOffset;
|
||||
|
||||
for (int i = 0; i < primVarWidth; ++i) {
|
||||
dst[i] += editValues[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void OsdOmpEditVertexSet(
|
||||
OsdVertexDescriptor const &vdesc, float *vertex,
|
||||
float * vertex,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
int primVarOffset, int primVarWidth, int vertexOffset, int tableOffset,
|
||||
int start, int end,
|
||||
const unsigned int *editIndices, const float *editValues) {
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int i = start+tableOffset; i < end+tableOffset; i++) {
|
||||
vdesc.ApplyVertexEditSet(vertex,
|
||||
primVarOffset,
|
||||
primVarWidth,
|
||||
editIndices[i] + vertexOffset,
|
||||
&editValues[i*primVarWidth]);
|
||||
|
||||
if (vertex) {
|
||||
int editIndex = editIndices[i] + vertexOffset;
|
||||
float *dst = vertex + editIndex * vertexDesc.stride + primVarOffset;
|
||||
|
||||
for (int i = 0; i < primVarWidth; ++i) {
|
||||
dst[i] = editValues[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -26,63 +26,73 @@
|
||||
#define OSD_OMP_KERNEL_H
|
||||
|
||||
#include "../version.h"
|
||||
#include "../osd/vertexDescriptor.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
struct OsdVertexDescriptor;
|
||||
|
||||
void OsdOmpComputeFace(OsdVertexDescriptor const &vdesc,
|
||||
float * vertex, float * varying,
|
||||
void OsdOmpComputeFace(float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *F_IT, const int *F_ITa,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdOmpComputeEdge(OsdVertexDescriptor const &vdesc,
|
||||
float *vertex, float * varying,
|
||||
void OsdOmpComputeEdge(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *E_IT, const float *E_ITa,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdOmpComputeVertexA(OsdVertexDescriptor const &vdesc,
|
||||
float *vertex, float * varying,
|
||||
void OsdOmpComputeVertexA(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *V_ITa, const float *V_IT,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end, int pass);
|
||||
|
||||
void OsdOmpComputeVertexB(OsdVertexDescriptor const &vdesc,
|
||||
float *vertex, float * varying,
|
||||
void OsdOmpComputeVertexB(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *V_ITa, const int *V_IT, const float *V_W,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdOmpComputeLoopVertexB(OsdVertexDescriptor const &vdesc,
|
||||
float *vertex, float * varying,
|
||||
void OsdOmpComputeLoopVertexB(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *V_ITa, const int *V_IT,
|
||||
const float *V_W,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdOmpComputeBilinearEdge(OsdVertexDescriptor const &vdesc,
|
||||
float *vertex, float * varying,
|
||||
void OsdOmpComputeBilinearEdge(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *E_IT,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdOmpComputeBilinearVertex(OsdVertexDescriptor const &vdesc,
|
||||
float *vertex, float * varying,
|
||||
void OsdOmpComputeBilinearVertex(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
const int *V_ITa,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdOmpEditVertexAdd(OsdVertexDescriptor const &vdesc, float *vertex,
|
||||
void OsdOmpEditVertexAdd(float *vertex,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
int primVarOffset, int primVarWidth,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end,
|
||||
const unsigned int *editIndices,
|
||||
const float *editValues);
|
||||
|
||||
void OsdOmpEditVertexSet(OsdVertexDescriptor const &vdesc, float *vertex,
|
||||
void OsdOmpEditVertexSet(float *vertex,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
int primVarOffset, int primVarWidth,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end,
|
||||
|
@ -37,9 +37,7 @@ namespace OPENSUBDIV_VERSION {
|
||||
|
||||
|
||||
OsdTbbComputeController::OsdTbbComputeController(int numThreads)
|
||||
: _currentVertexBuffer(NULL),
|
||||
_currentVaryingBuffer(NULL),
|
||||
_numThreads(numThreads) {
|
||||
: _numThreads(numThreads) {
|
||||
|
||||
if(_numThreads == -1)
|
||||
tbb::task_scheduler_init init;
|
||||
@ -55,7 +53,8 @@ OsdTbbComputeController::ApplyBilinearFaceVerticesKernel(
|
||||
assert(context);
|
||||
|
||||
OsdTbbComputeFace(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::F_IT)->GetBuffer(),
|
||||
(const int*)context->GetTable(FarSubdivisionTables::F_ITa)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
@ -68,7 +67,8 @@ OsdTbbComputeController::ApplyBilinearEdgeVerticesKernel(
|
||||
assert(context);
|
||||
|
||||
OsdTbbComputeBilinearEdge(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
@ -80,7 +80,8 @@ OsdTbbComputeController::ApplyBilinearVertexVerticesKernel(
|
||||
assert(context);
|
||||
|
||||
OsdTbbComputeBilinearVertex(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
}
|
||||
@ -92,7 +93,8 @@ OsdTbbComputeController::ApplyCatmarkFaceVerticesKernel(
|
||||
assert(context);
|
||||
|
||||
OsdTbbComputeFace(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::F_IT)->GetBuffer(),
|
||||
(const int*)context->GetTable(FarSubdivisionTables::F_ITa)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
@ -105,7 +107,8 @@ OsdTbbComputeController::ApplyCatmarkEdgeVerticesKernel(
|
||||
assert(context);
|
||||
|
||||
OsdTbbComputeEdge(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
|
||||
(const float*)context->GetTable(FarSubdivisionTables::E_W)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
@ -118,7 +121,8 @@ OsdTbbComputeController::ApplyCatmarkVertexVerticesKernelB(
|
||||
assert(context);
|
||||
|
||||
OsdTbbComputeVertexB(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_IT)->GetBuffer(),
|
||||
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
|
||||
@ -132,7 +136,8 @@ OsdTbbComputeController::ApplyCatmarkVertexVerticesKernelA1(
|
||||
assert(context);
|
||||
|
||||
OsdTbbComputeVertexA(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
|
||||
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), false);
|
||||
@ -145,7 +150,8 @@ OsdTbbComputeController::ApplyCatmarkVertexVerticesKernelA2(
|
||||
assert(context);
|
||||
|
||||
OsdTbbComputeVertexA(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
|
||||
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
|
||||
@ -158,7 +164,8 @@ OsdTbbComputeController::ApplyLoopEdgeVerticesKernel(
|
||||
assert(context);
|
||||
|
||||
OsdTbbComputeEdge(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::E_IT)->GetBuffer(),
|
||||
(const float*)context->GetTable(FarSubdivisionTables::E_W)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
|
||||
@ -171,7 +178,8 @@ OsdTbbComputeController::ApplyLoopVertexVerticesKernelB(
|
||||
assert(context);
|
||||
|
||||
OsdTbbComputeLoopVertexB(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_IT)->GetBuffer(),
|
||||
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
|
||||
@ -185,7 +193,8 @@ OsdTbbComputeController::ApplyLoopVertexVerticesKernelA1(
|
||||
assert(context);
|
||||
|
||||
OsdTbbComputeVertexA(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
|
||||
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), false);
|
||||
@ -198,7 +207,8 @@ OsdTbbComputeController::ApplyLoopVertexVerticesKernelA2(
|
||||
assert(context);
|
||||
|
||||
OsdTbbComputeVertexA(
|
||||
_vdesc, _currentVertexBuffer, _currentVaryingBuffer,
|
||||
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
|
||||
_currentBindState.vertexDesc, _currentBindState.varyingDesc,
|
||||
(const int*)context->GetTable(FarSubdivisionTables::V_ITa)->GetBuffer(),
|
||||
(const float*)context->GetTable(FarSubdivisionTables::V_W)->GetBuffer(),
|
||||
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd(), true);
|
||||
@ -217,24 +227,24 @@ OsdTbbComputeController::ApplyVertexEdits(
|
||||
const OsdCpuTable * editValues = edit->GetEditValues();
|
||||
|
||||
if (edit->GetOperation() == FarVertexEdit::Add) {
|
||||
OsdTbbEditVertexAdd(_vdesc,
|
||||
_currentVertexBuffer,
|
||||
OsdTbbEditVertexAdd(_currentBindState.vertexBuffer,
|
||||
_currentBindState.vertexDesc,
|
||||
edit->GetPrimvarOffset(),
|
||||
edit->GetPrimvarWidth(),
|
||||
batch.GetVertexOffset(),
|
||||
batch.GetTableOffset(),
|
||||
batch.GetStart(),
|
||||
batch.GetVertexOffset(),
|
||||
batch.GetTableOffset(),
|
||||
batch.GetStart(),
|
||||
batch.GetEnd(),
|
||||
static_cast<unsigned int*>(primvarIndices->GetBuffer()),
|
||||
static_cast<float*>(editValues->GetBuffer()));
|
||||
} else if (edit->GetOperation() == FarVertexEdit::Set) {
|
||||
OsdTbbEditVertexSet(_vdesc,
|
||||
_currentVertexBuffer,
|
||||
OsdTbbEditVertexSet(_currentBindState.vertexBuffer,
|
||||
_currentBindState.vertexDesc,
|
||||
edit->GetPrimvarOffset(),
|
||||
edit->GetPrimvarWidth(),
|
||||
batch.GetVertexOffset(),
|
||||
batch.GetTableOffset(),
|
||||
batch.GetStart(),
|
||||
batch.GetVertexOffset(),
|
||||
batch.GetTableOffset(),
|
||||
batch.GetStart(),
|
||||
batch.GetEnd(),
|
||||
static_cast<unsigned int*>(primvarIndices->GetBuffer()),
|
||||
static_cast<float*>(editValues->GetBuffer()));
|
||||
|
@ -29,6 +29,7 @@
|
||||
|
||||
#include "../far/dispatcher.h"
|
||||
#include "../osd/cpuComputeContext.h"
|
||||
#include "../osd/vertexDescriptor.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
@ -65,13 +66,23 @@ public:
|
||||
///
|
||||
/// @param varyingBuffer varying-interpolated data buffer
|
||||
///
|
||||
/// @param vertexDesc the descriptor of vertex elements to be refined.
|
||||
/// if it's null, all primvars in the vertex buffer
|
||||
/// will be refined.
|
||||
///
|
||||
/// @param varyingDesc the descriptor of varying elements to be refined.
|
||||
/// if it's null, all primvars in the varying buffer
|
||||
/// will be refined.
|
||||
///
|
||||
template<class VERTEX_BUFFER, class VARYING_BUFFER>
|
||||
void Refine(OsdCpuComputeContext const *context,
|
||||
FarKernelBatchVector const & batches,
|
||||
VERTEX_BUFFER * vertexBuffer,
|
||||
VARYING_BUFFER * varyingBuffer) {
|
||||
VARYING_BUFFER * varyingBuffer,
|
||||
OsdVertexBufferDescriptor const *vertexDesc=NULL,
|
||||
OsdVertexBufferDescriptor const *varyingDesc=NULL) {
|
||||
|
||||
bind(vertexBuffer, varyingBuffer);
|
||||
bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc);
|
||||
|
||||
FarDispatcher::Refine(this, context, batches, /*maxlevel*/-1);
|
||||
|
||||
@ -128,25 +139,61 @@ protected:
|
||||
|
||||
void ApplyVertexEdits(FarKernelBatch const &batch, ComputeContext const *context) const;
|
||||
|
||||
private:
|
||||
template<class VERTEX_BUFFER, class VARYING_BUFFER>
|
||||
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying) {
|
||||
void bind(VERTEX_BUFFER *vertex, VARYING_BUFFER *varying,
|
||||
OsdVertexBufferDescriptor const *vertexDesc,
|
||||
OsdVertexBufferDescriptor const *varyingDesc) {
|
||||
|
||||
_currentVertexBuffer = vertex ? vertex->BindCpuBuffer() : 0;
|
||||
_currentVaryingBuffer = varying ? varying->BindCpuBuffer() : 0;
|
||||
// if the vertex buffer descriptor is specified, use it.
|
||||
// otherwise, assumes the data is tightly packed in the vertex buffer.
|
||||
if (vertexDesc) {
|
||||
_currentBindState.vertexDesc = *vertexDesc;
|
||||
} else {
|
||||
int numElements = vertex ? vertex->GetNumElements() : 0;
|
||||
_currentBindState.vertexDesc = OsdVertexBufferDescriptor(
|
||||
0, numElements, numElements);
|
||||
}
|
||||
if (varyingDesc) {
|
||||
_currentBindState.varyingDesc = *varyingDesc;
|
||||
} else {
|
||||
int numElements = varying ? varying->GetNumElements() : 0;
|
||||
_currentBindState.varyingDesc = OsdVertexBufferDescriptor(
|
||||
0, numElements, numElements);
|
||||
}
|
||||
|
||||
int numVertexElements = vertex ? vertex->GetNumElements() : 0;
|
||||
int numVaryingElements = varying ? varying->GetNumElements() : 0;
|
||||
_vdesc.Set(numVertexElements, numVaryingElements);
|
||||
// apply vertex offset here
|
||||
if (vertex) {
|
||||
_currentBindState.vertexBuffer =
|
||||
vertex->BindCpuBuffer() + _currentBindState.vertexDesc.offset;
|
||||
} else {
|
||||
_currentBindState.vertexBuffer = NULL;
|
||||
}
|
||||
if (varying) {
|
||||
_currentBindState.varyingBuffer =
|
||||
varying->BindCpuBuffer() + _currentBindState.varyingDesc.offset;
|
||||
} else {
|
||||
_currentBindState.varyingBuffer = NULL;
|
||||
}
|
||||
}
|
||||
void unbind() {
|
||||
_currentVertexBuffer = 0;
|
||||
_currentVaryingBuffer = 0;
|
||||
_vdesc.Reset();
|
||||
_currentBindState.Reset();
|
||||
}
|
||||
|
||||
float *_currentVertexBuffer, *_currentVaryingBuffer;
|
||||
OsdVertexDescriptor _vdesc;
|
||||
private:
|
||||
struct BindState {
|
||||
BindState() : vertexBuffer(NULL), varyingBuffer(NULL) {}
|
||||
void Reset() {
|
||||
vertexBuffer = varyingBuffer = NULL;
|
||||
vertexDesc.Reset();
|
||||
varyingDesc.Reset();
|
||||
}
|
||||
float *vertexBuffer;
|
||||
float *varyingBuffer;
|
||||
OsdVertexBufferDescriptor vertexDesc;
|
||||
OsdVertexBufferDescriptor varyingDesc;
|
||||
};
|
||||
|
||||
BindState _currentBindState;
|
||||
int _numThreads;
|
||||
};
|
||||
|
||||
|
@ -34,10 +34,33 @@ namespace OPENSUBDIV_VERSION {
|
||||
|
||||
#define grain_size 200
|
||||
|
||||
static inline void
|
||||
clear(float *origin, int index, OsdVertexBufferDescriptor const &desc) {
|
||||
|
||||
if (origin) {
|
||||
float *dst = origin + index * desc.stride;
|
||||
memset(dst, 0, desc.length * sizeof(float));
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
addWithWeight(float *origin, int dstIndex, int srcIndex,
|
||||
float weight, OsdVertexBufferDescriptor const &desc) {
|
||||
|
||||
if (origin) {
|
||||
const float *src = origin + srcIndex * desc.stride;
|
||||
float *dst = origin + dstIndex * desc.stride;
|
||||
for (int k = 0; k < desc.length; ++k) {
|
||||
dst[k] += src[k] * weight;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
class TBBFaceKernel {
|
||||
OsdVertexDescriptor const *vdesc;
|
||||
float *vertex;
|
||||
float *varying;
|
||||
OsdVertexBufferDescriptor vertexDesc;
|
||||
OsdVertexBufferDescriptor varyingDesc;
|
||||
int const *F_IT;
|
||||
int const *F_ITa;
|
||||
int vertexOffset;
|
||||
@ -45,10 +68,10 @@ class TBBFaceKernel {
|
||||
|
||||
public:
|
||||
void operator() (tbb::blocked_range<int> const &r) const {
|
||||
if(vdesc->numVertexElements == 4 && varying == NULL) {
|
||||
if(vertexDesc.length == 4 && varying == NULL) {
|
||||
ComputeFaceKernel<4>
|
||||
(vertex, F_IT, F_ITa, vertexOffset, tableOffset, r.begin(), r.end());
|
||||
} else if(vdesc->numVertexElements == 8 && varying == NULL) {
|
||||
} else if(vertexDesc.length == 8 && varying == NULL) {
|
||||
ComputeFaceKernel<8>
|
||||
(vertex, F_IT, F_ITa, vertexOffset, tableOffset, r.begin(), r.end());
|
||||
}
|
||||
@ -62,12 +85,14 @@ public:
|
||||
// XXX: should use local vertex struct variable instead of
|
||||
// accumulating directly into global memory.
|
||||
int dstIndex = i + vertexOffset - tableOffset;
|
||||
vdesc->Clear(vertex, varying, dstIndex);
|
||||
|
||||
clear(vertex, dstIndex, vertexDesc);
|
||||
clear(varying, dstIndex, varyingDesc);
|
||||
|
||||
for (int j = 0; j < n; ++j) {
|
||||
int index = F_IT[h+j];
|
||||
vdesc->AddWithWeight(vertex, dstIndex, index, weight);
|
||||
vdesc->AddVaryingWithWeight(varying, dstIndex, index, weight);
|
||||
addWithWeight(vertex, dstIndex, index, weight, vertexDesc);
|
||||
addWithWeight(varying, dstIndex, index, weight, varyingDesc);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -75,25 +100,28 @@ public:
|
||||
|
||||
TBBFaceKernel(TBBFaceKernel const &other)
|
||||
{
|
||||
this->vdesc = other.vdesc;
|
||||
this->vertex = other.vertex;
|
||||
this->varying= other.varying;
|
||||
this->vertexDesc = other.vertexDesc;
|
||||
this->varyingDesc = other.varyingDesc;
|
||||
this->F_IT = other.F_IT;
|
||||
this->F_ITa = other.F_ITa;
|
||||
this->vertexOffset = other.vertexOffset;
|
||||
this->tableOffset = other.tableOffset;
|
||||
}
|
||||
|
||||
TBBFaceKernel(OsdVertexDescriptor const *vdesc_in,
|
||||
float *vertex_in,
|
||||
TBBFaceKernel(float *vertex_in,
|
||||
float *varying_in,
|
||||
OsdVertexBufferDescriptor const &vertexDesc_in,
|
||||
OsdVertexBufferDescriptor const &varyingDesc_in,
|
||||
int const *F_IT_in,
|
||||
int const *F_ITa_in,
|
||||
int vertexOffset_in,
|
||||
int tableOffset_in) :
|
||||
vdesc (vdesc_in),
|
||||
vertex (vertex_in),
|
||||
varying(varying_in),
|
||||
vertexDesc(vertexDesc_in),
|
||||
varyingDesc(varyingDesc_in),
|
||||
F_IT (F_IT_in),
|
||||
F_ITa (F_ITa_in),
|
||||
vertexOffset(vertexOffset_in),
|
||||
@ -102,20 +130,23 @@ public:
|
||||
};
|
||||
|
||||
void OsdTbbComputeFace(
|
||||
OsdVertexDescriptor const &vdesc, float * vertex, float * varying,
|
||||
float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
int const *F_IT, int const *F_ITa, int vertexOffset, int tableOffset,
|
||||
int start, int end) {
|
||||
|
||||
TBBFaceKernel kernel(&vdesc, vertex, varying, F_IT, F_ITa,
|
||||
TBBFaceKernel kernel(vertex, varying, vertexDesc, varyingDesc, F_IT, F_ITa,
|
||||
vertexOffset, tableOffset);
|
||||
tbb::blocked_range<int> range(start, end, grain_size);
|
||||
tbb::parallel_for(range, kernel);
|
||||
}
|
||||
|
||||
class TBBEdgeKernel {
|
||||
OsdVertexDescriptor const *vdesc;
|
||||
float *vertex;
|
||||
float *varying;
|
||||
OsdVertexBufferDescriptor vertexDesc;
|
||||
OsdVertexBufferDescriptor varyingDesc;
|
||||
int const *E_IT;
|
||||
float const *E_W;
|
||||
int vertexOffset;
|
||||
@ -123,11 +154,11 @@ class TBBEdgeKernel {
|
||||
|
||||
public:
|
||||
void operator() (tbb::blocked_range<int> const &r) const {
|
||||
if(vdesc->numVertexElements == 4 && varying == NULL) {
|
||||
if(vertexDesc.length == 4 && varying == NULL) {
|
||||
ComputeEdgeKernel<4>(vertex, E_IT, E_W, vertexOffset, tableOffset,
|
||||
r.begin(), r.end());
|
||||
}
|
||||
else if(vdesc->numVertexElements == 8 && varying == NULL) {
|
||||
else if(vertexDesc.length == 8 && varying == NULL) {
|
||||
ComputeEdgeKernel<8>(vertex, E_IT, E_W, vertexOffset, tableOffset,
|
||||
r.begin(), r.end());
|
||||
}
|
||||
@ -141,45 +172,49 @@ public:
|
||||
float vertWeight = E_W[i*2+0];
|
||||
|
||||
int dstIndex = i + vertexOffset - tableOffset;
|
||||
vdesc->Clear(vertex, varying, dstIndex);
|
||||
clear(vertex, dstIndex, vertexDesc);
|
||||
clear(varying, dstIndex, varyingDesc);
|
||||
|
||||
vdesc->AddWithWeight(vertex, dstIndex, eidx0, vertWeight);
|
||||
vdesc->AddWithWeight(vertex, dstIndex, eidx1, vertWeight);
|
||||
addWithWeight(vertex, dstIndex, eidx0, vertWeight, vertexDesc);
|
||||
addWithWeight(vertex, dstIndex, eidx1, vertWeight, vertexDesc);
|
||||
|
||||
if (eidx2 != -1) {
|
||||
float faceWeight = E_W[i*2+1];
|
||||
|
||||
vdesc->AddWithWeight(vertex, dstIndex, eidx2, faceWeight);
|
||||
vdesc->AddWithWeight(vertex, dstIndex, eidx3, faceWeight);
|
||||
addWithWeight(vertex, dstIndex, eidx2, faceWeight, vertexDesc);
|
||||
addWithWeight(vertex, dstIndex, eidx3, faceWeight, vertexDesc);
|
||||
}
|
||||
|
||||
vdesc->AddVaryingWithWeight(varying, dstIndex, eidx0, 0.5f);
|
||||
vdesc->AddVaryingWithWeight(varying, dstIndex, eidx1, 0.5f);
|
||||
addWithWeight(varying, dstIndex, eidx0, 0.5f, varyingDesc);
|
||||
addWithWeight(varying, dstIndex, eidx1, 0.5f, varyingDesc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TBBEdgeKernel(TBBEdgeKernel const &other)
|
||||
{
|
||||
this->vdesc = other.vdesc;
|
||||
this->vertex = other.vertex;
|
||||
this->varying= other.varying;
|
||||
this->vertexDesc = other.vertexDesc;
|
||||
this->varyingDesc = other.varyingDesc;
|
||||
this->E_IT = other.E_IT;
|
||||
this->E_W = other.E_W;
|
||||
this->vertexOffset = other.vertexOffset;
|
||||
this->tableOffset = other.tableOffset;
|
||||
}
|
||||
|
||||
TBBEdgeKernel(OsdVertexDescriptor const *vdesc_in,
|
||||
float *vertex_in,
|
||||
TBBEdgeKernel(float *vertex_in,
|
||||
float *varying_in,
|
||||
OsdVertexBufferDescriptor const &vertexDesc_in,
|
||||
OsdVertexBufferDescriptor const &varyingDesc_in,
|
||||
int const *E_IT_in,
|
||||
float const *E_W_in,
|
||||
int vertexOffset_in,
|
||||
int tableOffset_in) :
|
||||
vdesc (vdesc_in),
|
||||
vertex (vertex_in),
|
||||
varying(varying_in),
|
||||
vertexDesc(vertexDesc_in),
|
||||
varyingDesc(varyingDesc_in),
|
||||
E_IT (E_IT_in),
|
||||
E_W (E_W_in),
|
||||
vertexOffset(vertexOffset_in),
|
||||
@ -189,19 +224,22 @@ public:
|
||||
|
||||
|
||||
void OsdTbbComputeEdge(
|
||||
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
|
||||
float *vertex, float *varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
int const *E_IT, float const *E_W, int vertexOffset, int tableOffset,
|
||||
int start, int end) {
|
||||
tbb::blocked_range<int> range(start, end, grain_size);
|
||||
TBBEdgeKernel kernel(&vdesc, vertex, varying, E_IT, E_W,
|
||||
TBBEdgeKernel kernel(vertex, varying, vertexDesc, varyingDesc, E_IT, E_W,
|
||||
vertexOffset, tableOffset);
|
||||
tbb::parallel_for(range, kernel);
|
||||
}
|
||||
|
||||
class TBBVertexKernelA {
|
||||
OsdVertexDescriptor const *vdesc;
|
||||
float *vertex;
|
||||
float *varying;
|
||||
OsdVertexBufferDescriptor vertexDesc;
|
||||
OsdVertexBufferDescriptor varyingDesc;
|
||||
int const *V_ITa;
|
||||
float const *V_W;
|
||||
int vertexOffset;
|
||||
@ -210,11 +248,11 @@ class TBBVertexKernelA {
|
||||
|
||||
public:
|
||||
void operator() (tbb::blocked_range<int> const &r) const {
|
||||
if(vdesc->numVertexElements == 4 && varying == NULL) {
|
||||
if(vertexDesc.length == 4 && varying == NULL) {
|
||||
ComputeVertexAKernel<4>(vertex, V_ITa, V_W, vertexOffset, tableOffset,
|
||||
r.begin(), r.end(), pass);
|
||||
}
|
||||
else if (vdesc->numVertexElements == 8 && varying == NULL) {
|
||||
else if (vertexDesc.length == 8 && varying == NULL) {
|
||||
ComputeVertexAKernel<8>(vertex, V_ITa, V_W, vertexOffset, tableOffset,
|
||||
r.begin(), r.end(), pass);
|
||||
}
|
||||
@ -235,28 +273,31 @@ public:
|
||||
|
||||
int dstIndex = i + vertexOffset - tableOffset;
|
||||
|
||||
if (not pass)
|
||||
vdesc->Clear(vertex, varying, dstIndex);
|
||||
if (not pass) {
|
||||
clear(vertex, dstIndex, vertexDesc);
|
||||
clear(varying, dstIndex, varyingDesc);
|
||||
}
|
||||
|
||||
if (eidx0 == -1 || (pass == 0 && (n == -1))) {
|
||||
vdesc->AddWithWeight(vertex, dstIndex, p, weight);
|
||||
addWithWeight(vertex, dstIndex, p, weight, vertexDesc);
|
||||
} else {
|
||||
vdesc->AddWithWeight(vertex, dstIndex, p, weight * 0.75f);
|
||||
vdesc->AddWithWeight(vertex, dstIndex, eidx0, weight * 0.125f);
|
||||
vdesc->AddWithWeight(vertex, dstIndex, eidx1, weight * 0.125f);
|
||||
addWithWeight(vertex, dstIndex, p, weight * 0.75f, vertexDesc);
|
||||
addWithWeight(vertex, dstIndex, eidx0, weight * 0.125f, vertexDesc);
|
||||
addWithWeight(vertex, dstIndex, eidx1, weight * 0.125f, vertexDesc);
|
||||
}
|
||||
|
||||
if (not pass)
|
||||
vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
|
||||
addWithWeight(varying, dstIndex, p, 1.0f, varyingDesc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TBBVertexKernelA(TBBVertexKernelA const &other)
|
||||
{
|
||||
this->vdesc = other.vdesc;
|
||||
this->vertex = other.vertex;
|
||||
this->varying= other.varying;
|
||||
this->vertexDesc = other.vertexDesc;
|
||||
this->varyingDesc = other.varyingDesc;
|
||||
this->V_ITa = other.V_ITa;
|
||||
this->V_W = other.V_W;
|
||||
this->vertexOffset = other.vertexOffset;
|
||||
@ -264,17 +305,19 @@ public:
|
||||
this->pass = other.pass;
|
||||
}
|
||||
|
||||
TBBVertexKernelA(OsdVertexDescriptor const *vdesc_in,
|
||||
float *vertex_in,
|
||||
TBBVertexKernelA(float *vertex_in,
|
||||
float *varying_in,
|
||||
OsdVertexBufferDescriptor const &vertexDesc_in,
|
||||
OsdVertexBufferDescriptor const &varyingDesc_in,
|
||||
int const *V_ITa_in,
|
||||
float const *V_W_in,
|
||||
int vertexOffset_in,
|
||||
int tableOffset_in,
|
||||
int pass_in) :
|
||||
vdesc (vdesc_in),
|
||||
vertex (vertex_in),
|
||||
varying(varying_in),
|
||||
vertexDesc(vertexDesc_in),
|
||||
varyingDesc(varyingDesc_in),
|
||||
V_ITa (V_ITa_in),
|
||||
V_W (V_W_in),
|
||||
vertexOffset(vertexOffset_in),
|
||||
@ -284,19 +327,23 @@ public:
|
||||
};
|
||||
|
||||
void OsdTbbComputeVertexA(
|
||||
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
|
||||
float *vertex, float *varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
int const *V_ITa, float const *V_W, int vertexOffset, int tableOffset,
|
||||
int start, int end, int pass) {
|
||||
tbb::blocked_range<int> range(start, end, grain_size);
|
||||
TBBVertexKernelA kernel(&vdesc, vertex, varying, V_ITa, V_W,
|
||||
TBBVertexKernelA kernel(vertex, varying, vertexDesc, varyingDesc,
|
||||
V_ITa, V_W,
|
||||
vertexOffset, tableOffset, pass);
|
||||
tbb::parallel_for(range, kernel);
|
||||
}
|
||||
|
||||
class TBBVertexKernelB {
|
||||
OsdVertexDescriptor const *vdesc;
|
||||
float *vertex;
|
||||
float *varying;
|
||||
OsdVertexBufferDescriptor vertexDesc;
|
||||
OsdVertexBufferDescriptor varyingDesc;
|
||||
int const *V_ITa;
|
||||
int const *V_IT;
|
||||
float const *V_W;
|
||||
@ -305,11 +352,11 @@ class TBBVertexKernelB {
|
||||
|
||||
public:
|
||||
void operator() (tbb::blocked_range<int> const &r) const {
|
||||
if(vdesc->numVertexElements == 4 && varying == NULL) {
|
||||
if(vertexDesc.length == 4 && varying == NULL) {
|
||||
ComputeVertexBKernel<4>(vertex, V_ITa, V_IT, V_W,
|
||||
vertexOffset, tableOffset, r.begin(), r.end());
|
||||
}
|
||||
else if(vdesc->numVertexElements == 8 && varying == NULL) {
|
||||
else if(vertexDesc.length == 8 && varying == NULL) {
|
||||
ComputeVertexBKernel<8>(vertex, V_ITa, V_IT, V_W,
|
||||
vertexOffset, tableOffset, r.begin(), r.end());
|
||||
}
|
||||
@ -324,24 +371,26 @@ public:
|
||||
float wv = (n-2.0f) * n * wp;
|
||||
|
||||
int dstIndex = i + vertexOffset - tableOffset;
|
||||
vdesc->Clear(vertex, varying, dstIndex);
|
||||
clear(vertex, dstIndex, vertexDesc);
|
||||
clear(varying, dstIndex, varyingDesc);
|
||||
|
||||
vdesc->AddWithWeight(vertex, dstIndex, p, weight * wv);
|
||||
addWithWeight(vertex, dstIndex, p, weight * wv, vertexDesc);
|
||||
|
||||
for (int j = 0; j < n; ++j) {
|
||||
vdesc->AddWithWeight(vertex, dstIndex, V_IT[h+j*2], weight * wp);
|
||||
vdesc->AddWithWeight(vertex, dstIndex, V_IT[h+j*2+1], weight * wp);
|
||||
addWithWeight(vertex, dstIndex, V_IT[h+j*2], weight * wp, vertexDesc);
|
||||
addWithWeight(vertex, dstIndex, V_IT[h+j*2+1], weight * wp, vertexDesc);
|
||||
}
|
||||
vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
|
||||
addWithWeight(varying, dstIndex, p, 1.0f, varyingDesc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TBBVertexKernelB(TBBVertexKernelB const &other)
|
||||
{
|
||||
this->vdesc = other.vdesc;
|
||||
this->vertex = other.vertex;
|
||||
this->varying= other.varying;
|
||||
this->vertexDesc = other.vertexDesc;
|
||||
this->varyingDesc = other.varyingDesc;
|
||||
this->V_ITa = other.V_ITa;
|
||||
this->V_IT = other.V_IT;
|
||||
this->V_W = other.V_W;
|
||||
@ -349,17 +398,19 @@ public:
|
||||
this->tableOffset = other.tableOffset;
|
||||
}
|
||||
|
||||
TBBVertexKernelB(OsdVertexDescriptor const *vdesc_in,
|
||||
float *vertex_in,
|
||||
TBBVertexKernelB(float *vertex_in,
|
||||
float *varying_in,
|
||||
OsdVertexBufferDescriptor const &vertexDesc_in,
|
||||
OsdVertexBufferDescriptor const &varyingDesc_in,
|
||||
int const *V_ITa_in,
|
||||
int const *V_IT_in,
|
||||
float const *V_W_in,
|
||||
int vertexOffset_in,
|
||||
int tableOffset_in) :
|
||||
vdesc (vdesc_in),
|
||||
vertex (vertex_in),
|
||||
varying(varying_in),
|
||||
vertexDesc(vertexDesc_in),
|
||||
varyingDesc(varyingDesc_in),
|
||||
V_ITa (V_ITa_in),
|
||||
V_IT (V_IT_in),
|
||||
V_W (V_W_in),
|
||||
@ -369,20 +420,24 @@ public:
|
||||
};
|
||||
|
||||
void OsdTbbComputeVertexB(
|
||||
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
|
||||
float *vertex, float *varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
int const *V_ITa, int const *V_IT, float const *V_W,
|
||||
int vertexOffset, int tableOffset, int start, int end) {
|
||||
|
||||
tbb::blocked_range<int> range(start, end, grain_size);
|
||||
TBBVertexKernelB kernel(&vdesc, vertex, varying, V_ITa, V_IT, V_W,
|
||||
TBBVertexKernelB kernel(vertex, varying, vertexDesc, varyingDesc,
|
||||
V_ITa, V_IT, V_W,
|
||||
vertexOffset, tableOffset);
|
||||
tbb::parallel_for(range, kernel);
|
||||
}
|
||||
|
||||
class TBBLoopVertexKernelB {
|
||||
OsdVertexDescriptor const *vdesc;
|
||||
float *vertex;
|
||||
float *varying;
|
||||
OsdVertexBufferDescriptor vertexDesc;
|
||||
OsdVertexBufferDescriptor varyingDesc;
|
||||
int const *V_ITa;
|
||||
int const *V_IT;
|
||||
float const *V_W;
|
||||
@ -391,11 +446,11 @@ class TBBLoopVertexKernelB {
|
||||
|
||||
public:
|
||||
void operator() (tbb::blocked_range<int> const &r) const {
|
||||
if(vdesc->numVertexElements == 4 && varying == NULL) {
|
||||
if(vertexDesc.length == 4 && varying == NULL) {
|
||||
ComputeLoopVertexBKernel<4>(vertex, V_ITa, V_IT, V_W, vertexOffset,
|
||||
tableOffset, r.begin(), r.end());
|
||||
}
|
||||
else if(vdesc->numVertexElements == 8 && varying == NULL) {
|
||||
else if(vertexDesc.length == 8 && varying == NULL) {
|
||||
ComputeLoopVertexBKernel<8>(vertex, V_ITa, V_IT, V_W, vertexOffset,
|
||||
tableOffset, r.begin(), r.end());
|
||||
}
|
||||
@ -412,23 +467,25 @@ public:
|
||||
beta = (0.625f - beta) * wp;
|
||||
|
||||
int dstIndex = i + vertexOffset - tableOffset;
|
||||
vdesc->Clear(vertex, varying, dstIndex);
|
||||
clear(vertex, dstIndex, vertexDesc);
|
||||
clear(varying, dstIndex, varyingDesc);
|
||||
|
||||
vdesc->AddWithWeight(vertex, dstIndex, p, weight * (1.0f - (beta * n)));
|
||||
addWithWeight(vertex, dstIndex, p, weight * (1.0f - (beta * n)), vertexDesc);
|
||||
|
||||
for (int j = 0; j < n; ++j)
|
||||
vdesc->AddWithWeight(vertex, dstIndex, V_IT[h+j], weight * beta);
|
||||
addWithWeight(vertex, dstIndex, V_IT[h+j], weight * beta, vertexDesc);
|
||||
|
||||
vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
|
||||
addWithWeight(varying, dstIndex, p, 1.0f, varyingDesc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TBBLoopVertexKernelB(TBBLoopVertexKernelB const &other)
|
||||
{
|
||||
this->vdesc = other.vdesc;
|
||||
this->vertex = other.vertex;
|
||||
this->varying= other.varying;
|
||||
this->vertexDesc = other.vertexDesc;
|
||||
this->varyingDesc = other.varyingDesc;
|
||||
this->V_ITa = other.V_ITa;
|
||||
this->V_IT = other.V_IT;
|
||||
this->V_W = other.V_W;
|
||||
@ -436,17 +493,19 @@ public:
|
||||
this->tableOffset = other.tableOffset;
|
||||
}
|
||||
|
||||
TBBLoopVertexKernelB(OsdVertexDescriptor const *vdesc_in,
|
||||
float *vertex_in,
|
||||
TBBLoopVertexKernelB(float *vertex_in,
|
||||
float *varying_in,
|
||||
OsdVertexBufferDescriptor const &vertexDesc_in,
|
||||
OsdVertexBufferDescriptor const &varyingDesc_in,
|
||||
int const *V_ITa_in,
|
||||
int const *V_IT_in,
|
||||
float const *V_W_in,
|
||||
int vertexOffset_in,
|
||||
int tableOffset_in) :
|
||||
vdesc (vdesc_in),
|
||||
vertex (vertex_in),
|
||||
varying(varying_in),
|
||||
vertexDesc(vertexDesc_in),
|
||||
varyingDesc(varyingDesc_in),
|
||||
V_ITa (V_ITa_in),
|
||||
V_IT (V_IT_in),
|
||||
V_W (V_W_in),
|
||||
@ -456,31 +515,35 @@ public:
|
||||
};
|
||||
|
||||
void OsdTbbComputeLoopVertexB(
|
||||
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
|
||||
float *vertex, float *varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
int const *V_ITa, int const *V_IT, float const *V_W,
|
||||
int vertexOffset, int tableOffset, int start, int end) {
|
||||
tbb::blocked_range<int> range(start, end, grain_size);
|
||||
TBBLoopVertexKernelB kernel(&vdesc, vertex, varying, V_ITa, V_IT, V_W,
|
||||
TBBLoopVertexKernelB kernel(vertex, varying, vertexDesc, varyingDesc,
|
||||
V_ITa, V_IT, V_W,
|
||||
vertexOffset, tableOffset);
|
||||
|
||||
tbb::parallel_for(range, kernel);
|
||||
}
|
||||
|
||||
class TBBBilinearEdgeKernel {
|
||||
OsdVertexDescriptor const *vdesc;
|
||||
float *vertex;
|
||||
float *varying;
|
||||
OsdVertexBufferDescriptor vertexDesc;
|
||||
OsdVertexBufferDescriptor varyingDesc;
|
||||
int const *E_IT;
|
||||
int vertexOffset;
|
||||
int tableOffset;
|
||||
|
||||
public:
|
||||
void operator() (tbb::blocked_range<int> const &r) const {
|
||||
if(vdesc->numVertexElements == 4 && varying == NULL) {
|
||||
if(vertexDesc.length == 4 && varying == NULL) {
|
||||
ComputeBilinearEdgeKernel<4>(vertex, E_IT, vertexOffset, tableOffset,
|
||||
r.begin(), r.end());
|
||||
}
|
||||
else if(vdesc->numVertexElements == 8 && varying == NULL) {
|
||||
else if(vertexDesc.length == 8 && varying == NULL) {
|
||||
ComputeBilinearEdgeKernel<8>(vertex, E_IT, vertexOffset, tableOffset,
|
||||
r.begin(), r.end());
|
||||
}
|
||||
@ -490,36 +553,40 @@ public:
|
||||
int eidx1 = E_IT[2*i+1];
|
||||
|
||||
int dstIndex = i + vertexOffset - tableOffset;
|
||||
vdesc->Clear(vertex, varying, dstIndex);
|
||||
clear(vertex, dstIndex, vertexDesc);
|
||||
clear(varying, dstIndex, varyingDesc);
|
||||
|
||||
vdesc->AddWithWeight(vertex, dstIndex, eidx0, 0.5f);
|
||||
vdesc->AddWithWeight(vertex, dstIndex, eidx1, 0.5f);
|
||||
addWithWeight(vertex, dstIndex, eidx0, 0.5f, vertexDesc);
|
||||
addWithWeight(vertex, dstIndex, eidx1, 0.5f, vertexDesc);
|
||||
|
||||
vdesc->AddVaryingWithWeight(varying, dstIndex, eidx0, 0.5f);
|
||||
vdesc->AddVaryingWithWeight(varying, dstIndex, eidx1, 0.5f);
|
||||
addWithWeight(varying, dstIndex, eidx0, 0.5f, varyingDesc);
|
||||
addWithWeight(varying, dstIndex, eidx1, 0.5f, varyingDesc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TBBBilinearEdgeKernel(TBBBilinearEdgeKernel const &other)
|
||||
{
|
||||
this->vdesc = other.vdesc;
|
||||
this->vertex = other.vertex;
|
||||
this->varying= other.varying;
|
||||
this->vertexDesc = other.vertexDesc;
|
||||
this->varyingDesc = other.varyingDesc;
|
||||
this->E_IT = other.E_IT;
|
||||
this->vertexOffset = other.vertexOffset;
|
||||
this->tableOffset = other.tableOffset;
|
||||
}
|
||||
|
||||
TBBBilinearEdgeKernel(OsdVertexDescriptor const *vdesc_in,
|
||||
float *vertex_in,
|
||||
TBBBilinearEdgeKernel(float *vertex_in,
|
||||
float *varying_in,
|
||||
OsdVertexBufferDescriptor const &vertexDesc_in,
|
||||
OsdVertexBufferDescriptor const &varyingDesc_in,
|
||||
int const *E_IT_in,
|
||||
int vertexOffset_in,
|
||||
int tableOffset_in) :
|
||||
vdesc (vdesc_in),
|
||||
vertex (vertex_in),
|
||||
varying(varying_in),
|
||||
vertexDesc(vertexDesc_in),
|
||||
varyingDesc(varyingDesc_in),
|
||||
E_IT (E_IT_in),
|
||||
vertexOffset(vertexOffset_in),
|
||||
tableOffset(tableOffset_in)
|
||||
@ -527,25 +594,29 @@ public:
|
||||
};
|
||||
|
||||
void OsdTbbComputeBilinearEdge(
|
||||
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
|
||||
float *vertex, float *varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
int const *E_IT, int vertexOffset, int tableOffset, int start, int end) {
|
||||
tbb::blocked_range<int> range(start, end, grain_size);
|
||||
TBBBilinearEdgeKernel kernel(&vdesc, vertex, varying, E_IT, vertexOffset, tableOffset);
|
||||
TBBBilinearEdgeKernel kernel(vertex, varying, vertexDesc, varyingDesc,
|
||||
E_IT, vertexOffset, tableOffset);
|
||||
tbb::parallel_for(range, kernel);
|
||||
}
|
||||
|
||||
class TBBBilinearVertexKernel {
|
||||
OsdVertexDescriptor const *vdesc;
|
||||
float *vertex;
|
||||
float *varying;
|
||||
OsdVertexBufferDescriptor vertexDesc;
|
||||
OsdVertexBufferDescriptor varyingDesc;
|
||||
int const *V_ITa;
|
||||
int vertexOffset;
|
||||
int tableOffset;
|
||||
|
||||
public:
|
||||
void operator() (tbb::blocked_range<int> const &r) const {
|
||||
int numVertexElements = vdesc->numVertexElements;
|
||||
int numVaryingElements = vdesc->numVaryingElements;
|
||||
int numVertexElements = vertexDesc.length;
|
||||
int numVaryingElements = varyingDesc.length;
|
||||
float *src, *des;
|
||||
for (int i = r.begin() + tableOffset; i < r.end() + tableOffset; i++) {
|
||||
int p = V_ITa[i];
|
||||
@ -564,23 +635,26 @@ public:
|
||||
|
||||
TBBBilinearVertexKernel(TBBBilinearVertexKernel const &other)
|
||||
{
|
||||
this->vdesc = other.vdesc;
|
||||
this->vertex = other.vertex;
|
||||
this->varying= other.varying;
|
||||
this->vertexDesc = other.vertexDesc;
|
||||
this->varyingDesc = other.varyingDesc;
|
||||
this->V_ITa = other.V_ITa;
|
||||
this->vertexOffset = other.vertexOffset;
|
||||
this->tableOffset = other.tableOffset;
|
||||
}
|
||||
|
||||
TBBBilinearVertexKernel(OsdVertexDescriptor const *vdesc_in,
|
||||
float *vertex_in,
|
||||
TBBBilinearVertexKernel(float *vertex_in,
|
||||
float *varying_in,
|
||||
OsdVertexBufferDescriptor const &vertexDesc_in,
|
||||
OsdVertexBufferDescriptor const &varyingDesc_in,
|
||||
int const *V_ITa_in,
|
||||
int vertexOffset_in,
|
||||
int tableOffset_in) :
|
||||
vdesc (vdesc_in),
|
||||
vertex (vertex_in),
|
||||
varying(varying_in),
|
||||
vertexDesc(vertexDesc_in),
|
||||
varyingDesc(varyingDesc_in),
|
||||
V_ITa (V_ITa_in),
|
||||
vertexOffset(vertexOffset_in),
|
||||
tableOffset(tableOffset_in)
|
||||
@ -588,40 +662,53 @@ public:
|
||||
};
|
||||
|
||||
void OsdTbbComputeBilinearVertex(
|
||||
OsdVertexDescriptor const &vdesc, float *vertex, float *varying,
|
||||
float *vertex, float *varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
int const *V_ITa, int vertexOffset, int tableOffset, int start, int end) {
|
||||
tbb::blocked_range<int> range(start, end, grain_size);
|
||||
TBBBilinearVertexKernel kernel(&vdesc, vertex, varying, V_ITa, vertexOffset, tableOffset);
|
||||
TBBBilinearVertexKernel kernel(vertex, varying, vertexDesc, varyingDesc,
|
||||
V_ITa, vertexOffset, tableOffset);
|
||||
tbb::parallel_for(range, kernel);
|
||||
}
|
||||
|
||||
void OsdTbbEditVertexAdd(
|
||||
OsdVertexDescriptor const &vdesc, float *vertex,
|
||||
float *vertex,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
int primVarOffset, int primVarWidth, int vertexOffset, int tableOffset,
|
||||
int start, int end,
|
||||
unsigned int const *editIndices, float const *editValues) {
|
||||
|
||||
for (int i = start+tableOffset; i < end+tableOffset; i++) {
|
||||
vdesc.ApplyVertexEditAdd(vertex,
|
||||
primVarOffset,
|
||||
primVarWidth,
|
||||
editIndices[i] + vertexOffset,
|
||||
&editValues[i*primVarWidth]);
|
||||
|
||||
if (vertex) {
|
||||
int editIndex = editIndices[i] + vertexOffset;
|
||||
float *dst = vertex + editIndex * vertexDesc.stride + primVarOffset;
|
||||
|
||||
for (int i = 0; i < primVarWidth; ++i) {
|
||||
dst[i] += editValues[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void OsdTbbEditVertexSet(
|
||||
OsdVertexDescriptor const &vdesc, float *vertex,
|
||||
float *vertex,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
int primVarOffset, int primVarWidth, int vertexOffset, int tableOffset,
|
||||
int start, int end,
|
||||
unsigned int const *editIndices, float const *editValues) {
|
||||
|
||||
for (int i = start+tableOffset; i < end+tableOffset; i++) {
|
||||
vdesc.ApplyVertexEditSet(vertex,
|
||||
primVarOffset,
|
||||
primVarWidth,
|
||||
editIndices[i] + vertexOffset,
|
||||
&editValues[i*primVarWidth]);
|
||||
|
||||
if (vertex) {
|
||||
int editIndex = editIndices[i] + vertexOffset;
|
||||
float *dst = vertex + editIndex * vertexDesc.stride + primVarOffset;
|
||||
|
||||
for (int i = 0; i < primVarWidth; ++i) {
|
||||
dst[i] = editValues[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -30,59 +30,68 @@
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
struct OsdVertexDescriptor;
|
||||
struct OsdVertexBufferDescriptor;
|
||||
|
||||
void OsdTbbComputeFace(OsdVertexDescriptor const &vdesc,
|
||||
float * vertex, float * varying,
|
||||
void OsdTbbComputeFace(float * vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
int const *F_IT, int const *F_ITa,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdTbbComputeEdge(OsdVertexDescriptor const &vdesc,
|
||||
float *vertex, float * varying,
|
||||
void OsdTbbComputeEdge(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
int const *E_IT, float const *E_ITa,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdTbbComputeVertexA(OsdVertexDescriptor const &vdesc,
|
||||
float *vertex, float * varying,
|
||||
void OsdTbbComputeVertexA(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
int const *V_ITa, float const *V_IT,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end, int pass);
|
||||
|
||||
void OsdTbbComputeVertexB(OsdVertexDescriptor const &vdesc,
|
||||
float *vertex, float * varying,
|
||||
void OsdTbbComputeVertexB(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
int const *V_ITa, int const *V_IT, float const *V_W,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdTbbComputeLoopVertexB(OsdVertexDescriptor const &vdesc,
|
||||
float *vertex, float * varying,
|
||||
void OsdTbbComputeLoopVertexB(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
int const *V_ITa, int const *V_IT,
|
||||
float const *V_W,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdTbbComputeBilinearEdge(OsdVertexDescriptor const &vdesc,
|
||||
float *vertex, float * varying,
|
||||
void OsdTbbComputeBilinearEdge(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
int const *E_IT,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdTbbComputeBilinearVertex(OsdVertexDescriptor const &vdesc,
|
||||
float *vertex, float * varying,
|
||||
void OsdTbbComputeBilinearVertex(float *vertex, float * varying,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
OsdVertexBufferDescriptor const &varyingDesc,
|
||||
int const *V_ITa,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end);
|
||||
|
||||
void OsdTbbEditVertexAdd(OsdVertexDescriptor const &vdesc, float *vertex,
|
||||
void OsdTbbEditVertexAdd(float *vertex,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
int primVarOffset, int primVarWidth,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end,
|
||||
unsigned int const *editIndices,
|
||||
float const *editValues);
|
||||
|
||||
void OsdTbbEditVertexSet(OsdVertexDescriptor const &vdesc, float *vertex,
|
||||
void OsdTbbEditVertexSet(float *vertex,
|
||||
OsdVertexBufferDescriptor const &vertexDesc,
|
||||
int primVarOffset, int primVarWidth,
|
||||
int vertexOffset, int tableOffset,
|
||||
int start, int end,
|
||||
|
@ -31,155 +31,6 @@
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
/// \brief Describes the per-vertex layout of two tightly packed float
/// buffers: one holding vertex-interpolated primvar data and one holding
/// varying-interpolated primvar data.
///
/// All indexing helpers assume vertex n starts at n * numVertexElements
/// (resp. n * numVaryingElements) floats into its buffer.
struct OsdVertexDescriptor {

    /// Constructor: an empty descriptor (zero elements of each kind).
    OsdVertexDescriptor() : numVertexElements(0), numVaryingElements(0) {}

    /// Constructor
    ///
    /// @param numVertexElem   number of vertex-interpolated data elements (floats)
    ///
    /// @param numVaryingElem  number of varying-interpolated data elements (floats)
    ///
    OsdVertexDescriptor(int numVertexElem, int numVaryingElem)
        : numVertexElements(numVertexElem),
          numVaryingElements(numVaryingElem) { }

    /// Sets descriptor
    ///
    /// @param numVertexElem   number of vertex-interpolated data elements (floats)
    ///
    /// @param numVaryingElem  number of varying-interpolated data elements (floats)
    ///
    void Set(int numVertexElem, int numVaryingElem) {
        numVertexElements = numVertexElem;
        numVaryingElements = numVaryingElem;
    }

    /// Resets the descriptor to zero elements of each kind
    void Reset() {
        numVertexElements = numVaryingElements = 0;
    }

    /// Returns the total number of elements (vertex + varying)
    int GetNumElements() const {
        return numVertexElements + numVaryingElements;
    }

    /// True if both element counts match. Const-qualified so that const
    /// descriptors (e.g. those held by const reference) can be compared.
    bool operator == (OsdVertexDescriptor const & other) const {
        return numVertexElements == other.numVertexElements &&
               numVaryingElements == other.numVaryingElements;
    }

    /// Resets the contents of vertex & varying primvar data buffers for a given
    /// vertex. A NULL buffer is skipped.
    ///
    /// @param vertex  The float array containing the vertex-interpolated primvar
    ///                data that needs to be reset.
    ///
    /// @param varying The float array containing the varying-interpolated primvar
    ///                data that needs to be reset.
    ///
    /// @param index   Vertex index in the buffer.
    ///
    void Clear(float *vertex, float *varying, int index) const {
        if (vertex) {
            memset(vertex + index * numVertexElements, 0,
                   sizeof(float) * numVertexElements);
        }
        if (varying) {
            memset(varying + index * numVaryingElements, 0,
                   sizeof(float) * numVaryingElements);
        }
    }

    /// Applies "dst += src*weight" to "vertex" primvar data in a vertex buffer.
    ///
    /// @param vertex    The VertexData buffer
    ///
    /// @param dstIndex  Index of the destination vertex.
    ///
    /// @param srcIndex  Index of the origin vertex.
    ///
    /// @param weight    Weight applied to the primvar data.
    ///
    inline
    void AddWithWeight(float *vertex, int dstIndex, int srcIndex, float weight) const {
        int d = dstIndex * numVertexElements;
        int s = srcIndex * numVertexElements;
#if defined ( __INTEL_COMPILER ) || defined ( __ICC )
    #pragma ivdep
    #pragma vector aligned
#endif
        for (int i = 0; i < numVertexElements; ++i)
            vertex[d++] += vertex[s++] * weight;
    }

    /// Applies "dst += src*weight" to "varying" primvar data in a vertex buffer.
    ///
    /// @param varying   The VaryingData buffer
    ///
    /// @param dstIndex  Index of the destination vertex.
    ///
    /// @param srcIndex  Index of the source vertex.
    ///
    /// @param weight    Weight applied to the primvar data.
    ///
    inline
    void AddVaryingWithWeight(float *varying, int dstIndex, int srcIndex, float weight) const {
        int d = dstIndex * numVaryingElements;
        int s = srcIndex * numVaryingElements;
#if defined ( __INTEL_COMPILER ) || defined ( __ICC )
    #pragma ivdep
    #pragma vector aligned
#endif
        for (int i = 0; i < numVaryingElements; ++i)
            varying[d++] += varying[s++] * weight;
    }

    /// Applies an "add" vertex edit
    ///
    /// @param vertex         The primvar data buffer.
    ///
    /// @param primVarOffset  Offset to the primvar datum.
    ///
    /// @param primVarWidth   Length of the primvar datum.
    ///
    /// @param editIndex      The location of the vertex in the buffer.
    ///
    /// @param editValues     The values to add to the primvar datum.
    ///
    void ApplyVertexEditAdd(float *vertex, int primVarOffset, int primVarWidth,
                            int editIndex, const float *editValues) const {
        int d = editIndex * numVertexElements + primVarOffset;
        for (int i = 0; i < primVarWidth; ++i) {
            vertex[d++] += editValues[i];
        }
    }

    /// Applies a "set" vertex edit
    ///
    /// @param vertex         The primvar data buffer.
    ///
    /// @param primVarOffset  Offset to the primvar datum.
    ///
    /// @param primVarWidth   Length of the primvar datum.
    ///
    /// @param editIndex      The location of the vertex in the buffer.
    ///
    /// @param editValues     The values to assign to the primvar datum.
    ///
    void ApplyVertexEditSet(float *vertex, int primVarOffset, int primVarWidth,
                            int editIndex, const float *editValues) const {
        int d = editIndex * numVertexElements + primVarOffset;
        for (int i = 0; i < primVarWidth; ++i) {
            vertex[d++] = editValues[i];
        }
    }

    int numVertexElements;   // floats of vertex-interpolated data per vertex
    int numVaryingElements;  // floats of varying-interpolated data per vertex
};
|
||||
|
||||
/// \brief Describes vertex elements in interleaved data buffers
|
||||
struct OsdVertexBufferDescriptor {
|
||||
|
||||
@ -207,6 +58,13 @@ struct OsdVertexBufferDescriptor {
|
||||
offset = length = stride = 0;
|
||||
}
|
||||
|
||||
/// True if the descriptors are identical
|
||||
bool operator == ( OsdVertexBufferDescriptor const other ) const {
|
||||
return (offset == other.offset and
|
||||
length == other.length and
|
||||
stride == other.stride);
|
||||
}
|
||||
|
||||
int offset; // offset to desired element data
|
||||
int length; // number or length of the data
|
||||
int stride; // stride to the next element
|
||||
|
Loading…
Reference in New Issue
Block a user