From 8da827336d9d683ce6b1c907d3b1e5e35fe8ee17 Mon Sep 17 00:00:00 2001 From: Takahito Tejima Date: Thu, 9 Apr 2015 11:16:54 -0700 Subject: [PATCH 1/4] Removes FarKernelBatch. This is the first step to tease off Osd compute controller/contexts from Far API. Currently FarStencilTable only creates a kernelbatch for the entire range, so we can use [0, numStencils) for all cases instead of KernelBatch. This might not be true if we apply non-factorized level-wise stencils, then we'll add another modular utility to serve those cases. --- examples/glEvalLimit/glEvalLimit.cpp | 7 +- examples/glShareTopology/glShareTopology.cpp | 6 - opensubdiv/far/CMakeLists.txt | 2 - opensubdiv/far/kernelBatch.h | 82 --------- opensubdiv/far/kernelBatchDispatcher.h | 164 ------------------ opensubdiv/far/stencilTablesFactory.cpp | 9 - opensubdiv/far/stencilTablesFactory.h | 8 - opensubdiv/osd/clComputeContext.cpp | 17 +- opensubdiv/osd/clComputeContext.h | 6 + opensubdiv/osd/clComputeController.cpp | 24 ++- opensubdiv/osd/clComputeController.h | 17 +- opensubdiv/osd/cpuComputeContext.h | 2 +- opensubdiv/osd/cpuComputeController.cpp | 36 ++-- opensubdiv/osd/cpuComputeController.h | 20 +-- opensubdiv/osd/cudaComputeContext.cpp | 16 ++ opensubdiv/osd/cudaComputeContext.h | 6 + opensubdiv/osd/cudaComputeController.cpp | 17 +- opensubdiv/osd/cudaComputeController.h | 14 +- opensubdiv/osd/d3d11ComputeContext.cpp | 18 ++ opensubdiv/osd/d3d11ComputeContext.h | 6 + opensubdiv/osd/d3d11ComputeController.cpp | 16 +- opensubdiv/osd/d3d11ComputeController.h | 25 +-- opensubdiv/osd/d3d11Mesh.h | 21 +-- opensubdiv/osd/glMesh.h | 25 +-- opensubdiv/osd/glslComputeContext.cpp | 20 ++- opensubdiv/osd/glslComputeContext.h | 11 +- opensubdiv/osd/glslComputeController.cpp | 18 +- opensubdiv/osd/glslComputeController.h | 21 +-- .../glslTransformFeedbackComputeContext.cpp | 18 +- .../osd/glslTransformFeedbackComputeContext.h | 8 +- ...glslTransformFeedbackComputeController.cpp | 44 +++-- .../glslTransformFeedbackComputeController.h | 25 +-- opensubdiv/osd/mesh.h | 10 +- opensubdiv/osd/ompComputeController.cpp | 36 ++-- opensubdiv/osd/ompComputeController.h | 20 +-- opensubdiv/osd/tbbComputeController.cpp | 36 ++-- opensubdiv/osd/tbbComputeController.h | 20 +-- regression/osd_regression/main.cpp | 18 +- tutorials/osd/tutorial_0/osd_tutorial_0.cpp | 7 +- 39 files changed, 318 insertions(+), 558 deletions(-) delete mode 100644 opensubdiv/far/kernelBatch.h delete mode 100644 opensubdiv/far/kernelBatchDispatcher.h diff --git a/examples/glEvalLimit/glEvalLimit.cpp b/examples/glEvalLimit/glEvalLimit.cpp index d6ad53a0..d53cfb8b 100644 --- a/examples/glEvalLimit/glEvalLimit.cpp +++ b/examples/glEvalLimit/glEvalLimit.cpp @@ -197,8 +197,6 @@ Osd::CpuComputeContext * g_computeCtx = 0; Osd::CpuComputeController g_computeCtrl; -Far::KernelBatchVector g_kernelBatches; - Osd::CpuEvalLimitContext * g_evalCtx = 0; Osd::CpuEvalLimitController g_evalCtrl; @@ -242,7 +240,7 @@ updateGeom() { g_vertexData->UpdateData( &g_positions[0], 0, nverts); - g_computeCtrl.Compute(g_computeCtx, g_kernelBatches, g_vertexData, g_varyingData); + g_computeCtrl.Compute(g_computeCtx, g_vertexData, g_varyingData); s.Stop(); g_computeTime = float(s.GetElapsed() * 1000.0f); @@ -417,9 +415,6 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) { delete g_evalCtx; g_evalCtx = Osd::CpuEvalLimitContext::Create(*patchTables); - g_kernelBatches.clear(); - g_kernelBatches.push_back(Far::StencilTablesFactory::Create(*concatStencils)); - } { // Create vertex primvar buffer for the CVs diff --git a/examples/glShareTopology/glShareTopology.cpp b/examples/glShareTopology/glShareTopology.cpp index aa751b90..ff2b5943 100644 --- a/examples/glShareTopology/glShareTopology.cpp +++ b/examples/glShareTopology/glShareTopology.cpp @@ -295,8 +295,6 @@ public: _computeContext = ComputeContext::Create(vertexStencils, varyingStencils); - _kernelBatches.push_back(Far::StencilTablesFactory::Create(*vertexStencils)); - _numVertices = vertexStencils->GetNumStencils() + vertexStencils->GetNumControlVertices(); } @@ -328,7 +326,6 @@ public: globalVaryingDesc.stride); _computeController.Compute(_computeContext, - _kernelBatches, typedInstance->GetVertexBuffer(), typedInstance->GetVaryingBuffer(), &vertexDesc, @@ -364,7 +361,6 @@ public: private: COMPUTE_CONTROLLER _computeController; ComputeContext *_computeContext; - Far::KernelBatchVector _kernelBatches; }; // --------------------------------------------------------------------------- @@ -394,8 +390,6 @@ Topology(Far::PatchTables const * patchTables, _computeContext = ComputeContext::Create(g_clContext, vertexStencils, varyingStencils); - _kernelBatches.push_back(Far::StencilTablesFactory::Create(*vertexStencils)); - _numVertices = vertexStencils->GetNumStencils() + vertexStencils->GetNumControlVertices(); } diff --git a/opensubdiv/far/CMakeLists.txt b/opensubdiv/far/CMakeLists.txt index df902186..258ec261 100644 --- a/opensubdiv/far/CMakeLists.txt +++ b/opensubdiv/far/CMakeLists.txt @@ -45,8 +45,6 @@ set(PUBLIC_HEADER_FILES error.h gregoryBasis.h interpolate.h - kernelBatch.h - kernelBatchDispatcher.h patchDescriptor.h patchParam.h patchMap.h diff --git a/opensubdiv/far/kernelBatch.h b/opensubdiv/far/kernelBatch.h deleted file mode 100644 index 03724d15..00000000 --- a/opensubdiv/far/kernelBatch.h +++ /dev/null @@ -1,82 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef FAR_KERNEL_BATCH_H -#define FAR_KERNEL_BATCH_H - -#include "../version.h" - -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Far { - -/// \brief A GP Compute Kernel descriptor. -/// -/// Vertex refinement through subdivision schemes requires the successive -/// application of dedicated compute kernels. OpenSubdiv groups these vertices -/// in batches based on their topology in order to minimize the number of kernel -/// switches to process a given primitive. -/// -struct KernelBatch { - -public: - - enum KernelType { - KERNEL_UNKNOWN=0, - KERNEL_STENCIL_TABLE, - KERNEL_USER_DEFINED - }; - - /// \brief Constructor. - /// - /// @param _kernelType The type of compute kernel kernel - /// - /// @param _level The level of subdivision of the vertices in the batch - /// - /// @param _start Index of the first vertex in the batch - /// - /// @param _end Index of the last vertex in the batch - /// - KernelBatch( int _kernelType, int _level, int _start, int _end ) : - kernelType(_kernelType), level(_level), start(_start), end(_end) { } - - int kernelType, // the type of compute kernel kernel - level, // the level of subdivision of the vertices in the batch - start, // index of the first vertex in the batch - end; // index of the last vertex in the batch -}; - -typedef std::vector KernelBatchVector; - -} // end namespace Far - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif /* FAR_KERNEL_BATCH_H */ diff --git a/opensubdiv/far/kernelBatchDispatcher.h b/opensubdiv/far/kernelBatchDispatcher.h deleted file mode 100644 index e46641b0..00000000 --- a/opensubdiv/far/kernelBatchDispatcher.h +++ /dev/null @@ -1,164 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef FAR_KERNELBATCH_DISPATCHER_H -#define FAR_KERNELBATCH_DISPATCHER_H - -#include "../version.h" - -#include "../far/kernelBatch.h" -#include "../far/stencilTables.h" - -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Far { - -/// \brief Subdivision refinement encapsulation layer. -/// -/// The kernel dispatcher allows client code to customize parts or the entire -/// computation process. This pattern aims at hiding the logic specific to -/// the subdivision algorithms and expose a simplified access to minimalistic -/// compute kernels. By default, meshes revert to a default dispatcher that -/// implements single-threaded CPU kernels. -/// -/// - derive a dispatcher class from this one -/// - override the virtual functions -/// - pass the derived dispatcher to the factory (one instance can be shared by many meshes) -/// -/// Note : the caller is responsible for deleting a custom dispatcher -/// -class KernelBatchDispatcher { -public: - - /// \brief Launches the processing of a vector of kernel batches - /// this is a convenient API for controllers which don't have any user defined kernels. - /// - /// @param controller refinement controller implementation (vertex array) - /// - /// @param context refinement context implementation (subdivision tables) - /// passed to the controller. - /// - /// @param batches batches of kernels that need to be processed - /// - /// @param maxlevel process vertex batches up to this level - /// - template static void Apply( - CONTROLLER *controller, CONTEXT *context, KernelBatchVector const & batches, int maxlevel); - -protected: - - /// \brief Launches the processing of a kernel batch - /// returns true if the batch is handled, otherwise returns false (i.e. user defined kernel) - /// - /// @param controller refinement controller implementation - /// - /// @param context refinement context implementation - /// - /// @param batch a batch of kernel that need to be processed - /// - template static bool ApplyKernel( - CONTROLLER *controller, CONTEXT *context, KernelBatch const &batch); - -}; - -/// -/// \brief Far default controller implementation -/// -/// This is Far's default implementation of a kernal batch controller. -/// -class DefaultController { -public: - - template void ApplyStencilTableKernel( - KernelBatch const &batch, CONTEXT *context) const; - -}; - - -// Launches the processing of a kernel batch -template bool -KernelBatchDispatcher::ApplyKernel(CONTROLLER *controller, CONTEXT *context, - KernelBatch const &batch) { - - if (batch.end==0) { - return true; - } - - switch(batch.kernelType) { - - case KernelBatch::KERNEL_UNKNOWN: - assert(0); - - case KernelBatch::KERNEL_STENCIL_TABLE: - controller->ApplyStencilTableKernel(batch, context); - break; - - default: // user defined kernel type - return false; - } - - return true; -} - -// Launches the processing of a vector of kernel batches -template void -KernelBatchDispatcher::Apply(CONTROLLER *controller, CONTEXT *context, - KernelBatchVector const & batches, int maxlevel) { - - for (int i = 0; i < (int)batches.size(); ++i) { - - const KernelBatch &batch = batches[i]; - - if (maxlevel>=0 and batch.level>=maxlevel) { - continue; - } - - ApplyKernel(controller, context, batch); - } -} - -template void -DefaultController::ApplyStencilTableKernel( - KernelBatch const &batch, CONTEXT *context) const { - - StencilTables const * stencilTables = context->GetStencilTables(); - assert(stencilTables); - - typename CONTEXT::VertexType *vsrc = &context->GetVertices().at(0), - *vdst = vsrc + batch.start + stencilTables->GetNumControlVertices(); - - stencilTables->UpdateValues(vsrc, vdst, batch.start, batch.end); -} - -} // end namespace Far - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif /* FAR_KERNELBATCH_DISPATCHER_H */ diff --git a/opensubdiv/far/stencilTablesFactory.cpp b/opensubdiv/far/stencilTablesFactory.cpp index 299bb3b8..47c38bd3 100644 --- a/opensubdiv/far/stencilTablesFactory.cpp +++ b/opensubdiv/far/stencilTablesFactory.cpp @@ -395,15 +395,6 @@ LimitStencilTablesFactory::Create(TopologyRefiner const & refiner, return result; } -//------------------------------------------------------------------------------ - -KernelBatch -StencilTablesFactory::Create(StencilTables const &stencilTables) { - - return KernelBatch( KernelBatch::KERNEL_STENCIL_TABLE, - -1, 0, stencilTables.GetNumStencils()); -} - } // end namespace Far } // end namespace OPENSUBDIV_VERSION diff --git a/opensubdiv/far/stencilTablesFactory.h b/opensubdiv/far/stencilTablesFactory.h index a9a04bb0..8394d55f 100644 --- a/opensubdiv/far/stencilTablesFactory.h +++ b/opensubdiv/far/stencilTablesFactory.h @@ -27,7 +27,6 @@ #include "../version.h" -#include "../far/kernelBatch.h" #include "../far/patchTables.h" #include @@ -103,13 +102,6 @@ public: /// static StencilTables const * Create(int numTables, StencilTables const ** tables); - /// \brief Returns a KernelBatch applying all the stencil in the tables - /// to primvar data. - /// - /// @param stencilTables The stencil tables to batch - /// - static KernelBatch Create(StencilTables const &stencilTables); - private: // Generate stencils for the coarse control-vertices (single weight = 1.0f) diff --git a/opensubdiv/osd/clComputeContext.cpp b/opensubdiv/osd/clComputeContext.cpp index 3f0731a4..5f02a700 100644 --- a/opensubdiv/osd/clComputeContext.cpp +++ b/opensubdiv/osd/clComputeContext.cpp @@ -60,6 +60,7 @@ public: _offsets = createCLBuffer(stencilTables.GetOffsets(), clContext); _indices = createCLBuffer(stencilTables.GetControlIndices(), clContext); _weights = createCLBuffer(stencilTables.GetWeights(), clContext); + _numStencils = stencilTables.GetNumStencils(); } ~CLStencilTables() { @@ -89,12 +90,17 @@ public: return _weights; } + int GetNumStencils() const { + return _numStencils; + } + private: cl_mem _sizes, _offsets, _indices, _weights; + int _numStencils; }; // ----------------------------------------------------------------------------- @@ -104,7 +110,7 @@ CLComputeContext::CLComputeContext( Far::StencilTables const * varyingStencilTables, cl_context clContext) : _vertexStencilTables(0), _varyingStencilTables(0), - _numControlVertices(0) { + _numControlVertices(0) { if (vertexStencilTables) { _vertexStencilTables = new CLStencilTables(*vertexStencilTables, clContext); @@ -139,6 +145,15 @@ CLComputeContext::HasVaryingStencilTables() const { return _varyingStencilTables ? _varyingStencilTables->IsValid() : false; } +int +CLComputeContext::GetNumStencilsInVertexStencilTables() const { + return _vertexStencilTables ? _vertexStencilTables->GetNumStencils() : 0; +} + +int +CLComputeContext::GetNumStencilsInVaryingStencilTables() const { + return _varyingStencilTables ? _varyingStencilTables->GetNumStencils() : 0; +} // ---------------------------------------------------------------------------- cl_mem diff --git a/opensubdiv/osd/clComputeContext.h b/opensubdiv/osd/clComputeContext.h index 2802361f..8fe718f5 100644 --- a/opensubdiv/osd/clComputeContext.h +++ b/opensubdiv/osd/clComputeContext.h @@ -80,6 +80,12 @@ public: return _numControlVertices; } + /// Returns the number of stencils in vertex stencil table + int GetNumStencilsInVertexStencilTables() const; + + /// Returns the number of stencils in varying stencil table + int GetNumStencilsInVaryingStencilTables() const; + /// Returns the Cuda buffer containing vertex-stencil stencil sizes cl_mem GetVertexStencilTablesSizes() const; diff --git a/opensubdiv/osd/clComputeController.cpp b/opensubdiv/osd/clComputeController.cpp index d87a4506..30b6b65f 100644 --- a/opensubdiv/osd/clComputeController.cpp +++ b/opensubdiv/osd/clComputeController.cpp @@ -32,6 +32,7 @@ #include #include #include +#include namespace OpenSubdiv { namespace OPENSUBDIV_VERSION { @@ -138,18 +139,20 @@ private: // ---------------------------------------------------------------------------- void -CLComputeController::ApplyStencilTableKernel( - Far::KernelBatch const &batch, ComputeContext const *context) { +CLComputeController::ApplyStencilTableKernel(ComputeContext const *context) { assert(context); cl_int errNum; - size_t globalWorkSize[1] = { (size_t)(batch.end - batch.start) }; + size_t globalWorkSize = 0; int ncvs = context->GetNumControlVertices(); if (context->HasVertexStencilTables()) { + int start = 0; + int end = context->GetNumStencilsInVertexStencilTables(); + globalWorkSize = (size_t)(end - start); KernelBundle const * bundle = getKernel(_currentBindState.vertexDesc); @@ -167,14 +170,14 @@ CLComputeController::ApplyStencilTableKernel( clSetKernelArg(kernel, 3, sizeof(cl_mem), &indices); clSetKernelArg(kernel, 4, sizeof(cl_mem), &weights); - clSetKernelArg(kernel, 5, sizeof(int), &batch.start); - clSetKernelArg(kernel, 6, sizeof(int), &batch.end); + clSetKernelArg(kernel, 5, sizeof(int), &start); + clSetKernelArg(kernel, 6, sizeof(int), &end); clSetKernelArg(kernel, 7, sizeof(int), &_currentBindState.vertexDesc.offset); clSetKernelArg(kernel, 8, sizeof(int), &ncvs); errNum = clEnqueueNDRangeKernel( - _clQueue, kernel, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL); + _clQueue, kernel, 1, NULL, &globalWorkSize, NULL, 0, NULL, NULL); if (errNum!=CL_SUCCESS) { Far::Error(Far::FAR_RUNTIME_ERROR, "ApplyStencilTableKernel (%d) ", errNum); @@ -182,6 +185,9 @@ CLComputeController::ApplyStencilTableKernel( } if (context->HasVaryingStencilTables()) { + int start = 0; + int end = context->GetNumStencilsInVaryingStencilTables(); + globalWorkSize = (size_t)(end - start); KernelBundle const * bundle = getKernel(_currentBindState.varyingDesc); @@ -199,14 +205,14 @@ CLComputeController::ApplyStencilTableKernel( clSetKernelArg(kernel, 3, sizeof(cl_mem), &indices); clSetKernelArg(kernel, 4, sizeof(cl_mem), &weights); - clSetKernelArg(kernel, 5, sizeof(int), &batch.start); - clSetKernelArg(kernel, 6, sizeof(int), &batch.end); + clSetKernelArg(kernel, 5, sizeof(int), &start); + clSetKernelArg(kernel, 6, sizeof(int), &end); clSetKernelArg(kernel, 7, sizeof(int), &_currentBindState.varyingDesc.offset); clSetKernelArg(kernel, 8, sizeof(int), &ncvs); errNum = clEnqueueNDRangeKernel( - _clQueue, kernel, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL); + _clQueue, kernel, 1, NULL, &globalWorkSize, NULL, 0, NULL, NULL); if (errNum!=CL_SUCCESS) { Far::Error(Far::FAR_RUNTIME_ERROR, "ApplyStencilTableKernel (%d)", errNum); diff --git a/opensubdiv/osd/clComputeController.h b/opensubdiv/osd/clComputeController.h index e08340f2..16b59d11 100644 --- a/opensubdiv/osd/clComputeController.h +++ b/opensubdiv/osd/clComputeController.h @@ -27,7 +27,6 @@ #include "../version.h" -#include "../far/kernelBatchDispatcher.h" #include "../osd/clComputeContext.h" #include "../osd/vertexDescriptor.h" #include "../osd/opencl.h" @@ -70,9 +69,6 @@ public: /// /// @param context The CLContext to apply refinement operations to /// - /// @param batches Vector of batches of vertices organized by operative - /// kernel - /// /// @param vertexBuffer Vertex-interpolated data buffer /// /// @param vertexDesc The descriptor of vertex elements to be refined. @@ -87,17 +83,14 @@ public: /// template void Compute( CLComputeContext const * context, - Far::KernelBatchVector const & batches, VERTEX_BUFFER * vertexBuffer, VARYING_BUFFER * varyingBuffer, VertexBufferDescriptor const * vertexDesc=NULL, VertexBufferDescriptor const * varyingDesc=NULL ){ - if (batches.empty()) return; - bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc); - Far::KernelBatchDispatcher::Apply(this, context, batches, /*maxlevel*/ -1); + ApplyStencilTableKernel(context); unbind(); } @@ -113,10 +106,9 @@ public: /// template void Compute(CLComputeContext const * context, - Far::KernelBatchVector const & batches, VERTEX_BUFFER *vertexBuffer) { - Compute(context, batches, vertexBuffer, (VERTEX_BUFFER*)0); + Compute(context, vertexBuffer, (VERTEX_BUFFER*)0); } /// Waits until all running subdivision kernels finish. @@ -130,10 +122,7 @@ public: protected: - friend class Far::KernelBatchDispatcher; - - void ApplyStencilTableKernel(Far::KernelBatch const &batch, - ComputeContext const *context); + void ApplyStencilTableKernel(ComputeContext const *context); template void bind( VERTEX_BUFFER * vertexBuffer, diff --git a/opensubdiv/osd/cpuComputeContext.h b/opensubdiv/osd/cpuComputeContext.h index 3bb5bdff..26f720f5 100644 --- a/opensubdiv/osd/cpuComputeContext.h +++ b/opensubdiv/osd/cpuComputeContext.h @@ -61,7 +61,7 @@ public: /// interpolation /// static CpuComputeContext * Create(Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables=0); + Far::StencilTables const * varyingStencilTables=0); /// Destructor virtual ~CpuComputeContext(); diff --git a/opensubdiv/osd/cpuComputeController.cpp b/opensubdiv/osd/cpuComputeController.cpp index 9fa44173..4eca60cf 100644 --- a/opensubdiv/osd/cpuComputeController.cpp +++ b/opensubdiv/osd/cpuComputeController.cpp @@ -46,7 +46,7 @@ CpuComputeController::Synchronize() { void CpuComputeController::ApplyStencilTableKernel( - Far::KernelBatch const &batch, ComputeContext const *context) const { + ComputeContext const *context) const { assert(context); @@ -61,14 +61,17 @@ CpuComputeController::ApplyStencilTableKernel( float * destBuffer = _currentBindState.vertexBuffer + desc.offset + vertexStencils->GetNumControlVertices() * desc.stride; + int start = 0; + int end = vertexStencils->GetNumStencils(); + CpuComputeStencils(_currentBindState.vertexDesc, - srcBuffer, destBuffer, - &vertexStencils->GetSizes().at(0), - &vertexStencils->GetOffsets().at(0), - &vertexStencils->GetControlIndices().at(0), - &vertexStencils->GetWeights().at(0), - batch.start, - batch.end); + srcBuffer, destBuffer, + &vertexStencils->GetSizes().at(0), + &vertexStencils->GetOffsets().at(0), + &vertexStencils->GetControlIndices().at(0), + &vertexStencils->GetWeights().at(0), + start, + end); } Far::StencilTables const * varyingStencils = context->GetVaryingStencilTables(); @@ -82,14 +85,17 @@ CpuComputeController::ApplyStencilTableKernel( float * destBuffer = _currentBindState.varyingBuffer + desc.offset + varyingStencils->GetNumControlVertices() * desc.stride; + int start = 0; + int end = varyingStencils->GetNumStencils(); + CpuComputeStencils(_currentBindState.varyingDesc, - srcBuffer, destBuffer, - &varyingStencils->GetSizes().at(0), - &varyingStencils->GetOffsets().at(0), - &varyingStencils->GetControlIndices().at(0), - &varyingStencils->GetWeights().at(0), - batch.start, - batch.end); + srcBuffer, destBuffer, + &varyingStencils->GetSizes().at(0), + &varyingStencils->GetOffsets().at(0), + &varyingStencils->GetControlIndices().at(0), + &varyingStencils->GetWeights().at(0), + start, + end); } } diff --git a/opensubdiv/osd/cpuComputeController.h b/opensubdiv/osd/cpuComputeController.h index 51c3f2bd..2a9edde7 100644 --- a/opensubdiv/osd/cpuComputeController.h +++ b/opensubdiv/osd/cpuComputeController.h @@ -27,7 +27,6 @@ #include "../version.h" -#include "../far/kernelBatchDispatcher.h" #include "../osd/cpuComputeContext.h" #include "../osd/vertexDescriptor.h" @@ -64,9 +63,6 @@ public: /// /// @param context The CpuContext to apply refinement operations to /// - /// @param batches Vector of batches of vertices organized by operative - /// kernel - /// /// @param vertexBuffer Vertex-interpolated data buffer /// /// @param vertexDesc The descriptor of vertex elements to be refined. @@ -81,17 +77,14 @@ public: /// template void Compute( CpuComputeContext const * context, - Far::KernelBatchVector const & batches, VERTEX_BUFFER * vertexBuffer, VARYING_BUFFER * varyingBuffer, VertexBufferDescriptor const * vertexDesc=NULL, VertexBufferDescriptor const * varyingDesc=NULL ){ - if (batches.empty()) return; - bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc); - Far::KernelBatchDispatcher::Apply(this, context, batches, /*maxlevel*/ -1); + ApplyStencilTableKernel(context); unbind(); } @@ -100,17 +93,13 @@ public: /// /// @param context The CpuContext to apply refinement operations to /// - /// @param batches Vector of batches of vertices organized by operative - /// kernel - /// /// @param vertexBuffer Vertex-interpolated data buffer /// template void Compute(CpuComputeContext const * context, - Far::KernelBatchVector const & batches, VERTEX_BUFFER *vertexBuffer) { - Compute(context, batches, vertexBuffer, (VERTEX_BUFFER*)0); + Compute(context, vertexBuffer, (VERTEX_BUFFER*)0); } /// Waits until all running subdivision kernels finish. @@ -119,10 +108,7 @@ public: protected: - friend class Far::KernelBatchDispatcher; - - void ApplyStencilTableKernel(Far::KernelBatch const &batch, - ComputeContext const *context) const; + void ApplyStencilTableKernel(ComputeContext const *context) const; template void bind( VERTEX_BUFFER * vertexBuffer, diff --git a/opensubdiv/osd/cudaComputeContext.cpp b/opensubdiv/osd/cudaComputeContext.cpp index 98ae4acf..cac7b5c6 100644 --- a/opensubdiv/osd/cudaComputeContext.cpp +++ b/opensubdiv/osd/cudaComputeContext.cpp @@ -67,6 +67,7 @@ public: _offsets = createCudaBuffer(stencilTables.GetOffsets()); _indices = createCudaBuffer(stencilTables.GetControlIndices()); _weights = createCudaBuffer(stencilTables.GetWeights()); + _numStencils = stencilTables.GetNumStencils(); } ~CudaStencilTables() { @@ -96,11 +97,16 @@ public: return _weights; } + int GetNumStencils() const { + return _numStencils; + } + private: void * _sizes, * _offsets, * _indices, * _weights; + int _numStencils; }; // ---------------------------------------------------------------------------- @@ -144,6 +150,16 @@ CudaComputeContext::HasVaryingStencilTables() const { return _varyingStencilTables ? _varyingStencilTables->IsValid() : false; } +int +CudaComputeContext::GetNumStencilsInVertexStencilTables() const { + return _vertexStencilTables ? _vertexStencilTables->GetNumStencils() : 0; +} + +int +CudaComputeContext::GetNumStencilsInVaryingStencilTables() const { + return _varyingStencilTables ? _varyingStencilTables->GetNumStencils() : 0; +} + // ---------------------------------------------------------------------------- void * diff --git a/opensubdiv/osd/cudaComputeContext.h b/opensubdiv/osd/cudaComputeContext.h index 29e1a377..94bfaf2f 100644 --- a/opensubdiv/osd/cudaComputeContext.h +++ b/opensubdiv/osd/cudaComputeContext.h @@ -77,6 +77,12 @@ public: return _numControlVertices; } + /// Returns the number of stencils in vertex stencil tables + int GetNumStencilsInVertexStencilTables() const; + + /// Returns the number of stencils in varying stencil tables + int GetNumStencilsInVaryingStencilTables() const; + /// Returns the Cuda buffer containing vertex-stencil stencil sizes void * GetVertexStencilTablesSizes() const; diff --git a/opensubdiv/osd/cudaComputeController.cpp b/opensubdiv/osd/cudaComputeController.cpp index d38a8d73..53bb4c86 100644 --- a/opensubdiv/osd/cudaComputeController.cpp +++ b/opensubdiv/osd/cudaComputeController.cpp @@ -26,6 +26,7 @@ #include #include +#include extern "C" { @@ -46,7 +47,7 @@ namespace Osd { void CudaComputeController::ApplyStencilTableKernel( - Far::KernelBatch const &batch, ComputeContext const *context) const { + ComputeContext const *context) const { assert(context); @@ -55,6 +56,9 @@ CudaComputeController::ApplyStencilTableKernel( int length = _currentBindState.vertexDesc.length, stride = _currentBindState.vertexDesc.stride; + int start = 0; + int end = context->GetNumStencilsInVertexStencilTables(); + float const * src = _currentBindState.GetVertexBufferAtOffset(); float * dst = const_cast(src) + @@ -65,8 +69,8 @@ CudaComputeController::ApplyStencilTableKernel( (int const *)context->GetVertexStencilTablesOffsets(), (int const *)context->GetVertexStencilTablesIndices(), (float const *)context->GetVertexStencilTablesWeights(), - batch.start, - batch.end); + start, + end); } if (context->HasVaryingStencilTables()) { @@ -74,6 +78,9 @@ CudaComputeController::ApplyStencilTableKernel( int length = _currentBindState.varyingDesc.length, stride = _currentBindState.varyingDesc.stride; + int start = 0; + int end = context->GetNumStencilsInVaryingStencilTables(); + float const * src = _currentBindState.GetVaryingBufferAtOffset(); float * dst = const_cast(src) + @@ -84,8 +91,8 @@ CudaComputeController::ApplyStencilTableKernel( (int const *)context->GetVaryingStencilTablesOffsets(), (int const *)context->GetVaryingStencilTablesIndices(), (float const *)context->GetVaryingStencilTablesWeights(), - batch.start, - batch.end); + start, + end); } } diff --git a/opensubdiv/osd/cudaComputeController.h b/opensubdiv/osd/cudaComputeController.h index e1822234..3309760d 100644 --- a/opensubdiv/osd/cudaComputeController.h +++ b/opensubdiv/osd/cudaComputeController.h @@ -27,7 +27,6 @@ #include "../version.h" -#include "../far/kernelBatchDispatcher.h" #include "../osd/cudaComputeContext.h" #include "../osd/vertexDescriptor.h" @@ -77,17 +76,14 @@ public: /// template void Compute( CudaComputeContext const * context, - Far::KernelBatchVector const & batches, VERTEX_BUFFER * vertexBuffer, VARYING_BUFFER * varyingBuffer, VertexBufferDescriptor const * vertexDesc=NULL, VertexBufferDescriptor const * varyingDesc=NULL ){ - if (batches.empty()) return; - bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc); - Far::KernelBatchDispatcher::Apply(this, context, batches, /*maxlevel*/ -1); + ApplyStencilTableKernel(context); unbind(); } @@ -103,10 +99,9 @@ public: /// template void Compute(CudaComputeContext const * context, - Far::KernelBatchVector const & batches, VERTEX_BUFFER *vertexBuffer) { - Compute(context, batches, vertexBuffer, (VERTEX_BUFFER*)0); + Compute(context, vertexBuffer, (VERTEX_BUFFER*)0); } /// Waits until all running subdivision kernels finish. @@ -114,10 +109,7 @@ public: protected: - friend class Far::KernelBatchDispatcher; - - void ApplyStencilTableKernel(Far::KernelBatch const &batch, - ComputeContext const *context) const; + void ApplyStencilTableKernel(ComputeContext const *context) const; template void bind( VERTEX_BUFFER * vertexBuffer, diff --git a/opensubdiv/osd/d3d11ComputeContext.cpp b/opensubdiv/osd/d3d11ComputeContext.cpp index 14a3f887..1899abc5 100644 --- a/opensubdiv/osd/d3d11ComputeContext.cpp +++ b/opensubdiv/osd/d3d11ComputeContext.cpp @@ -125,6 +125,8 @@ public: _indices.initialize(stencilTables.GetControlIndices(), DXGI_FORMAT_R32_SINT, deviceContext); _weights.initialize(stencilTables.GetWeights(), DXGI_FORMAT_R32_FLOAT, deviceContext); + + _numStencils = stencilTables.GetNumStencils(); } bool IsValid() const { @@ -148,6 +150,10 @@ public: return _weights; } + int GetNumStencils() const { + return _numStencils; + } + void Bind(ID3D11DeviceContext * deviceContext) const { ID3D11ShaderResourceView *SRViews[] = { _sizes.srv, @@ -170,6 +176,8 @@ private: _offsets, _indices, _weights; + + int _numStencils; }; // ---------------------------------------------------------------------------- @@ -217,6 +225,16 @@ D3D11ComputeContext::HasVaryingStencilTables() const { return _varyingStencilTables ? _varyingStencilTables->IsValid() : false; } +int +D3D11ComputeContext::GetNumStencilsInVertexStencilTables() const { + return _vertexStencilTables ? _vertexStencilTables->GetNumStencils() : 0; +} + +int +D3D11ComputeContext::GetNumStencilsInVaryingStencilTables() const { + return _varyingStencilTables ? _varyingStencilTables->GetNumStencils() : 0; +} + // ---------------------------------------------------------------------------- void diff --git a/opensubdiv/osd/d3d11ComputeContext.h b/opensubdiv/osd/d3d11ComputeContext.h index 73457ecf..db5abfff 100644 --- a/opensubdiv/osd/d3d11ComputeContext.h +++ b/opensubdiv/osd/d3d11ComputeContext.h @@ -78,6 +78,12 @@ public: return _numControlVertices; } + /// Returns the number of stencils in vertex stencil table + int GetNumStencilsInVertexStencilTables() const; + + /// Returns the number of stencils in varying stencil table + int GetNumStencilsInVaryingStencilTables() const; + /// Binds D3D11 buffers containing stencils for 'vertex' interpolation /// /// @param deviceContext The D3D device diff --git a/opensubdiv/osd/d3d11ComputeController.cpp b/opensubdiv/osd/d3d11ComputeController.cpp index b1cffbca..e74ab021 100644 --- a/opensubdiv/osd/d3d11ComputeController.cpp +++ b/opensubdiv/osd/d3d11ComputeController.cpp @@ -155,11 +155,11 @@ public: } void ApplyStencilTableKernel(ID3D11DeviceContext *deviceContext, - Far::KernelBatch const &batch, int offset, int numCVs) { + int offset, int numCVs, int start, int end) { KernelUniformArgs args; - args.uniformStart = batch.start; - args.uniformEnd = batch.end; + args.uniformStart = start; + args.uniformEnd = end; args.uniformOffset = offset; args.uniformNumCVs = numCVs; @@ -299,7 +299,7 @@ D3D11ComputeController::unbindBuffer() { void D3D11ComputeController::ApplyStencilTableKernel( - Far::KernelBatch const &batch, D3D11ComputeContext const *context) const { + D3D11ComputeContext const *context, int numStencils) const { assert(context); @@ -307,8 +307,12 @@ D3D11ComputeController::ApplyStencilTableKernel( D3D11ComputeController::KernelBundle * bundle = const_cast(_currentBindState.kernelBundle); - bundle->ApplyStencilTableKernel(_deviceContext, - batch, _currentBindState.desc.offset, context->GetNumControlVertices()); + bundle->ApplyStencilTableKernel( + _deviceContext, + _currentBindState.desc.offset, + context->GetNumControlVertices(), + 0, + numStencils); } diff --git a/opensubdiv/osd/d3d11ComputeController.h b/opensubdiv/osd/d3d11ComputeController.h index 4ea8e321..c40c2e6a 100644 --- a/opensubdiv/osd/d3d11ComputeController.h +++ b/opensubdiv/osd/d3d11ComputeController.h @@ -27,7 +27,6 @@ #include "../version.h" -#include "../far/kernelBatchDispatcher.h" #include "../osd/d3d11ComputeContext.h" #include "../osd/vertexDescriptor.h" @@ -69,9 +68,6 @@ public: /// /// @param context The D3D11Context to apply refinement operations to /// - /// @param batches Vector of batches of vertices organized by operative - /// kernel - /// /// @param vertexBuffer Vertex-interpolated data buffer /// /// @param vertexDesc The descriptor of vertex elements to be refined. @@ -86,20 +82,18 @@ public: /// template void Compute( D3D11ComputeContext const * context, - Far::KernelBatchVector const & batches, VERTEX_BUFFER * vertexBuffer, VARYING_BUFFER * varyingBuffer, VertexBufferDescriptor const * vertexDesc=NULL, VertexBufferDescriptor const * varyingDesc=NULL ){ - if (batches.empty()) return; - if (vertexBuffer) { bind(vertexBuffer, vertexDesc); context->BindVertexStencilTables(_deviceContext); - Far::KernelBatchDispatcher::Apply(this, context, batches, /*maxlevel*/ -1); + ApplyStencilTableKernel( + context, context->GetNumStencilsInVertexStencilTables()); } if (varyingBuffer) { @@ -107,7 +101,8 @@ public: context->BindVaryingStencilTables(_deviceContext); - Far::KernelBatchDispatcher::Apply(this, context, batches, /*maxlevel*/ -1); + ApplyStencilTableKernel( + context, context->GetNumStencilsInVaryingStencilTables()); } context->UnbindStencilTables(_deviceContext); @@ -119,17 +114,13 @@ public: /// /// @param context The D3D11Context to apply refinement operations to /// - /// @param batches Vector of batches of vertices organized by operative - /// kernel - /// /// @param vertexBuffer Vertex-interpolated data buffer /// template void Compute(D3D11ComputeContext const * context, - Far::KernelBatchVector const & batches, VERTEX_BUFFER *vertexBuffer) { - Compute(context, batches, vertexBuffer, (VERTEX_BUFFER*)0); + Compute(context, vertexBuffer, (VERTEX_BUFFER*)0); } /// Waits until all running subdivision kernels finish. @@ -137,10 +128,8 @@ public: protected: - friend class Far::KernelBatchDispatcher; - - void ApplyStencilTableKernel(Far::KernelBatch const &batch, - ComputeContext const *context) const; + void ApplyStencilTableKernel(ComputeContext const *context, + int numStencils) const; template void bind( BUFFER * buffer, diff --git a/opensubdiv/osd/d3d11Mesh.h b/opensubdiv/osd/d3d11Mesh.h index 5013bbaf..b50b77e3 100644 --- a/opensubdiv/osd/d3d11Mesh.h +++ b/opensubdiv/osd/d3d11Mesh.h @@ -78,7 +78,6 @@ public: Mesh(ComputeController * computeController, Far::TopologyRefiner * refiner, Far::PatchTables * patchTables, - Far::KernelBatchVector const & kernelBatches, VertexBuffer * vertexBuffer, VertexBuffer * varyingBuffer, ComputeContext * computeContext, @@ -87,7 +86,6 @@ public: _refiner(refiner), _patchTables(patchTables), - _kernelBatches(kernelBatches), _vertexBuffer(vertexBuffer), _varyingBuffer(varyingBuffer), _computeContext(computeContext), @@ -119,13 +117,14 @@ public: _varyingBuffer->UpdateData(varyingData, startVertex, numVerts, _d3d11DeviceContext); } virtual void Refine() { - _computeController->Compute(_computeContext, _kernelBatches, _vertexBuffer, _varyingBuffer); + _computeController->Compute(_computeContext, _vertexBuffer, _varyingBuffer); } virtual void Refine(VertexBufferDescriptor const *vertexDesc, VertexBufferDescriptor const *varyingDesc, bool interleaved) { - _computeController->Compute(_computeContext, _kernelBatches, - _vertexBuffer, (interleaved ? _vertexBuffer : _varyingBuffer), + _computeController->Compute(_computeContext, + _vertexBuffer, + (interleaved ? _vertexBuffer : _varyingBuffer), vertexDesc, varyingDesc); } virtual void Synchronize() { @@ -176,8 +175,6 @@ private: if (numVertexElements>0) { vertexStencils = Far::StencilTablesFactory::Create(*_refiner, options); - - _kernelBatches.push_back(Far::StencilTablesFactory::Create(*vertexStencils)); } if (numVaryingElements>0) { @@ -235,7 +232,6 @@ private: Far::TopologyRefiner * _refiner; Far::PatchTables * _patchTables; - Far::KernelBatchVector _kernelBatches; VertexBuffer *_vertexBuffer; VertexBuffer *_varyingBuffer; @@ -286,7 +282,6 @@ public: Mesh(ComputeController * computeController, Far::TopologyRefiner * refiner, Far::PatchTables * patchTables, - Far::KernelBatchVector const & kernelBatches, VertexBuffer * vertexBuffer, VertexBuffer * varyingBuffer, ComputeContext * computeContext, @@ -295,7 +290,6 @@ public: _refiner(refiner), _patchTables(patchTables), - _kernelBatches(kernelBatches), _vertexBuffer(vertexBuffer), _varyingBuffer(varyingBuffer), _computeContext(computeContext), @@ -324,12 +318,12 @@ public: _varyingBuffer->UpdateData(varyingData, startVertex, numVerts, _d3d11DeviceContext); } virtual void Refine() { - _computeController->Compute(_computeContext, _kernelBatches, _vertexBuffer, _varyingBuffer); + _computeController->Compute(_computeContext, _vertexBuffer, _varyingBuffer); } virtual void Refine(VertexBufferDescriptor const *vertexDesc, VertexBufferDescriptor const *varyingDesc, bool interleaved) { - _computeController->Compute(_computeContext, _kernelBatches, + _computeController->Compute(_computeContext, _vertexBuffer, (interleaved ? _vertexBuffer : _varyingBuffer), vertexDesc, varyingDesc); } @@ -382,8 +376,6 @@ private: if (numVertexElements>0) { vertexStencils = Far::StencilTablesFactory::Create(*_refiner, options); - - _kernelBatches.push_back(Far::StencilTablesFactory::Create(*vertexStencils)); } if (numVaryingElements>0) { @@ -441,7 +433,6 @@ private: Far::TopologyRefiner * _refiner; Far::PatchTables * _patchTables; - Far::KernelBatchVector _kernelBatches; VertexBuffer *_vertexBuffer; VertexBuffer *_varyingBuffer; diff --git a/opensubdiv/osd/glMesh.h b/opensubdiv/osd/glMesh.h index 2f3f3934..85a7b865 100644 --- a/opensubdiv/osd/glMesh.h +++ b/opensubdiv/osd/glMesh.h @@ -98,7 +98,6 @@ public: Mesh(ComputeController * computeController, Far::TopologyRefiner * refiner, Far::PatchTables * patchTables, - Far::KernelBatchVector const & kernelBatches, VertexBuffer * vertexBuffer, VertexBuffer * varyingBuffer, ComputeContext * computeContext, @@ -106,7 +105,6 @@ public: _refiner(refiner), _patchTables(patchTables), - _kernelBatches(kernelBatches), _vertexBuffer(vertexBuffer), _varyingBuffer(varyingBuffer), _computeContext(computeContext), @@ -140,14 +138,15 @@ public: } virtual void Refine() { - _computeController->Compute(_computeContext, _kernelBatches, _vertexBuffer, _varyingBuffer); + _computeController->Compute(_computeContext, _vertexBuffer, _varyingBuffer); } virtual void Refine(VertexBufferDescriptor const * vertexDesc, VertexBufferDescriptor const * varyingDesc, bool interleaved) { - _computeController->Compute(_computeContext, _kernelBatches, - _vertexBuffer, (interleaved ? _vertexBuffer : _varyingBuffer), + _computeController->Compute(_computeContext, + _vertexBuffer, + (interleaved ? _vertexBuffer : _varyingBuffer), vertexDesc, varyingDesc); } @@ -255,8 +254,6 @@ private: varyingStencils = concatVaryingStencils; } - _kernelBatches.push_back(Far::StencilTablesFactory::Create(*vertexStencils)); - _computeContext = ComputeContext::Create(vertexStencils, varyingStencils); @@ -279,7 +276,6 @@ private: Far::TopologyRefiner * _refiner; Far::PatchTables * _patchTables; - Far::KernelBatchVector _kernelBatches; VertexBuffer *_vertexBuffer; VertexBuffer *_varyingBuffer; @@ -353,7 +349,6 @@ public: Mesh(ComputeController * computeController, Far::TopologyRefiner * refiner, Far::PatchTables * patchTables, - Far::KernelBatchVector const & kernelBatches, VertexBuffer * vertexBuffer, VertexBuffer * varyingBuffer, ComputeContext * computeContext, @@ -363,7 +358,6 @@ public: _refiner(refiner), _patchTables(patchTables), - _kernelBatches(kernelBatches), _vertexBuffer(vertexBuffer), _varyingBuffer(varyingBuffer), _computeContext(computeContext), @@ -395,14 +389,15 @@ public: } virtual void Refine() { - _computeController->Compute(_computeContext, _kernelBatches, _vertexBuffer, _varyingBuffer); + _computeController->Compute(_computeContext, _vertexBuffer, _varyingBuffer); } virtual void Refine(VertexBufferDescriptor const *vertexDesc, VertexBufferDescriptor const *varyingDesc, bool interleaved) { - _computeController->Compute(_computeContext, _kernelBatches, - _vertexBuffer, (interleaved ? _vertexBuffer : _varyingBuffer), + _computeController->Compute(_computeContext, + _vertexBuffer, + (interleaved ? _vertexBuffer : _varyingBuffer), vertexDesc, varyingDesc); } @@ -499,8 +494,6 @@ private: Far::StencilTables const *concatStencils = Far::StencilTablesFactory::Create(2, inStencils); - _kernelBatches.push_back(Far::StencilTablesFactory::Create(*concatStencils)); - Far::StencilTables const *inVaryingStencils[] = { varyingStencils, endCapVaryingStencils }; @@ -513,7 +506,6 @@ private: delete varyingStencils; varyingStencils = concatVaryingStencils; } - _kernelBatches.push_back(Far::StencilTablesFactory::Create(*vertexStencils)); _computeContext = ComputeContext::Create(_clContext, vertexStencils, @@ -538,7 +530,6 @@ private: Far::TopologyRefiner * _refiner; Far::PatchTables * _patchTables; - Far::KernelBatchVector _kernelBatches; VertexBuffer *_vertexBuffer; VertexBuffer *_varyingBuffer; diff --git a/opensubdiv/osd/glslComputeContext.cpp b/opensubdiv/osd/glslComputeContext.cpp index 5d59e8df..54d2ce21 100644 --- a/opensubdiv/osd/glslComputeContext.cpp +++ b/opensubdiv/osd/glslComputeContext.cpp @@ -71,6 +71,7 @@ public: _offsets = createGLSLBuffer(stencilTables.GetOffsets()); _indices = createGLSLBuffer(stencilTables.GetControlIndices()); _weights = createGLSLBuffer(stencilTables.GetWeights()); + _numStencils = stencilTables.GetNumStencils(); } ~GLSLStencilTables() { @@ -100,6 +101,10 @@ public: return _weights; } + int GetNumStencils() const { + return _numStencils; + } + void Bind() const { glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, _sizes); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, _offsets); @@ -122,6 +127,7 @@ private: _offsets, _indices, _weights; + int _numStencils; }; // ----------------------------------------------------------------------------- @@ -130,7 +136,8 @@ GLSLComputeContext::GLSLComputeContext( Far::StencilTables const * vertexStencilTables, Far::StencilTables const * varyingStencilTables) : _vertexStencilTables(0), _varyingStencilTables(0), - _numControlVertices(0) { + _numControlVertices(0), + _numStencils(0) { if (vertexStencilTables) { _vertexStencilTables = new GLSLStencilTables(*vertexStencilTables); @@ -165,8 +172,17 @@ GLSLComputeContext::HasVaryingStencilTables() const { return _varyingStencilTables ? _varyingStencilTables->IsValid() : false; } -// ---------------------------------------------------------------------------- +int +GLSLComputeContext::GetNumStencilsInVertexStencilTables() const { + return _vertexStencilTables ? _vertexStencilTables->GetNumStencils() : false; +} +int +GLSLComputeContext::GetNumStencilsInVaryingStencilTables() const { + return _varyingStencilTables ? _varyingStencilTables->GetNumStencils() : false; +} + +// ---------------------------------------------------------------------------- void GLSLComputeContext::BindVertexStencilTables() const { diff --git a/opensubdiv/osd/glslComputeContext.h b/opensubdiv/osd/glslComputeContext.h index ef96b24b..bb412ee3 100644 --- a/opensubdiv/osd/glslComputeContext.h +++ b/opensubdiv/osd/glslComputeContext.h @@ -76,10 +76,16 @@ public: return _numControlVertices; } - /// Returns the Cuda buffer containing vertex-stencil stencil sizes + /// Returns the number of stencils in vertex stencil table + int GetNumStencilsInVertexStencilTables() const; + + /// Returns the number of stencils in varying stencil table + int GetNumStencilsInVaryingStencilTables() const; + + /// Returns the GL buffer containing vertex-stencil stencil sizes GLuint GetVertexStencilTablesSizes() const; - /// Returns the Cuda buffer containing vertex-stencil stencil offsets + /// Returns the GL buffer containing vertex-stencil stencil offsets GLuint GetVertexStencilTablesOffsets() const; /// Binds GL buffers containing stencils for 'vertex' interpolation @@ -104,6 +110,7 @@ private: * _varyingStencilTables; int _numControlVertices; + int _numStencils; }; } // end namespace Osd diff --git a/opensubdiv/osd/glslComputeController.cpp b/opensubdiv/osd/glslComputeController.cpp index 5c5150d1..e9b0cfe4 100644 --- a/opensubdiv/osd/glslComputeController.cpp +++ b/opensubdiv/osd/glslComputeController.cpp @@ -24,7 +24,6 @@ #include "../osd/glslComputeController.h" #include "../osd/vertexDescriptor.h" -//#include "../osd/debug.h" #include "../osd/opengl.h" #include "../far/error.h" @@ -138,12 +137,13 @@ public: return true; } - void ApplyStencilTableKernel(Far::KernelBatch const &batch, int offset, int numCVs) const { + void ApplyStencilTableKernel(int offset, int numCVs, + int start, int end) const { // select stencil GLSL subroutine glUniformSubroutinesuiv(GL_COMPUTE_SHADER, 1, &_subStencilKernel); - dispatchCompute(offset, numCVs, batch.start, batch.end); + dispatchCompute(offset, numCVs, start, end); } struct Match { @@ -214,12 +214,20 @@ private: void GLSLComputeController::ApplyStencilTableKernel( - Far::KernelBatch const &batch, ComputeContext const *context) const { + ComputeContext const *context, int numStencils) const { assert(context); + // Note: GLSLComputeContext has a state, knowing whether vertex or + // varying stencil tables are being bound. GetNumStencils() reflects it. + // This structure will likely be revisited. + + int start = 0; + int end = numStencils; + _currentBindState.kernelBundle->ApplyStencilTableKernel( - batch, _currentBindState.desc.offset, context->GetNumControlVertices()); + _currentBindState.desc.offset, context->GetNumControlVertices(), + start, end); } // ---------------------------------------------------------------------------- diff --git a/opensubdiv/osd/glslComputeController.h b/opensubdiv/osd/glslComputeController.h index 7eb08c95..4b3b7127 100644 --- a/opensubdiv/osd/glslComputeController.h +++ b/opensubdiv/osd/glslComputeController.h @@ -27,7 +27,6 @@ #include "../version.h" -#include "../far/kernelBatchDispatcher.h" #include "../osd/glslComputeContext.h" #include "../osd/vertexDescriptor.h" @@ -79,28 +78,27 @@ public: /// template void Compute( GLSLComputeContext const * context, - Far::KernelBatchVector const & batches, VERTEX_BUFFER * vertexBuffer, VARYING_BUFFER * varyingBuffer, VertexBufferDescriptor const * vertexDesc=NULL, VertexBufferDescriptor const * varyingDesc=NULL ){ - if (batches.empty()) return; - if (vertexBuffer) { bind(vertexBuffer, vertexDesc); context->BindVertexStencilTables(); - Far::KernelBatchDispatcher::Apply(this, context, batches, /*maxlevel*/ -1); + ApplyStencilTableKernel( + context, context->GetNumStencilsInVertexStencilTables()); } - + if (varyingBuffer) { bind(varyingBuffer, varyingDesc); context->BindVaryingStencilTables(); - Far::KernelBatchDispatcher::Apply(this, context, batches, /*maxlevel*/ -1); + ApplyStencilTableKernel( + context, context->GetNumStencilsInVaryingStencilTables()); } context->UnbindStencilTables(); @@ -119,10 +117,9 @@ public: /// template void Compute(GLSLComputeContext const * context, - Far::KernelBatchVector const & batches, VERTEX_BUFFER *vertexBuffer) { - Compute(context, batches, vertexBuffer, (VERTEX_BUFFER*)0); + Compute(context, vertexBuffer, (VERTEX_BUFFER*)0); } /// Waits until all running subdivision kernels finish. @@ -130,10 +127,8 @@ public: protected: - friend class Far::KernelBatchDispatcher; - - void ApplyStencilTableKernel(Far::KernelBatch const &batch, - ComputeContext const *context) const; + void ApplyStencilTableKernel(ComputeContext const *context, + int numStencils) const; template void bind( BUFFER * buffer, diff --git a/opensubdiv/osd/glslTransformFeedbackComputeContext.cpp b/opensubdiv/osd/glslTransformFeedbackComputeContext.cpp index 65cd2558..50c9fa3c 100644 --- a/opensubdiv/osd/glslTransformFeedbackComputeContext.cpp +++ b/opensubdiv/osd/glslTransformFeedbackComputeContext.cpp @@ -86,6 +86,7 @@ public: _offsets = createGLTextureBuffer(stencilTables.GetOffsets(), GL_R32I); _indices = createGLTextureBuffer(stencilTables.GetControlIndices(), GL_R32I); _weights = createGLTextureBuffer(stencilTables.GetWeights(), GL_R32F); + _numStencils = stencilTables.GetNumStencils(); } ~GLStencilTables() { @@ -115,12 +116,18 @@ public: return _weights; } + int GetNumStencils() const { + return _numStencils; + } + private: GLuint _sizes, _offsets, _indices, _weights; + + int _numStencils; }; // ----------------------------------------------------------------------------- @@ -164,8 +171,17 @@ GLSLTransformFeedbackComputeContext::HasVaryingStencilTables() const { return _varyingStencilTables ? _varyingStencilTables->IsValid() : false; } -// ---------------------------------------------------------------------------- +int +GLSLTransformFeedbackComputeContext::GetNumStencilsInVertexStencilTables() const { + return _vertexStencilTables ? _vertexStencilTables->GetNumStencils() : 0; +} +int +GLSLTransformFeedbackComputeContext::GetNumStencilsInVaryingStencilTables() const { + return _varyingStencilTables ? _varyingStencilTables->GetNumStencils() : 0; +} + +// ---------------------------------------------------------------------------- GLuint GLSLTransformFeedbackComputeContext::GetVertexStencilTablesSizes() const { return _vertexStencilTables ? _vertexStencilTables->GetSizes() : 0; diff --git a/opensubdiv/osd/glslTransformFeedbackComputeContext.h b/opensubdiv/osd/glslTransformFeedbackComputeContext.h index 2ec1de8a..48a76d65 100644 --- a/opensubdiv/osd/glslTransformFeedbackComputeContext.h +++ b/opensubdiv/osd/glslTransformFeedbackComputeContext.h @@ -59,7 +59,7 @@ public: /// interpolation /// static GLSLTransformFeedbackComputeContext * Create(Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables=0); + Far::StencilTables const * varyingStencilTables=0); /// Destructor virtual ~GLSLTransformFeedbackComputeContext(); @@ -75,6 +75,12 @@ public: return _numControlVertices; } + /// Returns the number of stencils in vertex stencil table + int GetNumStencilsInVertexStencilTables() const; + + /// Returns the number of stencils in varying stencil table + int GetNumStencilsInVaryingStencilTables() const; + /// Returns the GL texture buffer containing vertex-stencil stencil sizes GLuint GetVertexStencilTablesSizes() const; diff --git a/opensubdiv/osd/glslTransformFeedbackComputeController.cpp b/opensubdiv/osd/glslTransformFeedbackComputeController.cpp index 391c1e2b..5bd12b01 100644 --- a/opensubdiv/osd/glslTransformFeedbackComputeController.cpp +++ b/opensubdiv/osd/glslTransformFeedbackComputeController.cpp @@ -87,7 +87,7 @@ public: _uniformWeights(0), _uniformStart(0), _uniformEnd(0), - _uniformOffset(0) { } + _uniformPrimvarOffset(0) { } ~KernelBundle() { if (_program) { @@ -95,9 +95,8 @@ public: } } - void UseProgram(int primvarOffset) const { + void UseProgram() const { glUseProgram(_program); - glUniform1i(_uniformOffset, primvarOffset); } bool Compile(VertexBufferDescriptor const & desc) { @@ -138,15 +137,19 @@ public: // outVertexData[2] // (gl_SkipComponents1) // + // note that "primvarOffset" in shader is still needed to read + // interleaved components even if gl_SkipComponents is used. + // char attrName[32]; - for (int i = 0; i < desc.offset; ++i) { + int primvarOffset = (desc.offset % desc.stride); + for (int i = 0; i < primvarOffset; ++i) { outputs.push_back("gl_SkipComponents1"); } for (int i = 0; i < desc.length; ++i) { snprintf(attrName, 32, "outVertexBuffer[%d]", i); outputs.push_back(attrName); } - for (int i = desc.offset + desc.length; i < desc.stride; ++i) { + for (int i = primvarOffset + desc.length; i < desc.stride; ++i) { outputs.push_back("gl_SkipComponents1"); } @@ -192,7 +195,7 @@ public: _uniformStart = glGetUniformLocation(_program, "batchStart"); _uniformEnd = glGetUniformLocation(_program, "batchEnd"); - _uniformOffset = glGetUniformLocation(_program, "primvarOffset"); + _uniformPrimvarOffset = glGetUniformLocation(_program, "primvarOffset"); OSD_DEBUG_CHECK_GL_ERROR("KernelBundle::Compile"); @@ -225,14 +228,16 @@ public: // set batch range glUniform1i(_uniformStart, start); glUniform1i(_uniformEnd, end); - glUniform1i(_uniformOffset, offset); + glUniform1i(_uniformPrimvarOffset, offset); int count = end - start, stride = _desc.stride*sizeof(float); + // note: offset includes both "batching offset" and "primvar offset". + // glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 0, primvarBuffer, - (start + numCVs)*stride + offset*sizeof(float), + (start + numCVs)*stride + (offset - offset%stride)*sizeof(float), count*stride); glBeginTransformFeedback(GL_POINTS); @@ -246,13 +251,14 @@ public: //OSD_DEBUG_CHECK_GL_ERROR("TransformPrimvarBuffer\n"); } - void ApplyStencilTableKernel(Far::KernelBatch const &batch, - GLuint primvarBuffer, int offset, int numCVs) const { + void ApplyStencilTableKernel(GLuint primvarBuffer, + int offset, int numCVs, + int start, int end) const { glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subStencilKernel); TransformPrimvarBuffer(primvarBuffer, - offset, numCVs, batch.start, batch.end); + offset, numCVs, start, end); } struct Match { @@ -283,7 +289,7 @@ private: _uniformStart, // batch _uniformEnd, - _uniformOffset; // GL primvar buffer descriptor + _uniformPrimvarOffset; VertexBufferDescriptor _desc; // primvar buffer descriptor }; @@ -294,7 +300,7 @@ GLSLTransformFeedbackComputeController::bindBufferAndProgram( GLuint & feedbackTexture) { glEnable(GL_RASTERIZER_DISCARD); - _currentBindState.kernelBundle->UseProgram(/*primvarOffset*/0); + _currentBindState.kernelBundle->UseProgram(); if (not feedbackTexture) { glGenTextures(1, &feedbackTexture); @@ -385,14 +391,18 @@ GLSLTransformFeedbackComputeController::getKernel( void GLSLTransformFeedbackComputeController::ApplyStencilTableKernel( - Far::KernelBatch const &batch, - GLSLTransformFeedbackComputeContext const *context) const { + GLSLTransformFeedbackComputeContext const *context, int numStencils) const { assert(context); - _currentBindState.kernelBundle->ApplyStencilTableKernel(batch, + int start = 0; + int end = numStencils; + + _currentBindState.kernelBundle->ApplyStencilTableKernel( _currentBindState.buffer, _currentBindState.desc.offset, - context->GetNumControlVertices()); + context->GetNumControlVertices(), + start, + end); } diff --git a/opensubdiv/osd/glslTransformFeedbackComputeController.h b/opensubdiv/osd/glslTransformFeedbackComputeController.h index ad6af0f4..50b38f54 100644 --- a/opensubdiv/osd/glslTransformFeedbackComputeController.h +++ b/opensubdiv/osd/glslTransformFeedbackComputeController.h @@ -27,7 +27,6 @@ #include "../version.h" -#include "../far/kernelBatchDispatcher.h" #include "../osd/glslTransformFeedbackComputeContext.h" #include "../osd/vertexDescriptor.h" @@ -67,9 +66,6 @@ public: /// @param context The GLSLTransformFeedbackComputeContext to apply /// refinement operations to /// - /// @param batches Vector of batches of vertices organized by operative - /// kernel - /// /// @param vertexBuffer Vertex-interpolated data buffer /// /// @param vertexDesc The descriptor of vertex elements to be refined. @@ -84,21 +80,19 @@ public: /// template void Compute( GLSLTransformFeedbackComputeContext const * context, - Far::KernelBatchVector const & batches, VERTEX_BUFFER * vertexBuffer, VARYING_BUFFER * varyingBuffer, VertexBufferDescriptor const * vertexDesc=NULL, VertexBufferDescriptor const * varyingDesc=NULL ){ - if (batches.empty()) return; - if (vertexBuffer) { bind(vertexBuffer, vertexDesc, _vertexTexture); bindContextStencilTables(context, false); - Far::KernelBatchDispatcher::Apply(this, context, batches, /*maxlevel*/ -1); + ApplyStencilTableKernel( + context, context->GetNumStencilsInVertexStencilTables()); } if (varyingBuffer) { @@ -107,7 +101,8 @@ public: bindContextStencilTables(context, true); - Far::KernelBatchDispatcher::Apply(this, context, batches, /*maxlevel*/ -1); + ApplyStencilTableKernel( + context, context->GetNumStencilsInVaryingStencilTables()); } unbind(); } @@ -117,17 +112,13 @@ public: /// @param context The GLSLTransformFeedbackComputeContext to apply /// refinement operations to /// - /// @param batches Vector of batches of vertices organized by operative - /// kernel - /// /// @param vertexBuffer Vertex-interpolated data buffer /// template void Compute(GLSLTransformFeedbackComputeContext const * context, - Far::KernelBatchVector const & batches, VERTEX_BUFFER *vertexBuffer) { - Compute(context, batches, vertexBuffer, (VERTEX_BUFFER*)0); + Compute(context, vertexBuffer, (VERTEX_BUFFER*)0); } /// Waits until all running subdivision kernels finish. @@ -135,10 +126,8 @@ public: protected: - friend class Far::KernelBatchDispatcher; - - void ApplyStencilTableKernel(Far::KernelBatch const &batch, - ComputeContext const *context) const; + void ApplyStencilTableKernel(ComputeContext const *context, + int numStencils) const; template void bind( BUFFER * buffer, VertexBufferDescriptor const * desc, diff --git a/opensubdiv/osd/mesh.h b/opensubdiv/osd/mesh.h index 7ae49026..f28ecde4 100644 --- a/opensubdiv/osd/mesh.h +++ b/opensubdiv/osd/mesh.h @@ -27,7 +27,6 @@ #include "../version.h" -#include "../far/kernelBatch.h" #include "../far/topologyRefiner.h" #include "../far/patchTablesFactory.h" #include "../far/stencilTables.h" @@ -154,7 +153,6 @@ public: Mesh(ComputeController * computeController, Far::TopologyRefiner * refiner, Far::PatchTables * patchTables, - Far::KernelBatchVector const & kernelBatches, VertexBuffer * vertexBuffer, VertexBuffer * varyingBuffer, ComputeContext * computeContext, @@ -162,7 +160,6 @@ public: _refiner(refiner), _patchTables(patchTables), - _kernelBatches(kernelBatches), _vertexBuffer(vertexBuffer), _varyingBuffer(varyingBuffer), _computeContext(computeContext), @@ -192,11 +189,11 @@ public: } virtual void Refine() { - _computeController->Compute(_computeContext, _kernelBatches, _vertexBuffer, _varyingBuffer); + _computeController->Compute(_computeContext, _vertexBuffer, _varyingBuffer); } virtual void Refine(VertexBufferDescriptor const *vertexDesc, VertexBufferDescriptor const *varyingDesc) { - _computeController->Refine(_computeContext, _kernelBatches, _vertexBuffer, _varyingBuffer, vertexDesc, varyingDesc); + _computeController->Refine(_computeContext, _vertexBuffer, _varyingBuffer, vertexDesc, varyingDesc); } virtual void Synchronize() { @@ -237,8 +234,6 @@ private: if (numVertexElements>0) { vertexStencils = Far::StencilTablesFactory::Create(*_refiner, options); - - _kernelBatches.push_back(Far::StencilTablesFactory::Create(*vertexStencils)); } if (numVaryingElements>0) { @@ -291,7 +286,6 @@ private: Far::TopologyRefiner * _refiner; Far::PatchTables * _patchTables; - Far::KernelBatchVector _kernelBatches; VertexBuffer * _vertexBuffer, * _varyingBuffer; diff --git a/opensubdiv/osd/ompComputeController.cpp b/opensubdiv/osd/ompComputeController.cpp index c948ee34..3b9f1eeb 100644 --- a/opensubdiv/osd/ompComputeController.cpp +++ b/opensubdiv/osd/ompComputeController.cpp @@ -40,7 +40,7 @@ OmpComputeController::OmpComputeController(int numThreads) { void OmpComputeController::ApplyStencilTableKernel( - Far::KernelBatch const &batch, ComputeContext const *context) const { + ComputeContext const *context) const { assert(context); @@ -48,6 +48,9 @@ OmpComputeController::ApplyStencilTableKernel( if (vertexStencils and _currentBindState.vertexBuffer) { + int start = 0; + int end = vertexStencils->GetNumStencils(); + VertexBufferDescriptor const & desc = _currentBindState.vertexDesc; float const * srcBuffer = _currentBindState.vertexBuffer + desc.offset; @@ -56,19 +59,22 @@ OmpComputeController::ApplyStencilTableKernel( vertexStencils->GetNumControlVertices() * desc.stride; OmpComputeStencils(_currentBindState.vertexDesc, - srcBuffer, destBuffer, - &vertexStencils->GetSizes().at(0), - &vertexStencils->GetOffsets().at(0), - &vertexStencils->GetControlIndices().at(0), - &vertexStencils->GetWeights().at(0), - batch.start, - batch.end); + srcBuffer, destBuffer, + &vertexStencils->GetSizes().at(0), + &vertexStencils->GetOffsets().at(0), + &vertexStencils->GetControlIndices().at(0), + &vertexStencils->GetWeights().at(0), + start, + end); } Far::StencilTables const * varyingStencils = context->GetVaryingStencilTables(); if (varyingStencils and _currentBindState.varyingBuffer) { + int start = 0; + int end = varyingStencils->GetNumStencils(); + VertexBufferDescriptor const & desc = _currentBindState.varyingDesc; float const * srcBuffer = _currentBindState.varyingBuffer + desc.offset; @@ -77,13 +83,13 @@ OmpComputeController::ApplyStencilTableKernel( varyingStencils->GetNumControlVertices() * desc.stride; OmpComputeStencils(_currentBindState.varyingDesc, - srcBuffer, destBuffer, - &varyingStencils->GetSizes().at(0), - &varyingStencils->GetOffsets().at(0), - &varyingStencils->GetControlIndices().at(0), - &varyingStencils->GetWeights().at(0), - batch.start, - batch.end); + srcBuffer, destBuffer, + &varyingStencils->GetSizes().at(0), + &varyingStencils->GetOffsets().at(0), + &varyingStencils->GetControlIndices().at(0), + &varyingStencils->GetWeights().at(0), + start, + end); } } diff --git a/opensubdiv/osd/ompComputeController.h b/opensubdiv/osd/ompComputeController.h index f43301f1..29831d4c 100644 --- a/opensubdiv/osd/ompComputeController.h +++ b/opensubdiv/osd/ompComputeController.h @@ -27,7 +27,6 @@ #include "../version.h" -#include "../far/kernelBatchDispatcher.h" #include "../osd/cpuComputeContext.h" #include "../osd/vertexDescriptor.h" @@ -66,9 +65,6 @@ public: /// /// @param context The CpuContext to apply refinement operations to /// - /// @param batches Vector of batches of vertices organized by operative - /// kernel - /// /// @param vertexBuffer Vertex-interpolated data buffer /// /// @param vertexDesc The descriptor of vertex elements to be refined. @@ -83,19 +79,16 @@ public: /// template void Compute( CpuComputeContext const * context, - Far::KernelBatchVector const & batches, VERTEX_BUFFER * vertexBuffer, VARYING_BUFFER * varyingBuffer, VertexBufferDescriptor const * vertexDesc=NULL, VertexBufferDescriptor const * varyingDesc=NULL ){ - if (batches.empty()) return; - omp_set_num_threads(_numThreads); bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc); - Far::KernelBatchDispatcher::Apply(this, context, batches, /*maxlevel*/ -1); + ApplyStencilTableKernel(context); unbind(); } @@ -104,17 +97,13 @@ public: /// /// @param context The CpuContext to apply refinement operations to /// - /// @param batches Vector of batches of vertices organized by operative - /// kernel - /// /// @param vertexBuffer Vertex-interpolated data buffer /// template void Compute(CpuComputeContext const * context, - Far::KernelBatchVector const & batches, VERTEX_BUFFER *vertexBuffer) { - Compute(context, batches, vertexBuffer, (VERTEX_BUFFER*)0); + Compute(context, vertexBuffer, (VERTEX_BUFFER*)0); } /// Waits until all running subdivision kernels finish. @@ -122,10 +111,7 @@ public: protected: - friend class Far::KernelBatchDispatcher; - - void ApplyStencilTableKernel(Far::KernelBatch const &batch, - ComputeContext const *context) const; + void ApplyStencilTableKernel(ComputeContext const *context) const; template void bind( VERTEX_BUFFER * vertexBuffer, diff --git a/opensubdiv/osd/tbbComputeController.cpp b/opensubdiv/osd/tbbComputeController.cpp index a87f2fa0..6ce55113 100644 --- a/opensubdiv/osd/tbbComputeController.cpp +++ b/opensubdiv/osd/tbbComputeController.cpp @@ -49,7 +49,7 @@ TbbComputeController::TbbComputeController(int numThreads) void TbbComputeController::ApplyStencilTableKernel( - Far::KernelBatch const &batch, ComputeContext const *context) const { + ComputeContext const *context) const { assert(context); @@ -57,6 +57,9 @@ TbbComputeController::ApplyStencilTableKernel( if (vertexStencils and _currentBindState.vertexBuffer) { + int start = 0; + int end = vertexStencils->GetNumStencils(); + VertexBufferDescriptor const & desc = _currentBindState.vertexDesc; float const * srcBuffer = _currentBindState.vertexBuffer + desc.offset; @@ -65,19 +68,22 @@ TbbComputeController::ApplyStencilTableKernel( vertexStencils->GetNumControlVertices() * desc.stride; TbbComputeStencils(_currentBindState.vertexDesc, - srcBuffer, destBuffer, - &vertexStencils->GetSizes().at(0), - &vertexStencils->GetOffsets().at(0), - &vertexStencils->GetControlIndices().at(0), - &vertexStencils->GetWeights().at(0), - batch.start, - batch.end); + srcBuffer, destBuffer, + &vertexStencils->GetSizes().at(0), + &vertexStencils->GetOffsets().at(0), + &vertexStencils->GetControlIndices().at(0), + &vertexStencils->GetWeights().at(0), + start, + end); } Far::StencilTables const * varyingStencils = context->GetVaryingStencilTables(); if (varyingStencils and _currentBindState.varyingBuffer) { + int start = 0; + int end = varyingStencils->GetNumStencils(); + VertexBufferDescriptor const & desc = _currentBindState.varyingDesc; float const * srcBuffer = _currentBindState.varyingBuffer + desc.offset; @@ -86,13 +92,13 @@ TbbComputeController::ApplyStencilTableKernel( varyingStencils->GetNumControlVertices() * desc.stride; TbbComputeStencils(_currentBindState.varyingDesc, - srcBuffer, destBuffer, - &varyingStencils->GetSizes().at(0), - &varyingStencils->GetOffsets().at(0), - &varyingStencils->GetControlIndices().at(0), - &varyingStencils->GetWeights().at(0), - batch.start, - batch.end); + srcBuffer, destBuffer, + &varyingStencils->GetSizes().at(0), + &varyingStencils->GetOffsets().at(0), + &varyingStencils->GetControlIndices().at(0), + &varyingStencils->GetWeights().at(0), + start, + end); } } diff --git a/opensubdiv/osd/tbbComputeController.h b/opensubdiv/osd/tbbComputeController.h index 7f1ac4c0..6bb13d87 100644 --- a/opensubdiv/osd/tbbComputeController.h +++ b/opensubdiv/osd/tbbComputeController.h @@ -27,7 +27,6 @@ #include "../version.h" -#include "../far/kernelBatchDispatcher.h" #include "../osd/cpuComputeContext.h" #include "../osd/vertexDescriptor.h" @@ -62,9 +61,6 @@ public: /// /// @param context The CpuContext to apply refinement operations to /// - /// @param batches Vector of batches of vertices organized by operative - /// kernel - /// /// @param vertexBuffer Vertex-interpolated data buffer /// /// @param vertexDesc The descriptor of vertex elements to be refined. @@ -79,17 +75,14 @@ public: /// template void Compute( CpuComputeContext const * context, - Far::KernelBatchVector const & batches, VERTEX_BUFFER * vertexBuffer, VARYING_BUFFER * varyingBuffer, VertexBufferDescriptor const * vertexDesc=NULL, VertexBufferDescriptor const * varyingDesc=NULL ){ - if (batches.empty()) return; - bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc); - Far::KernelBatchDispatcher::Apply(this, context, batches, /*maxlevel*/ -1); + ApplyStencilTableKernel(context); unbind(); } @@ -98,17 +91,13 @@ public: /// /// @param context The CpuContext to apply refinement operations to /// - /// @param batches Vector of batches of vertices organized by operative - /// kernel - /// /// @param vertexBuffer Vertex-interpolated data buffer /// template void Compute(CpuComputeContext const * context, - Far::KernelBatchVector const & batches, VERTEX_BUFFER *vertexBuffer) { - Compute(context, batches, vertexBuffer, (VERTEX_BUFFER*)0); + Compute(context, vertexBuffer, (VERTEX_BUFFER*)0); } /// Waits until all running subdivision kernels finish. @@ -116,10 +105,7 @@ public: protected: - friend class Far::KernelBatchDispatcher; - - void ApplyStencilTableKernel(Far::KernelBatch const &batch, - ComputeContext const *context) const; + void ApplyStencilTableKernel(ComputeContext const *context) const; template void bind( VERTEX_BUFFER * vertexBuffer, diff --git a/regression/osd_regression/main.cpp b/regression/osd_regression/main.cpp index ec499863..6caa4c36 100644 --- a/regression/osd_regression/main.cpp +++ b/regression/osd_regression/main.cpp @@ -303,11 +303,7 @@ checkMeshCPU( FarTopologyRefiner *refiner, vb->UpdateData( coarseverts[0].GetPos(), 0, (int)coarseverts.size() ); - Far::KernelBatchVector kernelBatches; - kernelBatches.push_back( - Far::StencilTablesFactory::Create(*vertexStencils)); - - controller->Compute( context, kernelBatches, vb ); + controller->Compute( context, vb ); int result = checkVertexBuffer(*refiner, refmesh, vb->BindCpuBuffer(), vb->GetNumElements()); @@ -341,11 +337,7 @@ checkMeshCPUGL(FarTopologyRefiner *refiner, vb->UpdateData( coarseverts[0].GetPos(), 0, (int)coarseverts.size() ); - Far::KernelBatchVector kernelBatches; - kernelBatches.push_back( - Far::StencilTablesFactory::Create(*vertexStencils)); - - controller->Compute( context, kernelBatches, vb ); + controller->Compute( context, vb ); int result = checkVertexBuffer(*refiner, refmesh, vb->BindCpuBuffer(), vb->GetNumElements()); @@ -383,11 +375,7 @@ checkMeshCL( FarTopologyRefiner *refiner, vb->UpdateData( coarseverts[0].GetPos(), 0, (int)coarseverts.size(), g_clQueue ); - Far::KernelBatchVector kernelBatches; - kernelBatches.push_back( - Far::StencilTablesFactory::Create(*vertexStencils)); - - controller->Compute( context, kernelBatches, vb ); + controller->Compute( context, vb ); // read data back from CL buffer size_t dataSize = vb->GetNumVertices() * vb->GetNumElements(); diff --git a/tutorials/osd/tutorial_0/osd_tutorial_0.cpp b/tutorials/osd/tutorial_0/osd_tutorial_0.cpp index d36315c1..a9ea4f32 100644 --- a/tutorials/osd/tutorial_0/osd_tutorial_0.cpp +++ b/tutorials/osd/tutorial_0/osd_tutorial_0.cpp @@ -75,8 +75,6 @@ int main(int, char **) { Osd::CpuComputeContext * context=0; - Far::KernelBatchVector batches; - // // Setup phase // @@ -92,9 +90,6 @@ int main(int, char **) { Far::StencilTables const * stencilTables = Far::StencilTablesFactory::Create(*refiner, options); - // We need a kernel batch to dispatch Compute launches - batches.push_back(Far::StencilTablesFactory::Create(*stencilTables)); - // Create an Osd Compute Context from the stencil tables context = Osd::CpuComputeContext::Create(stencilTables); @@ -122,7 +117,7 @@ int main(int, char **) { vbuffer->UpdateData(g_verts, 0, nCoarseVerts); // Launch the computation - controller.Compute(context, batches, vbuffer); + controller.Compute(context, vbuffer); } { // Visualization with Maya : print a MEL script that generates particles From 214e62e67a10d16031642c1fbcc86f5f224731bb Mon Sep 17 00:00:00 2001 From: Takahito Tejima Date: Thu, 9 Apr 2015 13:58:47 -0700 Subject: [PATCH 2/4] add a constructor to FarStencilTables to initialze members. --- opensubdiv/far/stencilTables.h | 1 + 1 file changed, 1 insertion(+) diff --git a/opensubdiv/far/stencilTables.h b/opensubdiv/far/stencilTables.h index 09cb0a95..1e5063b8 100644 --- a/opensubdiv/far/stencilTables.h +++ b/opensubdiv/far/stencilTables.h @@ -203,6 +203,7 @@ protected: void resize(int nstencils, int nelems); protected: + StencilTables() : _numControlVertices(0) {} friend class StencilTablesFactory; friend class GregoryBasisFactory; From 24a435da013a129985a06739b6964b316b3e4ed6 Mon Sep 17 00:00:00 2001 From: Takahito Tejima Date: Thu, 9 Apr 2015 13:59:59 -0700 Subject: [PATCH 3/4] avoid lauching stencil kernel and not to allocate device tables if the stencil table is empty. --- opensubdiv/osd/clComputeContext.cpp | 12 ++++--- opensubdiv/osd/cpuComputeController.cpp | 36 ++++++++++--------- opensubdiv/osd/cudaComputeContext.cpp | 12 ++++--- opensubdiv/osd/cudaComputeController.cpp | 32 +++++++++-------- opensubdiv/osd/d3d11ComputeContext.cpp | 31 +++++++++------- opensubdiv/osd/glslComputeContext.cpp | 20 ++++++----- .../glslTransformFeedbackComputeContext.cpp | 20 ++++++----- opensubdiv/osd/ompComputeController.cpp | 36 ++++++++++--------- opensubdiv/osd/tbbComputeController.cpp | 36 ++++++++++--------- 9 files changed, 137 insertions(+), 98 deletions(-) diff --git a/opensubdiv/osd/clComputeContext.cpp b/opensubdiv/osd/clComputeContext.cpp index 5f02a700..0069aa11 100644 --- a/opensubdiv/osd/clComputeContext.cpp +++ b/opensubdiv/osd/clComputeContext.cpp @@ -56,11 +56,15 @@ class CLComputeContext::CLStencilTables { public: CLStencilTables(Far::StencilTables const & stencilTables, cl_context clContext) { - _sizes = createCLBuffer(stencilTables.GetSizes(), clContext); - _offsets = createCLBuffer(stencilTables.GetOffsets(), clContext); - _indices = createCLBuffer(stencilTables.GetControlIndices(), clContext); - _weights = createCLBuffer(stencilTables.GetWeights(), clContext); _numStencils = stencilTables.GetNumStencils(); + if (_numStencils > 0) { + _sizes = createCLBuffer(stencilTables.GetSizes(), clContext); + _offsets = createCLBuffer(stencilTables.GetOffsets(), clContext); + _indices = createCLBuffer(stencilTables.GetControlIndices(), clContext); + _weights = createCLBuffer(stencilTables.GetWeights(), clContext); + } else { + _sizes = _offsets = _indices = _weights = NULL; + } } ~CLStencilTables() { diff --git a/opensubdiv/osd/cpuComputeController.cpp b/opensubdiv/osd/cpuComputeController.cpp index 4eca60cf..c23130d9 100644 --- a/opensubdiv/osd/cpuComputeController.cpp +++ b/opensubdiv/osd/cpuComputeController.cpp @@ -64,14 +64,16 @@ CpuComputeController::ApplyStencilTableKernel( int start = 0; int end = vertexStencils->GetNumStencils(); - CpuComputeStencils(_currentBindState.vertexDesc, - srcBuffer, destBuffer, - &vertexStencils->GetSizes().at(0), - &vertexStencils->GetOffsets().at(0), - &vertexStencils->GetControlIndices().at(0), - &vertexStencils->GetWeights().at(0), - start, - end); + if (end > start) { + CpuComputeStencils(_currentBindState.vertexDesc, + srcBuffer, destBuffer, + &vertexStencils->GetSizes().at(0), + &vertexStencils->GetOffsets().at(0), + &vertexStencils->GetControlIndices().at(0), + &vertexStencils->GetWeights().at(0), + start, + end); + } } Far::StencilTables const * varyingStencils = context->GetVaryingStencilTables(); @@ -88,14 +90,16 @@ CpuComputeController::ApplyStencilTableKernel( int start = 0; int end = varyingStencils->GetNumStencils(); - CpuComputeStencils(_currentBindState.varyingDesc, - srcBuffer, destBuffer, - &varyingStencils->GetSizes().at(0), - &varyingStencils->GetOffsets().at(0), - &varyingStencils->GetControlIndices().at(0), - &varyingStencils->GetWeights().at(0), - start, - end); + if (end > start) { + CpuComputeStencils(_currentBindState.varyingDesc, + srcBuffer, destBuffer, + &varyingStencils->GetSizes().at(0), + &varyingStencils->GetOffsets().at(0), + &varyingStencils->GetControlIndices().at(0), + &varyingStencils->GetWeights().at(0), + start, + end); + } } } diff --git a/opensubdiv/osd/cudaComputeContext.cpp b/opensubdiv/osd/cudaComputeContext.cpp index cac7b5c6..667056de 100644 --- a/opensubdiv/osd/cudaComputeContext.cpp +++ b/opensubdiv/osd/cudaComputeContext.cpp @@ -63,11 +63,15 @@ class CudaComputeContext::CudaStencilTables { public: CudaStencilTables(Far::StencilTables const & stencilTables) { - _sizes = createCudaBuffer(stencilTables.GetSizes()); - _offsets = createCudaBuffer(stencilTables.GetOffsets()); - _indices = createCudaBuffer(stencilTables.GetControlIndices()); - _weights = createCudaBuffer(stencilTables.GetWeights()); _numStencils = stencilTables.GetNumStencils(); + if (_numStencils > 0) { + _sizes = createCudaBuffer(stencilTables.GetSizes()); + _offsets = createCudaBuffer(stencilTables.GetOffsets()); + _indices = createCudaBuffer(stencilTables.GetControlIndices()); + _weights = createCudaBuffer(stencilTables.GetWeights()); + } else { + _sizes = _offsets = _indices = _weights = NULL; + } } ~CudaStencilTables() { diff --git a/opensubdiv/osd/cudaComputeController.cpp b/opensubdiv/osd/cudaComputeController.cpp index 53bb4c86..b6a7760e 100644 --- a/opensubdiv/osd/cudaComputeController.cpp +++ b/opensubdiv/osd/cudaComputeController.cpp @@ -64,13 +64,15 @@ CudaComputeController::ApplyStencilTableKernel( float * dst = const_cast(src) + context->GetNumControlVertices() * stride; - CudaComputeStencils(src, dst, length, stride, - (unsigned char const *)context->GetVertexStencilTablesSizes(), - (int const *)context->GetVertexStencilTablesOffsets(), - (int const *)context->GetVertexStencilTablesIndices(), - (float const *)context->GetVertexStencilTablesWeights(), - start, - end); + if (end > start) { + CudaComputeStencils(src, dst, length, stride, + (unsigned char const *)context->GetVertexStencilTablesSizes(), + (int const *)context->GetVertexStencilTablesOffsets(), + (int const *)context->GetVertexStencilTablesIndices(), + (float const *)context->GetVertexStencilTablesWeights(), + start, + end); + } } if (context->HasVaryingStencilTables()) { @@ -86,13 +88,15 @@ CudaComputeController::ApplyStencilTableKernel( float * dst = const_cast(src) + context->GetNumControlVertices() * stride; - CudaComputeStencils(src, dst, length, stride, - (unsigned char const *)context->GetVaryingStencilTablesSizes(), - (int const *)context->GetVaryingStencilTablesOffsets(), - (int const *)context->GetVaryingStencilTablesIndices(), - (float const *)context->GetVaryingStencilTablesWeights(), - start, - end); + if (end > start) { + CudaComputeStencils(src, dst, length, stride, + (unsigned char const *)context->GetVaryingStencilTablesSizes(), + (int const *)context->GetVaryingStencilTablesOffsets(), + (int const *)context->GetVaryingStencilTablesIndices(), + (float const *)context->GetVaryingStencilTablesWeights(), + start, + end); + } } } diff --git a/opensubdiv/osd/d3d11ComputeContext.cpp b/opensubdiv/osd/d3d11ComputeContext.cpp index 1899abc5..798d5eee 100644 --- a/opensubdiv/osd/d3d11ComputeContext.cpp +++ b/opensubdiv/osd/d3d11ComputeContext.cpp @@ -114,19 +114,26 @@ public: D3D11StencilTables(Far::StencilTables const & stencilTables, ID3D11DeviceContext *deviceContext) { - // convert unsigned char sizes buffer to ints (HLSL does not have uint8 type) - std::vector const sizes(stencilTables.GetSizes().begin(), - stencilTables.GetSizes().end()); - - _sizes.initialize(sizes, DXGI_FORMAT_R32_SINT, deviceContext); - - _offsets.initialize(stencilTables.GetOffsets(), DXGI_FORMAT_R32_SINT, deviceContext); - - _indices.initialize(stencilTables.GetControlIndices(), DXGI_FORMAT_R32_SINT, deviceContext); - - _weights.initialize(stencilTables.GetWeights(), DXGI_FORMAT_R32_FLOAT, deviceContext); - _numStencils = stencilTables.GetNumStencils(); + if (_numStencils > 0) { + // convert unsigned char sizes buffer to ints + // (HLSL does not have uint8 type) + std::vector const sizes(stencilTables.GetSizes().begin(), + stencilTables.GetSizes().end()); + + _sizes.initialize(sizes, + DXGI_FORMAT_R32_SINT, + deviceContext); + _offsets.initialize(stencilTables.GetOffsets(), + DXGI_FORMAT_R32_SINT, + deviceContext); + _indices.initialize(stencilTables.GetControlIndices(), + DXGI_FORMAT_R32_SINT, + deviceContext); + _weights.initialize(stencilTables.GetWeights(), + DXGI_FORMAT_R32_FLOAT, + deviceContext); + } } bool IsValid() const { diff --git a/opensubdiv/osd/glslComputeContext.cpp b/opensubdiv/osd/glslComputeContext.cpp index 54d2ce21..32886d74 100644 --- a/opensubdiv/osd/glslComputeContext.cpp +++ b/opensubdiv/osd/glslComputeContext.cpp @@ -67,18 +67,22 @@ class GLSLComputeContext::GLSLStencilTables { public: GLSLStencilTables(Far::StencilTables const & stencilTables) { - _sizes = createGLSLBuffer(stencilTables.GetSizes()); - _offsets = createGLSLBuffer(stencilTables.GetOffsets()); - _indices = createGLSLBuffer(stencilTables.GetControlIndices()); - _weights = createGLSLBuffer(stencilTables.GetWeights()); _numStencils = stencilTables.GetNumStencils(); + if (_numStencils > 0) { + _sizes = createGLSLBuffer(stencilTables.GetSizes()); + _offsets = createGLSLBuffer(stencilTables.GetOffsets()); + _indices = createGLSLBuffer(stencilTables.GetControlIndices()); + _weights = createGLSLBuffer(stencilTables.GetWeights()); + } else { + _sizes = _offsets = _indices = _weights = 0; + } } ~GLSLStencilTables() { - glDeleteBuffers(1, &_sizes); - glDeleteBuffers(1, &_offsets); - glDeleteBuffers(1, &_weights); - glDeleteBuffers(1, &_indices); + if (_sizes) glDeleteBuffers(1, &_sizes); + if (_offsets) glDeleteBuffers(1, &_offsets); + if (_weights) glDeleteBuffers(1, &_weights); + if (_indices) glDeleteBuffers(1, &_indices); } bool IsValid() const { diff --git a/opensubdiv/osd/glslTransformFeedbackComputeContext.cpp b/opensubdiv/osd/glslTransformFeedbackComputeContext.cpp index 50c9fa3c..ad44c462 100644 --- a/opensubdiv/osd/glslTransformFeedbackComputeContext.cpp +++ b/opensubdiv/osd/glslTransformFeedbackComputeContext.cpp @@ -82,18 +82,22 @@ class GLSLTransformFeedbackComputeContext::GLStencilTables { public: GLStencilTables(Far::StencilTables const & stencilTables) { - _sizes = createGLTextureBuffer(stencilTables.GetSizes(), GL_R8UI); - _offsets = createGLTextureBuffer(stencilTables.GetOffsets(), GL_R32I); - _indices = createGLTextureBuffer(stencilTables.GetControlIndices(), GL_R32I); - _weights = createGLTextureBuffer(stencilTables.GetWeights(), GL_R32F); _numStencils = stencilTables.GetNumStencils(); + if (_numStencils > 0) { + _sizes = createGLTextureBuffer(stencilTables.GetSizes(), GL_R8UI); + _offsets = createGLTextureBuffer(stencilTables.GetOffsets(), GL_R32I); + _indices = createGLTextureBuffer(stencilTables.GetControlIndices(), GL_R32I); + _weights = createGLTextureBuffer(stencilTables.GetWeights(), GL_R32F); + } else { + _sizes = _offsets = _indices = _weights = 0; + } } ~GLStencilTables() { - glDeleteTextures(1, &_sizes); - glDeleteTextures(1, &_offsets); - glDeleteTextures(1, &_weights); - glDeleteTextures(1, &_indices); + if (_sizes) glDeleteTextures(1, &_sizes); + if (_offsets) glDeleteTextures(1, &_offsets); + if (_weights) glDeleteTextures(1, &_weights); + if (_indices) glDeleteTextures(1, &_indices); } bool IsValid() const { diff --git a/opensubdiv/osd/ompComputeController.cpp b/opensubdiv/osd/ompComputeController.cpp index 3b9f1eeb..9e94a994 100644 --- a/opensubdiv/osd/ompComputeController.cpp +++ b/opensubdiv/osd/ompComputeController.cpp @@ -58,14 +58,16 @@ OmpComputeController::ApplyStencilTableKernel( float * destBuffer = _currentBindState.vertexBuffer + desc.offset + vertexStencils->GetNumControlVertices() * desc.stride; - OmpComputeStencils(_currentBindState.vertexDesc, - srcBuffer, destBuffer, - &vertexStencils->GetSizes().at(0), - &vertexStencils->GetOffsets().at(0), - &vertexStencils->GetControlIndices().at(0), - &vertexStencils->GetWeights().at(0), - start, - end); + if (end > start) { + OmpComputeStencils(_currentBindState.vertexDesc, + srcBuffer, destBuffer, + &vertexStencils->GetSizes().at(0), + &vertexStencils->GetOffsets().at(0), + &vertexStencils->GetControlIndices().at(0), + &vertexStencils->GetWeights().at(0), + start, + end); + } } Far::StencilTables const * varyingStencils = context->GetVaryingStencilTables(); @@ -82,14 +84,16 @@ OmpComputeController::ApplyStencilTableKernel( float * destBuffer = _currentBindState.varyingBuffer + desc.offset + varyingStencils->GetNumControlVertices() * desc.stride; - OmpComputeStencils(_currentBindState.varyingDesc, - srcBuffer, destBuffer, - &varyingStencils->GetSizes().at(0), - &varyingStencils->GetOffsets().at(0), - &varyingStencils->GetControlIndices().at(0), - &varyingStencils->GetWeights().at(0), - start, - end); + if (end > start) { + OmpComputeStencils(_currentBindState.varyingDesc, + srcBuffer, destBuffer, + &varyingStencils->GetSizes().at(0), + &varyingStencils->GetOffsets().at(0), + &varyingStencils->GetControlIndices().at(0), + &varyingStencils->GetWeights().at(0), + start, + end); + } } } diff --git a/opensubdiv/osd/tbbComputeController.cpp b/opensubdiv/osd/tbbComputeController.cpp index 6ce55113..75ddcf05 100644 --- a/opensubdiv/osd/tbbComputeController.cpp +++ b/opensubdiv/osd/tbbComputeController.cpp @@ -67,14 +67,16 @@ TbbComputeController::ApplyStencilTableKernel( float * destBuffer = _currentBindState.vertexBuffer + desc.offset + vertexStencils->GetNumControlVertices() * desc.stride; - TbbComputeStencils(_currentBindState.vertexDesc, - srcBuffer, destBuffer, - &vertexStencils->GetSizes().at(0), - &vertexStencils->GetOffsets().at(0), - &vertexStencils->GetControlIndices().at(0), - &vertexStencils->GetWeights().at(0), - start, - end); + if (end > start) { + TbbComputeStencils(_currentBindState.vertexDesc, + srcBuffer, destBuffer, + &vertexStencils->GetSizes().at(0), + &vertexStencils->GetOffsets().at(0), + &vertexStencils->GetControlIndices().at(0), + &vertexStencils->GetWeights().at(0), + start, + end); + } } Far::StencilTables const * varyingStencils = context->GetVaryingStencilTables(); @@ -91,14 +93,16 @@ TbbComputeController::ApplyStencilTableKernel( float * destBuffer = _currentBindState.varyingBuffer + desc.offset + varyingStencils->GetNumControlVertices() * desc.stride; - TbbComputeStencils(_currentBindState.varyingDesc, - srcBuffer, destBuffer, - &varyingStencils->GetSizes().at(0), - &varyingStencils->GetOffsets().at(0), - &varyingStencils->GetControlIndices().at(0), - &varyingStencils->GetWeights().at(0), - start, - end); + if (end > start) { + TbbComputeStencils(_currentBindState.varyingDesc, + srcBuffer, destBuffer, + &varyingStencils->GetSizes().at(0), + &varyingStencils->GetOffsets().at(0), + &varyingStencils->GetControlIndices().at(0), + &varyingStencils->GetWeights().at(0), + start, + end); + } } } From 1ec0b2b1f271e593a3e7a7053e33be12eea9e9c3 Mon Sep 17 00:00:00 2001 From: Takahito Tejima Date: Thu, 9 Apr 2015 17:07:52 -0700 Subject: [PATCH 4/4] add includes --- opensubdiv/osd/glslComputeController.h | 1 + opensubdiv/osd/glslTransformFeedbackComputeController.h | 1 + 2 files changed, 2 insertions(+) diff --git a/opensubdiv/osd/glslComputeController.h b/opensubdiv/osd/glslComputeController.h index 4b3b7127..8c87b37d 100644 --- a/opensubdiv/osd/glslComputeController.h +++ b/opensubdiv/osd/glslComputeController.h @@ -31,6 +31,7 @@ #include "../osd/vertexDescriptor.h" #include +#include namespace OpenSubdiv { namespace OPENSUBDIV_VERSION { diff --git a/opensubdiv/osd/glslTransformFeedbackComputeController.h b/opensubdiv/osd/glslTransformFeedbackComputeController.h index 50b38f54..3a9c92ce 100644 --- a/opensubdiv/osd/glslTransformFeedbackComputeController.h +++ b/opensubdiv/osd/glslTransformFeedbackComputeController.h @@ -31,6 +31,7 @@ #include "../osd/vertexDescriptor.h" #include +#include namespace OpenSubdiv { namespace OPENSUBDIV_VERSION {